Hi.
I had never used GScanner before, so I took this opportunity to see what it's all about. What I came up with is this quite general BibTeX parser that can extract information from entries in almost no time.
It still parses a text string embedded in the application code, but since replacing that with reading from a text file is trivial, I thought I would leave this as an exercise for the reader ;)
Here is the code:
Code:
#include <glib.h>
#include <string.h>
/* Test data: one BibTeX @phdthesis entry embedded as a string literal, so the
 * example can run without reading any input file.  Every field value is a
 * double-quoted string and the last field has no trailing comma. */
static const gchar *ttest = "@phdthesis{chow1983thesis,\n"
"author = \"Chowdhury, D.\",\n"
"institution = \"Department of Physics, IIT, Kanpur\",\n"
"location = \"Kanpur\",\n"
"publisher = \"Department of Physics, IIT, Kanpur\",\n"
"school = \"Department of Physics, IIT, Kanpur\",\n"
"title = \"{The Spin Glass Transition}\",\n"
"year = \"1983\"\n"
"}";
/*
 * output_entry:
 * @table: hash table mapping field names to field values (both C strings)
 *
 * Prints a "Citation entry:" header followed by one "key: value" line per
 * table element, then a blank line.  Lines appear in whatever order the hash
 * table iterator yields them.
 */
static void
output_entry (GHashTable *table)
{
  GHashTableIter iter;
  /* g_hash_table_iter_next() takes gpointer * out-parameters; handing it
   * char ** is an incompatible pointer type, so receive as gpointer and
   * convert when printing. */
  gpointer key, val;

  g_print ("Citation entry:\n");

  g_hash_table_iter_init (&iter, table);
  while (g_hash_table_iter_next (&iter, &key, &val))
    {
      g_print (" %16s: %s\n", (const char *) key, (const char *) val);
    }

  g_print ("\n");
}
/*
 * parse_entry:
 * @scanner: scanner positioned just before the '@' of an entry
 * @table:   hash table that receives the parsed data
 *
 * Parses one entry of the form
 *
 *   @type{id, key = "value", key = "value", ... }
 *
 * and stores it in @table: the entry type under "type", the citation key
 * under "id", and every field under its own name.  All keys and values are
 * freshly g_strdup()'ed strings whose ownership passes to @table, so the
 * table is expected to free them (main() creates it with g_free notifiers).
 *
 * The field list may be empty ("@type{id}") and a trailing comma before the
 * closing brace is accepted.  Only double-quoted string values are
 * recognised; anything else is treated as a syntax error.
 *
 * NOTE(review): fields share the table with the "type" and "id" entries, and
 * g_hash_table_insert() replaces the value of an existing key, so a field
 * that is itself called "type" or "id" overwrites the earlier value.
 *
 * Returns: G_TOKEN_NONE when a complete entry (including its closing brace)
 * has been consumed, G_TOKEN_ERROR on the first unexpected token.  On error
 * @table may hold the part of the entry that was parsed before the failure.
 */
static guint
parse_entry (GScanner *scanner,
             GHashTable *table)
{
  /* Entry starts with @ */
  g_scanner_get_next_token (scanner);
  if (scanner->token != '@')
    return G_TOKEN_ERROR;

  /* Entry type, e.g. "phdthesis" */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_IDENTIFIER)
    return G_TOKEN_ERROR;
  g_hash_table_insert (table, g_strdup ("type"),
                       g_strdup (scanner->value.v_identifier));

  /* Opening brace */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_LEFT_CURLY)
    return G_TOKEN_ERROR;

  /* Citation key */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_IDENTIFIER)
    return G_TOKEN_ERROR;
  g_hash_table_insert (table, g_strdup ("id"),
                       g_strdup (scanner->value.v_identifier));

  /* Fields: each iteration consumes either the closing brace or one
   * ", key = "value"" group. */
  while (TRUE)
    {
      char *key;

      /* After the citation key or a value we expect '}' or ','.  Accepting
       * '}' here also covers an entry that has no fields at all. */
      g_scanner_get_next_token (scanner);
      if (scanner->token == G_TOKEN_RIGHT_CURLY)
        break;
      if (scanner->token != G_TOKEN_COMMA)
        return G_TOKEN_ERROR;

      /* A comma directly followed by '}' is a trailing comma: eat the
       * brace and finish instead of demanding another field. */
      if (g_scanner_peek_next_token (scanner) == G_TOKEN_RIGHT_CURLY)
        {
          g_scanner_get_next_token (scanner);
          break;
        }

      /* Field name */
      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_IDENTIFIER)
        return G_TOKEN_ERROR;
      key = g_strdup (scanner->value.v_identifier);

      g_scanner_get_next_token (scanner);
      if (scanner->token != '=')
        {
          g_free (key);
          return G_TOKEN_ERROR;
        }

      /* Field value: must be a quoted string */
      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_STRING)
        {
          g_free (key);
          return G_TOKEN_ERROR;
        }

      /* The table takes ownership of both strings. */
      g_hash_table_insert (table, key, g_strdup (scanner->value.v_string));
    }

  /* The closing brace has already been consumed above. */
  return G_TOKEN_NONE;
}
int
main (int argc,
char **argv)
{
GScanner *scanner;
GHashTable *table;
guint ret;
scanner = g_scanner_new (NULL);
g_scanner_input_text (scanner, ttest, strlen (ttest));
table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
do
{
g_hash_table_remove_all (table);
ret = parse_entry (scanner, table);
if (ret == G_TOKEN_ERROR)
break;
else
output_entry (table);
g_scanner_peek_next_token (scanner);
}
while (scanner->next_token != G_TOKEN_EOF &&
scanner->next_token != G_TOKEN_ERROR);
/* finsish parsing */
g_scanner_destroy (scanner);
g_hash_table_destroy (table);
return 0;
}
Cheers,
Tadej