diff options
author | Ryan Lortie <desrt@desrt.ca> | 2013-10-04 21:43:15 -0400 |
---|---|---|
committer | Ryan Lortie <desrt@desrt.ca> | 2013-10-04 21:43:15 -0400 |
commit | 2bb8dd2bb61bbcfe528ef7ff8dff78f328460369 (patch) | |
tree | 7d730f91ec516fb7cc0da6a94ec177219489f35a | |
parent | 40b616d64847bba6de03eb6b8e7a3e311c5f58cd (diff) |
Convert text index to a hash table
Just like the string list before it, we see a substantial performance
improvement from converting DfiTextIndex to be based on GHashTable instead of
GSequence.
-rw-r--r-- | src/dfi-builder.c | 31 | ||||
-rw-r--r-- | src/dfi-id-list.c | 4 | ||||
-rw-r--r-- | src/dfi-id-list.h | 2 | ||||
-rw-r--r-- | src/dfi-text-index.c | 163 | ||||
-rw-r--r-- | src/dfi-text-index.h | 13 |
5 files changed, 106 insertions, 107 deletions
diff --git a/src/dfi-builder.c b/src/dfi-builder.c index 1821e9e..444b757 100644 --- a/src/dfi-builder.c +++ b/src/dfi-builder.c @@ -50,11 +50,6 @@ typedef struct GString *string; /* file contents */ } DfiBuilder; -#define foreach_sequence_item_and_position(iter, sequence, counter) \ - for (counter = 0, iter = g_sequence_get_begin_iter (sequence); \ - !g_sequence_iter_is_end (iter); \ - iter = g_sequence_iter_next (iter), counter++) - static GHashTable * dfi_builder_get_string_table (DfiBuilder *builder, const gchar *locale) @@ -289,11 +284,10 @@ dfi_builder_write_text_index (DfiBuilder *builder, const gchar *key, gpointer data) { - GSequence *text_index = data; + DfiTextIndex *text_index = data; const gchar *locale = key; GHashTable *string_table; - GSequenceIter *iter; - const gchar **strings; + const gchar * const *tokens; guint *id_lists; guint offset; guint n_items; @@ -308,18 +302,16 @@ dfi_builder_write_text_index (DfiBuilder *builder, dfi_string_table_write (string_table, c_string_table, builder->string); } - n_items = g_sequence_get_length (text_index); - - strings = g_new (const gchar *, n_items); + tokens = dfi_text_index_get_tokens (text_index, &n_items); id_lists = g_new (guint, n_items); dfi_builder_align (builder, sizeof (guint16)); - foreach_sequence_item_and_position (iter, text_index, i) + for (i = 0; i < n_items; i++) { - GArray *id_list; + DfiIdList *id_list; - dfi_text_index_get_item (iter, &strings[i], &id_list); + id_list = dfi_text_index_get_id_list_for_token (text_index, tokens[i]); id_lists[i] = dfi_builder_write_id_list (builder, NULL, id_list); } @@ -331,11 +323,10 @@ dfi_builder_write_text_index (DfiBuilder *builder, for (i = 0; i < n_items; i++) { - dfi_builder_write_string (builder, locale, strings[i]); + dfi_builder_write_string (builder, locale, tokens[i]); dfi_builder_write_uint32 (builder, id_lists[i]); } - g_free (strings); g_free (id_lists); return offset; @@ -504,7 +495,7 @@ dfi_builder_add_strings (DfiBuilder *builder) } } -static GSequence * +static DfiTextIndex * dfi_builder_index_one_locale (DfiBuilder *builder, const gchar *locale) { @@ -512,7 +503,7 @@ dfi_builder_index_one_locale (DfiBuilder *builder, gchar **locale_variants; GHashTableIter keyfile_iter; gpointer key, val; - GSequence *text_index; + DfiTextIndex *text_index; if (locale) locale_variants = g_get_locale_variants (locale); @@ -549,6 +540,8 @@ dfi_builder_index_one_locale (DfiBuilder *builder, g_free (locale_variants); + dfi_text_index_convert (text_index); + return text_index; } @@ -571,7 +564,7 @@ dfi_builder_index_strings (DfiBuilder *builder) { const gchar *locale = locale_names[i]; GHashTable *string_table; - GSequence *text_index; + DfiTextIndex *text_index; text_index = dfi_builder_index_one_locale (builder, locale); g_hash_table_insert (builder->locale_text_indexes, g_strdup (locale), text_index); diff --git a/src/dfi-id-list.c b/src/dfi-id-list.c index 22813d4..90282b3 100644 --- a/src/dfi-id-list.c +++ b/src/dfi-id-list.c @@ -32,9 +32,9 @@ dfi_id_list_new (void) } void -dfi_id_list_free (GArray *id_list) +dfi_id_list_free (gpointer data) { - g_array_free (id_list, TRUE); + g_array_free (data, TRUE); } void diff --git a/src/dfi-id-list.h b/src/dfi-id-list.h index f2fe6b0..1c4d123 100644 --- a/src/dfi-id-list.h +++ b/src/dfi-id-list.h @@ -28,7 +28,7 @@ typedef GArray DfiIdList; DfiIdList * dfi_id_list_new (void); -void dfi_id_list_free (DfiIdList *id_list); +void dfi_id_list_free (gpointer id_list); void dfi_id_list_add_ids (DfiIdList *id_list, const guint16 *ids, diff --git a/src/dfi-text-index.c b/src/dfi-text-index.c index 19811ff..2f8e902 100644 --- a/src/dfi-text-index.c +++ b/src/dfi-text-index.c @@ -25,89 +25,56 @@ #include "dfi-id-list.h" #include <string.h> +#include <stdlib.h> -typedef struct +struct _DfiTextIndex { - /* Our GSequence compare function treats DesktopFileIndexTextIndexItem - * as a subclass of 'string' for purposes of comparison. - * - * The string, therefore, must come first. - */ - gchar *token; - - GArray *id_list; -} DesktopFileIndexTextIndexItem; - -static gint -dfi_text_index_string_compare (gconstpointer a, - gconstpointer b, - gpointer user_data) -{ - /* As mentioned above: the pointers can equivalently be pointers to a - * 'DesktopFileIndexTextIndexItem' or to a 'gchar *'. - */ - const gchar * const *str_a = a; - const gchar * const *str_b = b; - - return strcmp (*str_a, *str_b); -} - -static DesktopFileIndexTextIndexItem * -dfi_text_index_item_new (const gchar *token) -{ - DesktopFileIndexTextIndexItem *item; - - item = g_slice_new (DesktopFileIndexTextIndexItem); - item->token = g_strdup (token); - item->id_list = dfi_id_list_new (); - - return item; -} + GHashTable *table; + gchar **tokens; +}; -static void -dfi_text_index_item_free (gpointer data) +DfiTextIndex * +dfi_text_index_new (void) { - DesktopFileIndexTextIndexItem *item = data; - - dfi_id_list_free (item->id_list); - g_free (item->token); + DfiTextIndex *text_index; - g_slice_free (DesktopFileIndexTextIndexItem, item); -} + text_index = g_slice_new0 (DfiTextIndex); + text_index->table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, dfi_id_list_free); -GSequence * -dfi_text_index_new (void) -{ - return g_sequence_new (dfi_text_index_item_free); + return text_index; } void dfi_text_index_free (gpointer data) { - g_sequence_free (data); + DfiTextIndex *text_index = data; + + g_hash_table_unref (text_index->table); + g_free (text_index->tokens); + + g_slice_free (DfiTextIndex, text_index); } void -dfi_text_index_add_ids (GSequence *text_index, +dfi_text_index_add_ids (DfiTextIndex *text_index, const gchar *token, const guint16 *ids, gint n_ids) { - DesktopFileIndexTextIndexItem *item; - GSequenceIter *iter; + DfiIdList *id_list; - iter = g_sequence_lookup (text_index, &token, dfi_text_index_string_compare, NULL); - if (iter) - { - item = g_sequence_get (iter); - } - else + /* Ensure we're not already converted */ + g_assert (text_index->tokens == NULL); + + id_list = g_hash_table_lookup (text_index->table, token); + + if (id_list == NULL) { - item = dfi_text_index_item_new (token); - g_sequence_insert_sorted (text_index, item, dfi_text_index_string_compare, NULL); + id_list = dfi_id_list_new (); + g_hash_table_insert (text_index->table, g_strdup (token), id_list); } - dfi_id_list_add_ids (item->id_list, ids, n_ids); + dfi_id_list_add_ids (id_list, ids, n_ids); } static void @@ -196,7 +163,7 @@ dfi_text_index_split_words (const gchar *value) } void -dfi_text_index_add_ids_tokenised (GSequence *text_index, +dfi_text_index_add_ids_tokenised (DfiTextIndex *text_index, const gchar *string_to_tokenise, const guint16 *ids, gint n_ids) @@ -218,36 +185,70 @@ dfi_text_index_add_ids_tokenised (GSequence *text_index, dfi_text_index_add_ids (text_index, tokens[i], ids, n_ids); } - } void -dfi_text_index_get_item (GSequenceIter *iter, - const gchar **token, - GArray **id_list) +dfi_text_index_convert (DfiTextIndex *text_index) { - DesktopFileIndexTextIndexItem *item; + GHashTableIter iter; + gpointer key; + guint n, i; + + /* Ensure we're not already converted */ + g_assert (text_index->tokens == NULL); - item = g_sequence_get (iter); + n = g_hash_table_size (text_index->table); + text_index->tokens = g_new (gchar *, n + 1); + i = 0; - *token = item->token; - *id_list = item->id_list; + g_hash_table_iter_init (&iter, text_index->table); + while (g_hash_table_iter_next (&iter, &key, NULL)) + text_index->tokens[i++] = key; + g_assert_cmpint (i, ==, n); + text_index->tokens[n] = NULL; + + qsort (text_index->tokens, n, sizeof (char *), (GCompareFunc) strcmp); } -void -dfi_text_index_populate_strings (GSequence *text_index, - DfiStringTable *string_table) +const gchar * const * +dfi_text_index_get_tokens (DfiTextIndex *text_index, + guint *n_tokens) { - GSequenceIter *iter; + /* Ensure that we've been converted */ + g_assert (text_index->tokens); - iter = g_sequence_get_begin_iter (text_index); + if (n_tokens) + *n_tokens = g_hash_table_size (text_index->table); - while (!g_sequence_iter_is_end (iter)) - { - DesktopFileIndexTextIndexItem *item = g_sequence_get (iter); + return (const gchar **) text_index->tokens; +} - dfi_string_table_add_string (string_table, item->token); +DfiIdList * +dfi_text_index_get_id_list_for_token (DfiTextIndex *text_index, + const gchar *token) +{ + DfiIdList *id_list; - iter = g_sequence_iter_next (iter); - } + /* Ensure that we've been converted */ + g_assert (text_index->tokens); + + id_list = g_hash_table_lookup (text_index->table, token); + g_assert (id_list != NULL); + + return id_list; +} + +void +dfi_text_index_populate_strings (DfiTextIndex *text_index, + DfiStringTable *string_table) +{ + GHashTableIter iter; + gpointer string; + + /* Ensure that we've been converted */ + g_assert (text_index->tokens); + + g_hash_table_iter_init (&iter, text_index->table); + while (g_hash_table_iter_next (&iter, &string, NULL)) + dfi_string_table_add_string (string_table, string); } diff --git a/src/dfi-text-index.h b/src/dfi-text-index.h index 5e53dd8..1683a3d 100644 --- a/src/dfi-text-index.h +++ b/src/dfi-text-index.h @@ -23,8 +23,9 @@ #define __dfi_text_index_h__ #include "dfi-string-table.h" +#include "dfi-id-list.h" -typedef GSequence DfiTextIndex; +typedef struct _DfiTextIndex DfiTextIndex; DfiTextIndex * dfi_text_index_new (void); @@ -40,9 +41,13 @@ void dfi_text_index_add_ids_tokenised (DfiText const guint16 *ids, gint n_ids); -void dfi_text_index_get_item (GSequenceIter *iter, - const gchar **token, - GArray **id_list); +void dfi_text_index_convert (DfiTextIndex *text_index); + +const gchar * const * dfi_text_index_get_tokens (DfiTextIndex *text_index, + guint *n_tokens); + +DfiIdList * dfi_text_index_get_id_list_for_token (DfiTextIndex *text_index, + const gchar *token); void dfi_text_index_populate_strings (DfiTextIndex *text_index, DfiStringTable *string_table); |