summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyan Lortie <desrt@desrt.ca>2013-10-04 21:43:15 -0400
committerRyan Lortie <desrt@desrt.ca>2013-10-04 21:43:15 -0400
commit2bb8dd2bb61bbcfe528ef7ff8dff78f328460369 (patch)
tree7d730f91ec516fb7cc0da6a94ec177219489f35a
parent40b616d64847bba6de03eb6b8e7a3e311c5f58cd (diff)
Convert text index to a hash table
Just like the string list before it, we see a substantial performance improvement from converting DfiTextIndex to be based on GHashTable instead of GSequence.
-rw-r--r--src/dfi-builder.c31
-rw-r--r--src/dfi-id-list.c4
-rw-r--r--src/dfi-id-list.h2
-rw-r--r--src/dfi-text-index.c163
-rw-r--r--src/dfi-text-index.h13
5 files changed, 106 insertions, 107 deletions
diff --git a/src/dfi-builder.c b/src/dfi-builder.c
index 1821e9e..444b757 100644
--- a/src/dfi-builder.c
+++ b/src/dfi-builder.c
@@ -50,11 +50,6 @@ typedef struct
GString *string; /* file contents */
} DfiBuilder;
-#define foreach_sequence_item_and_position(iter, sequence, counter) \
- for (counter = 0, iter = g_sequence_get_begin_iter (sequence); \
- !g_sequence_iter_is_end (iter); \
- iter = g_sequence_iter_next (iter), counter++)
-
static GHashTable *
dfi_builder_get_string_table (DfiBuilder *builder,
const gchar *locale)
@@ -289,11 +284,10 @@ dfi_builder_write_text_index (DfiBuilder *builder,
const gchar *key,
gpointer data)
{
- GSequence *text_index = data;
+ DfiTextIndex *text_index = data;
const gchar *locale = key;
GHashTable *string_table;
- GSequenceIter *iter;
- const gchar **strings;
+ const gchar * const *tokens;
guint *id_lists;
guint offset;
guint n_items;
@@ -308,18 +302,16 @@ dfi_builder_write_text_index (DfiBuilder *builder,
dfi_string_table_write (string_table, c_string_table, builder->string);
}
- n_items = g_sequence_get_length (text_index);
-
- strings = g_new (const gchar *, n_items);
+ tokens = dfi_text_index_get_tokens (text_index, &n_items);
id_lists = g_new (guint, n_items);
dfi_builder_align (builder, sizeof (guint16));
- foreach_sequence_item_and_position (iter, text_index, i)
+ for (i = 0; i < n_items; i++)
{
- GArray *id_list;
+ DfiIdList *id_list;
- dfi_text_index_get_item (iter, &strings[i], &id_list);
+ id_list = dfi_text_index_get_id_list_for_token (text_index, tokens[i]);
id_lists[i] = dfi_builder_write_id_list (builder, NULL, id_list);
}
@@ -331,11 +323,10 @@ dfi_builder_write_text_index (DfiBuilder *builder,
for (i = 0; i < n_items; i++)
{
- dfi_builder_write_string (builder, locale, strings[i]);
+ dfi_builder_write_string (builder, locale, tokens[i]);
dfi_builder_write_uint32 (builder, id_lists[i]);
}
- g_free (strings);
g_free (id_lists);
return offset;
@@ -504,7 +495,7 @@ dfi_builder_add_strings (DfiBuilder *builder)
}
}
-static GSequence *
+static DfiTextIndex *
dfi_builder_index_one_locale (DfiBuilder *builder,
const gchar *locale)
{
@@ -512,7 +503,7 @@ dfi_builder_index_one_locale (DfiBuilder *builder,
gchar **locale_variants;
GHashTableIter keyfile_iter;
gpointer key, val;
- GSequence *text_index;
+ DfiTextIndex *text_index;
if (locale)
locale_variants = g_get_locale_variants (locale);
@@ -549,6 +540,8 @@ dfi_builder_index_one_locale (DfiBuilder *builder,
g_free (locale_variants);
+ dfi_text_index_convert (text_index);
+
return text_index;
}
@@ -571,7 +564,7 @@ dfi_builder_index_strings (DfiBuilder *builder)
{
const gchar *locale = locale_names[i];
GHashTable *string_table;
- GSequence *text_index;
+ DfiTextIndex *text_index;
text_index = dfi_builder_index_one_locale (builder, locale);
g_hash_table_insert (builder->locale_text_indexes, g_strdup (locale), text_index);
diff --git a/src/dfi-id-list.c b/src/dfi-id-list.c
index 22813d4..90282b3 100644
--- a/src/dfi-id-list.c
+++ b/src/dfi-id-list.c
@@ -32,9 +32,9 @@ dfi_id_list_new (void)
}
void
-dfi_id_list_free (GArray *id_list)
+dfi_id_list_free (gpointer data)
{
- g_array_free (id_list, TRUE);
+ g_array_free (data, TRUE);
}
void
diff --git a/src/dfi-id-list.h b/src/dfi-id-list.h
index f2fe6b0..1c4d123 100644
--- a/src/dfi-id-list.h
+++ b/src/dfi-id-list.h
@@ -28,7 +28,7 @@ typedef GArray DfiIdList;
DfiIdList * dfi_id_list_new (void);
-void dfi_id_list_free (DfiIdList *id_list);
+void dfi_id_list_free (gpointer id_list);
void dfi_id_list_add_ids (DfiIdList *id_list,
const guint16 *ids,
diff --git a/src/dfi-text-index.c b/src/dfi-text-index.c
index 19811ff..2f8e902 100644
--- a/src/dfi-text-index.c
+++ b/src/dfi-text-index.c
@@ -25,89 +25,56 @@
#include "dfi-id-list.h"
#include <string.h>
+#include <stdlib.h>
-typedef struct
+struct _DfiTextIndex
{
- /* Our GSequence compare function treats DesktopFileIndexTextIndexItem
- * as a subclass of 'string' for purposes of comparison.
- *
- * The string, therefore, must come first.
- */
- gchar *token;
-
- GArray *id_list;
-} DesktopFileIndexTextIndexItem;
-
-static gint
-dfi_text_index_string_compare (gconstpointer a,
- gconstpointer b,
- gpointer user_data)
-{
- /* As mentioned above: the pointers can equivalently be pointers to a
- * 'DesktopFileIndexTextIndexItem' or to a 'gchar *'.
- */
- const gchar * const *str_a = a;
- const gchar * const *str_b = b;
-
- return strcmp (*str_a, *str_b);
-}
-
-static DesktopFileIndexTextIndexItem *
-dfi_text_index_item_new (const gchar *token)
-{
- DesktopFileIndexTextIndexItem *item;
-
- item = g_slice_new (DesktopFileIndexTextIndexItem);
- item->token = g_strdup (token);
- item->id_list = dfi_id_list_new ();
-
- return item;
-}
+ GHashTable *table;
+ gchar **tokens;
+};
-static void
-dfi_text_index_item_free (gpointer data)
+DfiTextIndex *
+dfi_text_index_new (void)
{
- DesktopFileIndexTextIndexItem *item = data;
-
- dfi_id_list_free (item->id_list);
- g_free (item->token);
+ DfiTextIndex *text_index;
- g_slice_free (DesktopFileIndexTextIndexItem, item);
-}
+ text_index = g_slice_new0 (DfiTextIndex);
+ text_index->table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, dfi_id_list_free);
-GSequence *
-dfi_text_index_new (void)
-{
- return g_sequence_new (dfi_text_index_item_free);
+ return text_index;
}
void
dfi_text_index_free (gpointer data)
{
- g_sequence_free (data);
+ DfiTextIndex *text_index = data;
+
+ g_hash_table_unref (text_index->table);
+ g_free (text_index->tokens);
+
+ g_slice_free (DfiTextIndex, text_index);
}
void
-dfi_text_index_add_ids (GSequence *text_index,
+dfi_text_index_add_ids (DfiTextIndex *text_index,
const gchar *token,
const guint16 *ids,
gint n_ids)
{
- DesktopFileIndexTextIndexItem *item;
- GSequenceIter *iter;
+ DfiIdList *id_list;
- iter = g_sequence_lookup (text_index, &token, dfi_text_index_string_compare, NULL);
- if (iter)
- {
- item = g_sequence_get (iter);
- }
- else
+ /* Ensure we're not already converted */
+ g_assert (text_index->tokens == NULL);
+
+ id_list = g_hash_table_lookup (text_index->table, token);
+
+ if (id_list == NULL)
{
- item = dfi_text_index_item_new (token);
- g_sequence_insert_sorted (text_index, item, dfi_text_index_string_compare, NULL);
+ id_list = dfi_id_list_new ();
+ g_hash_table_insert (text_index->table, g_strdup (token), id_list);
}
- dfi_id_list_add_ids (item->id_list, ids, n_ids);
+ dfi_id_list_add_ids (id_list, ids, n_ids);
}
static void
@@ -196,7 +163,7 @@ dfi_text_index_split_words (const gchar *value)
}
void
-dfi_text_index_add_ids_tokenised (GSequence *text_index,
+dfi_text_index_add_ids_tokenised (DfiTextIndex *text_index,
const gchar *string_to_tokenise,
const guint16 *ids,
gint n_ids)
@@ -218,36 +185,70 @@ dfi_text_index_add_ids_tokenised (GSequence *text_index,
dfi_text_index_add_ids (text_index, tokens[i], ids, n_ids);
}
-
}
void
-dfi_text_index_get_item (GSequenceIter *iter,
- const gchar **token,
- GArray **id_list)
+dfi_text_index_convert (DfiTextIndex *text_index)
{
- DesktopFileIndexTextIndexItem *item;
+ GHashTableIter iter;
+ gpointer key;
+ guint n, i;
+
+ /* Ensure we're not already converted */
+ g_assert (text_index->tokens == NULL);
- item = g_sequence_get (iter);
+ n = g_hash_table_size (text_index->table);
+ text_index->tokens = g_new (gchar *, n + 1);
+ i = 0;
- *token = item->token;
- *id_list = item->id_list;
+ g_hash_table_iter_init (&iter, text_index->table);
+ while (g_hash_table_iter_next (&iter, &key, NULL))
+ text_index->tokens[i++] = key;
+ g_assert_cmpint (i, ==, n);
+ text_index->tokens[n] = NULL;
+
+ qsort (text_index->tokens, n, sizeof (char *), (GCompareFunc) strcmp);
}
-void
-dfi_text_index_populate_strings (GSequence *text_index,
- DfiStringTable *string_table)
+const gchar * const *
+dfi_text_index_get_tokens (DfiTextIndex *text_index,
+ guint *n_tokens)
{
- GSequenceIter *iter;
+ /* Ensure that we've been converted */
+ g_assert (text_index->tokens);
- iter = g_sequence_get_begin_iter (text_index);
+ if (n_tokens)
+ *n_tokens = g_hash_table_size (text_index->table);
- while (!g_sequence_iter_is_end (iter))
- {
- DesktopFileIndexTextIndexItem *item = g_sequence_get (iter);
+ return (const gchar **) text_index->tokens;
+}
- dfi_string_table_add_string (string_table, item->token);
+DfiIdList *
+dfi_text_index_get_id_list_for_token (DfiTextIndex *text_index,
+ const gchar *token)
+{
+ DfiIdList *id_list;
- iter = g_sequence_iter_next (iter);
- }
+ /* Ensure that we've been converted */
+ g_assert (text_index->tokens);
+
+ id_list = g_hash_table_lookup (text_index->table, token);
+ g_assert (id_list != NULL);
+
+ return id_list;
+}
+
+void
+dfi_text_index_populate_strings (DfiTextIndex *text_index,
+ DfiStringTable *string_table)
+{
+ GHashTableIter iter;
+ gpointer string;
+
+ /* Ensure that we've been converted */
+ g_assert (text_index->tokens);
+
+ g_hash_table_iter_init (&iter, text_index->table);
+ while (g_hash_table_iter_next (&iter, &string, NULL))
+ dfi_string_table_add_string (string_table, string);
}
diff --git a/src/dfi-text-index.h b/src/dfi-text-index.h
index 5e53dd8..1683a3d 100644
--- a/src/dfi-text-index.h
+++ b/src/dfi-text-index.h
@@ -23,8 +23,9 @@
#define __dfi_text_index_h__
#include "dfi-string-table.h"
+#include "dfi-id-list.h"
-typedef GSequence DfiTextIndex;
+typedef struct _DfiTextIndex DfiTextIndex;
DfiTextIndex * dfi_text_index_new (void);
@@ -40,9 +41,13 @@ void dfi_text_index_add_ids_tokenised (DfiText
const guint16 *ids,
gint n_ids);
-void dfi_text_index_get_item (GSequenceIter *iter,
- const gchar **token,
- GArray **id_list);
+void dfi_text_index_convert (DfiTextIndex *text_index);
+
+const gchar * const * dfi_text_index_get_tokens (DfiTextIndex *text_index,
+ guint *n_tokens);
+
+DfiIdList * dfi_text_index_get_id_list_for_token (DfiTextIndex *text_index,
+ const gchar *token);
void dfi_text_index_populate_strings (DfiTextIndex *text_index,
DfiStringTable *string_table);