summaryrefslogtreecommitdiff
path: root/src/dfi-text-index.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/dfi-text-index.c')
-rw-r--r--src/dfi-text-index.c253
1 files changed, 253 insertions, 0 deletions
diff --git a/src/dfi-text-index.c b/src/dfi-text-index.c
new file mode 100644
index 0000000..19811ff
--- /dev/null
+++ b/src/dfi-text-index.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright © 2013 Canonical Limited
+ *
+ * update-desktop-database is free software; you can redistribute it
+ * and/or modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * update-desktop-database is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with update-desktop-database; see the file COPYING. If not,
+ * write to the Free Software Foundation, Inc., 59 Temple Place - Suite
+ * 330, Boston, MA 02111-1307, USA.
+ *
+ * Author: Ryan Lortie <desrt@desrt.ca>
+ */
+
+#include "dfi-text-index.h"
+
+#include "dfi-string-table.h"
+#include "dfi-id-list.h"
+
+#include <string.h>
+
+typedef struct
+{
+ /* Our GSequence compare function treats DesktopFileIndexTextIndexItem
+ * as a subclass of 'string' for purposes of comparison.
+ *
+ * The string, therefore, must come first.
+ */
+ gchar *token;
+
+ GArray *id_list;
+} DesktopFileIndexTextIndexItem;
+
+static gint
+dfi_text_index_string_compare (gconstpointer a,
+ gconstpointer b,
+ gpointer user_data)
+{
+ /* As mentioned above: the pointers can equivalently be pointers to a
+ * 'DesktopFileIndexTextIndexItem' or to a 'gchar *'.
+ */
+ const gchar * const *str_a = a;
+ const gchar * const *str_b = b;
+
+ return strcmp (*str_a, *str_b);
+}
+
+static DesktopFileIndexTextIndexItem *
+dfi_text_index_item_new (const gchar *token)
+{
+ DesktopFileIndexTextIndexItem *item;
+
+ item = g_slice_new (DesktopFileIndexTextIndexItem);
+ item->token = g_strdup (token);
+ item->id_list = dfi_id_list_new ();
+
+ return item;
+}
+
+static void
+dfi_text_index_item_free (gpointer data)
+{
+ DesktopFileIndexTextIndexItem *item = data;
+
+ dfi_id_list_free (item->id_list);
+ g_free (item->token);
+
+ g_slice_free (DesktopFileIndexTextIndexItem, item);
+}
+
+GSequence *
+dfi_text_index_new (void)
+{
+ return g_sequence_new (dfi_text_index_item_free);
+}
+
+void
+dfi_text_index_free (gpointer data)
+{
+ g_sequence_free (data);
+}
+
+void
+dfi_text_index_add_ids (GSequence *text_index,
+ const gchar *token,
+ const guint16 *ids,
+ gint n_ids)
+{
+ DesktopFileIndexTextIndexItem *item;
+ GSequenceIter *iter;
+
+ iter = g_sequence_lookup (text_index, &token, dfi_text_index_string_compare, NULL);
+ if (iter)
+ {
+ item = g_sequence_get (iter);
+ }
+ else
+ {
+ item = dfi_text_index_item_new (token);
+ g_sequence_insert_sorted (text_index, item, dfi_text_index_string_compare, NULL);
+ }
+
+ dfi_id_list_add_ids (item->id_list, ids, n_ids);
+}
+
+static void
+dfi_text_index_add_folded (GPtrArray *array,
+ const gchar *start,
+ const gchar *end)
+{
+ gchar *normal;
+
+ normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
+
+ /* TODO: Invent time machine. Converse with Mustafa Ataturk... */
+ if (strstr (normal, "ı") || strstr (normal, "İ"))
+ {
+ gchar *s = normal;
+ GString *tmp;
+
+ tmp = g_string_new (NULL);
+
+ while (*s)
+ {
+ gchar *i, *I, *e;
+
+ i = strstr (s, "ı");
+ I = strstr (s, "İ");
+
+ if (!i && !I)
+ break;
+ else if (i && !I)
+ e = i;
+ else if (I && !i)
+ e = I;
+ else if (i < I)
+ e = i;
+ else
+ e = I;
+
+ g_string_append_len (tmp, s, e - s);
+ g_string_append_c (tmp, 'i');
+ s = g_utf8_next_char (e);
+ }
+
+ g_string_append (tmp, s);
+ g_free (normal);
+ normal = g_string_free (tmp, FALSE);
+ }
+
+ g_ptr_array_add (array, g_utf8_casefold (normal, -1));
+ g_free (normal);
+}
+
+static gchar **
+dfi_text_index_split_words (const gchar *value)
+{
+ const gchar *start = NULL;
+ GPtrArray *result;
+ const gchar *s;
+
+ result = g_ptr_array_new ();
+
+ for (s = value; *s; s = g_utf8_next_char (s))
+ {
+ gunichar c = g_utf8_get_char (s);
+
+ if (start == NULL)
+ {
+ if (g_unichar_isalnum (c))
+ start = s;
+ }
+ else
+ {
+ if (!g_unichar_isalnum (c))
+ {
+ dfi_text_index_add_folded (result, start, s);
+ start = NULL;
+ }
+ }
+ }
+
+ if (start)
+ dfi_text_index_add_folded (result, start, s);
+
+ g_ptr_array_add (result, NULL);
+
+ return (gchar **) g_ptr_array_free (result, FALSE);
+}
+
+void
+dfi_text_index_add_ids_tokenised (GSequence *text_index,
+ const gchar *string_to_tokenise,
+ const guint16 *ids,
+ gint n_ids)
+{
+ gchar **tokens;
+ gint i;
+
+ tokens = dfi_text_index_split_words (string_to_tokenise);
+ for (i = 0; tokens[i]; i++)
+ {
+ gint j;
+
+ for (j = 0; j < i; j++)
+ if (g_str_equal (tokens[i], tokens[j]))
+ break;
+
+ if (j < i)
+ continue;
+
+ dfi_text_index_add_ids (text_index, tokens[i], ids, n_ids);
+ }
+
+}
+
+void
+dfi_text_index_get_item (GSequenceIter *iter,
+ const gchar **token,
+ GArray **id_list)
+{
+ DesktopFileIndexTextIndexItem *item;
+
+ item = g_sequence_get (iter);
+
+ *token = item->token;
+ *id_list = item->id_list;
+}
+
+void
+dfi_text_index_populate_strings (GSequence *text_index,
+ DfiStringTable *string_table)
+{
+ GSequenceIter *iter;
+
+ iter = g_sequence_get_begin_iter (text_index);
+
+ while (!g_sequence_iter_is_end (iter))
+ {
+ DesktopFileIndexTextIndexItem *item = g_sequence_get (iter);
+
+ dfi_string_table_add_string (string_table, item->token);
+
+ iter = g_sequence_iter_next (iter);
+ }
+}