summaryrefslogtreecommitdiff
path: root/tests/unicode-normalize.c
diff options
context:
space:
mode:
author: Owen Taylor <otaylor@redhat.com> 2001-07-02 00:49:21 +0000
committer: Owen Taylor <otaylor@src.gnome.org> 2001-07-02 00:49:21 +0000
commit: 4f96a13cba0fc1d445c76d30a7cb90b2971de06a (patch)
tree: 815b4a733bf96b7da6d4dd217cff264b6be31b60 /tests/unicode-normalize.c
parent: b37e7bbb53afd0f8d3386065aff0d74195737fd1 (diff)
Use G_N_ELEMENTS rather than a custom macro.
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com> * glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS rather than a custom macro. * glib/gen-unicode-tables.pl: Adapt to changes in table formats for Unicode 3.1. * glib/gunicode.h glib/guniprop.c glib/gunichartables.h glib/gen-unicode-tables.pl: Add case conversion functions g_utf8_casefold, g_utf8_strup, g_utf8_strdown. * tests/unicode-caseconv.c tests/gen-casefold-txt.pl tests/gen-casemap-txt.pl tests/casefold.txt tests/casemap.txt: Test cases for case conversion. * glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h glib/gen-unicode-tables.pl: Add function to do Unicode normalization, g_utf8_normalize(). * tests/unicode-normalize.c: Test program for normalization. * glib/gunicode.h glib/gunicollate.c: Add collation functions g_utf8_collate, g_utf8_collate_key. * tests/unicode-collate.c: Test program for collation. * glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized variable. * glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt: Make work with UTF-8 even if the locale isn't UTF-8 based. Still somewhat broken: if the format string contains characters not representable in the current locale, it will warn and not work. * glib/gdate.c: Use UTF-8 normalization and casefolding.
Diffstat (limited to 'tests/unicode-normalize.c')
-rw-r--r--tests/unicode-normalize.c194
1 files changed, 194 insertions, 0 deletions
diff --git a/tests/unicode-normalize.c b/tests/unicode-normalize.c
new file mode 100644
index 000000000..1c8318d97
--- /dev/null
+++ b/tests/unicode-normalize.c
@@ -0,0 +1,194 @@
+#include <glib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Overall verdict of the run: set to FALSE by test_form() on any mismatch;
+ * main() returns !success as the process exit status. */
+gboolean success = TRUE;
+
+/*
+ * decode:
+ * @input: one field of a test line: hexadecimal code points separated by
+ *         spaces (for example "0044 0307").
+ *
+ * Converts every code point to UTF-8 with g_unichar_to_utf8() and
+ * concatenates the results.
+ *
+ * Returns: a newly allocated string that the caller releases with g_free(),
+ *          or NULL when the field contains a code point this test does not
+ *          handle (above 0xffff, or in the range 0xac00..0xd7ff).
+ *
+ * The program exits with status 1 if a token cannot be parsed as a
+ * hexadecimal number (this includes an empty field).
+ */
+static char *
+decode (const gchar *input)
+{
+  unsigned ch;
+  int offset = 0;
+  GString *result = g_string_new (NULL);
+  int len;
+  char buf[6]; /* output buffer size required by g_unichar_to_utf8() */
+
+  /* Skip leading blanks up front.  sscanf() would skip them on its own, but
+   * the token-skipping loop below would then stop immediately on the first
+   * blank and the first code point would be decoded a second time. */
+  while (input[offset] == ' ')
+    offset++;
+
+  do
+    {
+      if (sscanf (input + offset, "%x", &ch) != 1)
+        {
+          fprintf (stderr, "Error parsing character string %s\n", input);
+          exit (1);
+        }
+
+      /* FIXME: We don't handle the > BMP or Hangul syllables */
+      if (ch > 0xffff ||                    /* > BMP */
+          (ch >= 0xac00 && ch <= 0xd7ff))   /* Hangul syllables */
+        {
+          g_string_free (result, TRUE);
+          return NULL;
+        }
+
+      len = g_unichar_to_utf8 (ch, buf);
+      g_string_append_len (result, buf, len);
+
+      /* Advance past the token just parsed, then past the separating
+       * blanks, so that offset lands on the next token or on the NUL. */
+      while (input[offset] && input[offset] != ' ')
+        offset++;
+      while (input[offset] && input[offset] == ' ')
+        offset++;
+    }
+  while (input[offset]);
+
+  /* Hand the character data to the caller; only the GString wrapper is
+   * freed here. */
+  return g_string_free (result, FALSE);
+}
+
+/* Printable names of the normalization forms, used in failure messages.
+ * test_form() indexes this table directly with its GNormalizeMode argument,
+ * which assumes the enum values are 0..3 in exactly this order -- TODO
+ * confirm against the GNormalizeMode declaration in gunicode.h. */
+const char *names[4] = {
+ "NFD",
+ "NFC",
+ "NFKD",
+ "NFKC"
+};
+
+/*
+ * Normalizes columns first .. last - 1 (0-based) of one test line with @mode
+ * and compares each result against column @expected.  Every mismatch is
+ * reported on stderr and clears the global success flag.
+ */
+static void
+check_columns (int             line,
+               GNormalizeMode  mode,
+               int             expected,
+               char          **c,
+               char          **raw,
+               int             first,
+               int             last)
+{
+  int i;
+
+  for (i = first; i < last; i++)
+    {
+      char *result = g_utf8_normalize (c[i], mode);
+
+      /* A NULL result counts as a failure instead of being handed to
+       * strcmp(). */
+      if (result == NULL || strcmp (result, c[expected]) != 0)
+        {
+          /* The column is reported 1-based for every column; raw[5] is the
+           * sixth field of the line (presumably the trailing comment of
+           * NormalizationTest.txt). */
+          fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]);
+          fprintf (stderr, " g_utf8_normalize (%s, %s) != %s\n",
+                   raw[i], names[mode], raw[expected]);
+          success = FALSE;
+        }
+
+      g_free (result);
+    }
+}
+
+/*
+ * test_form:
+ * @line:      line number in the test file, used only in messages.
+ * @mode:      normalization form to apply.
+ * @do_compat: for the non-compatibility forms, selects which columns are
+ *             checked: FALSE checks columns 0-2, TRUE checks columns 3-4.
+ *             For NFKD and NFKC all five columns are checked regardless.
+ * @expected:  0-based index of the column holding the expected result.
+ * @c:         the five decoded (UTF-8) columns.
+ * @raw:       the undecoded fields of the line, used only in messages;
+ *             must have at least six entries.
+ *
+ * Checks that normalizing the selected columns with @mode yields column
+ * @expected.  Failures are printed to stderr and recorded in the global
+ * success flag; nothing is returned.
+ */
+static void
+test_form (int line,
+           GNormalizeMode mode,
+           gboolean do_compat,
+           int expected,
+           char **c,
+           char **raw)
+{
+  gboolean mode_is_compat = (mode == G_NORMALIZE_NFKC ||
+                             mode == G_NORMALIZE_NFKD);
+
+  if (mode_is_compat || !do_compat)
+    check_columns (line, mode, expected, c, raw, 0, 3);
+
+  if (mode_is_compat || do_compat)
+    check_columns (line, mode, expected, c, raw, 3, 5);
+}
+
+/*
+ * process_one:
+ * @line:    line number in the test file, used only in messages.
+ * @columns: NULL-terminated vector with the ';'-separated fields of the line.
+ *
+ * Decodes the first five fields and runs all normalization checks on them.
+ * A line is silently skipped when decode() returns NULL for any field.
+ *
+ * Returns: TRUE when the line was processed or skipped, FALSE when it has
+ *          fewer than six fields (five data columns plus the field that
+ *          test_form() prints in failure messages).
+ */
+static gboolean
+process_one (int line, gchar **columns)
+{
+  char *c[5];
+  int i;
+  int n_columns = 0;
+  gboolean skip = FALSE;
+
+  /* Validate the field count before touching columns[0..5]: a blank or
+   * truncated line would otherwise pass NULL to decode() or read past the
+   * end of the vector. */
+  while (n_columns < 6 && columns[n_columns] != NULL)
+    n_columns++;
+
+  if (n_columns < 6)
+    {
+      fprintf (stderr, "Line %d: expected at least 6 fields, found %d\n",
+               line, n_columns);
+      return FALSE;
+    }
+
+  /* Decode every column even after a skip was detected, so that c[] is
+   * fully initialized for the g_free() loop below. */
+  for (i = 0; i < 5; i++)
+    {
+      c[i] = decode (columns[i]);
+      if (!c[i])
+        skip = TRUE;
+    }
+
+  if (!skip)
+    {
+      test_form (line, G_NORMALIZE_NFD, FALSE, 2, c, columns);
+      test_form (line, G_NORMALIZE_NFD, TRUE, 4, c, columns);
+      test_form (line, G_NORMALIZE_NFC, FALSE, 1, c, columns);
+      test_form (line, G_NORMALIZE_NFC, TRUE, 3, c, columns);
+      test_form (line, G_NORMALIZE_NFKD, TRUE, 4, c, columns);
+      test_form (line, G_NORMALIZE_NFKC, TRUE, 3, c, columns);
+    }
+
+  for (i = 0; i < 5; i++)
+    g_free (c[i]);
+
+  return TRUE;
+}
+
+/*
+ * Usage: unicode-normalize NormalizationTest.txt [LINE]
+ *
+ * Reads the test file line by line.  Lines starting with '#' and blank
+ * lines are ignored, lines starting with '@' are echoed as a progress
+ * message, and every other line is split on ';' and handed to
+ * process_one().  When LINE is given and non-zero, only that line
+ * (1-based) is processed.
+ *
+ * Returns 0 when every check passed, 1 on a usage error, an I/O error, a
+ * malformed line or any failed check.
+ */
+int main (int argc, char **argv)
+{
+  GIOChannel *in;
+  GError *error = NULL;
+  GString *buffer;
+  int line_to_do = 0;
+  int line = 1;
+
+  if (argc != 2 && argc != 3)
+    {
+      fprintf (stderr, "Usage: unicode-normalize NormalizationTest.txt [LINE]\n");
+      return 1;
+    }
+
+  if (argc == 3)
+    {
+      char *end;
+      long value = strtol (argv[2], &end, 10);
+
+      /* Reject garbage instead of silently treating it as 0, which would
+       * mean "process every line". */
+      if (end == argv[2] || *end != '\0' || value < 0 || value > G_MAXINT)
+        {
+          fprintf (stderr, "Invalid line number '%s'\n", argv[2]);
+          return 1;
+        }
+
+      line_to_do = (int) value;
+    }
+
+  in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error);
+  if (!in)
+    {
+      fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);
+      return 1;
+    }
+
+  buffer = g_string_new (NULL);
+
+  while (g_io_channel_read_line_string (in, buffer, &term_pos_unused (0), &error) == G_IO_STATUS_NORMAL)
+    ;
+
+  return !success;
+}