diff options
author | Owen Taylor <otaylor@redhat.com> | 2001-07-02 00:49:21 +0000 |
---|---|---|
committer | Owen Taylor <otaylor@src.gnome.org> | 2001-07-02 00:49:21 +0000 |
commit | 4f96a13cba0fc1d445c76d30a7cb90b2971de06a (patch) | |
tree | 815b4a733bf96b7da6d4dd217cff264b6be31b60 /tests/unicode-normalize.c | |
parent | b37e7bbb53afd0f8d3386065aff0d74195737fd1 (diff) |
Use G_N_ELEMENTS rather than a custom macro.
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com>
* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
rather than a custom macro.
* glib/gen-unicode-tables.pl: Adapt to changes in table
formats for Unicode 3.1
* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
glib/gen-unicode-tables.pl: Add case conversion functions
g_utf8_casefold, g_utf8_strup, g_utf8_strdown.
* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
tests/gen-casemap-txt.pl tests/casefold.txt
tests/casemap.txt: Test cases for case conversion.
* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
glib/gen-unicode-tables.pl: Add function to do Unicode
normalization g_utf8_normalize().
* tests/unicode-normalize.c: Test program for normalization.
* glib/gunicode.h glib/gunicollate.c: Add collation functions
g_utf8_collate, g_utf8_collate_key.
* tests/unicode-collate.c: Test program for collation.
* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
variable.
* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
Make work with UTF-8 even if the locale isn't UTF-8 based.
Still somewhat broken: if the format string contains
characters not representable in the current locale, will warn
and not work.
* glib/gdate.c: Use UTF-8 normalization and casefolding.
Diffstat (limited to 'tests/unicode-normalize.c')
-rw-r--r-- | tests/unicode-normalize.c | 194 |
1 files changed, 194 insertions, 0 deletions
diff --git a/tests/unicode-normalize.c b/tests/unicode-normalize.c new file mode 100644 index 000000000..1c8318d97 --- /dev/null +++ b/tests/unicode-normalize.c @@ -0,0 +1,194 @@ +#include <glib.h> +#include <stdio.h> +#include <stdlib.h> + +gboolean success = TRUE; + +static char * +decode (const gchar *input) +{ + unsigned ch; + int offset = 0; + GString *result = g_string_new (NULL); + int len; + char buf[6]; + + do + { + if (sscanf (input + offset, "%x", &ch) != 1) + { + fprintf (stderr, "Error parsing character string %s\n", input); + exit (1); + } + + /* FIXME: We don't handle the > BMP or Hangul syllables */ + if (ch > 0xffff || /* > BMP */ + (ch >= 0xac00 && ch <= 0xd7ff)) /* Hangul syllables */ + { + g_string_free (result, TRUE); + return NULL; + } + + len = g_unichar_to_utf8 (ch, buf); + g_string_append_len (result, buf, len); + + while (input[offset] && input[offset] != ' ') + offset++; + while (input[offset] && input[offset] == ' ') + offset++; + } + while (input[offset]); + + return g_string_free (result, FALSE); +} + +const char *names[4] = { + "NFD", + "NFC", + "NFKD", + "NFKC" +}; + +static void +test_form (int line, + GNormalizeMode mode, + gboolean do_compat, + int expected, + char **c, + char **raw) +{ + int i; + + gboolean mode_is_compat = (mode == G_NORMALIZE_NFKC || + mode == G_NORMALIZE_NFKD); + + if (mode_is_compat || !do_compat) + { + for (i = 0; i < 3; i++) + { + char *result = g_utf8_normalize (c[i], mode); + if (strcmp (result, c[expected]) != 0) + { + fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]); + fprintf (stderr, " g_utf8_normalize (%s, %s) != %s\n", + raw[i], names[mode], raw[expected]); + success = FALSE; + } + + g_free (result); + } + } + if (mode_is_compat || do_compat) + { + for (i = 3; i < 5; i++) + { + char *result = g_utf8_normalize (c[i], mode); + if (strcmp (result, c[expected]) != 0) + { + fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]); + fprintf (stderr, " g_utf8_normalize (%s, %s) != %s\n", 
+ raw[i], names[mode], raw[expected]); + success = FALSE; + } + + g_free (result); + } + } +} + +static gboolean +process_one (int line, gchar **columns) +{ + char *c[5]; + int i; + gboolean skip = FALSE; + + for (i=0; i < 5; i++) + { + c[i] = decode(columns[i]); + if (!c[i]) + skip = TRUE; + } + + if (!skip) + { + test_form (line, G_NORMALIZE_NFD, FALSE, 2, c, columns); + test_form (line, G_NORMALIZE_NFD, TRUE, 4, c, columns); + test_form (line, G_NORMALIZE_NFC, FALSE, 1, c, columns); + test_form (line, G_NORMALIZE_NFC, TRUE, 3, c, columns); + test_form (line, G_NORMALIZE_NFKD, TRUE, 4, c, columns); + test_form (line, G_NORMALIZE_NFKC, TRUE, 3, c, columns); + } + + for (i=0; i < 5; i++) + g_free (c[i]); + + return TRUE; +} + +int main (int argc, char **argv) +{ + GIOChannel *in; + GError *error = NULL; + GString *buffer = g_string_new (NULL); + int line_to_do = 0; + int line = 1; + + if (argc != 2 && argc != 3) + { + fprintf (stderr, "Usage: unicode-normalize NormalizationTest.txt LINE\n"); + return 1; + } + + if (argc == 3) + line_to_do = atoi(argv[2]); + + in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error); + if (!in) + { + fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message); + return 1; + } + + while (TRUE) + { + gsize term_pos; + gchar **columns; + + if (g_io_channel_read_line_string (in, buffer, &term_pos, &error) != G_IO_STATUS_NORMAL) + break; + + if (line_to_do && line != line_to_do) + goto next; + + buffer->str[term_pos] = '\0'; + + if (buffer->str[0] == '#') /* Comment */ + goto next; + if (buffer->str[0] == '@') /* Part */ + { + fprintf (stderr, "\nProcessing %s\n", buffer->str + 1); + goto next; + } + + columns = g_strsplit (buffer->str, ";", -1); + if (!process_one (line, columns)) + return 1; + g_strfreev (columns); + + next: + g_string_truncate (buffer, 0); + line++; + } + + if (error) + { + fprintf (stderr, "Error reading test file, %s\n", error->message); + return 1; + } + + g_io_channel_close (in); + g_string_free 
(buffer, TRUE); + + return !success; +} |