diff options
author | behdad <behdad> | 2001-03-26 16:15:52 +0000 |
---|---|---|
committer | behdad <behdad> | 2001-03-26 16:15:52 +0000 |
commit | 0ef1a61fc316161edcd15074dba480d52836258f (patch) | |
tree | 1773d026154cc12d5f143b3b01e18afee028de85 /fribidi_char_sets.c | |
parent | 736d073fe0fce8d677ee8179321e3ae443f52c7c (diff) |
Final work for the next release
Diffstat (limited to 'fribidi_char_sets.c')
-rw-r--r-- | fribidi_char_sets.c | 775 |
1 files changed, 117 insertions, 658 deletions
diff --git a/fribidi_char_sets.c b/fribidi_char_sets.c index 55f095a..86f7c4b 100644 --- a/fribidi_char_sets.c +++ b/fribidi_char_sets.c @@ -1,5 +1,6 @@ /* FriBidi - Library of BiDi algorithm * Copyright (C) 1999 Dov Grobgeld + * Copyright (C) 2001 Behdad Esfahbod * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -16,695 +17,153 @@ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ -#include "fribidi.h" - -#define ISO_ALEF 224 -#define ISO_TAV 250 - -/* The following are proposed extensions to iso-8859-8. */ -#define ISO_8859_8_LRM 253 -#define ISO_8859_8_RLM 254 -#define ISO_8859_8_LRE 251 -#define ISO_8859_8_RLE 252 -#define ISO_8859_8_PDF 221 -#define ISO_8859_8_LRO 219 -#define ISO_8859_8_RLO 220 - -#define UNI_ALEF 0x05D0 -#define UNI_TAV 0x05EA -#define UNI_LRM 0x200E -#define UNI_RLM 0x200F -#define UNI_LRE 0x202a -#define UNI_RLE 0x202b -#define UNI_PDF 0x202c -#define UNI_LRO 0x202d -#define UNI_RLO 0x202e - -#define CP1255_SHEVA 0xC0 -#define UNI_SHEVA 0x05B0 -#define CP1255_SOF_PASUQ 0xD3 -#define UNI_SOF_PASUQ 0x05C3 -#define CP1255_DOUBLE_VAV 0xD4 -#define UNI_DOUBLE_VAV 0x05F0 -#define CP1255_GERSHAYIM 0xD8 -#define UNI_GERSHAYIM 0x05F4 - -#define ISO_HAMZA 0xc1 -#define ISO_SUKUN 0xf2 -#define UNI_HAMZA 0x0621 -#define UNI_SUKUN 0x0652 - -#define CP1256_DAD 0xD6 -#define UNI_DAD 0x0636 - -FriBidiChar -fribidi_iso8859_8_to_unicode_c(guchar ch) -{ - /* optimization */ - if (ch < ISO_8859_8_LRO) - return ch; - else if (ch >= ISO_ALEF && ch <= ISO_TAV) - return ch-ISO_ALEF+UNI_ALEF; - switch(ch) - { - case ISO_8859_8_RLM: return UNI_RLM; - case ISO_8859_8_LRM: return UNI_LRM; - case ISO_8859_8_RLO: return UNI_RLO; - case ISO_8859_8_LRO: return UNI_LRO; - case ISO_8859_8_RLE: return UNI_RLE; - case ISO_8859_8_LRE: return UNI_LRE; - case ISO_8859_8_PDF: return UNI_PDF; - default: - return '?'; /* This shouldn't happen! */ - } -} - -guchar -fribidi_unicode_to_iso8859_8_c(FriBidiChar uch) -{ - if (uch<128) - return uch; - if (uch >= UNI_ALEF && uch <= UNI_TAV) - return (guchar)(uch-UNI_ALEF+ISO_ALEF); - switch (uch) { - case UNI_RLM: return ISO_8859_8_RLM; - case UNI_LRM: return ISO_8859_8_LRM; - case UNI_RLO: return ISO_8859_8_RLO; - case UNI_LRO: return ISO_8859_8_LRO; - case UNI_RLE: return ISO_8859_8_RLE; - case UNI_LRE: return ISO_8859_8_LRE; - case UNI_PDF: return ISO_8859_8_PDF; +#include "fribidi_char_sets.h" + +typedef struct +{ + /* Convert the character string "s" to unicode string "us" and + return it's length. */ + gint (*charset_to_unicode) (guchar * s, + /* output */ + FriBidiChar * us); + /* Convert the unicode string "us" with length "length" to character + string "s" and return it's length. */ + gint (*unicode_to_charset) (FriBidiChar * us, gint length, + /* output */ + guchar * s); + /* Charset's name. */ + guchar *name; + /* Charset's title. */ + guchar *title; + /* Comments, if any. */ + guchar *(*desc) (); + /* Some charsets like CapRTL may need to change some fribidis tables, by + calling this function, they can do this changes. */ + gboolean (*enter) (void); + /* Some charsets like CapRTL may need to change some fribidis tables, by + calling this function, they can undo their changes, perhaps to enter + another mode. */ + gboolean (*leave) (void); +} +FriBidiCharSetHandler; + +/* Each charset must define the functions and strings named below + (in _FRIBIDI_ADD_CHAR_SET) or define them as NULL, if not any. */ + +#define _FRIBIDI_ADD_CHAR_SET(char_set) \ + { \ + fribidi_##char_set##_to_unicode, \ + fribidi_unicode_to_##char_set, \ + fribidi_char_set_name_##char_set, \ + fribidi_char_set_title_##char_set, \ + fribidi_char_set_desc_##char_set, \ + fribidi_char_set_enter_##char_set, \ + fribidi_char_set_leave_##char_set, \ } - return '¿'; -} - -FriBidiChar -fribidi_iso8859_6_to_unicode_c(guchar ch) -{ - if (ch >= ISO_HAMZA && ch <= ISO_SUKUN) - return ch-ISO_HAMZA+UNI_HAMZA; - else - return ch; -} - -FriBidiChar -fribidi_cp1255_to_unicode_tab[] = { /* 0x80-0xBF */ - 0x20AC, 0x81,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021, - 0x02C6,0x2030, 0x8a,0x2039, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014, - 0x02DC,0x2122, 0x9a,0x203A, 0x9c, 0x9d, 0x9e, 0x9f, - 0x00A0,0x00A1,0x00A2,0x00A3,0x20AA,0x00A5,0x00A6,0x00A7, - 0x00A8,0x00A9,0x00D7,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF, - 0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7, - 0x00B8,0x00B9,0x00F7,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF -}; - -FriBidiChar -fribidi_cp1255_to_unicode_c(guchar ch) -{ - if (ch >= ISO_ALEF && ch <= ISO_TAV) - return ch-ISO_ALEF+UNI_ALEF; - else if (ch >= CP1255_SHEVA && ch <= CP1255_SOF_PASUQ) - return ch-CP1255_SHEVA+UNI_SHEVA; - else if (ch >= CP1255_DOUBLE_VAV && ch <= CP1255_GERSHAYIM) - return ch-CP1255_DOUBLE_VAV+UNI_DOUBLE_VAV; - /* cp1256 specific chars */ - else if (ch >= 0x80 && ch <= 0xbf) - return fribidi_cp1255_to_unicode_tab[ch-0x80]; - else - return ch; -} -FriBidiChar -fribidi_cp1256_to_unicode_tab[] = { /* 0x80-0xFF */ - 0x20AC,0x067E,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021, - 0x02C6,0x2030,0x0679,0x2039,0x0152,0x0686,0x0698,0x0688, - 0x06AF,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014, - 0x06A9,0x2122,0x0691,0x203A,0x0153,0x200C,0x200D,0x06BA, - 0x00A0,0x060C,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7, - 0x00A8,0x00A9,0x06BE,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF, - 0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7, - 0x00B8,0x00B9,0x061B,0x00BB,0x00BC,0x00BD,0x00BE,0x061F, - 0x06C1,0x0621,0x0622,0x0623,0x0624,0x0625,0x0626,0x0627, - 0x0628,0x0629,0x062A,0x062B,0x062C,0x062D,0x062E,0x062F, - 0x0630,0x0631,0x0632,0x0633,0x0634,0x0635,0x0636,0x00D7, - 0x0637,0x0638,0x0639,0x063A,0x0640,0x0641,0x0642,0x0643, - 0x00E0,0x0644,0x00E2,0x0645,0x0646,0x0647,0x0648,0x00E7, - 0x00E8,0x00E9,0x00EA,0x00EB,0x0649,0x064A,0x00EE,0x00EF, - 0x064B,0x064C,0x064D,0x064E,0x00F4,0x064F,0x0650,0x00F7, - 0x0651,0x00F9,0x0652,0x00FB,0x00FC,0x200E,0x200F,0x00ff +FriBidiCharSetHandler fribidi_char_sets[FRIBIDI_CHAR_SETS_NUM + 1] = { + {NULL, NULL, "Not Implemented", NULL, NULL, NULL}, + _FRIBIDI_ADD_CHAR_SET (utf8), + _FRIBIDI_ADD_CHAR_SET (cap_rtl), + _FRIBIDI_ADD_CHAR_SET (iso8859_6), + _FRIBIDI_ADD_CHAR_SET (iso8859_8), + _FRIBIDI_ADD_CHAR_SET (cp1255), + _FRIBIDI_ADD_CHAR_SET (cp1256), + _FRIBIDI_ADD_CHAR_SET (isiri_3342), }; -FriBidiChar -fribidi_cp1256_to_unicode_c(guchar ch) -{ - if (ch >= 0x80 && ch <= 0xff) - return fribidi_cp1256_to_unicode_tab[ch-0x80]; - else - return ch; -} - -FriBidiChar -fribidi_isiri_3342_to_unicode_tab[] = { /* 0xA0-0xFF */ - 0x0020,0x200C,0x200D,0x0021,0x00A4,0x066A,0x002E,0x066C, - 0x0029,0x0028,0x00D7,0x002B,0x060C,0x002D,0x066B,0x002F, - 0x06F0,0x06F1,0x06F2,0x06F3,0x06F4,0x06F5,0x06F6,0x06F7, - 0x06F8,0x06F9,0x003A,0x061B,0x003C,0x003D,0x003E,0x061F, - 0x0622,0x0627,0x0621,0x0628,0x067E,0x062A,0x062B,0x062C, - 0x0686,0x062D,0x062E,0x062F,0x0630,0x0631,0x0632,0x0698, - 0x0633,0x0634,0x0635,0x0636,0x0637,0x0638,0x0639,0x063A, - 0x0641,0x0642,0x06A9,0x06AF,0x0644,0x0645,0x0646,0x0648, - 0x0647,0x06CC,0x005D,0x005B,0x007D,0x007B,0x00AB,0x00BB, - /* 0xec,0xed,0xee,0xef not defined */ - 0x002A,0x0640,0x007C,0x005C,0xec ,0xed ,0xee ,0xef , - 0x064E,0x0650,0x064F,0x064B,0x064D,0x064C,0x0651,0x0652, - 0x0623,0x0624,0x0625,0x0626,0x0629,0x0643,0x064A,0x007F -}; - -FriBidiChar -fribidi_isiri_3342_to_unicode_c(guchar ch) -{ - if ((ch >= 0x80 && ch <= 0xa0) || ch == 0xff) - return ch-0x80; /* FIXME: but they are strong typed RTL ! */ - else if (ch >= 0xa1 && ch <= 0xfe) - return fribidi_isiri_3342_to_unicode_tab[ch-0xa0]; - /* TODO */ - else - return ch; -} - -void -fribidi_iso8859_6_to_unicode(guchar *s, - FriBidiChar *us) -{ - int i; - int len = strlen(s); - - for (i=0; i<len+1; i++) - us[i] = fribidi_iso8859_6_to_unicode_c(s[i]); -} - -void -fribidi_iso8859_8_to_unicode(guchar *s, - FriBidiChar *us) -{ - int i; - int len = strlen(s); - - for (i=0; i<len+1; i++) - us[i] = fribidi_iso8859_8_to_unicode_c(s[i]); -} - -void -fribidi_cp1255_to_unicode(guchar *s, - FriBidiChar *us) -{ - int i; - int len = strlen(s); - - for (i=0; i<len+1; i++) - us[i] = fribidi_cp1255_to_unicode_c(s[i]); -} - -void -fribidi_cp1256_to_unicode(guchar *s, - FriBidiChar *us) -{ - int i; - int len = strlen(s); - - for (i=0; i<len+1; i++) - us[i] = fribidi_cp1256_to_unicode_c(s[i]); -} - -void -fribidi_isiri_3342_to_unicode(guchar *s, - FriBidiChar *us) -{ - int i; - int len = strlen(s); - - for (i=0; i<len+1; i++) - us[i] = fribidi_isiri_3342_to_unicode_c(s[i]); -} - -guchar -fribidi_unicode_to_iso8859_6_c(FriBidiChar uch) -{ - if (uch >= UNI_HAMZA && uch <= UNI_SUKUN) - return (guchar)(uch-UNI_HAMZA+ISO_HAMZA); - /* TODO: handle pre-composed and presentation chars */ - else if (uch < 256) - return (guchar)uch; - else if (uch == 0x060c) - return (guchar)0xac; - else if (uch == 0x061b) - return (guchar)0xbb; - else if (uch == 0x061f) - return (guchar)0xbf; - else - return '¿'; -} - +#undef _FRIBIDI_ADD_CHAR_SET -guchar -fribidi_unicode_to_cp1255_c(FriBidiChar uch) +/* Return the charset which name is "s". */ +FriBidiCharSet +fribidi_parse_charset (guchar * s) { - if (uch >= UNI_ALEF && uch <= UNI_TAV) - return (guchar)(uch-UNI_ALEF+ISO_ALEF); - if (uch >= UNI_SHEVA && uch <= UNI_SOF_PASUQ) - return (guchar)(uch-UNI_SHEVA+CP1255_SHEVA); - if (uch >= UNI_DOUBLE_VAV && uch <= UNI_GERSHAYIM) - return (guchar)(uch-UNI_DOUBLE_VAV+CP1255_DOUBLE_VAV); - /* TODO: handle pre-composed and presentation chars */ - else if (uch < 256) - return (guchar)uch; - else - return '¿'; -} + gint i; -guchar -fribidi_unicode_to_cp1256_c(FriBidiChar uch) -{ - if (uch < 256) - return (guchar)uch; - if (uch >= UNI_HAMZA && uch <= UNI_DAD) - return (guchar)(uch-UNI_HAMZA+ISO_HAMZA); - else switch (uch) { - case 0x0152: return 0x8c; - case 0x0153: return 0x9c; - case 0x0192: return 0x83; - case 0x02C6: return 0x88; - case 0x060C: return 0xA1; - case 0x061B: return 0xBA; - case 0x061F: return 0xBF; - case 0x0637: return 0xD8; - case 0x0638: return 0xD9; - case 0x0639: return 0xDA; - case 0x063A: return 0xDB; - case 0x0640: return 0xDC; - case 0x0641: return 0xDD; - case 0x0642: return 0xDE; - case 0x0643: return 0xDF; - case 0x0644: return 0xE1; - case 0x0645: return 0xE3; - case 0x0646: return 0xE4; - case 0x0647: return 0xE5; - case 0x0648: return 0xE6; - case 0x0649: return 0xEC; - case 0x064A: return 0xED; - case 0x064B: return 0xF0; - case 0x064C: return 0xF1; - case 0x064D: return 0xF2; - case 0x064E: return 0xF3; - case 0x064F: return 0xF5; - case 0x0650: return 0xF6; - case 0x0651: return 0xF8; - case 0x0652: return 0xFA; - case 0x0679: return 0x8A; - case 0x067E: return 0x81; - case 0x0686: return 0x8D; - case 0x0688: return 0x8F; - case 0x0691: return 0x9A; - case 0x0698: return 0x8E; - case 0x06A9: return 0x98; - case 0x06AF: return 0x90; - case 0x06BA: return 0x9F; - case 0x06BE: return 0xAA; - case 0x06C1: return 0xC0; - case 0x200C: return 0x9D; - case 0x200D: return 0x9E; - case 0x200E: return 0xFD; - case 0x200F: return 0xFE; - case 0x2013: return 0x96; - case 0x2014: return 0x97; - case 0x2018: return 0x91; - case 0x2019: return 0x92; - case 0x201A: return 0x82; - case 0x201C: return 0x93; - case 0x201D: return 0x94; - case 0x201E: return 0x84; - case 0x2020: return 0x86; - case 0x2021: return 0x87; - case 0x2022: return 0x95; - case 0x2026: return 0x85; - case 0x2030: return 0x89; - case 0x2039: return 0x8B; - case 0x203A: return 0x9B; - case 0x20AC: return 0x80; - case 0x2122: return 0x99; + for (i = FRIBIDI_CHAR_SETS_NUM; i; i--) + /* Function strcasecmp() is used here, cab be replaced with strcmp(), + if strcasecmp() is not available. */ + if (strcasecmp (s, fribidi_char_sets[i].name) == 0) + return i; - default: return '¿'; - } + return FRIBIDI_CHARSET_NOT_FOUND; } -guchar -fribidi_unicode_to_isiri_3342_c(FriBidiChar uch) -{ - /* TODO */ - if (uch < 256) - return (guchar)uch; - else switch (uch) { - case 0x060C: return 0xAC; - case 0x061B: return 0xBB; - case 0x061F: return 0xBF; - case 0x0621: return 0xC2; - case 0x0622: return 0xC0; - case 0x0623: return 0xF8; - case 0x0624: return 0xF9; - case 0x0625: return 0xFA; - case 0x0626: return 0xFB; - case 0x0627: return 0xC1; - case 0x0628: return 0xC3; - case 0x0629: return 0xFC; - case 0x062A: return 0xC5; - case 0x062B: return 0xC6; - case 0x062C: return 0xC7; - case 0x062D: return 0xC9; - case 0x062E: return 0xCA; - case 0x062F: return 0xCB; - case 0x0630: return 0xCC; - case 0x0631: return 0xCD; - case 0x0632: return 0xCE; - case 0x0633: return 0xD0; - case 0x0634: return 0xD1; - case 0x0635: return 0xD2; - case 0x0636: return 0xD3; - case 0x0637: return 0xD4; - case 0x0638: return 0xD5; - case 0x0639: return 0xD6; - case 0x063A: return 0xD7; - case 0x0640: return 0xE9; - case 0x0641: return 0xD8; - case 0x0642: return 0xD9; - case 0x0643: return 0xFD; - case 0x0644: return 0xDC; - case 0x0645: return 0xDD; - case 0x0646: return 0xDE; - case 0x0647: return 0xE0; - case 0x0648: return 0xDF; - case 0x064A: return 0xFE; - case 0x064B: return 0xF3; - case 0x064C: return 0xF5; - case 0x064D: return 0xF4; - case 0x064E: return 0xF0; - case 0x064F: return 0xF2; - case 0x0650: return 0xF1; - case 0x0651: return 0xF6; - case 0x0652: return 0xF7; - case 0x066A: return 0xA5; - case 0x066B: return 0xAE; - case 0x066C: return 0xA7; - case 0x067E: return 0xC4; - case 0x0686: return 0xC8; - case 0x0698: return 0xCF; - case 0x06A9: return 0xDA; - case 0x06AF: return 0xDB; - case 0x06CC: return 0xE1; - case 0x06F0: return 0xB0; - case 0x06F1: return 0xB1; - case 0x06F2: return 0xB2; - case 0x06F3: return 0xB3; - case 0x06F4: return 0xB4; - case 0x06F5: return 0xB5; - case 0x06F6: return 0xB6; - case 0x06F7: return 0xB7; - case 0x06F8: return 0xB8; - case 0x06F9: return 0xB9; - case 0x200C: return 0xA1; - case 0x200D: return 0xA2; - default: return '¿'; - } -} -void -fribidi_unicode_to_iso8859_6(FriBidiChar *us, - int length, - guchar *s) - +/* Convert the character string "s" in charset "char_set" to unicode + string "us" and return it's length. */ +gint +fribidi_charset_to_unicode (FriBidiCharSet char_set, guchar * s, + /* output */ + FriBidiChar * us) { - int i; - - for (i=0; i< length; i++) - s[i] = fribidi_unicode_to_iso8859_6_c(us[i]); - s[i] = 0; + fribidi_char_set_enter (char_set); + return fribidi_char_sets[char_set].charset_to_unicode == NULL ? 0 : + (*fribidi_char_sets[char_set].charset_to_unicode) (s, us); } -void -fribidi_unicode_to_iso8859_8(FriBidiChar *us, - int length, - guchar *s) - +/* Convert the unicode string "us" with length "length" to character + string "s" in charset "char_set" and return it's length. */ +gint +fribidi_unicode_to_charset (FriBidiCharSet char_set, FriBidiChar * us, + gint length, + /* output */ + gchar * s) { - int i; - - for (i=0; i< length; i++) - s[i] = fribidi_unicode_to_iso8859_8_c(us[i]); - s[i] = 0; + fribidi_char_set_enter (char_set); + return fribidi_char_sets[char_set].unicode_to_charset == NULL ? 0 : + (*fribidi_char_sets[char_set].unicode_to_charset) (us, length, s); } -void -fribidi_unicode_to_cp1255(FriBidiChar *us, - int length, - guchar *s) - +/* Return the string containing the name of the charset. */ +guchar * +fribidi_char_set_name (FriBidiCharSet char_set) { - int i; - - for (i=0; i< length; i++) - s[i] = fribidi_unicode_to_cp1255_c(us[i]); - s[i] = 0; + return fribidi_char_sets[char_set].name == NULL ? (guchar *) "" : + fribidi_char_sets[char_set].name; } -void -fribidi_unicode_to_cp1256(FriBidiChar *us, - int length, - guchar *s) - +/* Return the string containing the title of the charset. */ +guchar * +fribidi_char_set_title (FriBidiCharSet char_set) { - int i; - - for (i=0; i< length; i++) - s[i] = fribidi_unicode_to_cp1256_c(us[i]); - s[i] = 0; + return fribidi_char_sets[char_set].title == NULL ? + fribidi_char_set_name (char_set) : fribidi_char_sets[char_set].title; } -void -fribidi_unicode_to_isiri_3342(FriBidiChar *us, - int length, - guchar *s) - +/* Return the string containing the comments about the charset, if any. */ +guchar * +fribidi_char_set_desc (FriBidiCharSet char_set) { - int i; - - for (i=0; i< length; i++) - s[i] = fribidi_unicode_to_isiri_3342_c(us[i]); - s[i] = 0; + return fribidi_char_sets[char_set].desc == NULL ? + NULL : fribidi_char_sets[char_set].desc (); } -/* the following added by Raphael Finkel <raphael@cs.uky.edu> 12/1999 */ - -void -fribidi_unicode_to_utf8(FriBidiChar *us, - int length, - guchar *s) -/* warning: the length of output string may exceed the length of the input */ +static FriBidiCharSet current_char_set = FRIBIDI_CHARSET_DEFAULT; +/* Some charsets like CapRTL may need to change some fribidis tables, by + calling this function, they can do this changes. */ +gboolean +fribidi_char_set_enter (FriBidiCharSet char_set) { - int i; - - for (i=0; i< length; i++) + if (char_set != current_char_set && fribidi_char_sets[char_set].enter) { - FriBidiChar mychar = us[i]; - if (mychar <= 0x7F) { /* 7 sig bits; plain 7-bit ascii */ - *s++ = mychar; - } else if (mychar <= 0x7FF) /* 11 sig bits; Hebrew is in this range */ - { - *s++ = 0300 | (guint8) ((mychar >> 6)&037); - *s++ = 0200 | (guint8) (mychar & 077); /* lower 6 bits */ - } else if (mychar <= 0xFFFF) { /* 16 sig bits */ - *s++ = 0340 | (guint8) ((mychar >> 12)&017), /* upper 4 bits */ - *s++ = 0200 | (guint8) ((mychar >> 6)&077), /* next 6 bits */ - *s++ = 0200 | (guint8) (mychar & 077); /* lowest 6 bits */ - } - } - *s = 0; -} - - -int /* we return the length */ -fribidi_utf8_to_unicode(guchar *s, - FriBidiChar *us) -/* warning: the length of input string may exceed the length of the output */ -{ - int length; - - length = 0; - while (*s) { - if (*s <= 0177) /* one byte */ - { - *us++ = *s++; /* expand with 0s */ - } - else if (*s < 0340) /* 2 chars, such as Hebrew */ - { - *us++ = ((*s & 037) << 6) + (*(s+1) & 077); - s += 2; - } - else /* 3 chars */ - { - *us++ = ((*s & 017) << 12) + ((*(s+1) & 077) << 6) + - (*(s+2) & 077); - s += 3; - } - length += 1; - } - *us = 0; - return(length); -} - -/* the following was added by Omer Zak <omerz@actcom.co.il> Sept 2000. -** The following functions do the same thing, but have better-defined -** interfaces. -*/ - -gboolean /* Returns TRUE if the outputs are valid, even if the entire - ** Unicode string was not converted. - */ -fribidi_unicode_to_utf8_p(FriBidiChar *in_unicode_str, /* Unicode string */ - guint in_unicode_length, /* Unicode string length in - ** Unicode characters - */ - guchar *utf8_buffer, /* Buffer for UTF8 translation */ - guint utf8_buffer_length, /* Length of UTF8 buffer */ - /* Outputs */ - guint *out_uni_consumed_length_p, - /* Actual number of Unicode - ** characters translated - */ - guint *out_actual_utf8_buffer_length_p) - /* Actual number of bytes - ** used in the UTF8 buffer. - */ -{ - guint l_utf8_buffer_fence; /* Fence - 6 octets before end */ - guint index; - guint index_utf8; - - if (NULL == in_unicode_str) return(FALSE); - if (NULL == utf8_buffer) return(FALSE); - if (NULL == out_uni_consumed_length_p) return(FALSE); - if (NULL == out_actual_utf8_buffer_length_p) return(FALSE); - if (utf8_buffer_length < 6) return(FALSE); - - /* If we are here, all outputs must have some valid values. */ - *out_uni_consumed_length_p = 0; - *out_actual_utf8_buffer_length_p = 0; - - l_utf8_buffer_fence = utf8_buffer_length-6; - - for (index = 0, index_utf8 = 0; index < in_unicode_length; index++) { - FriBidiChar ucs4_char; - - if (index_utf8 > l_utf8_buffer_fence) { - break; /* No more guaranteed room in UTF8 buffer. */ - } - - ucs4_char = in_unicode_str[index]; - -#define PUT_UTF8_BYTE(b) utf8_buffer[index_utf8] = (b); index_utf8++ - - if (ucs4_char < 0x00000080) { - PUT_UTF8_BYTE(ucs4_char & 0x0000007F); - } else if (ucs4_char < 0x00000800) { - PUT_UTF8_BYTE(((ucs4_char >> 6) & 0x0000001F) | 0x000000C0); - PUT_UTF8_BYTE((ucs4_char & 0x0000003F) | 0x00000080); - } else if (ucs4_char < 0x00010000) { - PUT_UTF8_BYTE(((ucs4_char >> 12) & 0x0000000F) | 0x000000E0); - PUT_UTF8_BYTE(((ucs4_char >> 6) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE((ucs4_char & 0x0000003F) | 0x00000080); - } else if (ucs4_char < 0x00200000) { - PUT_UTF8_BYTE(((ucs4_char >> 18) & 0x00000007) | 0x000000F0); - PUT_UTF8_BYTE(((ucs4_char >> 12) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE(((ucs4_char >> 6) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE((ucs4_char & 0x0000003F) | 0x00000080); - } else if (ucs4_char < 0x04000000) { - PUT_UTF8_BYTE(((ucs4_char >> 24) & 0x00000003) | 0x000000F8); - PUT_UTF8_BYTE(((ucs4_char >> 18) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE(((ucs4_char >> 12) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE(((ucs4_char >> 6) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE((ucs4_char & 0x0000003F) | 0x00000080); - } else /* if (ucs4_char < 0x80000000) */ { - PUT_UTF8_BYTE(((ucs4_char >> 30) & 0x00000001) | 0x000000FC); - PUT_UTF8_BYTE(((ucs4_char >> 24) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE(((ucs4_char >> 18) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE(((ucs4_char >> 12) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE(((ucs4_char >> 6) & 0x0000003F) | 0x00000080); - PUT_UTF8_BYTE((ucs4_char & 0x0000003F) | 0x00000080); + fribidi_char_set_leave (current_char_set); + current_char_set = char_set; + return (*fribidi_char_sets[char_set].enter) (); } - -#undef PUT_UTF8_BYTE - - } - - *out_uni_consumed_length_p = index; - *out_actual_utf8_buffer_length_p = index_utf8; - - return(TRUE); + else + return TRUE; } -gboolean /* Returns TRUE if the entire UTF8 string was converted without errors. */ -fribidi_utf8_to_unicode_p(guchar *in_utf8_str, /* UTF8 string */ - guint in_utf8_length, /* Length of UTF8 string in octets */ - FriBidiChar *unicode_buffer, /* Buffer for Unicode translation */ - guint unicode_buffer_length, /* Length of Unicode buffer in - ** Unicode characters - */ - /* Outputs */ - guint *out_utf8_consumed_length_p, - /* Actual number of UTF8 - ** octets translated - */ - guint *out_actual_unicode_buffer_length_p) - /* Actual number of Unicode - ** characters used in the - ** Unicode buffer. - */ +/* Some charsets like CapRTL may need to change some fribidis tables, by + calling this function, they can undo their changes, maybe to enter + another mode. */ +gboolean +fribidi_char_set_leave (FriBidiCharSet char_set) { - /* !!! To be improved using code from libutf8-0.7.3/src/mbstowcs/mbsrtowcs.c - ** !!! (the library is LGPLed :-) ) - */ - - guint index; - guint index_uni; - - if (NULL == in_utf8_str) return(FALSE); - if (NULL == unicode_buffer) return(FALSE); - if (NULL == out_utf8_consumed_length_p) return(FALSE); - if (NULL == out_actual_unicode_buffer_length_p) return(FALSE); - - /* If we are here, all outputs must have some valid values. */ - *out_utf8_consumed_length_p = 0; - *out_actual_unicode_buffer_length_p = 0; - - for (index = 0, index_uni = 0; (index < in_utf8_length) && (index_uni < unicode_buffer_length);) { - /* NOTE: there is no protection against UTF8 sequences which overflow the in_utf8_str. */ - if (in_utf8_str[index] <= 0177) /* one byte */ { - unicode_buffer[index_uni++] = in_utf8_str[index++]; /* expand with 0s */ - } - else if (in_utf8_str[index] < 0340) /* 2 chars, such as Hebrew */ { - unicode_buffer[index_uni++] = ((in_utf8_str[index] & 037) << 6) - + (in_utf8_str[index+1] & 077); - index += 2; - } - else /* 3 chars */ { - unicode_buffer[index_uni++] = ((in_utf8_str[index] & 017) << 12) - + ((in_utf8_str[index+1] & 077) << 6) - + (in_utf8_str[index+2] & 077); - index += 3; - } - } - *out_utf8_consumed_length_p = index; - *out_actual_unicode_buffer_length_p = index_uni; - - return(TRUE); + if (char_set == current_char_set && fribidi_char_sets[char_set].leave) + return (*fribidi_char_sets[char_set].leave) (); + else + return TRUE; } |