diff options
Diffstat (limited to 'xc/lib/X11/lcCT.c')
-rw-r--r-- | xc/lib/X11/lcCT.c | 1042 |
1 files changed, 533 insertions, 509 deletions
diff --git a/xc/lib/X11/lcCT.c b/xc/lib/X11/lcCT.c index 33df0df8b..0ce9ba588 100644 --- a/xc/lib/X11/lcCT.c +++ b/xc/lib/X11/lcCT.c @@ -31,80 +31,118 @@ * Modifier: Takanori Tateno FUJITSU LIMITED * */ -/* $XFree86: xc/lib/X11/lcCT.c,v 3.13 1999/06/06 14:05:53 dawes Exp $ */ +/* + * 2000 + * Modifier: Ivan Pascal The XFree86 Project + * Modifier: Bruno Haible The XFree86 Project + */ +/* $XFree86: xc/lib/X11/lcCT.c,v 3.15 2000/02/12 02:54:08 dawes Exp $ */ #include "Xlibint.h" #include "XlcPubI.h" #include <X11/Xos.h> #include <stdio.h> -typedef struct _StateRec { - XlcCharSet charset; - XlcCharSet GL_charset; - XlcCharSet GR_charset; - XlcCharSet ext_seg_charset; - int ext_seg_left; -} StateRec, *State; +/* ====================== Built-in Character Sets ====================== */ + +/* + * Static representation of a character set that can be used in Compound Text. + */ typedef struct _CTDataRec { - char *name; - char *encoding; /* Compound Text encoding */ + _Xconst char *name; + _Xconst char *encoding; /* Compound Text encoding, ESC sequence */ } CTDataRec, *CTData; -typedef struct _CTInfoRec { - XlcCharSet charset; - int encoding_len; - char *encoding; /* Compound Text encoding */ - int ext_segment_len; - char *ext_segment; /* extended segment */ - struct _CTInfoRec *next; -} CTInfoRec, *CTInfo; - static CTDataRec default_ct_data[] = { - { "ISO8859-1:GL", "\033(B" }, - { "ISO8859-1:GR", "\033-A" }, - { "ISO8859-2:GR", "\033-B" }, - { "ISO8859-3:GR", "\033-C" }, - { "ISO8859-4:GR", "\033-D" }, - { "ISO8859-7:GR", "\033-F" }, - { "ISO8859-6:GR", "\033-G" }, - { "ISO8859-8:GR", "\033-H" }, - { "ISO8859-5:GR", "\033-L" }, - { "ISO8859-9:GR", "\033-M" }, - { "ISO8859-10:GR", "\033-V" }, - { "JISX0201.1976-0:GL", "\033(J" }, - { "JISX0201.1976-0:GR", "\033)I" }, - - { "GB2312.1980-0:GL", "\033$(A" }, - { "GB2312.1980-0:GR", "\033$)A" }, - { "JISX0208.1983-0:GL", "\033$(B" }, - { "JISX0208.1983-0:GR", "\033$)B" }, - { "KSC5601.1987-0:GL", "\033$(C" }, - { "KSC5601.1987-0:GR", "\033$)C" }, -#ifdef notdef - { "JISX0212.1990-0:GL", "\033$(D" }, - { "JISX0212.1990-0:GR", "\033$)D" }, - { "CNS11643.1986-1:GL", "\033$(G" }, - { "CNS11643.1986-1:GR", "\033$)G" }, - { "CNS11643.1986-2:GL", "\033$(H" }, - { "CNS11643.1986-2:GR", "\033$)H" }, -#endif - { "TIS620.2533-1:GR", "\033-T"}, - { "ISO10646-1", "\033%B"}, - /* Non-Standard Character Set Encodings */ - { "KOI8-R:GR", "\033%/1\200\210koi8-r\002"}, - { "KOI8-U:GR", "\033%/1\200\211koi8-u\002"}, - { "ISO8859-15:GR", "\033%/1\200\213iso8859-15\002"}, - { "ARMSCII-8:GR", "\033%/1\200\210armscii-8\002"}, - { "ISO8859-14:GR", "\033%/1\200\213iso8859-14\002"}, - { "IBM-CP1133:GR", "\033%/1\200\210ibm-cp1133\002"}, - { "MULELAO-1:GR", "\033%/1\200\210mulelao-1\002"}, - { "VISCII1.1-1:GR", "\033%/1\200\210viscii1.1-1\002"}, - { "TCVN-5712:GR", "\033%/1\200\210tcvn-5712\002"}, + /* */ + /* X11 registry name MIME name ISO-IR ESC sequence */ + /* */ + + /* Registered character sets with one byte per character */ + { "ISO8859-1:GL", /* US-ASCII 6 */ "\033(B" }, + { "ISO8859-1:GR", /* ISO-8859-1 100 */ "\033-A" }, + { "ISO8859-2:GR", /* ISO-8859-2 101 */ "\033-B" }, + { "ISO8859-3:GR", /* ISO-8859-3 109 */ "\033-C" }, + { "ISO8859-4:GR", /* ISO-8859-4 110 */ "\033-D" }, + { "ISO8859-5:GR", /* ISO-8859-5 144 */ "\033-L" }, + { "ISO8859-6:GR", /* ISO-8859-6 127 */ "\033-G" }, + { "ISO8859-7:GR", /* ISO-8859-7 126 */ "\033-F" }, + { "ISO8859-8:GR", /* ISO-8859-8 138 */ "\033-H" }, + { "ISO8859-9:GR", /* ISO-8859-9 148 */ "\033-M" }, + { "ISO8859-10:GR", /* ISO-8859-10 157 */ "\033-V" }, + { "ISO8859-13:GR", /* ISO-8859-13 179 */ "\033-Y" }, + { "ISO8859-14:GR", /* ISO-8859-14 199 */ "\033-_" }, + { "ISO8859-15:GR", /* ISO-8859-15 203 */ "\033-b" }, + { "ISO8859-16:GR", /* ISO-8859-16 226 */ "\033-f" }, + { "JISX0201.1976-0:GL", /* ISO-646-JP 14 */ "\033(J" }, + { "JISX0201.1976-0:GR", "\033)I" }, + { "TIS620.2533-1:GR", /* TIS-620 166 */ "\033-T" }, + + /* Registered character sets with two byte per character */ + { "GB2312.1980-0:GL", /* GB_2312-80 58 */ "\033$(A" }, + { "GB2312.1980-0:GR", /* GB_2312-80 58 */ "\033$)A" }, + { "JISX0208.1983-0:GL", /* JIS_X0208-1983 87 */ "\033$(B" }, + { "JISX0208.1983-0:GR", /* JIS_X0208-1983 87 */ "\033$)B" }, + { "JISX0208.1990-0:GL", /* JIS_X0208-1990 168 */ "\033$(B" }, + { "JISX0208.1990-0:GR", /* JIS_X0208-1990 168 */ "\033$)B" }, + { "JISX0212.1990-0:GL", /* JIS_X0212-1990 159 */ "\033$(D" }, + { "JISX0212.1990-0:GR", /* JIS_X0212-1990 159 */ "\033$)D" }, + { "KSC5601.1987-0:GL", /* KS_C_5601-1987 149 */ "\033$(C" }, + { "KSC5601.1987-0:GR", /* KS_C_5601-1987 149 */ "\033$)C" }, + { "CNS11643.1986-1:GL", /* CNS 11643-1992 pl.1 171 */ "\033$(G" }, + { "CNS11643.1986-1:GR", /* CNS 11643-1992 pl.1 171 */ "\033$)G" }, + { "CNS11643.1986-2:GL", /* CNS 11643-1992 pl.2 172 */ "\033$(H" }, + { "CNS11643.1986-2:GR", /* CNS 11643-1992 pl.2 172 */ "\033$)H" }, + { "CNS11643.1992-3:GL", /* CNS 11643-1992 pl.3 183 */ "\033$(I" }, + { "CNS11643.1992-3:GR", /* CNS 11643-1992 pl.3 183 */ "\033$)I" }, + { "CNS11643.1992-4:GL", /* CNS 11643-1992 pl.4 184 */ "\033$(J" }, + { "CNS11643.1992-4:GR", /* CNS 11643-1992 pl.4 184 */ "\033$)J" }, + { "CNS11643.1992-5:GL", /* CNS 11643-1992 pl.5 185 */ "\033$(K" }, + { "CNS11643.1992-5:GR", /* CNS 11643-1992 pl.5 185 */ "\033$)K" }, + { "CNS11643.1992-6:GL", /* CNS 11643-1992 pl.6 186 */ "\033$(L" }, + { "CNS11643.1992-6:GR", /* CNS 11643-1992 pl.6 186 */ "\033$)L" }, + { "CNS11643.1992-7:GL", /* CNS 11643-1992 pl.7 187 */ "\033$(M" }, + { "CNS11643.1992-7:GR", /* CNS 11643-1992 pl.7 187 */ "\033$)M" }, + + /* Registered encodings with a varying number of bytes per character */ + { "ISO10646-1", /* UTF-8 196 */ "\033%G" }, + + /* Encodings without ISO-IR assigned escape sequence */ + { "KOI8-R:GR", "\033%/1\200\210koi8-r\002"}, + { "KOI8-U:GR", "\033%/1\200\211koi8-u\002"}, + { "ARMSCII-8:GR", "\033%/1\200\210armscii-8\002"}, + { "IBM-CP1133:GR", "\033%/1\200\210ibm-cp1133\002"}, + { "MULELAO-1:GR", "\033%/1\200\210mulelao-1\002"}, + { "VISCII1.1-1:GR", "\033%/1\200\210viscii1.1-1\002"}, + { "TCVN-5712:GR", "\033%/1\200\210tcvn-5712\002"}, { "GEORGIAN-ACADEMY:GR", "\033%/1\200\210georgian-academy\002"}, - { "GEORGIAN-PS:GR", "\033%/1\200\210georgian-ps\002"}, -} ; + { "GEORGIAN-PS:GR", "\033%/1\200\210georgian-ps\002"}, + /* Backward compatibility with XFree86 3.x */ + { "ISO8859-14:GR", "\033%/1\200\213iso8859-14\002"}, + { "ISO8859-15:GR", "\033%/1\200\213iso8859-15\002"}, +#ifdef notdef /* used by Emacs, but not backed by ISO-IR */ + { "BIG5-0:GL", "\033$(0" }, + { "BIG5-0:GR", "\033$)0" }, + { "BIG5-1:GL", "\033$(1" }, + { "BIG5-1:GR", "\033$)1" }, +#endif +}; + +CTDataRec *default_ct_data_list() +{ + return(default_ct_data); +} + +size_t default_ct_data_list_num() +{ + size_t num = sizeof(default_ct_data) / sizeof(CTDataRec); + return(num); +} + + +/* ======================= Parsing ESC Sequences ======================= */ #define XctC0 0x0000 #define XctHT 0x0009 @@ -114,6 +152,7 @@ static CTDataRec default_ct_data[] = #define XctC1 0x0080 #define XctCSI 0x009b #define XctGR 0x00a0 +#define XctSTX 0x0002 #define XctCntrlFunc 0x0023 #define XctMB 0x0024 @@ -137,254 +176,276 @@ static CTDataRec default_ct_data[] = #define XctESCSeq 0x1b00 #define XctCSISeq 0x9b00 -#define SKIP_I(str) while (*(str) >= 0x20 && *(str) <= 0x2f) (str)++; -#define SKIP_P(str) while (*(str) >= 0x30 && *(str) <= 0x3f) (str)++; - -typedef struct { - XlcSide side; - int char_size; - int set_size; - int ext_seg_length; - int version; - CTInfo ct_info; -} CTParseRec, *CTParse; - -CTDataRec *default_ct_data_list() -{ - return(default_ct_data); -} - -size_t default_ct_data_list_num() +/* + * Parses the header of a Compound Text segment, i.e. the charset designator. + */ +static unsigned int +_XlcParseCT(text, length, extra_data) + _Xconst char **text; + int *length; + unsigned int *extra_data; { - size_t num = sizeof(default_ct_data) / sizeof(CTDataRec); - return(num); -} - -static CTInfo ct_list = NULL; + unsigned int ret = 0, dummy, *data = extra_data; + unsigned char ch; + register _Xconst unsigned char *str = (_Xconst unsigned char *) *text; -static CTInfo -_XlcGetCTInfoFromEncoding(encoding, length) - register char *encoding; - register int length; -{ - register CTInfo ct_info; + if (data == NULL) + data = &dummy; + *data = 0; - for (ct_info = ct_list; ct_info; ct_info = ct_info->next) { - if (length >= ct_info->encoding_len) { - if (ct_info->ext_segment) { - if (!strncmp(ct_info->encoding, encoding, 4) && - !strncmp(ct_info->ext_segment, encoding + 6, - ct_info->ext_segment_len)) - return ct_info; - } else if (!strncmp(ct_info->encoding, encoding, - ct_info->encoding_len)) { - return ct_info; - } - } + switch (ch = *str++) { + case XctESC: + switch (ch = *str++) { + case XctOtherCoding: /* % */ + ch = *str++; + if (ch == XctNonStandard) { + ret = XctExtSeg; + ch = *str++; + } else { + ret = XctOtherCoding; + } + *data = (unsigned int) ch; + break; + + case XctCntrlFunc: /* # */ + *data = (unsigned int) *str++; + switch (*str++){ + case XctIgnoreExt: + ret = XctIgnoreExt; + break; + case XctNotIgnoreExt: + ret = XctNotIgnoreExt; + break; + default: + ret = 0; + break; + } + break; + + case XctMB: /* $ */ + ch = *str++; + switch (ch) { + case XctGL94: + ret = XctGL94MB; + break; + case XctGR94: + ret = XctGR94MB; + break; + default: + ret = 0; + break; + } + *data = (unsigned int) *str++; + break; + + case XctGL94: + ret = XctGL94; + *data = (unsigned int) *str++; + break; + case XctGR94: + ret = XctGR94; + *data = (unsigned int) *str++; + break; + case XctGR96: + ret = XctGR96; + *data = (unsigned int) *str++; + break; + } + break; + case XctCSI: + /* direction */ + if (*str == XctLeftToRight && *(str + 1) == XctDirection) { + ret = XctLeftToRight; + str += 2; + } else if (*str == XctRightToLeft && *(str + 1) == XctDirection) { + ret = XctRightToLeft; + str += 2; + } else if (*str == XctDirectionEnd) { + ret = XctDirectionEnd; + str++; + } else { + ret = 0; + } + break; } - return (CTInfo) NULL; + if (ret) { + *length -= (char *) str - *text; + *text = (char *) str; + } + return ret; } -static unsigned int -_XlcParseCT(parse, text, length) - register CTParse parse; - char **text; - int *length; +/* + * Fills into a freshly created XlcCharSet the fields that can be inferred + * from the ESC sequence. + * + * Used by _XlcCreateDefaultCharSet. + */ +Bool +_XlcParseCharSet(charset) + XlcCharSet charset; { - unsigned int ret = 0; - unsigned char ch; - register unsigned char *str = (unsigned char *) *text; + unsigned int type, final_byte; + _Xconst char *ptr = charset->ct_sequence; + int length; + int char_size = 1; + + if (ptr == NULL || *ptr == '\0') + return False; - bzero((char *) parse, sizeof(CTParseRec)); + length = strlen(ptr); - switch (ch = *str++) { - case XctESC: - if (*str == XctOtherCoding && *(str + 1) == XctNonStandard - && *(str + 2) >= 0x30 && *(str + 2) <= 0x3f && *length >= 6) { - - /* non-standard encodings */ - parse->side = XlcGLGR; - parse->set_size = 0; - str += 2; - if (*str <= 0x34) { - parse->char_size = *str - 0x30; - if (parse->char_size == 0) parse->char_size = 1; - ret = XctExtSeg; - parse->ct_info = _XlcGetCTInfoFromEncoding(*text, *length); - } else - ret = XctOtherSeg; - str++; - parse->ext_seg_length = (*str - 128) * 128 + *(str + 1) - 128; - str += 2; - - goto done; - } else if (*str == XctCntrlFunc && *length >= 4 && - *(str + 1) >= 0x20 && *(str + 1) <= 0x2f && - (*(str + 2) == XctIgnoreExt || - *(str + 2) == XctNotIgnoreExt)) { - - /* ignore extension or not */ - str++; - parse->version = *str++ - 0x20; - ret = *str++; - - goto done; - } - - if (*str == XctMB) { /* multiple-byte sets */ - parse->char_size = 2; - str++; - } else - parse->char_size = 1; - - switch (*str) { - case XctGL94: - parse->side = XlcGL; - parse->set_size = 94; - ret = (parse->char_size == 1) ? XctGL94 : XctGL94MB; - break; - case XctGR94: - parse->side = XlcGR; - parse->set_size = 94; - ret = (parse->char_size == 1) ? XctGR94 : XctGR94MB; - break; - case XctGR96: - if (parse->char_size == 1) { - parse->side = XlcGR; - parse->set_size = 96; - ret = XctGR96; - } - break; - } - if (ret) { - str++; - if (*str >= 0x24 && *str <= 0x2f) { /* non-standard */ - ret = 0; - str++; - } - } - - SKIP_I(str) - - if (ret && *str < 0x40) /* non-standard */ - ret = 0; - - if (*str < 0x30 || *str > 0x7e || (char *) str - *text >= *length) - break; - - if (ret == 0) - ret = XctESCSeq; - else { - if (parse->char_size == 2) { - if (*str >= 0x70) - parse->char_size = 4; - else if (*str >= 0x60) - parse->char_size = 3; - } - parse->ct_info = _XlcGetCTInfoFromEncoding(*text, *length); - } - str++; - goto done; - case XctCSI: - /* direction */ - if (*str == XctLeftToRight && *(str + 1) == XctDirection) { - ret = XctLeftToRight; - str += 2; - goto done; - } else if (*str == XctRightToLeft && *(str + 1) == XctDirection) { - ret = XctRightToLeft; - str += 2; - goto done; - } else if (*str == XctDirectionEnd) { - ret = XctDirectionEnd; - str++; - goto done; - } - - SKIP_P(str) - SKIP_I(str) - - if (*str < 0x40 && *str > 0x7e) - break; - - ret = XctCSISeq; - str++; - goto done; + type = _XlcParseCT(&ptr, &length, &final_byte); + + if (type == XctGR94MB || type == XctGL94MB) { + if (final_byte < 0x60) { + char_size = 2; + } else if (final_byte < 0x70) { + char_size = 3; + } else { + char_size = 4; + } } - if (ch & 0x80) { - if (ch < 0xa0) - ret = XctC1; - else - ret = XctGR; - } else { - if (ch == XctHT || ch == XctNL) - ret = ch; - else if (ch < 0x20) - ret = XctC0; - else - ret = XctGL; + if (type == XctExtSeg) { + char_size = final_byte - '0'; + if ((char_size < 1) || (char_size > 4)) + char_size = 1; } - return ret; + switch (type) { + case XctGR94MB : + case XctGR94 : + charset->side = XlcGR; + charset->set_size = 94; + charset->char_size = char_size; + break; + case XctGL94MB : + case XctGL94 : + charset->side = XlcGL; + charset->set_size = 94; + charset->char_size = char_size; + break; + case XctGR96: + charset->side = XlcGR; + charset->set_size = 96; + charset->char_size = char_size; + break; + case XctOtherCoding: + case XctExtSeg: + charset->side = XlcGLGR; + charset->char_size = char_size; + break; + } + return True; +} -done: - *length -= (char *) str - *text; - *text = (char *) str; - return ret; +/* =============== Management of the List of Character Sets =============== */ + +/* + * Representation of a character set that can be used for Compound Text, + * at run time. + */ +typedef struct _CTInfoRec { + XlcCharSet charset; + unsigned int type; + unsigned char final_byte; + int ext_segment_len; + char *ext_segment; /* extended segment */ + struct _CTInfoRec *next; +} CTInfoRec, *CTInfo; + +/* + * List of character sets that can be used for Compound Text, + * Includes all that are listed in default_ct_data, but more can be added + * at runtime through _XlcAddCT. + */ +static CTInfo ct_list = NULL; + +static CTInfo +_XlcGetCTInfo(text, type, final_byte) + _Xconst char *text; + unsigned int type; + unsigned char final_byte; +{ + CTInfo ct_info; + + for (ct_info = ct_list; ct_info; ct_info = ct_info->next) { + if (ct_info->type == type && ct_info->final_byte == final_byte) { + if (ct_info->ext_segment) { + if (text && + !strncmp(text, ct_info->ext_segment, ct_info->ext_segment_len)) + return ct_info; + } else { + return ct_info; + } + } + } + return (CTInfo) NULL; } XlcCharSet -_XlcAddCT(name, encoding) +_XlcAddCT(name, ct_sequence) _Xconst char *name; - char *encoding; + _Xconst char *ct_sequence; { CTInfo ct_info; XlcCharSet charset; - CTParseRec parse; - char *ct_ptr = encoding; + _Xconst char *ct_ptr = ct_sequence; int length; - unsigned int type; + unsigned int type, final_byte; + + length = strlen(ct_sequence); + + charset = _XlcGetCharSet(name); + if (charset == NULL) { + charset = _XlcCreateDefaultCharSet(name, ct_sequence); + if (charset == NULL) + return (XlcCharSet) NULL; + _XlcAddCharSet(charset); + } - length = strlen(encoding); + ct_info = (CTInfo) Xmalloc(sizeof(CTInfoRec)); + if (ct_info == NULL) + return (XlcCharSet) NULL; - switch (type = _XlcParseCT(&parse, &ct_ptr, &length)) { + ct_info->ext_segment = NULL; + ct_info->ext_segment_len = 0; + + type = _XlcParseCT(&ct_ptr, &length, &final_byte); + + switch (type) { case XctExtSeg: + if (strlen(charset->ct_sequence) > 6) { + ct_info->ext_segment = charset->ct_sequence + 6; + ct_info->ext_segment_len = strlen(ct_info->ext_segment) - 1; + } else { + ct_info->ext_segment = charset->encoding_name; + ct_info->ext_segment_len = strlen(ct_info->ext_segment); + } case XctGL94: case XctGL94MB: case XctGR94: case XctGR94MB: case XctGR96: - if (parse.ct_info) /* existed */ - return parse.ct_info->charset; - break; + case XctOtherCoding: + ct_info->type = type; + ct_info->final_byte = (unsigned char) final_byte; + ct_info->charset = charset; + break; default: - return (XlcCharSet) NULL; + Xfree(ct_info); + return (XlcCharSet) NULL; } - charset = _XlcCreateDefaultCharSet(name, encoding); - if (charset == NULL) - return (XlcCharSet) NULL; - _XlcAddCharSet(charset); - - ct_info = (CTInfo) Xmalloc(sizeof(CTInfoRec)); - if (ct_info == NULL) - return (XlcCharSet) NULL; - - ct_info->charset = charset; - ct_info->encoding = charset->ct_sequence; - ct_info->encoding_len = strlen(ct_info->encoding); - if (type == XctExtSeg) { - ct_info->ext_segment = ct_info->encoding + 6; - ct_info->ext_segment_len = strlen(ct_info->ext_segment); + if (!_XlcGetCTInfo( ct_info->ext_segment, type, ct_info->final_byte)) { + ct_info->next = ct_list; + ct_list = ct_info; } else { - ct_info->ext_segment = NULL; - ct_info->ext_segment_len = 0; + Xfree(ct_info); } - ct_info->next = ct_list; - ct_list = ct_info; return charset; } @@ -402,112 +463,77 @@ _XlcGetCTInfoFromCharSet(charset) return (CTInfo) NULL; } -Bool -_XlcParseCharSet(charset) - XlcCharSet charset; -{ - CTParseRec parse; - char *ptr, *bufp, buf[BUFSIZ]; - int length; - - if (charset->ct_sequence == NULL) - return False; - - if (charset->name && strlen(charset->name) >= sizeof(buf)) - return False; - - ptr = charset->ct_sequence; - length = strlen(ptr); - - (void) _XlcParseCT(&parse, &ptr, &length); - - if (charset->name) { - charset->xrm_name = XrmStringToQuark(charset->name); - - if ((length = strlen (charset->name)) < sizeof buf) bufp = buf; - else bufp = Xmalloc (length + 1); - - if (bufp == NULL) return False; - strcpy(bufp, charset->name); - if ((ptr = strchr(bufp, ':'))) - *ptr = '\0'; - charset->xrm_encoding_name = XrmStringToQuark(bufp); - if (bufp != buf) Xfree (bufp); - charset->encoding_name = XrmQuarkToString(charset->xrm_encoding_name); - } else { - charset->xrm_name = 0; - charset->encoding_name = NULL; - charset->xrm_encoding_name = 0; - } - charset->side = parse.side; - charset->char_size = parse.char_size; - charset->set_size = parse.set_size; - - return True; -} +/* ========== Converters String <--> CharSet <--> Compound Text ========== */ -static void init_converter(); - -Bool -_XlcInitCTInfo() -{ - register XlcCharSet charset; - register CTData ct_data; - register int num; - - if (ct_list == NULL) { - num = sizeof(default_ct_data) / sizeof(CTDataRec); - for (ct_data = default_ct_data; num-- > 0; ct_data++) { - charset = _XlcAddCT(ct_data->name, ct_data->encoding); - if (charset == NULL) - continue; - } - init_converter(); - } +/* + * Structure representing the parse state of a Compound Text string. + */ +typedef struct _StateRec { + XlcCharSet charset; + XlcCharSet GL_charset; + XlcCharSet GR_charset; + XlcCharSet ext_seg_charset; + int ext_seg_left; +} StateRec, *State; - return True; -} +typedef enum { resOK, resNotCTSeq, resNotInList } CheckResult; +/* resNotCTSeq - EscSeq not recognized, pointers not changed +* resNotInList - EscSeq recognized but charset not found, +* sequence skiped +* resOK - OK. Charset saved in 'state', sequence skiped +*/ -static int +static CheckResult _XlcCheckCTSequence(state, ctext, ctext_len) State state; - char **ctext; + _Xconst char **ctext; int *ctext_len; { XlcCharSet charset; - CTParseRec parse; CTInfo ct_info; - int length; + unsigned int type, final_byte; + unsigned int ext_seg_left; - _XlcParseCT(&parse, ctext, ctext_len); - - ct_info = parse.ct_info; - if (parse.ext_seg_length > 0) { /* XctExtSeg or XctOtherSeg */ - if (ct_info) { - length = ct_info->ext_segment_len; - *ctext += length; - *ctext_len -= length; - state->ext_seg_left = parse.ext_seg_length - length; - state->ext_seg_charset = ct_info->charset; - } else { - state->ext_seg_left = parse.ext_seg_length; - state->ext_seg_charset = NULL; - } - } else if (ct_info) { - if ((charset = ct_info->charset)) { - if (charset->side == XlcGL) - state->GL_charset = charset; - else if (charset->side == XlcGR) - state->GR_charset = charset; - } + type = _XlcParseCT(ctext, ctext_len, &final_byte); + + if (!type) + return resNotCTSeq; + + if ((type == XctExtSeg) && (*ctext_len > 2)) { + int msb = *(*ctext)++ & 0x7f; + int lsb = *(*ctext)++ & 0x7f; + ext_seg_left = (msb << 7) + lsb - 2; + *ctext_len -= 2; } - return 0; + ct_info = _XlcGetCTInfo(*ctext, type, (unsigned char) final_byte); + + if (ct_info) { + charset = ct_info->charset; + if (ct_info->ext_segment_len) { + *ctext += ct_info->ext_segment_len + 1; + *ctext_len -= ct_info->ext_segment_len + 1; + } + if (charset->side == XlcGL) { + state->GL_charset = charset; + } else if (charset->side == XlcGR) { + state->GR_charset = charset; + } else { + state->GL_charset = charset; + state->GR_charset = charset; + } + } else { + if (type == XctExtSeg) { + *ctext += ext_seg_left; + *ctext_len -= ext_seg_left; + } + return resNotInList; + } + return resOK; } - static void init_state(conv) XlcConv conv; @@ -527,6 +553,8 @@ init_state(conv) state->ext_seg_left = 0; } +/* from XlcNCompoundText to XlcNCharSet */ + static int cttocs(conv, from, from_left, to, to_left, args, num_args) XlcConv conv; @@ -539,66 +567,39 @@ cttocs(conv, from, from_left, to, to_left, args, num_args) { register State state = (State) conv->state; register unsigned char ch; - int length; + CheckResult ret; XlcCharSet charset = NULL; - char *ctptr, *bufptr; + _Xconst char *ctptr; + char *bufptr; int ctext_len, buf_len; - ctptr = *((char **) from); - bufptr = *((char **) to); + ctptr = (char *) *from; + bufptr = (char *) *to; ctext_len = *from_left; buf_len = *to_left; while (ctext_len > 0 && buf_len > 0) { - if (state->ext_seg_left > 0) { - length = min(state->ext_seg_left, ctext_len); - length = min(length, buf_len); - - ctext_len -= length; - state->ext_seg_left -= length; - - if (state->ext_seg_charset) { - charset = state->ext_seg_charset; - buf_len -= length; - if (charset->side == XlcGL) { - while (length-- > 0) - *bufptr++ = *ctptr++ & 0x7f; - } else if (charset->side == XlcGR) { - while (length-- > 0) - *bufptr++ = *ctptr++ | 0x80; - } else { - while (length-- > 0) - *bufptr++ = *ctptr++; - } - - if (state->ext_seg_left < 1) - state->ext_seg_charset = NULL; - } - break; - } - ch = *((unsigned char *) ctptr); - if (ch == 0x1b || ch == 0x9b) { - length = _XlcCheckCTSequence(state, &ctptr, &ctext_len); - if (length < 0) - return -1; - if (state->ext_seg_left > 0 && charset) - break; - } else { - if (charset) { - if (charset != (ch & 0x80 ? state->GR_charset : - state->GL_charset)) - break; - } else - charset = ch & 0x80 ? state->GR_charset : state->GL_charset; - - if ((ch < 0x20 && ch != '\0' && ch != '\n' && ch != '\t') || - (ch >= 0x80 && ch < 0xa0)) - return -1; - - *bufptr++ = *ctptr++; - ctext_len--; - buf_len--; - } + ch = *ctptr; + if (ch == XctCSI) { + /* do nothing except skip sequence if not recognized */ + if (_XlcParseCT(&ctptr, &ctext_len, NULL)) + continue; + } + if (ch == XctESC) { + ret = _XlcCheckCTSequence(state, &ctptr, &ctext_len); + if (ret == resOK || ret == resNotInList) + continue; + } + if (charset) { + if (charset != (ch & 0x80 ? state->GR_charset : state->GL_charset)) + break; + } else { + charset = (ch & 0x80 ? state->GR_charset : state->GL_charset); + } + + *bufptr++ = *ctptr++; + ctext_len--; + buf_len--; } if (charset) @@ -615,6 +616,8 @@ cttocs(conv, from, from_left, to, to_left, args, num_args) return 0; } +/* from XlcNCharSet to XlcNCompoundText */ + static int cstoct(conv, from, from_left, to, to_left, args, num_args) XlcConv conv; @@ -650,73 +653,73 @@ cstoct(conv, from, from_left, to, to_left, args, num_args) return -1; side = charset->side; + length = strlen(charset->ct_sequence); + + if (((side == XlcGR || side == XlcGLGR) && + charset != state->GR_charset) || + ((side == XlcGL || side == XlcGLGR) && + charset != state->GL_charset) ) { + + /* output esc-sequence */ + if ((ct_info->type == XctExtSeg) && (length >= 7)) { + int comp_len = length + strlen(ct_info->ext_segment) + 3; + + if (ct_len < comp_len) + return -1; + + strcpy(ctptr, ct_info->charset->ct_sequence); + ctptr += length; + + length = ct_info->ext_segment_len; + *ctptr++ = ((length + 3) / 128) | 0x80; + *ctptr++ = ((length + 3) % 128) | 0x80; + strncpy(ctptr, ct_info->ext_segment, length); + ctptr += length; + *ctptr++ = XctSTX; + ct_len -= comp_len; + + } else { + if (ct_len < length) + return -1; + + strcpy(ctptr, ct_info->charset->ct_sequence); + ctptr += length; + ct_len -= length; + } + } + min_ch = 0x20; + max_ch = 0x7f; - if (ct_info->ext_segment) { - if (charset != state->ext_seg_charset && state->ext_seg_left < 1) { - length = ct_info->encoding_len; - if (ct_len < length) - return -1; - strcpy(ctptr, ct_info->encoding); - ctptr[4] = ((ct_info->ext_segment_len + csstr_len) / 128) | 0x80; - ctptr[5] = ((ct_info->ext_segment_len + csstr_len) % 128) | 0x80; - ctptr += length; - ct_len -= length; - state->ext_seg_left = csstr_len; - } - length = min(state->ext_seg_left, csstr_len); - state->ext_seg_left -= length; - - if (side == XlcGL) { - while (length-- > 0) - *ctptr++ = *csptr++ & 0x7f; - } else if (side == XlcGR) { - while (length-- > 0) - *ctptr++ = *csptr++ | 0x80; - } else { - while (length-- > 0) - *ctptr++ = *csptr++; - } - state->ext_seg_charset = (state->ext_seg_left > 0) ? charset : NULL; - } else { - if ((side == XlcGR && charset != state->GR_charset) || - (side == XlcGL && charset != state->GL_charset)) { - - ct_len -= ct_info->encoding_len; - if (ct_len < 0) - return -1; - strcpy(ctptr, ct_info->encoding); - ctptr += ct_info->encoding_len; - } - - min_ch = 0x20; - max_ch = 0x7f; - - if (charset->set_size == 94) { - max_ch--; - if (charset->char_size > 1 || side == XlcGR) - min_ch++; - } + if (charset->set_size == 94) { + max_ch--; + if (charset->char_size > 1 || side == XlcGR) + min_ch++; + } - while (csstr_len > 0 && ct_len > 0) { - ch = *((unsigned char *) csptr++) & 0x7f; - if (ch < min_ch || ch > max_ch) - if (ch != 0x00 && ch != 0x09 && ch != 0x0a && ch != 0x1b) - continue; /* XXX */ - if (side == XlcGL) - *ctptr++ = ch & 0x7f; - else if (side == XlcGR) - *ctptr++ = ch | 0x80; - else - *ctptr++ = ch; - csstr_len--; - ct_len--; - } - if (side == XlcGR) - state->GR_charset = charset; - else if (side == XlcGL) - state->GL_charset = charset; + while (csstr_len > 0 && ct_len > 0) { + ch = *((unsigned char *) csptr) & 0x7f; + if (ch < min_ch || ch > max_ch) + if (ch != 0x00 && ch != 0x09 && ch != 0x0a && ch != 0x1b) { + csptr++; + csstr_len--; + continue; /* XXX */ + } + + if (side == XlcGL) + *ctptr++ = *csptr++ & 0x7f; + else if (side == XlcGR) + *ctptr++ = *csptr++ | 0x80; + else + *ctptr++ = *csptr++; + csstr_len--; + ct_len--; } + if (side == XlcGR || side == XlcGLGR) + state->GR_charset = charset; + if (side == XlcGL || side == XlcGLGR) + state->GL_charset = charset; + *from_left -= csptr - *((char **) from); *from = (XPointer) csptr; @@ -726,6 +729,8 @@ cstoct(conv, from, from_left, to, to_left, args, num_args) return 0; } +/* from XlcNString to XlcNCharSet */ + static int strtocs(conv, from, from_left, to, to_left, args, num_args) XlcConv conv; @@ -756,11 +761,13 @@ strtocs(conv, from, from_left, to, to_left, args, num_args) *to = (XPointer) dst; if (num_args > 0) - *((XlcCharSet *)args[0]) = side ? state->GR_charset : state->GL_charset; + *((XlcCharSet *)args[0]) = (side ? state->GR_charset : state->GL_charset); return 0; } +/* from XlcNCharSet to XlcNString */ + static int cstostr(conv, from, from_left, to, to_left, args, num_args) XlcConv conv; @@ -807,42 +814,34 @@ cstostr(conv, from, from_left, to, to_left, args, num_args) } -static void -close_converter(conv) - XlcConv conv; -{ - if (conv->state) - Xfree((char *) conv->state); - - Xfree((char *) conv); -} - static XlcConv create_conv(methods) XlcConvMethods methods; { register XlcConv conv; - conv = (XlcConv) Xmalloc(sizeof(XlcConvRec)); + conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + sizeof(StateRec)); if (conv == NULL) return (XlcConv) NULL; - conv->state = (XPointer) Xmalloc(sizeof(StateRec)); - if (conv->state == NULL) - goto err; - + conv->state = (XPointer) &conv[1]; + conv->methods = methods; init_state(conv); return conv; +} -err: - close_converter(conv); - - return (XlcConv) NULL; +static void +close_converter(conv) + XlcConv conv; +{ + /* conv->state is allocated together with conv, free both at once. */ + Xfree((char *) conv); } + static XlcConvMethodsRec cttocs_methods = { close_converter, cttocs, @@ -859,6 +858,7 @@ open_cttocs(from_lcd, from_type, to_lcd, to_type) return create_conv(&cttocs_methods); } + static XlcConvMethodsRec cstoct_methods = { close_converter, cstoct, @@ -875,6 +875,7 @@ open_cstoct(from_lcd, from_type, to_lcd, to_type) return create_conv(&cstoct_methods); } + static XlcConvMethodsRec strtocs_methods = { close_converter, strtocs, @@ -891,6 +892,7 @@ open_strtocs(from_lcd, from_type, to_lcd, to_type) return create_conv(&strtocs_methods); } + static XlcConvMethodsRec cstostr_methods = { close_converter, cstostr, @@ -907,16 +909,38 @@ open_cstostr(from_lcd, from_type, to_lcd, to_type) return create_conv(&cstostr_methods); } -static void -init_converter() + +/* =========================== Initialization =========================== */ + +Bool +_XlcInitCTInfo() { - _XlcSetConverter((XLCd) NULL, XlcNCompoundText, (XLCd) NULL, XlcNCharSet, - open_cttocs); - _XlcSetConverter((XLCd) NULL, XlcNString, (XLCd) NULL, XlcNCharSet, - open_strtocs); - - _XlcSetConverter((XLCd) NULL, XlcNCharSet, (XLCd) NULL, XlcNCompoundText, - open_cstoct); - _XlcSetConverter((XLCd) NULL, XlcNCharSet, (XLCd) NULL, XlcNString, - open_cstostr); + if (ct_list == NULL) { + CTData ct_data; + int num; + + /* Initialize ct_list. */ + + num = sizeof(default_ct_data) / sizeof(CTDataRec); + for (ct_data = default_ct_data; num > 0; ct_data++, num--) + _XlcAddCT(ct_data->name, ct_data->encoding); + + /* Register CompoundText and CharSet converters. */ + + _XlcSetConverter((XLCd) NULL, XlcNCompoundText, + (XLCd) NULL, XlcNCharSet, + open_cttocs); + _XlcSetConverter((XLCd) NULL, XlcNString, + (XLCd) NULL, XlcNCharSet, + open_strtocs); + + _XlcSetConverter((XLCd) NULL, XlcNCharSet, + (XLCd) NULL, XlcNCompoundText, + open_cstoct); + _XlcSetConverter((XLCd) NULL, XlcNCharSet, + (XLCd) NULL, XlcNString, + open_cstostr); + } + + return True; } |