diff options
author | Behnam Esfahbod <behnam@zwnj.org> | 2008-12-02 02:33:18 +0330 |
---|---|---|
committer | Behnam ZWNJ Esfahbod <behnam@zwnj.org> | 2008-12-02 02:33:18 +0330 |
commit | 8d856743e6e2d37076195f92d8ab114cd60d96bd (patch) | |
tree | 0545681190880e971e9db862606085f7ca1bbdd4 | |
parent | c0a967a01fdfa8869669ba9478a8b4d1ccc7f4d5 (diff) |
[fribidi.py] Add Types class
-rwxr-xr-x | fribidi.py | 185 |
1 files changed, 160 insertions, 25 deletions
@@ -8,13 +8,99 @@ import sys libfribidi = ctypes.CDLL("libfribidi.so") +# Character Types + +class types: + + # Define Masks + + MASK_RTL = 0x00000001L # Is right to left + MASK_ARABIC = 0x00000002L # Is arabic + + # Each char can be only one of the three following. + MASK_STRONG = 0x00000010L # Is strong + MASK_WEAK = 0x00000020L # Is weak + MASK_NEUTRAL = 0x00000040L # Is neutral + + # Each char can be only one of the five following. + MASK_LETTER = 0x00000100L # Is letter: L, R, AL + MASK_NUMBER = 0x00000200L # Is number: EN, AN + MASK_NUMSEPTER = 0x00000400L # Is number separator or terminator: ES, ET, CS + MASK_SPACE = 0x00000800L # Is space: BN, BS, SS, WS + MASK_EXPLICIT = 0x00001000L # Is explicit mark: LRE, RLE, LRO, RLO, PDF + + # Can be on only if MASK_SPACE is also on. + MASK_SEPARATOR = 0x00002000L # Is text separator: BS, SS + # Can be on only if MASK_EXPLICIT is also on. + MASK_OVERRIDE = 0x00004000L # Is explicit override: LRO, RLO + + # The following must be to make types pairwise different, some of them can + # be removed but are here because of efficiency (make queries faster). 
+ + MASK_ES = 0x00010000L + MASK_ET = 0x00020000L + MASK_CS = 0x00040000L + + MASK_NSM = 0x00080000L + MASK_BN = 0x00100000L + + MASK_BS = 0x00200000L + MASK_SS = 0x00400000L + MASK_WS = 0x00800000L + + # Define values for FriBidiCharType + + LTR = (MASK_STRONG + MASK_LETTER) # Strong left to right + RTL = (MASK_STRONG + MASK_LETTER + MASK_RTL) # Right to left characters + AL = (MASK_STRONG + MASK_LETTER + MASK_RTL + MASK_ARABIC) # Arabic characters + LRE = (MASK_STRONG + MASK_EXPLICIT) # Left-To-Right embedding + RLE = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL) # Right-To-Left embedding + LRO = (MASK_STRONG + MASK_EXPLICIT + MASK_OVERRIDE) # Left-To-Right override + RLO = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL + MASK_OVERRIDE) # Right-To-Left override + + PDF = (MASK_WEAK + MASK_EXPLICIT) # Pop directional override + EN = (MASK_WEAK + MASK_NUMBER) # European digit + AN = (MASK_WEAK + MASK_NUMBER + MASK_ARABIC) # Arabic digit + ES = (MASK_WEAK + MASK_NUMSEPTER + MASK_ES) # European number separator + ET = (MASK_WEAK + MASK_NUMSEPTER + MASK_ET) # European number terminator + CS = (MASK_WEAK + MASK_NUMSEPTER + MASK_CS) # Common Separator + NSM = (MASK_WEAK + MASK_NSM) # Non spacing mark + BN = (MASK_WEAK + MASK_SPACE + MASK_BN) # Boundary neutral + + BS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_BS) # Block separator + SS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_SS) # Segment separator + WS = (MASK_NEUTRAL + MASK_SPACE + MASK_WS) # Whitespace + ON = (MASK_NEUTRAL) # Other Neutral + + +# Memory allocation functions + + +def _malloc_int_array (l): + """ + Returns a pointer to allocated C int array of length `l' + """ + + t = ctypes.c_int * l + return t() + +def _malloc_int8_array (l): + """ + Returns a pointer to allocated C int8 array of length `l' + """ + + t = ctypes.c_int8 * l + return t() + + def _malloc_utf8_array (l): """ Returns a pointer to allocated UTF8 (C char) array of length `l' """ - Utf8Array = ctypes.c_char * l - return Utf8Array() 
+ t = ctypes.c_char * l + return t() + def _malloc_utf8_array_from_string (s): """ @@ -23,39 +109,38 @@ def _malloc_utf8_array_from_string (s): return ctypes.c_char_p(s) + def _malloc_utc32_array (l): """ Returns a pointer to allocated UTC32 (C int32) array of length `l' """ - Utc32Array = ctypes.c_uint32 * l - return Utc32Array() + t = ctypes.c_uint32 * l + return t() + +# Unicode type convertors def _pyunicode_to_utc32_p (a_pyunicode): """ Converts Python Unicode instance to UTC32 (C int32) array - - Note: Caller should free the allocated memory of returned pointer """ a_len = len(a_pyunicode) - print 'a_len', a_len + #print 'a_len', a_len utf8_pystr = a_pyunicode.encode('utf-8') utf8_len = len(utf8_pystr) utf8_p = _malloc_utf8_array_from_string(utf8_pystr) - print 'utf8_p.value', utf8_p.value - print 'utf8_len', utf8_len + #print 'utf8_p.value', utf8_p.value + #print 'utf8_len', utf8_len utc32_p = _malloc_utc32_array(a_len+1) libfribidi.fribidi_utf8_to_unicode (utf8_p, utf8_len, utc32_p) - print 'utc32_p [%04x, %04x, %04x, %04x]' % (utc32_p[0], utc32_p[1], utc32_p[2], utc32_p[3]) - - print + #print 'utc32_p [%04x, %04x, %04x, %04x]' % (utc32_p[0], utc32_p[1], utc32_p[2], utc32_p[3]) # XX: Caller should free it! 
return utc32_p @@ -66,39 +151,89 @@ def _utc32_p_to_pyunicode (a_utc32_p): Converts UTC32 (C int32) array to Python Unicode instance """ - print 'a_utc32_p [%04x, %04x, %04x, %04x]' % (a_utc32_p[0], a_utc32_p[1], a_utc32_p[2], a_utc32_p[3]) + #print 'a_utc32_p [%04x, %04x, %04x, %04x]' % (a_utc32_p[0], a_utc32_p[1], a_utc32_p[2], a_utc32_p[3]) - utc32_len = ctypes.sizeof(a_utc32_p) / ctypes.sizeof(ctypes.c_uint32) - 1 - print 'utc32_len', utc32_len + utc32_len = ctypes.sizeof(a_utc32_p) / ctypes.sizeof(ctypes.c_uint32) + #print 'utc32_len', utc32_len utf8_len = 6*utc32_len+1 utf8_p = _malloc_utf8_array(utf8_len) libfribidi.fribidi_unicode_to_utf8 (a_utc32_p, utc32_len, utf8_p) - print - return utf8_p.value -def log2vis (input_pyunicode): - print 'input_pyunicode', input_pyunicode +# FriBidi API + +def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l_position=False, with_embedding_level=False): + input_len = len(input_pyunicode) + + # memory allocations input_utc32_p = _pyunicode_to_utc32_p(input_pyunicode) + pbase_dir = ctypes.c_int32(input_pbase_dir) + + output_utc32_p = _malloc_utc32_array(input_len) + + l2v_p = _malloc_int_array(input_len) if with_l2v_position else None + v2l_p = _malloc_int_array(input_len) if with_v2l_position else None + emb_p = _malloc_int8_array(input_len) if with_embedding_level else None + + + # calling fribidi_log2vis + + successed = libfribidi.fribidi_log2vis( + + # input + input_utc32_p, + input_len, + ctypes.pointer(pbase_dir), + + # output + output_utc32_p, + l2v_p, + v2l_p, + emb_p + ) - #print libfribidi.fribidi_log2vis() - output_utc32_p = input_utc32_p + if not successed: + raise Exception('fribidi_log2vis failed') + + + # pythonizing the output output_u = _utc32_p_to_pyunicode(output_utc32_p) - print 'output_u', output_u + if with_l2v_position or with_v2l_position or with_embedding_level: + res = [output_u] -if __name__=='__main__': - i = u"سلام" + if with_l2v_position: + res.append([i for i in 
l2v_p]) + + if with_v2l_position: + res.append([i for i in v2l_p]) + + if with_embedding_level: + res.append([i for i in emb_p]) + + else: + res = output_u - o = log2vis(i) + return res + +# Main + +def _test (): + i = u"سلام" + o = log2vis(i, 272, True, True, True) # LTR print o + + +if __name__=='__main__': + _test() + |