From 9191c7226a51b42eda3bacfbe276a193bde7bd69 Mon Sep 17 00:00:00 2001 From: Behnam Esfahbod Date: Tue, 2 Dec 2008 02:44:01 +0330 Subject: Basic documentation --- fribidi.py | 242 ++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 192 insertions(+), 50 deletions(-) diff --git a/fribidi.py b/fribidi.py index f02a76e..eef42f7 100755 --- a/fribidi.py +++ b/fribidi.py @@ -2,37 +2,81 @@ # coding=UTF-8 import ctypes -import sys +VERSION = '0.06' -libfribidi = ctypes.CDLL("libfribidi.so") +_libfribidi = ctypes.CDLL("libfribidi.so") # Character Types class types: + """ + Defines character type masks and types. + + Types: + + LTR Strong left to right + RTL Right to left characters + AL Arabic characters + LRE Left-To-Right embedding + RLE Right-To-Left embedding + LRO Left-To-Right override + RLO Right-To-Left override + + PDF Pop directional override + EN European digit + AN Arabic digit + ES European number separator + ET European number terminator + CS Common Separator + NSM Non spacing mark + BN Boundary neutral + + BS Block separator + SS Segment separator + WS Whitespace + ON Other Neutral + + Masks: + + MASK_RTL Is right to left + MASK_ARABIC Is arabic + + Each character can be only one of the three following: + MASK_STRONG Is strong + MASK_WEAK Is weak + MASK_NEUTRAL Is neutral + + Each character can be only one of the five following: + MASK_LETTER Is letter: L, R, AL + MASK_NUMBER Is number: EN, AN + MASK_NUMSEPTER Is number separator or terminator: ES, ET, CS + MASK_SPACE Is space: BN, BS, SS, WS + MASK_EXPLICIT Is explicit mark: LRE, RLE, LRO, RLO, PDF + + MASK_SEPARATOR Is test separator: BS, SS; and can be on only if MASK_SPACE is also on. + MASK_OVERRIDE Is explicit override: LRO, RLO; and can be on only if MASK_EXPLICIT is also on. + """ # Define Masks MASK_RTL = 0x00000001 # Is right to left MASK_ARABIC = 0x00000002 # Is arabic - # Each char can be only one of the three following. 
MASK_STRONG = 0x00000010 # Is strong MASK_WEAK = 0x00000020 # Is weak MASK_NEUTRAL = 0x00000040 # Is neutral - # Each char can be only one of the five following. MASK_LETTER = 0x00000100 # Is letter: L, R, AL MASK_NUMBER = 0x00000200 # Is number: EN, AN MASK_NUMSEPTER = 0x00000400 # Is number separator or terminator: ES, ET, CS MASK_SPACE = 0x00000800 # Is space: BN, BS, SS, WS MASK_EXPLICIT = 0x00001000 # Is expilict mark: LRE, RLE, LRO, RLO, PDF - # Can be on only if MASK_SPACE is also on. MASK_SEPARATOR = 0x00002000 # Is test separator: BS, SS - # Can be on only if MASK_EXPLICIT is also on. + MASK_OVERRIDE = 0x00004000 # Is explicit override: LRO, RLO # The following must be to make types pairwise different, some of them can @@ -51,27 +95,27 @@ class types: # Define values for FriBidiCharType - LTR = (MASK_STRONG + MASK_LETTER) # Strong left to right - RTL = (MASK_STRONG + MASK_LETTER + MASK_RTL) # Right to left characters - AL = (MASK_STRONG + MASK_LETTER + MASK_RTL + MASK_ARABIC) # Arabic characters - LRE = (MASK_STRONG + MASK_EXPLICIT) # Left-To-Right embedding - RLE = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL) # Right-To-Left embedding - LRO = (MASK_STRONG + MASK_EXPLICIT + MASK_OVERRIDE) # Left-To-Right override - RLO = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL + MASK_OVERRIDE) # Right-To-Left override - - PDF = (MASK_WEAK + MASK_EXPLICIT) # Pop directional override - EN = (MASK_WEAK + MASK_NUMBER) # European digit - AN = (MASK_WEAK + MASK_NUMBER + MASK_ARABIC) # Arabic digit - ES = (MASK_WEAK + MASK_NUMSEPTER + MASK_ES) # European number separator - ET = (MASK_WEAK + MASK_NUMSEPTER + MASK_ET) # European number terminator - CS = (MASK_WEAK + MASK_NUMSEPTER + MASK_CS) # Common Separator - NSM = (MASK_WEAK + MASK_NSM) # Non spacing mark - BN = (MASK_WEAK + MASK_SPACE + MASK_BN) # Boundary neutral - - BS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_BS) # Block separator - SS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_SS) # Segment separator - WS 
= (MASK_NEUTRAL + MASK_SPACE + MASK_WS) # Whitespace - ON = (MASK_NEUTRAL) # Other Neutral + LTR = (MASK_STRONG + MASK_LETTER) # Strong left to right + RTL = (MASK_STRONG + MASK_LETTER + MASK_RTL) # Right to left characters + AL = (MASK_STRONG + MASK_LETTER + MASK_RTL + MASK_ARABIC) # Arabic characters + LRE = (MASK_STRONG + MASK_EXPLICIT) # Left-To-Right embedding + RLE = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL) # Right-To-Left embedding + LRO = (MASK_STRONG + MASK_EXPLICIT + MASK_OVERRIDE) # Left-To-Right override + RLO = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL + MASK_OVERRIDE) # Right-To-Left override + + PDF = (MASK_WEAK + MASK_EXPLICIT) # Pop directional override + EN = (MASK_WEAK + MASK_NUMBER) # European digit + AN = (MASK_WEAK + MASK_NUMBER + MASK_ARABIC) # Arabic digit + ES = (MASK_WEAK + MASK_NUMSEPTER + MASK_ES) # European number separator + ET = (MASK_WEAK + MASK_NUMSEPTER + MASK_ET) # European number terminator + CS = (MASK_WEAK + MASK_NUMSEPTER + MASK_CS) # Common Separator + NSM = (MASK_WEAK + MASK_NSM) # Non spacing mark + BN = (MASK_WEAK + MASK_SPACE + MASK_BN) # Boundary neutral + + BS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_BS) # Block separator + SS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_SS) # Segment separator + WS = (MASK_NEUTRAL + MASK_SPACE + MASK_WS) # Whitespace + ON = (MASK_NEUTRAL) # Other Neutral # Memory allocation functions @@ -131,7 +175,7 @@ def _pyunicode_to_utc32_p (a_pyunicode): #print 'a_len', a_len - utf8_pystr = a_pyunicode.encode('utf-8') + utf8_pystr = a_pyunicode.encode('UTF-8') utf8_len = len(utf8_pystr) utf8_p = _malloc_utf8_array_from_string(utf8_pystr) @@ -139,7 +183,7 @@ def _pyunicode_to_utc32_p (a_pyunicode): #print 'utf8_len', utf8_len utc32_p = _malloc_utc32_array(a_len+1) - libfribidi.fribidi_utf8_to_unicode (utf8_p, utf8_len, utc32_p) + _libfribidi.fribidi_utf8_to_unicode (utf8_p, utf8_len, utc32_p) #print 'utc32_p [%04x, %04x, %04x, %04x]' % (utc32_p[0], utc32_p[1], utc32_p[2], 
utc32_p[3]) @@ -160,7 +204,7 @@ def _utc32_p_to_pyunicode (a_utc32_p): utf8_len = 6*utc32_len+1 utf8_p = _malloc_utf8_array(utf8_len) - libfribidi.fribidi_unicode_to_utf8 (a_utc32_p, utc32_len, utf8_p) + _libfribidi.fribidi_unicode_to_utf8 (a_utc32_p, utc32_len, utf8_p) return utf8_p.value @@ -168,13 +212,24 @@ def _utc32_p_to_pyunicode (a_utc32_p): # FriBidi API -def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l_position=False, with_embedding_level=False): - input_len = len(input_pyunicode) +def log2vis (unicode_text, base_direction, with_l2v_position=False, with_v2l_position=False, with_embedding_level=False): + """ + Returns the visual order of characters in the text. + + If with_l2v_position, with_v2l_position, or with_embedding_level are true, + the return value will be an array, including logical-to-visual position, + visual-to-logical positions, or embedding-level arrays respectively. + """ + + if unicode_text.__class__ != unicode: + unicode_text = unicode(unicode_text) - # memory allocations + input_len = len(unicode_text) - input_utc32_p = _pyunicode_to_utc32_p(input_pyunicode) - pbase_dir_p = ctypes.pointer(ctypes.c_int32(input_pbase_dir)) + # Memory allocations + + input_utc32_p = _pyunicode_to_utc32_p(unicode_text) + pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction)) output_utc32_p = _malloc_utc32_array(input_len+1) @@ -183,9 +238,23 @@ def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l emb_p = _malloc_int8_array(input_len) if with_embedding_level else None - # calling fribidi_log2vis + # Calling the api + + """ + FRIBIDI_API fribidi_boolean fribidi_log2vis ( + /* input */ + FriBidiChar *str, + FriBidiStrIndex len, + FriBidiCharType *pbase_dirs, + /* output */ + FriBidiChar *visual_str, + FriBidiStrIndex *position_L_to_V_list, + FriBidiStrIndex *position_V_to_L_list, + FriBidiLevel *embedding_level_list + ); + """ - successed = libfribidi.fribidi_log2vis( + successed = 
_libfribidi.fribidi_log2vis( # input input_utc32_p, input_len, @@ -202,7 +271,7 @@ def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l raise Exception('fribidi_log2vis failed') - # pythonizing the output + # Pythonizing the output output_u = _utc32_p_to_pyunicode(output_utc32_p) @@ -225,20 +294,38 @@ def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l return res -def log2vis_get_embedding_levels (input_pyunicode, input_pbase_dir): - input_len = len(input_pyunicode) +def log2vis_get_embedding_levels (unicode_text, base_direction): + """ + Returns the embedding-level of characters in the text. + """ + + if unicode_text.__class__ != unicode: + unicode_text = unicode(unicode_text) - # memory allocations + input_len = len(unicode_text) - input_utc32_p = _pyunicode_to_utc32_p(input_pyunicode) - pbase_dir_p = ctypes.pointer(ctypes.c_int32(input_pbase_dir)) + # Memory allocations + + input_utc32_p = _pyunicode_to_utc32_p(unicode_text) + pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction)) emb_p = _malloc_int8_array(input_len) - # calling fribidi_log2vis + # Calling the api + + """ + FRIBIDI_API fribidi_boolean fribidi_log2vis_get_embedding_levels ( + /* input */ + FriBidiChar *str, + FriBidiStrIndex len, + FriBidiCharType *pbase_dir, + /* output */ + FriBidiLevel *embedding_level_list + ); + """ - successed = libfribidi.fribidi_log2vis_get_embedding_levels( + successed = _libfribidi.fribidi_log2vis_get_embedding_levels( # input input_utc32_p, input_len, @@ -249,10 +336,61 @@ def log2vis_get_embedding_levels (input_pyunicode, input_pbase_dir): ) if not successed: - raise Exception('fribidi_log2vis failed') + raise Exception('fribidi_log2vis_get_embedding_levels failed') + + + # Pythonizing the output + + res = [i for i in emb_p] + + return res + + +def remove_bidi_marks (unicode_text, base_direction): + """ + TODO + """ + + if unicode_text.__class__ != unicode: + unicode_text = unicode(unicode_text) + + 
input_len = len(unicode_text) + + # Memory allocations + + input_utc32_p = _pyunicode_to_utc32_p(unicode_text) + pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction)) + + emb_p = _malloc_int8_array(input_len) + + # Calling the api - # pythonizing the output + """ + FRIBIDI_API FriBidiStrIndex fribidi_remove_bidi_marks ( + FriBidiChar *str, + FriBidiStrIndex length, + FriBidiStrIndex *position_to_this_list, + FriBidiStrIndex *position_from_this_list, + FriBidiLevel *embedding_level_list + ); + """ + + successed = _libfribidi.fribidi_remove_bidi_marks( + # input + input_utc32_p, + input_len, + pbase_dir_p, + + # output + emb_p + ) + + if not successed: + raise Exception('fribidi_remove_bidi_marks failed') + + + # Pythonizing the output res = [i for i in emb_p] @@ -261,7 +399,10 @@ def log2vis_get_embedding_levels (input_pyunicode, input_pbase_dir): # Main -VERSION = '0.05' +def _main (): + import sys + text = ' '.join(sys.argv[1:]).decode('UTF-8') + print log2vis(text, types.LTR) def _test (): @@ -274,12 +415,13 @@ def _test (): print log2vis(u"aسلام", types.LTR, True, True, True) print log2vis(u"aسلام", types.RTL, True, True, True) + print log2vis_get_embedding_levels("abc", types.LTR) print log2vis_get_embedding_levels(u"aسلام", types.LTR) print log2vis_get_embedding_levels(u"aسلام", types.RTL) - if __name__=='__main__': - _test() + _main() + _test() -- cgit v1.2.3