Basic documentation

author: Behnam Esfahbod <behnam@zwnj.org> 2008-12-02 02:44:01 +0330
committer: Behnam ZWNJ Esfahbod <behnam@zwnj.org> 2008-12-02 02:44:01 +0330
commit: 9191c7226a51b42eda3bacfbe276a193bde7bd69 (patch)
tree: 87b1bd2d52b2c6a143e812a2e15b56f51c8fc4a5
parent: ac7895e848e4c3f4b5c03d6a8e61338c07dd41de (diff)
1 files changed, 192 insertions, 50 deletions
diff --git a/fribidi.py b/fribidi.py
index f02a76e..eef42f7 100755
--- a/fribidi.py
+++ b/fribidi.py
@@ -2,37 +2,81 @@
 # coding=UTF-8
 
 import ctypes
-import sys
 
 
+VERSION = '0.06'
 
-libfribidi = ctypes.CDLL("libfribidi.so")
+_libfribidi = ctypes.CDLL("libfribidi.so")
 
 
 # Character Types
 
 class types:
+    """
+    Defines character type masks and types.
+
+    Types:
+
+        LTR     Strong left to right
+        RTL     Right to left characters
+        AL      Arabic characters
+        LRE     Left-To-Right embedding
+        RLE     Right-To-Left embedding
+        LRO     Left-To-Right override
+        RLO     Right-To-Left override
+
+        PDF     Pop directional override
+        EN      European digit
+        AN      Arabic digit
+        ES      European number separator
+        ET      European number terminator
+        CS      Common Separator
+        NSM     Non spacing mark
+        BN      Boundary neutral
+
+        BS      Block separator
+        SS      Segment separator
+        WS      Whitespace
+        ON      Other Neutral
+
+    Masks:
+
+        MASK_RTL        Is right to left
+        MASK_ARABIC     Is arabic
+
+    Each character can be only one of the three following:
+        MASK_STRONG     Is strong
+        MASK_WEAK       Is weak
+        MASK_NEUTRAL    Is neutral
+
+    Each character can be only one of the five following:
+        MASK_LETTER     Is letter: L, R, AL
+        MASK_NUMBER     Is number: EN, AN
+        MASK_NUMSEPTER  Is number separator or terminator: ES, ET, CS
+        MASK_SPACE      Is space: BN, BS, SS, WS
+        MASK_EXPLICIT   Is explicit mark: LRE, RLE, LRO, RLO, PDF
+
+        MASK_SEPARATOR  Is text separator: BS, SS; and can be on only if MASK_SPACE is also on.
+        MASK_OVERRIDE   Is explicit override: LRO, RLO; and can be on only if MASK_EXPLICIT is also on.
+    """
 
     # Define Masks
 
     MASK_RTL        = 0x00000001   # Is right to left
     MASK_ARABIC     = 0x00000002   # Is arabic
 
-    # Each char can be only one of the three following.
     MASK_STRONG     = 0x00000010   # Is strong
     MASK_WEAK       = 0x00000020   # Is weak
     MASK_NEUTRAL    = 0x00000040   # Is neutral
 
-    # Each char can be only one of the five following.
     MASK_LETTER     = 0x00000100   # Is letter: L, R, AL
     MASK_NUMBER     = 0x00000200   # Is number: EN, AN
     MASK_NUMSEPTER  = 0x00000400   # Is number separator or terminator: ES, ET, CS
     MASK_SPACE      = 0x00000800   # Is space: BN, BS, SS, WS
     MASK_EXPLICIT   = 0x00001000   # Is expilict mark: LRE, RLE, LRO, RLO, PDF
 
-    # Can be on only if MASK_SPACE is also on.
     MASK_SEPARATOR  = 0x00002000   # Is test separator: BS, SS
-    # Can be on only if MASK_EXPLICIT is also on.
+
     MASK_OVERRIDE   = 0x00004000   # Is explicit override: LRO, RLO
 
     # The following must be to make types pairwise different, some of them can
@@ -51,27 +95,27 @@ class types:
 
     # Define values for FriBidiCharType
 
-    LTR     = (MASK_STRONG + MASK_LETTER) # Strong left to right
-    RTL     = (MASK_STRONG + MASK_LETTER + MASK_RTL) # Right to left characters
-    AL      = (MASK_STRONG + MASK_LETTER + MASK_RTL + MASK_ARABIC) # Arabic characters
-    LRE     = (MASK_STRONG + MASK_EXPLICIT) # Left-To-Right embedding
-    RLE     = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL) # Right-To-Left embedding
-    LRO     = (MASK_STRONG + MASK_EXPLICIT + MASK_OVERRIDE) # Left-To-Right override
-    RLO     = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL + MASK_OVERRIDE) # Right-To-Left override
-
-    PDF     = (MASK_WEAK + MASK_EXPLICIT) # Pop directional override
-    EN      = (MASK_WEAK + MASK_NUMBER) # European digit
-    AN      = (MASK_WEAK + MASK_NUMBER + MASK_ARABIC) # Arabic digit
-    ES      = (MASK_WEAK + MASK_NUMSEPTER + MASK_ES) # European number separator
-    ET      = (MASK_WEAK + MASK_NUMSEPTER + MASK_ET) # European number terminator
-    CS      = (MASK_WEAK + MASK_NUMSEPTER + MASK_CS) # Common Separator
-    NSM     = (MASK_WEAK + MASK_NSM) # Non spacing mark
-    BN      = (MASK_WEAK + MASK_SPACE + MASK_BN) # Boundary neutral
-
-    BS      = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_BS) # Block separator
-    SS      = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_SS) # Segment separator
-    WS      = (MASK_NEUTRAL + MASK_SPACE + MASK_WS) # Whitespace
-    ON      = (MASK_NEUTRAL) # Other Neutral
+    LTR     = (MASK_STRONG + MASK_LETTER)                               # Strong left to right
+    RTL     = (MASK_STRONG + MASK_LETTER + MASK_RTL)                    # Right to left characters
+    AL      = (MASK_STRONG + MASK_LETTER + MASK_RTL + MASK_ARABIC)      # Arabic characters
+    LRE     = (MASK_STRONG + MASK_EXPLICIT)                             # Left-To-Right embedding
+    RLE     = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL)                  # Right-To-Left embedding
+    LRO     = (MASK_STRONG + MASK_EXPLICIT + MASK_OVERRIDE)             # Left-To-Right override
+    RLO     = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL + MASK_OVERRIDE)  # Right-To-Left override
+
+    PDF     = (MASK_WEAK + MASK_EXPLICIT)                               # Pop directional override
+    EN      = (MASK_WEAK + MASK_NUMBER)                                 # European digit
+    AN      = (MASK_WEAK + MASK_NUMBER + MASK_ARABIC)                   # Arabic digit
+    ES      = (MASK_WEAK + MASK_NUMSEPTER + MASK_ES)                    # European number separator
+    ET      = (MASK_WEAK + MASK_NUMSEPTER + MASK_ET)                    # European number terminator
+    CS      = (MASK_WEAK + MASK_NUMSEPTER + MASK_CS)                    # Common Separator
+    NSM     = (MASK_WEAK + MASK_NSM)                                    # Non spacing mark
+    BN      = (MASK_WEAK + MASK_SPACE + MASK_BN)                        # Boundary neutral
+
+    BS      = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_BS)    # Block separator
+    SS      = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_SS)    # Segment separator
+    WS      = (MASK_NEUTRAL + MASK_SPACE + MASK_WS)                     # Whitespace
+    ON      = (MASK_NEUTRAL)                                            # Other Neutral
 
 
 # Memory allocation functions
@@ -131,7 +175,7 @@ def _pyunicode_to_utc32_p (a_pyunicode):
 
     #print 'a_len', a_len
 
-    utf8_pystr = a_pyunicode.encode('utf-8')
+    utf8_pystr = a_pyunicode.encode('UTF-8')
     utf8_len = len(utf8_pystr)
     utf8_p = _malloc_utf8_array_from_string(utf8_pystr)
 
@@ -139,7 +183,7 @@ def _pyunicode_to_utc32_p (a_pyunicode):
     #print 'utf8_len', utf8_len
 
     utc32_p = _malloc_utc32_array(a_len+1)
-    libfribidi.fribidi_utf8_to_unicode (utf8_p, utf8_len, utc32_p)
+    _libfribidi.fribidi_utf8_to_unicode (utf8_p, utf8_len, utc32_p)
 
     #print 'utc32_p [%04x, %04x, %04x, %04x]' % (utc32_p[0], utc32_p[1], utc32_p[2], utc32_p[3])
 
@@ -160,7 +204,7 @@ def _utc32_p_to_pyunicode (a_utc32_p):
     utf8_len = 6*utc32_len+1
     utf8_p = _malloc_utf8_array(utf8_len)
 
-    libfribidi.fribidi_unicode_to_utf8 (a_utc32_p, utc32_len, utf8_p)
+    _libfribidi.fribidi_unicode_to_utf8 (a_utc32_p, utc32_len, utf8_p)
 
     return utf8_p.value
 
@@ -168,13 +212,24 @@ def _utc32_p_to_pyunicode (a_utc32_p):
 
 # FriBidi API
 
-def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l_position=False, with_embedding_level=False):
-    input_len = len(input_pyunicode)
+def log2vis (unicode_text, base_direction, with_l2v_position=False, with_v2l_position=False, with_embedding_level=False):
+    """
+    Returns the visual order of characters in the text.
+
+    If with_l2v_position, with_v2l_position, or with_embedding_level are true,
+    the return value will be an array, including logical-to-visual position,
+    visual-to-logical positions, or embedding-level arrays respectively.
+    """
+
+    if unicode_text.__class__ != unicode:
+        unicode_text = unicode(unicode_text)
 
-    # memory allocations
+    input_len = len(unicode_text)
 
-    input_utc32_p = _pyunicode_to_utc32_p(input_pyunicode)
-    pbase_dir_p = ctypes.pointer(ctypes.c_int32(input_pbase_dir))
+    # Memory allocations
+
+    input_utc32_p = _pyunicode_to_utc32_p(unicode_text)
+    pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction))
 
     output_utc32_p = _malloc_utc32_array(input_len+1)
 
@@ -183,9 +238,23 @@ def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l
     emb_p = _malloc_int8_array(input_len) if with_embedding_level else None
 
 
-    # calling fribidi_log2vis
+    # Calling the api
+
+    """
+    FRIBIDI_API fribidi_boolean fribidi_log2vis (
+        /* input */
+        FriBidiChar     *str,
+        FriBidiStrIndex len,
+        FriBidiCharType *pbase_dirs,
+        /* output */
+        FriBidiChar     *visual_str,
+        FriBidiStrIndex *position_L_to_V_list,
+        FriBidiStrIndex *position_V_to_L_list,
+        FriBidiLevel    *embedding_level_list
+    );
+    """
 
-    successed = libfribidi.fribidi_log2vis(
+    successed = _libfribidi.fribidi_log2vis(
         # input
         input_utc32_p,
         input_len,
@@ -202,7 +271,7 @@ def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l
         raise Exception('fribidi_log2vis failed')
 
 
-    # pythonizing the output
+    # Pythonizing the output
 
     output_u = _utc32_p_to_pyunicode(output_utc32_p)
 
@@ -225,20 +294,38 @@ def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l
     return res
 
 
-def log2vis_get_embedding_levels (input_pyunicode, input_pbase_dir):
-    input_len = len(input_pyunicode)
+def log2vis_get_embedding_levels (unicode_text, base_direction):
+    """
+    Returns the embedding-level of characters in the text.
+    """
+
+    if unicode_text.__class__ != unicode:
+        unicode_text = unicode(unicode_text)
 
-    # memory allocations
+    input_len = len(unicode_text)
 
-    input_utc32_p = _pyunicode_to_utc32_p(input_pyunicode)
-    pbase_dir_p = ctypes.pointer(ctypes.c_int32(input_pbase_dir))
+    # Memory allocations
+
+    input_utc32_p = _pyunicode_to_utc32_p(unicode_text)
+    pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction))
 
     emb_p = _malloc_int8_array(input_len)
 
 
-    # calling fribidi_log2vis
+    # Calling the api
+
+    """
+    FRIBIDI_API fribidi_boolean fribidi_log2vis_get_embedding_levels (
+        /* input */
+        FriBidiChar     *str,
+        FriBidiStrIndex len,
+        FriBidiCharType *pbase_dir,
+        /* output */
+        FriBidiLevel    *embedding_level_list
+    );
+    """
 
-    successed = libfribidi.fribidi_log2vis_get_embedding_levels(
+    successed = _libfribidi.fribidi_log2vis_get_embedding_levels(
         # input
         input_utc32_p,
         input_len,
@@ -249,10 +336,61 @@ def log2vis_get_embedding_levels (input_pyunicode, input_pbase_dir):
     )
 
     if not successed:
-        raise Exception('fribidi_log2vis failed')
+        raise Exception('fribidi_log2vis_get_embedding_levels failed')
+
+
+    # Pythonizing the output
+
+    res = [i for i in emb_p]
+
+    return res
+
+
+def remove_bidi_marks (unicode_text, base_direction):
+    """
+    TODO
+    """
+
+    if unicode_text.__class__ != unicode:
+        unicode_text = unicode(unicode_text)
+
+    input_len = len(unicode_text)
+
+    # Memory allocations
+
+    input_utc32_p = _pyunicode_to_utc32_p(unicode_text)
+    pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction))
+
+    emb_p = _malloc_int8_array(input_len)
+
 
+    # Calling the api
 
-    # pythonizing the output
+    """
+    FRIBIDI_API FriBidiStrIndex fribidi_remove_bidi_marks (
+        FriBidiChar     *str,
+        FriBidiStrIndex length,
+        FriBidiStrIndex *position_to_this_list,
+        FriBidiStrIndex *position_from_this_list,
+        FriBidiLevel    *embedding_level_list
+    );
+    """
+
+    successed = _libfribidi.fribidi_remove_bidi_marks(
+        # input
+        input_utc32_p,
+        input_len,
+        pbase_dir_p,
+
+        # output
+        emb_p
+    )
+
+    if not successed:
+        raise Exception('fribidi_remove_bidi_marks failed')
+
+
+    # Pythonizing the output
 
     res = [i for i in emb_p]
 
@@ -261,7 +399,10 @@ def log2vis_get_embedding_levels (input_pyunicode, input_pbase_dir):
 
 # Main
 
-VERSION = '0.05'
+def _main ():
+    import sys
+    text = ' '.join(sys.argv[1:]).decode('UTF-8')
+    print log2vis(text, types.LTR)
 
 
 def _test ():
@@ -274,12 +415,13 @@ def _test ():
     print log2vis(u"aسلام", types.LTR, True, True, True)
     print log2vis(u"aسلام", types.RTL, True, True, True)
 
+    print log2vis_get_embedding_levels("abc", types.LTR)
     print log2vis_get_embedding_levels(u"aسلام", types.LTR)
     print log2vis_get_embedding_levels(u"aسلام", types.RTL)
 
 
-
 if __name__=='__main__':
-    _test()
+    _main()
 
+    _test()
author	Behnam Esfahbod <behnam@zwnj.org>	2008-12-02 02:44:01 +0330
committer	Behnam ZWNJ Esfahbod <behnam@zwnj.org>	2008-12-02 02:44:01 +0330
commit	9191c7226a51b42eda3bacfbe276a193bde7bd69 (patch)
tree	87b1bd2d52b2c6a143e812a2e15b56f51c8fc4a5
parent	ac7895e848e4c3f4b5c03d6a8e61338c07dd41de (diff)