[fribidi.py] Add remove_bidi_marks()

Set version to 0.08.
author: Behnam Esfahbod <behnam@zwnj.org> 2008-12-02 02:45:30 +0330
committer: Behnam ZWNJ Esfahbod <behnam@zwnj.org> 2008-12-02 02:45:30 +0330
commit: 4b62b8313ce033f1a9480216b76ce7b362108a19 (patch)
tree: ba81b52bc56a2408ba322c2fdc5adf87ca49e023
parent: 9860c711484812bea3850b5b0073b26f268a0cbe (diff)
1 files changed, 64 insertions, 27 deletions
diff --git a/fribidi.py b/fribidi.py
index e3e40f1..2486454 100755
--- a/fribidi.py
+++ b/fribidi.py
@@ -4,7 +4,7 @@
 import ctypes
 
 
-VERSION = '0.06'
+VERSION = '0.08'
 
 _libfribidi = ctypes.CDLL("libfribidi.so")
 
@@ -216,9 +216,10 @@ def log2vis (unicode_text, base_direction, with_l2v_position=False, with_v2l_pos
     """
     Returns the visual order of characters in the text.
 
-    If with_l2v_position, with_v2l_position, or with_embedding_level are true,
-    the return value will an array, including logical-to-visual position,
-    visual-to-logical positions, or embedding-level arrays respectively.
+    If any of with_l2v_position, with_v2l_position, or with_embedding_level
+    is True, the return value is a tuple: the visual text followed by the
+    requested lists (logical-to-visual positions, visual-to-logical
+    positions, embedding levels), in that order.
     """
 
     if unicode_text.__class__ != unicode:
@@ -233,9 +234,9 @@ def log2vis (unicode_text, base_direction, with_l2v_position=False, with_v2l_pos
 
     output_utc32_p = _malloc_utc32_array(input_len+1)
 
-    l2v_p = _malloc_int_array(input_len) if with_l2v_position else None
-    v2l_p = _malloc_int_array(input_len) if with_v2l_position else None
-    emb_p = _malloc_int8_array(input_len) if with_embedding_level else None
+    l2v_p = _malloc_int_array(input_len)    if with_l2v_position    else None
+    v2l_p = _malloc_int_array(input_len)    if with_v2l_position    else None
+    emb_p = _malloc_int8_array(input_len)   if with_embedding_level else None
 
 
     # Calling the api
@@ -277,15 +278,9 @@ def log2vis (unicode_text, base_direction, with_l2v_position=False, with_v2l_pos
 
     if with_l2v_position or with_v2l_position or with_embedding_level:
         res = (output_u, )
-
-        if with_l2v_position:
-            res += ([i for i in l2v_p], )
-
-        if with_v2l_position:
-            res += ([i for i in v2l_p], )
-
-        if with_embedding_level:
-            res += ([i for i in emb_p], )
+        if with_l2v_position:       res += ([i for i in l2v_p], )
+        if with_v2l_position:       res += ([i for i in v2l_p], )
+        if with_embedding_level:    res += ([i for i in emb_p], )
 
     else:
         res = output_u
@@ -345,9 +340,14 @@ def log2vis_get_embedding_levels (unicode_text, base_direction):
     return res
 
 
-def remove_bidi_marks (unicode_text, base_direction):
+def remove_bidi_marks (unicode_text, with_position_to=False, with_position_from=False, with_embedding_level=False):
     """
-    TODO
+    Returns the text with all Bidirectional Marks removed.
+
+    If any of with_position_to, with_position_from, or with_embedding_level
+    is True, the return value is a tuple: the resulting text followed by the
+    requested lists (input-to-output positions, output-to-input positions,
+    embedding levels), in that order.
     """
 
     if unicode_text.__class__ != unicode:
@@ -355,43 +355,60 @@ def remove_bidi_marks (unicode_text, base_direction):
 
     input_len = len(unicode_text)
 
+
     # Memory allocations
 
     input_utc32_p = _pyunicode_to_utc32_p(unicode_text)
-    pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction))
 
-    emb_p = _malloc_int8_array(input_len)
+    pto_p = _malloc_int_array(input_len*3)    if with_position_to     else None
+    pfr_p = _malloc_int_array(input_len)    if with_position_from   else None
+    emb_p = _malloc_int8_array(input_len)   if with_embedding_level else None
+    #print 'pto_p', pto_p
 
 
     # Calling the api
 
     """
     FRIBIDI_API FriBidiStrIndex fribidi_remove_bidi_marks (
+        /* input & output */
         FriBidiChar     *str,
+
+        /* input */
         FriBidiStrIndex length,
+
+        /* output */
         FriBidiStrIndex *position_to_this_list,
         FriBidiStrIndex *position_from_this_list,
         FriBidiLevel    *embedding_level_list
     );
     """
 
-    successed = _libfribidi.fribidi_remove_bidi_marks(
-        # input
+    new_length = _libfribidi.fribidi_remove_bidi_marks(
+        # input & output
         input_utc32_p,
+
+        # input
         input_len,
-        pbase_dir_p,
 
         # output
+        pto_p,
+        pfr_p,
         emb_p
     )
 
-    if not successed:
-        raise Exception('fribidi_remove_bidi_marks failed')
-
 
     # Pythonizing the output
 
-    res = [i for i in emb_p]
+    output_u = _utc32_p_to_pyunicode(input_utc32_p)
+
+    if with_position_to or with_position_from or with_embedding_level:
+        res = (output_u, )
+        if with_position_to:        res += ([i for i in pto_p], )
+        if with_position_from:      res += ([i for i in pfr_p], )
+        if with_embedding_level:    res += ([i for i in emb_p], )
+
+    else:
+        res = output_u
 
     return res
 
@@ -405,6 +422,10 @@ def _main ():
 
 
 def _test ():
+
+    print
+    print 'TEST log2vis()'
+
     print log2vis(u"سلام", types.LTR)
     print log2vis(u"سلام", types.LTR, True)
     print log2vis(u"سلام", types.LTR, False, True)
@@ -419,10 +440,26 @@ def _test ():
     print log2vis(u"aسلام", types.LTR, True, True, True)
     print log2vis(u"aسلام", types.RTL, True, True, True)
 
+    print
+    print 'TEST log2vis_get_embedding_levels()'
+
     print log2vis_get_embedding_levels("abc", types.LTR)
     print log2vis_get_embedding_levels(u"aسلام", types.LTR)
     print log2vis_get_embedding_levels(u"aسلام", types.RTL)
 
+    print
+    print 'TEST remove_bidi_marks()'
+
+    print remove_bidi_marks(u"سلامa")
+    print remove_bidi_marks(u"سلامa", False, True)
+    print remove_bidi_marks(u"سلامa", False, False, True)
+    print remove_bidi_marks(u"سلامa", True)
+
+    print remove_bidi_marks(u"سل‌ام")
+    print remove_bidi_marks(u"سل‌ام", True)
+    print remove_bidi_marks(u"سل‌ام", False, True)
+    print remove_bidi_marks(u"سل‌ام", False, False, True)
+
 
 if __name__=='__main__':
     _main()
author	Behnam Esfahbod <behnam@zwnj.org>	2008-12-02 02:45:30 +0330
committer	Behnam ZWNJ Esfahbod <behnam@zwnj.org>	2008-12-02 02:45:30 +0330
commit	4b62b8313ce033f1a9480216b76ce7b362108a19 (patch)
tree	ba81b52bc56a2408ba322c2fdc5adf87ca49e023
parent	9860c711484812bea3850b5b0073b26f268a0cbe (diff)