summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBehnam Esfahbod <behnam@zwnj.org>2008-12-02 02:44:01 +0330
committerBehnam ZWNJ Esfahbod <behnam@zwnj.org>2008-12-02 02:44:01 +0330
commit9191c7226a51b42eda3bacfbe276a193bde7bd69 (patch)
tree87b1bd2d52b2c6a143e812a2e15b56f51c8fc4a5
parentac7895e848e4c3f4b5c03d6a8e61338c07dd41de (diff)
Basic documentation
-rwxr-xr-xfribidi.py242
1 files changed, 192 insertions, 50 deletions
diff --git a/fribidi.py b/fribidi.py
index f02a76e..eef42f7 100755
--- a/fribidi.py
+++ b/fribidi.py
@@ -2,37 +2,81 @@
# coding=UTF-8
import ctypes
-import sys
+VERSION = '0.06'
-libfribidi = ctypes.CDLL("libfribidi.so")
+_libfribidi = ctypes.CDLL("libfribidi.so")
# Character Types
class types:
+ """
+ Defines character type masks and types.
+
+ Types:
+
+ LTR Strong left to right
+ RTL Right to left characters
+ AL Arabic characters
+ LRE Left-To-Right embedding
+ RLE Right-To-Left embedding
+ LRO Left-To-Right override
+ RLO Right-To-Left override
+
+ PDF Pop directional override
+ EN European digit
+ AN Arabic digit
+ ES European number separator
+ ET European number terminator
+ CS Common Separator
+ NSM Non spacing mark
+ BN Boundary neutral
+
+ BS Block separator
+ SS Segment separator
+ WS Whitespace
+ ON Other Neutral
+
+ Masks:
+
+ MASK_RTL Is right to left
+ MASK_ARABIC Is arabic
+
+ Each character can be only one of the three following:
+ MASK_STRONG Is strong
+ MASK_WEAK Is weak
+ MASK_NEUTRAL Is neutral
+
+ Each character can be only one of the five following:
+ MASK_LETTER Is letter: L, R, AL
+ MASK_NUMBER Is number: EN, AN
+ MASK_NUMSEPTER Is number separator or terminator: ES, ET, CS
+ MASK_SPACE Is space: BN, BS, SS, WS
+ MASK_EXPLICIT Is explicit mark: LRE, RLE, LRO, RLO, PDF
+
+ MASK_SEPARATOR Is text separator: BS, SS; and can be on only if MASK_SPACE is also on.
+ MASK_OVERRIDE Is explicit override: LRO, RLO; and can be on only if MASK_EXPLICIT is also on.
+ """
# Define Masks
MASK_RTL = 0x00000001 # Is right to left
MASK_ARABIC = 0x00000002 # Is arabic
- # Each char can be only one of the three following.
MASK_STRONG = 0x00000010 # Is strong
MASK_WEAK = 0x00000020 # Is weak
MASK_NEUTRAL = 0x00000040 # Is neutral
- # Each char can be only one of the five following.
MASK_LETTER = 0x00000100 # Is letter: L, R, AL
MASK_NUMBER = 0x00000200 # Is number: EN, AN
MASK_NUMSEPTER = 0x00000400 # Is number separator or terminator: ES, ET, CS
MASK_SPACE = 0x00000800 # Is space: BN, BS, SS, WS
MASK_EXPLICIT = 0x00001000 # Is expilict mark: LRE, RLE, LRO, RLO, PDF
- # Can be on only if MASK_SPACE is also on.
MASK_SEPARATOR = 0x00002000 # Is test separator: BS, SS
- # Can be on only if MASK_EXPLICIT is also on.
+
MASK_OVERRIDE = 0x00004000 # Is explicit override: LRO, RLO
# The following must be to make types pairwise different, some of them can
@@ -51,27 +95,27 @@ class types:
# Define values for FriBidiCharType
- LTR = (MASK_STRONG + MASK_LETTER) # Strong left to right
- RTL = (MASK_STRONG + MASK_LETTER + MASK_RTL) # Right to left characters
- AL = (MASK_STRONG + MASK_LETTER + MASK_RTL + MASK_ARABIC) # Arabic characters
- LRE = (MASK_STRONG + MASK_EXPLICIT) # Left-To-Right embedding
- RLE = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL) # Right-To-Left embedding
- LRO = (MASK_STRONG + MASK_EXPLICIT + MASK_OVERRIDE) # Left-To-Right override
- RLO = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL + MASK_OVERRIDE) # Right-To-Left override
-
- PDF = (MASK_WEAK + MASK_EXPLICIT) # Pop directional override
- EN = (MASK_WEAK + MASK_NUMBER) # European digit
- AN = (MASK_WEAK + MASK_NUMBER + MASK_ARABIC) # Arabic digit
- ES = (MASK_WEAK + MASK_NUMSEPTER + MASK_ES) # European number separator
- ET = (MASK_WEAK + MASK_NUMSEPTER + MASK_ET) # European number terminator
- CS = (MASK_WEAK + MASK_NUMSEPTER + MASK_CS) # Common Separator
- NSM = (MASK_WEAK + MASK_NSM) # Non spacing mark
- BN = (MASK_WEAK + MASK_SPACE + MASK_BN) # Boundary neutral
-
- BS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_BS) # Block separator
- SS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_SS) # Segment separator
- WS = (MASK_NEUTRAL + MASK_SPACE + MASK_WS) # Whitespace
- ON = (MASK_NEUTRAL) # Other Neutral
+ LTR = (MASK_STRONG + MASK_LETTER) # Strong left to right
+ RTL = (MASK_STRONG + MASK_LETTER + MASK_RTL) # Right to left characters
+ AL = (MASK_STRONG + MASK_LETTER + MASK_RTL + MASK_ARABIC) # Arabic characters
+ LRE = (MASK_STRONG + MASK_EXPLICIT) # Left-To-Right embedding
+ RLE = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL) # Right-To-Left embedding
+ LRO = (MASK_STRONG + MASK_EXPLICIT + MASK_OVERRIDE) # Left-To-Right override
+ RLO = (MASK_STRONG + MASK_EXPLICIT + MASK_RTL + MASK_OVERRIDE) # Right-To-Left override
+
+ PDF = (MASK_WEAK + MASK_EXPLICIT) # Pop directional override
+ EN = (MASK_WEAK + MASK_NUMBER) # European digit
+ AN = (MASK_WEAK + MASK_NUMBER + MASK_ARABIC) # Arabic digit
+ ES = (MASK_WEAK + MASK_NUMSEPTER + MASK_ES) # European number separator
+ ET = (MASK_WEAK + MASK_NUMSEPTER + MASK_ET) # European number terminator
+ CS = (MASK_WEAK + MASK_NUMSEPTER + MASK_CS) # Common Separator
+ NSM = (MASK_WEAK + MASK_NSM) # Non spacing mark
+ BN = (MASK_WEAK + MASK_SPACE + MASK_BN) # Boundary neutral
+
+ BS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_BS) # Block separator
+ SS = (MASK_NEUTRAL + MASK_SPACE + MASK_SEPARATOR + MASK_SS) # Segment separator
+ WS = (MASK_NEUTRAL + MASK_SPACE + MASK_WS) # Whitespace
+ ON = (MASK_NEUTRAL) # Other Neutral
# Memory allocation functions
@@ -131,7 +175,7 @@ def _pyunicode_to_utc32_p (a_pyunicode):
#print 'a_len', a_len
- utf8_pystr = a_pyunicode.encode('utf-8')
+ utf8_pystr = a_pyunicode.encode('UTF-8')
utf8_len = len(utf8_pystr)
utf8_p = _malloc_utf8_array_from_string(utf8_pystr)
@@ -139,7 +183,7 @@ def _pyunicode_to_utc32_p (a_pyunicode):
#print 'utf8_len', utf8_len
utc32_p = _malloc_utc32_array(a_len+1)
- libfribidi.fribidi_utf8_to_unicode (utf8_p, utf8_len, utc32_p)
+ _libfribidi.fribidi_utf8_to_unicode (utf8_p, utf8_len, utc32_p)
#print 'utc32_p [%04x, %04x, %04x, %04x]' % (utc32_p[0], utc32_p[1], utc32_p[2], utc32_p[3])
@@ -160,7 +204,7 @@ def _utc32_p_to_pyunicode (a_utc32_p):
utf8_len = 6*utc32_len+1
utf8_p = _malloc_utf8_array(utf8_len)
- libfribidi.fribidi_unicode_to_utf8 (a_utc32_p, utc32_len, utf8_p)
+ _libfribidi.fribidi_unicode_to_utf8 (a_utc32_p, utc32_len, utf8_p)
return utf8_p.value
@@ -168,13 +212,24 @@ def _utc32_p_to_pyunicode (a_utc32_p):
# FriBidi API
-def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l_position=False, with_embedding_level=False):
- input_len = len(input_pyunicode)
+def log2vis (unicode_text, base_direction, with_l2v_position=False, with_v2l_position=False, with_embedding_level=False):
+ """
+ Returns the visual order of characters in the text.
+
+ If with_l2v_position, with_v2l_position, or with_embedding_level are true,
+ the return value will be an array, including logical-to-visual position,
+ visual-to-logical positions, or embedding-level arrays respectively.
+ """
+
+ if unicode_text.__class__ != unicode:
+ unicode_text = unicode(unicode_text)
- # memory allocations
+ input_len = len(unicode_text)
- input_utc32_p = _pyunicode_to_utc32_p(input_pyunicode)
- pbase_dir_p = ctypes.pointer(ctypes.c_int32(input_pbase_dir))
+ # Memory allocations
+
+ input_utc32_p = _pyunicode_to_utc32_p(unicode_text)
+ pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction))
output_utc32_p = _malloc_utc32_array(input_len+1)
@@ -183,9 +238,23 @@ def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l
emb_p = _malloc_int8_array(input_len) if with_embedding_level else None
- # calling fribidi_log2vis
+ # Calling the api
+
+ """
+ FRIBIDI_API fribidi_boolean fribidi_log2vis (
+ /* input */
+ FriBidiChar *str,
+ FriBidiStrIndex len,
+ FriBidiCharType *pbase_dirs,
+ /* output */
+ FriBidiChar *visual_str,
+ FriBidiStrIndex *position_L_to_V_list,
+ FriBidiStrIndex *position_V_to_L_list,
+ FriBidiLevel *embedding_level_list
+ );
+ """
- successed = libfribidi.fribidi_log2vis(
+ successed = _libfribidi.fribidi_log2vis(
# input
input_utc32_p,
input_len,
@@ -202,7 +271,7 @@ def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l
raise Exception('fribidi_log2vis failed')
- # pythonizing the output
+ # Pythonizing the output
output_u = _utc32_p_to_pyunicode(output_utc32_p)
@@ -225,20 +294,38 @@ def log2vis (input_pyunicode, input_pbase_dir, with_l2v_position=False, with_v2l
return res
-def log2vis_get_embedding_levels (input_pyunicode, input_pbase_dir):
- input_len = len(input_pyunicode)
+def log2vis_get_embedding_levels (unicode_text, base_direction):
+ """
+ Returns the embedding-level of characters in the text.
+ """
+
+ if unicode_text.__class__ != unicode:
+ unicode_text = unicode(unicode_text)
- # memory allocations
+ input_len = len(unicode_text)
- input_utc32_p = _pyunicode_to_utc32_p(input_pyunicode)
- pbase_dir_p = ctypes.pointer(ctypes.c_int32(input_pbase_dir))
+ # Memory allocations
+
+ input_utc32_p = _pyunicode_to_utc32_p(unicode_text)
+ pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction))
emb_p = _malloc_int8_array(input_len)
- # calling fribidi_log2vis
+ # Calling the api
+
+ """
+ FRIBIDI_API fribidi_boolean fribidi_log2vis_get_embedding_levels (
+ /* input */
+ FriBidiChar *str,
+ FriBidiStrIndex len,
+ FriBidiCharType *pbase_dir,
+ /* output */
+ FriBidiLevel *embedding_level_list
+ );
+ """
- successed = libfribidi.fribidi_log2vis_get_embedding_levels(
+ successed = _libfribidi.fribidi_log2vis_get_embedding_levels(
# input
input_utc32_p,
input_len,
@@ -249,10 +336,61 @@ def log2vis_get_embedding_levels (input_pyunicode, input_pbase_dir):
)
if not successed:
- raise Exception('fribidi_log2vis failed')
+ raise Exception('fribidi_log2vis_get_embedding_levels failed')
+
+
+ # Pythonizing the output
+
+ res = [i for i in emb_p]
+
+ return res
+
+
+def remove_bidi_marks (unicode_text, base_direction):
+ """
+ TODO
+ """
+
+ if unicode_text.__class__ != unicode:
+ unicode_text = unicode(unicode_text)
+
+ input_len = len(unicode_text)
+
+ # Memory allocations
+
+ input_utc32_p = _pyunicode_to_utc32_p(unicode_text)
+ pbase_dir_p = ctypes.pointer(ctypes.c_int32(base_direction))
+
+ emb_p = _malloc_int8_array(input_len)
+
+ # Calling the api
- # pythonizing the output
+ """
+ FRIBIDI_API FriBidiStrIndex fribidi_remove_bidi_marks (
+ FriBidiChar *str,
+ FriBidiStrIndex length,
+ FriBidiStrIndex *position_to_this_list,
+ FriBidiStrIndex *position_from_this_list,
+ FriBidiLevel *embedding_level_list
+ );
+ """
+
+ successed = _libfribidi.fribidi_remove_bidi_marks(
+ # input
+ input_utc32_p,
+ input_len,
+ pbase_dir_p,
+
+ # output
+ emb_p
+ )
+
+ if not successed:
+ raise Exception('fribidi_remove_bidi_marks failed')
+
+
+ # Pythonizing the output
res = [i for i in emb_p]
@@ -261,7 +399,10 @@ def log2vis_get_embedding_levels (input_pyunicode, input_pbase_dir):
# Main
-VERSION = '0.05'
+def _main ():
+ import sys
+ text = ' '.join(sys.argv[1:]).decode('UTF-8')
+ print log2vis(text, types.LTR)
def _test ():
@@ -274,12 +415,13 @@ def _test ():
print log2vis(u"aسلام", types.LTR, True, True, True)
print log2vis(u"aسلام", types.RTL, True, True, True)
+ print log2vis_get_embedding_levels("abc", types.LTR)
print log2vis_get_embedding_levels(u"aسلام", types.LTR)
print log2vis_get_embedding_levels(u"aسلام", types.RTL)
-
if __name__=='__main__':
- _test()
+ _main()
+ _test()