summary refs log tree commit diff
path: root/src/gen-arabic-table.py
diff options
context:
space:
mode:
author Behdad Esfahbod <behdad@behdad.org> 2012-04-10 16:25:08 -0400
committer Behdad Esfahbod <behdad@behdad.org> 2012-04-10 16:25:08 -0400
commit ae4a2b9365051c23c9a299cf76f3ab7e661999b1 (patch)
tree 2e43ddccc0e619f64bb879fa39f1b5f14eaa8f1b /src/gen-arabic-table.py
parent 3b26f96ebe859570d14c6902afc23462bca40712 (diff)
Generate fallback Arabic shaping table
Not hooked up yet.
Diffstat (limited to 'src/gen-arabic-table.py')
-rwxr-xr-x src/gen-arabic-table.py 220
1 files changed, 158 insertions, 62 deletions
diff --git a/src/gen-arabic-table.py b/src/gen-arabic-table.py
index 32bf66cf..6549cb40 100755
--- a/src/gen-arabic-table.py
+++ b/src/gen-arabic-table.py
@@ -1,89 +1,185 @@
#!/usr/bin/python
import sys
+import os.path
-if len (sys.argv) < 2:
- print >>sys.stderr, "usage: ./gen-arabic-table.py ArabicShaping.txt"
+if len (sys.argv) != 3:
+ print >>sys.stderr, "usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt"
sys.exit (1)
-f = file (sys.argv[1])
+files = [file (x) for x in sys.argv[1:]]
-header = f.readline (), f.readline ()
-while f.readline ().find ('##################') < 0:
+headers = [[files[0].readline (), files[0].readline ()]]
+headers.append (["UnicodeData.txt does not have a header."])
+while files[0].readline ().find ('##################') < 0:
pass
+def print_joining_table(f):
+    """Print the C `joining_table[]` array and its FIRST/LAST macros.
+
+    `f` is an iterable of text lines; the script passes the opened
+    ArabicShaping.txt after it has already consumed the file header.
+    Lines starting with '#' are comments.  Data lines are ';'-separated:
+    field 0 is the code point in hex, field 2 is appended to
+    "JOINING_TYPE_" and field 3 is the joining group (only "ALAPH" and
+    "DALATH RISH" are emitted as "JOINING_GROUP_*" values).
+
+    Code points missing between two consecutive entries are filled with
+    JOINING_TYPE_X so the C array can be indexed by (u - FIRST).
+    U+200C and U+200D are left out of the table entirely.
+
+    Raises Exception if the code points are not in ascending order, or
+    if fewer than 40% of the slots in the emitted range are real entries.
+    """
+
+    print
+    print "static const uint8_t joining_table[] ="
+    print "{"
+
+    min_u = 0x110000
+    max_u = 0
+    num = 0
+    last = -1
+    block = ''
+    for line in f:
+
+        if line.startswith ('#'):
+            # Remember section headings ("... characters") so they can be
+            # echoed in front of the first entry that follows them.
+            # str.find() must not be used as a boolean here: it returns -1
+            # (truthy) when the text is absent.  The comparison ignores
+            # case so the capitalisation used in the data file's headings
+            # does not matter.
+            if " characters" in line.lower ():
+                block = line[2:].strip ()
+            continue
+
+        fields = [x.strip () for x in line.split (';')]
+        if len (fields) == 1:
+            # Blank line (no ';' separators at all).
+            continue
+
+        u = int (fields[0], 16)
+        if u == 0x200C or u == 0x200D:
+            continue
+        if u < last:
+            raise Exception ("Input data character not sorted", u)
+        min_u = min (min_u, u)
+        max_u = max (max_u, u)
+        num += 1
+
+        if block:
+            print "\n /* %s */\n" % block
+            block = ''
+
+        if last != -1:
+            # Pad the gap between the previous entry and this one.
+            last += 1
+            while last < u:
+                print " JOINING_TYPE_X, /* %04X */" % last
+                last += 1
+        else:
+            last = u
+
+        if fields[3] in ["ALAPH", "DALATH RISH"]:
+            value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
+        else:
+            value = "JOINING_TYPE_" + fields[2]
+        print " %s, /* %s */" % (value, '; '.join(fields))
+
+    print
+    print "};"
+    print
+    print "#define JOINING_TABLE_FIRST 0x%04X" % min_u
+    print "#define JOINING_TABLE_LAST 0x%04X" % max_u
+    print
+
+    # Percentage of slots in [min_u, max_u] backed by a real data line.
+    occupancy = num * 100 // (max_u - min_u + 1)
+    # Maintain at least 40% occupancy in the table
+    if occupancy < 40:
+        raise Exception ("Table too sparse, please investigate: ", occupancy)
+
+def print_shaping_table(f):
+    """Print the C `shaping_table[][4]` and `ligature_table[][3]` arrays.
+
+    `f` is an iterable of ';'-separated text lines; the script passes the
+    opened UnicodeData.txt.  Field 0 is the code point in hex, field 1 the
+    character name and field 5 the decomposition.  Only decompositions
+    tagged <initial>, <medial>, <final> or <isolated> are used:
+
+    * one-character decompositions record the presentation form of that
+      character for the given shape;
+    * two-character decompositions are kept only for lam-alef ligatures
+      (U+0644 followed by U+0622, U+0623, U+0625 or U+0627).
+
+    Each shaping_table row lists the initial, medial, final and isolated
+    form of one base character; a missing form falls back to the base
+    character itself.  Each ligature_table row is
+    {form of first component, form of second component, ligature}.
+
+    Raises Exception if the input has no shaping data, or if a ligature
+    has a shape other than 'isolated' or 'final'.
+    """
+
+    # Column order of shaping_table rows.
+    shape_order = ('initial', 'medial', 'final', 'isolated')
+
+    shapes = {}
+    ligatures = {}
+    names = {}
+    for line in f:
+
+        fields = [x.strip () for x in line.split (';')]
+        if len (fields) < 6:
+            # Blank or truncated line: there is no decomposition field.
+            continue
+        if fields[5][0:1] != '<':
+            continue
+
+        parts = fields[5].split (' ')
+        shape = parts[0][1:-1]
+        if shape not in shape_order:
+            continue
+        # Only parse the code points once the tag is known to be relevant.
+        items = tuple (int (x, 16) for x in parts[1:])
+
+        c = int (fields[0], 16)
+        if len (items) != 1:
+            # We only care about lam-alef ligatures
+            if len (items) != 2 or items[0] != 0x0644 or items[1] not in [0x0622, 0x0623, 0x0625, 0x0627]:
+                continue
+
+            # Save ligature
+            names[c] = fields[1]
+            ligatures.setdefault (items, {})[shape] = c
+        else:
+            # Save shape
+            base = items[0]
+            if base not in names:
+                names[base] = fields[1]
+            else:
+                # Keep only what the names of all forms have in common,
+                # e.g. "... BEH ISOLATED FORM" and "... BEH FINAL FORM"
+                # reduce to "... BEH".
+                names[base] = os.path.commonprefix ([names[base], fields[1]]).strip ()
+            shapes.setdefault (base, {})[shape] = c
+
+    if not shapes:
+        raise Exception ("No shaping data found in input")
+
+    print
+    print "static const uint16_t shaping_table[][4] ="
+    print "{"
+
+    min_u, max_u = min (shapes), max (shapes)
+    for u in range (min_u, max_u + 1):
+        forms = shapes.get (u, {})
+        # Characters (or individual shapes) without a presentation form
+        # map to themselves.
+        s = [forms.get (shape, u) for shape in shape_order]
+        value = ', '.join ("0x%04X" % c for c in s)
+        print " {%s}, /* U+%04X %s */" % (value, u, names.get (u, ""))
+
+    print "};"
+    print
+    print "#define SHAPING_TABLE_FIRST 0x%04X" % min_u
+    print "#define SHAPING_TABLE_LAST 0x%04X" % max_u
+    print
+
+    print
+    print "static const uint16_t ligature_table[][3] ="
+    print "{"
+
+    ligas = []
+    for pair in ligatures:
+        for shape, c in ligatures[pair].items ():
+            # An isolated ligature is built from the initial form of the
+            # first component, a final ligature from its medial form; the
+            # second component is in its final form in both cases.
+            if shape == 'isolated':
+                first_shape = 'initial'
+            elif shape == 'final':
+                first_shape = 'medial'
+            else:
+                raise Exception ("Unexpected shape", shape)
+            ligas.append ((shapes[pair[0]][first_shape], shapes[pair[1]]['final'], c))
+    ligas.sort ()
+    for liga in ligas:
+        value = ', '.join ("0x%04X" % c for c in liga)
+        print " {%s}, /* U+%04X %s */" % (value, liga[2], names[liga[2]])
+
+    print "};"
+    print
+
+
+
print "/* == Start of generated table == */"
print "/*"
print " * The following table is generated by running:"
print " *"
-print " * ./gen-arabic-table.py ArabicShaping.txt"
+print " * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt"
print " *"
print " * on files with these headers:"
print " *"
-for line in header:
- print " * %s" % (line.strip())
+for h in headers:
+ for l in h:
+ print " * %s" % (l.strip())
print " */"
print
print "#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH"
print "#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH"
print
-print "static const uint8_t joining_table[] ="
-print "{"
-
-min_u = 0x110000
-max_u = 0
-num = 0
-last = -1
-block = ''
-for line in f:
-
- if line[0] == '#':
- if line.find (" characters"):
- block = line[2:].strip ()
- continue
-
- fields = [x.strip () for x in line.split (';')]
- if len (fields) == 1:
- continue
-
- u = int (fields[0], 16)
- if u == 0x200C or u == 0x200D:
- continue
- if u < last:
- raise Exception ("Input data character not sorted", u)
- min_u = min (min_u, u)
- max_u = max (max_u, u)
- num += 1
-
- if block:
- print "\n /* %s */\n" % block
- block = ''
-
- if last != -1:
- last += 1
- while last < u:
- print " JOINING_TYPE_X, /* %04X */" % last
- last += 1
- else:
- last = u
-
- if fields[3] in ["ALAPH", "DALATH RISH"]:
- value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
- else:
- value = "JOINING_TYPE_" + fields[2]
- print " %s, /* %s */" % (value, '; '.join(fields))
-print
-print "};"
-print
-print "#define JOINING_TABLE_FIRST 0x%04X" % min_u
-print "#define JOINING_TABLE_LAST 0x%04X" % max_u
+
+print_joining_table (files[0])
+print_shaping_table (files[1])
+
print
print "#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */"
print
print "/* == End of generated table == */"
-occupancy = num * 100 / (max_u - min_u + 1)
-# Maintain at least 40% occupancy in the table */
-if occupancy < 40:
- raise Exception ("Table too sparse, please investigate: ", occupancy)