author     Tim-Philipp Müller <tim@centricular.com>    2020-07-31 07:26:11 +0000
committer  Akira TAGOH <akira@tagoh.org>               2020-07-31 07:26:11 +0000
commit     57a224f51d6c019e4ce5d75efb22f34a8330423e (patch)
tree       e3d7acfe511c07650db57c485c6dcf134e2c78a5 /fc-lang
parent     03aa12c75e117acb0d160212536f6f832e0dc8d9 (diff)
Add Meson build system
See https://mesonbuild.com
Diffstat (limited to 'fc-lang')
-rwxr-xr-x  fc-lang/fc-lang.py    387
-rw-r--r--  fc-lang/meson.build   256
2 files changed, 643 insertions(+), 0 deletions(-)
diff --git a/fc-lang/fc-lang.py b/fc-lang/fc-lang.py
new file mode 100755
index 0000000..cc1dea8
--- /dev/null
+++ b/fc-lang/fc-lang.py
@@ -0,0 +1,387 @@
+#!/usr/bin/env python3
+#
+# fontconfig/fc-lang/fc-lang.py
+#
+# Copyright © 2001-2002 Keith Packard
+# Copyright © 2019 Tim-Philipp Müller
+#
+# Permission to use, copy, modify, distribute, and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice appear in all copies and that both that
+# copyright notice and this permission notice appear in supporting
+# documentation, and that the name of the author(s) not be used in
+# advertising or publicity pertaining to distribution of the software without
+# specific, written prior permission. The authors make no
+# representations about the suitability of this software for any purpose. It
+# is provided "as is" without express or implied warranty.
+#
+# THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+# EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# fc-lang
+#
+# Read a set of language orthographies and build C declarations for
+# charsets which can then be used to identify which languages are
+# supported by a given font.
+#
+# TODO: this code is not very Pythonic; much of it is a 1:1 translation
+# of the C code and could probably be simplified a bit
+import argparse
+import string
+import sys
+import os
+
+# we just store the leaves in a dict; we can order them later if needed
+class CharSet:
+ def __init__(self):
+        self.leaves = {} # leaf_number -> leaf data (= 8 x uint32 = 256 bits)
+
+ def add_char(self, ucs4):
+ assert ucs4 < 0x01000000
+ leaf_num = ucs4 >> 8
+ if leaf_num in self.leaves:
+ leaf = self.leaves[leaf_num]
+ else:
+ leaf = [0, 0, 0, 0, 0, 0, 0, 0] # 256/32 = 8
+ self.leaves[leaf_num] = leaf
+ leaf[(ucs4 & 0xff) >> 5] |= (1 << (ucs4 & 0x1f))
+ #print('{:08x} [{:04x}] --> {}'.format(ucs4, ucs4>>8, leaf))
+
+ def del_char(self, ucs4):
+ assert ucs4 < 0x01000000
+ leaf_num = ucs4 >> 8
+ if leaf_num in self.leaves:
+ leaf = self.leaves[leaf_num]
+ leaf[(ucs4 & 0xff) >> 5] &= ~(1 << (ucs4 & 0x1f))
+        # We don't bother removing the leaf if it's empty
+ #print('{:08x} [{:04x}] --> {}'.format(ucs4, ucs4>>8, leaf))
+
+ def equals(self, other_cs):
+ keys = sorted(self.leaves.keys())
+ other_keys = sorted(other_cs.leaves.keys())
+ if len(keys) != len(other_keys):
+ return False
+ for k1, k2 in zip(keys, other_keys):
+ if k1 != k2:
+ return False
+ if not leaves_equal(self.leaves[k1], other_cs.leaves[k2]):
+ return False
+ return True
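
A minimal usage sketch of the bitmap math above (assuming only the CharSet class as defined): U+0041 lands in leaf 0x0041 >> 8 = 0, word (0x41 & 0xff) >> 5 = 2, bit 0x41 & 0x1f = 1.

    cs = CharSet()
    cs.add_char(0x0041)                 # 'A'
    assert cs.leaves[0][2] == (1 << 1)  # leaf 0, word 2, bit 1
    cs.del_char(0x0041)
    assert cs.leaves[0][2] == 0         # bit cleared; the empty leaf is kept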
+
+# Convert a file name into a name suitable for C declarations
+def get_name(file_name):
+ return file_name.split('.')[0]
+
+# Convert a C name into a language name
+def get_lang(c_name):
+ return c_name.replace('_', '-').replace(' ', '').lower()
+
+def read_orth_file(file_name):
+ lines = []
+ with open(file_name, 'r', encoding='utf-8') as orth_file:
+ for num, line in enumerate(orth_file):
+ if line.startswith('include '):
+ include_fn = line[8:].strip()
+ lines += read_orth_file(include_fn)
+ else:
+                # strip comments and whitespace
+                line = line.split('#')[0].strip()
+                line = line.split('\t')[0].strip()
+ # skip empty lines
+ if line:
+ lines += [(file_name, num, line)]
+
+ return lines
+
+def leaves_equal(leaf1, leaf2):
+ for v1, v2 in zip(leaf1, leaf2):
+ if v1 != v2:
+ return False
+ return True
+
+# Build a single charset from a source file
+#
+# The file format is simple: each line holds a single hex code point or a
+# range (two values separated by '-' or '..'); a leading '-' removes the
+# characters instead of adding them, and 'include <file>' pulls in another
+# orthography file.
+def parse_orth_file(file_name, lines):
+ charset = CharSet()
+ for fn, num, line in lines:
+ delete_char = line.startswith('-')
+ if delete_char:
+ line = line[1:]
+ if line.find('-') != -1:
+ parts = line.split('-')
+ elif line.find('..') != -1:
+ parts = line.split('..')
+ else:
+ parts = [line]
+
+ start = int(parts.pop(0), 16)
+ end = start
+ if parts:
+ end = int(parts.pop(0), 16)
+ if parts:
+ print('ERROR: {} line {}: parse error (too many parts)'.format(fn, num))
+
+ for ucs4 in range(start, end+1):
+ if delete_char:
+ charset.del_char(ucs4)
+ else:
+ charset.add_char(ucs4)
+
+ assert charset.equals(charset) # sanity check for the equals function
+
+ return charset
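
To make the accepted syntax concrete, a hypothetical orth file (file name and values illustrative only):

    # sample.orth (hypothetical)
    0041-005a       # a range with '-' adds U+0041..U+005A
    00c0..00d6      # '..' is accepted as a range separator too
    0131            # a single code point
    -0049           # a leading '-' removes U+0049 again
    include latin.orth

read_orth_file() expands the include and strips comments before parse_orth_file() folds everything into one CharSet.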
+
+if __name__=='__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('orth_files', nargs='+', help='List of .orth files')
+ parser.add_argument('--directory', dest='directory', default=None)
+ parser.add_argument('--template', dest='template_file', default=None)
+ parser.add_argument('--output', dest='output_file', default=None)
+
+ args = parser.parse_args()
+
+ sets = []
+ names = []
+ langs = []
+ country = []
+
+ total_leaves = 0
+
+ LangCountrySets = {}
+
+ # Open output file
+ if args.output_file:
+ sys.stdout = open(args.output_file, 'w', encoding='utf-8')
+
+ # Read the template file
+ if args.template_file:
+ tmpl_file = open(args.template_file, 'r', encoding='utf-8')
+ else:
+ tmpl_file = sys.stdin
+
+ # Change into source dir if specified (after opening other files)
+ if args.directory:
+ os.chdir(args.directory)
+
+ orth_entries = {}
+ for i, fn in enumerate(args.orth_files):
+ orth_entries[fn] = i
+
+ for fn in sorted(orth_entries.keys()):
+ lines = read_orth_file(fn)
+ charset = parse_orth_file(fn, lines)
+
+ sets.append(charset)
+
+ name = get_name(fn)
+ names.append(name)
+
+ lang = get_lang(name)
+ langs.append(lang)
+ if lang.find('-') != -1:
+ country.append(orth_entries[fn]) # maps to original index
+ language_family = lang.split('-')[0]
+            if language_family not in LangCountrySets:
+ LangCountrySets[language_family] = []
+ LangCountrySets[language_family] += [orth_entries[fn]]
+
+ total_leaves += len(charset.leaves)
+
+ # Find unique leaves
+ leaves = []
+ for s in sets:
+ for leaf_num in sorted(s.leaves.keys()):
+ leaf = s.leaves[leaf_num]
+ is_unique = True
+ for existing_leaf in leaves:
+ if leaves_equal(leaf, existing_leaf):
+ is_unique = False
+ break
+ #print('unique: ', is_unique)
+ if is_unique:
+ leaves.append(leaf)
+
+ # Find duplicate charsets
+ duplicate = []
+ for i, s in enumerate(sets):
+ dup_num = None
+ if i >= 1:
+ for j, s_cmp in enumerate(sets):
+ if j >= i:
+ break
+ if s_cmp.equals(s):
+ dup_num = j
+ break
+
+ duplicate.append(dup_num)
+
+ tn = 0
+ off = {}
+ for i, s in enumerate(sets):
+        if duplicate[i] is not None:
+            continue
+ off[i] = tn
+ tn += len(s.leaves)
+
+ # Scan the input until the marker is found
+ # FIXME: this is a bit silly really, might just as well hardcode
+ # the license header in the script and drop the template
+ for line in tmpl_file:
+ if line.strip() == '@@@':
+ break
+ print(line, end='')
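
The template is split at the '@@@' marker: the loop above copies everything before it, the generated tables are printed next, and the remainder is flushed by the final loop at the end of the script. A sketch of a minimal fclang.tmpl.h, assuming nothing about the real file beyond the marker:

    /* header emitted verbatim before the generated tables */
    @@@
    /* tail emitted verbatim after the generated tables */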
+
+ print('/* total size: {} unique leaves: {} */\n'.format(total_leaves, len(leaves)))
+
+ print('#define LEAF0 ({} * sizeof (FcLangCharSet))'.format(len(sets)))
+ print('#define OFF0 (LEAF0 + {} * sizeof (FcCharLeaf))'.format(len(leaves)))
+ print('#define NUM0 (OFF0 + {} * sizeof (uintptr_t))'.format(tn))
+ print('#define SET(n) (n * sizeof (FcLangCharSet) + offsetof (FcLangCharSet, charset))')
+ print('#define OFF(s,o) (OFF0 + o * sizeof (uintptr_t) - SET(s))')
+ print('#define NUM(s,n) (NUM0 + n * sizeof (FcChar16) - SET(s))')
+ print('#define LEAF(o,l) (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))')
+ print('#define fcLangCharSets (fcLangData.langCharSets)')
+ print('#define fcLangCharSetIndices (fcLangData.langIndices)')
+ print('#define fcLangCharSetIndicesInv (fcLangData.langIndicesInv)')
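
The offset encoding implied by these macros keeps the generated table position-independent: each FcLangCharSet records where its leaf offsets and numbers live as byte offsets relative to its own charset member (SET(n)) rather than as pointers, so fcLangData can sit in read-only data without load-time relocation. The layout, reading the #defines:

    0       langCharSets[]   one FcLangCharSet per language
    LEAF0   leaves[]         unique leaf bitmaps
    OFF0    leaf_offsets[]   per-set leaf locations (tn entries)
    NUM0    numbers[]        per-set leaf page numbers (tn entries)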
+
+    assert len(sets) < 256  # FIXME: if this ever fails, the index types below must be widened to 16 bits
+
+ print('''
+static const struct {{
+ FcLangCharSet langCharSets[{}];
+ FcCharLeaf leaves[{}];
+ uintptr_t leaf_offsets[{}];
+ FcChar16 numbers[{}];
+ {} langIndices[{}];
+ {} langIndicesInv[{}];
+}} fcLangData = {{'''.format(len(sets), len(leaves), tn, tn,
+ 'FcChar8 ', len(sets), 'FcChar8 ', len(sets)))
+
+ # Dump sets
+ print('{')
+ for i, s in enumerate(sets):
+        if duplicate[i] is not None:
+            j = duplicate[i]
+        else:
+            j = i
+ print(' {{ "{}", {{ FC_REF_CONSTANT, {}, OFF({},{}), NUM({},{}) }} }}, /* {} */'.format(
+ langs[i], len(sets[j].leaves), i, off[j], i, off[j], i))
+
+ print('},')
+
+ # Dump leaves
+ print('{')
+ for l, leaf in enumerate(leaves):
+ print(' {{ {{ /* {} */'.format(l), end='')
+ for i in range(0, 8): # 256/32 = 8
+ if i % 4 == 0:
+ print('\n ', end='')
+ print(' 0x{:08x},'.format(leaf[i]), end='')
+ print('\n } },')
+ print('},')
+
+    # Dump leaf offsets
+ print('{')
+ for i, s in enumerate(sets):
+        if duplicate[i] is not None:
+            continue
+
+ print(' /* {} */'.format(names[i]))
+
+ for n, leaf_num in enumerate(sorted(s.leaves.keys())):
+ leaf = s.leaves[leaf_num]
+ if n % 4 == 0:
+ print(' ', end='')
+ found = [k for k, unique_leaf in enumerate(leaves) if leaves_equal(unique_leaf,leaf)]
+ assert found, "Couldn't find leaf in unique leaves list!"
+ assert len(found) == 1
+ print(' LEAF({:3},{:3}),'.format(off[i], found[0]), end='')
+ if n % 4 == 3:
+ print('')
+ if len(s.leaves) % 4 != 0:
+ print('')
+
+ print('},')
+
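+    # Dump leaf numbers (the page index ucs4 >> 8 for each charset's leaves)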
+ print('{')
+ for i, s in enumerate(sets):
+        if duplicate[i] is not None:
+            continue
+
+ print(' /* {} */'.format(names[i]))
+
+ for n, leaf_num in enumerate(sorted(s.leaves.keys())):
+ leaf = s.leaves[leaf_num]
+ if n % 8 == 0:
+ print(' ', end='')
+ print(' 0x{:04x},'.format(leaf_num), end='')
+ if n % 8 == 7:
+ print('')
+ if len(s.leaves) % 8 != 0:
+ print('')
+
+ print('},')
+
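+    # (langIndices maps sorted output position -> original orth_files position;
+    # langIndicesInv is the inverse. Both rely on dicts preserving insertion
+    # order, i.e. Python 3.7+.)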
+ # langIndices
+ print('{')
+ for i, s in enumerate(sets):
+ fn = '{}.orth'.format(names[i])
+ print(' {}, /* {} */'.format(orth_entries[fn], names[i]))
+ print('},')
+
+ # langIndicesInv
+ print('{')
+    for k in orth_entries.keys():
+ name = get_name(k)
+ idx = names.index(name)
+ print(' {}, /* {} */'.format(idx, name))
+ print('}')
+
+ print('};\n')
+
+ print('#define NUM_LANG_CHAR_SET {}'.format(len(sets)))
+    num_lang_set_map = (len(sets) + 31) // 32
+ print('#define NUM_LANG_SET_MAP {}'.format(num_lang_set_map))
+
+ # Dump indices with country codes
+ assert len(country) > 0
+ assert len(LangCountrySets) > 0
+ print('')
+ print('static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {')
+ for k in sorted(LangCountrySets.keys()):
+ langset_map = [0] * num_lang_set_map # initialise all zeros
+ for entries_id in LangCountrySets[k]:
+ langset_map[entries_id >> 5] |= (1 << (entries_id & 0x1f))
+ print(' {', end='')
+ for v in langset_map:
+ print(' 0x{:08x},'.format(v), end='')
+ print(' }}, /* {} */'.format(k))
+
+ print('};\n')
+ print('#define NUM_COUNTRY_SET {}\n'.format(len(LangCountrySets)))
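
Each entry packs the original orth_files indices of a family's country variants into a bitmap using the same word/bit split as CharSet: index 33, for example, sets bit 33 & 0x1f = 1 of word 33 >> 5 = 1.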
+
+ # Find ranges for each letter for faster searching
+ # Dump sets start/finish for the fastpath
+ print('static const FcLangCharSetRange fcLangCharSetRanges[] = {\n')
+ for c in string.ascii_lowercase: # a-z
+ start = 9999
+ stop = -1
+ for i, s in enumerate(sets):
+ if names[i].startswith(c):
+ start = min(start,i)
+ stop = max(stop,i)
+ print(' {{ {}, {} }}, /* {} */'.format(start, stop, c))
+ print('};\n')
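
For context, a hypothetical consumer-side lookup that uses the ranges table to narrow the scan (illustrative Python, not fontconfig's actual C lookup):

    def find_lang_index(lang, langs, ranges):
        # ranges[k] covers set names starting with chr(ord('a') + k)
        start, stop = ranges[ord(lang[0]) - ord('a')]
        for i in range(start, stop + 1):  # empty when stop < start
            if langs[i] == lang:
                return i
        return None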
+
+ # And flush out the rest of the input file
+ for line in tmpl_file:
+ print(line, end='')
+
+ sys.stdout.flush()
diff --git a/fc-lang/meson.build b/fc-lang/meson.build
new file mode 100644
index 0000000..2c5a1c5
--- /dev/null
+++ b/fc-lang/meson.build
@@ -0,0 +1,256 @@
+# Do not reorder, magic: each file's index in this list is baked into the
+# generated tables
+orth_files = [
+ 'aa.orth',
+ 'ab.orth',
+ 'af.orth',
+ 'am.orth',
+ 'ar.orth',
+ 'as.orth',
+ 'ast.orth',
+ 'av.orth',
+ 'ay.orth',
+ 'az_az.orth',
+ 'az_ir.orth',
+ 'ba.orth',
+ 'bm.orth',
+ 'be.orth',
+ 'bg.orth',
+ 'bh.orth',
+ 'bho.orth',
+ 'bi.orth',
+ 'bin.orth',
+ 'bn.orth',
+ 'bo.orth',
+ 'br.orth',
+ 'bs.orth',
+ 'bua.orth',
+ 'ca.orth',
+ 'ce.orth',
+ 'ch.orth',
+ 'chm.orth',
+ 'chr.orth',
+ 'co.orth',
+ 'cs.orth',
+ 'cu.orth',
+ 'cv.orth',
+ 'cy.orth',
+ 'da.orth',
+ 'de.orth',
+ 'dz.orth',
+ 'el.orth',
+ 'en.orth',
+ 'eo.orth',
+ 'es.orth',
+ 'et.orth',
+ 'eu.orth',
+ 'fa.orth',
+ 'fi.orth',
+ 'fj.orth',
+ 'fo.orth',
+ 'fr.orth',
+ 'ff.orth',
+ 'fur.orth',
+ 'fy.orth',
+ 'ga.orth',
+ 'gd.orth',
+ 'gez.orth',
+ 'gl.orth',
+ 'gn.orth',
+ 'gu.orth',
+ 'gv.orth',
+ 'ha.orth',
+ 'haw.orth',
+ 'he.orth',
+ 'hi.orth',
+ 'ho.orth',
+ 'hr.orth',
+ 'hu.orth',
+ 'hy.orth',
+ 'ia.orth',
+ 'ig.orth',
+ 'id.orth',
+ 'ie.orth',
+ 'ik.orth',
+ 'io.orth',
+ 'is.orth',
+ 'it.orth',
+ 'iu.orth',
+ 'ja.orth',
+ 'ka.orth',
+ 'kaa.orth',
+ 'ki.orth',
+ 'kk.orth',
+ 'kl.orth',
+ 'km.orth',
+ 'kn.orth',
+ 'ko.orth',
+ 'kok.orth',
+ 'ks.orth',
+ 'ku_am.orth',
+ 'ku_ir.orth',
+ 'kum.orth',
+ 'kv.orth',
+ 'kw.orth',
+ 'ky.orth',
+ 'la.orth',
+ 'lb.orth',
+ 'lez.orth',
+ 'ln.orth',
+ 'lo.orth',
+ 'lt.orth',
+ 'lv.orth',
+ 'mg.orth',
+ 'mh.orth',
+ 'mi.orth',
+ 'mk.orth',
+ 'ml.orth',
+ 'mn_cn.orth',
+ 'mo.orth',
+ 'mr.orth',
+ 'mt.orth',
+ 'my.orth',
+ 'nb.orth',
+ 'nds.orth',
+ 'ne.orth',
+ 'nl.orth',
+ 'nn.orth',
+ 'no.orth',
+ 'nr.orth',
+ 'nso.orth',
+ 'ny.orth',
+ 'oc.orth',
+ 'om.orth',
+ 'or.orth',
+ 'os.orth',
+ 'pa.orth',
+ 'pl.orth',
+ 'ps_af.orth',
+ 'ps_pk.orth',
+ 'pt.orth',
+ 'rm.orth',
+ 'ro.orth',
+ 'ru.orth',
+ 'sa.orth',
+ 'sah.orth',
+ 'sco.orth',
+ 'se.orth',
+ 'sel.orth',
+ 'sh.orth',
+ 'shs.orth',
+ 'si.orth',
+ 'sk.orth',
+ 'sl.orth',
+ 'sm.orth',
+ 'sma.orth',
+ 'smj.orth',
+ 'smn.orth',
+ 'sms.orth',
+ 'so.orth',
+ 'sq.orth',
+ 'sr.orth',
+ 'ss.orth',
+ 'st.orth',
+ 'sv.orth',
+ 'sw.orth',
+ 'syr.orth',
+ 'ta.orth',
+ 'te.orth',
+ 'tg.orth',
+ 'th.orth',
+ 'ti_er.orth',
+ 'ti_et.orth',
+ 'tig.orth',
+ 'tk.orth',
+ 'tl.orth',
+ 'tn.orth',
+ 'to.orth',
+ 'tr.orth',
+ 'ts.orth',
+ 'tt.orth',
+ 'tw.orth',
+ 'tyv.orth',
+ 'ug.orth',
+ 'uk.orth',
+ 'ur.orth',
+ 'uz.orth',
+ 've.orth',
+ 'vi.orth',
+ 'vo.orth',
+ 'vot.orth',
+ 'wa.orth',
+ 'wen.orth',
+ 'wo.orth',
+ 'xh.orth',
+ 'yap.orth',
+ 'yi.orth',
+ 'yo.orth',
+ 'zh_cn.orth',
+ 'zh_hk.orth',
+ 'zh_mo.orth',
+ 'zh_sg.orth',
+ 'zh_tw.orth',
+ 'zu.orth',
+ 'ak.orth',
+ 'an.orth',
+ 'ber_dz.orth',
+ 'ber_ma.orth',
+ 'byn.orth',
+ 'crh.orth',
+ 'csb.orth',
+ 'dv.orth',
+ 'ee.orth',
+ 'fat.orth',
+ 'fil.orth',
+ 'hne.orth',
+ 'hsb.orth',
+ 'ht.orth',
+ 'hz.orth',
+ 'ii.orth',
+ 'jv.orth',
+ 'kab.orth',
+ 'kj.orth',
+ 'kr.orth',
+ 'ku_iq.orth',
+ 'ku_tr.orth',
+ 'kwm.orth',
+ 'lg.orth',
+ 'li.orth',
+ 'mai.orth',
+ 'mn_mn.orth',
+ 'ms.orth',
+ 'na.orth',
+ 'ng.orth',
+ 'nv.orth',
+ 'ota.orth',
+ 'pa_pk.orth',
+ 'pap_an.orth',
+ 'pap_aw.orth',
+ 'qu.orth',
+ 'quz.orth',
+ 'rn.orth',
+ 'rw.orth',
+ 'sc.orth',
+ 'sd.orth',
+ 'sg.orth',
+ 'sid.orth',
+ 'sn.orth',
+ 'su.orth',
+ 'ty.orth',
+ 'wal.orth',
+ 'za.orth',
+ 'lah.orth',
+ 'nqo.orth',
+ 'brx.orth',
+ 'sat.orth',
+ 'doi.orth',
+ 'mni.orth',
+ 'und_zsye.orth',
+ 'und_zmth.orth',
+]
+
+fclang_h = custom_target('fclang.h',
+ output: ['fclang.h'],
+ input: orth_files,
+  command: [find_program('fc-lang.py'), orth_files,
+            '--template', files('fclang.tmpl.h')[0],
+            '--output', '@OUTPUT@',
+            '--directory', meson.current_source_dir()],
+ build_by_default: true,
+)