author     Tim-Philipp Müller <tim@centricular.com>    2020-07-31 07:26:11 +0000
committer  Akira TAGOH <akira@tagoh.org>               2020-07-31 07:26:11 +0000
commit     57a224f51d6c019e4ce5d75efb22f34a8330423e (patch)
tree       e3d7acfe511c07650db57c485c6dcf134e2c78a5 /fc-lang
parent     03aa12c75e117acb0d160212536f6f832e0dc8d9 (diff)
Add Meson build system
See https://mesonbuild.com
Diffstat (limited to 'fc-lang')
-rwxr-xr-x  fc-lang/fc-lang.py    387
-rw-r--r--  fc-lang/meson.build   256
2 files changed, 643 insertions(+), 0 deletions(-)
diff --git a/fc-lang/fc-lang.py b/fc-lang/fc-lang.py
new file mode 100755
index 0000000..cc1dea8
--- /dev/null
+++ b/fc-lang/fc-lang.py
@@ -0,0 +1,387 @@
+#!/usr/bin/env python3
+#
+# fontconfig/fc-lang/fc-lang.py
+#
+# Copyright © 2001-2002 Keith Packard
+# Copyright © 2019 Tim-Philipp Müller
+#
+# Permission to use, copy, modify, distribute, and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice appear in all copies and that both that
+# copyright notice and this permission notice appear in supporting
+# documentation, and that the name of the author(s) not be used in
+# advertising or publicity pertaining to distribution of the software without
+# specific, written prior permission. The authors make no
+# representations about the suitability of this software for any purpose. It
+# is provided "as is" without express or implied warranty.
+#
+# THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+# EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# fc-lang
+#
+# Read a set of language orthographies and build C declarations for
+# charsets which can then be used to identify which languages are
+# supported by a given font.
+#
+# TODO: this code is not very Pythonic; much of it is a 1:1 translation
+# of the C code and could probably be simplified a bit
+import argparse
+import string
+import sys
+import os
+
+# we just store the leaves in a dict; we can order them later if needed
+class CharSet:
+ def __init__(self):
+        self.leaves = {} # leaf_number -> leaf data (= 8 x uint32 = 256 bits)
+
+ def add_char(self, ucs4):
+ assert ucs4 < 0x01000000
+ leaf_num = ucs4 >> 8
+ if leaf_num in self.leaves:
+ leaf = self.leaves[leaf_num]
+ else:
+ leaf = [0, 0, 0, 0, 0, 0, 0, 0] # 256/32 = 8
+ self.leaves[leaf_num] = leaf
+ leaf[(ucs4 & 0xff) >> 5] |= (1 << (ucs4 & 0x1f))
+ #print('{:08x} [{:04x}] --> {}'.format(ucs4, ucs4>>8, leaf))
+
+ def del_char(self, ucs4):
+ assert ucs4 < 0x01000000
+ leaf_num = ucs4 >> 8
+ if leaf_num in self.leaves:
+ leaf = self.leaves[leaf_num]
+ leaf[(ucs4 & 0xff) >> 5] &= ~(1 << (ucs4 & 0x1f))
+        # We don't bother removing the leaf if it's empty
+ #print('{:08x} [{:04x}] --> {}'.format(ucs4, ucs4>>8, leaf))
+
+ def equals(self, other_cs):
+ keys = sorted(self.leaves.keys())
+ other_keys = sorted(other_cs.leaves.keys())
+ if len(keys) != len(other_keys):
+ return False
+ for k1, k2 in zip(keys, other_keys):
+ if k1 != k2:
+ return False
+ if not leaves_equal(self.leaves[k1], other_cs.leaves[k2]):
+ return False
+ return True
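
A minimal usage sketch of the bitmap math above (assuming only the CharSet class as defined): U+0041 lands in leaf 0x0041 >> 8 = 0, word (0x41 & 0xff) >> 5 = 2, bit 0x41 & 0x1f = 1.

    cs = CharSet()
    cs.add_char(0x0041)                 # 'A'
    assert cs.leaves[0][2] == (1 << 1)  # leaf 0, word 2, bit 1
    cs.del_char(0x0041)
    assert cs.leaves[0][2] == 0         # bit cleared; the empty leaf is kept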
+
+# Convert a file name into a name suitable for C declarations
+def get_name(file_name):
+ return file_name.split('.')[0]
+
+# Convert a C name into a language name
+def get_lang(c_name):
+ return c_name.replace('_', '-').replace(' ', '').lower()
+
+def read_orth_file(file_name):
+ lines = []
+ with open(file_name, 'r', encoding='utf-8') as orth_file:
+ for num, line in enumerate(orth_file):
+ if line.startswith('include '):
+ include_fn = line[8:].strip()
+ lines += read_orth_file(include_fn)
+ else:
+                # strip comments and whitespace
+                line = line.split('#')[0].strip()
+                line = line.split('\t')[0].strip()
+ # skip empty lines
+ if line:
+ lines += [(file_name, num, line)]
+
+ return lines
+
+def leaves_equal(leaf1, leaf2):
+ for v1, v2 in zip(leaf1, leaf2):
+ if v1 != v2:
+ return False
+ return True
+
+# Build a single charset from a source file
+#
+# The file format is simple: each line holds a single hex code point or a
+# range (two values separated by '-' or '..'); a leading '-' removes the
+# characters instead of adding them, and 'include <file>' pulls in another
+# orthography file.
+def parse_orth_file(file_name, lines):
+ charset = CharSet()
+ for fn, num, line in lines:
+ delete_char = line.startswith('-')
+ if delete_char:
+ line = line[1:]
+ if line.find('-') != -1:
+ parts = line.split('-')
+ elif line.find('..') != -1:
+ parts = line.split('..')
+ else:
+ parts = [line]
+
+ start = int(parts.pop(0), 16)
+ end = start
+ if parts:
+ end = int(parts.pop(0), 16)
+ if parts:
+ print('ERROR: {} line {}: parse error (too many parts)'.format(fn, num))
+
+ for ucs4 in range(start, end+1):
+ if delete_char:
+ charset.del_char(ucs4)
+ else:
+ charset.add_char(ucs4)
+
+ assert charset.equals(charset) # sanity check for the equals function
+
+ return charset
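
To make the accepted syntax concrete, a hypothetical orth file (file name and values illustrative only):

    # sample.orth (hypothetical)
    0041-005a       # a range with '-' adds U+0041..U+005A
    00c0..00d6      # '..' is accepted as a range separator too
    0131            # a single code point
    -0049           # a leading '-' removes U+0049 again
    include latin.orth

read_orth_file() expands the include and strips comments before parse_orth_file() folds everything into one CharSet.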
+
+if __name__=='__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('orth_files', nargs='+', help='List of .orth files')
+ parser.add_argument('--directory', dest='directory', default=None)
+ parser.add_argument('--template', dest='template_file', default=None)
+ parser.add_argument('--output', dest='output_file', default=None)
+
+ args = parser.parse_args()
+
+ sets = []
+ names = []
+ langs = []
+ country = []
+
+ total_leaves = 0
+
+ LangCountrySets = {}
+
+ # Open output file
+ if args.output_file:
+ sys.stdout = open(args.output_file, 'w', encoding='utf-8')
+
+ # Read the template file
+ if args.template_file:
+ tmpl_file = open(args.template_file, 'r', encoding='utf-8')
+ else:
+ tmpl_file = sys.stdin
+
+ # Change into source dir if specified (after opening other files)
+ if args.directory:
+ os.chdir(args.directory)
+
+ orth_entries = {}
+ for i, fn in enumerate(args.orth_files):
+ orth_entries[fn] = i
+
+ for fn in sorted(orth_entries.keys()):
+ lines = read_orth_file(fn)
+ charset = parse_orth_file(fn, lines)
+
+ sets.append(charset)
+
+ name = get_name(fn)
+ names.append(name)
+
+ lang = get_lang(name)
+ langs.append(lang)
+ if lang.find('-') != -1:
+ country.append(orth_entries[fn]) # maps to original index
+ language_family = lang.split('-')[0]
+            if language_family not in LangCountrySets:
+ LangCountrySets[language_family] = []
+ LangCountrySets[language_family] += [orth_entries[fn]]
+
+ total_leaves += len(charset.leaves)
+
+ # Find unique leaves
+ leaves = []
+ for s in sets:
+ for leaf_num in sorted(s.leaves.keys()):
+ leaf = s.leaves[leaf_num]
+ is_unique = True
+ for existing_leaf in leaves:
+ if leaves_equal(leaf, existing_leaf):
+ is_unique = False
+ break
+ #print('unique: ', is_unique)
+ if is_unique:
+ leaves.append(leaf)
+
+ # Find duplicate charsets
+ duplicate = []
+ for i, s in enumerate(sets):
+ dup_num = None
+ if i >= 1:
+ for j, s_cmp in enumerate(sets):
+ if j >= i:
+ break
+ if s_cmp.equals(s):
+ dup_num = j
+ break
+
+ duplicate.append(dup_num)
+
+ tn = 0
+ off = {}
+ for i, s in enumerate(sets):
+        if duplicate[i] is not None:
+            continue
+ off[i] = tn
+ tn += len(s.leaves)
+
+ # Scan the input until the marker is found
+ # FIXME: this is a bit silly really, might just as well hardcode
+ # the license header in the script and drop the template
+ for line in tmpl_file:
+ if line.strip() == '@@@':
+ break
+ print(line, end='')
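
The template is split at the '@@@' marker: the loop above copies everything before it, the generated tables are printed next, and the remainder is flushed by the final loop at the end of the script. A sketch of a minimal fclang.tmpl.h, assuming nothing about the real file beyond the marker:

    /* header emitted verbatim before the generated tables */
    @@@
    /* tail emitted verbatim after the generated tables */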
+
+ print('/* total size: {} unique leaves: {} */\n'.format(total_leaves, len(leaves)))
+
+ print('#define LEAF0 ({} * sizeof (FcLangCharSet))'.format(len(sets)))
+ print('#define OFF0 (LEAF0 + {} * sizeof (FcCharLeaf))'.format(len(leaves)))
+ print('#define NUM0 (OFF0 + {} * sizeof (uintptr_t))'.format(tn))
+ print('#define SET(n) (n * sizeof (FcLangCharSet) + offsetof (FcLangCharSet, charset))')
+ print('#define OFF(s,o) (OFF0 + o * sizeof (uintptr_t) - SET(s))')
+ print('#define NUM(s,n) (NUM0 + n * sizeof (FcChar16) - SET(s))')
+ print('#define LEAF(o,l) (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))')
+ print('#define fcLangCharSets (fcLangData.langCharSets)')
+ print('#define fcLangCharSetIndices (fcLangData.langIndices)')
+ print('#define fcLangCharSetIndicesInv (fcLangData.langIndicesInv)')
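
The offset encoding implied by these macros keeps the generated table position-independent: each FcLangCharSet records where its leaf offsets and numbers live as byte offsets relative to its own charset member (SET(n)) rather than as pointers, so fcLangData can sit in read-only data without load-time relocation. The layout, reading the #defines:

    0       langCharSets[]   one FcLangCharSet per language
    LEAF0   leaves[]         unique leaf bitmaps
    OFF0    leaf_offsets[]   per-set leaf locations (tn entries)
    NUM0    numbers[]        per-set leaf page numbers (tn entries)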
+
+    assert len(sets) < 256  # FIXME: if this ever fails, the index types below must be widened to 16 bits
+
+ print('''
+static const struct {{
+ FcLangCharSet langCharSets[{}];
+ FcCharLeaf leaves[{}];
+ uintptr_t leaf_offsets[{}];
+ FcChar16 numbers[{}];
+ {} langIndices[{}];
+ {} langIndicesInv[{}];
+}} fcLangData = {{'''.format(len(sets), len(leaves), tn, tn,
+ 'FcChar8 ', len(sets), 'FcChar8 ', len(sets)))
+
+ # Dump sets
+ print('{')
+ for i, s in enumerate(sets):
+        if duplicate[i] is not None:
+            j = duplicate[i]
+        else:
+            j = i
+ print(' {{ "{}", {{ FC_REF_CONSTANT, {}, OFF({},{}), NUM({},{}) }} }}, /* {} */'.format(
+ langs[i], len(sets[j].leaves), i, off[j], i, off[j], i))
+
+ print('},')
+
+ # Dump leaves
+ print('{')
+ for l, leaf in enumerate(leaves):
+ print(' {{ {{ /* {} */'.format(l), end='')
+ for i in range(0, 8): # 256/32 = 8
+ if i % 4 == 0:
+ print('\n ', end='')
+ print(' 0x{:08x},'.format(leaf[i]), end='')
+ print('\n } },')
+ print('},')
+
+    # Dump leaf offsets
+ print('{')
+ for i, s in enumerate(sets):
+        if duplicate[i] is not None:
+            continue
+
+ print(' /* {} */'.format(names[i]))
+
+ for n, leaf_num in enumerate(sorted(s.leaves.keys())):
+ leaf = s.leaves[leaf_num]
+ if n % 4 == 0:
+ print(' ', end='')
+ found = [k for k, unique_leaf in enumerate(leaves) if leaves_equal(unique_leaf,leaf)]
+ assert found, "Couldn't find leaf in unique leaves list!"
+ assert len(found) == 1
+ print(' LEAF({:3},{:3}),'.format(off[i], found[0]), end='')
+ if n % 4 == 3:
+ print('')
+ if len(s.leaves) % 4 != 0:
+ print('')
+
+ print('},')
+
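+    # Dump leaf numbers (the page index ucs4 >> 8 for each charset's leaves)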
+ print('{')
+ for i, s in enumerate(sets):
+        if duplicate[i] is not None:
+            continue
+
+ print(' /* {} */'.format(names[i]))
+
+ for n, leaf_num in enumerate(sorted(s.leaves.keys())):
+ leaf = s.leaves[leaf_num]
+ if n % 8 == 0:
+ print(' ', end='')
+ print(' 0x{:04x},'.format(leaf_num), end='')
+ if n % 8 == 7:
+ print('')
+ if len(s.leaves) % 8 != 0:
+ print('')
+
+ print('},')
+
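+    # (langIndices maps sorted output position -> original orth_files position;
+    # langIndicesInv is the inverse. Both rely on dicts preserving insertion
+    # order, i.e. Python 3.7+.)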
+ # langIndices
+ print('{')
+ for i, s in enumerate(sets):
+ fn = '{}.orth'.format(names[i])
+ print(' {}, /* {} */'.format(orth_entries[fn], names[i]))
+ print('},')
+
+ # langIndicesInv
+ print('{')
+    for k in orth_entries.keys():
+ name = get_name(k)
+ idx = names.index(name)
+ print(' {}, /* {} */'.format(idx, name))
+ print('}')
+
+ print('};\n')
+
+ print('#define NUM_LANG_CHAR_SET {}'.format(len(sets)))
+    num_lang_set_map = (len(sets) + 31) // 32
+ print('#define NUM_LANG_SET_MAP {}'.format(num_lang_set_map))
+
+ # Dump indices with country codes
+ assert len(country) > 0
+ assert len(LangCountrySets) > 0
+ print('')
+ print('static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {')
+ for k in sorted(LangCountrySets.keys()):
+ langset_map = [0] * num_lang_set_map # initialise all zeros
+ for entries_id in LangCountrySets[k]:
+ langset_map[entries_id >> 5] |= (1 << (entries_id & 0x1f))
+ print(' {', end='')
+ for v in langset_map:
+ print(' 0x{:08x},'.format(v), end='')
+ print(' }}, /* {} */'.format(k))
+
+ print('};\n')
+ print('#define NUM_COUNTRY_SET {}\n'.format(len(LangCountrySets)))
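
Each entry packs the original orth_files indices of a family's country variants into a bitmap using the same word/bit split as CharSet: index 33, for example, sets bit 33 & 0x1f = 1 of word 33 >> 5 = 1.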
+
+ # Find ranges for each letter for faster searching
+ # Dump sets start/finish for the fastpath
+ print('static const FcLangCharSetRange fcLangCharSetRanges[] = {\n')
+ for c in string.ascii_lowercase: # a-z
+ start = 9999
+ stop = -1
+ for i, s in enumerate(sets):
+ if names[i].startswith(c):
+ start = min(start,i)
+ stop = max(stop,i)
+ print(' {{ {}, {} }}, /* {} */'.format(start, stop, c))
+ print('};\n')
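
For context, a hypothetical consumer-side lookup that uses the ranges table to narrow the scan (illustrative Python, not fontconfig's actual C lookup):

    def find_lang_index(lang, langs, ranges):
        # ranges[k] covers set names starting with chr(ord('a') + k)
        start, stop = ranges[ord(lang[0]) - ord('a')]
        for i in range(start, stop + 1):  # empty when stop < start
            if langs[i] == lang:
                return i
        return None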
+
+ # And flush out the rest of the input file
+ for line in tmpl_file:
+ print(line, end='')
+
+ sys.stdout.flush()
diff --git a/fc-lang/meson.build b/fc-lang/meson.build
new file mode 100644
index 0000000..2c5a1c5
--- /dev/null
+++ b/fc-lang/meson.build
@@ -0,0 +1,256 @@
+# Do not reorder, magic: each file's index in this list is baked into the
+# generated tables
+orth_files = [
+ 'aa.orth',
+ 'ab.orth',
+ 'af.orth',
+ 'am.orth',
+ 'ar.orth',
+ 'as.orth',
+ 'ast.orth',
+ 'av.orth',
+ 'ay.orth',
+ 'az_az.orth',
+ 'az_ir.orth',
+ 'ba.orth',
+ 'bm.orth',
+ 'be.orth',
+ 'bg.orth',
+ 'bh.orth',
+ 'bho.orth',
+ 'bi.orth',
+ 'bin.orth',
+ 'bn.orth',
+ 'bo.orth',
+ 'br.orth',
+ 'bs.orth',
+ 'bua.orth',
+ 'ca.orth',
+ 'ce.orth',
+ 'ch.orth',
+ 'chm.orth',
+ 'chr.orth',
+ 'co.orth',
+ 'cs.orth',
+ 'cu.orth',
+ 'cv.orth',
+ 'cy.orth',
+ 'da.orth',
+ 'de.orth',
+ 'dz.orth',
+ 'el.orth',
+ 'en.orth',
+ 'eo.orth',
+ 'es.orth',
+ 'et.orth',
+ 'eu.orth',
+ 'fa.orth',
+ 'fi.orth',
+ 'fj.orth',
+ 'fo.orth',
+ 'fr.orth',
+ 'ff.orth',
+ 'fur.orth',
+ 'fy.orth',
+ 'ga.orth',
+ 'gd.orth',
+ 'gez.orth',
+ 'gl.orth',
+ 'gn.orth',
+ 'gu.orth',
+ 'gv.orth',
+ 'ha.orth',
+ 'haw.orth',
+ 'he.orth',
+ 'hi.orth',
+ 'ho.orth',
+ 'hr.orth',
+ 'hu.orth',
+ 'hy.orth',
+ 'ia.orth',
+ 'ig.orth',
+ 'id.orth',
+ 'ie.orth',
+ 'ik.orth',
+ 'io.orth',
+ 'is.orth',
+ 'it.orth',
+ 'iu.orth',
+ 'ja.orth',
+ 'ka.orth',
+ 'kaa.orth',
+ 'ki.orth',
+ 'kk.orth',
+ 'kl.orth',
+ 'km.orth',
+ 'kn.orth',
+ 'ko.orth',
+ 'kok.orth',
+ 'ks.orth',
+ 'ku_am.orth',
+ 'ku_ir.orth',
+ 'kum.orth',
+ 'kv.orth',
+ 'kw.orth',
+ 'ky.orth',
+ 'la.orth',
+ 'lb.orth',
+ 'lez.orth',
+ 'ln.orth',
+ 'lo.orth',
+ 'lt.orth',
+ 'lv.orth',
+ 'mg.orth',
+ 'mh.orth',
+ 'mi.orth',
+ 'mk.orth',
+ 'ml.orth',
+ 'mn_cn.orth',
+ 'mo.orth',
+ 'mr.orth',
+ 'mt.orth',
+ 'my.orth',
+ 'nb.orth',
+ 'nds.orth',
+ 'ne.orth',
+ 'nl.orth',
+ 'nn.orth',
+ 'no.orth',
+ 'nr.orth',
+ 'nso.orth',
+ 'ny.orth',
+ 'oc.orth',
+ 'om.orth',
+ 'or.orth',
+ 'os.orth',
+ 'pa.orth',
+ 'pl.orth',
+ 'ps_af.orth',
+ 'ps_pk.orth',
+ 'pt.orth',
+ 'rm.orth',
+ 'ro.orth',
+ 'ru.orth',
+ 'sa.orth',
+ 'sah.orth',
+ 'sco.orth',
+ 'se.orth',
+ 'sel.orth',
+ 'sh.orth',
+ 'shs.orth',
+ 'si.orth',
+ 'sk.orth',
+ 'sl.orth',
+ 'sm.orth',
+ 'sma.orth',
+ 'smj.orth',
+ 'smn.orth',
+ 'sms.orth',
+ 'so.orth',
+ 'sq.orth',
+ 'sr.orth',
+ 'ss.orth',
+ 'st.orth',
+ 'sv.orth',
+ 'sw.orth',
+ 'syr.orth',
+ 'ta.orth',
+ 'te.orth',
+ 'tg.orth',
+ 'th.orth',
+ 'ti_er.orth',
+ 'ti_et.orth',
+ 'tig.orth',
+ 'tk.orth',
+ 'tl.orth',
+ 'tn.orth',
+ 'to.orth',
+ 'tr.orth',
+ 'ts.orth',
+ 'tt.orth',
+ 'tw.orth',
+ 'tyv.orth',
+ 'ug.orth',
+ 'uk.orth',
+ 'ur.orth',
+ 'uz.orth',
+ 've.orth',
+ 'vi.orth',
+ 'vo.orth',
+ 'vot.orth',
+ 'wa.orth',
+ 'wen.orth',
+ 'wo.orth',
+ 'xh.orth',
+ 'yap.orth',
+ 'yi.orth',
+ 'yo.orth',
+ 'zh_cn.orth',
+ 'zh_hk.orth',
+ 'zh_mo.orth',
+ 'zh_sg.orth',
+ 'zh_tw.orth',
+ 'zu.orth',
+ 'ak.orth',
+ 'an.orth',
+ 'ber_dz.orth',
+ 'ber_ma.orth',
+ 'byn.orth',
+ 'crh.orth',
+ 'csb.orth',
+ 'dv.orth',
+ 'ee.orth',
+ 'fat.orth',
+ 'fil.orth',
+ 'hne.orth',
+ 'hsb.orth',
+ 'ht.orth',
+ 'hz.orth',
+ 'ii.orth',
+ 'jv.orth',
+ 'kab.orth',
+ 'kj.orth',
+ 'kr.orth',
+ 'ku_iq.orth',
+ 'ku_tr.orth',
+ 'kwm.orth',
+ 'lg.orth',
+ 'li.orth',
+ 'mai.orth',
+ 'mn_mn.orth',
+ 'ms.orth',
+ 'na.orth',
+ 'ng.orth',
+ 'nv.orth',
+ 'ota.orth',
+ 'pa_pk.orth',
+ 'pap_an.orth',
+ 'pap_aw.orth',
+ 'qu.orth',
+ 'quz.orth',
+ 'rn.orth',
+ 'rw.orth',
+ 'sc.orth',
+ 'sd.orth',
+ 'sg.orth',
+ 'sid.orth',
+ 'sn.orth',
+ 'su.orth',
+ 'ty.orth',
+ 'wal.orth',
+ 'za.orth',
+ 'lah.orth',
+ 'nqo.orth',
+ 'brx.orth',
+ 'sat.orth',
+ 'doi.orth',
+ 'mni.orth',
+ 'und_zsye.orth',
+ 'und_zmth.orth',
+]
+
+fclang_h = custom_target('fclang.h',
+ output: ['fclang.h'],
+ input: orth_files,
+  command: [find_program('fc-lang.py'), orth_files,
+            '--template', files('fclang.tmpl.h')[0],
+            '--output', '@OUTPUT@',
+            '--directory', meson.current_source_dir()],
+ build_by_default: true,
+)