summaryrefslogtreecommitdiff
path: root/genmap.py
blob: 9a48b2005c502a415ffc4fa80b229762f23a31af (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/bin/env python
import encodings

# Names of the stdlib ``encodings`` submodules to generate C tables for.
# The order is kept exactly as originally listed.
maps = [
    'cp037', 'cp1250', 'cp1254', 'cp1258', 'cp737', 'cp855',
    'cp861', 'cp865', 'cp875', 'iso8859_10', 'iso8859_15', 'iso8859_3',
    'iso8859_7', 'cp1006', 'cp1251', 'cp1255', 'cp424', 'cp775',
    'cp856', 'cp862', 'cp866', 'cp932', 'iso8859_11', 'iso8859_16',
    'iso8859_4', 'iso8859_8', 'cp1026', 'cp1252', 'cp1256', 'cp437',
    'cp850', 'cp857', 'cp863', 'cp869', 'cp949', 'iso8859_13',
    'iso8859_1', 'iso8859_5', 'iso8859_9', 'cp1140', 'cp1253', 'cp1257',
    'cp500', 'cp852', 'cp860', 'cp864', 'cp874', 'cp950',
    'iso8859_14', 'iso8859_2', 'iso8859_6',
]

def unichar2str(u):
    """Format a code point as a C hexadecimal literal such as ``0x41``.

    Args:
        u: Either an integer code point or a one-character string.  Any
            falsy value (``None``, ``0``, an empty string) is rendered
            as ``'0x0'``.

    Returns:
        The value as a ``0x``-prefixed lowercase hexadecimal string.

    Raises:
        TypeError: If ``u`` is neither a single character nor a value
            accepted by the ``%x`` format.
    """
    if not u:
        return '0x0'
    try:
        # Single characters are converted to their ordinal.  Trying ord()
        # first avoids naming the ``unicode`` type, which only exists on
        # Python 2, and also accepts one-character byte strings.
        code = ord(u)
    except TypeError:
        # Not a single character, so treat it as already numeric.
        code = u
    return '0x%x' % code

# Codec name -> (encoding_map, decoding_table); filled in at module level
# below and passed entry by entry to genmaptable().
mappings = {}
def _c_initializer_rows(rows):
    """Join C initializer rows with commas, one row per line."""
    body = ',\n'.join(rows)
    # An empty initializer contributes nothing, not even a newline.
    return body + '\n' if body else ''


def genmaptable(m, encoding_map, decoding_table):
    """Write ``<m>-table.c`` containing the C tables for one codec.

    The generated file holds three definitions: ``<m>_decoding_table``
    (one integer per entry of ``decoding_table``), ``<m>_encoding_map``
    (one ``{ key, value }`` pair per entry of ``encoding_map``, ordered by
    key) and the ``__uniconv_<m>_state`` object that references both.

    Args:
        m: Codec name; used for the output file name and the C identifiers.
        encoding_map: Dict whose keys are integers or one-character strings
            (anything ``unichar2str`` accepts) and whose values are integers.
        decoding_table: Sequence of one-character strings; each is written
            as its ordinal, in sequence order.

    The whole text is built before the file is opened, so a formatting
    error no longer leaves a truncated file behind, and the handle is
    always closed by the ``with`` block.
    """
    keys = sorted(encoding_map)
    decoding_rows = ['\t0x%x' % ord(v) for v in decoding_table]
    encoding_rows = ['\t{ %s, 0x%x }' % (unichar2str(k), encoding_map[k])
                     for k in keys]
    text = ''.join([
        '#include <singlebytecodec.h>\n\n',
        'static const int %s_decoding_table[%d] = {\n'
        % (m, len(decoding_table)),
        _c_initializer_rows(decoding_rows),
        '};\n\n',
        'static const struct encoding_map %s_encoding_map[] = {\n' % m,
        _c_initializer_rows(encoding_rows),
        '};\n\n',
        'SingleByteCodecState __uniconv_%s_state = {\n' % m,
        '\t"%s", %s_decoding_table, %d, %s_encoding_map, %d\n'
        % (m, m, len(decoding_table), m, len(keys)),
        '};\n\n',
    ])
    # Binary mode keeps "\n" line endings on every platform; encoding
    # explicitly makes the write valid on both Python 2 and Python 3.
    with open(m + '-table.c', 'wb') as f:
        f.write(text.encode('ascii'))

def gensinglebytemaps(table):
    """Write ``singlebytetables.c``, the registry of all generated codecs.

    The file declares each ``__uniconv_<name>_state`` object as ``extern``,
    collects their addresses in a ``NULL``-terminated ``singlebytecodecs``
    array, and defines ``__uniconv_get_single_byte_codecs()`` returning
    that array.  Names are emitted in sorted order.

    Args:
        table: Iterable of codec names.  It is not modified; a sorted copy
            is used instead of sorting the caller's list in place.
    """
    names = sorted(table)
    parts = ['#include <singlebytecodec.h>\n\n']
    parts.extend('extern SingleByteCodecState __uniconv_%s_state;\n' % name
                 for name in names)
    parts.append('\n')
    parts.append('static SingleByteCodecState *singlebytecodecs[] = {\n')
    parts.extend('\t&__uniconv_%s_state,\n' % name for name in names)
    parts.append('\tNULL,\n')
    parts.append('};\n\n')
    parts.append('SingleByteCodecState** __uniconv_get_single_byte_codecs(void)\n')
    parts.append('{\n\treturn singlebytecodecs;\n}\n\n')
    # Binary mode keeps "\n" line endings on every platform; encoding
    # explicitly makes the write valid on both Python 2 and Python 3.
    with open('singlebytetables.c', 'wb') as fp:
        fp.write(''.join(parts).encode('ascii'))

# Codec names for which a table file is generated; filled by the driver
# loop further down and handed to gensinglebytemaps().
encodings_table = []
for m in maps:
    # __import__ returns the top-level ``encodings`` package; the codec
    # module itself is then reachable as an attribute of that package.
    encoding = getattr(__import__('encodings.%s' % m), m)
    decoding_map = getattr(encoding, 'decoding_map', {})
    decoding_table = list(getattr(encoding, 'decoding_table', ()))
    encoding_map = getattr(encoding, 'encoding_map', {})
    if not decoding_table and decoding_map:
        # Only a byte -> code point dict is available: expand it into a
        # table of characters indexed by byte value, which is the shape
        # genmaptable() expects (it calls ord() on every entry).  The
        # previous code assigned through an undefined index into an empty
        # list and crashed here.
        # NOTE(review): U+FFFE for missing/None entries mirrors the
        # "undefined" marker used in CPython's own decoding_table
        # strings -- confirm the C consumer treats it the same way.
        decoding_table = [u'\ufffe'] * (max(decoding_map) + 1)
        for byte, codepoint in decoding_map.items():
            if codepoint is not None:
                # u'%c' builds a text character on Python 2 and 3 alike.
                decoding_table[byte] = u'%c' % codepoint
    if not encoding_map and decoding_table:
        # No encoding dict supplied: invert the decoding table.  When a
        # character occurs more than once, the highest byte value wins.
        for i, char in enumerate(decoding_table):
            encoding_map[char] = i
    # Modules exposing neither attribute end up with both empty and are
    # skipped.
    if decoding_table and encoding_map:
        mappings[m] = (encoding_map, decoding_table)

def get_ascii_mapping():
    """Build the identity mapping for 7-bit ASCII.

    Returns:
        An ``(encoding_map, decoding_table)`` tuple.  ``encoding_map`` maps
        every integer in ``0..127`` to itself; ``decoding_table`` is a list
        of the 128 corresponding one-character text strings, in order.
    """
    codes = range(128)
    # u'%c' yields a text (unicode) character on both Python 2 and 3,
    # without going through the Python 2-only ``unicode()`` builtin.
    decoding_table = [u'%c' % c for c in codes]
    encoding_map = dict((c, c) for c in codes)
    return (encoding_map, decoding_table)
# ASCII is registered from the hand-built identity mapping rather than
# from one of the modules listed in ``maps``.
mappings['ascii'] = get_ascii_mapping()

# Emit one "<name>-table.c" per collected codec and remember its name.
for codec_name, (enc_map, dec_table) in mappings.items():
    genmaptable(codec_name, enc_map, dec_table)
    encodings_table.append(codec_name)

# Finally write the registry file that references every generated table.
gensinglebytemaps(encodings_table)