summaryrefslogtreecommitdiff
path: root/scripts/untranslated.py
blob: 5512f2344a41f889d89d18a150d02182dd68c3d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python
# -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#

import os
import polib
import sys
import itertools
import re

def usageAndExit():
    """Print the command-line usage text to stdout and exit with status 1.

    The program name shown in the text is the basename of ``sys.argv[0]``.

    Raises:
        SystemExit: always, with exit code 1.
    """
    message = """usage: {program} online_dir lo_translations_dir lang

Prints en-US strings that do not have translations in the specified language.

"""
    print(message.format(program = os.path.basename(sys.argv[0])))
    # Use sys.exit() rather than the bare exit(): the latter is a convenience
    # helper installed by the site module and is not available when the
    # interpreter is started with -S.
    sys.exit(1)

# extract translations from po files
def extractFromPo(poFile, stringIds, untranslated):
    """Collect untranslated and fuzzy msgids whose context matches a string id.

    Args:
        poFile: path of the .po file to read. If it is not an existing
            regular file the function returns without doing anything.
        stringIds: substrings to look for in each entry's msgctxt.
        untranslated: list that the msgid of every matching entry is
            appended to (mutated in place).

    Returns:
        None.
    """
    if not os.path.isfile(poFile):
        return

    po = polib.pofile(poFile, autodetect_encoding=False, encoding="utf-8", wrapwidth=-1)

    for entry in itertools.chain(po.untranslated_entries(), po.fuzzy_entries()):
        context = entry.msgctxt
        # An entry without a context has msgctxt set to None (polib's
        # default); a substring test against None would raise TypeError.
        if context is None:
            continue
        # any() stops at the first hit, so an entry whose context matches
        # several ids is recorded once rather than once per matching id.
        if any(stringId in context for stringId in stringIds):
            untranslated.append(entry.msgid)

# Read the uno commands present in the unocommands.js for checking
def parseUnocommandsJS(onlineDir):
    """Parse unocommands.js and return the translatable strings per command.

    Reads ``<onlineDir>/loleaflet/src/unocommands.js`` as UTF-8. Every line
    that starts with a tab followed by ``name:`` is treated as a command
    line, and all ``_('...')`` occurrences on that line are collected.

    Args:
        onlineDir: path of the online source tree.

    Returns:
        A dict mapping each command name to the list of strings found in
        ``_('...')`` calls on its line. Commands whose line contains no such
        call are omitted.

    Raises:
        IOError / OSError: if the file cannot be opened.
    """
    # Imported locally so this function is self-contained; io.open decodes
    # the file as UTF-8 on both Python 2 and Python 3, whereas calling
    # .decode() on each line only works on Python 2.
    import io

    strings = {}

    path = onlineDir + '/loleaflet/src/unocommands.js'
    # The with-block guarantees the file handle is closed, even on error.
    with io.open(path, 'r', encoding='utf-8') as f:
        for line in f:
            m = re.match(r"\t([^:]*):.*", line)
            if not m:
                continue

            found = re.findall(r"_\('([^']*)'\)", line)
            if found:
                strings[m.group(1)] = found

    return strings

# Remove duplicates from list
def uniq(seq):
    """Return the items of seq as a list with later duplicates dropped.

    The first occurrence of each item is kept and the original order is
    preserved. Items must be hashable.
    """
    already = set()
    ordered = []
    for item in seq:
        if item in already:
            continue
        already.add(item)
        ordered.append(item)
    return ordered

if __name__ == "__main__":
    # Script entry point. Expects exactly three arguments (online_dir,
    # lo_translations_dir, lang), gathers the en-US strings that are
    # untranslated or fuzzy for that language and prints each one once.
    if len(sys.argv) != 4:
        usageAndExit()

    onlineDir = sys.argv[1]
    translationsDir = sys.argv[2]
    lang = sys.argv[3]

    # Named sourceDir rather than dir so the dir() builtin is not shadowed.
    sourceDir = translationsDir + '/source/'

    untranslated = []

    # LO Core strings

    # extract 'Clear formatting' and some status bar strings
    poFile = sourceDir + lang + '/svx/messages.po'
    extractFromPo(poFile, ["RID_SVXSTR_CLEARFORM", "RID_SVXSTR_OVERWRITE_TEXT", "selectionmenu|"], untranslated)

    # extract Writer style names and status bar strings
    poFile = sourceDir + lang + '/sw/messages.po'
    extractFromPo(poFile, ["STR_POOL", "STR_PAGE_COUNT", "STR_STATUSBAR_WORDCOUNT_NO_SELECTION", "STR_LANGSTATUS_NONE"], untranslated)

    # extract Impress/Draw style names, layout names and 'Slide %1 of %2'
    poFile = sourceDir + lang + '/sd/messages.po'
    extractFromPo(poFile, ["STR_STANDARD_STYLESHEET_NAME", "STR_POOL", "STR_PSEUDOSHEET", "STR_AUTOLAYOUT", "STR_AL_", "STR_SD_PAGE_COUNT"], untranslated)

    # extract Calc style names and strings for status bar
    poFile = sourceDir + lang + '/sc/messages.po'
    extractFromPo(poFile, ["STR_STYLENAME_", "STR_FILTER_SELCOUNT", "STR_ROWCOL_SELCOUNT", "STR_FUN_TEXT_", "STR_UNDO_INSERTCELLS", "STR_TABLE_COUNT"], untranslated)

    # extract language names
    poFile = sourceDir + lang + '/svtools/messages.po'
    extractFromPo(poFile, ["STR_ARR_SVT_LANGUAGE_TABLE"], untranslated)

    # UNO command strings

    parsed = parseUnocommandsJS(onlineDir)

    poFile = sourceDir + lang + '/officecfg/registry/data/org/openoffice/Office/UI.po'

    po = polib.pofile(poFile, autodetect_encoding=False, encoding="utf-8", wrapwidth=-1)

    for entry in itertools.chain(po.untranslated_entries(), po.fuzzy_entries()):
        # msgctxt can be None for an entry without a context; searching None
        # would raise TypeError, so fall back to an empty string.
        m = re.search(r"\.uno:([^\n]*)\n", entry.msgctxt or "")
        if not m:
            continue
        command = m.group(1)
        if command not in parsed:
            continue
        # Only report the string if unocommands.js uses exactly this msgid
        # for the command; the '~' characters are stripped from the output.
        for text in parsed[command]:
            if text == entry.msgid:
                untranslated.append(entry.msgid.replace("~",""))

    # Online UI, then Online help (keyboard shortcuts). The online po files
    # use '_' where the language code has '-'.
    onlineLang = lang.replace("-","_")
    for prefix in ('ui-', 'help-'):
        poFile = onlineDir + '/loleaflet/po/' + prefix + onlineLang + '.po'
        po = polib.pofile(poFile, autodetect_encoding=False, encoding="utf-8", wrapwidth=-1)

        for entry in itertools.chain(po.untranslated_entries(), po.fuzzy_entries()):
            untranslated.append(entry.msgid)

    # Print the results

    # Write the UTF-8 bytes straight to stdout. Python 3 exposes the byte
    # stream as sys.stdout.buffer; Python 2's sys.stdout accepts bytes
    # itself. This replaces the Python 2-only print statement and keeps the
    # output UTF-8 on both versions.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    for elem in uniq(untranslated):
        out.write(elem.encode('utf-8') + b'\n')


# vim: set shiftwidth=4 softtabstop=4 expandtab: