diff options
author | Daniel <ted@localhost.localdomain> | 2012-06-20 14:59:09 +0300 |
---|---|---|
committer | Daniel <ted@localhost.localdomain> | 2012-06-20 14:59:09 +0300 |
commit | 81e8a4c82f64dcb576b93e04801b7344d0837843 (patch) | |
tree | 2299d2baf29d5b023677e6bcad79a0888eafe27d | |
parent | a72754bde51fe6e3111b994d727de7d1e49d8e48 (diff) |
Uploaded initial converter file
-rw-r--r-- | src/convertor/xml2reexp.py | 15 |
1 files changed, 15 insertions, 0 deletions
# Recovered from the diff that adds src/convertor/xml2reexp.py
# (new file, mode 100644, blob fc235c3).
"""List the simple rules of a LanguageTool grammar.xml as one-line patterns.

A rule is "simple" when none of its <token> elements carries an attribute
other than ``regexp`` (see the LanguageTool grammar.xml documentation).
Each simple rule that has a <message>/<suggestion> is written as

    (token1) (token2) ... -> suggestion # Did you mean?

The parentheses around every token are what make ``regexp`` tokens usable
unchanged in the output.
"""

import xml.etree.ElementTree as ET

# Grammar file read when the module is run as a script.
GRAMMAR_FILE = "grammar.xml"

# Token attributes that the parenthesized output form can represent.
_SUPPORTED_ATTRIBUTES = frozenset(["regexp"])


def _is_simple(rule):
    """Return True if no <token> under *rule* has an unsupported attribute."""
    # Comparing key *sets* works whether attrib.keys() is a list or a view,
    # and all() stops at the first offending token.
    return all(
        set(token.attrib) <= _SUPPORTED_ATTRIBUTES
        for token in rule.iter("token")
    )


def _rule_to_line(rule):
    """Return the output line for *rule*, or None if it cannot be converted.

    A rule is skipped (None) when it is not simple, or when it has no
    <message> child or that message has no <suggestion> child -- without a
    suggestion there is nothing to put on the right-hand side.
    """
    if not _is_simple(rule):
        return None

    # Elements without children are falsy, so test against None explicitly.
    message = rule.find("message")
    suggestion = message.find("suggestion") if message is not None else None
    if suggestion is None:
        return None

    parts = ["(%s)" % token.text for token in rule.iter("token")]
    # "%s" keeps the original formatting for a suggestion with no text.
    parts.append("-> %s # Did you mean?" % suggestion.text)
    return " ".join(parts)


def convert_rules(tree):
    """Yield one output line per convertible <rule> found under *tree*.

    *tree* is an ElementTree or Element; rules are visited in document
    order at any nesting depth.
    """
    for rule in tree.iter("rule"):
        line = _rule_to_line(rule)
        if line is not None:
            yield line


def main():
    """Parse GRAMMAR_FILE and print every convertible rule."""
    tree = ET.parse(GRAMMAR_FILE)  # parse grammar.xml into an ElementTree
    for line in convert_rules(tree):
        # A single parenthesized argument prints identically on Python 2 and 3.
        print(line)


if __name__ == "__main__":
    main()