summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel <ted@localhost.localdomain>2012-06-20 14:59:09 +0300
committerDaniel <ted@localhost.localdomain>2012-06-20 14:59:09 +0300
commit81e8a4c82f64dcb576b93e04801b7344d0837843 (patch)
tree2299d2baf29d5b023677e6bcad79a0888eafe27d
parenta72754bde51fe6e3111b994d727de7d1e49d8e48 (diff)
Uploaded initial converter file
-rw-r--r--src/convertor/xml2reexp.py15
1 files changed, 15 insertions, 0 deletions
diff --git a/src/convertor/xml2reexp.py b/src/convertor/xml2reexp.py
new file mode 100644
index 0000000..fc235c3
--- /dev/null
+++ b/src/convertor/xml2reexp.py
@@ -0,0 +1,15 @@
+import xml.etree.ElementTree as ET
+# Print every rule of LanguageTool's grammar.xml whose tokens are all "simple", one rule per line, as "(token) (token) ... -> suggestion # Did you mean?"
+tree = ET.parse("grammar.xml") # parse grammar.xml (path is relative to the current working directory) into an ElementTree instance
+# NOTE: Python 2 only -- uses print statements and compares dict.keys() against a list
+# list all rules with simple tokens
+for rule in tree.iter("rule"): # loop over every <rule> element of grammar.xml; rule is the current element
+ simple = True # a rule counts as simple until one of its tokens carries an attribute other than regexp (see documentation of LanguageTool grammar.xml)
+ for token in rule.iter("token"): # loop over every <token> element inside the current rule; token is the current element
+ if token.attrib and token.attrib.keys() != ["regexp"]: # token has attributes and they are not exactly the single attribute regexp (attrib is the dict of XML attributes, see ElementTree doc); a lone regexp is accepted because the output parenthesizes each token. NOTE(review): relies on Python 2 keys() returning a list -- on Python 3 this comparison would always be True
+ simple = False # the rule is not simple
+ if simple: # only rules whose tokens all passed the check above are printed
+ for token in rule.iter("token"): # second pass over the same tokens, this time to print them
+ print "(%s)" % token.text, # trailing comma: the Python 2 print statement does not end the line, so the tokens of one rule share a line
+ print "->", rule.find('message').find('suggestion').text, "# Did you mean?" # finish the line with the text of the first <suggestion> in the rule's <message>. NOTE(review): raises AttributeError when the rule has no <message> child or the message has no <suggestion> -- confirm every simple rule has both
+