Uploaded initial converter file

author: Daniel <ted@localhost.localdomain> 2012-06-20 14:59:09 +0300
committer: Daniel <ted@localhost.localdomain> 2012-06-20 14:59:09 +0300
commit: 81e8a4c82f64dcb576b93e04801b7344d0837843 (patch)
tree: 2299d2baf29d5b023677e6bcad79a0888eafe27d
parent: a72754bde51fe6e3111b994d727de7d1e49d8e48 (diff)
1 files changed, 15 insertions, 0 deletions
diff --git a/src/convertor/xml2reexp.py b/src/convertor/xml2reexp.py
new file mode 100644
index 0000000..fc235c3
--- /dev/null
+++ b/src/convertor/xml2reexp.py
@@ -0,0 +1,15 @@
+import xml.etree.ElementTree as ET
+
+tree = ET.parse("grammar.xml")  # parse LanguageTool's grammar.xml into an ElementTree instance
+
+# Print every "simple" rule as "(token) (token) ... -> suggestion # Did you mean?".
+for rule in tree.iter("rule"):  # visit every <rule> element of grammar.xml
+  tokens = list(rule.iter("token"))  # collect the rule's <token> elements once; they are needed twice below
+  # A rule is simple when no token has an attribute other than "regexp" (regexp tokens fit the parenthesized output).
+  simple = not any(key != "regexp" for token in tokens for key in token.attrib)
+  suggestion = rule.find("message/suggestion")  # None when the rule has no <message> or no <suggestion>
+  if simple and suggestion is not None:  # skip rules without a suggestion instead of crashing mid-line
+    for token in tokens:
+      print "(%s)" % token.text,
+    print "->", suggestion.text, "# Did you mean?"
+
author	Daniel <ted@localhost.localdomain>	2012-06-20 14:59:09 +0300
committer	Daniel <ted@localhost.localdomain>	2012-06-20 14:59:09 +0300
commit	81e8a4c82f64dcb576b93e04801b7344d0837843 (patch)
tree	2299d2baf29d5b023677e6bcad79a0888eafe27d
parent	a72754bde51fe6e3111b994d727de7d1e49d8e48 (diff)