summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Korostil <ted.korostiled@gmail.com>2012-06-20 20:53:08 +0300
committerDaniel Korostil <ted.korostiled@gmail.com>2012-06-20 20:53:08 +0300
commit7dc45cddcf22d28ada6ba46b717298e37fb00826 (patch)
tree76be1b1fd9c5c6fd2b82df22e1f3afd59eb67939
parent1b3ba6ba926cdbdff06ca9267333f2294971961e (diff)
testing parser
-rw-r--r--src/converter/test.py26
-rw-r--r--src/converter/test.xml14
2 files changed, 40 insertions, 0 deletions
diff --git a/src/converter/test.py b/src/converter/test.py
new file mode 100644
index 0000000..051987e
--- /dev/null
+++ b/src/converter/test.py
@@ -0,0 +1,26 @@
+import xml.etree.ElementTree as ET
+import string
+
+
+tree = ET.parse("test.xml") # parse test.xml (the path actually passed here) into an ElementTree instance
+
+# Print every rule whose tokens are all "simple", followed by the text of its suggestion
+for rule in tree.iter("rule"): # loop over every <rule> element of the parsed tree; rule holds the current element
+ simple = True # a simple rule is a rule with tokens without attributes (see documentation of LanguageTool grammar.xml)
+ for token in rule.iter("token"): # loop over every <token> in the current rule; token holds the current element
+ if token.attrib and token.attrib.keys() != ["regexp"]: # attrib is the dict of the element's XML attributes (see ElementTree doc); any attribute set other than exactly "regexp" disqualifies the rule, regexp being supported by the parenthesized tokens in the output. NOTE(review): this list comparison relies on Python 2, where keys() returns a list
+ simple = False # the rule is not simple
+ if simple:
+ for token in rule.iter("token"):
+ if pattern.attrib == None: # NOTE(review): pattern is never assigned in this script, so this raises NameError when reached; presumably the rule's <pattern> element was meant - confirm. Also, ElementTree's attrib is normally a dict (possibly empty), not None - verify the intended test
+ print "(%s)" % token.text, # Python 2 print statement; the trailing comma keeps the output on the same line
+ if pattern.attrib != None:
+ MarkFrom = pattern.attrib['mark_from'].text - 1 # NOTE(review): attribute values are strings; they have no .text and need int() before arithmetic - confirm intent
+ MarkTo = pattern.attrib['mark_to'].text + 1 # NOTE(review): same concern as for MarkFrom above
+ MarkText = token.text
+ Mark = string.split(MarkText) # split the token text on whitespace (Python 2 string-module function)
+ for list in Mark: # NOTE(review): the loop variable shadows the built-in list
+ list[MarkFrom:MarkTo] = "(" + Mark[MarkFrom:MarkTo] + ")" # NOTE(review): list is a str here (no slice assignment) and Mark[...] is a list (cannot be added to a str) - this line looks unfinished
+ print "(%s)" % Mark,
+ print "->", rule.find('message').find('suggestion').text, "# Did you mean?" # print the text of the <suggestion> element found inside the rule's <message>
+
diff --git a/src/converter/test.xml b/src/converter/test.xml
new file mode 100644
index 0000000..61d4e58
--- /dev/null
+++ b/src/converter/test.xml
@@ -0,0 +1,14 @@
+<rule>
+ <pattern mark_from="2" mark_to="-2">
+ <token postag="SENT_START"></token>
+ <token>To</token>
+ <token>being</token>
+ <token>with</token>
+ <token>,</token>
+ </pattern>
+ <message>Did you mean <suggestion>begin</suggestion>?</message>
+ <short>Possible typo</short>
+ <example correction="begin" type="incorrect">To
+<marker>being</marker> with, she is a Russian spy.</example>
+ <example type="correct">To begin with, she's a spy.</example>
+ </rule> \ No newline at end of file