diff options
author | Daniel Korostil <ted.korostiled@gmail.com> | 2012-06-20 20:53:08 +0300 |
---|---|---|
committer | Daniel Korostil <ted.korostiled@gmail.com> | 2012-06-20 20:53:08 +0300 |
commit | 7dc45cddcf22d28ada6ba46b717298e37fb00826 (patch) | |
tree | 76be1b1fd9c5c6fd2b82df22e1f3afd59eb67939 | |
parent | 1b3ba6ba926cdbdff06ca9267333f2294971961e (diff) |
testing parser
-rw-r--r-- | src/converter/test.py | 26 | ||||
-rw-r--r-- | src/converter/test.xml | 14 |
2 files changed, 40 insertions, 0 deletions
# File: src/converter/test.py (new in this commit; reconstructed from the
# flattened "+"-prefixed diff hunk into plain source).
"""Prototype converter: print simple LanguageTool rules as one-line rules.

A rule is "simple" when none of its token elements carries an attribute
other than ``regexp``.  Each simple rule that has a suggestion is printed as::

    (token) (token) ... -> suggestion # Did you mean?

Runs on both Python 2.7 and Python 3.
"""

import xml.etree.ElementTree as ET

# Attributes a token may carry and still count as simple: the parenthesized
# output can express a regexp token, but nothing else (postag, inflected, ...).
_SUPPORTED_TOKEN_ATTRS = {"regexp"}


def _is_simple(rule):
    """Return True if every token in *rule* has no attribute besides regexp."""
    return all(
        set(token.attrib) <= _SUPPORTED_TOKEN_ATTRS
        for token in rule.iter("token")
    )


def _marked_range(pattern, token_count):
    """Return the half-open index range ``(first, last)`` of marked tokens.

    ``mark_from`` is read as the index of the first marked token and
    ``mark_to`` as a non-positive offset from the end of the pattern; both
    default to 0, i.e. the whole pattern is marked.

    NOTE(review): this reading is inferred from the test.xml fixture, where
    mark_from="2" mark_to="-2" on a five-token pattern corresponds to a marker
    around the third token only -- confirm against the LanguageTool docs.

    Raises:
        ValueError: if either attribute is present but not an integer.
    """
    if pattern is None:
        return 0, token_count
    first = int(pattern.get("mark_from", "0"))
    last = token_count + int(pattern.get("mark_to", "0"))
    return first, last


def format_rule(rule):
    """Return the one-line form of *rule*, or None if it cannot be converted.

    None is returned when the rule is not simple, or when it has no
    message/suggestion element to use as the replacement (the original script
    crashed with AttributeError in that case).

    Tokens inside the marked range are wrapped in parentheses; tokens outside
    it are emitted bare.  With no mark attributes every token is wrapped,
    which matches the original output for unmarked rules.
    NOTE(review): emitting unmarked tokens bare is an interpretation of the
    original, non-working, marked-range branch -- confirm the desired format.
    """
    if not _is_simple(rule):
        return None
    suggestion = rule.findtext("message/suggestion")
    if suggestion is None:
        return None

    # An empty token element has text None; emit "()" rather than "(None)".
    texts = [token.text or "" for token in rule.iter("token")]
    first, last = _marked_range(rule.find("pattern"), len(texts))

    parts = [
        ("(%s)" % text) if first <= index < last else text
        for index, text in enumerate(texts)
    ]
    parts.extend(["->", suggestion, "# Did you mean?"])
    return " ".join(parts)


def main(path="test.xml"):
    """Parse the grammar file at *path* and print every convertible rule."""
    tree = ET.parse(path)
    for rule in tree.iter("rule"):
        line = format_rule(rule)
        if line is not None:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(line)


if __name__ == "__main__":
    main()
+ diff --git a/src/converter/test.xml b/src/converter/test.xml new file mode 100644 index 0000000..61d4e58 --- /dev/null +++ b/src/converter/test.xml @@ -0,0 +1,14 @@ +<rule> + <pattern mark_from="2" mark_to="-2"> + <token postag="SENT_START"></token> + <token>To</token> + <token>being</token> + <token>with</token> + <token>,</token> + </pattern> + <message>Did you mean <suggestion>begin</suggestion>?</message> + <short>Possible typo</short> + <example correction="begin" type="incorrect">To +<marker>being</marker> with, she is a Russian spy.</example> + <example type="correct">To begin with, she's a spy.</example> + </rule>
\ No newline at end of file |