From 89a111ceb8f5808a8b66545dea4e8232471fc4ba Mon Sep 17 00:00:00 2001 From: Kohei Yoshida Date: Sun, 20 Dec 2009 18:05:24 -0500 Subject: More on parsing xhp files. + support writing to a file. + encode non-ascii characters to avoid unicode error by python interpreter. --- ooo-help-parser.py | 28 ++++++++++++++++++++++++---- source/expatimpl.py | 53 +++++++++++++++++++++++++++++++++++++++-------------- source/globals.py | 3 +++ 3 files changed, 66 insertions(+), 18 deletions(-) diff --git a/ooo-help-parser.py b/ooo-help-parser.py index 2a3a255..d91f08c 100755 --- a/ooo-help-parser.py +++ b/ooo-help-parser.py @@ -7,20 +7,40 @@ import globals, expatimpl def main (): parser = optparse.OptionParser() + parser.set_defaults(output=None) + parser.add_option("-o", "--output", dest="output", help="write output to FILE", metavar="FILE") options, args = parser.parse_args() if len(args) == 0: parser.print_help() sys.exit(1) + fd = sys.stdout + if options.output != None: + if os.path.isdir(options.output): + globals.error("cannot create output file: " + options.output) + sys.exit(1) + fd = open(options.output, 'w') + + filesParsed = 0 for fpath in args: - if not os.path.isfile(args[0]): - globals.error(args[0] + " is not a valid file. Skipping.") - - file = open(args[0], 'r') + if not os.path.isfile(fpath): + globals.error(fpath + " is not a valid file. 
Skipping.") + continue + +# globals.info("processing " + fpath) + file = open(fpath, 'r') strm = file.read() file.close() p = expatimpl.Parser(strm) p.parse() + p.printSummary(fd) + p.prettyPrint(fd) + filesParsed += 1 + + globals.info("%d files have been processed"%filesParsed) + if fd != sys.stdout: + fd.close() + if __name__ == '__main__': main() diff --git a/source/expatimpl.py b/source/expatimpl.py index a147162..15e40c5 100644 --- a/source/expatimpl.py +++ b/source/expatimpl.py @@ -1,4 +1,4 @@ -import xml.parsers.expat +import xml.parsers.expat, sys import globals class NodeType: @@ -36,6 +36,26 @@ class Node(NodeBase): self.name = name self.attrs = attrs +# ============================================================================ + +class Meta: + def __init__ (self): + self.title = None + self.filename = None + +# ============================================================================ + +def encodeNonAscii (sin): + sout = '' + for c in sin: + if ord(c) >= 128: + sout += "\\x%2.2x"%ord(c) + else: + sout += c + + return sout + +# ============================================================================ class Parser: @@ -75,16 +95,18 @@ class Parser: p.EndElementHandler = self.endElement p.CharacterDataHandler = self.character p.Parse(self.strm, 1) - self.prettyPrint() - def prettyPrint (self): + def printSummary (self, fd): + pass + + def prettyPrint (self, fd): if len(self.root.children) != 1: return node = self.root.firstChild() - self.printNode(node, 0) + self.printNode(fd, node, 0) - def printNode (self, node, level): + def printNode (self, fd, node, level): singleIndent = ' '*4 indent = singleIndent*level if node.nodeType == NodeType.Node: @@ -97,18 +119,21 @@ class Parser: for key in keys: line += " " + key + '="' + node.attrs[key] + '"' if hasChildren: - line += ">" - print (indent + line) + line += ">\n" + line = encodeNonAscii(line) + fd.write (indent + line) for child in node.children: - self.printNode(child, level+1) - line = "</%s>"%node.name 
- print (indent + line) + self.printNode(fd, child, level+1) + line = "</%s>\n"%node.name + line = encodeNonAscii(line) + fd.write (indent + line) else: - line += "/>" - print (indent + line) - + line += "/>\n" + line = encodeNonAscii(line) + fd.write (indent + line) elif node.nodeType == NodeType.Content: if len(node.content) > 0: - print (indent + node.content) + content = encodeNonAscii(node.content) + fd.write (indent + content + "\n") diff --git a/source/globals.py b/source/globals.py index f7e6388..746fcba 100644 --- a/source/globals.py +++ b/source/globals.py @@ -4,6 +4,9 @@ import sys def error (msg): sys.stderr.write(msg + "\n") +def info (msg): + sys.stderr.write(msg + "\n") + class Exception: def __init__ (self, msg): self.msg = msg -- cgit v1.2.3