summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kohei Yoshida <kyoshida@novell.com> 2009-12-20 18:05:24 -0500
committer Kohei Yoshida <kyoshida@novell.com> 2009-12-20 18:05:24 -0500
commit 89a111ceb8f5808a8b66545dea4e8232471fc4ba (patch)
tree e70c63adbd06b4af1f6c6e70a1a1d7fdb3d0a606
parent 4447ca069afcb1cda3297f51e0778053a91d52c7 (diff)
More on parsing xhp files.
+ support writing to a file.
+ encode non-ascii characters to avoid unicode error by python interpreter.
-rwxr-xr-x ooo-help-parser.py 28
-rw-r--r-- source/expatimpl.py 53
-rw-r--r-- source/globals.py 3
3 files changed, 66 insertions, 18 deletions
diff --git a/ooo-help-parser.py b/ooo-help-parser.py
index 2a3a255..d91f08c 100755
--- a/ooo-help-parser.py
+++ b/ooo-help-parser.py
@@ -7,20 +7,40 @@ import globals, expatimpl
def main ():
parser = optparse.OptionParser()
+ parser.set_defaults(output=None)
+ parser.add_option("-o", "--output", dest="output", help="write output to FILE", metavar="FILE")
options, args = parser.parse_args()
if len(args) == 0:
parser.print_help()
sys.exit(1)
+ fd = sys.stdout
+ if options.output != None:
+ if os.path.isdir(options.output):
+ globals.error("cannot create output file: " + optiont.output)
+ sys.exit(1)
+ fd = open(options.output, 'w')
+
+ filesParsed = 0
for fpath in args:
- if not os.path.isfile(args[0]):
- globals.error(args[0] + " is not a valid file. Skipping.")
-
- file = open(args[0], 'r')
+ if not os.path.isfile(fpath):
+ globals.error(fpath + " is not a valid file. Skipping.")
+ continue
+
+# globals.info("processing " + fpath)
+ file = open(fpath, 'r')
strm = file.read()
file.close()
p = expatimpl.Parser(strm)
p.parse()
+ p.printSummary(fd)
+ p.prettyPrint(fd)
+ filesParsed += 1
+
+ globals.info("%d files have been processed"%filesParsed)
+ if fd != sys.stdout:
+ fd.close()
+
if __name__ == '__main__':
main()
diff --git a/source/expatimpl.py b/source/expatimpl.py
index a147162..15e40c5 100644
--- a/source/expatimpl.py
+++ b/source/expatimpl.py
@@ -1,4 +1,4 @@
-import xml.parsers.expat
+import xml.parsers.expat, sys
import globals
class NodeType:
@@ -36,6 +36,26 @@ class Node(NodeBase):
self.name = name
self.attrs = attrs
+# ============================================================================
+
+class Meta:
+ def __init__ (self):
+ self.title = None
+ self.filename = None
+
+# ============================================================================
+
+def encodeNonAscii (sin):
+ sout = ''
+ for c in sin:
+ if ord(c) >= 128:
+ sout += "\\x%2.2x"%ord(c)
+ else:
+ sout += c
+
+ return sout
+
+# ============================================================================
class Parser:
@@ -75,16 +95,18 @@ class Parser:
p.EndElementHandler = self.endElement
p.CharacterDataHandler = self.character
p.Parse(self.strm, 1)
- self.prettyPrint()
- def prettyPrint (self):
+ def printSummary (self, fd):
+ pass
+
+ def prettyPrint (self, fd):
if len(self.root.children) != 1:
return
node = self.root.firstChild()
- self.printNode(node, 0)
+ self.printNode(fd, node, 0)
- def printNode (self, node, level):
+ def printNode (self, fd, node, level):
singleIndent = ' '*4
indent = singleIndent*level
if node.nodeType == NodeType.Node:
@@ -97,18 +119,21 @@ class Parser:
for key in keys:
line += " " + key + '="' + node.attrs[key] + '"'
if hasChildren:
- line += ">"
- print (indent + line)
+ line += ">\n"
+ line = encodeNonAscii(line)
+ fd.write (indent + line)
for child in node.children:
- self.printNode(child, level+1)
- line = "</%s>"%node.name
- print (indent + line)
+ self.printNode(fd, child, level+1)
+ line = "</%s>\n"%node.name
+ line = encodeNonAscii(line)
+ fd.write (indent + line)
else:
- line += "/>"
- print (indent + line)
-
+ line += "/>\n"
+ line = encodeNonAscii(line)
+ fd.write (indent + line)
elif node.nodeType == NodeType.Content:
if len(node.content) > 0:
- print (indent + node.content)
+ content = encodeNonAscii(node.content)
+ fd.write (indent + content + "\n")
diff --git a/source/globals.py b/source/globals.py
index f7e6388..746fcba 100644
--- a/source/globals.py
+++ b/source/globals.py
@@ -4,6 +4,9 @@ import sys
def error (msg):
sys.stderr.write(msg + "\n")
+def info (msg):
+ sys.stderr.write(msg + "\n")
+
class Exception:
def __init__ (self, msg):
self.msg = msg