diff options
author | Kohei Yoshida <kyoshida@novell.com> | 2009-12-20 23:46:06 -0500 |
---|---|---|
committer | Kohei Yoshida <kyoshida@novell.com> | 2009-12-20 23:46:06 -0500 |
commit | 6a834d696d2eefe684593c29be6cdc4d4ac48a1f (patch) | |
tree | f835e7f1e28665a36f1f61808d12491f2bd475f4 | |
parent | 93c0674b5f8c6d5b444cf17479dd94346db812f5 (diff) |
Moved node-related code into node.py, added docbook.py for docbook converter.
-rwxr-xr-x | ooo-help-parser.py | 16
-rw-r--r-- | source/docbook.py | 13
-rw-r--r-- | source/expatimpl.py | 113
-rw-r--r-- | source/node.py | 102
4 files changed, 130 insertions, 114 deletions
diff --git a/ooo-help-parser.py b/ooo-help-parser.py index 7fa8b6b..7af6cce 100755 --- a/ooo-help-parser.py +++ b/ooo-help-parser.py @@ -3,7 +3,7 @@ import sys, os, os.path, optparse sys.path.append(sys.path[0]+"/source") -import globals, expatimpl +import globals, expatimpl, docbook, node def main (): parser = optparse.OptionParser() @@ -36,18 +36,24 @@ def main (): fd = open(options.output, 'w') filesParsed = 0 + rootNodes = [] for fpath in filepaths: file = open(fpath, 'r') strm = file.read() file.close() p = expatimpl.Parser(strm) p.parse() - if options.convert: - pass - else options.convert: - p.prettyPrint(fd) + rootNodes.append(p.root) filesParsed += 1 + if options.convert: + converter = docbook.DocBookConverter(rootNodes) + converter.convert() + converter.prettyPrint(fd) + else: + for root in rootNodes: + node.prettyPrint(fd, root) + globals.info("%d files have been processed"%filesParsed) if fd != sys.stdout: fd.close() diff --git a/source/docbook.py b/source/docbook.py new file mode 100644 index 0000000..84f38c2 --- /dev/null +++ b/source/docbook.py @@ -0,0 +1,13 @@ +import globals, node + +class DocBookConverter: + def __init__ (self, xhproots): + self.__xhproots = xhproots + self.root = node.Root() + + def convert (self): + pass + + def prettyPrint (self, fd): + pass + diff --git a/source/expatimpl.py b/source/expatimpl.py index 9093538..acb823f 100644 --- a/source/expatimpl.py +++ b/source/expatimpl.py @@ -1,70 +1,5 @@ import xml.parsers.expat, sys -import globals - -class NodeType: - Unknown = 0 - Root = 1 - Node = 2 - Content = 3 - -class NodeBase: - def __init__ (self, nodeType = NodeType.Unknown): - self.children = [] - self.nodeType = nodeType - - def appendChild (self, node): - self.children.append(node) - - def setContent (self, content): - self.content = content - - def firstChild (self): - return self.children[0] - -class Root(NodeBase): - def __init__ (self): - NodeBase.__init__(self, NodeType.Root) - -class Content(NodeBase): - def __init__ 
(self, content): - NodeBase.__init__(self, NodeType.Content) - self.content = content - -class Node(NodeBase): - def __init__ (self, name, attrs={}): - NodeBase.__init__(self, NodeType.Node) - self.name = name - self.attrs = attrs - -# ============================================================================ - -encodeTable = { - '>': 'gt', - '<': 'lt', - '&': 'amp' -} - -def encodeString (sin): - sout = '' - for c in sin: - if ord(c) >= 128: - # encode non-ascii ranges. - sout += "\\x%2.2x"%ord(c) - elif encodeTable.has_key(c): - # encode html symbols. - sout += '&' + encodeTable[c] + ';' - else: - sout += c - - return sout - -# ============================================================================ - -nodesToSkip = { -# 'paragraph': 1, - 'comment': 1, - 'history': 1 -} +import globals, node class Parser: @@ -74,11 +9,11 @@ class Parser: def __init__ (self, strm): self.strm = strm - self.root = Root() + self.root = node.Root() self.nodestack = [self.root] def startElement(self, name, attrs): - n = Node(name, attrs) + n = node.Node(name, attrs) self.nodestack[-1].appendChild(n) self.nodestack.append(n) @@ -96,7 +31,7 @@ class Parser: # move the unicode quote (e.g. u'foo' -> foo) if exists. s = s[2:-1] - self.nodestack[-1].appendChild(Content(s)) + self.nodestack[-1].appendChild(node.Content(s)) def parse (self): p = xml.parsers.expat.ParserCreate() @@ -105,43 +40,3 @@ class Parser: p.CharacterDataHandler = self.character p.Parse(self.strm, 1) - def prettyPrint (self, fd): - if len(self.root.children) != 1: - return - - node = self.root.firstChild() - self.printNode(fd, node, 0) - - def printNode (self, fd, node, level): - singleIndent = ' '*4 - indent = singleIndent*level - if node.nodeType == NodeType.Node: - hasChildren = len(node.children) > 0 - - # We add '<' and '>' (or '/>') after the element content gets - # encoded. 
- line = node.name - if len(node.attrs) > 0: - keys = node.attrs.keys() - keys.sort() - for key in keys: - line += " " + key + '="' + node.attrs[key] + '"' - if hasChildren and not nodesToSkip.has_key(node.name): - line = encodeString(line) - line = "<%s>\n"%line - fd.write (indent + line) - for child in node.children: - self.printNode(fd, child, level+1) - line = "</%s>\n"%node.name - fd.write (indent + line) - else: - line = encodeString(line) - line = "<%s/>\n"%line - fd.write (indent + line) - - elif node.nodeType == NodeType.Content: - content = node.content.strip() - if len(content) > 0: - content = encodeString(content) - fd.write (indent + content + "\n") - diff --git a/source/node.py b/source/node.py new file mode 100644 index 0000000..d956451 --- /dev/null +++ b/source/node.py @@ -0,0 +1,102 @@ + +class NodeType: + Unknown = 0 + Root = 1 + Node = 2 + Content = 3 + +class NodeBase: + def __init__ (self, nodeType = NodeType.Unknown): + self.children = [] + self.nodeType = nodeType + + def appendChild (self, node): + self.children.append(node) + + def setContent (self, content): + self.content = content + + def firstChild (self): + return self.children[0] + +class Root(NodeBase): + def __init__ (self): + NodeBase.__init__(self, NodeType.Root) + +class Content(NodeBase): + def __init__ (self, content): + NodeBase.__init__(self, NodeType.Content) + self.content = content + +class Node(NodeBase): + def __init__ (self, name, attrs={}): + NodeBase.__init__(self, NodeType.Node) + self.name = name + self.attrs = attrs + + +encodeTable = { + '>': 'gt', + '<': 'lt', + '&': 'amp' +} + +def encodeString (sin): + sout = '' + for c in sin: + if ord(c) >= 128: + # encode non-ascii ranges. + sout += "\\x%2.2x"%ord(c) + elif encodeTable.has_key(c): + # encode html symbols. 
+ sout += '&' + encodeTable[c] + ';' + else: + sout += c + + return sout + +nodesToSkip = { +# 'paragraph': 1, + 'comment': 1, + 'history': 1 +} + +def prettyPrint (fd, node): + printNode(fd, node, 0) + +def printNode (fd, node, level): + singleIndent = ' '*4 + indent = singleIndent*level + if node.nodeType == NodeType.Root: + # root node itself only contains child nodes. + for child in node.children: + printNode(fd, child, level) + elif node.nodeType == NodeType.Node: + hasChildren = len(node.children) > 0 + + # We add '<' and '>' (or '/>') after the element content gets + # encoded. + line = node.name + if len(node.attrs) > 0: + keys = node.attrs.keys() + keys.sort() + for key in keys: + line += " " + key + '="' + node.attrs[key] + '"' + if hasChildren:# and not nodesToSkip.has_key(node.name): + line = encodeString(line) + line = "<%s>\n"%line + fd.write (indent + line) + for child in node.children: + printNode(fd, child, level+1) + line = "</%s>\n"%node.name + fd.write (indent + line) + else: + line = encodeString(line) + line = "<%s/>\n"%line + fd.write (indent + line) + + elif node.nodeType == NodeType.Content: + content = node.content.strip() + if len(content) > 0: + content = encodeString(content) + fd.write (indent + content + "\n") |