summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kohei Yoshida <kyoshida@novell.com> 2009-12-20 23:46:06 -0500
committer Kohei Yoshida <kyoshida@novell.com> 2009-12-20 23:46:06 -0500
commit 6a834d696d2eefe684593c29be6cdc4d4ac48a1f (patch)
tree f835e7f1e28665a36f1f61808d12491f2bd475f4
parent 93c0674b5f8c6d5b444cf17479dd94346db812f5 (diff)
Moved node-related code into node.py, added docbook.py for docbook converter.
-rwxr-xr-x ooo-help-parser.py 16
-rw-r--r-- source/docbook.py 13
-rw-r--r-- source/expatimpl.py 113
-rw-r--r-- source/node.py 102
4 files changed, 130 insertions, 114 deletions
diff --git a/ooo-help-parser.py b/ooo-help-parser.py
index 7fa8b6b..7af6cce 100755
--- a/ooo-help-parser.py
+++ b/ooo-help-parser.py
@@ -3,7 +3,7 @@
import sys, os, os.path, optparse
sys.path.append(sys.path[0]+"/source")
-import globals, expatimpl
+import globals, expatimpl, docbook, node
def main ():
parser = optparse.OptionParser()
@@ -36,18 +36,24 @@ def main ():
fd = open(options.output, 'w')
filesParsed = 0
+ rootNodes = []
for fpath in filepaths:
file = open(fpath, 'r')
strm = file.read()
file.close()
p = expatimpl.Parser(strm)
p.parse()
- if options.convert:
- pass
- else options.convert:
- p.prettyPrint(fd)
+ rootNodes.append(p.root)
filesParsed += 1
+ if options.convert:
+ converter = docbook.DocBookConverter(rootNodes)
+ converter.convert()
+ converter.prettyPrint(fd)
+ else:
+ for root in rootNodes:
+ node.prettyPrint(fd, root)
+
globals.info("%d files have been processed"%filesParsed)
if fd != sys.stdout:
fd.close()
diff --git a/source/docbook.py b/source/docbook.py
new file mode 100644
index 0000000..84f38c2
--- /dev/null
+++ b/source/docbook.py
@@ -0,0 +1,13 @@
+import globals, node
+
+class DocBookConverter:
+ def __init__ (self, xhproots):
+ self.__xhproots = xhproots
+ self.root = node.Root()
+
+ def convert (self):
+ pass
+
+ def prettyPrint (self, fd):
+ pass
+
diff --git a/source/expatimpl.py b/source/expatimpl.py
index 9093538..acb823f 100644
--- a/source/expatimpl.py
+++ b/source/expatimpl.py
@@ -1,70 +1,5 @@
import xml.parsers.expat, sys
-import globals
-
-class NodeType:
- Unknown = 0
- Root = 1
- Node = 2
- Content = 3
-
-class NodeBase:
- def __init__ (self, nodeType = NodeType.Unknown):
- self.children = []
- self.nodeType = nodeType
-
- def appendChild (self, node):
- self.children.append(node)
-
- def setContent (self, content):
- self.content = content
-
- def firstChild (self):
- return self.children[0]
-
-class Root(NodeBase):
- def __init__ (self):
- NodeBase.__init__(self, NodeType.Root)
-
-class Content(NodeBase):
- def __init__ (self, content):
- NodeBase.__init__(self, NodeType.Content)
- self.content = content
-
-class Node(NodeBase):
- def __init__ (self, name, attrs={}):
- NodeBase.__init__(self, NodeType.Node)
- self.name = name
- self.attrs = attrs
-
-# ============================================================================
-
-encodeTable = {
- '>': 'gt',
- '<': 'lt',
- '&': 'amp'
-}
-
-def encodeString (sin):
- sout = ''
- for c in sin:
- if ord(c) >= 128:
- # encode non-ascii ranges.
- sout += "\\x%2.2x"%ord(c)
- elif encodeTable.has_key(c):
- # encode html symbols.
- sout += '&' + encodeTable[c] + ';'
- else:
- sout += c
-
- return sout
-
-# ============================================================================
-
-nodesToSkip = {
-# 'paragraph': 1,
- 'comment': 1,
- 'history': 1
-}
+import globals, node
class Parser:
@@ -74,11 +9,11 @@ class Parser:
def __init__ (self, strm):
self.strm = strm
- self.root = Root()
+ self.root = node.Root()
self.nodestack = [self.root]
def startElement(self, name, attrs):
- n = Node(name, attrs)
+ n = node.Node(name, attrs)
self.nodestack[-1].appendChild(n)
self.nodestack.append(n)
@@ -96,7 +31,7 @@ class Parser:
# move the unicode quote (e.g. u'foo' -> foo) if exists.
s = s[2:-1]
- self.nodestack[-1].appendChild(Content(s))
+ self.nodestack[-1].appendChild(node.Content(s))
def parse (self):
p = xml.parsers.expat.ParserCreate()
@@ -105,43 +40,3 @@ class Parser:
p.CharacterDataHandler = self.character
p.Parse(self.strm, 1)
- def prettyPrint (self, fd):
- if len(self.root.children) != 1:
- return
-
- node = self.root.firstChild()
- self.printNode(fd, node, 0)
-
- def printNode (self, fd, node, level):
- singleIndent = ' '*4
- indent = singleIndent*level
- if node.nodeType == NodeType.Node:
- hasChildren = len(node.children) > 0
-
- # We add '<' and '>' (or '/>') after the element content gets
- # encoded.
- line = node.name
- if len(node.attrs) > 0:
- keys = node.attrs.keys()
- keys.sort()
- for key in keys:
- line += " " + key + '="' + node.attrs[key] + '"'
- if hasChildren and not nodesToSkip.has_key(node.name):
- line = encodeString(line)
- line = "<%s>\n"%line
- fd.write (indent + line)
- for child in node.children:
- self.printNode(fd, child, level+1)
- line = "</%s>\n"%node.name
- fd.write (indent + line)
- else:
- line = encodeString(line)
- line = "<%s/>\n"%line
- fd.write (indent + line)
-
- elif node.nodeType == NodeType.Content:
- content = node.content.strip()
- if len(content) > 0:
- content = encodeString(content)
- fd.write (indent + content + "\n")
-
diff --git a/source/node.py b/source/node.py
new file mode 100644
index 0000000..d956451
--- /dev/null
+++ b/source/node.py
@@ -0,0 +1,102 @@
+
+class NodeType:
+ Unknown = 0
+ Root = 1
+ Node = 2
+ Content = 3
+
+class NodeBase:
+ def __init__ (self, nodeType = NodeType.Unknown):
+ self.children = []
+ self.nodeType = nodeType
+
+ def appendChild (self, node):
+ self.children.append(node)
+
+ def setContent (self, content):
+ self.content = content
+
+ def firstChild (self):
+ return self.children[0]
+
+class Root(NodeBase):
+ def __init__ (self):
+ NodeBase.__init__(self, NodeType.Root)
+
+class Content(NodeBase):
+ def __init__ (self, content):
+ NodeBase.__init__(self, NodeType.Content)
+ self.content = content
+
+class Node(NodeBase):
+ def __init__ (self, name, attrs={}):
+ NodeBase.__init__(self, NodeType.Node)
+ self.name = name
+ self.attrs = attrs
+
+
+encodeTable = {
+ '>': 'gt',
+ '<': 'lt',
+ '&': 'amp'
+}
+
+def encodeString (sin):
+ sout = ''
+ for c in sin:
+ if ord(c) >= 128:
+ # encode non-ascii ranges.
+ sout += "\\x%2.2x"%ord(c)
+ elif encodeTable.has_key(c):
+ # encode html symbols.
+ sout += '&' + encodeTable[c] + ';'
+ else:
+ sout += c
+
+ return sout
+
+nodesToSkip = {
+# 'paragraph': 1,
+ 'comment': 1,
+ 'history': 1
+}
+
+def prettyPrint (fd, node):
+ printNode(fd, node, 0)
+
+def printNode (fd, node, level):
+ singleIndent = ' '*4
+ indent = singleIndent*level
+ if node.nodeType == NodeType.Root:
+ # root node itself only contains child nodes.
+ for child in node.children:
+ printNode(fd, child, level)
+ elif node.nodeType == NodeType.Node:
+ hasChildren = len(node.children) > 0
+
+ # We add '<' and '>' (or '/>') after the element content gets
+ # encoded.
+ line = node.name
+ if len(node.attrs) > 0:
+ keys = node.attrs.keys()
+ keys.sort()
+ for key in keys:
+ line += " " + key + '="' + node.attrs[key] + '"'
+ if hasChildren:# and not nodesToSkip.has_key(node.name):
+ line = encodeString(line)
+ line = "<%s>\n"%line
+ fd.write (indent + line)
+ for child in node.children:
+ printNode(fd, child, level+1)
+ line = "</%s>\n"%node.name
+ fd.write (indent + line)
+ else:
+ line = encodeString(line)
+ line = "<%s/>\n"%line
+ fd.write (indent + line)
+
+ elif node.nodeType == NodeType.Content:
+ content = node.content.strip()
+ if len(content) > 0:
+ content = encodeString(content)
+ fd.write (indent + content + "\n")