More work on converting the parsed XHP contents to DocBook.

author: Kohei Yoshida <kyoshida@novell.com> 2009-12-21 01:51:44 -0500
committer: Kohei Yoshida <kyoshida@novell.com> 2009-12-21 01:51:44 -0500
commit: 7fe4aea75e8d7174b710713ba53f8fa8744a0c3c (patch)
tree: 8fc9bd4ce50e38b04753688a61781ef4075e132a
parent: 6a834d696d2eefe684593c29be6cdc4d4ac48a1f (diff)
4 files changed, 82 insertions, 13 deletions
diff --git a/ooo-help-parser.py b/ooo-help-parser.py
index 7af6cce..5b0e785 100755
--- a/ooo-help-parser.py
+++ b/ooo-help-parser.py
@@ -36,14 +36,15 @@ def main ():
         fd = open(options.output, 'w')
 
     filesParsed = 0
-    rootNodes = []
+    rootNodes = {}
     for fpath in filepaths:
         file = open(fpath, 'r')
         strm = file.read()
         file.close()
         p = expatimpl.Parser(strm)
         p.parse()
-        rootNodes.append(p.root)
+        if p.filename != None:
+            rootNodes[p.filename] = p.root
         filesParsed += 1
 
     if options.convert:
@@ -51,8 +52,8 @@ def main ():
         converter.convert()
         converter.prettyPrint(fd)
     else:
-        for root in rootNodes:
-            node.prettyPrint(fd, root)
+        for filename in rootNodes.keys():
+            node.prettyPrint(fd, rootNodes[filename])
 
     globals.info("%d files have been processed"%filesParsed)
     if fd != sys.stdout:
diff --git a/source/docbook.py b/source/docbook.py
index 84f38c2..8363b9e 100644
--- a/source/docbook.py
+++ b/source/docbook.py
@@ -1,13 +1,41 @@
 import globals, node
 
+chapterNames = [
+    '/text/swriter/main0000.xhp',
+    '/text/scalc/main0000.xhp',
+    '/text/sdraw/main0000.xhp',
+    '/text/simpress/main0000.xhp',
+    '/text/smath/main0000.xhp',
+    '/text/schart/main0000.xhp'
+]
+
 class DocBookConverter:
     def __init__ (self, xhproots):
         self.__xhproots = xhproots
         self.root = node.Root()
 
     def convert (self):
-        pass
+        book = self.root.appendElement('book')
+        bookinfo = book.appendElement('bookinfo')
+        title = bookinfo.appendElement('title')
+        title.appendContent("OpenOffice.org Help")
+
+        for chapterName in chapterNames:
+            if not self.__xhproots.has_key(chapterName):
+                continue
+
+            xhproot = self.__xhproots[chapterName]
+            # go to helpdocument/meta/topic/title to get the title text.
+            chapter = book.appendElement('chapter')
+            titleText = xhproot.firstChild().firstChildByName('meta').firstChildByName('topic').firstChildByName('title').getContent()
+            chapter.appendElement('title').appendContent(titleText)
+
+            # convert all paragraphs.
+            xhpbody = xhproot.firstChild().firstChildByName('body')
+            for xhppara in xhpbody.getChildByName('paragraph'):
+                para = chapter.appendElement('para')
+                para.appendContent(xhppara.getContent())
 
     def prettyPrint (self, fd):
-        pass
+        node.prettyPrint(fd, self.root)
 
diff --git a/source/expatimpl.py b/source/expatimpl.py
index acb823f..55a98d8 100644
--- a/source/expatimpl.py
+++ b/source/expatimpl.py
@@ -11,9 +11,10 @@ class Parser:
         self.strm = strm
         self.root = node.Root()
         self.nodestack = [self.root]
+        self.filename = None
 
     def startElement(self, name, attrs):
-        n = node.Node(name, attrs)
+        n = node.Element(name, attrs)
         self.nodestack[-1].appendChild(n)
         self.nodestack.append(n)
 
@@ -31,7 +32,12 @@ class Parser:
             # move the unicode quote (e.g. u'foo' -> foo) if exists.
             s = s[2:-1]
 
+        s = s.replace('$[officename]', 'OpenOffice.org')
+        s = s.replace('%PRODUCTNAME', 'OpenOffice.org')
         self.nodestack[-1].appendChild(node.Content(s))
+        if self.nodestack[-1].name == 'filename':
+            # For now, I just assume that the filename element is always at the correct position.
+            self.filename = s
 
     def parse (self):
         p = xml.parsers.expat.ParserCreate()
diff --git a/source/node.py b/source/node.py
index d956451..dcda870 100644
--- a/source/node.py
+++ b/source/node.py
@@ -2,7 +2,7 @@
 class NodeType:
     Unknown = 0
     Root    = 1
-    Node    = 2
+    Element = 2
     Content = 3
 
 class NodeBase:
@@ -13,12 +13,32 @@ class NodeBase:
     def appendChild (self, node):
         self.children.append(node)
 
-    def setContent (self, content):
-        self.content = content
+    def appendElement (self, name):
+        node = Element(name)
+        self.appendChild(node)
+        return node
+
+    def appendContent (self, text):
+        node = Content(text)
+        self.appendChild(node)
+        return node
 
     def firstChild (self):
         return self.children[0]
 
+    def firstChildByName (self, name):
+        for child in self.children:
+            if child.nodeType == NodeType.Element and child.name == name:
+                return child
+        return None
+
+    def getChildByName (self, name):
+        children = []
+        for child in self.children:
+            if child.nodeType == NodeType.Element and child.name == name:
+                children.append(child)
+        return children
+
 class Root(NodeBase):
     def __init__ (self):
         NodeBase.__init__(self, NodeType.Root)
@@ -28,12 +48,26 @@ class Content(NodeBase):
         NodeBase.__init__(self, NodeType.Content)
         self.content = content
 
-class Node(NodeBase):
+class Element(NodeBase):
     def __init__ (self, name, attrs={}):
-        NodeBase.__init__(self, NodeType.Node)
+        NodeBase.__init__(self, NodeType.Element)
         self.name = name
         self.attrs = attrs
 
+    def getContent (self):
+        text = ''
+        first = True
+        for child in self.children:
+            if first:
+                first = False
+            else:
+                text += ' '
+            if child.nodeType == NodeType.Content:
+                text += child.content
+            elif child.nodeType == NodeType.Element:
+                text += child.getContent()
+        return text
+
 
 encodeTable = {
     '>': 'gt',
@@ -71,7 +105,7 @@ def printNode (fd, node, level):
         # root node itself only contains child nodes.
         for child in node.children:
             printNode(fd, child, level)
-    elif node.nodeType == NodeType.Node:
+    elif node.nodeType == NodeType.Element:
         hasChildren = len(node.children) > 0
 
         # We add '<' and '>' (or '/>') after the element content gets
author	Kohei Yoshida <kyoshida@novell.com>	2009-12-21 01:51:44 -0500
committer	Kohei Yoshida <kyoshida@novell.com>	2009-12-21 01:51:44 -0500
commit	7fe4aea75e8d7174b710713ba53f8fa8744a0c3c (patch)
tree	8fc9bd4ce50e38b04753688a61781ef4075e132a
parent	6a834d696d2eefe684593c29be6cdc4d4ac48a1f (diff)