summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kohei Yoshida <kyoshida@novell.com> 2009-12-21 01:51:44 -0500
committer: Kohei Yoshida <kyoshida@novell.com> 2009-12-21 01:51:44 -0500
commit: 7fe4aea75e8d7174b710713ba53f8fa8744a0c3c (patch)
tree: 8fc9bd4ce50e38b04753688a61781ef4075e132a
parent: 6a834d696d2eefe684593c29be6cdc4d4ac48a1f (diff)
More on converting the parsed xhp contents to docbook.
-rwxr-xr-x ooo-help-parser.py 9
-rw-r--r-- source/docbook.py 32
-rw-r--r-- source/expatimpl.py 8
-rw-r--r-- source/node.py 46
4 files changed, 82 insertions, 13 deletions
diff --git a/ooo-help-parser.py b/ooo-help-parser.py
index 7af6cce..5b0e785 100755
--- a/ooo-help-parser.py
+++ b/ooo-help-parser.py
@@ -36,14 +36,15 @@ def main ():
fd = open(options.output, 'w')
filesParsed = 0
- rootNodes = []
+ rootNodes = {}
for fpath in filepaths:
file = open(fpath, 'r')
strm = file.read()
file.close()
p = expatimpl.Parser(strm)
p.parse()
- rootNodes.append(p.root)
+ if p.filename != None:
+ rootNodes[p.filename] = p.root
filesParsed += 1
if options.convert:
@@ -51,8 +52,8 @@ def main ():
converter.convert()
converter.prettyPrint(fd)
else:
- for root in rootNodes:
- node.prettyPrint(fd, root)
+ for filename in rootNodes.keys():
+ node.prettyPrint(fd, rootNodes[filename])
globals.info("%d files have been processed"%filesParsed)
if fd != sys.stdout:
diff --git a/source/docbook.py b/source/docbook.py
index 84f38c2..8363b9e 100644
--- a/source/docbook.py
+++ b/source/docbook.py
@@ -1,13 +1,41 @@
import globals, node
+chapterNames = [
+ '/text/swriter/main0000.xhp',
+ '/text/scalc/main0000.xhp',
+ '/text/sdraw/main0000.xhp',
+ '/text/simpress/main0000.xhp',
+ '/text/smath/main0000.xhp',
+ '/text/schart/main0000.xhp'
+]
+
class DocBookConverter:
def __init__ (self, xhproots):
self.__xhproots = xhproots
self.root = node.Root()
def convert (self):
- pass
+ book = self.root.appendElement('book')
+ bookinfo = book.appendElement('bookinfo')
+ title = bookinfo.appendElement('title')
+ title.appendContent("OpenOffice.org Help")
+
+ for chapterName in chapterNames:
+ if not self.__xhproots.has_key(chapterName):
+ continue
+
+ xhproot = self.__xhproots[chapterName]
+ # go to helpdocument/meta/topic/title to get the title text.
+ chapter = book.appendElement('chapter')
+ titleText = xhproot.firstChild().firstChildByName('meta').firstChildByName('topic').firstChildByName('title').getContent()
+ chapter.appendElement('title').appendContent(titleText)
+
+ # convert all paragraphs.
+ xhpbody = xhproot.firstChild().firstChildByName('body')
+ for xhppara in xhpbody.getChildByName('paragraph'):
+ para = chapter.appendElement('para')
+ para.appendContent(xhppara.getContent())
def prettyPrint (self, fd):
- pass
+ node.prettyPrint(fd, self.root)
diff --git a/source/expatimpl.py b/source/expatimpl.py
index acb823f..55a98d8 100644
--- a/source/expatimpl.py
+++ b/source/expatimpl.py
@@ -11,9 +11,10 @@ class Parser:
self.strm = strm
self.root = node.Root()
self.nodestack = [self.root]
+ self.filename = None
def startElement(self, name, attrs):
- n = node.Node(name, attrs)
+ n = node.Element(name, attrs)
self.nodestack[-1].appendChild(n)
self.nodestack.append(n)
@@ -31,7 +32,12 @@ class Parser:
# move the unicode quote (e.g. u'foo' -> foo) if exists.
s = s[2:-1]
+ s = s.replace('$[officename]', 'OpenOffice.org')
+ s = s.replace('%PRODUCTNAME', 'OpenOffice.org')
self.nodestack[-1].appendChild(node.Content(s))
+ if self.nodestack[-1].name == 'filename':
+ # For now, I just assume that the filename element is always at the correct position.
+ self.filename = s
def parse (self):
p = xml.parsers.expat.ParserCreate()
diff --git a/source/node.py b/source/node.py
index d956451..dcda870 100644
--- a/source/node.py
+++ b/source/node.py
@@ -2,7 +2,7 @@
class NodeType:
Unknown = 0
Root = 1
- Node = 2
+ Element = 2
Content = 3
class NodeBase:
@@ -13,12 +13,32 @@ class NodeBase:
def appendChild (self, node):
self.children.append(node)
- def setContent (self, content):
- self.content = content
+ def appendElement (self, name):
+ node = Element(name)
+ self.appendChild(node)
+ return node
+
+ def appendContent (self, text):
+ node = Content(text)
+ self.appendChild(node)
+ return node
def firstChild (self):
return self.children[0]
+ def firstChildByName (self, name):
+ for child in self.children:
+ if child.nodeType == NodeType.Element and child.name == name:
+ return child
+ return None
+
+ def getChildByName (self, name):
+ children = []
+ for child in self.children:
+ if child.nodeType == NodeType.Element and child.name == name:
+ children.append(child)
+ return children
+
class Root(NodeBase):
def __init__ (self):
NodeBase.__init__(self, NodeType.Root)
@@ -28,12 +48,26 @@ class Content(NodeBase):
NodeBase.__init__(self, NodeType.Content)
self.content = content
-class Node(NodeBase):
+class Element(NodeBase):
def __init__ (self, name, attrs={}):
- NodeBase.__init__(self, NodeType.Node)
+ NodeBase.__init__(self, NodeType.Element)
self.name = name
self.attrs = attrs
+ def getContent (self):
+ text = ''
+ first = True
+ for child in self.children:
+ if first:
+ first = False
+ else:
+ text += ' '
+ if child.nodeType == NodeType.Content:
+ text += child.content
+ elif child.nodeType == NodeType.Element:
+ text += child.getContent()
+ return text
+
encodeTable = {
'>': 'gt',
@@ -71,7 +105,7 @@ def printNode (fd, node, level):
# root node itself only contains child nodes.
for child in node.children:
printNode(fd, child, level)
- elif node.nodeType == NodeType.Node:
+ elif node.nodeType == NodeType.Element:
hasChildren = len(node.children) > 0
# We add '<' and '>' (or '/>') after the element content gets