diff options
author | Kohei Yoshida <kyoshida@novell.com> | 2009-12-21 01:51:44 -0500 |
---|---|---|
committer | Kohei Yoshida <kyoshida@novell.com> | 2009-12-21 01:51:44 -0500 |
commit | 7fe4aea75e8d7174b710713ba53f8fa8744a0c3c (patch) | |
tree | 8fc9bd4ce50e38b04753688a61781ef4075e132a | |
parent | 6a834d696d2eefe684593c29be6cdc4d4ac48a1f (diff) |
More on converting the parsed xhp contents to docbook.
-rwxr-xr-x | ooo-help-parser.py | 9 |
-rw-r--r-- | source/docbook.py | 32 |
-rw-r--r-- | source/expatimpl.py | 8 |
-rw-r--r-- | source/node.py | 46 |
4 files changed, 82 insertions, 13 deletions
diff --git a/ooo-help-parser.py b/ooo-help-parser.py index 7af6cce..5b0e785 100755 --- a/ooo-help-parser.py +++ b/ooo-help-parser.py @@ -36,14 +36,15 @@ def main (): fd = open(options.output, 'w') filesParsed = 0 - rootNodes = [] + rootNodes = {} for fpath in filepaths: file = open(fpath, 'r') strm = file.read() file.close() p = expatimpl.Parser(strm) p.parse() - rootNodes.append(p.root) + if p.filename != None: + rootNodes[p.filename] = p.root filesParsed += 1 if options.convert: @@ -51,8 +52,8 @@ def main (): converter.convert() converter.prettyPrint(fd) else: - for root in rootNodes: - node.prettyPrint(fd, root) + for filename in rootNodes.keys(): + node.prettyPrint(fd, rootNodes[filename]) globals.info("%d files have been processed"%filesParsed) if fd != sys.stdout: diff --git a/source/docbook.py b/source/docbook.py index 84f38c2..8363b9e 100644 --- a/source/docbook.py +++ b/source/docbook.py @@ -1,13 +1,41 @@ import globals, node +chapterNames = [ + '/text/swriter/main0000.xhp', + '/text/scalc/main0000.xhp', + '/text/sdraw/main0000.xhp', + '/text/simpress/main0000.xhp', + '/text/smath/main0000.xhp', + '/text/schart/main0000.xhp' +] + class DocBookConverter: def __init__ (self, xhproots): self.__xhproots = xhproots self.root = node.Root() def convert (self): - pass + book = self.root.appendElement('book') + bookinfo = book.appendElement('bookinfo') + title = bookinfo.appendElement('title') + title.appendContent("OpenOffice.org Help") + + for chapterName in chapterNames: + if not self.__xhproots.has_key(chapterName): + continue + + xhproot = self.__xhproots[chapterName] + # go to helpdocument/meta/topic/title to get the title text. + chapter = book.appendElement('chapter') + titleText = xhproot.firstChild().firstChildByName('meta').firstChildByName('topic').firstChildByName('title').getContent() + chapter.appendElement('title').appendContent(titleText) + + # convert all paragraphs. 
+ xhpbody = xhproot.firstChild().firstChildByName('body') + for xhppara in xhpbody.getChildByName('paragraph'): + para = chapter.appendElement('para') + para.appendContent(xhppara.getContent()) def prettyPrint (self, fd): - pass + node.prettyPrint(fd, self.root) diff --git a/source/expatimpl.py b/source/expatimpl.py index acb823f..55a98d8 100644 --- a/source/expatimpl.py +++ b/source/expatimpl.py @@ -11,9 +11,10 @@ class Parser: self.strm = strm self.root = node.Root() self.nodestack = [self.root] + self.filename = None def startElement(self, name, attrs): - n = node.Node(name, attrs) + n = node.Element(name, attrs) self.nodestack[-1].appendChild(n) self.nodestack.append(n) @@ -31,7 +32,12 @@ class Parser: # move the unicode quote (e.g. u'foo' -> foo) if exists. s = s[2:-1] + s = s.replace('$[officename]', 'OpenOffice.org') + s = s.replace('%PRODUCTNAME', 'OpenOffice.org') self.nodestack[-1].appendChild(node.Content(s)) + if self.nodestack[-1].name == 'filename': + # For now, I just assume that the filename element is always at the correct position. 
+ self.filename = s def parse (self): p = xml.parsers.expat.ParserCreate() diff --git a/source/node.py b/source/node.py index d956451..dcda870 100644 --- a/source/node.py +++ b/source/node.py @@ -2,7 +2,7 @@ class NodeType: Unknown = 0 Root = 1 - Node = 2 + Element = 2 Content = 3 class NodeBase: @@ -13,12 +13,32 @@ class NodeBase: def appendChild (self, node): self.children.append(node) - def setContent (self, content): - self.content = content + def appendElement (self, name): + node = Element(name) + self.appendChild(node) + return node + + def appendContent (self, text): + node = Content(text) + self.appendChild(node) + return node def firstChild (self): return self.children[0] + def firstChildByName (self, name): + for child in self.children: + if child.nodeType == NodeType.Element and child.name == name: + return child + return None + + def getChildByName (self, name): + children = [] + for child in self.children: + if child.nodeType == NodeType.Element and child.name == name: + children.append(child) + return children + class Root(NodeBase): def __init__ (self): NodeBase.__init__(self, NodeType.Root) @@ -28,12 +48,26 @@ class Content(NodeBase): NodeBase.__init__(self, NodeType.Content) self.content = content -class Node(NodeBase): +class Element(NodeBase): def __init__ (self, name, attrs={}): - NodeBase.__init__(self, NodeType.Node) + NodeBase.__init__(self, NodeType.Element) self.name = name self.attrs = attrs + def getContent (self): + text = '' + first = True + for child in self.children: + if first: + first = False + else: + text += ' ' + if child.nodeType == NodeType.Content: + text += child.content + elif child.nodeType == NodeType.Element: + text += child.getContent() + return text + encodeTable = { '>': 'gt', @@ -71,7 +105,7 @@ def printNode (fd, node, level): # root node itself only contains child nodes. 
for child in node.children: printNode(fd, child, level) - elif node.nodeType == NodeType.Node: + elif node.nodeType == NodeType.Element: hasChildren = len(node.children) > 0 # We add '<' and '>' (or '/>') after the element content gets |