diff options
author | Kohei Yoshida <kyoshida@novell.com> | 2009-12-20 20:38:39 -0500 |
---|---|---|
committer | Kohei Yoshida <kyoshida@novell.com> | 2009-12-20 20:38:39 -0500 |
commit | 7cd58828d2f7819610bc4820959af1e47809a796 (patch) | |
tree | 39e8a6f4fa938fd90b2747f46fbfbe61c9f2163e | |
parent | 89a111ceb8f5808a8b66545dea4e8232471fc4ba (diff) |
More on encoding non-ascii and sgml symbols.
-rw-r--r-- | source/expatimpl.py | 36 |
1 file changed, 23 insertions, 13 deletions
```diff
diff --git a/source/expatimpl.py b/source/expatimpl.py
index 15e40c5..5d10569 100644
--- a/source/expatimpl.py
+++ b/source/expatimpl.py
@@ -38,18 +38,21 @@ class Node(NodeBase):
 
 # ============================================================================
 
-class Meta:
-    def __init__ (self):
-        self.title = None
-        self.filename = None
-
-# ============================================================================
+encodeTable = {
+    '>': 'gt',
+    '<': 'lt',
+    '&': 'amp'
+}
 
-def encodeNonAscii (sin):
+def encodeString (sin):
     sout = ''
     for c in sin:
         if ord(c) >= 128:
+            # encode non-ascii ranges.
             sout += "\\x%2.2x"%ord(c)
+        elif encodeTable.has_key(c):
+            # encode html symbols.
+            sout += '&' + encodeTable[c] + ';'
         else:
             sout += c
 
@@ -57,6 +60,12 @@ def encodeNonAscii (sin):
 
 # ============================================================================
 
+nodesToSkip = {
+#   'paragraph': 1,
+    'comment': 1,
+    'history': 1
+}
+
 class Parser:
 
     class ParseFailed(globals.Exception):
@@ -118,22 +127,23 @@ class Parser:
             keys.sort()
             for key in keys:
                 line += " " + key + '="' + node.attrs[key] + '"'
-            if hasChildren:
+            if hasChildren and not nodesToSkip.has_key(node.name):
                 line += ">\n"
-                line = encodeNonAscii(line)
+                line = encodeString(line)
                 fd.write (indent + line)
                 for child in node.children:
                     self.printNode(fd, child, level+1)
                 line = "</%s>\n"%node.name
-                line = encodeNonAscii(line)
+                line = encodeString(line)
                 fd.write (indent + line)
             else:
                 line += "/>\n"
-                line = encodeNonAscii(line)
+                line = encodeString(line)
                 fd.write (indent + line)
 
         elif node.nodeType == NodeType.Content:
-            if len(node.content) > 0:
-                content = encodeNonAscii(node.content)
+            content = node.content.strip()
+            if len(content) > 0:
+                content = encodeString(content)
                 fd.write (indent + content + "\n")
 
```