summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kohei Yoshida <kyoshida@novell.com> 2009-12-20 20:38:39 -0500
committer Kohei Yoshida <kyoshida@novell.com> 2009-12-20 20:38:39 -0500
commit 7cd58828d2f7819610bc4820959af1e47809a796 (patch)
tree 39e8a6f4fa938fd90b2747f46fbfbe61c9f2163e
parent 89a111ceb8f5808a8b66545dea4e8232471fc4ba (diff)
More on encoding non-ascii and sgml symbols.
-rw-r--r-- source/expatimpl.py 36
1 files changed, 23 insertions, 13 deletions
diff --git a/source/expatimpl.py b/source/expatimpl.py
index 15e40c5..5d10569 100644
--- a/source/expatimpl.py
+++ b/source/expatimpl.py
@@ -38,18 +38,21 @@ class Node(NodeBase):
# ============================================================================
-class Meta:
- def __init__ (self):
- self.title = None
- self.filename = None
-
-# ============================================================================
+encodeTable = {
+ '>': 'gt',
+ '<': 'lt',
+ '&': 'amp'
+}
-def encodeNonAscii (sin):
+def encodeString (sin):
sout = ''
for c in sin:
if ord(c) >= 128:
+ # encode non-ascii ranges.
sout += "\\x%2.2x"%ord(c)
+ elif encodeTable.has_key(c):
+ # encode html symbols.
+ sout += '&' + encodeTable[c] + ';'
else:
sout += c
@@ -57,6 +60,12 @@ def encodeNonAscii (sin):
# ============================================================================
+nodesToSkip = {
+# 'paragraph': 1,
+ 'comment': 1,
+ 'history': 1
+}
+
class Parser:
class ParseFailed(globals.Exception):
@@ -118,22 +127,23 @@ class Parser:
keys.sort()
for key in keys:
line += " " + key + '="' + node.attrs[key] + '"'
- if hasChildren:
+ if hasChildren and not nodesToSkip.has_key(node.name):
line += ">\n"
- line = encodeNonAscii(line)
+ line = encodeString(line)
fd.write (indent + line)
for child in node.children:
self.printNode(fd, child, level+1)
line = "</%s>\n"%node.name
- line = encodeNonAscii(line)
+ line = encodeString(line)
fd.write (indent + line)
else:
line += "/>\n"
- line = encodeNonAscii(line)
+ line = encodeString(line)
fd.write (indent + line)
elif node.nodeType == NodeType.Content:
- if len(node.content) > 0:
- content = encodeNonAscii(node.content)
+ content = node.content.strip()
+ if len(content) > 0:
+ content = encodeString(content)
fd.write (indent + content + "\n")