summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThorsten Behrens <tbehrens@novell.com>2010-05-26 09:25:18 +0200
committerThorsten Behrens <tbehrens@novell.com>2010-05-26 09:25:18 +0200
commitc40781c051cf6f37a489f7aeda7294cec756e535 (patch)
tree11b343ea5f075e799a8457939dd61185f7174090
Initial import of pyx tools
* xml2pyx.py: Convert from XML to the line-based PYX format. * pyx2xml.py: Convert back from PYX to XML. PYX is a line-based representation of (much of) the XML info set; see http://www.ibm.com/developerworks/xml/library/x-matters17.html for an introduction.
-rwxr-xr-xpyx2xml.py79
-rwxr-xr-xxml2pyx.py62
2 files changed, 141 insertions, 0 deletions
diff --git a/pyx2xml.py b/pyx2xml.py
new file mode 100755
index 0000000..3e478d7
--- /dev/null
+++ b/pyx2xml.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain a
+# copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+# Inspired by Sean McGrath's pyx2xml, and David Mertz' XML matters
+# Cobbled together by Thorsten Behrens <thb@openoffice.org>
+#
+import sys, os, re
+
+def grabNS(qname):
+    """Translate a PYX '{uri}local' name into a prefixed XML name.
+
+    Every distinct namespace URI is assigned a generated prefix ('ns1',
+    'ns2', ...) the first time it is seen; the assignment is recorded in
+    the module-level ``uris`` dict and counted in ``num_ns``.
+
+    qname -- element or attribute name as found on a PYX line.
+
+    Returns 'nsN:local' for a namespaced name.  A name without a
+    '{uri}' part, or whose uri part is empty or the text 'None', is
+    returned as the bare local name and no prefix is allocated.
+    """
+    global num_ns, uris
+    if not qname.startswith('{'):
+        # Plain PYX name without namespace information: pass it through
+        # instead of slicing it apart around a '}' that is not there.
+        return qname
+    uri, closing, name = qname[1:].partition('}')
+    if not closing:
+        return qname
+    if uri in ('', 'None'):
+        # xml2pyx.py formats the namespace slot with %s, so a name that
+        # has no namespace shows up as '{None}local'.  Binding a prefix
+        # to the string "None" would move the name into a bogus namespace.
+        return name
+    if uri not in uris:
+        num_ns += 1
+        uris[uri] = 'ns' + str(num_ns)
+    return uris[uri] + ':' + name
+
+num_ns = 0      # number of namespace prefixes handed out so far (see grabNS)
+get_attrs = 0   # non-zero while a start tag is open and may still get 'A' lines
+uris = {}       # namespace URI -> generated prefix
+lines = []      # buffered output lines; only filled when -ns is given
+
+# PYX escape sequence -> the character it stands for.
+_PYX_ESCAPES = {'\\\\': '\\', '\\t': '\t', '\\n': '\n'}
+_pyx_escape = re.compile(r'\\[\\tn]')
+
+def unescape(s):
+    """Return s with the PYX escapes \\\\, \\t and \\n decoded.
+
+    The decoding is done in one left-to-right pass.  Chaining
+    str.replace() calls decodes an escaped backslash followed by a
+    literal 't' as backslash + TAB, because the second backslash is
+    taken as the start of a '\\t' escape.  '\\n' is decoded as well,
+    since xml2pyx.py writes embedded newlines that way.
+    """
+    return _pyx_escape.sub(lambda found: _PYX_ESCAPES[found.group(0)], s)
+
+# With -ns the output is collected in ``lines`` so that namespace
+# declarations can be added to the first opening tag once all URIs are
+# known; without it every line goes straight to stdout.
+ns_handling = len(sys.argv) > 1 and sys.argv[1] == '-ns'
+if ns_handling:
+    writeln = lines.append
+else:
+    writeln = lambda s: sys.stdout.write(s+'\n')
+
+# Text and attribute values arrive unescaped in PYX, so they have to be
+# XML-escaped again on the way out.
+from xml.sax.saxutils import escape, quoteattr
+
+# Convert PYX read from stdin into XML, one PYX line at a time.  The first
+# character of each line selects the record type:
+#   ?  processing instruction      (  start tag      A  attribute
+#   -  character data              )  end tag
+writeln('<?xml version="1.0" encoding="UTF-8"?>')
+curr_line = ""
+for line in sys.stdin:
+    # Remove only a terminator that is really there, so that a final line
+    # without a newline does not lose its last character.
+    if line.endswith('\n'):
+        line = line[:-1]
+    if not line:
+        continue
+    kind, body = line[0], line[1:]
+    if get_attrs and kind != 'A':
+        # attr section ends here
+        get_attrs = 0
+        curr_line += '>'
+    if kind == '?':
+        writeln(curr_line + '<?%s?>' % unescape(body))
+        curr_line = ""
+    elif kind == '(':
+        curr_line += '<%s' % grabNS(body)
+        get_attrs = 1
+    elif kind == 'A':
+        # Split at the first blank only: the value may be empty or may
+        # start with whitespace, both of which split(None, 1) mishandles.
+        name, _, val = body.partition(' ')
+        curr_line += ' %s=%s' % (grabNS(name), quoteattr(unescape(val)))
+    elif kind == '-':
+        # A leading encoded newline ends the current output line.  Any
+        # text following it on the same PYX line is kept.
+        while body.startswith(r'\n'):
+            writeln(curr_line)
+            curr_line = ""
+            body = body[2:]
+        curr_line += escape(unescape(body))
+    elif kind == ')':
+        curr_line += '</%s>' % grabNS(body)
+
+if curr_line:
+    writeln(curr_line)
+
+if ns_handling:
+    # Emit the buffered lines, adding an xmlns:nsN declaration for every
+    # collected namespace to the first opening tag.
+    #
+    # The tag name must stop at '>' or '/', otherwise a line such as
+    # '<ns1:root>' is matched including the '>' and the declarations end
+    # up behind the tag.  '?' and '!' keep the XML declaration, processing
+    # instructions and comments from being mistaken for the root element.
+    opening_tag = re.compile(r"(\s*<\s*)([^?!/>\s]+)")
+    # Sorted by prefix so that the output does not depend on dict order.
+    ns_decls = ''.join(' xmlns:%s="%s"' % (prefix, uri)
+                       for (uri, prefix)
+                       in sorted(uris.items(), key=lambda item: item[1]))
+    ns_written = False
+    for line in lines:
+        found = None
+        if not ns_written:
+            found = opening_tag.match(line)
+        if found:
+            # Insert right after the tag name, in front of any attributes.
+            line = line[:found.end()] + ns_decls + line[found.end():]
+            ns_written = True
+        sys.stdout.write(line + '\n')
diff --git a/xml2pyx.py b/xml2pyx.py
new file mode 100755
index 0000000..4b16bac
--- /dev/null
+++ b/xml2pyx.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain a
+# copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+# Inspired by Sean McGrath's html2pyx, and Peter A. Bigot's saxutils
+# Cobbled together by Thorsten Behrens <thb@openoffice.org>
+#
+import xml.sax
+import string
+import StringIO
+
+class DummyResolver:
+    """Entity resolver that answers every request with an empty stream.
+
+    Installed on the parser so that external entities are not fetched.
+    """
+    def resolveEntity (self, p, s):
+        # p and s are whatever the parser passes for the entity (the
+        # public and system identifier in the SAX EntityResolver
+        # interface); both are ignored.
+        nothing = StringIO.StringIO('')
+        return nothing
+
+class PyxConverter (xml.sax.handler.ContentHandler):
+    """SAX content handler that prints the parsed document as PYX lines.
+
+    One line is printed per event; its first character gives the kind:
+    '(' start tag, 'A' attribute, ')' end tag, '-' character data and
+    '?' processing instruction.  Element and attribute names are written
+    as '{namespace-uri}localname'.
+    """
+
+    def setDocumentLocator (self, locator):
+        # Position information is not used.
+        pass
+
+    def _emit (self, line):
+        """Print one PYX line, as UTF-8 if it is not a plain str."""
+        # Under Python 2 the parser hands over unicode objects; printing
+        # those to a pipe raises UnicodeEncodeError for non-ASCII text.
+        if not isinstance(line, str):
+            line = line.encode('utf-8')
+        print(line)
+
+    def encode (self,s):
+        """Return s with backslash, newline and tab written as PYX escapes."""
+        # The backslash has to go first so that the backslashes added for
+        # newline and tab are not doubled afterwards.
+        s = s.replace("\\", "\\\\")
+        s = s.replace("\n", "\\n")
+        s = s.replace("\t", "\\t")
+        return s
+
+    def startElementNS (self, name, qname, attrs):
+        # name and the attribute keys are (namespace-uri, localname) pairs.
+        self._emit("({%s}%s" % name)
+        for (n,v) in attrs.items():
+            self._emit("A{%s}%s %s" % (n[0], n[1], (self.encode(v))))
+
+    def endElementNS (self, name, qname):
+        self._emit("){%s}%s" % name)
+
+    def characters (self, content):
+        self._emit("-%s" % self.encode(content))
+
+    def processingInstruction (self, target, data=None):
+        """Print a '?' line holding the target and data of a PI.
+
+        SAX calls this with (target, data); the one-argument form took
+        only the data and raised TypeError as soon as a document contained
+        a processing instruction.  A call with a single argument is still
+        accepted and treated as data only.
+        """
+        if data is None:
+            self._emit("?%s" % self.encode(target))
+        else:
+            self._emit("?%s %s" % (target, self.encode(data)))
+
+if __name__ == "__main__":
+    import sys
+
+    # Fail with a readable message instead of an IndexError traceback.
+    if len(sys.argv) < 2:
+        sys.exit("usage: xml2pyx.py <file.xml>")
+
+    parser = xml.sax.make_parser()
+    # Namespace processing on: the handler implements startElementNS and
+    # endElementNS and expects (uri, localname) names.
+    parser.setFeature(xml.sax.handler.feature_namespaces, True)
+    parser.setFeature(xml.sax.handler.feature_namespace_prefixes, False)
+    parser.setContentHandler(PyxConverter())
+    parser.setEntityResolver(DummyResolver())
+
+    # Close the input file even when parsing fails.
+    source = open(sys.argv[1],"r")
+    try:
+        parser.parse(source)
+    finally:
+        source.close()