summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThorsten Behrens <tbehrens@novell.com>2010-05-26 10:45:10 +0200
committerThorsten Behrens <tbehrens@novell.com>2010-05-26 10:45:10 +0200
commite7557737a0706b83a373dca1d75918038339deb2 (patch)
tree1dcbbb1d6f64962314596c62ee4475e36310c748
parentc40781c051cf6f37a489f7aeda7294cec756e535 (diff)
Added README
-rw-r--r--README88
1 files changed, 88 insertions, 0 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..73ec493
--- /dev/null
+++ b/README
@@ -0,0 +1,88 @@
+README
+======
+
+Much of this content I learned from
+http://www.ibm.com/developerworks/xml/library/x-matters17.html, and
+Dave Mertz is also the one who introduced me to PYX. The authoritative
+source, and inventor of this format, is Sean McGrath. His article is
+here: http://www.xml.com/pub/a/2000/03/15/feature/index.html. You may
+also want to browse a copy of "McGrath - XML Processing with Python,
+Prentice Hall, 2000".
+
+The idea behind PYX is to have xml files easily digestible by common
+unix textutils, like grep, sed, awk, diff etc. The format itself owes
+ideas to SGML's ESIS standard.
+
+This is how the format works: each line has a prefix character, that
+determines the content-type of the line. Start and end tags, and
+individual attributes each get their own line, to facilitate easy
+line-based processing. The prefix characters are:
+
+ ( start-tag
+ ) end-tag
+ A attribute
+ - character data (content)
+ ? processing instruction
+
+So this xml snippet
+
+<?xml version="1.0" encoding="utf-8"?>
+<office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:smil="urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0" xmlns:anim="urn:oasis:names:tc:opendocument:xmlns:animation:1.0" xmlns:rpt="http://openoffice.org/2005/report" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:officeooo="http://openoffice.org/2009/office" xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:css3t="http://www.w3.org/TR/css3-text/" office:version="1.2" grddl:transformation="http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl">
+ <office:body>
+ <office:presentation>
+ <draw:page>
+ blah
+ </draw:page>
+ </office:presentation>
+ </office:body>
+</office:document-content>
+
+looks like this in PYX:
+
+({urn:oasis:names:tc:opendocument:xmlns:office:1.0}document-content
+A{urn:oasis:names:tc:opendocument:xmlns:office:1.0}version 1.2
+A{http://www.w3.org/2003/g/data-view#}transformation http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl
+-\n
+-
+({urn:oasis:names:tc:opendocument:xmlns:office:1.0}body
+-\n
+-
+({urn:oasis:names:tc:opendocument:xmlns:office:1.0}presentation
+-\n
+-
+({urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}page
+-\n
+- blah
+-\n
+-
+){urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}page
+-\n
+-
+){urn:oasis:names:tc:opendocument:xmlns:office:1.0}presentation
+-\n
+-
+){urn:oasis:names:tc:opendocument:xmlns:office:1.0}body
+-\n
+){urn:oasis:names:tc:opendocument:xmlns:office:1.0}document-content
+
+Converting that back to xml via "pyx2xml -ns" yields this:
+
+<?xml version="1.0" encoding="UTF-8"?>
+<ns1:document-content xmlns:ns3="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:ns1="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:ns2="http://www.w3.org/2003/g/data-view#" ns1:version="1.2" ns2:transformation="http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl">
+ <ns1:body>
+ <ns1:presentation>
+ <ns3:page>
+ blah
+ </ns3:page>
+ </ns1:presentation>
+ </ns1:body>
+</ns1:document-content>
+
+You notice that the conversion is not lossless in terms of octets
+produced, but should reasonably faithfully conserve the xml info set.
+
+Feedback to <thb at openoffice dot org>
+
+Have fun hacking,
+
+Thorsten