summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThorsten Behrens <tbehrens@novell.com>2010-06-05 01:58:23 +0200
committerThorsten Behrens <tbehrens@novell.com>2010-06-05 01:58:23 +0200
commite93f074145914a1d3b30da0e9a574e8b08ddb769 (patch)
treec3e4f1ea368879372b8fc17b7e4371c68a569b46
parent538372e2884c8eb2a2e19ac128e906ff6fba2cb6 (diff)
Added demo iteration code, more cleanup, demo docx
-rw-r--r--examples/ooxml-strict/Lorem-ipsum.docxbin0 -> 10395 bytes
-rw-r--r--examples/ooxml-strict/README23
-rw-r--r--examples/ooxml-strict/demo.py24
-rw-r--r--examples/ooxml-strict/dumpsample.py49
-rw-r--r--examples/ooxml-strict/opc.py2
-rwxr-xr-xexamples/ooxml-strict/test.sh7
6 files changed, 79 insertions, 26 deletions
diff --git a/examples/ooxml-strict/Lorem-ipsum.docx b/examples/ooxml-strict/Lorem-ipsum.docx
new file mode 100644
index 0000000..34d9b0d
--- /dev/null
+++ b/examples/ooxml-strict/Lorem-ipsum.docx
Binary files differ
diff --git a/examples/ooxml-strict/README b/examples/ooxml-strict/README
new file mode 100644
index 0000000..f09bc9b
--- /dev/null
+++ b/examples/ooxml-strict/README
@@ -0,0 +1,23 @@
+HOWTO run all of this:
+======================
+
+You've obviously cloned the git repo already. You'll now want to
+generate the Python binding classes from the schema. cd into this
+directory, and issue:
+
+"./test.sh"
+
+That'll generate lots of Python files in the wml, sml, pml etc. subdirs
+inside this directory, and afterwards run a demo iteration with the
+Lorem-ipsum.docx file. Inspect the results in your /tmp/ dir.
+
+How to write your custom iteration rules: look into demo.py, you'll
+basically want to hack the 'worklist' list of regular expressions -
+all schema types matched there will get iterated. See the *.xsd files
+for the corresponding XML schema type names.
+
+More advanced stuff to follow.
+
+Thorsten
+
+
diff --git a/examples/ooxml-strict/demo.py b/examples/ooxml-strict/demo.py
new file mode 100644
index 0000000..7fcf4af
--- /dev/null
+++ b/examples/ooxml-strict/demo.py
@@ -0,0 +1,24 @@
+# Copyright 2010, Thorsten Behrens, Novell Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain a
+# copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import re
+
+# use default list of xml mimetypes in opc
+mimetypes = None
+
+# always iterate twice over a matched element
+iterations = lambda i: 2
+
+# always use the same regexp regardless of mimetype
+worklist = lambda i: [re.compile('.*CT_R'), re.compile('.*rsidR.*')]
diff --git a/examples/ooxml-strict/dumpsample.py b/examples/ooxml-strict/dumpsample.py
index 36b5fc4..afec026 100644
--- a/examples/ooxml-strict/dumpsample.py
+++ b/examples/ooxml-strict/dumpsample.py
@@ -12,36 +12,39 @@
# License for the specific language governing permissions and limitations
# under the License.
-import re
-import sys
-import StringIO
+import sys, os, StringIO
import pyxb.binding.saxer
import opc
-import dml.dml
-import pml.pml
-import props.props
-import sml.sml
-import wml.wml
+import dml.dml, pml.pml, props.props, sml.sml, wml.wml
-
-if len(sys.argv) > 1:
- package = opc.OPCPackage(sys.argv[1])
-
- for fragment in package.files():
+if len(sys.argv) < 3:
+ print "Usage: dumpsample.py <worklist.py> <input_file> <output_dir>"
+ sys.exit(1)
+else:
+ exec "import "+sys.argv[1]+" as worklist"
+ inFile = sys.argv[2]
+ (inFileName,inFileExt) = os.path.splitext(os.path.basename(inFile))
+ outDir = sys.argv[3]
+
+ package = opc.OPCPackage(inFile)
+ iteration=1
+
+ for (fragment, mimetype, schema, reltype) in package.files(worklist.mimetypes):
saxer = pyxb.binding.saxer.make_parser(location_base=fragment)
handler = saxer.getContentHandler()
saxer.parse(StringIO.StringIO(package.read(fragment)))
sax_instance = handler.rootObject()
- for i in sax_instance.iterateBinding([re.compile('.*CT_R'), re.compile('.*rsidR.*')]):
- i()
- print sax_instance.toxml()
- print
- i()
- print sax_instance.toxml()
- print
- i()
- print sax_instance.toxml()
- print
+ for contentIter in sax_instance.iterateBinding(worklist.worklist(mimetype)):
+ # iterate content n times
+ for i in range(worklist.iterations(mimetype)):
+ contentIter()
+ currOutFile = outDir+"/"+inFileName+str(iteration)+inFileExt
+ if os.system("cp "+inFile+" "+currOutFile) == 0:
+ outPackage = opc.OPCPackage(currOutFile,"a")
+ outPackage.writestr(fragment,sax_instance.toxml())
+ outPackage.close()
+ iteration += 1
+ print "Written "+currOutFile
diff --git a/examples/ooxml-strict/opc.py b/examples/ooxml-strict/opc.py
index ba96ffa..53de1b0 100644
--- a/examples/ooxml-strict/opc.py
+++ b/examples/ooxml-strict/opc.py
@@ -313,4 +313,4 @@ class OPCPackage (zipfile.ZipFile):
if not mimetype is None:
for elem in mimetypes:
if elem[0] == mimetype:
- yield fragment
+ yield (fragment, mimetype, elem[1], elem[2])
diff --git a/examples/ooxml-strict/test.sh b/examples/ooxml-strict/test.sh
index 52920df..d85dd76 100755
--- a/examples/ooxml-strict/test.sh
+++ b/examples/ooxml-strict/test.sh
@@ -1,6 +1,9 @@
-PYXB_ROOT=${PYXB_ROOT:-../..}
+PYXB_ROOT=${PYXB_ROOT:-`pwd`/../..}
export PYTHONPATH=${PYXB_ROOT}
export PATH=${PYXB_ROOT}/scripts:${PATH}
+echo "Generating binding classes" ; echo ; echo
sh genbindings.sh
-#python dumpsample.py
+
+echo ; echo ; echo "Running demo iteration" ; echo ; echo
+python dumpsample.py demo Lorem-ipsum.docx /tmp