diff options
author | Thorsten Behrens <tbehrens@novell.com> | 2010-06-05 01:58:23 +0200 |
---|---|---|
committer | Thorsten Behrens <tbehrens@novell.com> | 2010-06-05 01:58:23 +0200 |
commit | e93f074145914a1d3b30da0e9a574e8b08ddb769 (patch) | |
tree | c3e4f1ea368879372b8fc17b7e4371c68a569b46 | |
parent | 538372e2884c8eb2a2e19ac128e906ff6fba2cb6 (diff) |
Added demo iteration code, more cleanup, demo docx
-rw-r--r-- | examples/ooxml-strict/Lorem-ipsum.docx | bin | 0 -> 10395 bytes | |||
-rw-r--r-- | examples/ooxml-strict/README | 23 | ||||
-rw-r--r-- | examples/ooxml-strict/demo.py | 24 | ||||
-rw-r--r-- | examples/ooxml-strict/dumpsample.py | 49 | ||||
-rw-r--r-- | examples/ooxml-strict/opc.py | 2 | ||||
-rwxr-xr-x | examples/ooxml-strict/test.sh | 7 |
6 files changed, 79 insertions, 26 deletions
diff --git a/examples/ooxml-strict/Lorem-ipsum.docx b/examples/ooxml-strict/Lorem-ipsum.docx Binary files differ new file mode 100644 index 0000000..34d9b0d --- /dev/null +++ b/examples/ooxml-strict/Lorem-ipsum.docx diff --git a/examples/ooxml-strict/README b/examples/ooxml-strict/README new file mode 100644 index 0000000..f09bc9b --- /dev/null +++ b/examples/ooxml-strict/README @@ -0,0 +1,23 @@ +HOWTO run all of this: +====================== + +You've obviously cloned the git repo already. You'll now want to +generate the Python binding classes from the schema. cd into this here +directory, and issue: + +"./test.sh" + +That'll spew loads of python files into wml, sml, pml etc. subdirs +inside this directory. And afterwards run a demo iteration with the +Lorem-ipsum.docx file. Inspect results in your /tmp/ dir. + +How to write your custom iteration rules: look into demo.py, you'll +basically want to hack the 'worklist' list of regular expressions - +all schema types matched there will get iterated. See *.xsd files for +the corresponding XMl schema type names. + +More advanced stuff to follow. + +Thorsten + + diff --git a/examples/ooxml-strict/demo.py b/examples/ooxml-strict/demo.py new file mode 100644 index 0000000..7fcf4af --- /dev/null +++ b/examples/ooxml-strict/demo.py @@ -0,0 +1,24 @@ +# Copyright 2010, Thorsten Behrens, Novell Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain a +# copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import re + +# use default list of xml mimetypes in opc +mimetypes = None + +# always iterate twice over a matched element +iterations = lambda i: 2 + +# always use the same regexp regardless of mimetype +worklist = lambda i: [re.compile('.*CT_R'), re.compile('.*rsidR.*')] diff --git a/examples/ooxml-strict/dumpsample.py b/examples/ooxml-strict/dumpsample.py index 36b5fc4..afec026 100644 --- a/examples/ooxml-strict/dumpsample.py +++ b/examples/ooxml-strict/dumpsample.py @@ -12,36 +12,39 @@ # License for the specific language governing permissions and limitations # under the License. -import re -import sys -import StringIO +import sys, os, StringIO import pyxb.binding.saxer import opc -import dml.dml -import pml.pml -import props.props -import sml.sml -import wml.wml +import dml.dml, pml.pml, props.props, sml.sml, wml.wml - -if len(sys.argv) > 1: - package = opc.OPCPackage(sys.argv[1]) - - for fragment in package.files(): +if len(sys.argv) < 3: + print "Usage: dumpsample.py <worklist.py> <input_file> <output_dir>" + sys.exit(1) +else: + exec "import "+sys.argv[1]+" as worklist" + inFile = sys.argv[2] + (inFileName,inFileExt) = os.path.splitext(os.path.basename(inFile)) + outDir = sys.argv[3] + + package = opc.OPCPackage(inFile) + iteration=1 + + for (fragment, mimetype, schema, reltype) in package.files(worklist.mimetypes): saxer = pyxb.binding.saxer.make_parser(location_base=fragment) handler = saxer.getContentHandler() saxer.parse(StringIO.StringIO(package.read(fragment))) sax_instance = handler.rootObject() - for i in sax_instance.iterateBinding([re.compile('.*CT_R'), re.compile('.*rsidR.*')]): - i() - print sax_instance.toxml() - print - i() - print sax_instance.toxml() - print - i() - print sax_instance.toxml() - print + for contentIter in sax_instance.iterateBinding(worklist.worklist(mimetype)): + # iterate content n times + for i in range(worklist.iterations(mimetype)): + contentIter() + currOutFile = outDir+"/"+inFileName+str(iteration)+inFileExt + if 
os.system("cp "+inFile+" "+currOutFile) == 0: + outPackage = opc.OPCPackage(currOutFile,"a") + outPackage.writestr(fragment,sax_instance.toxml()) + outPackage.close() + iteration += 1 + print "Written "+currOutFile diff --git a/examples/ooxml-strict/opc.py b/examples/ooxml-strict/opc.py index ba96ffa..53de1b0 100644 --- a/examples/ooxml-strict/opc.py +++ b/examples/ooxml-strict/opc.py @@ -313,4 +313,4 @@ class OPCPackage (zipfile.ZipFile): if not mimetype is None: for elem in mimetypes: if elem[0] == mimetype: - yield fragment + yield (fragment, mimetype, elem[1], elem[2]) diff --git a/examples/ooxml-strict/test.sh b/examples/ooxml-strict/test.sh index 52920df..d85dd76 100755 --- a/examples/ooxml-strict/test.sh +++ b/examples/ooxml-strict/test.sh @@ -1,6 +1,9 @@ -PYXB_ROOT=${PYXB_ROOT:-../..} +PYXB_ROOT=${PYXB_ROOT:-`pwd`/../..} export PYTHONPATH=${PYXB_ROOT} export PATH=${PYXB_ROOT}/scripts:${PATH} +echo "Generating binding classes" ; echo ; echo sh genbindings.sh -#python dumpsample.py + +echo ; echo ; echo "Running demo iteration" ; echo ; echo +python dumpsample.py demo Lorem-ipsum.docx /tmp |