diff options
author | Jean-Francois Dockes <jf@dockes.org> | 2018-03-08 17:50:55 +0100 |
---|---|---|
committer | Miklos Vajna <vmiklos@collabora.co.uk> | 2018-03-19 17:45:47 +0100 |
commit | 3401d913449a4b031094f7ad2420994d99cd1d7a (patch) | |
tree | 57fcb848ac942615ce3481df956b499f053f0a1c | |
parent | c99d5a22f9dfebd595308d9dc5f20112674a3b8c (diff) |
Port to Python3.
Compatible with Python 2.7, with an added dependency on the 'future' module.
Main modifications:
- Change the 'import' statements to package-relative
- dic.has_key()->key in dic
- xrange() -> future.builtins.range() (py2) / range (py3)
- Convert print statements to print() function calls
- Fix exception statements: except Error, err -> except Error as err
- StringIO.StringIO -> io.BytesIO
- Change explicit unicode() constructors to somebytes.decode(), and other
uses of the unicode type.
- Fix indexing into bytes (bytes[i] -> byte string of length 1 in py2, int in
py3), ord(), chr() calls.
- Fix output functions to generally accept both bytes() and str() because
both types are still used by the parser outputs. Make sure they work the
same when piped.
- Fix comparisons between bytes and strings (dirname == "Workbook" ->
dirname == b"Workbook")
- Use explicit integer division in many places / -> //
- Deal with long ints being gone (0L is a syntax error)
Change-Id: Ife0b6f9fa8ab4c95ba203013b894a67c85c8e0ad
Reviewed-on: https://gerrit.libreoffice.org/50967
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
Tested-by: Miklos Vajna <vmiklos@collabora.co.uk>
-rwxr-xr-x | doc-dump.py | 15 | ||||
-rwxr-xr-x | emf-dump.py | 3 | ||||
-rw-r--r-- | msodumper/binarystream.py | 17 | ||||
-rw-r--r-- | msodumper/docrecord.py | 920 | ||||
-rw-r--r-- | msodumper/docstream.py | 134 | ||||
-rw-r--r-- | msodumper/emfrecord.py | 79 | ||||
-rw-r--r-- | msodumper/formula.py | 8 | ||||
-rw-r--r-- | msodumper/globals.py | 164 | ||||
-rw-r--r-- | msodumper/msocrypto.py | 2 | ||||
-rw-r--r-- | msodumper/msodraw.py | 62 | ||||
-rw-r--r-- | msodumper/msometa.py | 64 | ||||
-rw-r--r-- | msodumper/node.py | 62 | ||||
-rw-r--r-- | msodumper/ole.py | 69 | ||||
-rw-r--r-- | msodumper/olestream.py | 12 | ||||
-rwxr-xr-x | msodumper/oletool.py | 2 | ||||
-rw-r--r-- | msodumper/pptrecord.py | 30 | ||||
-rw-r--r-- | msodumper/pptstream.py | 12 | ||||
-rw-r--r-- | msodumper/vbahelper.py | 14 | ||||
-rw-r--r-- | msodumper/vsdstream.py | 8 | ||||
-rw-r--r-- | msodumper/wmfrecord.py | 22 | ||||
-rw-r--r-- | msodumper/xlsmodel.py | 24 | ||||
-rw-r--r-- | msodumper/xlsparser.py | 6 | ||||
-rw-r--r-- | msodumper/xlsrecord.py | 135 | ||||
-rw-r--r-- | msodumper/xlsstream.py | 50 | ||||
-rwxr-xr-x | msodumper/xmlpp.py | 9 | ||||
-rwxr-xr-x | ppt-dump.py | 15 | ||||
-rwxr-xr-x | test/doc/test.py | 1 | ||||
-rwxr-xr-x | test/vsd-test.py | 33 | ||||
-rwxr-xr-x | vbadump.py | 12 | ||||
-rwxr-xr-x | vsd-dump.py | 7 | ||||
-rwxr-xr-x | xls-dump.py | 33 |
31 files changed, 1087 insertions, 937 deletions
diff --git a/doc-dump.py b/doc-dump.py index ca98cc5..164ea1c 100755 --- a/doc-dump.py +++ b/doc-dump.py @@ -7,8 +7,10 @@ from msodumper import globals, docstream import sys -sys = reload(sys) -sys.setdefaultencoding("utf-8") + +if not globals.PY3: + sys = reload(sys) + sys.setdefaultencoding("utf-8") class DOCDumper: @@ -21,14 +23,14 @@ class DOCDumper: strm = docstream.createDOCFile(file.read(), self.params) file.close() dirnames = strm.getDirectoryNames() - print '<?xml version="1.0"?>\n<streams ole-type="%s">' % strm.getName() + print('<?xml version="1.0"?>\n<streams ole-type="%s">' % strm.getName()) if strm.error: - print '<error what="%s"/>' % strm.error + print('<error what="%s"/>' % strm.error) for dirname in dirnames: - if len(dirname) == 0 or dirname in ['Root Entry']: + if len(dirname) == 0 or dirname in [b'Root Entry']: continue strm.getDirectoryStreamByName(dirname).dump() - print '</streams>' + print('</streams>') def main(args): @@ -36,6 +38,7 @@ def main(args): dumper = DOCDumper(args[1], params) dumper.dump() + if __name__ == '__main__': main(sys.argv) diff --git a/emf-dump.py b/emf-dump.py index 2b12b06..c40f101 100755 --- a/emf-dump.py +++ b/emf-dump.py @@ -19,7 +19,7 @@ class EMFDumper: file = open(self.filepath, 'rb') strm = emfrecord.EMFStream(file.read()) file.close() - print '<?xml version="1.0"?>' + print('<?xml version="1.0"?>') strm.dump() @@ -27,6 +27,7 @@ def main(args): dumper = EMFDumper(args[1]) dumper.dump() + if __name__ == '__main__': main(sys.argv) diff --git a/msodumper/binarystream.py b/msodumper/binarystream.py index 15fcc5d..9ab829d 100644 --- a/msodumper/binarystream.py +++ b/msodumper/binarystream.py @@ -5,7 +5,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. # -import globals +from . 
import globals import struct from xml.sax.saxutils import quoteattr @@ -37,12 +37,17 @@ class BinaryStream: if offset: attrs += ' offset="%s"' % hex(self.pos) if end: - print '<%s value="%s"%s/>' % (key, value, attrs) + print('<%s value="%s"%s/>' % (key, value, attrs)) else: - print '<%s value="%s"%s>' % (key, value, attrs) + print('<%s value="%s"%s>' % (key, value, attrs)) def quoteAttr(self, value): """Wrapper around xml.sax.saxutils.quoteattr, assumes the caller will put " around the result.""" + + if globals.PY3: + if isinstance(value, bytes): + # can't have bytes here, crashes later in saxutils + value = value.decode('cp1252') ret = quoteattr("'" + value + "'") return ret[2:len(ret) - 2] @@ -83,7 +88,7 @@ class BinaryStream: return ret def getuInt24(self): - return struct.unpack("<I", self.bytes[self.pos:self.pos + 3] + "\x00")[0] + return struct.unpack("<I", self.bytes[self.pos:self.pos + 3] + b"\x00")[0] def getuInt32(self, bytes=None, pos=None): if not bytes: @@ -164,7 +169,7 @@ class BinaryStream: return (byte & (1 << bitNumber)) >> bitNumber def dump(self): - print '<stream name="%s" size="%s"/>' % (self.quoteAttr(globals.encodeName(self.name)), self.size) + print('<stream name="%s" size="%s"/>' % (self.quoteAttr(globals.encodeName(self.name)), self.size)) # compat methods to make msodraw happy def readUnsignedInt(self, size): @@ -190,6 +195,6 @@ class BinaryStream: self.pos += byteCount def appendLine(self, line): - print line + print("%s" % line) # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/msodumper/docrecord.py b/msodumper/docrecord.py index d3ba5c1..37ad734 100644 --- a/msodumper/docrecord.py +++ b/msodumper/docrecord.py @@ -4,12 +4,12 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - +from builtins import range import locale -import globals -from binarystream import BinaryStream -import docsprm -import msodraw +from . 
import globals +from .binarystream import BinaryStream +from . import docsprm +from . import msodraw def getWordModel(mainStream): @@ -31,11 +31,11 @@ class FcCompressed(BinaryStream): self.r1 = self.getBit(buf, 31) def dump(self): - print '<fcCompressed type="FcCompressed" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<fcCompressed type="FcCompressed" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("fc", self.fc) self.printAndSet("fCompressed", self.fCompressed) self.printAndSet("r1", self.r1) - print '</fcCompressed>' + print('</fcCompressed>') class Pcd(BinaryStream): @@ -54,13 +54,13 @@ class Pcd(BinaryStream): self.pos += 4 def dump(self): - print '<pcd type="Pcd" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<pcd type="Pcd" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("fNoParaLast", self.fNoParaLast) self.printAndSet("fR1", self.fR1) self.printAndSet("fDirty", self.fDirty) self.printAndSet("fR2", self.fR2) self.fc.dump() - print '</pcd>' + print('</pcd>') class PLC: @@ -70,7 +70,7 @@ class PLC: self.structSize = structSize def getElements(self): - return (self.totalSize - 4) / (4 + self.structSize) # defined by 2.2.2 + return (self.totalSize - 4) // (4 + self.structSize) # defined by 2.2.2 def getOffset(self, pos, i): return self.getPLCOffset(pos, self.getElements(), self.structSize, i) @@ -86,13 +86,13 @@ class BKC(BinaryStream): self.bkc = bkc def dump(self): - print '<bkc type="BKC">' + print('<bkc type="BKC">') self.printAndSet("itcFirst", self.bkc & 0x007f) # 1..7th bits self.printAndSet("fPub", self.getBit(self.bkc, 8)) self.printAndSet("itcLim", (self.bkc & 0x3f00) >> 8) # 9..14th bits self.printAndSet("fNative", self.getBit(self.bkc, 15)) self.printAndSet("fCol", self.getBit(self.bkc, 16)) - print '</bkc>' + print('</bkc>') class FBKF(BinaryStream): @@ -102,10 +102,10 @@ class FBKF(BinaryStream): self.pos = offset def dump(self): - print '<aFBKF type="FBKF" 
offset="%d">' % self.pos + print('<aFBKF type="FBKF" offset="%d">' % self.pos) self.printAndSet("ibkl", self.readuInt16()) BKC(self.readuInt16()).dump() - print '</aFBKF>' + print('</aFBKF>') class PlcfBkf(BinaryStream, PLC): @@ -119,21 +119,21 @@ class PlcfBkf(BinaryStream, PLC): self.aFBKF = [] def dump(self): - print '<plcfBkf type="PlcfBkf" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfBkf type="PlcfBkf" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aCp start = self.getuInt32(pos=pos) self.aCP.append(start) - print '<aCP index="%d" bookmarkStart="%d">' % (i, start) + print('<aCP index="%d" bookmarkStart="%d">' % (i, start)) pos += 4 # aFBKF aFBKF = FBKF(self, self.getOffset(self.pos, i)) aFBKF.dump() self.aFBKF.append(aFBKF) - print '</aCP>' - print '</plcfBkf>' + print('</aCP>') + print('</plcfBkf>') class FBKFD(BinaryStream): @@ -143,11 +143,11 @@ class FBKFD(BinaryStream): self.pos = offset def dump(self): - print '<aFBKFD type="FBKFD" offset="%d">' % self.pos + print('<aFBKFD type="FBKFD" offset="%d">' % self.pos) FBKF(self, self.pos).dump() self.pos += 4 self.printAndSet("cDepth", self.readInt16()) - print '</aFBKFD>' + print('</aFBKFD>') class PlcfBkfd(BinaryStream, PLC): @@ -161,21 +161,21 @@ class PlcfBkfd(BinaryStream, PLC): self.aFBKFD = [] def dump(self): - print '<plcfBkfd type="PlcfBkfd" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfBkfd type="PlcfBkfd" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aCp start = self.getuInt32(pos=pos) self.aCP.append(start) - print '<aCP index="%d" bookmarkStart="%d">' % (i, start) + print('<aCP index="%d" bookmarkStart="%d">' % (i, start)) pos += 4 # aFBKFD aFBKFD = FBKFD(self, self.getOffset(self.pos, i)) aFBKFD.dump() self.aFBKFD.append(aFBKFD) - print '</aCP>' - print '</plcfBkfd>' + print('</aCP>') + print('</plcfBkfd>') class 
FBKLD(BinaryStream): @@ -185,10 +185,10 @@ class FBKLD(BinaryStream): self.pos = offset def dump(self): - print '<aFBKLD type="FBKLD" offset="%d">' % self.pos + print('<aFBKLD type="FBKLD" offset="%d">' % self.pos) self.printAndSet("ibkf", self.readuInt16()) self.printAndSet("cDepth", self.readuInt16()) - print '</aFBKLD>' + print('</aFBKLD>') class PlcfBkld(BinaryStream, PLC): @@ -202,21 +202,21 @@ class PlcfBkld(BinaryStream, PLC): self.aFBKLD = [] def dump(self): - print '<plcfBkld type="PlcfBkld" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfBkld type="PlcfBkld" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aCp start = self.getuInt32(pos=pos) self.aCP.append(start) - print '<aCP index="%d" bookmarkEnd="%d">' % (i, start) + print('<aCP index="%d" bookmarkEnd="%d">' % (i, start)) pos += 4 # aFBKLD aFBKLD = FBKLD(self, self.getOffset(self.pos, i)) aFBKLD.dump() self.aFBKLD.append(aFBKLD) - print '</aCP>' - print '</plcfBkld>' + print('</aCP>') + print('</plcfBkld>') class FactoidSpls(BinaryStream): @@ -227,9 +227,9 @@ class FactoidSpls(BinaryStream): self.pos = offset def dump(self): - print '<factoidSpls type="FactoidSpls" offset="%d">' % self.pos + print('<factoidSpls type="FactoidSpls" offset="%d">' % self.pos) SPLS("spls", self, self.pos).dump() - print '</factoidSpls>' + print('</factoidSpls>') class Plcffactoid(BinaryStream, PLC): @@ -243,13 +243,13 @@ class Plcffactoid(BinaryStream, PLC): self.aFactoidSpls = [] def dump(self): - print '<plcffactoid type="Plcffactoid" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcffactoid type="Plcffactoid" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements() + 1): # aCp aCp = self.getuInt32(pos=pos) self.aCPs.append(aCp) - print '<aCP index="%d" value="%d">' % (i, aCp) + print('<aCP index="%d" value="%d">' % (i, aCp)) pos += 4 if i < self.getElements(): @@ -257,8 +257,8 
@@ class Plcffactoid(BinaryStream, PLC): aFactoidSpls = FactoidSpls(self, self.getOffset(self.pos, i)) aFactoidSpls.dump() self.aFactoidSpls.append(aFactoidSpls) - print '</aCP>' - print '</plcffactoid>' + print('</aCP>') + print('</plcffactoid>') class Fldch(BinaryStream): @@ -269,11 +269,11 @@ class Fldch(BinaryStream): self.parent = parent def dump(self): - print '<fldch type="fldch" offset="%d" size="1 byte">' % self.pos + print('<fldch type="fldch" offset="%d" size="1 byte">' % self.pos) buf = self.readuInt8() self.printAndSet("ch", buf & 0x1f) # 1..5th bits self.printAndSet("reserved", (buf & 0xe0) >> 5) # 6..8th bits - print '</fldch>' + print('</fldch>') self.parent.pos = self.pos @@ -284,11 +284,11 @@ class Fld(BinaryStream): self.pos = offset def dump(self): - print '<fld type="FLD" offset="%d" size="2 bytes">' % self.pos + print('<fld type="FLD" offset="%d" size="2 bytes">' % self.pos) self.fldch = Fldch(self) self.fldch.dump() self.printAndSet("grffld", self.readuInt8()) # TODO parse flt and grffldEnd - print '</fld>' + print('</fld>') class PlcFld(BinaryStream, PLC): @@ -300,13 +300,13 @@ class PlcFld(BinaryStream, PLC): self.size = mainStream.lcbPlcfFldMom def dump(self): - print '<plcFld type="PlcFld" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcFld type="PlcFld" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos aFlds = [] for i in range(self.getElements()): # aCp value = self.getuInt32(pos=pos) - print '<aCP index="%d" value="%d">' % (i, value) + print('<aCP index="%d" value="%d">' % (i, value)) pos += 4 # aFld @@ -315,13 +315,13 @@ class PlcFld(BinaryStream, PLC): # This is a separator and the previous was a start: display the field instructions. 
if aFld.fldch.ch == 0x14 and aFlds[-1][1].fldch.ch == 0x13: - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(aFlds[-1][0] + 1, value)) + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(aFlds[-1][0] + 1, value))) # This is an end and the previous was a separator: display the field result. elif aFld.fldch.ch == 0x15 and aFlds[-1][1].fldch.ch == 0x14: - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(aFlds[-1][0] + 1, value)) + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(aFlds[-1][0] + 1, value))) aFlds.append((value, aFld)) - print '</aCP>' - print '</plcFld>' + print('</aCP>') + print('</plcFld>') class PlcfBkl(BinaryStream, PLC): @@ -334,17 +334,17 @@ class PlcfBkl(BinaryStream, PLC): self.start = start def dump(self): - print '<plcfBkl type="PlcfBkl" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfBkl type="PlcfBkl" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aCp end = self.getuInt32(pos=pos) - print '<aCP index="%d" bookmarkEnd="%d">' % (i, end) + print('<aCP index="%d" bookmarkEnd="%d">' % (i, end)) start = self.start.aCP[self.start.aFBKF[i].ibkl] - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end)) + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end))) pos += 4 - print '</aCP>' - print '</plcfBkl>' + print('</aCP>') + print('</plcfBkl>') class PlcPcd(BinaryStream, PLC): @@ -372,14 +372,14 @@ class PlcPcd(BinaryStream, PLC): self.aPcd.append(aPcd) def dump(self): - print '<plcPcd type="PlcPcd" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcPcd type="PlcPcd" offset="%d" size="%d bytes">' % (self.pos, self.size)) for i in range(self.getElements()): start, end = self.ranges[i] - print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) + print('<aCP 
index="%d" start="%d" end="%d">' % (i, start, end)) self.aPcd[i].dump() - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end)) - print '</aCP>' - print '</plcPcd>' + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end))) + print('</aCP>') + print('</plcPcd>') class Sepx(BinaryStream): @@ -389,14 +389,14 @@ class Sepx(BinaryStream): self.pos = sed.fcSepx def dump(self): - print '<sepx type="Sepx" offset="%d">' % self.pos + print('<sepx type="Sepx" offset="%d">' % self.pos) self.printAndSet("cb", self.readInt16()) pos = self.pos while (self.cb - (pos - self.pos)) > 0: prl = Prl(self, pos) prl.dump() pos += prl.getSize() - print '</sepx>' + print('</sepx>') class Sed(BinaryStream): @@ -409,14 +409,14 @@ class Sed(BinaryStream): self.plcfSed = plcfSed def dump(self): - print '<aSed type="Sed" offset="%d" size="%d bytes">' % (self.pos, Sed.size) + print('<aSed type="Sed" offset="%d" size="%d bytes">' % (self.pos, Sed.size)) self.printAndSet("fn", self.readuInt16()) self.printAndSet("fcSepx", self.readuInt32()) if self.fcSepx != 0xffffffff: Sepx(self).dump() self.printAndSet("fnMpr", self.readuInt16()) self.printAndSet("fcMpr", self.readuInt32()) - print '</aSed>' + print('</aSed>') class PlcfSed(BinaryStream, PLC): @@ -428,22 +428,22 @@ class PlcfSed(BinaryStream, PLC): self.size = size def dump(self): - print '<plcfSed type="PlcfSed" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfSed type="PlcfSed" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aCp start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) + print('<aCP index="%d" start="%d" end="%d">' % (i, start, end)) pos += 4 # aSed aSed = Sed(self, self.getOffset(self.pos, i)) aSed.dump() - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end)) - 
print '</aCP>' - print '</plcfSed>' + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end))) + print('</aCP>') + print('</plcfSed>') class Tcg(BinaryStream): @@ -454,12 +454,12 @@ class Tcg(BinaryStream): self.size = size def dump(self): - print '<tcg type="Tcg" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<tcg type="Tcg" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("nTcgVer", self.readuInt8()) self.printAndSet("chTerminator", self.readuInt8()) if self.chTerminator != 0x40: - print '<todo what="Tcg: chTerminator != 0x40"/>' - print '</tcg>' + print('<todo what="Tcg: chTerminator != 0x40"/>') + print('</tcg>') class Sty(BinaryStream): @@ -484,7 +484,7 @@ class Sty(BinaryStream): 0x000F: "styWholeTable", 0x001B: "styPrefix", } - print '<sty name="%s" value="%s"/>' % (styMap[value], hex(value)) + print('<sty name="%s" value="%s"/>' % (styMap[value], hex(value))) self.parent.pos = self.pos @@ -498,7 +498,7 @@ class Selsf(BinaryStream): self.mainStream = mainStream def dump(self): - print '<selsf type="Selsf" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<selsf type="Selsf" offset="%d" size="%d bytes">' % (self.pos, self.size)) buf = self.readuInt16() self.printAndSet("fRightward", self.getBit(buf, 0)) @@ -534,7 +534,7 @@ class Selsf(BinaryStream): self.printAndSet("xaTableLeft", self.readInt16()) self.printAndSet("xaTableRight", self.readInt16()) assert self.pos == self.mainStream.fcWss + Selsf.size - print '</selsf>' + print('</selsf>') class COLORREF(BinaryStream): @@ -549,12 +549,12 @@ class COLORREF(BinaryStream): parent.pos = self.pos def dump(self, name): - print '<%s type="COLORREF">' % name + print('<%s type="COLORREF">' % name) self.printAndSet("red", self.red) self.printAndSet("green", self.green) self.printAndSet("blue", self.blue) self.printAndSet("fAuto", self.fAuto) - print '</%s>' % name + print('</%s>' % name) class BRC(BinaryStream): @@ -575,7 +575,7 @@ 
class BRC(BinaryStream): self.fReserved = (buf & 0xff80) >> 7 # 8..16th bits def dump(self): - print '<%s type="BRC" offset="%d">' % (self.name, self.posOrig) + print('<%s type="BRC" offset="%d">' % (self.name, self.posOrig)) self.cv.dump("cv") self.printAndSet("dptLineWidth", self.dptLineWidth) self.printAndSet("brcType", self.brcType, dict=BrcType) @@ -583,7 +583,7 @@ class BRC(BinaryStream): self.printAndSet("fShadow", self.fShadow) self.printAndSet("fFrame", self.fFrame) self.printAndSet("fReserved", self.fReserved) - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -595,11 +595,11 @@ class PChgTabsDel(BinaryStream): self.pos = parent.pos def dump(self): - print '<pchgTabsDel type="PChgTabsDel" offset="%d">' % self.pos + print('<pchgTabsDel type="PChgTabsDel" offset="%d">' % self.pos) self.printAndSet("cTabs", self.readuInt8()) for i in range(self.cTabs): - print '<rgdxaDel index="%d" value="%d"/>' % (i, self.readInt16()) - print '</pchgTabsDel>' + print('<rgdxaDel index="%d" value="%d"/>' % (i, self.readInt16())) + print('</pchgTabsDel>') self.parent.pos = self.pos @@ -611,13 +611,13 @@ class PChgTabsDelClose(BinaryStream): self.pos = parent.pos def dump(self): - print '<pchgTabsDelClose type="PChgTabsDelClose" offset="%d">' % self.pos + print('<pchgTabsDelClose type="PChgTabsDelClose" offset="%d">' % self.pos) self.printAndSet("cTabs", self.readuInt8()) for i in range(self.cTabs): - print '<rgdxaDel index="%d" value="%d"/>' % (i, self.readInt16()) + print('<rgdxaDel index="%d" value="%d"/>' % (i, self.readInt16())) for i in range(self.cTabs): - print '<rgdxaClose index="%d" value="%d"/>' % (i, self.readInt16()) - print '</pchgTabsDelClose>' + print('<rgdxaClose index="%d" value="%d"/>' % (i, self.readInt16())) + print('</pchgTabsDelClose>') self.parent.pos = self.pos @@ -629,13 +629,13 @@ class PChgTabsAdd(BinaryStream): self.pos = parent.pos def dump(self): - print '<pchgTabsAdd type="PChgTabsAdd" offset="%d">' % self.pos 
+ print('<pchgTabsAdd type="PChgTabsAdd" offset="%d">' % self.pos) self.printAndSet("cTabs", self.readuInt8()) for i in range(self.cTabs): - print '<rgdxaAdd index="%d" value="%d"/>' % (i, self.readInt16()) + print('<rgdxaAdd index="%d" value="%d"/>' % (i, self.readInt16())) for i in range(self.cTabs): - print '<rgtbdAdd index="%d" value="%d"/>' % (i, self.readuInt8()) - print '</pchgTabsAdd>' + print('<rgtbdAdd index="%d" value="%d"/>' % (i, self.readuInt8())) + print('</pchgTabsAdd>') self.parent.pos = self.pos @@ -646,10 +646,10 @@ class LSPD(BinaryStream): self.pos = parent.pos def dump(self): - print '<lspd type="LSPD" offset="%d">' % self.pos + print('<lspd type="LSPD" offset="%d">' % self.pos) self.printAndSet("dyaLine", self.readuInt16()) self.printAndSet("fMultLinespace", self.readuInt16()) - print '</lspd>' + print('</lspd>') class PChgTabsPapxOperand(BinaryStream): @@ -659,11 +659,11 @@ class PChgTabsPapxOperand(BinaryStream): self.pos = parent.pos def dump(self): - print '<pchgTabsPapxOperand type="PChgTabsPapxOperand" offset="%d">' % self.pos + print('<pchgTabsPapxOperand type="PChgTabsPapxOperand" offset="%d">' % self.pos) self.printAndSet("cb", self.readuInt8()) PChgTabsDel(self).dump() PChgTabsAdd(self).dump() - print '</pchgTabsPapxOperand>' + print('</pchgTabsPapxOperand>') class PChgTabsOperand(BinaryStream): @@ -675,11 +675,12 @@ class PChgTabsOperand(BinaryStream): self.pos = parent.pos def dump(self): - print '<pchgTabsOperand type="PChgTabsOperand" offset="%d">' % self.pos + print('<pchgTabsOperand type="PChgTabsOperand" offset="%d">' % self.pos) self.printAndSet("cb", self.readuInt8()) PChgTabsDelClose(self).dump() PChgTabsAdd(self).dump() - print '</pchgTabsOperand>' + print('</pchgTabsOperand>') + # The Ico structure specifies an entry in the color palette that is listed in the following table. 
Ico = { @@ -772,12 +773,12 @@ class Shd80(BinaryStream): self.index = index def dump(self): - print '<shd80 type="Shd80" offset="%d" index="%d">' % (self.pos, self.index) + print('<shd80 type="Shd80" offset="%d" index="%d">' % (self.pos, self.index)) buf = self.readuInt16() self.printAndSet("icoFore", buf & 0x001f, dict=Ico) # 1..5th bits self.printAndSet("icoBack", (buf & 0x03e0) >> 5, dict=Ico) # 6..10th bits self.printAndSet("ipat", (buf & 0xfc00) >> 10, dict=Ipat) # 11.16th bits - print '</shd80>' + print('</shd80>') self.parent.pos = self.pos @@ -789,11 +790,11 @@ class DefTableShd80Operand(BinaryStream): self.pos = parent.pos def dump(self): - print '<defTableShd80Operand type="DefTableShd80Operand" offset="%d">' % self.pos + print('<defTableShd80Operand type="DefTableShd80Operand" offset="%d">' % self.pos) self.printAndSet("cb", self.readuInt8()) - for i in xrange(self.cb / Shd80.size): + for i in range(self.cb / Shd80.size): Shd80(self, i).dump() - print '</defTableShd80Operand>' + print('</defTableShd80Operand>') class CMajorityOperand(BinaryStream): @@ -805,16 +806,17 @@ class CMajorityOperand(BinaryStream): self.pos = parent.pos def dump(self): - print '<cMajorityOperand type="CMajorityOperand" offset="%d">' % self.pos + print('<cMajorityOperand type="CMajorityOperand" offset="%d">' % self.pos) self.printAndSet("cb", self.readuInt8()) pos = 0 - print '<grpprl offset="%d" size="%d bytes">' % (self.pos, self.cb) + print('<grpprl offset="%d" size="%d bytes">' % (self.pos, self.cb)) while self.cb - pos > 0: prl = Prl(self, self.pos + pos) prl.dump() pos += prl.getSize() - print '</grpprl>' - print '</cMajorityOperand>' + print('</grpprl>') + print('</cMajorityOperand>') + # The PgbApplyTo enumeration is used to specify the pages to which a page border applies. 
PgbApplyTo = { @@ -846,13 +848,13 @@ class SPgbPropOperand(BinaryStream): self.pos = parent.pos def dump(self): - print '<sPgbPropOperand type="SPgbPropOperand" offset="%d">' % self.pos + print('<sPgbPropOperand type="SPgbPropOperand" offset="%d">' % self.pos) buf = self.readuInt8() self.printAndSet("pgbApplyTo", buf & 0x7, dict=PgbApplyTo) # 1..3rd bits self.printAndSet("pgbPageDepth", (buf & 0x18) >> 3, dict=PgbPageDepth) # 4..5th bits self.printAndSet("pgbOffsetFrom", (buf & 0xe0) >> 5, dict=PgbOffsetFrom) # 6..8th bits self.printAndSet("reserved", self.readuInt8()) - print '</sPgbPropOperand>' + print('</sPgbPropOperand>') class MFPF(BinaryStream): @@ -867,13 +869,13 @@ class MFPF(BinaryStream): 0x0064: "MM_SHAPE", 0x0066: "MM_SHAPEFILE", } - print '<mfpf type="MFPF" offset="%d">' % self.pos + print('<mfpf type="MFPF" offset="%d">' % self.pos) self.printAndSet("mm", self.readInt16(), dict=mmDict, default="todo") self.printAndSet("xExt", self.readuInt16()) self.printAndSet("yExt", self.readuInt16()) self.printAndSet("swHMF", self.readuInt16()) self.parent.pos = self.pos - print '</mfpf>' + print('</mfpf>') class PICF_Shape(BinaryStream): @@ -886,13 +888,14 @@ class PICF_Shape(BinaryStream): self.name = name def dump(self): - print '<%s type="PICF_Shape" offset="%d">' % (self.name, self.pos) + print('<%s type="PICF_Shape" offset="%d">' % (self.name, self.pos)) self.printAndSet("grf", self.readuInt32()) self.printAndSet("padding1", self.readuInt32()) self.printAndSet("mmpm", self.readuInt16()) self.printAndSet("padding2", self.readuInt32()) self.parent.pos = self.pos - print '</%s>' % self.name + print('</%s>' % self.name) + # BrcType is an unsigned integer that specifies the type of border. 
BrcType = { @@ -1099,7 +1102,7 @@ class Brc80(BinaryStream): def dump(self): buf = self.readuInt32() - print '<%s type="Brc80" offset="%d">' % (self.name, self.pos) + print('<%s type="Brc80" offset="%d">' % (self.name, self.pos)) self.printAndSet("dptLineWidth", buf & 0x000000ff) # 1..8th bits self.printAndSet("brcType", (buf & 0x0000ff00) >> 8, dict=BrcType) # 9..16th bits self.printAndSet("ico", (buf & 0x00ff0000) >> 16, dict=Ico) # 17..24th bits @@ -1107,7 +1110,7 @@ class Brc80(BinaryStream): self.printAndSet("fShadow", self.getBit(buf, 29)) self.printAndSet("fFrame", self.getBit(buf, 30)) self.printAndSet("reserved", self.getBit(buf, 31)) - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -1123,12 +1126,12 @@ class Brc80MayBeNil(BinaryStream): def dump(self): buf = self.getuInt32() if buf == 0xFFFFFFFF: - print '<%s type="Brc80MayBeNil" offset="%d" value="%s"/>' % (self.name, self.pos, hex(buf)) + print('<%s type="Brc80MayBeNil" offset="%d" value="%s"/>' % (self.name, self.pos, hex(buf))) self.pos += 4 else: - print '<%s type="Brc80MayBeNil" offset="%d">' % (self.name, self.pos) + print('<%s type="Brc80MayBeNil" offset="%d">' % (self.name, self.pos)) Brc80(self, self.name).dump() - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -1140,7 +1143,7 @@ class PICMID(BinaryStream): self.parent = parent def dump(self): - print '<picmid type="PICMID" offset="%d">' % self.pos + print('<picmid type="PICMID" offset="%d">' % self.pos) self.printAndSet("dxaGoal", self.readuInt16()) self.printAndSet("dyaGoal", self.readuInt16()) self.printAndSet("mx", self.readuInt16()) @@ -1158,7 +1161,7 @@ class PICMID(BinaryStream): self.printAndSet("dxaReserved3", self.readuInt16()) self.printAndSet("dyaReserved3", self.readuInt16()) self.parent.pos = self.pos - print '</picmid>' + print('</picmid>') class PICF(BinaryStream): @@ -1170,7 +1173,7 @@ class PICF(BinaryStream): self.parent = parent def dump(self): - 
print '<picf type="PICF" offset="%d">' % self.pos + print('<picf type="PICF" offset="%d">' % self.pos) posOrig = self.pos self.printAndSet("lcb", self.readInt32()) self.printAndSet("cbHeader", self.readInt16()) @@ -1184,7 +1187,7 @@ class PICF(BinaryStream): else: self.pos = posOrig + self.cbHeader self.parent.pos = self.pos - print '</picf>' + print('</picf>') IType = { @@ -1213,7 +1216,7 @@ class FFDataBits(BinaryStream): self.parent = parent def dump(self): - print '<FFDataBits>' + print('<FFDataBits>') buf = self.readuInt8() self.printAndSet("iType", buf & 0x0003, dict=IType) # 1..2nd bits self.printAndSet("iRes", (buf & 0x007c) >> 2) # 3..7th bits @@ -1225,7 +1228,7 @@ class FFDataBits(BinaryStream): self.printAndSet("iTypeTxt", (buf & 0x0038) >> 3, dict=ITypeTxt) # 4..6th bits self.printAndSet("fRecalc", self.getBit(buf, 7)) self.printAndSet("fHasListBox", self.getBit(buf, 8)) - print '</FFDataBits>' + print('</FFDataBits>') self.parent.pos = self.pos @@ -1238,7 +1241,7 @@ class FFData(BinaryStream): self.parent = parent def dump(self): - print '<FFData>' + print('<FFData>') self.printAndSet("version", self.readuInt32()) self.bits = FFDataBits(self) self.bits.dump() @@ -1268,8 +1271,8 @@ class FFData(BinaryStream): xstzExitMcr.dump() self.pos = xstzExitMcr.pos if self.bits.iType == 2: # iTypeDrop - print '<todo what="FFData::dump(): handle hsttbDropList for iTypeDrop"/>' - print '</FFData>' + print('<todo what="FFData::dump(): handle hsttbDropList for iTypeDrop"/>') + print('</FFData>') class NilPICFAndBinData(BinaryStream): @@ -1277,13 +1280,13 @@ class NilPICFAndBinData(BinaryStream): data for a hyperlink, form field, or add-in field. 
The NilPICFAndBinData structure MUST be stored in the Data Stream.""" def __init__(self, parent): - dataStream = parent.mainStream.doc.getDirectoryStreamByName("Data") + dataStream = parent.mainStream.doc.getDirectoryStreamByName(b"Data") BinaryStream.__init__(self, dataStream.bytes) self.pos = parent.operand self.parent = parent def dump(self): - print '<NilPICFAndBinData>' + print('<NilPICFAndBinData>') # self -> sprm -> prl -> chpx -> chpxFkp chpxFkp = self.parent.parent.parent.parent self.printAndSet("lcb", self.readInt32()) @@ -1311,21 +1314,21 @@ class NilPICFAndBinData(BinaryStream): if fieldType == " FORMTEXT ": FFData(self).dump() else: - print '<todo what="NilPICFAndBinData::dump(): handle %s"/>' % fieldType - print '</NilPICFAndBinData>' + print('<todo what="NilPICFAndBinData::dump(): handle %s"/>' % fieldType) + print('</NilPICFAndBinData>') class PICFAndOfficeArtData(BinaryStream): """The PICFAndOfficeArtData structure specifies header information and binary data for a picture.""" def __init__(self, parent): - dataStream = parent.mainStream.doc.getDirectoryStreamByName("Data") + dataStream = parent.mainStream.doc.getDirectoryStreamByName(b"Data") BinaryStream.__init__(self, dataStream.bytes) self.pos = parent.operand self.parent = parent def dump(self): - print '<PICFAndOfficeArtData>' + print('<PICFAndOfficeArtData>') found = False for prl in self.parent.parent.parent.prls: if prl.sprm.sprm in (0x0806, 0x080a): # sprmCFData, sprmCFOle2 @@ -1337,15 +1340,16 @@ class PICFAndOfficeArtData(BinaryStream): picf.dump() assert self.pos == pos + 68 if picf.mfpf.mm == 0x0066: # MM_SHAPEFILE - print '<todo what="PICFAndOfficeArtData::dump(): picf.mfpf.mm == MM_SHAPEFILE is unhandled"/>' + print('<todo what="PICFAndOfficeArtData::dump(): picf.mfpf.mm == MM_SHAPEFILE is unhandled"/>') elif picf.mfpf.mm == 0x0064: # MM_SHAPE remaining = picf.lcb - (self.pos - pos) msodraw.InlineSpContainer(self, remaining).dumpXml(self, getWordModel(self.parent.mainStream)) else: - 
print '<todo what="PICFAndOfficeArtData::dump(): picf.mfpf.mm is unhandled (not MM_SHAPE or MM_SHAPEFILE): %d"/>' % picf.mfpf.mm + print('<todo what="PICFAndOfficeArtData::dump(): picf.mfpf.mm is unhandled (not MM_SHAPE or MM_SHAPEFILE): %d"/>' % picf.mfpf.mm) else: - print '<todo what="PICFAndOfficeArtData::dump(): handle sprmCFData or sprmCFOle2"/>' - print '</PICFAndOfficeArtData>' + print('<todo what="PICFAndOfficeArtData::dump(): handle sprmCFData or sprmCFOle2"/>') + print('</PICFAndOfficeArtData>') + # The TextFlow enumeration specifies the rotation settings for a block of text and for the individual # East Asian characters in each line of the block. @@ -1391,11 +1395,11 @@ class SHD(BinaryStream): self.pos = parent.pos def dump(self): - print '<shd type="SHD" offset="%d">' % self.pos + print('<shd type="SHD" offset="%d">' % self.pos) COLORREF(self).dump("cvFore") COLORREF(self).dump("cvBack") self.printAndSet("ipat", self.readuInt16(), dict=Ipat) - print '</shd>' + print('</shd>') class TCGRF(BinaryStream): @@ -1406,7 +1410,7 @@ class TCGRF(BinaryStream): self.pos = parent.pos def dump(self): - print '<tcgrf type="TCGRF" offset="%d">' % self.pos + print('<tcgrf type="TCGRF" offset="%d">' % self.pos) buf = self.readuInt16() self.printAndSet("horzMerge", buf & 0x0003) # 1..2nd bits self.printAndSet("textFlow", (buf & 0x001c) >> 2, dict=TextFlow, default="todo") # 3..6th bits @@ -1417,7 +1421,7 @@ class TCGRF(BinaryStream): self.printAndSet("fNoWrap", self.getBit(buf, 13)) self.printAndSet("fHideMark", self.getBit(buf, 14)) self.printAndSet("fUnused", self.getBit(buf, 15)) - print '</tcgrf>' + print('</tcgrf>') self.parent.pos = self.pos @@ -1430,14 +1434,14 @@ class TC80(BinaryStream): self.index = index def dump(self): - print '<tc80 index="%d">' % self.index + print('<tc80 index="%d">' % self.index) TCGRF(self).dump() self.printAndSet("wWidth", self.readuInt16(), hexdump=False) Brc80MayBeNil(self, "brcTop").dump() Brc80MayBeNil(self, "brcLeft").dump() 
Brc80MayBeNil(self, "brcBottom").dump() Brc80MayBeNil(self, "brcRight").dump() - print '</tc80>' + print('</tc80>') self.parent.pos = self.pos @@ -1450,17 +1454,17 @@ class TDefTableOperand(BinaryStream): self.pos = parent.pos def dump(self): - print '<tDefTableOperand>' + print('<tDefTableOperand>') self.printAndSet("cb", self.readuInt16()) size = self.pos + self.cb - 1 self.printAndSet("NumberOfColumns", self.readuInt8()) for i in range(self.NumberOfColumns + 1): - print '<rgdxaCenter index="%d" value="%d"/>' % (i, self.readInt16()) + print('<rgdxaCenter index="%d" value="%d"/>' % (i, self.readInt16())) i = 0 while self.pos < size: TC80(self, i).dump() i += 1 - print '</tDefTableOperand>' + print('</tDefTableOperand>') class TableBordersOperand(BinaryStream): @@ -1470,7 +1474,7 @@ class TableBordersOperand(BinaryStream): self.pos = parent.pos def dump(self): - print '<tableBordersOperand>' + print('<tableBordersOperand>') self.printAndSet("cb", self.readuInt8()) posOrig = self.pos BRC(self, "brcTop").dump() @@ -1480,7 +1484,7 @@ class TableBordersOperand(BinaryStream): BRC(self, "brcHorizontalInside").dump() BRC(self, "brcVerticalInside").dump() assert self.pos == posOrig + 0x30 - print '</tableBordersOperand>' + print('</tableBordersOperand>') class TableBordersOperand80(BinaryStream): @@ -1491,7 +1495,7 @@ class TableBordersOperand80(BinaryStream): self.pos = parent.pos def dump(self): - print '<tableBordersOperand80>' + print('<tableBordersOperand80>') self.printAndSet("cb", self.readuInt8()) posOrig = self.pos Brc80MayBeNil(self, "brcTop").dump() @@ -1501,7 +1505,7 @@ class TableBordersOperand80(BinaryStream): Brc80MayBeNil(self, "brcHorizontalInside").dump() Brc80MayBeNil(self, "brcVerticalInside").dump() assert self.pos == posOrig + 0x18 - print '</tableBordersOperand80>' + print('</tableBordersOperand80>') class SHDOperand(BinaryStream): @@ -1512,10 +1516,10 @@ class SHDOperand(BinaryStream): self.pos = parent.pos def dump(self): - print '<shdOperand>' + 
print('<shdOperand>') self.printAndSet("cb", self.readuInt8()) SHD(self).dump() - print '</shdOperand>' + print('</shdOperand>') class BrcOperand(BinaryStream): @@ -1528,9 +1532,9 @@ class BrcOperand(BinaryStream): self.brc = BRC(self) def dump(self): - print '<brcOperand type="BrcOperand" offset="%d">' % self.posOrig + print('<brcOperand type="BrcOperand" offset="%d">' % self.posOrig) self.brc.dump() - print '</brcOperand>' + print('</brcOperand>') class Sprm(BinaryStream): @@ -1573,7 +1577,7 @@ class Sprm(BinaryStream): # Can't decide right now, depends on if there will be an sprmCFData later or not. self.ct = True elif self.sprm == 0x6646: # sprmPHugePapx - dataStream = mainStream.doc.getDirectoryStreamByName("Data") + dataStream = mainStream.doc.getDirectoryStreamByName(b"Data") dataStream.pos = self.operand self.ct = PrcData(dataStream) elif self.sprm == 0x6412: @@ -1593,7 +1597,7 @@ class Sprm(BinaryStream): elif self.sprm == 0xca47: self.ct = CMajorityOperand(self) else: - print '<todo what="Sprm::__init__() unhandled sprm of size 9: %s"/>' % hex(self.sprm) + print('<todo what="Sprm::__init__() unhandled sprm of size 9: %s"/>' % hex(self.sprm)) else: if self.sprm == 0xd608: self.ct = TDefTableOperand(self) @@ -1606,7 +1610,7 @@ class Sprm(BinaryStream): elif self.sprm == 0xc60d: self.ct = PChgTabsPapxOperand(self) else: - print '<todo what="Sprm::__init__() unhandled sprm of size %s: %s"/>' % (self.getOperandSize(), hex(self.sprm)) + print('<todo what="Sprm::__init__() unhandled sprm of size %s: %s"/>' % (self.getOperandSize(), hex(self.sprm))) def dump(self): sgcmap = { @@ -1640,7 +1644,7 @@ class Sprm(BinaryStream): attrs.append('operand=""') else: attrs.append('operand="%s"' % hex(self.operand)) - print '<sprm %s%s>' % (" ".join(attrs), {True: "/", False: ""}[close]) + print('<sprm %s%s>' % (" ".join(attrs), {True: "/", False: ""}[close])) if self.ct: if type(self.ct) == bool: if self.sprm == 0x6a03 and self.transformed == r"\x01": @@ -1654,7 +1658,7 @@ 
class Sprm(BinaryStream): else: self.ct = PICFAndOfficeArtData(self) self.ct.dump() - print '</sprm>' + print('</sprm>') def getOperandSize(self): if self.spra == 6: # variable @@ -1688,9 +1692,9 @@ class Prl(BinaryStream): indexstr = "" if self.index is not None: indexstr = ' index="%d"' % self.index - print '<prl type="Prl" offset="%d"%s>' % (self.posOrig, indexstr) + print('<prl type="Prl" offset="%d"%s>' % (self.posOrig, indexstr)) self.sprm.dump() - print '</prl>' + print('</prl>') def getSize(self): return 2 + self.sprm.getOperandSize() @@ -1704,7 +1708,7 @@ class GrpPrlAndIstd(BinaryStream): self.size = size def dump(self): - print '<grpPrlAndIstd type="GrpPrlAndIstd" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<grpPrlAndIstd type="GrpPrlAndIstd" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos self.printAndSet("istd", self.getuInt16()) pos += 2 @@ -1712,7 +1716,7 @@ class GrpPrlAndIstd(BinaryStream): prl = Prl(self, pos, mainStream=self.mainStream) prl.dump() pos += prl.getSize() - print '</grpPrlAndIstd>' + print('</grpPrlAndIstd>') class Chpx(BinaryStream): @@ -1734,11 +1738,11 @@ class Chpx(BinaryStream): index += 1 def dump(self): - print '<chpx type="Chpx" offset="%d">' % self.pos + print('<chpx type="Chpx" offset="%d">' % self.pos) self.printAndSet("cb", self.cb) for prl in self.prls: prl.dump() - print '</chpx>' + print('</chpx>') class PapxInFkp(BinaryStream): @@ -1748,7 +1752,7 @@ class PapxInFkp(BinaryStream): self.pos = offset def dump(self): - print '<papxInFkp type="PapxInFkp" offset="%d">' % self.pos + print('<papxInFkp type="PapxInFkp" offset="%d">' % self.pos) self.printAndSet("cb", self.readuInt8()) if self.cb == 0: self.printAndSet("cb_", self.readuInt8()) @@ -1756,7 +1760,7 @@ class PapxInFkp(BinaryStream): else: grpPrlAndIstd = GrpPrlAndIstd(self.bytes, self.pos, 2 * self.cb - 1, mainStream=self.mainStream) grpPrlAndIstd.dump() - print '</papxInFkp>' + print('</papxInFkp>') class 
BxPap(BinaryStream): @@ -1769,11 +1773,11 @@ class BxPap(BinaryStream): self.parentpos = parentoffset def dump(self): - print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("bOffset", self.readuInt8()) papxInFkp = PapxInFkp(self.bytes, self.mainStream, self.parentpos + self.bOffset * 2) papxInFkp.dump() - print '</bxPap>' + print('</bxPap>') class ChpxFkp(BinaryStream): @@ -1785,7 +1789,7 @@ class ChpxFkp(BinaryStream): self.pnFkpChpx = pnFkpChpx def dump(self): - print '<chpxFkp type="ChpxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<chpxFkp type="ChpxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.crun = self.getuInt8(pos=self.pos + self.size - 1) pos = self.pos self.transformeds = [] @@ -1793,9 +1797,9 @@ class ChpxFkp(BinaryStream): # rgfc start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end) + print('<rgfc index="%d" start="%d" end="%d">' % (i, start, end)) self.transformed = self.quoteAttr(self.pnFkpChpx.mainStream.retrieveOffset(start, end)) - print '<transformed value="%s"/>' % self.transformed + print('<transformed value="%s"/>' % self.transformed) self.transformeds.append(self.transformed) pos += 4 @@ -1804,10 +1808,10 @@ class ChpxFkp(BinaryStream): chpxOffset = self.getuInt8(pos=offset) * 2 chpx = Chpx(self, self.mainStream, self.pos + chpxOffset, self.transformed) chpx.dump() - print '</rgfc>' + print('</rgfc>') self.printAndSet("crun", self.crun) - print '</chpxFkp>' + print('</chpxFkp>') class PapxFkp(BinaryStream): @@ -1818,25 +1822,25 @@ class PapxFkp(BinaryStream): self.size = size def dump(self): - print '<papxFkp type="PapxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<papxFkp type="PapxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.cpara = 
self.getuInt8(pos=self.pos + self.size - 1) pos = self.pos for i in range(self.cpara): # rgfc start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end) - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveOffset(start, end)) + print('<rgfc index="%d" start="%d" end="%d">' % (i, start, end)) + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveOffset(start, end))) pos += 4 # rgbx offset = PLC.getPLCOffset(self.pos, self.cpara, BxPap.size, i) bxPap = BxPap(self.bytes, self.mainStream, offset, self.pos) bxPap.dump() - print '</rgfc>' + print('</rgfc>') self.printAndSet("cpara", self.cpara) - print '</papxFkp>' + print('</papxFkp>') class PnFkpChpx(BinaryStream): @@ -1849,12 +1853,12 @@ class PnFkpChpx(BinaryStream): self.plcBteChpx = plcBteChpx def dump(self): - print '<%s type="PnFkpChpx" offset="%d" size="%d bytes">' % (self.name, self.pos, self.size) + print('<%s type="PnFkpChpx" offset="%d" size="%d bytes">' % (self.name, self.pos, self.size)) buf = self.readuInt32() self.printAndSet("pn", buf & (2 ** 22 - 1)) chpxFkp = ChpxFkp(self, self.pn * 512, 512) chpxFkp.dump() - print '</%s>' % self.name + print('</%s>' % self.name) class LPXCharBuffer9(BinaryStream): @@ -1865,10 +1869,10 @@ class LPXCharBuffer9(BinaryStream): self.name = name def dump(self): - print '<%s type="LPXCharBuffer9" offset="%d" size="20 bytes">' % (self.name, self.pos) + print('<%s type="LPXCharBuffer9" offset="%d" size="20 bytes">' % (self.name, self.pos)) self.printAndSet("cch", self.readuInt16()) self.printAndSet("xcharArray", self.bytes[self.pos:self.pos + (self.cch * 2)].decode('utf-16'), hexdump=False) - print '</%s>' % self.name + print('</%s>' % self.name) class ATRDPre10(BinaryStream): @@ -1878,7 +1882,7 @@ class ATRDPre10(BinaryStream): self.pos = offset def dump(self): - print '<aATRDPre10 type="ATRDPre10" offset="%d" size="30 bytes">' % self.pos + 
print('<aATRDPre10 type="ATRDPre10" offset="%d" size="30 bytes">' % self.pos) xstUsrInitl = LPXCharBuffer9(self, "xstUsrInitl") xstUsrInitl.dump() self.pos += 20 @@ -1886,7 +1890,7 @@ class ATRDPre10(BinaryStream): self.printAndSet("bitsNotUsed", self.readuInt16()) self.printAndSet("grfNotUsed", self.readuInt16()) self.printAndSet("ITagBkmk", self.readInt32()) - print '</aATRDPre10>' + print('</aATRDPre10>') class PnFkpPapx(BinaryStream): @@ -1898,12 +1902,12 @@ class PnFkpPapx(BinaryStream): self.name = name def dump(self): - print '<%s type="PnFkpPapx" offset="%d" size="%d bytes">' % (self.name, self.pos, self.size) + print('<%s type="PnFkpPapx" offset="%d" size="%d bytes">' % (self.name, self.pos, self.size)) buf = self.readuInt32() self.printAndSet("pn", buf & (2 ** 22 - 1)) papxFkp = PapxFkp(self.bytes, self.mainStream, self.pn * 512, 512) papxFkp.dump() - print '</%s>' % self.name + print('</%s>' % self.name) class PlcBteChpx(BinaryStream, PLC): @@ -1915,20 +1919,20 @@ class PlcBteChpx(BinaryStream, PLC): self.size = mainStream.lcbPlcfBteChpx def dump(self): - print '<plcBteChpx type="PlcBteChpx" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcBteChpx type="PlcBteChpx" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aFC start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aFC index="%d" start="%d" end="%d">' % (i, start, end) + print('<aFC index="%d" start="%d" end="%d">' % (i, start, end)) pos += 4 # aPnBteChpx aPnBteChpx = PnFkpChpx(self, self.getOffset(self.pos, i), 4, "aPnBteChpx") aPnBteChpx.dump() - print '</aFC>' - print '</plcBteChpx>' + print('</aFC>') + print('</plcBteChpx>') class PlcfHdd(BinaryStream, PLC): @@ -1965,17 +1969,17 @@ class PlcfHdd(BinaryStream, PLC): return "%s (section #%s)" % (contentsMap[contentsIndex], sectionIndex) def dump(self): - print '<plcfHdd type="PlcfHdd" offset="%d" size="%d bytes">' % (self.pos, self.size) + 
print('<plcfHdd type="PlcfHdd" offset="%d" size="%d bytes">' % (self.pos, self.size)) offset = self.mainStream.getHeaderOffset() pos = self.pos for i in range(self.getElements() - 1): start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aCP index="%d" contents="%s" start="%d" end="%d">' % (i, self.getContents(i), start, end) - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end)) + print('<aCP index="%d" contents="%s" start="%d" end="%d">' % (i, self.getContents(i), start, end)) + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end))) pos += 4 - print '</aCP>' - print '</plcfHdd>' + print('</aCP>') + print('</plcfHdd>') class PlcfandTxt(BinaryStream, PLC): @@ -1987,17 +1991,17 @@ class PlcfandTxt(BinaryStream, PLC): self.size = size def dump(self): - print '<plcfandTxt type="PlcfandTxt" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfandTxt type="PlcfandTxt" offset="%d" size="%d bytes">' % (self.pos, self.size)) offset = self.mainStream.getCommentOffset() pos = self.pos for i in range(self.getElements() - 1): start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end)) + print('<aCP index="%d" start="%d" end="%d">' % (i, start, end)) + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end))) pos += 4 - print '</aCP>' - print '</plcfandTxt>' + print('</aCP>') + print('</plcfandTxt>') class PlcfandRef(BinaryStream, PLC): @@ -2009,19 +2013,19 @@ class PlcfandRef(BinaryStream, PLC): self.size = size def dump(self): - print '<plcfandRef type="PlcfandRef" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfandRef type="PlcfandRef" offset="%d" size="%d bytes">' % 
(self.pos, self.size)) pos = self.pos for i in range(self.getElements()): start = self.getuInt32(pos=pos) - print '<aCP index="%d" commentEnd="%d">' % (i, start) - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCP(start)) + print('<aCP index="%d" commentEnd="%d">' % (i, start)) + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCP(start))) pos += 4 # aATRDPre10 aATRDPre10 = ATRDPre10(self, self.getOffset(self.pos, i)) aATRDPre10.dump() - print '</aCP>' - print '</plcfandRef>' + print('</aCP>') + print('</plcfandRef>') class PlcBtePapx(BinaryStream, PLC): @@ -2033,20 +2037,20 @@ class PlcBtePapx(BinaryStream, PLC): self.size = size def dump(self): - print '<plcBtePapx type="PlcBtePapx" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcBtePapx type="PlcBtePapx" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aFC start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aFC index="%d" start="%d" end="%d">' % (i, start, end) + print('<aFC index="%d" start="%d" end="%d">' % (i, start, end)) pos += 4 # aPnBtePapx aPnBtePapx = PnFkpPapx(self.bytes, self.mainStream, self.getOffset(self.pos, i), 4, "aPnBtePapx") aPnBtePapx.dump() - print '</aFC>' - print '</plcBtePapx>' + print('</aFC>') + print('</plcBtePapx>') class Pcdt(BinaryStream): @@ -2061,11 +2065,11 @@ class Pcdt(BinaryStream): self.plcPcd = PlcPcd(self.bytes, self.mainStream, self.pos, self.lcb) def dump(self): - print '<pcdt type="Pcdt" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<pcdt type="Pcdt" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("clxt", self.clxt) self.printAndSet("lcb", self.lcb) self.plcPcd.dump() - print '</pcdt>' + print('</pcdt>') class PrcData(BinaryStream): @@ -2086,13 +2090,13 @@ class PrcData(BinaryStream): parent.pos = self.pos def dump(self): - print '<prcData>' + print('<prcData>') 
self.printAndSet("cbGrpprl", self.cbGrpprl) - print '<grpPrl>' + print('<grpPrl>') for i in self.prls: i.dump() - print '</grpPrl>' - print '</prcData>' + print('</grpPrl>') + print('</prcData>') class Prc(BinaryStream): @@ -2107,9 +2111,9 @@ class Prc(BinaryStream): parent.pos = self.pos def dump(self, index): - print '<prc index="%d">' % index + print('<prc index="%d">' % index) self.prcData.dump() - print '</prc>' + print('</prc>') class Clx(BinaryStream): @@ -2128,11 +2132,11 @@ class Clx(BinaryStream): self.pcdt = Pcdt(self.bytes, self.mainStream, self.pos, self.size) def dump(self): - print '<clx type="Clx" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<clx type="Clx" offset="%d" size="%d bytes">' % (self.pos, self.size)) for index, elem in enumerate(self.prcs): elem.dump(index) self.pcdt.dump() - print '</clx>' + print('</clx>') class Copts60(BinaryStream): @@ -2142,7 +2146,7 @@ class Copts60(BinaryStream): self.pos = dop.pos def dump(self): - print '<copts60 type="Copts60" offset="%s" size="2 bytes">' % self.pos + print('<copts60 type="Copts60" offset="%s" size="2 bytes">' % self.pos) # Copts60 first byte buf = self.readuInt8() self.printAndSet("fNoTabForInd", self.getBit(buf, 0)) @@ -2164,7 +2168,7 @@ class Copts60(BinaryStream): self.printAndSet("fExpShRtn", self.getBit(buf, 5)) self.printAndSet("fDntULTrlSpc", self.getBit(buf, 6)) self.printAndSet("fDntBlnSbDbWid", self.getBit(buf, 7)) - print '</copts60>' + print('</copts60>') class DTTM(BinaryStream): @@ -2177,15 +2181,15 @@ class DTTM(BinaryStream): def dump(self): buf = self.readuInt32() - print '<%s type="DTTM" offset="%d" size="4 bytes">' % (self.name, self.pos) + print('<%s type="DTTM" offset="%d" size="4 bytes">' % (self.name, self.pos)) self.printAndSet("mint", buf & 0x0000003f) # 1..6th bits self.printAndSet("hr", (buf & 0x000007c0) >> 6) # 7..11th bits self.printAndSet("dom", (buf & 0x0000f800) >> 11) # 12..16th bits self.printAndSet("mon", (buf & 0x000f0000) >> 16) # 17..20th 
bits self.printAndSet("yr", (buf & 0x1ff00000) >> 20) # 21..29th bits self.printAndSet("wdy", (buf & 0xe0000000) >> 29) # 30..32th bits - print '<transformed value="%s-%s-%s %s:%s"/>' % (1900 + self.yr, self.mon, self.dom, self.hr, self.mint) - print '</%s>' % self.name + print('<transformed value="%s-%s-%s %s:%s"/>' % (1900 + self.yr, self.mon, self.dom, self.hr, self.mint)) + print('</%s>' % self.name) self.parent.pos = self.pos @@ -2197,7 +2201,7 @@ class GRFSTD(BinaryStream): self.pos = parent.pos def dump(self): - print '<grfstd type="GRFSTD" offset="%d" size="2 bytes">' % self.pos + print('<grfstd type="GRFSTD" offset="%d" size="2 bytes">' % self.pos) buf = self.readuInt8() self.printAndSet("fAutoRedef", self.getBit(buf, 0)) self.printAndSet("fHidden", self.getBit(buf, 1)) @@ -2215,7 +2219,7 @@ class GRFSTD(BinaryStream): self.printAndSet("fUnhideWhenUsed", self.getBit(buf, 3)) self.printAndSet("fQFormat", self.getBit(buf, 4)) self.printAndSet("fReserved", (buf & 0xe0) >> 5) # 6..8th bits - print '</grfstd>' + print('</grfstd>') self.parent.pos = self.pos @@ -2229,7 +2233,7 @@ class DopBase(BinaryStream): self.dop = dop def dump(self): - print '<dopBase offset="%d" size="%d bytes">' % (self.pos, 84) + print('<dopBase offset="%d" size="%d bytes">' % (self.pos, 84)) buf = self.readuInt8() self.printAndSet("fFacingPages", self.getBit(buf, 0)) self.printAndSet("unused1", self.getBit(buf, 1)) @@ -2332,7 +2336,7 @@ class DopBase(BinaryStream): self.printAndSet("zkSaved", (buf & 0x3000) >> 12) # 13..14th bits self.printAndSet("unused16", self.getBit(buf, 14)) self.printAndSet("iGutterPos", self.getBit(buf, 15)) - print '</dopBase>' + print('</dopBase>') assert self.pos == self.dop.pos + DopBase.size self.dop.pos = self.pos @@ -2344,7 +2348,7 @@ class Copts80(BinaryStream): self.pos = dop.pos def dump(self): - print '<copts80 type="Copts80" offset="%d" size="4 bytes">' % self.pos + print('<copts80 type="Copts80" offset="%d" size="4 bytes">' % self.pos) 
Copts60(self).dump() self.pos += 2 @@ -2367,7 +2371,7 @@ class Copts80(BinaryStream): self.printAndSet("fWPSpace", self.getBit(buf, 5)) self.printAndSet("fWPJust", self.getBit(buf, 6)) self.printAndSet("fPrintMet", self.getBit(buf, 7)) - print '</copts80>' + print('</copts80>') class Copts(BinaryStream): @@ -2380,7 +2384,7 @@ class Copts(BinaryStream): self.dop = dop def dump(self): - print '<copts type="Copts" offset="%d" size="%d bytes">' % (self.pos, Copts.size) + print('<copts type="Copts" offset="%d" size="%d bytes">' % (self.pos, Copts.size)) Copts80(self).dump() self.pos += 4 @@ -2432,7 +2436,7 @@ class Copts(BinaryStream): self.printAndSet("empty4", self.readuInt32()) self.printAndSet("empty5", self.readuInt32()) self.printAndSet("empty6", self.readuInt32()) - print '</copts>' + print('</copts>') assert self.pos == self.dop.pos + Copts.size self.dop.pos = self.pos @@ -2448,17 +2452,17 @@ class Dop95(BinaryStream): self.dopSize = dopSize def dump(self): - print '<dop95 type="Dop95" offset="%d" size="88 bytes">' % self.pos + print('<dop95 type="Dop95" offset="%d" size="88 bytes">' % self.pos) pos = self.pos dopBase = DopBase(self) dopBase.dump() if self.pos >= pos + self.dopSize: - print '</dop95>' + print('</dop95>') self.dop.pos = self.pos return Copts80(self).dump() self.pos += 4 - print '</dop95>' + print('</dop95>') assert self.pos == self.dop.pos + Dop95.size self.dop.pos = self.pos @@ -2473,7 +2477,7 @@ class DopTypography(BinaryStream): self.dop = dop def dump(self): - print '<dopTypography type="DopTypography" offset="%d" size="310 bytes">' % self.pos + print('<dopTypography type="DopTypography" offset="%d" size="310 bytes">' % self.pos) buf = self.readuInt16() self.printAndSet("fKerningPunct", self.getBit(buf, 0)) self.printAndSet("iJustification", (buf & 0x0006) >> 1) # 2..3rd bits @@ -2493,7 +2497,7 @@ class DopTypography(BinaryStream): self.printAndSet("rgxchLPunct", self.getString(self.cchLeadingPunct), hexdump=False) self.pos += 102 - print 
'</dopTypography>' + print('</dopTypography>') assert self.pos == self.dop.pos + DopTypography.size self.dop.pos = self.pos @@ -2508,7 +2512,7 @@ class Dogrid(BinaryStream): self.dop = dop def dump(self): - print '<dogrid type="Dogrid" offset="%d" size="%d bytes">' % (self.pos, Dogrid.size) + print('<dogrid type="Dogrid" offset="%d" size="%d bytes">' % (self.pos, Dogrid.size)) self.printAndSet("xaGrid", self.readuInt16()) self.printAndSet("yaGrid", self.readuInt16()) self.printAndSet("dxaGrid", self.readuInt16()) @@ -2521,7 +2525,7 @@ class Dogrid(BinaryStream): buf = self.readuInt8() self.printAndSet("dxGridDisplay", (buf & 0x7f)) # 1..7th bits self.printAndSet("fFollowMargins", self.getBit(buf, 7)) - print '</dogrid>' + print('</dogrid>') assert self.pos == self.dop.pos + Dogrid.size self.dop.pos = self.pos @@ -2533,7 +2537,7 @@ class Asumyi(BinaryStream): self.pos = dop.pos def dump(self): - print '<asumyi type="Asumyi" offset="%d" size="12 bytes">' % self.pos + print('<asumyi type="Asumyi" offset="%d" size="12 bytes">' % self.pos) buf = self.readuInt16() self.printAndSet("fValid", self.getBit(buf, 0)) self.printAndSet("fView", self.getBit(buf, 1)) @@ -2544,7 +2548,7 @@ class Asumyi(BinaryStream): self.printAndSet("wDlgLevel", self.readuInt16()) self.printAndSet("lHighestLevel", self.readuInt32()) self.printAndSet("lCurrentLevel", self.readuInt32()) - print '</asumyi>' + print('</asumyi>') class Dop97(BinaryStream): @@ -2558,12 +2562,12 @@ class Dop97(BinaryStream): self.dopSize = dopSize def dump(self): - print '<dop97 type="Dop97" offset="%d" size="%d bytes">' % (self.pos, Dop97.size) + print('<dop97 type="Dop97" offset="%d" size="%d bytes">' % (self.pos, Dop97.size)) pos = self.pos dop95 = Dop95(self, self.dopSize) dop95.dump() if self.pos >= pos + self.dopSize: - print '</dop97>' + print('</dop97>') self.dop.pos = self.pos return @@ -2621,7 +2625,7 @@ class Dop97(BinaryStream): self.printAndSet("nfcEdnRef", self.readuInt16()) 
self.printAndSet("hpsZoomFontPag", self.readuInt16()) self.printAndSet("dywDispPag", self.readuInt16()) - print '</dop97>' + print('</dop97>') assert self.pos == self.dop.pos + Dop97.size self.dop.pos = self.pos @@ -2637,13 +2641,13 @@ class Dop2000(BinaryStream): self.dopSize = dopSize def dump(self): - print '<dop2000 type="Dop2000" offset="%d" size="544 bytes">' % self.pos + print('<dop2000 type="Dop2000" offset="%d" size="544 bytes">' % self.pos) dop97 = Dop97(self, self.dopSize) dop97.dump() if self.pos == self.size: - print '<info what="Dop2000 size is smaller than expected."/>' - print '</dop2000>' + print('<info what="Dop2000 size is smaller than expected."/>') + print('</dop2000>') self.dop.pos = self.pos return @@ -2697,7 +2701,7 @@ class Dop2000(BinaryStream): self.printAndSet("fSaveInvalidXML", self.getBit(buf, 5)) self.printAndSet("fShowXMLErrors", self.getBit(buf, 6)) self.printAndSet("fAlwaysMergeEmptyNamespace", self.getBit(buf, 7)) - print '</dop2000>' + print('</dop2000>') assert self.pos == self.dop.pos + Dop2000.size self.dop.pos = self.pos @@ -2713,7 +2717,7 @@ class Dop2002(BinaryStream): self.dopSize = dopSize def dump(self): - print '<dop2002 type="Dop2002" offset="%d" size="%d bytes">' % (self.pos, Dop2002.size) + print('<dop2002 type="Dop2002" offset="%d" size="%d bytes">' % (self.pos, Dop2002.size)) dop2000 = Dop2000(self, self.dopSize) dop2000.dump() @@ -2750,7 +2754,7 @@ class Dop2002(BinaryStream): self.printAndSet("cpMinRmTxbx", self.readuInt32()) self.printAndSet("cpMinRmHdrTxbx", self.readuInt32()) self.printAndSet("rsidRoot", self.readuInt32()) - print '</dop2002>' + print('</dop2002>') assert self.pos == self.dop.pos + Dop2002.size self.dop.pos = self.pos @@ -2766,7 +2770,7 @@ class Dop2003(BinaryStream): self.dopSize = dopSize def dump(self): - print '<dop2003 type="Dop2003" offset="%d" size="616 bytes">' % self.pos + print('<dop2003 type="Dop2003" offset="%d" size="616 bytes">' % self.pos) dop2002 = Dop2002(self, self.dopSize) 
dop2002.dump() @@ -2804,7 +2808,7 @@ class Dop2003(BinaryStream): self.printAndSet("grfitbid", self.readuInt8()) self.printAndSet("empty3", self.readuInt8()) self.printAndSet("ilfoMacAtCleanup", self.readuInt16()) - print '</dop2003>' + print('</dop2003>') assert self.pos == self.dop.pos + Dop2003.size self.dop.pos = self.pos @@ -2816,7 +2820,7 @@ class DopMth(BinaryStream): self.pos = dop.pos def dump(self): - print '<dopMth type="DopMth" offset="%d" size="34 bytes">' % self.pos + print('<dopMth type="DopMth" offset="%d" size="34 bytes">' % self.pos) buf = self.readuInt32() self.printAndSet("mthbrk", (buf & 0x03)) # 1..2nd bits self.printAndSet("mthbrkSub", (buf & 0xc) >> 2) # 3..4th bits @@ -2837,7 +2841,7 @@ class DopMth(BinaryStream): self.printAndSet("empty3", self.readuInt32()) self.printAndSet("empty4", self.readuInt32()) self.printAndSet("dxaIndentWrapped", self.readuInt32()) - print '</dopMth>' + print('</dopMth>') class Dop2007(BinaryStream): @@ -2849,7 +2853,7 @@ class Dop2007(BinaryStream): self.dopSize = dopSize def dump(self): - print '<dop2007 type="Dop2007" offset="%d">' % self.pos + print('<dop2007 type="Dop2007" offset="%d">' % self.pos) dop2003 = Dop2003(self, self.dopSize) dop2003.dump() @@ -2873,7 +2877,7 @@ class Dop2007(BinaryStream): self.printAndSet("empty6", self.readuInt32()) DopMth(self).dump() self.pos += 34 - print '</dop2007>' + print('</dop2007>') class RC4EncryptionHeader(BinaryStream): @@ -2885,14 +2889,14 @@ class RC4EncryptionHeader(BinaryStream): self.size = size def dump(self): - print '<RC4EncryptionHeader>' + print('<RC4EncryptionHeader>') self.Salt = self.readBytes(16) - print '<Salt value="%s"/>' % globals.encodeName(self.Salt) + print('<Salt value="%s"/>' % globals.encodeName(self.Salt)) self.EncryptedVerifier = self.readBytes(16) - print '<EncryptedVerifier value="%s"/>' % globals.encodeName(self.EncryptedVerifier) + print('<EncryptedVerifier value="%s"/>' % globals.encodeName(self.EncryptedVerifier)) 
self.EncryptedVerifierHash = self.readBytes(16) - print '<EncryptedVerifierHash value="%s"/>' % globals.encodeName(self.EncryptedVerifierHash) - print '</RC4EncryptionHeader>' + print('<EncryptedVerifierHash value="%s"/>' % globals.encodeName(self.EncryptedVerifierHash)) + print('</RC4EncryptionHeader>') assert self.pos == self.size @@ -2905,7 +2909,7 @@ class Dop(BinaryStream): self.fib = fib def dump(self): - print '<dop type="Dop" offset="%s" size="%d bytes">' % (self.pos, self.size) + print('<dop type="Dop" offset="%s" size="%d bytes">' % (self.pos, self.size)) if self.fib.nFibNew == 0: Dop97(self, self.size).dump() elif self.fib.nFibNew == 0x00d9: @@ -2915,8 +2919,8 @@ class Dop(BinaryStream): elif self.fib.nFibNew == 0x0112: Dop2007(self, self.size).dump() else: - print """<todo what="Dop.dump() doesn't know how to handle nFibNew = %s"/>""" % hex(self.fib.nFibNew) - print '</dop>' + print("""<todo what="Dop.dump() doesn't know how to handle nFibNew = %s"/>""" % hex(self.fib.nFibNew)) + print('</dop>') class FFID(BinaryStream): @@ -2936,7 +2940,7 @@ class FFID(BinaryStream): self.ff = (self.ffid & 0x70) >> 4 # 5-7th bits self.unused2 = (self.ffid & 0x80) >> 7 # 8th bit - print '<ffid value="%s" prq="%s" fTrueType="%s" ff="%s"/>' % (hex(self.ffid), hex(self.prq), self.fTrueType, hex(self.ff)) + print('<ffid value="%s" prq="%s" fTrueType="%s" ff="%s"/>' % (hex(self.ffid), hex(self.prq), self.fTrueType, hex(self.ff))) class PANOSE(BinaryStream): @@ -2946,10 +2950,10 @@ class PANOSE(BinaryStream): self.pos = offset def dump(self): - print '<panose type="PANOSE" offset="%s" size="10 bytes">' % self.pos + print('<panose type="PANOSE" offset="%s" size="10 bytes">' % self.pos) for i in ["bFamilyType", "bSerifStyle", "bWeight", "bProportion", "bContrast", "bStrokeVariation", "bArmStyle", "bLetterform", "bMidline", "bHeight"]: self.printAndSet(i, self.readuInt8()) - print '</panose>' + print('</panose>') class FontSignature(BinaryStream): @@ -2965,9 +2969,9 @@ class 
FontSignature(BinaryStream): fsUsb4 = self.readuInt32() fsCsb1 = self.readuInt32() fsCsb2 = self.readInt32() - print '<fontSignature fsUsb1="%s" fsUsb2="%s" fsUsb3="%s" fsUsb4="%s" fsCsb1="%s" fsCsb2="%s"/>' % ( - hex(fsUsb1), hex(fsUsb2), hex(fsUsb3), hex(fsUsb4), hex(fsCsb1), hex(fsCsb2) - ) + print('<fontSignature fsUsb1="%s" fsUsb2="%s" fsUsb3="%s" fsUsb4="%s" fsCsb1="%s" fsCsb2="%s"/>' % + (hex(fsUsb1), hex(fsUsb2), hex(fsUsb3), hex(fsUsb4), hex(fsCsb1), hex(fsCsb2)) + ) class FFN(BinaryStream): @@ -2978,7 +2982,7 @@ class FFN(BinaryStream): self.size = size def dump(self): - print '<ffn type="FFN" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<ffn type="FFN" offset="%d" size="%d bytes">' % (self.pos, self.size)) FFID(self.bytes, self.pos).dump() self.pos += 1 self.printAndSet("wWeight", self.readInt16(), hexdump=False) @@ -2988,8 +2992,8 @@ class FFN(BinaryStream): self.pos += 10 FontSignature(self.bytes, self.pos).dump() self.pos += 24 - print '<xszFfn value="%s"/>' % self.readString() - print '</ffn>' + print('<xszFfn value="%s"/>' % self.readString()) + print('</ffn>') class SttbfFfn(BinaryStream): @@ -3000,16 +3004,16 @@ class SttbfFfn(BinaryStream): self.size = size def dump(self): - print '<sttbfFfn type="SttbfFfn" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<sttbfFfn type="SttbfFfn" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("cData", self.readuInt16()) self.printAndSet("cbExtra", self.readuInt16()) for i in range(self.cData): cchData = self.readuInt8() - print '<cchData index="%d" offset="%d" size="%d bytes">' % (i, self.pos, cchData) + print('<cchData index="%d" offset="%d" size="%d bytes">' % (i, self.pos, cchData)) FFN(self.bytes, self.mainStream, self.pos, cchData).dump() self.pos += cchData - print '</cchData>' - print '</sttbfFfn>' + print('</cchData>') + print('</sttbfFfn>') class GrpXstAtnOwners(BinaryStream): @@ -3022,12 +3026,12 @@ class GrpXstAtnOwners(BinaryStream): def 
dump(self): posOrig = self.pos - print '<grpXstAtnOwners type="GrpXstAtnOwners" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<grpXstAtnOwners type="GrpXstAtnOwners" offset="%d" size="%d bytes">' % (self.pos, self.size)) while self.pos < posOrig + self.size: xst = Xst(self) xst.dump() self.pos = xst.pos - print '</grpXstAtnOwners>' + print('</grpXstAtnOwners>') class SttbfAssoc(BinaryStream): @@ -3059,7 +3063,7 @@ class SttbfAssoc(BinaryStream): 0x10: "Unused. This index MUST be ignored.", 0x11: "The write-reservation password of the document.", } - print '<sttbfAssoc type="SttbfAssoc" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<sttbfAssoc type="SttbfAssoc" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("fExtend", self.readuInt16()) self.printAndSet("cData", self.readuInt16()) self.printAndSet("cbExtra", self.readuInt16()) @@ -3071,16 +3075,16 @@ class SttbfAssoc(BinaryStream): meaning = "unknown" if self.pos + 2 * cchData > self.size: self.cData = 0 - print '<info what="SttbfAssoc::dump() wanted to read beyond the end of the stream"/>' + print('<info what="SttbfAssoc::dump() wanted to read beyond the end of the stream"/>') break - print '<cchData index="%s" meaning="%s" offset="%d" size="%d bytes">' % (hex(i), meaning, self.pos, cchData) - print '<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True) + print('<cchData index="%s" meaning="%s" offset="%d" size="%d bytes">' % (hex(i), meaning, self.pos, cchData)) + print('<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True)) self.pos += 2 * cchData - print '</cchData>' + print('</cchData>') # Probably this was cleared manually. 
if self.cData != 0: assert self.pos == self.mainStream.fcSttbfAssoc + self.size - print '</sttbfAssoc>' + print('</sttbfAssoc>') class SttbfRMark(BinaryStream): @@ -3092,19 +3096,19 @@ class SttbfRMark(BinaryStream): self.mainStream = mainStream def dump(self): - print '<sttbfRMark type="SttbfRMark" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<sttbfRMark type="SttbfRMark" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("fExtend", self.readuInt16()) self.printAndSet("cData", self.readuInt16()) self.printAndSet("cbExtra", self.readuInt16()) for i in range(self.cData): cchData = self.readuInt16() - print '<cchData index="%s" offset="%d" size="%d bytes">' % (i, self.pos, cchData) - print '<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True) + print('<cchData index="%s" offset="%d" size="%d bytes">' % (i, self.pos, cchData)) + print('<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True)) self.pos += 2 * cchData - print '</cchData>' + print('</cchData>') if self.cData != 0: assert self.pos == self.mainStream.fcSttbfRMark + self.size - print '</sttbfRMark>' + print('</sttbfRMark>') class OfficeArtWordDrawing(BinaryStream): @@ -3115,10 +3119,10 @@ class OfficeArtWordDrawing(BinaryStream): self.officeArtContent = officeArtContent def dump(self): - print '<officeArtWordDrawing type="OfficeArtWordDrawing" pos="%d">' % self.pos + print('<officeArtWordDrawing type="OfficeArtWordDrawing" pos="%d">' % self.pos) self.printAndSet("dgglbl", self.readuInt8()) msodraw.DgContainer(self, "container").dumpXml(self, getWordModel(self.officeArtContent.mainStream)) - print '</officeArtWordDrawing>' + print('</officeArtWordDrawing>') self.officeArtContent.pos = self.pos @@ -3131,17 +3135,17 @@ class OfficeArtContent(BinaryStream): self.mainStream = mainStream def dump(self): - print '<officeArtContent 
type="OfficeArtContent" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<officeArtContent type="OfficeArtContent" offset="%d" size="%d bytes">' % (self.pos, self.size)) msodraw.DggContainer(self, "DrawingGroupData").dumpXml(self, getWordModel(self.mainStream)) - print '<Drawings type="main" offset="%d">' % self.pos + print('<Drawings type="main" offset="%d">' % self.pos) OfficeArtWordDrawing(self).dump() - print '</Drawings>' + print('</Drawings>') if self.pos < self.mainStream.fcDggInfo + self.size: - print '<Drawings type="header" offset="%d">' % self.pos + print('<Drawings type="header" offset="%d">' % self.pos) OfficeArtWordDrawing(self).dump() - print '</Drawings>' + print('</Drawings>') assert self.pos == self.mainStream.fcDggInfo + self.size - print '</officeArtContent>' + print('</officeArtContent>') class ATNBE(BinaryStream): @@ -3153,11 +3157,11 @@ class ATNBE(BinaryStream): self.pos = sttbfAtnBkmk.pos def dump(self): - print '<atnbe type="ATNBE">' + print('<atnbe type="ATNBE">') self.printAndSet("bmc", self.readuInt16()) self.printAndSet("ITag", self.readuInt32()) self.printAndSet("ITagOld", self.readuInt32()) - print '</atnbe>' + print('</atnbe>') class SttbfAtnBkmk(BinaryStream): @@ -3168,19 +3172,19 @@ class SttbfAtnBkmk(BinaryStream): self.size = size def dump(self): - print '<sttbfAtnBkmk type="SttbfAtnBkmk" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<sttbfAtnBkmk type="SttbfAtnBkmk" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("fExtended", self.readuInt16()) self.printAndSet("cData", self.readuInt16()) self.printAndSet("cbExtra", self.readuInt16()) for i in range(self.cData): cchData = self.readuInt16() - print '<cchData index="%d" offset="%d" size="%d bytes"/>' % (i, self.pos, cchData) - print '<extraData index="%d" offset="%d" size="%d bytes">' % (i, self.pos, ATNBE.size) + print('<cchData index="%d" offset="%d" size="%d bytes"/>' % (i, self.pos, cchData)) + print('<extraData 
index="%d" offset="%d" size="%d bytes">' % (i, self.pos, ATNBE.size)) atnbe = ATNBE(self) atnbe.dump() self.pos += ATNBE.size - print '</extraData>' - print '</sttbfAtnBkmk>' + print('</extraData>') + print('</sttbfAtnBkmk>') class Stshif(BinaryStream): @@ -3191,7 +3195,7 @@ class Stshif(BinaryStream): self.size = 18 def dump(self): - print '<stshif type="Stshif" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<stshif type="Stshif" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("cstd", self.readuInt16()) self.printAndSet("cbSTDBaseInFile", self.readuInt16()) buf = self.readuInt16() @@ -3203,7 +3207,7 @@ class Stshif(BinaryStream): self.printAndSet("ftcAsci", self.readuInt16()) self.printAndSet("ftcFE", self.readuInt16()) self.printAndSet("ftcOther", self.readuInt16()) - print '</stshif>' + print('</stshif>') class LSD(BinaryStream): @@ -3230,14 +3234,14 @@ class StshiLsd(BinaryStream): self.pos = offset def dump(self): - print '<stshiLsd type="StshiLsd" offset="%d">' % (self.pos) + print('<stshiLsd type="StshiLsd" offset="%d">' % (self.pos)) self.printAndSet("cbLSD", self.readuInt16()) for i in range(self.stshi.stshif.stiMaxWhenSaved): - print '<mpstiilsd index="%d" type="LSD">' % i + print('<mpstiilsd index="%d" type="LSD">' % i) LSD(self.bytes, self.pos).dump() - print '</mpstiilsd>' + print('</mpstiilsd>') self.pos += self.cbLSD - print '</stshiLsd>' + print('</stshiLsd>') class STSHI(BinaryStream): @@ -3248,7 +3252,7 @@ class STSHI(BinaryStream): self.size = size def dump(self): - print '<stshi type="STSHI" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<stshi type="STSHI" offset="%d" size="%d bytes">' % (self.pos, self.size)) posOrig = self.pos self.stshif = Stshif(self.bytes, self.mainStream, self.pos) self.stshif.dump() @@ -3258,7 +3262,7 @@ class STSHI(BinaryStream): if self.pos - posOrig < self.size: stshiLsd = StshiLsd(self.bytes, self, self.pos) stshiLsd.dump() - print '</stshi>' + print('</stshi>') 
class LPStshi(BinaryStream): @@ -3268,12 +3272,12 @@ class LPStshi(BinaryStream): self.pos = offset def dump(self): - print '<lpstshi type="LPStshi" offset="%d">' % self.pos + print('<lpstshi type="LPStshi" offset="%d">' % self.pos) self.printAndSet("cbStshi", self.readuInt16(), hexdump=False) self.stshi = STSHI(self.bytes, self.mainStream, self.pos, self.cbStshi) self.stshi.dump() self.pos += self.cbStshi - print '</lpstshi>' + print('</lpstshi>') class StdfBase(BinaryStream): @@ -3284,7 +3288,7 @@ class StdfBase(BinaryStream): self.size = 10 def dump(self): - print '<stdfBase type="StdfBase" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<stdfBase type="StdfBase" offset="%d" size="%d bytes">' % (self.pos, self.size)) buf = self.readuInt16() self.printAndSet("sti", buf & 0x0fff) # 1..12th bits self.printAndSet("fScratch", self.getBit(buf, 13)) @@ -3299,14 +3303,14 @@ class StdfBase(BinaryStream): 3: "table", 4: "numbering" } - print '<stk value="%d" name="%s"/>' % (self.stk, stkmap[self.stk]) + print('<stk value="%d" name="%s"/>' % (self.stk, stkmap[self.stk])) self.printAndSet("istdBase", (buf & 0xfff0) >> 4) # 5..16th bits buf = self.readuInt16() self.printAndSet("cupx", buf & 0x000f) # 1..4th bits self.printAndSet("istdNext", (buf & 0xfff0) >> 4) # 5..16th bits self.printAndSet("bchUpe", self.readuInt16(), hexdump=False) GRFSTD(self).dump() - print '</stdfBase>' + print('</stdfBase>') class StdfPost2000(BinaryStream): @@ -3317,7 +3321,7 @@ class StdfPost2000(BinaryStream): self.size = 8 def dump(self): - print '<stdfPost2000 type="StdfPost2000" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<stdfPost2000 type="StdfPost2000" offset="%d" size="%d bytes">' % (self.pos, self.size)) buf = self.readuInt16() self.printAndSet("istdLink", buf & 0xfff) # 1..12th bits self.printAndSet("fHasOriginalStyle", self.getBit(buf, 13)) # 13th bit @@ -3327,7 +3331,7 @@ class StdfPost2000(BinaryStream): self.printAndSet("iftcHtml", buf & 0x7) # 1..3rd 
bits self.printAndSet("unused", self.getBit(buf, 4)) self.printAndSet("iPriority", (buf & 0xfff0) >> 4) # 5..16th bits - print '</stdfPost2000>' + print('</stdfPost2000>') class Stdf(BinaryStream): @@ -3338,20 +3342,20 @@ class Stdf(BinaryStream): self.pos = std.pos def dump(self): - print '<stdf type="Stdf" offset="%d">' % self.pos + print('<stdf type="Stdf" offset="%d">' % self.pos) self.stdfBase = StdfBase(self.bytes, self.mainStream, self.pos) self.stdfBase.dump() self.pos += self.stdfBase.size if self.pos - self.std.pos < self.std.size: stsh = self.std.lpstd.stsh # root of the stylesheet table cbSTDBaseInFile = stsh.lpstshi.stshi.stshif.cbSTDBaseInFile - print '<stdfPost2000OrNone cbSTDBaseInFile="%s">' % hex(cbSTDBaseInFile) + print('<stdfPost2000OrNone cbSTDBaseInFile="%s">' % hex(cbSTDBaseInFile)) if cbSTDBaseInFile == 0x0012: stdfPost2000 = StdfPost2000(self) stdfPost2000.dump() self.pos = stdfPost2000.pos - print '</stdfPost2000OrNone>' - print '</stdf>' + print('</stdfPost2000OrNone>') + print('</stdf>') class Xst(BinaryStream): @@ -3361,12 +3365,12 @@ class Xst(BinaryStream): self.pos = parent.pos def dump(self): - print '<xst type="Xst" offset="%d">' % self.pos + print('<xst type="Xst" offset="%d">' % self.pos) self.printAndSet("cch", self.readuInt16()) lowOnly = locale.getdefaultlocale()[1] == "UTF-8" - print '<rgtchar value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * self.cch].decode('utf-16'), lowOnly=lowOnly) + print('<rgtchar value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * self.cch].decode('utf-16'), lowOnly=lowOnly)) self.pos += 2 * self.cch - print '</xst>' + print('</xst>') class Xstz(BinaryStream): @@ -3377,12 +3381,12 @@ class Xstz(BinaryStream): self.name = name def dump(self): - print '<%s type="Xstz" offset="%d">' % (self.name, self.pos) + print('<%s type="Xstz" offset="%d">' % (self.name, self.pos)) xst = Xst(self) xst.dump() self.pos = xst.pos self.printAndSet("chTerm", self.readuInt16()) - print 
'</%s>' % self.name + print('</%s>' % self.name) class UpxPapx(BinaryStream): @@ -3393,17 +3397,17 @@ class UpxPapx(BinaryStream): self.pos = lPUpxPapx.pos def dump(self): - print '<upxPapx type="UpxPapx" offset="%d">' % self.pos + print('<upxPapx type="UpxPapx" offset="%d">' % self.pos) self.printAndSet("istd", self.readuInt16()) size = self.lPUpxPapx.cbUpx - 2 pos = 0 - print '<grpprlPapx offset="%d" size="%d bytes">' % (self.pos, size) + print('<grpprlPapx offset="%d" size="%d bytes">' % (self.pos, size)) while size - pos > 0: prl = Prl(self, self.pos + pos) prl.dump() pos += prl.getSize() - print '</grpprlPapx>' - print '</upxPapx>' + print('</grpprlPapx>') + print('</upxPapx>') class UpxChpx(BinaryStream): @@ -3414,16 +3418,16 @@ class UpxChpx(BinaryStream): self.pos = lPUpxChpx.pos def dump(self): - print '<upxChpx type="UpxChpx" offset="%d">' % self.pos + print('<upxChpx type="UpxChpx" offset="%d">' % self.pos) size = self.lPUpxChpx.cbUpx pos = 0 - print '<grpprlChpx offset="%d" size="%d bytes">' % (self.pos, size) + print('<grpprlChpx offset="%d" size="%d bytes">' % (self.pos, size)) while size - pos > 0: prl = Prl(self, self.pos + pos) prl.dump() pos += prl.getSize() - print '</grpprlChpx>' - print '</upxChpx>' + print('</grpprlChpx>') + print('</upxChpx>') class UpxTapx(BinaryStream): @@ -3434,16 +3438,16 @@ class UpxTapx(BinaryStream): self.pos = lPUpxTapx.pos def dump(self): - print '<upxTapx type="UpxTapx" offset="%d">' % self.pos + print('<upxTapx type="UpxTapx" offset="%d">' % self.pos) size = self.lPUpxTapx.cbUpx pos = 0 - print '<grpprlTapx offset="%d" size="%d bytes">' % (self.pos, size) + print('<grpprlTapx offset="%d" size="%d bytes">' % (self.pos, size)) while size - pos > 0: prl = Prl(self, self.pos + pos) prl.dump() pos += prl.getSize() - print '</grpprlTapx>' - print '</upxTapx>' + print('</grpprlTapx>') + print('</upxTapx>') class UPXPadding: @@ -3464,7 +3468,7 @@ class LPUpxPapx(BinaryStream): self.pos = stkParaGRLPUPX.pos def dump(self): 
- print '<lPUpxPapx type="LPUpxPapx" offset="%d">' % self.pos + print('<lPUpxPapx type="LPUpxPapx" offset="%d">' % self.pos) self.printAndSet("cbUpx", self.readuInt16()) upxPapx = UpxPapx(self) upxPapx.dump() @@ -3472,7 +3476,7 @@ class LPUpxPapx(BinaryStream): uPXPadding = UPXPadding(self) uPXPadding.pad() self.pos = uPXPadding.pos - print '</lPUpxPapx>' + print('</lPUpxPapx>') class LPUpxChpx(BinaryStream): @@ -3482,7 +3486,7 @@ class LPUpxChpx(BinaryStream): self.pos = stkParaGRLPUPX.pos def dump(self): - print '<lPUpxChpx type="LPUpxChpx" offset="%d">' % self.pos + print('<lPUpxChpx type="LPUpxChpx" offset="%d">' % self.pos) self.printAndSet("cbUpx", self.readuInt16()) upxChpx = UpxChpx(self) upxChpx.dump() @@ -3490,7 +3494,7 @@ class LPUpxChpx(BinaryStream): uPXPadding = UPXPadding(self) uPXPadding.pad() self.pos = uPXPadding.pos - print '</lPUpxChpx>' + print('</lPUpxChpx>') class LPUpxTapx(BinaryStream): @@ -3500,7 +3504,7 @@ class LPUpxTapx(BinaryStream): self.pos = stkParaGRLPUPX.pos def dump(self): - print '<lPUpxTapx type="LPUpxTapx" offset="%d">' % self.pos + print('<lPUpxTapx type="LPUpxTapx" offset="%d">' % self.pos) self.printAndSet("cbUpx", self.readuInt16()) upxTapx = UpxTapx(self) upxTapx.dump() @@ -3508,7 +3512,7 @@ class LPUpxTapx(BinaryStream): uPXPadding = UPXPadding(self) uPXPadding.pad() self.pos = uPXPadding.pos - print '</lPUpxTapx>' + print('</lPUpxTapx>') class StkListGRLPUPX(BinaryStream): @@ -3519,11 +3523,11 @@ class StkListGRLPUPX(BinaryStream): self.pos = grLPUpxSw.pos def dump(self): - print '<stkListGRLPUPX type="StkListGRLPUPX" offset="%d">' % self.pos + print('<stkListGRLPUPX type="StkListGRLPUPX" offset="%d">' % self.pos) lpUpxPapx = LPUpxPapx(self) lpUpxPapx.dump() self.pos = lpUpxPapx.pos - print '</stkListGRLPUPX>' + print('</stkListGRLPUPX>') class StkTableGRLPUPX(BinaryStream): @@ -3534,7 +3538,7 @@ class StkTableGRLPUPX(BinaryStream): self.pos = grLPUpxSw.pos def dump(self): - print '<stkTableGRLPUPX 
type="StkTableGRLPUPX" offset="%d">' % self.pos + print('<stkTableGRLPUPX type="StkTableGRLPUPX" offset="%d">' % self.pos) lpUpxTapx = LPUpxTapx(self) lpUpxTapx.dump() self.pos = lpUpxTapx.pos @@ -3544,7 +3548,7 @@ class StkTableGRLPUPX(BinaryStream): lpUpxChpx = LPUpxChpx(self) lpUpxChpx.dump() self.pos = lpUpxChpx.pos - print '</stkTableGRLPUPX>' + print('</stkTableGRLPUPX>') class StkCharGRLPUPX(BinaryStream): @@ -3556,14 +3560,14 @@ class StkCharGRLPUPX(BinaryStream): self.grLPUpxSw = grLPUpxSw def dump(self): - print '<stkCharGRLPUPX type="StkCharGRLPUPX" offset="%d">' % self.pos + print('<stkCharGRLPUPX type="StkCharGRLPUPX" offset="%d">' % self.pos) if self.grLPUpxSw.std.stdf.stdfBase.cupx == 1: lpUpxChpx = LPUpxChpx(self) lpUpxChpx.dump() self.pos = lpUpxChpx.pos else: - print '<todo what="StkCharGRLPUPX: cupx != 1"/>' - print '</stkCharGRLPUPX>' + print('<todo what="StkCharGRLPUPX: cupx != 1"/>') + print('</stkCharGRLPUPX>') class StkParaGRLPUPX(BinaryStream): @@ -3575,7 +3579,7 @@ class StkParaGRLPUPX(BinaryStream): self.grLPUpxSw = grLPUpxSw def dump(self): - print '<stkParaGRLPUPX type="StkParaGRLPUPX" offset="%d">' % self.pos + print('<stkParaGRLPUPX type="StkParaGRLPUPX" offset="%d">' % self.pos) if self.grLPUpxSw.std.stdf.stdfBase.cupx == 2: lPUpxPapx = LPUpxPapx(self) lPUpxPapx.dump() @@ -3584,8 +3588,8 @@ class StkParaGRLPUPX(BinaryStream): lpUpxChpx.dump() self.pos = lpUpxChpx.pos else: - print '<todo what="StkParaGRLPUPX: cupx != 2"/>' - print '</stkParaGRLPUPX>' + print('<todo what="StkParaGRLPUPX: cupx != 2"/>') + print('</stkParaGRLPUPX>') class GrLPUpxSw(BinaryStream): @@ -3617,7 +3621,7 @@ class STD(BinaryStream): self.size = lpstd.cbStd def dump(self): - print '<std type="STD" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<std type="STD" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.stdf = Stdf(self) self.stdf.dump() self.pos = self.stdf.pos @@ -3628,7 +3632,7 @@ class STD(BinaryStream): grLPUpxSw = 
GrLPUpxSw(self) grLPUpxSw.dump() self.pos = grLPUpxSw.pos - print '</std>' + print('</std>') class LPStd(BinaryStream): @@ -3656,17 +3660,17 @@ class STSH(BinaryStream): self.size = size def dump(self): - print '<stsh type="STSH" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<stsh type="STSH" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.lpstshi = LPStshi(self.bytes, self.mainStream, self.pos) self.lpstshi.dump() self.pos = self.lpstshi.pos for i in range(self.lpstshi.stshi.stshif.cstd): - print '<rglpstd index="%d" type="LPStd" offset="%d">' % (i, self.pos) + print('<rglpstd index="%d" type="LPStd" offset="%d">' % (i, self.pos)) lpstd = LPStd(self) lpstd.dump() self.pos = lpstd.pos - print '</rglpstd>' - print '</stsh>' + print('</rglpstd>') + print('</stsh>') class Rca(BinaryStream): @@ -3677,12 +3681,12 @@ class Rca(BinaryStream): self.pos = parent.pos def dump(self): - print '<rca type="Rca" offset="%s">' % self.pos + print('<rca type="Rca" offset="%s">' % self.pos) self.printAndSet("left", self.readuInt32()) self.printAndSet("top", self.readuInt32()) self.printAndSet("right", self.readuInt32()) self.printAndSet("bottom", self.readuInt32()) - print '</rca>' + print('</rca>') self.parent.pos = self.pos @@ -3697,7 +3701,7 @@ class SPA(BinaryStream): def dump(self): pos = self.pos - print '<spa type="SPA" offset="%s" size="%d bytes">' % (self.pos, SPA.size) + print('<spa type="SPA" offset="%s" size="%d bytes">' % (self.pos, SPA.size)) self.printAndSet("lid", self.readuInt32()) Rca(self).dump() buf = self.readuInt16() @@ -3710,7 +3714,7 @@ class SPA(BinaryStream): self.printAndSet("fBelowText", self.getBit(buf, 14)) # 15th bit self.printAndSet("fAnchorLock", self.getBit(buf, 15)) # 16th bit self.printAndSet("cTxbx", self.readuInt32()) - print '</spa>' + print('</spa>') assert pos + SPA.size == self.pos @@ -3738,16 +3742,16 @@ class SPLS(BinaryStream): 0xC: "splfUnknownWord", } buf = self.readuInt16() - print '<spls type="SPLS" 
offset="%d" size="%d bytes" value="%s">' % (self.pos, SPLS.size, hex(buf)) + print('<spls type="SPLS" offset="%d" size="%d bytes" value="%s">' % (self.pos, SPLS.size, hex(buf))) self.printAndSet("splf", buf & 0x000f, end=False) # 1..4th bits if self.splf in splfMap: - print '<transformed name="%s"/>' % splfMap[self.splf] - print '</splf>' + print('<transformed name="%s"/>' % splfMap[self.splf]) + print('</splf>') self.printAndSet("fError", self.getBit(buf, 4)) self.printAndSet("fExtend", self.getBit(buf, 5)) self.printAndSet("fTypo", self.getBit(buf, 6)) self.printAndSet("unused", (buf & 0xff80) >> 7) # 8..16th bits - print '</spls>' + print('</spls>') class PlcfSpl(BinaryStream, PLC): @@ -3759,22 +3763,22 @@ class PlcfSpl(BinaryStream, PLC): self.size = mainStream.lcbPlcfSpl def dump(self): - print '<plcfSpl type="PlcfSpl" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfSpl type="PlcfSpl" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aCp start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) + print('<aCP index="%d" start="%d" end="%d">' % (i, start, end)) pos += 4 # aSpellingSpls aSpellingSpls = SPLS("SpellingSpls", self, self.getOffset(self.pos, i)) aSpellingSpls.dump() - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end)) - print '</aCP>' - print '</plcfSpl>' + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end))) + print('</aCP>') + print('</plcfSpl>') class FTXBXNonReusable(BinaryStream): @@ -3787,10 +3791,10 @@ class FTXBXNonReusable(BinaryStream): self.pos = parent.pos def dump(self): - print '<ftxbxsunion type="FTXBXNonReusable" offset="%d" size="8 bytes">' % (self.pos) + print('<ftxbxsunion type="FTXBXNonReusable" offset="%d" size="8 bytes">' % (self.pos)) self.printAndSet("cTxbx", self.readuInt32()) 
self.printAndSet("cTxbxEdit", self.readuInt32()) - print '</ftxbxsunion>' + print('</ftxbxsunion>') self.parent.pos = self.pos @@ -3803,10 +3807,10 @@ class FTXBXSReusable(BinaryStream): self.pos = parent.pos def dump(self): - print '<ftxbxsunion type="FTXBXReusable" offset="%d" size="8 bytes">' % (self.pos) + print('<ftxbxsunion type="FTXBXReusable" offset="%d" size="8 bytes">' % (self.pos)) self.printAndSet("iNextReuse", self.readuInt32()) self.printAndSet("cReusable", self.readuInt32()) - print '</ftxbxsunion>' + print('</ftxbxsunion>') self.parent.pos = self.pos @@ -3821,7 +3825,7 @@ class FTXBXS(BinaryStream): self.pos = self.posOrig = offset def dump(self): - print '<aFTXBXS type="FTXBXS" offset="%d" size="%d bytes">' % (self.pos, FTXBXS.size) + print('<aFTXBXS type="FTXBXS" offset="%d" size="%d bytes">' % (self.pos, FTXBXS.size)) self.fReusable = self.getuInt16(pos=self.pos + 8) if self.fReusable: FTXBXSReusable(self).dump() @@ -3831,7 +3835,7 @@ class FTXBXS(BinaryStream): self.printAndSet("itxbxsDest", self.readuInt32()) self.printAndSet("lid", self.readuInt32()) self.printAndSet("txidUndo", self.readuInt32()) - print '</aFTXBXS>' + print('</aFTXBXS>') if not self.fReusable: assert self.posOrig + FTXBXS.size == self.pos @@ -3845,23 +3849,23 @@ class PlcftxbxTxt(BinaryStream, PLC): self.size = mainStream.lcbPlcftxbxTxt def dump(self): - print '<plcftxbxTxt type="PlcftxbxTxt" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcftxbxTxt type="PlcftxbxTxt" offset="%d" size="%d bytes">' % (self.pos, self.size)) offset = self.mainStream.getHeaderOffset() pos = self.pos for i in range(self.getElements() - 1): # aCp start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) + print('<aCP index="%d" start="%d" end="%d">' % (i, start, end)) pos += 4 # aFTXBXS aFTXBXS = FTXBXS(self, self.getOffset(self.pos, i)) aFTXBXS.dump() - print '<transformed value="%s"/>' % 
self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end)) - print '</aCP>' - print '</plcftxbxTxt>' + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end))) + print('</aCP>') + print('</plcftxbxTxt>') class Tbkd(BinaryStream): @@ -3875,7 +3879,7 @@ class Tbkd(BinaryStream): self.pos = self.posOrig = offset def dump(self): - print '<aTbkd type="Tbkd" offset="%d" size="%d bytes">' % (self.pos, Tbkd.size) + print('<aTbkd type="Tbkd" offset="%d" size="%d bytes">' % (self.pos, Tbkd.size)) self.printAndSet("itxbxs", self.readuInt16()) self.printAndSet("dcpDepend", self.readuInt16()) buf = self.readuInt16() @@ -3884,7 +3888,7 @@ class Tbkd(BinaryStream): self.printAndSet("fUnk", self.getBit(buf, 11)) self.printAndSet("fTextOverflow", self.getBit(buf, 12)) self.printAndSet("reserved2", (buf & 0xe000) >> 13) # 14..16th bits - print '</aTbkd>' + print('</aTbkd>') assert self.posOrig + Tbkd.size == self.pos @@ -3897,21 +3901,21 @@ class PlcftxbxBkd(BinaryStream, PLC): self.size = mainStream.lcbPlcfTxbxBkd def dump(self): - print '<plcftxbxBkd type="PlcftxbxBkd" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcftxbxBkd type="PlcftxbxBkd" offset="%d" size="%d bytes">' % (self.pos, self.size)) offset = self.mainStream.getHeaderOffset() pos = self.pos for i in range(self.getElements()): # aCp start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) + print('<aCP index="%d" start="%d" end="%d">' % (i, start, end)) pos += 4 # aTbkd Tbkd(self, self.getOffset(self.pos, i)).dump() - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end)) - print '</aCP>' - print '</plcftxbxBkd>' + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end))) + print('</aCP>') + print('</plcftxbxBkd>') class PlcfSpa(BinaryStream, PLC): @@ 
-3924,22 +3928,22 @@ class PlcfSpa(BinaryStream, PLC): self.size = size def dump(self): - print '<plcfSpa type="PlcfSpa" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfSpa type="PlcfSpa" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aCp start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) + print('<aCP index="%d" start="%d" end="%d">' % (i, start, end)) pos += 4 # aSpa aSpa = SPA(self, self.getOffset(self.pos, i)) aSpa.dump() - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end)) - print '</aCP>' - print '</plcfSpa>' + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end))) + print('</aCP>') + print('</plcfSpa>') class PlcfGram(BinaryStream, PLC): @@ -3951,22 +3955,22 @@ class PlcfGram(BinaryStream, PLC): self.size = mainStream.lcbPlcfGram def dump(self): - print '<plcfGram type="PlcfGram" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plcfGram type="PlcfGram" offset="%d" size="%d bytes">' % (self.pos, self.size)) pos = self.pos for i in range(self.getElements()): # aCp start = self.getuInt32(pos=pos) end = self.getuInt32(pos=pos + 4) - print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) + print('<aCP index="%d" start="%d" end="%d">' % (i, start, end)) pos += 4 # aGrammarSpls aGrammarSpls = SPLS("GrammarSpls", self, self.getOffset(self.pos, i)) aGrammarSpls.dump() - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end)) - print '</aCP>' - print '</plcfGram>' + print('<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(start, end))) + print('</aCP>') + print('</plcfGram>') class Grfhic(BinaryStream): @@ -3978,7 +3982,7 @@ class Grfhic(BinaryStream): self.parent = parent def dump(self): - print '<grfhic type="grfhic">' + print('<grfhic 
type="grfhic">') buf = self.readuInt8() self.printAndSet("fhicChecked", self.getBit(buf, 0)) self.printAndSet("fhicFormat", self.getBit(buf, 1)) @@ -3989,7 +3993,7 @@ class Grfhic(BinaryStream): self.printAndSet("unused", self.getBit(buf, 6)) self.printAndSet("fhicBullet", self.getBit(buf, 7)) self.parent.pos = self.pos - print '</grfhic>' + print('</grfhic>') class LSTF(BinaryStream): @@ -4001,11 +4005,11 @@ class LSTF(BinaryStream): self.index = index def dump(self): - print '<lstf type="LSTF" index="%d" offset="%d" size="%d bytes">' % (self.index, self.pos, self.size) + print('<lstf type="LSTF" index="%d" offset="%d" size="%d bytes">' % (self.index, self.pos, self.size)) self.printAndSet("lsid", self.readInt32()) self.printAndSet("tplc", self.readInt32()) for i in range(9): - print '<rgistdPara index="%d" value="%s"/>' % (i, self.readInt16()) + print('<rgistdPara index="%d" value="%s"/>' % (i, self.readInt16())) buf = self.readuInt8() self.printAndSet("fSimpleList", self.getBit(buf, 0)) self.printAndSet("unused1", self.getBit(buf, 1)) @@ -4014,7 +4018,7 @@ class LSTF(BinaryStream): self.printAndSet("fHybrid", self.getBit(buf, 4)) self.printAndSet("reserved1", (buf & 0xe0) >> 5) # 6..8th bits Grfhic(self).dump() - print '</lstf>' + print('</lstf>') class LVLF(BinaryStream): @@ -4024,7 +4028,7 @@ class LVLF(BinaryStream): self.pos = lvl.pos def dump(self): - print '<lvlf type="LVLF" offset="%d">' % self.pos + print('<lvlf type="LVLF" offset="%d">' % self.pos) self.printAndSet("iStartAt", self.readInt32()) self.printAndSet("nfc", self.readuInt8()) buf = self.readuInt8() @@ -4036,7 +4040,7 @@ class LVLF(BinaryStream): self.printAndSet("unused1", self.getBit(buf, 6)) self.printAndSet("fTentative", self.getBit(buf, 7)) for i in range(9): - print '<rgrgbxchNums index="%d" value="%s"/>' % (i, self.readuInt8()) + print('<rgrgbxchNums index="%d" value="%s"/>' % (i, self.readuInt8())) self.printAndSet("ixchFollow", self.readuInt8()) self.printAndSet("dxaIndentSav", 
self.readInt32()) self.printAndSet("unused2", self.readuInt32()) @@ -4044,7 +4048,7 @@ class LVLF(BinaryStream): self.printAndSet("cbGrpprlPapx", self.readuInt8()) self.printAndSet("ilvlRestartLim", self.readuInt8()) Grfhic(self).dump() - print '</lvlf>' + print('</lvlf>') class LVL(BinaryStream): @@ -4055,32 +4059,32 @@ class LVL(BinaryStream): self.index = index def dump(self): - print '<lvl type="LVL" index="%d" offset="%d">' % (self.index, self.pos) + print('<lvl type="LVL" index="%d" offset="%d">' % (self.index, self.pos)) lvlf = LVLF(self) lvlf.dump() self.pos = lvlf.pos - print '<grpprlPapx offset="%d">' % self.pos + print('<grpprlPapx offset="%d">' % self.pos) pos = self.pos while (lvlf.cbGrpprlPapx - (pos - self.pos)) > 0: prl = Prl(self, pos) prl.dump() pos += prl.getSize() self.pos = pos - print '</grpprlPapx>' + print('</grpprlPapx>') - print '<grpprlChpx offset="%d">' % self.pos + print('<grpprlChpx offset="%d">' % self.pos) pos = self.pos while (lvlf.cbGrpprlChpx - (pos - self.pos)) > 0: prl = Prl(self, pos) prl.dump() pos += prl.getSize() self.pos = pos - print '</grpprlChpx>' + print('</grpprlChpx>') xst = Xst(self) xst.dump() self.pos = xst.pos - print '</lvl>' + print('</lvl>') class PlfLst(BinaryStream): @@ -4091,7 +4095,7 @@ class PlfLst(BinaryStream): self.size = mainStream.lcbPlfLst def dump(self): - print '<plfLst type="PlfLst" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plfLst type="PlfLst" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("cLst", self.readInt16()) cLvl = 0 for i in range(self.cLst): @@ -4106,7 +4110,7 @@ class PlfLst(BinaryStream): lvl = LVL(self, i) lvl.dump() self.pos = lvl.pos - print '</plfLst>' + print('</plfLst>') class LFO(BinaryStream): @@ -4118,7 +4122,7 @@ class LFO(BinaryStream): self.index = index def dump(self): - print '<%s type="LFO" index="%s" offset="%d">' % (self.name, self.index, self.pos) + print('<%s type="LFO" index="%s" offset="%d">' % (self.name, self.index, 
self.pos)) self.printAndSet("lsid", self.readInt32()) self.printAndSet("unused1", self.readuInt32()) self.printAndSet("unused2", self.readuInt32()) @@ -4126,7 +4130,7 @@ class LFO(BinaryStream): self.printAndSet("ibstFltAutoNum", self.readuInt8()) Grfhic(self).dump() self.printAndSet("unused3", self.readuInt8()) - print '</%s>' % self.name + print('</%s>' % self.name) class LFOData(BinaryStream): @@ -4137,11 +4141,11 @@ class LFOData(BinaryStream): self.lfo = lfo def dump(self): - print '<lfoData type="LFOData" offset="%d">' % self.pos + print('<lfoData type="LFOData" offset="%d">' % self.pos) self.printAndSet("cp", self.readuInt32()) if self.lfo.clfolvl > 0: - print '<todo what="LFOData: clfolvl != 0"/>' - print '</lfoData>' + print('<todo what="LFOData: clfolvl != 0"/>') + print('</lfoData>') class PlfLfo(BinaryStream): @@ -4152,7 +4156,7 @@ class PlfLfo(BinaryStream): self.size = mainStream.lcbPlfLfo def dump(self): - print '<plfLfo type="PlfLfo" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<plfLfo type="PlfLfo" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("lfoMac", self.readInt32()) lfos = [] for i in range(self.lfoMac): @@ -4164,7 +4168,7 @@ class PlfLfo(BinaryStream): lfoData = LFOData(self, lfos[i]) lfoData.dump() self.pos = lfoData.pos - print '</plfLfo>' + print('</plfLfo>') class SttbListNames(BinaryStream): @@ -4175,17 +4179,17 @@ class SttbListNames(BinaryStream): self.size = mainStream.lcbSttbListNames def dump(self): - print '<sttbListNames type="SttbListNames" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<sttbListNames type="SttbListNames" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("fExtend", self.readuInt16()) self.printAndSet("cData", self.readuInt16()) self.printAndSet("cbExtra", self.readuInt16()) for i in range(self.cData): cchData = self.readuInt16() - print '<cchData index="%s" offset="%d" size="%d bytes">' % (i, self.pos, cchData) - print '<string 
value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True) + print('<cchData index="%s" offset="%d" size="%d bytes">' % (i, self.pos, cchData)) + print('<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True)) self.pos += 2 * cchData - print '</cchData>' - print '</sttbListNames>' + print('</cchData>') + print('</sttbListNames>') class PBString(BinaryStream): @@ -4199,9 +4203,9 @@ class PBString(BinaryStream): def dump(self): if self.index is None: - print '<%s type="PBString">' % self.name + print('<%s type="PBString">' % self.name) else: - print '<%s type="PBString" index="%s">' % (self.name, self.index) + print('<%s type="PBString" index="%s">' % (self.name, self.index)) buf = self.readuInt16() self.printAndSet("cch", buf & 0x7fff) # bits 1..15 self.printAndSet("fAnsiString", self.getBit(buf, 15)) @@ -4221,7 +4225,7 @@ class PBString(BinaryStream): encoding = "utf-16" self.printAndSet("rgxch", globals.encodeName("".join(map(lambda c: chr(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8'), hexdump=False) - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -4233,7 +4237,7 @@ class FactoidType(BinaryStream): self.pos = parent.pos def dump(self): - print '<factoidType>' + print('<factoidType>') self.printAndSet("cbFactoid", self.readuInt32()) self.printAndSet("id", self.readuInt32()) self.rgbUri = PBString(self, "rgbUri") @@ -4242,7 +4246,7 @@ class FactoidType(BinaryStream): self.rgbTag.dump() self.rgbDownLoadURL = PBString(self, "rgbDownLoadURL") self.rgbDownLoadURL.dump() - print '</factoidType>' + print('</factoidType>') self.parent.pos = self.pos @@ -4255,29 +4259,29 @@ class PropertyBagStore(BinaryStream): self.pos = parent.pos def dump(self): - print '<propBagStore type="PropertyBagStore" offset="%s">' % self.pos + print('<propBagStore type="PropertyBagStore" offset="%s">' % self.pos) 
self.printAndSet("cFactoidType", self.readuInt32()) - print '<factoidTypes>' + print('<factoidTypes>') self.factoidTypes = [] for i in range(self.cFactoidType): factoidType = FactoidType(self) factoidType.dump() self.factoidTypes.append(factoidType) - print '</factoidTypes>' + print('</factoidTypes>') self.printAndSet("cbHdr", self.readuInt16()) assert self.cbHdr == 0xc self.printAndSet("sVer", self.readuInt16()) assert self.sVer == 0x0100 self.printAndSet("cfactoid", self.readuInt32()) self.printAndSet("cste", self.readuInt32()) - print '<stringTable>' + print('<stringTable>') self.stringTable = [] for i in range(self.cste): string = PBString(self, "stringTable", index=i) string.dump() self.stringTable.append(string) - print '</stringTable>' - print '</propBagStore>' + print('</stringTable>') + print('</propBagStore>') self.parent.pos = self.pos @@ -4291,10 +4295,10 @@ class Property(BinaryStream): self.index = index def dump(self): - print '<property type="Property" offset="%s" index="%s">' % (self.pos, self.index) + print('<property type="Property" offset="%s" index="%s">' % (self.pos, self.index)) self.printAndSet("keyIndex", self.readuInt32(), hexdump=False) self.printAndSet("valueIndex", self.readuInt32(), hexdump=False) - print '</property>' + print('</property>') self.parent.pos = self.pos @@ -4307,13 +4311,13 @@ class PropertyBag(BinaryStream): self.index = index def dump(self): - print '<propBag type="PropertyBag" offset="%s" index="%s">' % (self.pos, self.index) + print('<propBag type="PropertyBag" offset="%s" index="%s">' % (self.pos, self.index)) self.printAndSet("id", self.readuInt16()) self.printAndSet("cProp", self.readuInt16()) self.printAndSet("cbUnknown", self.readuInt16()) for i in range(self.cProp): Property(self, i).dump() - print '</propBag>' + print('</propBag>') self.parent.pos = self.pos @@ -4327,7 +4331,7 @@ class SmartTagData(BinaryStream): def dump(self): posOrig = self.pos - print '<smartTagData type="SmartTagData" offset="%d" size="%d 
bytes">' % (self.pos, self.size) + print('<smartTagData type="SmartTagData" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.propBagStore = PropertyBagStore(self) self.propBagStore.dump() i = 0 @@ -4335,7 +4339,7 @@ class SmartTagData(BinaryStream): self.propBag = PropertyBag(self, i) self.propBag.dump() i += 1 - print '</smartTagData>' + print('</smartTagData>') class SttbSavedBy(BinaryStream): @@ -4346,20 +4350,20 @@ class SttbSavedBy(BinaryStream): self.size = mainStream.lcbSttbSavedBy def dump(self): - print '<sttbSavedBy type="SttbSavedBy" offset="%d" size="%d">' % (self.pos, self.size) + print('<sttbSavedBy type="SttbSavedBy" offset="%d" size="%d">' % (self.pos, self.size)) self.printAndSet("fExtend", self.readuInt16()) self.printAndSet("cData", self.readuInt16()) self.printAndSet("cbExtra", self.readuInt16()) for i in range(self.cData): cchData = self.readuInt16() - print '<cchData index="%s" offset="%d" size="%d bytes">' % (i, self.pos, cchData) - print '<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True) + print('<cchData index="%s" offset="%d" size="%d bytes">' % (i, self.pos, cchData)) + print('<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True)) self.pos += 2 * cchData - print '</cchData>' + print('</cchData>') # Probably this was cleared manually. 
if self.cData != 0: assert self.pos == self.mainStream.fcSttbSavedBy + self.size - print '</sttbSavedBy>' + print('</sttbSavedBy>') class SttbfBkmk(BinaryStream): @@ -4371,18 +4375,18 @@ class SttbfBkmk(BinaryStream): self.mainStream = mainStream def dump(self): - print '<sttbfBkmk type="SttbfBkmk" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<sttbfBkmk type="SttbfBkmk" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("fExtended", self.readuInt16()) self.printAndSet("cData", self.readuInt16()) self.printAndSet("cbExtra", self.readuInt16()) for i in range(self.cData): cchData = self.readuInt16() - print '<cchData index="%d" offset="%d" size="%d bytes">' % (i, self.pos, cchData) - print '<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True) + print('<cchData index="%d" offset="%d" size="%d bytes">' % (i, self.pos, cchData)) + print('<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos + 2 * cchData].decode('utf-16'), lowOnly=True)) self.pos += 2 * cchData - print '</cchData>' + print('</cchData>') assert self.pos == self.mainStream.fcSttbfBkmk + self.size - print '</sttbfBkmk>' + print('</sttbfBkmk>') # The FTO enumerated type identifies the feature that is responsible to create @@ -4405,14 +4409,14 @@ class FACTOIDINFO(BinaryStream): self.index = index def dump(self): - print '<factoidinfo index="%s">' % self.index + print('<factoidinfo index="%s">' % self.index) self.printAndSet("dwId", self.readuInt32()) buf = self.readuInt16() self.printAndSet("fSubEntry", self.getBit(buf, 0)) self.printAndSet("fUnused", (buf & 0xfffe) >> 1) # 2..16th bits self.printAndSet("fto", self.readuInt16(), dict=FTO) self.printAndSet("pfpb", self.readuInt32()) - print '</factoidinfo>' + print('</factoidinfo>') self.parent.pos = self.pos @@ -4425,7 +4429,7 @@ class SttbfBkmkFactoid(BinaryStream): self.mainStream = mainStream def dump(self): - print '<sttbfBkmkFactoid 
type="SttbfBkmkFactoid" offset="%d" size="%d bytes">' % (self.pos, self.size) + print('<sttbfBkmkFactoid type="SttbfBkmkFactoid" offset="%d" size="%d bytes">' % (self.pos, self.size)) self.printAndSet("fExtended", self.readuInt16()) assert self.fExtended == 0xffff self.printAndSet("cData", self.readuInt16()) @@ -4436,6 +4440,6 @@ class SttbfBkmkFactoid(BinaryStream): assert self.cchData == 0x6 FACTOIDINFO(self, i).dump() assert self.pos == self.mainStream.fcSttbfBkmkFactoid + self.size - print '</sttbfBkmkFactoid>' + print('</sttbfBkmkFactoid>') # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/msodumper/docstream.py b/msodumper/docstream.py index 1bb8c85..4c294c8 100644 --- a/msodumper/docstream.py +++ b/msodumper/docstream.py @@ -5,16 +5,16 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. # -import ole +from . import ole import ctypes -from binarystream import BinaryStream -import docrecord -import globals +from .binarystream import BinaryStream +from . import docrecord +from . 
import globals import sys import os import bisect -from msometa import SummaryInformationStream -from msometa import DocumentSummaryInformationStream +from .msometa import SummaryInformationStream +from .msometa import DocumentSummaryInformationStream class DOCFile: @@ -25,14 +25,14 @@ class DOCFile: self.params = params self.error = None - if ord(self.chars[0]) == 0xD0 and ord(self.chars[1]) == 0xCF and ord(self.chars[2]) == 0x11 and ord(self.chars[3]) == 0xE0: + if globals.indexbytes(self.chars, 0) == 0xD0 and globals.indexbytes(self.chars, 1) == 0xCF and globals.indexbytes(self.chars, 2) == 0x11 and globals.indexbytes(self.chars, 3) == 0xE0: self.initWW8() else: - print '<?xml version="1.0"?>' - if ord(self.chars[0]) == 0xDB and ord(self.chars[1]) == 0xA5: - print '<todo what="handle v6 of the doc format"/>' + print('<?xml version="1.0"?>') + if globals.indexbytes(self.chars, 0) == 0xDB and globals.indexbytes(self.chars, 1) == 0xA5: + print('<todo what="handle v6 of the doc format"/>') else: - print '<todo what="unhandled magic"/>' + print('<todo what="unhandled magic"/>') sys.exit(0) def initWW8(self): @@ -53,13 +53,13 @@ class DOCFile: return self.getStreamFromBytes(name, bytes) def getStreamFromBytes(self, name, bytes): - if name == "WordDocument": + if name == b"WordDocument": return WordDocumentStream(bytes, self.params, doc=self) - elif name in ("0Table", "1Table"): + elif name in (b"0Table", b"1Table"): return TableStream(bytes, self.params, name, doc=self) - elif name == "\x05SummaryInformation": + elif name == b"\x05SummaryInformation": return SummaryInformationStream(bytes, self.params, doc=self) - elif name == "\x05DocumentSummaryInformation": + elif name == b"\x05DocumentSummaryInformation": return DocumentSummaryInformationStream(bytes, self.params, doc=self) else: return BinaryStream(bytes, self.params, name, doc=self) @@ -137,7 +137,7 @@ class TableStream(BinaryStream): BinaryStream.__init__(self, bytes, params, name, doc=doc) def dump(self): - 
print '<stream name="%s" size="%s"/>' % (self.name, self.size) + print('<stream name="%s" size="%s"/>' % (self.name, self.size)) class WordDocumentStream(BinaryStream): @@ -145,14 +145,14 @@ class WordDocumentStream(BinaryStream): BinaryStream.__init__(self, bytes, params, "WordDocument", doc=doc) def dump(self): - print '<stream name="WordDocument" size="%d">' % self.size + print('<stream name="WordDocument" size="%d">' % self.size) self.dumpFib() - print '</stream>' + print('</stream>') def dumpFib(self): - print '<fib>' + print('<fib>') if not self.dumpFibBase("base"): - print '</fib>' + print('</fib>') return self.printAndSet("csw", self.readuInt16()) self.dumpFibRgW97("fibRgW") @@ -162,7 +162,7 @@ class WordDocumentStream(BinaryStream): self.blobOffset = self.pos cswNew = self.getuInt16(pos=self.__getCswNewOffset()) - print '<debug what="cswNew is %s"/>' % cswNew + print('<debug what="cswNew is %s"/>' % cswNew) if cswNew != 0: self.nFibNew = self.getuInt16(pos=self.__getCswNewOffset() + 2) @@ -176,53 +176,53 @@ class WordDocumentStream(BinaryStream): self.printAndSet("cswNew", self.readuInt16(), offset=True) if self.cswNew != 0: self.dumpFibRgCswNew("fibRgCswNew") - print '</fib>' + print('</fib>') def __getCswNewOffset(self): - print '<debug what="cswnew offset is %s as self.cbRgFcLcb is %s, blob offset is %s"/>' % (self.blobOffset + (8 * self.cbRgFcLcb), self.cbRgFcLcb, self.blobOffset) + print('<debug what="cswnew offset is %s as self.cbRgFcLcb is %s, blob offset is %s"/>' % (self.blobOffset + (8 * self.cbRgFcLcb), self.cbRgFcLcb, self.blobOffset)) return self.blobOffset + (8 * self.cbRgFcLcb) def dumpFibRgCswNew(self, name): - print '<%s type="FibRgCswNew" size="%d bytes">' % (name, self.cswNew) + print('<%s type="FibRgCswNew" size="%d bytes">' % (name, self.cswNew)) self.printAndSet("nFibNew", self.readuInt16()) if self.nFibNew == 0x0112: self.dumpFibRgCswNewData2007("fibRgCswNewData2007") elif self.nFibNew == 0x00D9: 
self.dumpFibRgCswNewData2000("fibRgCswNewData2000") else: - print """<todo what="dumpFibRgCswNew() doesn't know how to handle nFibNew = %s"/>""" % hex(self.nFibNew) - print '</%s>' % name + print("""<todo what="dumpFibRgCswNew() doesn't know how to handle nFibNew = %s"/>""" % hex(self.nFibNew)) + print('</%s>' % name) def __dumpFibRgCswNewData2000(self): self.printAndSet("cQuickSavesNew", self.readuInt16()) def dumpFibRgCswNewData2000(self, name): - print '<%s type="FibRgCswNewData2000" size="%d bytes">' % (name, 8) + print('<%s type="FibRgCswNewData2000" size="%d bytes">' % (name, 8)) self.__dumpFibRgCswNewData2000() - print '</%s>' % name + print('</%s>' % name) def dumpFibRgCswNewData2007(self, name): - print '<%s type="FibRgCswNewData2007" size="%d bytes">' % (name, 8) + print('<%s type="FibRgCswNewData2007" size="%d bytes">' % (name, 8)) self.__dumpFibRgCswNewData2000() self.printAndSet("lidThemeOther", self.readuInt16()) self.printAndSet("lidThemeFE", self.readuInt16()) self.printAndSet("lidThemeCS", self.readuInt16()) - print '</%s>' % name + print('</%s>' % name) def getTableStream(self): if self.fWhichTblStm: - return self.doc.getDirectoryStreamByName("1Table") + return self.doc.getDirectoryStreamByName(b"1Table") else: - return self.doc.getDirectoryStreamByName("0Table") + return self.doc.getDirectoryStreamByName(b"0Table") def dumpFibBase(self, name): ret = True - print '<%s type="FibBase" size="32 bytes">' % name + print('<%s type="FibBase" size="32 bytes">' % name) self.printAndSet("wIdent", self.readuInt16()) self.printAndSet("nFib", self.readuInt16()) if self.nFib >= 0x65 and self.nFib <= 0x69: - print '<todo what="handle nFib 0x65..0x69: ww6 syntax"/>' + print('<todo what="handle nFib 0x65..0x69: ww6 syntax"/>') ret = False self.printAndSet("unused", self.readuInt16()) self.printAndSet("lid", self.readuInt16()) @@ -250,19 +250,19 @@ class WordDocumentStream(BinaryStream): if self.fEncrypted == 1 and self.fObfuscated == 0: self.printAndSet("lKey", 
self.readuInt32(), end=False) - print '<EncryptionVersionInfo>' + print('<EncryptionVersionInfo>') tableStream = self.getTableStream() self.printAndSet("vMajor", tableStream.readuInt16()) self.printAndSet("vMinor", tableStream.readuInt16()) - print '</EncryptionVersionInfo>' + print('</EncryptionVersionInfo>') if self.vMajor == 0x0001 and self.vMinor == 0x0001: docrecord.RC4EncryptionHeader(self, tableStream.pos, self.lKey).dump() - print '<todo what="handle RC4 encryption"/>' + print('<todo what="handle RC4 encryption"/>') elif self.vMajor in (0x0002, 0x0003, 0x0004) and self.vMinor == 0x0002: - print '<todo what="handle RC4CryptoApiEncryptionHeader"/>' + print('<todo what="handle RC4CryptoApiEncryptionHeader"/>') else: - print '<todo what="unexpected vMajor %d and vMinor %d"/>' % (self.vMajor, self.vMinor) - print '</lKey>' + print('<todo what="unexpected vMajor %d and vMinor %d"/>' % (self.vMajor, self.vMinor)) + print('</lKey>') ret = False else: self.printAndSet("lKey", self.readuInt32()) @@ -283,20 +283,20 @@ class WordDocumentStream(BinaryStream): self.printAndSet("reserved5", self.readuInt32()) self.printAndSet("reserved6", self.readuInt32()) - print '</%s>' % name + print('</%s>' % name) return ret def dumpFibRgW97(self, name): - print '<%s type="FibRgW97" size="28 bytes">' % name + print('<%s type="FibRgW97" size="28 bytes">' % name) for i in range(13): self.printAndSet("reserved%d" % (i + 1), self.readuInt16()) self.printAndSet("lidFE", self.readuInt16()) - print '</%s>' % name + print('</%s>' % name) def dumpFibRgLw97(self, name): - print '<%s type="FibRgLw97" size="88 bytes">' % name + print('<%s type="FibRgLw97" size="88 bytes">' % name) fields = [ "cbMac", @@ -324,9 +324,9 @@ class WordDocumentStream(BinaryStream): ] for i in fields: self.printAndSet(i, self.readuInt32()) - print '<debug what="offset is now %s"/>' % self.pos + print('<debug what="offset is now %s"/>' % self.pos) - print '</%s>' % name + print('</%s>' % name) def dumpFibRgFcLcb(self, 
name): if self.nFib == 0x00c1: @@ -340,7 +340,7 @@ class WordDocumentStream(BinaryStream): elif self.nFib == 0x0112: self.dumpFibRgFcLcb2007(name) else: - print """<todo what="dumpFibRgFcLcb() doesn't know how to handle nFib = %s">""" % hex(self.nFib) + print("""<todo what="dumpFibRgFcLcb() doesn't know how to handle nFib = %s">""" % hex(self.nFib)) def __dumpFibRgFcLcb97(self): # should be 186 @@ -557,8 +557,8 @@ class WordDocumentStream(BinaryStream): if hasHandler: i[1]() else: - print '<todo what="value is non-zero and unhandled"/>' - print '</%s>' % i[0] + print('<todo what="value is non-zero and unhandled"/>') + print('</%s>' % i[0]) def handleDop(self): docrecord.Dop(self).dump() @@ -722,29 +722,29 @@ class WordDocumentStream(BinaryStream): docrecord.PlcftxbxBkd(self).dump() def dumpFibRgFcLcb97(self, name): - print '<%s type="FibRgFcLcb97" size="744 bytes">' % name + print('<%s type="FibRgFcLcb97" size="744 bytes">' % name) self.__dumpFibRgFcLcb97() - print '</%s>' % name + print('</%s>' % name) def dumpFibRgFcLcb2000(self, name): - print '<%s type="FibRgFcLcb2000" size="864 bytes">' % name + print('<%s type="FibRgFcLcb2000" size="864 bytes">' % name) self.__dumpFibRgFcLcb2000() - print '</%s>' % name + print('</%s>' % name) def dumpFibRgFcLcb2002(self, name): - print '<%s type="FibRgFcLcb2002" size="1088 bytes">' % name + print('<%s type="FibRgFcLcb2002" size="1088 bytes">' % name) self.__dumpFibRgFcLcb2002() - print '</%s>' % name + print('</%s>' % name) def dumpFibRgFcLcb2003(self, name): - print '<%s type="FibRgFcLcb2003" size="1312 bytes">' % name + print('<%s type="FibRgFcLcb2003" size="1312 bytes">' % name) self.__dumpFibRgFcLcb2003() - print '</%s>' % name + print('</%s>' % name) def dumpFibRgFcLcb2007(self, name): - print '<%s type="FibRgFcLcb2007" size="1464 bytes">' % name + print('<%s type="FibRgFcLcb2007" size="1464 bytes">' % name) self.__dumpFibRgFcLcb2007() - print '</%s>' % name + print('</%s>' % name) def __dumpFibRgFcLcb2000(self): 
self.__dumpFibRgFcLcb97() @@ -856,8 +856,8 @@ class WordDocumentStream(BinaryStream): if hasHandler: i[1]() else: - print '<todo what="value is non-zero and unhandled"/>' - print '</%s>' % i[0] + print('<todo what="value is non-zero and unhandled"/>') + print('</%s>' % i[0]) def __dumpFibRgFcLcb2003(self): self.__dumpFibRgFcLcb2002() @@ -989,7 +989,7 @@ class WordDocumentStream(BinaryStream): divider = 1 else: divider = 2 - return (start + ((offset - startOffset) / divider)) + return (start + ((offset - startOffset) // divider)) def __cpToOffset(self, cp): """Implements 2.4.1 Retrieving Text.""" @@ -998,7 +998,7 @@ class WordDocumentStream(BinaryStream): aPcd = plcPcd.aPcd[index] fcCompressed = aPcd.fc if fcCompressed.fCompressed == 1: - pos = (fcCompressed.fc / 2) + (cp - plcPcd.aCp[index]) + pos = (fcCompressed.fc // 2) + (cp - plcPcd.aCp[index]) return pos, True else: pos = fcCompressed.fc + 2 * (cp - plcPcd.aCp[index]) @@ -1007,19 +1007,19 @@ class WordDocumentStream(BinaryStream): def retrieveCP(self, cp): pos, compressed = self.__cpToOffset(cp) if compressed: - return globals.encodeName(self.bytes[pos]) + return globals.encodeName(globals.indexedbytetobyte(self.bytes[int(pos)])) else: try: return globals.encodeName(self.bytes[pos:pos + 2].decode('utf-16'), lowOnly=True) except UnicodeDecodeError: - reason = 'could not decode bytes in position %d-%d (%s-%s)' % (pos, pos + 1, hex(ord(self.bytes[pos])), hex(ord(self.bytes[pos + 1]))) - print '<todo what="WordDocumentStream::retrieveCP(): %s"/>' % reason + reason = 'could not decode bytes in position %d-%d (%s-%s)' % (pos, pos + 1, hex(globals.indexbytes(self.bytes, pos)), hex(globals.indexbytes(self.bytes, pos + 1))) + print('<todo what="WordDocumentStream::retrieveCP(): %s"/>' % reason) return globals.encodeName(self.bytes[pos:pos + 2].decode('utf-16', errors="replace"), lowOnly=True) def retrieveCPs(self, start, end): """Retrieves a range of characters.""" if not len(self.clx.pcdt.plcPcd.aPcd): - print '<info 
what="clx.pcdt.plcPcd.aPcd is empty, probably corrupted document"/>' + print('<info what="clx.pcdt.plcPcd.aPcd is empty, probably corrupted document"/>') return "" ret = [] i = start diff --git a/msodumper/emfrecord.py b/msodumper/emfrecord.py index 2e095f2..98c1611 100644 --- a/msodumper/emfrecord.py +++ b/msodumper/emfrecord.py @@ -5,8 +5,8 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. # -from binarystream import BinaryStream -import wmfrecord +from .binarystream import BinaryStream +from . import wmfrecord import base64 @@ -60,7 +60,7 @@ class EMFStream(BinaryStream): BinaryStream.__init__(self, bytes) def dump(self): - print '<stream type="EMF" size="%d">' % self.size + print('<stream type="EMF" size="%d">' % self.size) emrHeader = EmrHeader(self) emrHeader.dump() for i in range(emrHeader.header.Records): @@ -70,15 +70,15 @@ class EMFStream(BinaryStream): size = self.getuInt32(pos=self.pos + 4) # EmrHeader is already dumped if i: - print '<record index="%s" type="%s">' % (i, type) + print('<record index="%s" type="%s">' % (i, type)) if len(record) > 1: handler = record[1](self) handler.dump() else: - print '<todo/>' - print '</record>' + print('<todo/>') + print('</record>') self.pos += size - print '</stream>' + print('</stream>') class EMFRecord(BinaryStream): @@ -146,11 +146,11 @@ class LogBrushEx(EMFRecord): self.name = name def dump(self): - print '<%s>' % self.name + print('<%s>' % self.name) self.printAndSet("BrushStyle", self.readuInt32(), dict=wmfrecord.BrushStyle) wmfrecord.ColorRef(self, "Color").dump() self.printAndSet("BrushHatch", self.readuInt32(), dict=HatchStyle) - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -184,14 +184,14 @@ class XForm(EMFRecord): self.name = name def dump(self): - print '<%s>' % self.name + print('<%s>' % self.name) self.printAndSet("M11", self.readFloat32()) self.printAndSet("M12", self.readFloat32()) self.printAndSet("M21", self.readFloat32()) 
self.printAndSet("M22", self.readFloat32()) self.printAndSet("Dx", self.readFloat32()) self.printAndSet("Dy", self.readFloat32()) - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -228,13 +228,13 @@ class EmrComment(EMFRecord): self.printAndSet("DataSize", self.readuInt32(), hexdump=False) commentIdentifier = self.getuInt32() if commentIdentifier == 0x00000000: # EMR_COMMENT_EMFSPOOL - print '<todo what="EmrComment::dump(): handle EMR_COMMENT_EMFSPOOL"/>' + print('<todo what="EmrComment::dump(): handle EMR_COMMENT_EMFSPOOL"/>') elif commentIdentifier == 0x2B464D45: # EMR_COMMENT_EMFPLUS - print '<todo what="EmrComment::dump(): handle EMR_COMMENT_EMFPLUS"/>' + print('<todo what="EmrComment::dump(): handle EMR_COMMENT_EMFPLUS"/>') elif commentIdentifier == 0x43494447: # EMR_COMMENT_PUBLIC - print '<todo what="EmrComment::dump(): handle EMR_COMMENT_PUBLIC"/>' + print('<todo what="EmrComment::dump(): handle EMR_COMMENT_PUBLIC"/>') else: - print '<todo what="EmrComment::dump(): handle EMR_COMMENT: %s"/>' % hex(commentIdentifier) + print('<todo what="EmrComment::dump(): handle EMR_COMMENT: %s"/>' % hex(commentIdentifier)) class EmrSetviewportorgex(EMFRecord): @@ -378,10 +378,10 @@ class EmrPolygon16(EMFRecord): self.printAndSet("Size", self.readuInt32(), hexdump=False) wmfrecord.RectL(self, "Bounds").dump() self.printAndSet("Count", self.readuInt32(), hexdump=False) - print '<aPoints>' + print('<aPoints>') for i in range(self.Count): wmfrecord.PointS(self, "aPoint%d" % i).dump() - print '</aPoints>' + print('</aPoints>') assert self.pos - posOrig == self.Size @@ -397,14 +397,14 @@ class EmrPolypolygon16(EMFRecord): wmfrecord.RectL(self, "Bounds").dump() self.printAndSet("NumberOfPolygons", self.readuInt32(), hexdump=False) self.printAndSet("Count", self.readuInt32(), hexdump=False) - print '<PolygonPointCounts>' + print('<PolygonPointCounts>') for i in range(self.NumberOfPolygons): self.printAndSet("PolygonPointCount%d" % i, 
self.readuInt32(), hexdump=False) - print '</PolygonPointCounts>' - print '<aPoints>' + print('</PolygonPointCounts>') + print('<aPoints>') for i in range(self.Count): wmfrecord.PointS(self, "aPoint").dump() - print '</aPoints>' + print('</aPoints>') assert self.pos - posOrig == self.Size @@ -419,10 +419,10 @@ class EmrPolylineto16(EMFRecord): self.printAndSet("Size", self.readuInt32(), hexdump=False) wmfrecord.RectL(self, "Bounds").dump() self.printAndSet("Count", self.readuInt32(), hexdump=False) - print '<aPoints>' + print('<aPoints>') for i in range(self.Count): wmfrecord.PointS(self, "aPoint%d" % i).dump() - print '</aPoints>' + print('</aPoints>') assert self.pos - posOrig == self.Size @@ -437,10 +437,10 @@ class EmrPolybezierto16(EMFRecord): self.printAndSet("Size", self.readuInt32(), hexdump=False) wmfrecord.RectL(self, "Bounds").dump() self.printAndSet("Count", self.readuInt32(), hexdump=False) - print '<aPoints>' + print('<aPoints>') for i in range(self.Count): wmfrecord.PointS(self, "aPoint%d" % i).dump() - print '</aPoints>' + print('</aPoints>') assert self.pos - posOrig == self.Size @@ -585,7 +585,7 @@ class LogPenEx(EMFRecord): self.name = name def dump(self): - print '<%s type="LogPenEx">' % self.name + print('<%s type="LogPenEx">' % self.name) self.printAndSet("PenStyle", self.readuInt32(), dict=PenStyle) self.printAndSet("Width", self.readuInt32()) self.printAndSet("BrushStyle", self.readuInt32(), dict=wmfrecord.BrushStyle) @@ -596,8 +596,8 @@ class LogPenEx(EMFRecord): self.printAndSet("BrushHatch", self.readuInt32()) self.printAndSet("NumStyleEntries", self.readuInt32()) if self.NumStyleEntries > 0: - print '<todo what="LogPenEx::dump(): self.NumStyleEntries != 0"/>' - print '</%s>' % self.name + print('<todo what="LogPenEx::dump(): self.NumStyleEntries != 0"/>') + print('</%s>' % self.name) self.parent.pos = self.pos @@ -616,9 +616,9 @@ class EmrExtcreatepen(EMFRecord): self.printAndSet("cbBits", self.readuInt32(), hexdump=False) LogPenEx(self, 
"elp").dump() if self.cbBmi: - print '<todo what="LogPenEx::dump(): self.cbBmi != 0"/>' + print('<todo what="LogPenEx::dump(): self.cbBmi != 0"/>') if self.cbBits: - print '<todo what="LogPenEx::dump(): self.cbBits != 0"/>' + print('<todo what="LogPenEx::dump(): self.cbBits != 0"/>') class EmrStretchdibits(EMFRecord): @@ -646,16 +646,16 @@ class EmrStretchdibits(EMFRecord): self.printAndSet("BitBltRasterOperation", self.readuInt32(), dict=wmfrecord.RasterPolishMap) self.printAndSet("cxDest", self.readInt32(), hexdump=False) self.printAndSet("cyDest", self.readInt32(), hexdump=False) - print '<BitmapBuffer>' + print('<BitmapBuffer>') if self.cbBmiSrc: self.pos = posOrig + self.offBmiSrc self.BmiSrc = self.readBytes(self.cbBmiSrc) - print '<BmiSrc value="%s"/>' % base64.b64encode(self.BmiSrc) + print('<BmiSrc value="%s"/>' % base64.b64encode(self.BmiSrc)) if self.cbBitsSrc: self.pos = posOrig + self.offBitsSrc self.BitsSrc = self.readBytes(self.cbBitsSrc) - print '<BitsSrc value="%s"/>' % base64.b64encode(self.BitsSrc) - print '</BitmapBuffer>' + print('<BitsSrc value="%s"/>' % base64.b64encode(self.BitsSrc)) + print('</BitmapBuffer>') assert self.pos - posOrig == self.Size @@ -671,7 +671,7 @@ class EmrEof(EMFRecord): self.printAndSet("nPalEntries", self.readuInt32(), hexdump=False) self.printAndSet("offPalEntries", self.readuInt32(), hexdump=False) if self.nPalEntries > 0: - print '<todo what="EmrEof::dump(): handle nPalEntries > 0"/>' + print('<todo what="EmrEof::dump(): handle nPalEntries > 0"/>') self.printAndSet("SizeLast", self.readuInt32(), hexdump=False) assert self.pos - posOrig == self.Size @@ -684,12 +684,12 @@ class RegionData(EMFRecord): self.size = size def dump(self): - print '<%s>' % self.name + print('<%s>' % self.name) header = RegionDataHeader(self) header.dump() for i in range(header.CountRects): wmfrecord.RectL(self, "Data%d" % i).dump() - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -713,7 +713,7 @@ class 
EmrHeader(EMFRecord): EMFRecord.__init__(self, parent) def dump(self): - print '<emrHeader>' + print('<emrHeader>') self.printAndSet("Type", self.readuInt32()) self.printAndSet("Size", self.readuInt32(), hexdump=False) self.header = Header(self) @@ -722,7 +722,7 @@ class EmrHeader(EMFRecord): HeaderExtension1(self).dump() if self.Size >= 108: HeaderExtension2(self).dump() - print '</emrHeader>' + print('</emrHeader>') class Header(EMFRecord): @@ -781,6 +781,7 @@ class HeaderExtension2(EMFRecord): assert posOrig == self.pos - 8 self.parent.pos = self.pos + """The RecordType enumeration defines values that uniquely identify EMF records.""" RecordType = { 0x00000001: ['EMR_HEADER'], diff --git a/msodumper/formula.py b/msodumper/formula.py index c731dd4..98f7674 100644 --- a/msodumper/formula.py +++ b/msodumper/formula.py @@ -6,7 +6,7 @@ # import struct, sys -import globals +from . import globals class InvalidCellAddress(Exception): pass class FormulaParserError(Exception): pass @@ -27,7 +27,7 @@ def toColName (colID): name = struct.pack('b', n1 + ord('A')) if n2 > 0: name += struct.pack('b', n2 + ord('A')) - return name + return name.decode('cp1252') def toAbsName (name, isRelative): if not isRelative: @@ -725,7 +725,7 @@ class PtgFuncVar(PtgBase): # I'll support this later. raise FormulaParserError("special built-in function not supported yet") - if not PtgFuncVar.funcTab.has_key(self.funcType): + if not self.funcType in PtgFuncVar.funcTab: # unknown function name return '#NAME!' @@ -804,7 +804,7 @@ associated token classes will be without the leading underscore (_).""" def parse (self, parseType=ParsedFormulaType.Cell): while not self.strm.isEndOfRecord(): b = self.strm.readUnsignedInt(1) - if not _tokenMap.has_key(b): + if not b in _tokenMap: # Unknown token. Stop parsing. 
raise FormulaParserError("unknown token 0x%2.2X"%b) diff --git a/msodumper/globals.py b/msodumper/globals.py index 27623f5..993cc56 100644 --- a/msodumper/globals.py +++ b/msodumper/globals.py @@ -4,8 +4,37 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - -import sys, struct, math, zipfile, xmlpp, StringIO +from builtins import range +import sys, struct, math, zipfile, io +from . import xmlpp + +PY3 = sys.version > '3' + +# Python3 bytes[i] is an integer, Python2 str[i] is an str of length 1 +# These functions explicitely return a given type for both 2 and 3 +# - indexbytes(): return bytes element at given index, as integer (uses +# ord() for python2) +# - indexedbytetobyte: convert a value obtained by indexing into a bytes list +# (or from 'for x in somebytes') to a bytes list of len 1 +# - indexedbytetoint() return the same as an int (uses ord() for python2) +# +if PY3: + + def indexbytes(data, i): + return data[i] + def indexedbytetobyte(i): + return i.to_bytes(1, byteorder='big') + def indexedbytetoint(i): + return i + nullchar = 0 +else: + def indexbytes(data, i): + return ord(data[i]) + def indexedbytetobyte(i): + return i + def indexedbytetoint(i): + return ord(i) + nullchar = chr(0) OutputWidth = 76 @@ -124,64 +153,121 @@ def getValueOrUnknown (list, idx, errmsg='(unknown)'): return list[idx] elif listType == type({}): # dictionary - if list.has_key(idx): + if idx in list: return list[idx] return errmsg textdump = b"" +def dumptext(): + data = textdump.replace(b"\r", b"\n") + if sys.platform == "win32": + import msvcrt + msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) + if PY3: + sys.stdout.buffer.write(data) + else: + sys.stdout.write(data) + +# Write msg to stdout, as bytes (encode it if needed) def output (msg, recordType = -1): + if type(msg) == type(u''): + msg = msg.encode('utf-8') if params.noStructOutput: return if recordType == -1 or not params.dumpedIds or \ 
recordType in params.dumpedIds: - sys.stdout.write(msg) + if PY3: + sys.stdout.buffer.write(msg) + else: + sys.stdout.write(msg) def outputln(msg, recordType = -1): - output(msg + "\n", recordType) + if type(msg) == type(u''): + output(msg + "\n", recordType) + else: + output(msg + b"\n", recordType) + +# Replace sys.stdout as arg to prettyPrint and call our output method +class utfwriter: + def write(self, s): + output(s) def error (msg): - sys.stderr.write("Error: " + msg) + sys.stderr.write("Error: %s\n"%msg) def debug (msg): sys.stderr.write("DEBUG: %s\n"%msg) +def nulltrunc(bytes): + '''Return input truncated to first 0 byte, allowing, e.g., comparison + with a simple literal bytes string''' + try: + firstnull = bytes.index(nullchar) + bytes = bytes[:firstnull] + except: + pass + return bytes + +# This is syntaxically identical for python2 and python3 if the input is str def encodeName (name, lowOnly = False, lowLimit = 0x20): """Encode name that contains unprintable characters.""" n = len(name) if n == 0: return name + + if PY3 and (type(name) == type(b'')): + return _encodeNameBytes(name, n, lowOnly, lowLimit) newname = '' - for i in xrange(0, n): - if name[i] == '&': + for i in range(0, n): + if name[i] == '&'[0]: newname += "&" - elif name[i] == '<': + elif name[i] == '<'[0]: newname += "<" - elif name[i] == '>': + elif name[i] == '>'[0]: newname += ">" elif ord(name[i]) < lowLimit or ((not lowOnly) and ord(name[i]) >= 127): newname += "\\x%2.2X"%ord(name[i]) else: newname += name[i] - + return newname +# Python3 only. Same as above but accept bytes as input. 
+def _encodeNameBytes (name, n, lowOnly = False, lowLimit = 0x20): + """Encode name that contains unprintable characters.""" + + newname = '' + for i in range(0, n): + if name[i] == b'&'[0]: + newname += "&" + elif name[i] == '<'[0]: + newname += b"<" + elif name[i] == '>'[0]: + newname += b">" + elif name[i] < lowLimit or ((not lowOnly) and name[i] >= 127): + newname += "\\x%2.2X"%name[i] + else: + newname += chr(name[i]) + + return newname + # Uncompress "compressed" UTF-16. This compression strips high bytes # from a string when they are all 0. Just restore them. def uncompCompUnicode(bytes): - out = "" + out = b"" for b in bytes: - out += b - out += '\0' + out += indexedbytetobyte(b) + out += b'\0' return out class UnicodeRichExtText(object): def __init__ (self): - self.baseText = unicode() + self.baseText = u'' self.phoneticBytes = [] # Search sorted list for first element strictly bigger than input @@ -252,7 +338,7 @@ def getUnicodeRichExtText (bytes, offset = 0, rofflist = []): if bytesPerChar == 1: newdata = uncompCompUnicode(newdata) - ret.baseText += unicode(newdata, 'UTF-16LE', errors='replace') + ret.baseText += newdata.decode('UTF-16LE', errors='replace') textLen -= bytesToRead // bytesPerChar @@ -266,7 +352,7 @@ def getUnicodeRichExtText (bytes, offset = 0, rofflist = []): bytesPerChar = 1 if isRichStr: - for i in xrange(0, numElem): + for i in range(0, numElem): posChar = strm.readUnsignedInt(2) fontIdx = strm.readUnsignedInt(2) @@ -282,6 +368,7 @@ def getUnicodeRichExtText (bytes, offset = 0, rofflist = []): def getRichText (bytes, textLen=None): """parse a string of the rich-text format that Excel uses. +Return python2/3 unicode()/str() Note the following: @@ -316,22 +403,29 @@ Note the following: totalByteLen = strm.getCurrentPos() + textLen + extraBytes if is16Bit: totalByteLen += textLen # double the text length since each char is 2 bytes. 
- text = unicode(strm.readBytes(2*textLen), 'UTF-16LE', errors='replace') + text = strm.readBytes(2*textLen).decode('UTF-16LE', errors='replace') else: if params.utf8: # Compressed Unicode-> latin1 text = strm.readBytes(textLen).decode('cp1252') else: # Old behaviour with hex dump - text = strm.readBytes(textLen) + text = strm.readBytes(textLen).decode('cp1252') return (text, totalByteLen) -def toCharOrDot (char): - if 32 < ord(char) and ord(char) < 127: - return char - else: - return '.' +if PY3: + def toCharOrDot(char): + if 32 < char and char < 127: + return chr(char) + else: + return '.' +else: + def toCharOrDot(char): + if 32 < ord(char) and ord(char) < 127: + return char + else: + return '.' def dumpBytes (chars, subDivide=None): if params.noStructOutput or params.noRawDump: @@ -349,13 +443,13 @@ def dumpBytes (chars, subDivide=None): labelWidth = int(math.ceil(math.log(charLen, 10))) lineBuf = '' # bytes interpreted as chars at the end of each line i = 0 - for i in xrange(0, charLen): + for i in range(0, charLen): if (i+1)%16 == 1: # print line header with seek position fmt = "%%%d.%dd: "%(labelWidth, labelWidth) output(fmt%i) - byte = ord(chars[i]) + byte = indexbytes(chars, i) lineBuf += toCharOrDot(chars[i]) output("%2.2X "%byte) @@ -483,20 +577,20 @@ def getDouble (bytes): def getUTF8FromUTF16 (bytes): # little endian utf-16 strings - byteCount = len(bytes) - loopCount = int(byteCount/2) + loopCount = len(bytes) // 2 # Truncate input to first null doublet - for i in xrange(0, loopCount): - if bytes[i*2] == '\x00': - if bytes[i*2+1] == '\x00': + for i in range(0, loopCount): + if indexbytes(bytes, i*2) == 0: + if indexbytes(bytes, i*2+1) == 0: bytes = bytes[0:i*2] break # Convert from utf-16 and return utf-8, using markers for # conversion errors - text = unicode(bytes, 'UTF-16LE', errors='replace') - return text.encode('UTF-8') + if type(bytes) != type(u''): + bytes = bytes.decode('UTF-16LE', errors='replace') + return bytes.encode('UTF-8') class 
StreamWrap(object): def __init__ (self,printer): @@ -510,7 +604,7 @@ class StreamWrap(object): def outputZipContent (bytes, printer, width=80): printer("Zipped content:") - rawFile = StringIO.StringIO(bytes) + rawFile = io.BytesIO(bytes) zipFile = zipfile.ZipFile(rawFile) i = 0 # TODO: when 2.6/3.0 is in widespread use, change to infolist @@ -524,7 +618,7 @@ def outputZipContent (bytes, printer, width=80): printer('-'*width) contents = zipFile.read(filename) - if filename.endswith(".xml") or contents.startswith("<?xml"): + if filename.endswith(".xml") or contents.startswith(b"<?xml"): wrapper = StreamWrap(printer) xmlpp.pprint(contents,wrapper,1,80) wrapper.flush() diff --git a/msodumper/msocrypto.py b/msodumper/msocrypto.py index 5516fb2..0797269 100644 --- a/msodumper/msocrypto.py +++ b/msodumper/msocrypto.py @@ -5,7 +5,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. # -import globals +from . import globals class EncryptionInfo(object): diff --git a/msodumper/msodraw.py b/msodumper/msodraw.py index 1e9c58f..15f6a9c 100644 --- a/msodumper/msodraw.py +++ b/msodumper/msodraw.py @@ -4,13 +4,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - -import globals, xlsmodel +from builtins import range +from . 
import globals, xlsmodel import sys import textwrap import zlib import base64 -from pptrecord import shapeTypes +from .pptrecord import shapeTypes def indent (level): return ' '*level @@ -20,12 +20,12 @@ def headerLine (): def mm100_to_twip(value): if value >= 0: - return (((value)*72+63)/127) + return (((value)*72+63)//127) else: - return (((value)*72-63)/127) + return (((value)*72-63)//127) def emu_to_mm100(value): - return value / 360 + return value // 360 def emu_to_twip(value): return mm100_to_twip(emu_to_mm100(value)) @@ -33,7 +33,7 @@ def emu_to_twip(value): def hexdump(value): ret = [] for i in value: - ret.append("%02x" % ord(i)) + ret.append("%02x" % globals.indexedbytetoint(i)) return "".join(ret) def inflate(bytes): @@ -90,7 +90,7 @@ class RecordHeader: @staticmethod def getRecTypeName (recType): - if RecordHeader.containerTypeNames.has_key(recType): + if recType in RecordHeader.containerTypeNames: return RecordHeader.containerTypeNames[recType] return 'unknown' @@ -105,7 +105,7 @@ class RecordHeader: def __init__ (self, strm): mixed = strm.readUnsignedInt(2) self.recVer = (mixed & 0x000F) - self.recInstance = (mixed & 0xFFF0) / 16 + self.recInstance = (mixed & 0xFFF0) // 16 self.recType = strm.readUnsignedInt(2) self.recLen = strm.readUnsignedInt(4) @@ -131,9 +131,9 @@ class RecordHeader: class ColorRef: def __init__ (self, byte): self.red = (byte & 0x000000FF) - self.green = (byte & 0x0000FF00) / 256 - self.blue = (byte & 0x00FF0000) / 65536 - self.flag = (byte & 0xFF000000) / 16777216 + self.green = (byte & 0x0000FF00) // 256 + self.blue = (byte & 0x00FF0000) // 65536 + self.flag = (byte & 0xFF000000) // 16777216 self.paletteIndex = (self.flag & 0x01) != 0 self.paletteRGB = (self.flag & 0x02) != 0 @@ -253,7 +253,7 @@ class FDGGBlock: # NOTE: The spec says head.cidcl stores the number of IDCL's, but each # FDGGBlock only contains bytes enough to store (head.cidcl - 1) of # IDCL's. 
- for i in xrange(0, self.head.cidcl-1): + for i in range(0, self.head.cidcl-1): idcl = IDCL(strm) self.idcls.append(idcl) @@ -285,8 +285,8 @@ class FDGSL: self.dgslk = strm.readUnsignedInt(4) # selection mode self.shapeFocus = strm.readUnsignedInt(4) # shape ID in focus self.shapesSelected = [] - shapeCount = (strm.getSize() - 20)/4 - for i in xrange(0, shapeCount): + shapeCount = (strm.getSize() - 20)//4 + for i in range(0, shapeCount): spid = strm.readUnsignedInt(4) self.shapesSelected.append(spid) @@ -544,7 +544,7 @@ class FOPT: # A null-terminated Unicode string. try: self.string = prop.extra[0:-2].decode('utf-16') - except UnicodeDecodeError, reason: + except UnicodeDecodeError as reason: self.todo = reason self.string = prop.extra[0:-2].decode('utf-16', errors="replace") @@ -555,7 +555,7 @@ class FOPT: def dumpXml(self, recHdl, prop): self.__parseBytes(prop) if self.todo: - print '<todo what="UnicodeComplex::dumpXml(): %s"/>' % self.todo + print('<todo what="UnicodeComplex::dumpXml(): %s"/>' % self.todo) recHdl.appendLine('<%s value="%s"/>' % (self.name, globals.encodeName(self.string))) class GtextUNICODE(UnicodeComplex): @@ -651,18 +651,18 @@ class FOPT: flag = prop.value flagCount = len(FOPT.GroupShape.flagNames) recHdl.appendLine(indent(level)+"flag: 0x%8.8X"%flag) - for i in xrange(0, flagCount): + for i in range(0, flagCount): bval = (flag & 0x00000001) recHdl.appendLine(indent(level)+"%s: %s"%(FOPT.GroupShape.flagNames[i], recHdl.getTrueFalse(bval))) - flag /= 2 + flag //= 2 def dumpXml(self, recHdl, prop): flag = prop.value flagCount = len(FOPT.GroupShape.flagNames) - for i in xrange(0, flagCount): + for i in range(0, flagCount): bval = (flag & 0x00000001) recHdl.appendLine('<%s value="%s"/>' % (FOPT.GroupShape.flagNames[i], bval)) - flag /= 2 + flag //= 2 class ShapeBooleanProperties: @@ -829,7 +829,7 @@ class FOPT: def __parseBytes(self, rh): complexPos = self.strm.pos + (rh.recInstance * 6) strm = globals.ByteStream(self.strm.readBytes(rh.recLen)) 
- for i in xrange(0, rh.recInstance): + for i in range(0, rh.recInstance): entry = FOPT.E() val = strm.readUnsignedInt(2) entry.ID = (val & 0x3FFF) @@ -848,10 +848,10 @@ class FOPT: recHdl.appendLine("FOPT content (property table):") recHdl.appendLine(" property count: %d"%rh.recInstance) - for i in xrange(0, rh.recInstance): + for i in range(0, rh.recInstance): recHdl.appendLine(" "+"-"*57) prop = self.properties[i] - if FOPT.propTable.has_key(prop.ID) and len(FOPT.propTable[prop.ID]) > 1: + if prop.ID in FOPT.propTable and len(FOPT.propTable[prop.ID]) > 1: # We have a handler for this property. # propData is expected to have two elements: name (0) and handler (1). propHdl = FOPT.propTable[prop.ID] @@ -865,7 +865,7 @@ class FOPT: recHdl.appendLine(" blip ID: %d"%prop.value) else: # regular property value - if FOPT.propTable.has_key(prop.ID): + if prop.ID in FOPT.propTable: recHdl.appendLine(" property name: %s"%FOPT.propTable[prop.ID][0]) recHdl.appendLine(" property value: 0x%8.8X"%prop.value) @@ -874,7 +874,7 @@ class FOPT: recHdl.appendLine('<%s type="%s">' % (self.name, self.type)) recHdl.appendLine('<fopt type="OfficeArtRGFOPTE">') - for i in xrange(0, rh.recInstance): + for i in range(0, rh.recInstance): recHdl.appendLine('<rgfopte index="%d">' % i) if i < len(self.properties): prop = self.properties[i] @@ -883,7 +883,7 @@ class FOPT: recHdl.appendLine('<opid fBid="%d"/>' % prop.flagBid) recHdl.appendLine('<opid fComplex="%d"/>' % prop.flagComplex) recHdl.appendLine('</opid>') - if FOPT.propTable.has_key(prop.ID) and len(FOPT.propTable[prop.ID]) > 1: + if prop.ID in FOPT.propTable and len(FOPT.propTable[prop.ID]) > 1: # We have a handler for this property. # propData is expected to have two elements: name (0) and handler (1). 
propHdl = FOPT.propTable[prop.ID] @@ -891,7 +891,7 @@ class FOPT: propHdl[1]().dumpXml(recHdl, prop) recHdl.appendLine('</op>') else: - if FOPT.propTable.has_key(prop.ID): + if prop.ID in FOPT.propTable: recHdl.appendLine('<op name="%s" value="0x%8.8X"/>' % (FOPT.propTable[prop.ID][0], prop.value)) else: recHdl.appendLine('<op name="todo" value="0x%8.8X"/>' % prop.value) @@ -1080,7 +1080,7 @@ class BStoreContainer: def dumpXml(self, recHdl, model, rh): recHdl.appendLine('<bStoreContainer type="OfficeArtBStoreContainer">') - for i in xrange(rh.recInstance): + for i in range(rh.recInstance): bStoreContainerFileBlock = BStoreContainerFileBlock(self) bStoreContainerFileBlock.dumpXml(recHdl, model) recHdl.appendLine('</bStoreContainer>') @@ -1089,7 +1089,7 @@ class SplitMenuColorContainer: def __init__ (self, strm): self.smca = [] # this container contains 4 MSOCR records. - for i in xrange(0, 4): + for i in range(0, 4): msocr = MSOCR(strm) self.smca.append(msocr) @@ -1251,7 +1251,7 @@ class MSODrawHandler(globals.ByteStream): continue self.parent.appendLine(headerLine()) - if recData.has_key(rh.recType): + if rh.recType in recData: obj = recData[rh.recType](self) obj.appendLines(self.parent, rh) else: diff --git a/msodumper/msometa.py b/msodumper/msometa.py index ec62103..6d79ea9 100644 --- a/msodumper/msometa.py +++ b/msodumper/msometa.py @@ -5,8 +5,8 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. # -from binarystream import BinaryStream -import globals +from .binarystream import BinaryStream +from . 
import globals import time @@ -46,9 +46,9 @@ class DocumentSummaryInformationStream(BinaryStream): BinaryStream.__init__(self, bytes, params, "\x05DocumentSummaryInformation", doc=doc) def dump(self): - print '<stream name="\\x05DocumentSummaryInformation" size="%d">' % self.size + print('<stream name="\\x05DocumentSummaryInformation" size="%d">' % self.size) PropertySetStream(self, PIDDSI).dump() - print '</stream>' + print('</stream>') PIDSI = { @@ -79,9 +79,9 @@ class SummaryInformationStream(BinaryStream): BinaryStream.__init__(self, bytes, params, "\x05SummaryInformation", doc=doc) def dump(self): - print '<stream name="\\x05SummaryInformation" size="%d">' % self.size + print('<stream name="\\x05SummaryInformation" size="%d">' % self.size) PropertySetStream(self, PIDSI).dump() - print '</stream>' + print('</stream>') class PropertySetStream(BinaryStream): @@ -92,7 +92,7 @@ class PropertySetStream(BinaryStream): self.propertyIds = PropertyIds def dump(self): - print '<propertySetStream type="PropertySetStream" offset="%s">' % self.pos + print('<propertySetStream type="PropertySetStream" offset="%s">' % self.pos) self.printAndSet("ByteOrder", self.readuInt16()) self.printAndSet("Version", self.readuInt16()) self.printAndSet("SystemIdentifier", self.readuInt32()) @@ -111,7 +111,7 @@ class PropertySetStream(BinaryStream): # The spec says: if NumPropertySets has the value 0x00000002, # FMTID1 must be set to FMTID_UserDefinedProperties. 
PropertySet(self, self.Offset1, userDefined=True).dump() - print '</propertySetStream>' + print('</propertySetStream>') class PropertySet(BinaryStream): @@ -131,7 +131,7 @@ class PropertySet(BinaryStream): return self.posOrig = self.pos - print '<propertySet type="PropertySet" offset="%s">' % self.pos + print('<propertySet type="PropertySet" offset="%s">' % self.pos) self.printAndSet("Size", self.readuInt32()) self.printAndSet("NumProperties", self.readuInt32()) self.idsAndOffsets = [] @@ -150,7 +150,7 @@ class PropertySet(BinaryStream): typedPropertyValue = TypedPropertyValue(self, i) typedPropertyValue.dump() self.typedPropertyValues.append(typedPropertyValue) - print '</propertySet>' + print('</propertySet>') class PropertyIdentifierAndOffset(BinaryStream): @@ -161,12 +161,13 @@ class PropertyIdentifierAndOffset(BinaryStream): self.pos = parent.pos def dump(self): - print '<propertyIdentifierAndOffset%s type="PropertyIdentifierAndOffset" offset="%s">' % (self.index, self.pos) + print('<propertyIdentifierAndOffset%s type="PropertyIdentifierAndOffset" offset="%s">' % (self.index, self.pos)) self.printAndSet("PropertyIdentifier", self.readuInt32(), dict=self.parent.parent.propertyIds, default="unknown") self.printAndSet("Offset", self.readuInt32()) - print '</propertyIdentifierAndOffset%s>' % self.index + print('</propertyIdentifierAndOffset%s>' % self.index) self.parent.pos = self.pos + PropertyType = { 0x0000: "VT_EMPTY", 0x0001: "VT_NULL", @@ -252,7 +253,7 @@ class DictionaryEntry(BinaryStream): self.index = index def dump(self): - print '<dictionaryEntry offset="%s" index="%s">' % (self.pos, self.index) + print('<dictionaryEntry offset="%s" index="%s">' % (self.pos, self.index)) self.printAndSet("PropertyIdentifier", self.readuInt32()) self.printAndSet("Length", self.readuInt32()) @@ -264,9 +265,12 @@ class DictionaryEntry(BinaryStream): bytes.append(c) # TODO support non-latin1 encoding = "latin1" - print '<Name value="%s"/>' % 
globals.encodeName("".join(map(lambda c: chr(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8') + if globals.PY3: + print('<Name value="%s"/>' % globals.encodeName(b"".join(map(lambda c: globals.indexedbytetobyte(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8')) + else: + print('<Name value="%s"/>' % globals.encodeName("".join(map(lambda c: chr(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8')) - print '</dictionaryEntry>' + print('</dictionaryEntry>') self.parent.pos = self.pos @@ -280,12 +284,12 @@ class Dictionary(BinaryStream): self.pos = parent.posOrig + parent.idsAndOffsets[index].Offset def dump(self): - print '<dictionary%s type="Dictionary" offset="%s">' % (self.index, self.pos) + print('<dictionary%s type="Dictionary" offset="%s">' % (self.index, self.pos)) self.printAndSet("NumEntries", self.readuInt32()) for i in range(self.NumEntries): dictionaryEntry = DictionaryEntry(self, i) dictionaryEntry.dump() - print '</dictionary%s>' % self.index + print('</dictionary%s>' % self.index) class TypedPropertyValue(BinaryStream): @@ -296,7 +300,7 @@ class TypedPropertyValue(BinaryStream): self.pos = parent.posOrig + parent.idsAndOffsets[index].Offset def dump(self): - print '<typedPropertyValue%s type="TypedPropertyValue" offset="%s">' % (self.index, self.pos) + print('<typedPropertyValue%s type="TypedPropertyValue" offset="%s">' % (self.index, self.pos)) self.printAndSet("Type", self.readuInt16(), dict=PropertyType) self.printAndSet("Padding", self.readuInt16()) if self.Type == 0x0002: # VT_I2 @@ -312,8 +316,8 @@ class TypedPropertyValue(BinaryStream): elif self.Type == 0x101E: # VT_VECTOR | VT_LPSTR VectorHeader(self, "Value", self.parent.getCodePage()).dump() else: - print '<todo what="TypedPropertyValue::dump: unhandled Type %s"/>' % hex(self.Type) - print '</typedPropertyValue%s>' % self.index + print('<todo what="TypedPropertyValue::dump: unhandled Type %s"/>' % hex(self.Type)) + print('</typedPropertyValue%s>' % self.index) 
class VectorHeader(BinaryStream): @@ -327,11 +331,11 @@ class VectorHeader(BinaryStream): self.codepage = codepage def dump(self): - print '<%s type="VectorHeader">' % self.name + print('<%s type="VectorHeader">' % self.name) self.printAndSet("Length", self.readuInt32()) for dummy in range(self.Length): CodePageString(self, "String", self.codepage).dump() - print '</%s>' % self.name + print('</%s>' % self.name) class CodePageString(BinaryStream): @@ -343,7 +347,7 @@ class CodePageString(BinaryStream): self.codepage = codepage def dump(self): - print '<%s type="CodePageString">' % self.name + print('<%s type="CodePageString">' % self.name) self.printAndSet("Size", self.readuInt32()) bytes = [] for dummy in range(self.Size): @@ -364,10 +368,14 @@ class CodePageString(BinaryStream): # http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx encoding = "utf-8" if len(encoding): - print '<Characters value="%s"/>' % globals.encodeName("".join(map(lambda c: chr(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8') + if globals.PY3: + print('<Characters value="%s"/>' % globals.encodeName(b"".join(map(lambda c: globals.indexedbytetobyte(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8')) + else: + # Argh cant use indexedbytetobyte because we actually have ints + print('<Characters value="%s"/>' % globals.encodeName("".join(map(lambda c: chr(c), bytes)).decode(encoding), lowOnly=True).encode('utf-8')) else: - print '<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage - print '</%s>' % self.name + print('<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage) + print('</%s>' % self.name) class GUID(BinaryStream): @@ -385,7 +393,7 @@ class GUID(BinaryStream): for dummy in range(8): Data4.append(self.readuInt8()) value = "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x" % (Data1, Data2, Data3, Data4[0], Data4[1], Data4[2], Data4[3], Data4[4], Data4[5], Data4[6], Data4[7]) - print '<%s type="GUID" 
value="%s"/>' % (self.name, value) + print('<%s type="GUID" value="%s"/>' % (self.name, value)) self.parent.pos = self.pos @@ -413,7 +421,7 @@ class FILETIME(OLERecord): pretty = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.localtime(sec)) except ValueError: pretty = "ValueError" - print '<%s type="FILETIME" value="%d" pretty="%s"/>' % (self.name, sec, pretty) + print('<%s type="FILETIME" value="%d" pretty="%s"/>' % (self.name, sec, pretty)) self.parent.pos = self.pos diff --git a/msodumper/node.py b/msodumper/node.py index bab92bd..63a8de8 100644 --- a/msodumper/node.py +++ b/msodumper/node.py @@ -9,6 +9,7 @@ # to avoid making duplicate copies in each of my projects. import sys +from . import globals class NodeType: # unknown node type. @@ -101,7 +102,7 @@ class Element(NodeBase): return text def getAttr (self, name): - if not self.attrs.has_key(name): + if not name in self.attrs: return None return self.attrs[name] @@ -109,54 +110,62 @@ class Element(NodeBase): self.attrs[name] = val def hasAttr (self, name): - return self.attrs.has_key(name) + return name in self.attrs encodeTable = { - '>': 'gt', - '<': 'lt', - '&': 'amp', - '"': 'quot', - '\'': 'apos' + b'>': b'gt', + b'<': b'lt', + b'&': b'amp', + b'"': b'quot', + b'\'': b'apos' } # If utf8 is set, the input is either utf-8 bytes or Python # Unicode. Output utf-8 instead of hex-dump. def encodeString (sin, utf8 = False): - sout = '' + sout = b'' + if type(sin) == type(u""): + sin = sin.encode('UTF-8') if utf8: - if isinstance(sin, unicode): - sout1 = sin.encode('UTF-8') - else: - sout1 = sin # Escape special characters as entities. Can't keep zero bytes either # (bad XML). They can only arrive here if there is a bug somewhere. 
- for c in sout1: - if ord(c) == 0: - sout += '(nullbyte)' - elif c in encodeTable: - sout += '&' + encodeTable[c] + ';' + for c in sin: + cc = globals.indexedbytetobyte(c) + if c == b'\0'[0]: + sout += b'(nullbyte)' + elif cc in encodeTable: + sout += b'&' + encodeTable[cc] + b';' else: - sout += c + sout += cc else: for c in sin: - if ord(c) >= 128 or ord(c) == 0: + ic = globals.indexedbytetoint(c) + cc = globals.indexedbytetobyte(c) + if ic >= 128 or ic == 0: # encode non-ascii ranges. - sout += "\\x%2.2x"%ord(c) - elif encodeTable.has_key(c): + sout += b"\\x%2.2x"%ic + elif cc in encodeTable: # encode html symbols. - sout += '&' + encodeTable[c] + ';' + sout += b'&' + encodeTable[cc] + b';' else: - sout += c + sout += cc - return sout + return sout.decode('UTF-8') +if globals.PY3: + def isintegertype(val): + return type(val) == int +else: + def isintegertype(val): + return type(val) == type(0) or type(val) == long + def convertAttrValue (val): if type(val) == type(True): if val: val = "true" else: val = "false" - elif type(val) == type(0) or type(val) == type(0L): + elif isintegertype(val): val = "%d"%val elif type(val) == type(0.0): val = "%g"%val @@ -185,8 +194,7 @@ def printNode (fd, node, level, breakLine, utf8 = False): # encoded. line = node.name if len(node.attrs) > 0: - keys = node.attrs.keys() - keys.sort() + keys = sorted(node.attrs.keys()) for key in keys: val = node.attrs[key] if val == None: diff --git a/msodumper/ole.py b/msodumper/ole.py index 16f3cb9..8f3d4c5 100644 --- a/msodumper/ole.py +++ b/msodumper/ole.py @@ -4,17 +4,15 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - +from builtins import range import sys -import globals -from globals import getSignedInt +from . 
import globals +from .globals import getSignedInt, output # ---------------------------------------------------------------------------- # Reference: The Microsoft Compound Document File Format by Daniel Rentz # http://sc.openoffice.org/compdocfileformat.pdf # ---------------------------------------------------------------------------- -from globals import output - class NoRootStorage(Exception): pass @@ -37,7 +35,7 @@ class Header(object): @staticmethod def byteOrder (chars): - b1, b2 = ord(chars[0]), ord(chars[1]) + b1, b2 = globals.indexbytes(chars,0), globals.indexbytes(chars, 1) if b1 == 0xFE and b2 == 0xFF: return ByteOrder.LittleEndian elif b1 == 0xFF and b2 == 0xFE: @@ -90,10 +88,16 @@ class Header(object): def output (self): - def printRawBytes (bytes): - for b in bytes: - output("%2.2X "%ord(b)) - output("\n") + if globals.PY3: + def printRawBytes (bytes): + for b in bytes: + output("%2.2X "%b) + output("\n") + else: + def printRawBytes (bytes): + for b in bytes: + output("%2.2X "%ord(b)) + output("\n") def printSep (c, w, prefix=''): globals.outputln(prefix + c*w) @@ -191,7 +195,7 @@ class Header(object): # First part of MSAT consisting of an array of up to 109 sector IDs. # Each sector ID is 4 bytes in length. 
- for i in xrange(0, 109): + for i in range(0, 109): pos = 76 + i*4 id = getSignedInt(self.bytes[pos:pos+4]) if id == -1: @@ -210,7 +214,7 @@ class Header(object): pos = 512 + secID*size bytes = self.bytes[pos:pos+size] n = int(size/4) - for i in xrange(0, n): + for i in range(0, n): pos = i*4 id = getSignedInt(bytes[pos:pos+4]) if id < 0: @@ -342,14 +346,14 @@ class SAT(object): self.array = [] for secID in self.sectorIDs: pos = 512 + secID*self.sectorSize - for i in xrange(0, numItems): + for i in range(0, numItems): beginPos = pos + i*4 id = getSignedInt(self.bytes[beginPos:beginPos+4]) self.array.append(id) def outputRawBytes (self): - bytes = "" + bytes = b"" for secID in self.sectorIDs: pos = 512 + secID*self.sectorSize bytes += self.bytes[pos:pos+self.sectorSize] @@ -365,7 +369,7 @@ class SAT(object): sectorM4 = 0 # -4 sectorMElse = 0 # < -4 sectorLiveTotal = 0 - for i in xrange(0, len(self.array)): + for i in range(0, len(self.array)): item = self.array[i] if item >= 0: sectorP += 1 @@ -399,7 +403,7 @@ class SAT(object): if self.params.debug: self.outputRawBytes() globals.outputln("-"*globals.OutputWidth) - for i in xrange(0, len(self.array)): + for i in range(0, len(self.array)): globals.outputln("%5d: %5d"%(i, self.array[i])) globals.outputln("-"*globals.OutputWidth) @@ -440,7 +444,7 @@ sectors are contained in the SAT as a sector ID chain. if self.params.debug: self.outputRawBytes() globals.outputln("-"*globals.OutputWidth) - for i in xrange(0, len(self.array)): + for i in range(0, len(self.array)): item = self.array[i] output("%3d : %3d\n"%(i, item)) @@ -498,7 +502,7 @@ entire file stream. self.SSAT = header.getSSAT() self.header = header self.RootStorage = None - self.RootStorageBytes = "" + self.RootStorageBytes = b"" self.params = params @@ -526,7 +530,7 @@ entire file stream. 
if self.RootStorage == None: raise NoRootStorage - bytes = "" + bytes = b"" self.__buildRootStorageBytes() size = self.header.getShortSectorSize() for id in chain: @@ -536,7 +540,7 @@ entire file stream. offset = 512 size = self.header.getSectorSize() - bytes = "" + bytes = b"" for id in chain: pos = offset + id*size bytes += self.header.bytes[pos:pos+size] @@ -548,7 +552,7 @@ entire file stream. return bytes def getRawStreamByName (self, name): - bytes = [] + bytes = b'' for entry in self.entries: if entry.Name == name: bytes = self.__getRawStream(entry) @@ -584,11 +588,13 @@ entire file stream. def __outputEntry (self, entry, debug): globals.outputln("-"*globals.OutputWidth) - if len(entry.Name) > 0: + if len(entry.Name) > 0 and globals.indexbytes(entry.Name, 0) != 0: name = entry.Name - if ord(name[0]) <= 5: - name = "<%2.2Xh>%s"%(ord(name[0]), name[1:]) - globals.outputln("name: %s (name buffer size: %d bytes)"%(name, entry.CharBufferSize)) + # entry.Name : utf-8 bytes + if globals.indexbytes(name, 0) <= 5: + name = b"<%2.2Xh>%s"%(globals.indexbytes(name, 0), name[1:]) + sname = globals.nulltrunc(name).decode('utf-8') + globals.outputln("name: %s (name buffer size: %d bytes)"%(sname, entry.CharBufferSize)) else: globals.outputln("name: [empty] (name buffer size: %d bytes)"%entry.CharBufferSize) @@ -680,8 +686,13 @@ entire file stream. return output("%s: "%name) - for byte in bytes: - output("%2.2X "%ord(byte)) + if globals.PY3: + for byte in bytes: + output("%2.2X "%byte) + else: + for byte in bytes: + output("%2.2X "%ord(byte)) + globals.outputln("") def getDirectoryEntries (self): @@ -700,7 +711,7 @@ entire file stream. return # combine all sectors first. - bytes = "" + bytes = b"" for secID in self.sectorIDs: pos = globals.getSectorPos(secID, self.sectorSize) bytes += self.bytes[pos:pos+self.sectorSize] @@ -711,7 +722,7 @@ entire file stream. 
numEntries = int(len(bytes)/128) if numEntries == 0: return - for i in xrange(0, numEntries): + for i in range(0, numEntries): pos = i*128 self.entries.append(self.parseDirEntry(bytes[pos:pos+128])) diff --git a/msodumper/olestream.py b/msodumper/olestream.py index 9148d70..dc0b4dc 100644 --- a/msodumper/olestream.py +++ b/msodumper/olestream.py @@ -6,7 +6,7 @@ # import sys -import globals +from . import globals class CompObjStreamError(Exception): pass @@ -76,11 +76,11 @@ class CompObjStream(object): # LengthPrefixedAnsiString length = self.strm.readUnsignedInt(4) displayName = self.strm.readBytes(length) - if ord(displayName[-1]) != 0x00: + if globals.indexbytes(displayName, -1) != 0x00: # must be null-terminated. raise CompObjStreamError() - globals.outputln("display name: " + displayName[:-1]) + globals.outputln("display name: " + globals.encodeName(displayName[:-1])) # ClipboardFormatOrAnsiString marker = self.strm.readUnsignedInt(4) @@ -92,7 +92,7 @@ class CompObjStream(object): globals.outputln("clipboard format ID: %d"%clipFormatID) else: clipName = self.strm.readBytes(marker) - if ord(clipName[-1]) != 0x00: + if globals.indexbytes(clipName, -1) != 0x00: # must be null-terminated. raise CompObjStreamError() globals.outputln("clipboard format name: %s"%clipName[:-1]) @@ -104,7 +104,7 @@ class CompObjStream(object): raise CompObjStreamError() reserved = self.strm.readBytes(length) - if ord(reserved[-1]) != 0x00: + if globals.indexbytes(reserved, -1) != 0x00: # must be null-terminated. raise CompObjStreamError() @@ -129,7 +129,7 @@ class CompObjStream(object): globals.outputln("clipboard format ID: %d"%clipFormatID) else: clipName = globals.getUTF8FromUTF16(self.strm.readBytes(marker*2)) - if ord(clipName[-1]) != 0x00: + if globals.indexbytes(clipName, -1) != 0x00: # must be null-terminated. 
raise CompObjStreamError() globals.outputln("clipboard format name: %s"%clipName[:-1]) diff --git a/msodumper/oletool.py b/msodumper/oletool.py index dfab178..e4e03ce 100755 --- a/msodumper/oletool.py +++ b/msodumper/oletool.py @@ -8,7 +8,7 @@ import sys, os.path, optparse sys.path.append(sys.path[0]+"/src") -import ole, globals +from . import ole, globals def main (): diff --git a/msodumper/pptrecord.py b/msodumper/pptrecord.py index 48cc5a2..6907166 100644 --- a/msodumper/pptrecord.py +++ b/msodumper/pptrecord.py @@ -4,8 +4,8 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - -import globals +from builtins import range +from . import globals # ------------------------------------------------------------------- # record handler classes @@ -51,7 +51,7 @@ append a line to be displayed. # can't keep the text local and let output() behave according # to params. Have to use a global if we want to keep a minimal # modification - globals.textdump += text + "\n" + globals.textdump += text + b"\n" def appendLine (self, line): self.lines.append(line) @@ -123,7 +123,7 @@ class String(BaseRecordHandler): # chars, with the high byte ignored. Only latin1 could stand # this transformation. 
name = name.decode('cp1252').encode('UTF-8') - self.appendLine("text: '%s'"%name) + self.appendLine("text: '%s'"%name.decode('UTF-8')) self.appendText(name) def ShapeString (*args): @@ -136,7 +136,7 @@ class UniString(BaseRecordHandler): def parseBytes (self): name = globals.getUTF8FromUTF16(globals.getTextBytes(self.readRemainingBytes())) self.appendProperty(name) - self.appendLine("text: '%s'"%name) + self.appendLine("text: '%s'"%name.decode('UTF-8')) self.appendText(name) def ShapeUniString (*args): @@ -163,7 +163,7 @@ class FontEntity(BaseRecordHandler): flags = self.readUnsignedInt(1) fontType = self.readUnsignedInt(1) pitchAndFamily = self.readUnsignedInt(1) - self.appendLine("Font: name=\"%s\" charset=%d flags=0x%x type=%d family=%d"%(faceName, charSet, flags, fontType, pitchAndFamily)) + self.appendLine("Font: name=\"%s\" charset=%d flags=0x%x type=%d family=%d"%(globals.nulltrunc(faceName).decode('cp1252'), charSet, flags, fontType, pitchAndFamily)) # ------------------------------------------------------------------- # special record handler: properties @@ -177,7 +177,7 @@ class Property(BaseRecordHandler): allComplexBytes = self.bytes[self.pos+self.recordInstance*6:] # recordInstance gives number of properties - for i in xrange(0, self.recordInstance): + for i in range(0, self.recordInstance): propType = self.readUnsignedInt(2) propValue = self.readUnsignedInt(4) @@ -521,7 +521,7 @@ class AnimationInfo(BaseRecordHandler): try: # can fail with index out of range self.appendLine("build type: %s"%buildDesc[buildType]) - except Exception, err: + except Exception as err: error("AnimationInfo::parsebytes: %s: %s" % (str(buildType),str(err))) flyDesc = ["none","random","blinds","checker","cover","dissolve", @@ -596,7 +596,7 @@ class AnimAttributeValue(BaseRecordHandler): def handleString (self): value = globals.getUTF8FromUTF16(globals.getTextBytes(self.readRemainingBytes())) - self.appendLine("text value: '%s'"%value) + self.appendLine("text value: 
'%s'"%value.decode('UTF-8')) valueHandlers=[handleByte,handleLong,handleFloat,handleString] @@ -728,7 +728,7 @@ class TextRulerAtom(BaseRecordHandler): if rulerMask & 0x0004: numTabStops = self.readUnsignedInt(2) - for i in xrange(0, numTabStops): + for i in range(0, numTabStops): tabDistance = self.readUnsignedInt(2) tabAlignment = self.readUnsignedInt(2) self.appendParaProp("para tab stop %d: distance %d, align %4.4Xh"%(i, tabDistance, tabAlignment)) @@ -879,7 +879,7 @@ class TextStyles(BaseRecordHandler): if styleMask & 0x100000: numTabStops = self.readUnsignedInt(2) - for i in xrange(0, numTabStops): + for i in range(0, numTabStops): tabDistance = self.readUnsignedInt(2) tabAlignment = self.readUnsignedInt(2) self.appendParaProp("para tab stop %d: distance %d, align %4.4Xh"%(i, tabDistance, tabAlignment)) @@ -947,7 +947,7 @@ class MasterTextStyles(TextStyles): # entry misses the indent specifier it has for StyleTextAtom. numLevels = self.readUnsignedInt(2) - for i in xrange(0, numLevels): + for i in range(0, numLevels): self.appendLine("para props for indent level: %d"%i) self.parseParaStyle() self.appendLine("-"*61) @@ -989,7 +989,7 @@ class BoolPropertyHandler(BasePropertyHandler): def output (self): bitMask = 1 - for i in xrange(self.propType, self.propType-32, -1): + for i in range(self.propType, self.propType-32, -1): if i in propData: propEntry = propData[i] if propEntry[1] == BoolPropertyHandler: @@ -1024,7 +1024,7 @@ class MsoArrayPropertyHandler(BasePropertyHandler): dummy = self.readUnsignedInt(2) elementSize = self.readUnsignedInt(2) self.printer("%4.4Xh: %s: [\"%s\"]"%(self.propType, self.propEntry[0], self.propEntry[2])) - for i in xrange(0, numElements): + for i in range(0, numElements): if elementSize in [0,1,2,4]: currElem = self.readUnsignedInt(elementSize) self.printer("%4.4Xh: %d = %Xh"%(self.propType,i,currElem)) @@ -1038,7 +1038,7 @@ class UniCharPropertyHandler(BasePropertyHandler): def output (self): if self.isComplex: name = 
globals.getUTF8FromUTF16(globals.getTextBytes(self.bytes)) - self.printer("%4.4Xh: %s = %s: [\"%s\"]"%(self.propType, self.propEntry[0], name, self.propEntry[2])) + self.printer("%4.4Xh: %s = %s: [\"%s\"]"%(self.propType, self.propEntry[0], name.decode('UTF-8'), self.propEntry[2])) class FixedPointHandler(BasePropertyHandler): """FixedPoint property.""" diff --git a/msodumper/pptstream.py b/msodumper/pptstream.py index c3d8115..f1f08e4 100644 --- a/msodumper/pptstream.py +++ b/msodumper/pptstream.py @@ -4,10 +4,10 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - +from builtins import range import sys -import ole, globals, pptrecord -from globals import output +from . import ole, globals, pptrecord +from .globals import output class EndOfStream(Exception): pass @@ -134,10 +134,10 @@ class PPTDirStream(object): return size = len(bytes) self.__printSep('-', 61, "%4.4Xh: "%recordType, recordType = recordType) - for i in xrange(0, size): + for i in range(0, size): if (i+1) % 16 == 1: output(self.prefix + "%4.4Xh: "%recordType, recordType = recordType) - output("%2.2X "%ord(bytes[i]), recordType = recordType) + output("%2.2X "%globals.indexbytes(bytes, i), recordType = recordType) if (i+1) % 16 == 0 and i != size-1: globals.outputln("", recordType = recordType) if size > 0: @@ -188,7 +188,7 @@ class PPTDirStream(object): properties["CString"] = '' def isPPT10SpecialData (self): - return "CString" in self.properties and self.properties["CString"] == "___PPT10" + return "CString" in self.properties and self.properties["CString"] == b"___PPT10" def handlePPT10BinaryTags (self, bytes, recordInfo): subSubStrm = PPTDirStream(bytes, self.params, self.prefix+" ", recordInfo) diff --git a/msodumper/vbahelper.py b/msodumper/vbahelper.py index f36b736..42c9c99 100644 --- a/msodumper/vbahelper.py +++ b/msodumper/vbahelper.py @@ -4,7 +4,7 @@ # License, v. 2.0. 
If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - +from builtins import range import sys, struct class VBAStreamBase: @@ -44,13 +44,13 @@ class UnCompressedVBAStream(VBAStreamBase): if self.DecompressedBufferEnd < lastByte: lastByte = self.DecompressedBufferEnd - for index in xrange( self.DecompressedChunkStart, lastByte ): + for index in range( self.DecompressedChunkStart, lastByte ): self.CompressedContainer[ self.CompressedCurrent ] = self.chars[ index ] self.CompressedCurrent = self.CompressedCurrent + 1 self.DecompressedCurrent = self.DecompressedCurrent + 1 padCount = padCount - 1 - for index in xrange( 0, padCount ): + for index in range( 0, padCount ): self.CompressedContainer[ self.CompressedCurrent ] = 0x0; self.CompressedCurrent = self.CompressedCurrent + 1 @@ -109,7 +109,7 @@ class UnCompressedVBAStream(VBAStreamBase): flagByteIndex = self.CompressedCurrent tokenFlags = 0 self.CompressedCurrent = self.CompressedCurrent + 1 - for index in xrange(0,8): + for index in range(0,8): if ( ( self.DecompressedCurrent < decompressedEnd ) and (self.CompressedCurrent < compressedEnd) ): @@ -170,7 +170,7 @@ class UnCompressedVBAStream(VBAStreamBase): class CompressedVBAStream(VBAStreamBase): def __decompressRawChunk (self): - for i in xrange(0,self.CHUNKSIZE): + for i in range(0,self.CHUNKSIZE): self.DecompressedChunk[ self.DecompressedCurrent + i ] = self.chars[self.CompressedCurrent + i ] self.CompressedCurrent += self.CHUNKSIZE self.DecompressedCurrent += self.CHUNKSIZE @@ -187,7 +187,7 @@ class CompressedVBAStream(VBAStreamBase): destSize = len( self.DecompressedChunk ) srcCurrent = srcOffSet dstCurrent = dstOffSet - for i in xrange( 0, length ): + for i in range( 0, length ): self.DecompressedChunk[ dstCurrent ] = self.DecompressedChunk[ srcCurrent ] srcCurrent +=1 dstCurrent +=1 @@ -210,7 +210,7 @@ class CompressedVBAStream(VBAStreamBase): flagByte = struct.unpack("b", 
self.chars[self.CompressedCurrent ])[0] self.CompressedCurrent += 1 if self.CompressedCurrent < self.CompressedEnd: - for i in xrange(0,8): + for i in range(0,8): if self.CompressedCurrent < self.CompressedEnd: self.__decompressToken(i,flagByte) diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py index 1ab6482..b4653ec 100644 --- a/msodumper/vsdstream.py +++ b/msodumper/vsdstream.py @@ -5,10 +5,10 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. # -import ole -from binarystream import BinaryStream -from msometa import SummaryInformationStream -from msometa import DocumentSummaryInformationStream +from . import ole +from .binarystream import BinaryStream +from .msometa import SummaryInformationStream +from .msometa import DocumentSummaryInformationStream class VSDFile: diff --git a/msodumper/wmfrecord.py b/msodumper/wmfrecord.py index d318bc4..85a8bac 100644 --- a/msodumper/wmfrecord.py +++ b/msodumper/wmfrecord.py @@ -5,7 +5,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. # -from binarystream import BinaryStream +from .binarystream import BinaryStream # The BrushStyle Enumeration specifies the different possible brush types that can be used in graphics operations. 
@@ -313,12 +313,12 @@ class RectL(WMFRecord): self.name = "rectL" def dump(self): - print '<%s type="RectL">' % self.name + print('<%s type="RectL">' % self.name) self.printAndSet("Left", self.readInt32(), hexdump=False) self.printAndSet("Top", self.readInt32(), hexdump=False) self.printAndSet("Right", self.readInt32(), hexdump=False) self.printAndSet("Bottom", self.readInt32(), hexdump=False) - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -332,10 +332,10 @@ class SizeL(WMFRecord): self.name = "sizeL" def dump(self): - print '<%s type="SizeL">' % self.name + print('<%s type="SizeL">' % self.name) self.printAndSet("cx", self.readuInt32(), hexdump=False) self.printAndSet("cy", self.readuInt32(), hexdump=False) - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -349,10 +349,10 @@ class PointL(WMFRecord): self.name = "pointL" def dump(self): - print '<%s type="PointL">' % self.name + print('<%s type="PointL">' % self.name) self.printAndSet("x", self.readInt32(), hexdump=False) self.printAndSet("y", self.readInt32(), hexdump=False) - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -363,10 +363,10 @@ class PointS(WMFRecord): self.name = name def dump(self): - print '<%s type="PointS">' % self.name + print('<%s type="PointS">' % self.name) self.printAndSet("x", self.readInt16(), hexdump=False) self.printAndSet("y", self.readInt16(), hexdump=False) - print '</%s>' % self.name + print('</%s>' % self.name) self.parent.pos = self.pos @@ -377,12 +377,12 @@ class ColorRef(WMFRecord): self.name = name def dump(self): - print '<%s type="ColorRef">' % self.name + print('<%s type="ColorRef">' % self.name) self.printAndSet("Red", self.readuInt8(), hexdump=False) self.printAndSet("Green", self.readuInt8(), hexdump=False) self.printAndSet("Blue", self.readuInt8(), hexdump=False) self.printAndSet("Reserved", self.readuInt8(), hexdump=False) - print '</%s>' % self.name + 
print('</%s>' % self.name) self.parent.pos = self.pos # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/msodumper/xlsmodel.py b/msodumper/xlsmodel.py index 8332e32..b8c7c15 100644 --- a/msodumper/xlsmodel.py +++ b/msodumper/xlsmodel.py @@ -4,8 +4,8 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - -import globals, node, formula +from builtins import range +from . import globals, node, formula class ModelType: @@ -48,7 +48,7 @@ class Workbook(ModelBase): return self.__sheets[-1] def getWorkbookGlobal (self): - return filter(lambda x: isinstance(x, WorkbookGlobal), self.__sheets)[0] + return list(filter(lambda x: isinstance(x, WorkbookGlobal), self.__sheets))[0] def getCurrentSheet (self): return self.__sheets[-1] @@ -56,7 +56,7 @@ class Workbook(ModelBase): def createDOM (self): nd = node.Element('workbook') nd.setAttr('encrypted', self.encrypted) - sheets = filter(lambda x: isinstance(x, Worksheet), self.__sheets) + sheets = list(filter(lambda x: isinstance(x, Worksheet), self.__sheets)) n = len(sheets) if n == 0: return nd @@ -132,14 +132,13 @@ To store external reference cache from XCT/CRN records.""" self.__rows = {} def setValue (self, row, col, celltype, val): - if not self.__rows.has_key(row): + if not row in self.__rows: self.__rows[row] = {} self.__rows[row][col] = (celltype, val) def createDOM (self, wb): nd = node.Element("sheet") - rows = self.__rows.keys() - rows.sort() + rows = sorted(self.__rows.keys()) for row in rows: rowElem = nd.appendElement("row") rowElem.setAttr("id", row) @@ -257,7 +256,7 @@ class WorkbookGlobal(SheetBase): self.__dbRanges[sheetID] = tokens def getFilterRange (self, sheetID): - if not self.__dbRanges.has_key(sheetID): + if not sheetID in self.__dbRanges: return None return self.__dbRanges[sheetID] @@ -325,7 +324,7 @@ class Worksheet(SheetBase): def setAutoFilterArrowSize (self, arrowSize): arrows = [] - for i in 
xrange(0, arrowSize): + for i in range(0, arrowSize): arrows.append(None) # Swap with the new and empty list. @@ -335,7 +334,7 @@ class Worksheet(SheetBase): self.__autoFilterArrows[filterID] = obj def setCell (self, col, row, cell): - if not self.__rows.has_key(row): + if not row in self.__rows: self.__rows[row] = {} self.__rows[row][col] = cell @@ -364,8 +363,7 @@ class Worksheet(SheetBase): nd.setAttr('version', self.version) # cells - rows = self.__rows.keys() - rows.sort() + rows = sorted(self.__rows.keys()) for row in rows: rowNode = nd.appendElement('row') rowNode.setAttr('id', row) @@ -437,7 +435,7 @@ class Worksheet(SheetBase): elem = baseNode.appendElement('autofilter') elem.setAttr('range', cellRange.getName()) - for i in xrange(0, len(self.__autoFilterArrows)): + for i in range(0, len(self.__autoFilterArrows)): arrowObj = self.__autoFilterArrows[i] if arrowObj == None: arrow = elem.appendElement('arrow') diff --git a/msodumper/xlsparser.py b/msodumper/xlsparser.py index 0ae9e0c..aca4fac 100644 --- a/msodumper/xlsparser.py +++ b/msodumper/xlsparser.py @@ -5,7 +5,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. # -import xlsrecord +from . 
import xlsrecord ############## Common parsers ########################################## @@ -47,7 +47,7 @@ class BaseParser(object): return Seq(self, other) def safeParse(parser, stream): - #print "TRACE:[%s,%s]" % (str(parser), str(stream.tokens[stream.currentIndex])) + #print("TRACE:[%s,%s]" % (str(parser), str(stream.tokens[stream.currentIndex]))) parsed = None try: @@ -710,7 +710,7 @@ class XlsParser(BaseParser): parsed = parser.parse(stream) # skipping the unknown stream parsedList.append(parsed) except ParseException: - print ("Parse failed, previous token is [%s], next tokens are [%s]" % (stream.tokens[stream.currentIndex-1], + print("Parse failed, previous token is [%s], next tokens are [%s]" % (stream.tokens[stream.currentIndex-1], ','.join(map(str,stream.tokens[stream.currentIndex:stream.currentIndex+5])))) raise return parsedList diff --git a/msodumper/xlsrecord.py b/msodumper/xlsrecord.py index 1103e2c..f911f4a 100644 --- a/msodumper/xlsrecord.py +++ b/msodumper/xlsrecord.py @@ -4,11 +4,11 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - +from builtins import range import struct, sys -import globals, formula, xlsmodel, msodraw +from . 
import globals, formula, xlsmodel, msodraw -from globals import debug +from .globals import debug class RecordError(Exception): pass @@ -371,7 +371,7 @@ class XLStream(globals.ByteStream): cbExtRst = self.readSignedInt(4) # byte count of ExtRst if fHighByte: - rgb = unicode(self.readBytes(2*cch), 'UTF-16LE', errors='replace') + rgb = self.readBytes(2*cch).decode('UTF-16LE', errors='replace') elif globals.params.utf8: # Compressed Unicode-> latin1 rgb = self.readBytes(cch).decode('cp1252') @@ -438,21 +438,23 @@ Like parseBytes(), the derived classes must overwrite this method.""" def output (self): headerStr = self.__getHeaderStr() - print (headerStr + "-"*(globals.OutputWidth-len(headerStr))) + globals.outputln(headerStr + "-"*(globals.OutputWidth-len(headerStr))) try: self.parseBytes() for line in self.lines: + if type(line) == type(u''): + line = line.encode('utf-8') try: - print (headerStr + line) + globals.outputln(headerStr.encode('ascii') + line) except: if not globals.params.catchExceptions: raise - print (headerStr + "(xlsrecord:unprintable)") + globals.outputln(headerStr + "(xlsrecord:unprintable)") except globals.ByteStreamError: - print(headerStr + "Error interpreting the record!") + globals.outputln(headerStr + "Error interpreting the record!") def debug (self, msg): - print ("%4.4Xh: %s"%(self.header, msg)) + globals.outputln("%4.4Xh: %s"%(self.header, msg)) def appendLine (self, line): self.lines.append(line) @@ -670,7 +672,7 @@ class Autofilter(BaseRecordHandler): self.top10 = (flag & 0x0010) # top 10 autofilter self.top = (flag & 0x0020) # 1 = top 10 filter shows the top item, 0 = shows the bottom item self.percent = (flag & 0x0040) # 1 = top 10 shows percentage, 0 = shows items - self.itemCount = (flag & 0xFF80) / (2*7) + self.itemCount = (flag & 0xFF80) // (2*7) self.doper1 = self.__readDoper() self.doper2 = self.__readDoper() @@ -742,7 +744,7 @@ class BOF(BaseRecordHandler): } def getBuildIdName (self, value): - if BOF.buildId.has_key(value): + 
if value in BOF.buildId: return BOF.buildId[value] else: return '(unknown)' @@ -845,14 +847,14 @@ class BoundSheet(BaseRecordHandler): @staticmethod def getHiddenState (flag): - if BoundSheet.hiddenStates.has_key(flag): + if flag in BoundSheet.hiddenStates: return BoundSheet.hiddenStates[flag] else: return 'unknown' @staticmethod def getSheetType (flag): - if BoundSheet.sheetTypes.has_key(flag): + if flag in BoundSheet.sheetTypes: return BoundSheet.sheetTypes[flag] else: return 'unknown' @@ -943,12 +945,12 @@ class CondFmt(BaseRecordHandler): self.cfCount = self.readUnsignedInt(2) tmp = self.readUnsignedInt(2) self.toughRecalc = (tmp & 0x01) != 0 - self.recordID = (tmp & 0xFE) / 2 + self.recordID = (tmp & 0xFE) // 2 self.refBound = Ref8U(self) hitRangeCount = self.readUnsignedInt(2) self.hitRanges = [] - for i in xrange(0, hitRangeCount): + for i in range(0, hitRangeCount): self.hitRanges.append(Ref8U(self)) def parseBytes (self): @@ -1050,14 +1052,14 @@ class Dv(BaseRecordHandler): def __parseBytes (self): bits = self.readUnsignedInt(4) self.valType = (bits & 0x0000000F) - self.errStyle = (bits & 0x00000070) / (2**4) + self.errStyle = (bits & 0x00000070) // (2**4) self.strLookup = (bits & 0x00000080) != 0 self.allowBlank = (bits & 0x00000100) != 0 self.noDropDown = (bits & 0x00000200) != 0 - self.imeMode = (bits & 0x0003FC00) / (2**10) # take 8 bits and shift by 10 bits + self.imeMode = (bits & 0x0003FC00) // (2**10) # take 8 bits and shift by 10 bits self.showInputMsg = (bits & 0x00040000) != 0 self.showErrorMsg = (bits & 0x00080000) != 0 - self.operator = (bits & 0x00F00000) / (2**20) + self.operator = (bits & 0x00F00000) // (2**20) self.promptTitle = self.readUnicodeString() self.errorTitle = self.readUnicodeString() @@ -1084,7 +1086,7 @@ class Dv(BaseRecordHandler): rangeCount = self.readUnsignedInt(2) self.ranges = [] - for i in xrange(0, rangeCount): + for i in range(0, rangeCount): obj = formula.CellRange() obj.firstRow = self.readUnsignedInt(2) obj.lastRow 
= self.readUnsignedInt(2) @@ -1230,7 +1232,7 @@ class Format(BaseRecordHandler): def parseBytes (self): self.__parseBytes() self.appendLine("index: %d"%self.numfmtID) - self.appendLine("code: %s"%globals.encodeName(self.code)) + self.appendLine("code: %s"%self.code) class Formula(BaseRecordHandler): @@ -1287,7 +1289,7 @@ class HorBreaks(BaseRecordHandler): def __parseBytes (self): self.count = self.readUnsignedInt(2) self.breaks = [] - for i in xrange(0, self.count): + for i in range(0, self.count): row = self.readUnsignedInt(2) col1 = self.readUnsignedInt(2) col2 = self.readUnsignedInt(2) @@ -1296,7 +1298,7 @@ class HorBreaks(BaseRecordHandler): def parseBytes (self): self.__parseBytes() self.appendLine("count: %d"%self.count) - for i in xrange(0, self.count): + for i in range(0, self.count): self.appendLine("break: (row: %d; colums: %d-%d)"%self.breaks[i]) @@ -1377,8 +1379,8 @@ class MulRK(BaseRecordHandler): self.row = self.readUnsignedInt(2) self.col1 = self.readUnsignedInt(2) self.rkrecs = [] - rkCount = (self.getSize() - self.getCurrentPos() - 2) / 6 - for i in xrange(0, rkCount): + rkCount = (self.getSize() - self.getCurrentPos() - 2) // 6 + for i in range(0, rkCount): rec = MulRK.RKRec() rec.xfIdx = self.readUnsignedInt(2) rec.number = self.readUnsignedInt(4) @@ -1398,7 +1400,7 @@ class MulRK(BaseRecordHandler): self.__parseBytes() sheet = model.getCurrentSheet() n = len(self.rkrecs) - for i in xrange(0, n): + for i in range(0, n): rkrec = self.rkrecs[i] col = self.col1 + i cell = xlsmodel.NumberCell(decodeRK(rkrec.number)) @@ -1664,7 +1666,7 @@ class Scl(BaseRecordHandler): self.__parseBytes() val = 0.0 # force the value to be treated as double precision. 
val += self.numerator - val /= self.denominator + val //= self.denominator self.appendLine("zoom level: %g"%val) def dumpData(self): @@ -1742,7 +1744,7 @@ class SST(BaseRecordHandler): self.refCount = self.readSignedInt(4) # total number of references in workbook self.strCount = self.readSignedInt(4) # total number of unique strings. self.sharedStrings = [] - for i in xrange(0, self.strCount): + for i in range(0, self.strCount): extText, bytesRead = globals.getUnicodeRichExtText(self.bytes, self.getCurrentPos(), self.roflist) self.readBytes(bytesRead) # advance current position. self.sharedStrings.append(extText) @@ -1753,7 +1755,7 @@ class SST(BaseRecordHandler): self.appendLine("total number of unique strings: %d"%self.strCount) i = 0 for s in self.sharedStrings: - self.appendLine("s%d: %s"%(i, globals.encodeName(s.baseText))) + self.appendLine("s%d: %s"%(i, s.baseText)) i += 1 def fillModel (self, model): @@ -1974,7 +1976,7 @@ class Name(BaseRecordHandler): self.isMacroName = (flag & 0x0008) != 0 self.isComplFormula = (flag & 0x0010) != 0 self.isBuiltinName = (flag & 0x0020) != 0 - self.funcGrp = (flag & 0x0FC0) / 64 + self.funcGrp = (flag & 0x0FC0) // 64 reserved = (flag & 0x1000) != 0 self.isPublished = (flag & 0x2000) != 0 self.isWorkbookParam = (flag & 0x4000) != 0 @@ -2004,7 +2006,7 @@ class Name(BaseRecordHandler): def parseBytes (self): self.__parseBytes() - self.appendLine("name: %s"%globals.encodeName(self.name)) + self.appendLine("name: %s"%self.name) # is this name global or sheet-local? 
s = "global or local: " @@ -2117,7 +2119,7 @@ class ExternSheet(BaseRecordHandler): def __parseBytes (self): self.sheets = [] num = self.readUnsignedInt(2) - for i in xrange(0, num): + for i in range(0, num): book = self.readUnsignedInt(2) sheet1 = self.readUnsignedInt(2) sheet2 = self.readUnsignedInt(2) @@ -2156,7 +2158,7 @@ class ExternName(BaseRecordHandler): self.lastRow = self.strm.readUnsignedInt(2) self.values = [] n = (self.lastCol+1)*(self.lastRow+1) - for i in xrange(0, n): + for i in range(0, n): # parse each value oc = self.strm.readUnsignedInt(1) if oc == 0x01: @@ -2194,7 +2196,7 @@ class ExternName(BaseRecordHandler): hdl.appendLine("value: %d (boolean)"%value) elif type(value) == type(1): # error code stored as an integer. - if ExternName.MOper.Errors.has_key(value): + if value in ExternName.MOper.Errors: hdl.appendLine("value: %s"%ExternName.MOper.Errors[value]) else: hdl.appendLine("value: 0x%2.2X (unknown error)"%value) @@ -2211,7 +2213,7 @@ class ExternName(BaseRecordHandler): self.isOLELink = (flag & 0x0010) != 0 # 5 - 14 bits stores last successful clip format - self.clipFormat = (flag & 0x7FE0) / 2**5 + self.clipFormat = (flag & 0x7FE0) // 2**5 self.displayAsIcon = (flag & 0x8000) != 0 @@ -2298,7 +2300,7 @@ class Crn(BaseRecordHandler): self.firstCol = self.readUnsignedInt(1) self.rowIndex = self.readUnsignedInt(2) self.cells = [] - for i in xrange(0, self.lastCol-self.firstCol+1): + for i in range(0, self.lastCol-self.firstCol+1): typeId = self.readUnsignedInt(1) if typeId == 0x00: # empty value @@ -2313,7 +2315,6 @@ class Crn(BaseRecordHandler): pos = self.getCurrentPos() ret, length = globals.getUnicodeRichExtText(self.bytes, pos) text = ret.baseText - text = globals.encodeName(text) self.moveForward(length) self.cells.append((typeId, text)) elif typeId == 0x04: @@ -2364,7 +2365,7 @@ class Crn(BaseRecordHandler): if sb.type != xlsmodel.Supbook.Type.External: return cache = sb.getCurrentSheetCache() - for col in xrange(self.firstCol, 
self.lastCol+1): + for col in range(self.firstCol, self.lastCol+1): cell = self.cells[col-self.firstCol] typeId, val = cell[0], cell[1] cache.setValue(self.rowIndex, col, typeId, val) @@ -2430,7 +2431,7 @@ class PhoneticInfo(BaseRecordHandler): # | unused| B | A | phType = (flags) & 0x03 - alignType = (flags/4) & 0x03 + alignType = (flags//4) & 0x03 self.appendLine("phonetic type: %s"%PhoneticInfo.getPhoneticType(phType)) self.appendLine("alignment: %s"%PhoneticInfo.getAlignType(alignType)) @@ -2633,21 +2634,21 @@ class XF(BaseRecordHandler): byte = strm.readUnsignedInt(1) self.horAlign = (byte & 0x07) self.wrapText = (byte & 0x08) != 0 - self.verAlign = (byte & 0x70) / (2**4) + self.verAlign = (byte & 0x70) // (2**4) self.distributed = (byte & 0x80) != 0 self.textRotation = strm.readUnsignedInt(1) byte = strm.readUnsignedInt(1) self.indentLevel = (byte & 0x0F) self.shrinkToFit = (byte & 0x10) != 0 - self.readOrder = (byte & 0xC0) / (2**6) + self.readOrder = (byte & 0xC0) // (2**6) def parseBorderStyles (self, strm): byte = strm.readUnsignedInt(1) self.leftBdrStyle = (byte & 0x0F) - self.rightBdrStyle = (byte & 0xF0) / (2**4) + self.rightBdrStyle = (byte & 0xF0) // (2**4) byte = strm.readUnsignedInt(1) self.topBdrStyle = (byte & 0x0F) - self.bottomBdrStyle = (byte & 0xF0) / (2**4) + self.bottomBdrStyle = (byte & 0xF0) // (2**4) class CellXF(XFBase): def __init__ (self): @@ -2674,8 +2675,8 @@ class XF(BaseRecordHandler): self.parseBorderStyles(strm) byte = strm.readUnsignedInt(2) self.leftColor = (byte & 0x007F) # 7-bits - self.rightColor = (byte & 0x0780) / (2**7) # 7-bits - self.diagBorder = (byte & 0xC000) / (2**14) # 2-bits + self.rightColor = (byte & 0x0780) // (2**7) # 7-bits + self.diagBorder = (byte & 0xC000) // (2**14) # 2-bits def __parseBytes (self): @@ -2689,7 +2690,7 @@ class XF(BaseRecordHandler): # ID of cell style XF record which it inherits styles from. Should be # 0xFFF it the style flag is on. 
- self.cellStyleXFIndex = (flags & 0xFFF0) / (2**4) + self.cellStyleXFIndex = (flags & 0xFFF0) // (2**4) if self.style: self.data = XF.CellStyleXF() @@ -2702,7 +2703,11 @@ class XF(BaseRecordHandler): def parseBytes (self): self.__parseBytes() if self.style: - self.appendLine("parent style ID: 0x%2.2X (should be 0xFFF for cell style XF)"%self.cellStyleXFIndex) + # self.cellStyleXFIndex is actually something like 4095.0 Python3 + # refuses an implicit conversion to int through the format spec, + # have to do it explicitely + sxfi = int(self.cellStyleXFIndex) + self.appendLine("parent style ID: 0x%2.2X (should be 0xFFF for cell style XF)"%sxfi) else: self.appendLine("parent style ID: %d"%self.cellStyleXFIndex) self.appendLine("font ID: %d"%self.fontId) @@ -2984,7 +2989,7 @@ class FeatureData(BaseRecordHandler): self.readBytes(2) # reserved3, must be 0 refs = [] - for i in xrange(0, cref): + for i in range(0, cref): refs.append(Ref8U(self)) self.appendLine("record type: 0x%4.4X (must match the header)"%recordType) @@ -3029,7 +3034,7 @@ class Feature11(BaseRecordHandler): self.cbFeatData = self.readUnsignedInt(4) # size of rgbFeat self.readBytes(2) # ignored self.refs2 = [] - for i in xrange(0, self.cref2): + for i in range(0, self.cref2): ref = Ref8U(self) self.refs2.append(ref) @@ -3157,8 +3162,8 @@ class SxIvd(BaseRecordHandler): def __parseBytes (self): self.ids = [] - n = self.getSize() / 2 - for i in xrange(0, n): + n = self.getSize() // 2 + for i in range(0, n): self.ids.append(self.readSignedInt(2)) def parseBytes (self): @@ -3393,7 +3398,7 @@ class SXEx(BaseRecordHandler): self.fAcrossPageLay = (flag & 0x0001) # Rows in each page field column - self.cWrapPage = (flag & 0x01FE) / 2 + self.cWrapPage = (flag & 0x01FE) // 2 flag = self.readUnsignedInt(2) self.fEnableWizard = (flag & 0x0001) != 0 # D @@ -3628,7 +3633,7 @@ class SXLI(BaseRecordHandler): self.isxviMac = strm.readSignedInt(2) flag = strm.readUnsignedInt(2) self.fMultiDataName = (flag & 0x0001) != 0 - 
self.iData = (flag & 0x01FE) / 2 + self.iData = (flag & 0x01FE) // 2 self.fSbt = (flag & 0x0200) != 0 self.fBlock = (flag & 0x0400) != 0 self.fGrand = (flag & 0x0800) != 0 @@ -3638,7 +3643,7 @@ class SXLI(BaseRecordHandler): I = (flag & 0x8000) != 0 # reserved self.rgisxvi = [] if self.isxviMac > 0: - for i in xrange(0, self.isxviMac): + for i in range(0, self.isxviMac): id = strm.readSignedInt(2) self.rgisxvi.append(id) @@ -3711,9 +3716,9 @@ class SxRule(BaseRecordHandler): self.sxaxisCol = (flag & 0x0002) != 0 self.sxaxisPage = (flag & 0x0004) != 0 self.sxaxisData = (flag & 0x0008) != 0 - self.sxrType = (flag & 0x00F0) / (2**4) + self.sxrType = (flag & 0x00F0) // (2**4) - flag /= 2**8 # shift 8 bits + flag //= 2**8 # shift 8 bits self.fPart = (flag & 0x01) != 0 self.fDataOnly = (flag & 0x02) != 0 @@ -3862,7 +3867,7 @@ class MergeCells(BaseRecordHandler): def __parseBytes (self): self.cmcs = self.readUnsignedInt(2) self.rgref = [] - for i in xrange(0, self.cmcs): + for i in range(0, self.cmcs): self.rgref.append(Ref8(self)) def parseBytes (self): @@ -4056,11 +4061,11 @@ class SXDataItem(BaseRecordHandler): self.appendLine("field that this data item is based on: %d"%isxvdData) funcName = '(unknown)' - if SXDataItem.functionType.has_key(funcIndex): + if funcIndex in SXDataItem.functionType: funcName = SXDataItem.functionType[funcIndex] self.appendLine("aggregate function: %s"%funcName) dfName = '(unknown)' - if SXDataItem.displayFormat.has_key(df): + if df in SXDataItem.displayFormat: dfName = SXDataItem.displayFormat[df] self.appendLine("data display format: %s"%dfName) self.appendLine("SXVD record index: %d"%sxvdIndex) @@ -4306,7 +4311,7 @@ class SXRng(BaseRecordHandler): flag = self.readUnsignedInt(2) self.fAutoStart = (flag & 0x0001) != 0 # A self.fAutoEnd = (flag & 0x0002) != 0 # B - self.iByType = (flag & 0x001C) / 4 # C + self.iByType = (flag & 0x001C) // 4 # C def parseBytes (self): self.__parseBytes() @@ -4403,9 +4408,9 @@ class 
RRDChgCell(BaseRecordHandler): self.rrd = RRD(self) flags = self.readUnsignedInt(2) self.vt = (flags & 0x0007) - flags /= 2**3 # shift 3 bits + flags //= 2**3 # shift 3 bits self.vtOld = (flags & 0x0007) - flags /= 2**3 # shift 3 bits + flags //= 2**3 # shift 3 bits self.f123Prefix = (flags & 0x0001) unused = (flags & 0x0002) self.fOldFmt = (flags & 0x0004) @@ -4827,7 +4832,7 @@ class ChartFrtInfo(BaseRecordHandler): self.verWriter = self.readUnsignedInt(1) self.cCFRTID = self.readUnsignedInt(2) self.cfrtids = [] - for x in xrange(self.cCFRTID): + for x in range(self.cCFRTID): self.cfrtids.append(self.readCFRTID()) def parseBytes (self): @@ -5242,7 +5247,7 @@ class SeriesList(BaseRecordHandler): def __parseBytes(self): self.cser = self.readUnsignedInt(2) self.series = [] - for x in xrange(self.cser): + for x in range(self.cser): self.series.append(self.readUnsignedInt(2)) def parseBytes (self): @@ -5329,7 +5334,7 @@ class Text(BaseRecordHandler): flag = self.readUnsignedInt(2) self.dlp = (flag & 0x000F) - self.readingOrder = (flag & 0xC000) / (2**14) + self.readingOrder = (flag & 0xC000) // (2**14) self.trot = self.readUnsignedInt(2) def parseBytes (self): @@ -5512,7 +5517,7 @@ class Legend(BaseRecordHandler): spacingMap = ['close', 'medium', 'open'] def getDockModeText (self, val): - if Legend.dockModeMap.has_key(val): + if val in Legend.dockModeMap: return Legend.dockModeMap[val] else: return '(unknown)' diff --git a/msodumper/xlsstream.py b/msodumper/xlsstream.py index d25a561..bd09349 100644 --- a/msodumper/xlsstream.py +++ b/msodumper/xlsstream.py @@ -4,10 +4,10 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - +from builtins import range import sys -import ole, globals, xlsrecord -from globals import output +from . 
import ole, globals, xlsrecord +from .globals import output class EndOfStream(Exception): pass @@ -337,7 +337,7 @@ class StreamData(object): self.pivotCacheIDs[strId] = True def isPivotCacheStream (self, name): - return self.pivotCacheIDs.has_key(name) + return name in self.pivotCacheIDs class XLStream(object): @@ -356,14 +356,14 @@ class XLStream(object): self.strmData = strmData def __printSep (self, c, w, prefix=''): - print(prefix + c*w) + globals.outputln(prefix + c*w) def printStreamInfo (self): self.__printSep('=', globals.OutputWidth) - print("Excel File Format Dumper by Kohei Yoshida") - print(" total stream size: %d bytes"%self.size) + globals.outputln("Excel File Format Dumper by Kohei Yoshida") + globals.outputln(" total stream size: %d bytes"%self.size) self.__printSep('=', globals.OutputWidth) - print('') + globals.outputln('') def printHeader (self): self.__parseHeader() @@ -438,8 +438,8 @@ class XLDirStream(object): def readRaw (self, size=1): # assume little endian bytes = 0 - for i in xrange(0, size): - b = ord(self.bytes[self.pos]) + for i in range(0, size): + b = globals.indexbytes(self.bytes, self.pos) if i == 0: bytes = b else: @@ -456,7 +456,7 @@ class XLDirStream(object): return self.bytes[curpos:self.pos] def __printSep (self, c, w, prefix=''): - print(prefix + c*w) + globals.outputln(prefix + c*w) def __readRecordBytes (self): if self.size - self.pos < 4: @@ -505,7 +505,7 @@ class XLDirStream(object): # record handler that parses the raw bytes and displays more # meaningful information. 
handler = None - if recData.has_key(header) and len(recData[header]) >= 3: + if header in recData and len(recData[header]) >= 3: handler = recData[header][2](header, size, bytes, self.strmData, roflist) if handler != None and self.strmData.encrypted: @@ -516,7 +516,7 @@ class XLDirStream(object): return handler def __postReadRecord (self, header): - if recData.has_key(header) and recData[header][0] == "FILEPASS": + if header in recData and recData[header][0] == "FILEPASS": # presence of FILEPASS record indicates that the stream is # encrypted. self.strmData.encrypted = True @@ -545,31 +545,31 @@ class XLDirStream(object): # meaningful information. handler = None - print("") + globals.outputln("") headerStr = "%4.4Xh: "%header self.__printSep('=', globals.OutputWidth-len(headerStr), headerStr) - if recData.has_key(header): - print("%4.4Xh: %s - %s (%4.4Xh)"% + if header in recData: + globals.outputln("%4.4Xh: %s - %s (%4.4Xh)"% (header, recData[header][0], recData[header][1], header)) if len(recData[header]) >= 3: handler = recData[header][2](header, size, bytes, self.strmData, roflist) - elif self.type == DirType.RevisionLog and recDataRev.has_key(header): - print("%4.4Xh: %s - %s (%4.4Xh)"% + elif self.type == DirType.RevisionLog and header in recDataRev: + globals.outputln("%4.4Xh: %s - %s (%4.4Xh)"% (header, recDataRev[header][0], recDataRev[header][1], header)) if len(recDataRev[header]) >= 3: handler = recDataRev[header][2](header, size, bytes, self.strmData, roflist) else: - print("%4.4Xh: [unknown record name] (%4.4Xh)"%(header, header)) + globals.outputln("%4.4Xh: [unknown record name] (%4.4Xh)"%(header, header)) if self.params.showStreamPos: - print("%4.4Xh: size = %d; pos = %d"%(header, size, pos)) + globals.outputln("%4.4Xh: size = %d; pos = %d"%(header, size, pos)) else: - print("%4.4Xh: size = %d"%(header, size)) + globals.outputln("%4.4Xh: size = %d"%(header, size)) # print the raw bytes, with 16 bytes per line. 
self.__printSep('-', globals.OutputWidth-len(headerStr), headerStr) lines = [] - for i in xrange(0, size): + for i in range(0, size): if i % 16 == 0: lines.append([]) lines[-1].append(bytes[i]) @@ -578,15 +578,15 @@ class XLDirStream(object): output("%4.4Xh: "%header) n = len(line) for byte in line: - output("%2.2X "%ord(byte)) - for i in xrange(n, 16): + output("%2.2X "%globals.indexedbytetoint(byte)) + for i in range(n, 16): output(" ") output(' ') for byte in line: output(globals.toCharOrDot(byte)) - print("") + globals.outputln("") if handler != None and not self.strmData.encrypted: # record handler exists. Parse the record and display more info diff --git a/msodumper/xmlpp.py b/msodumper/xmlpp.py index 8975689..6a3758c 100755 --- a/msodumper/xmlpp.py +++ b/msodumper/xmlpp.py @@ -38,6 +38,13 @@ def _usage(this_file): return """SYNOPSIS: pretty print an XML document USAGE: python %s <filename> \n""" % this_file +# Note about encodings: for python3, the default output (sys.stdout) +# expects str (decoded) data, and this is also the case for the output +# used with ppt-dump (appendline(), which is called with str data): so +# we decode the raw data. As the existing code does not deal with a +# possible encoding specification (<?xml encoding="xxx"), we make the +# reasonable assumption that it is utf-8. + def _pprint_line(indent_level, line, width=100, output=_sys.stdout): if line.strip(): start = "" @@ -112,7 +119,7 @@ def pprint(xml, output=_sys.stdout, indent=4, width=80): """Pretty print xml. Use output to select output stream. Default is sys.stdout Use indent to select indentation level. 
Default is 4 """ - data = xml + data = xml.decode('utf-8') indent_level = 0 start_pos, end_pos, is_stop, no_indent = _get_next_elem(data) while ((start_pos > -1 and end_pos > -1)): diff --git a/ppt-dump.py b/ppt-dump.py index 0a753e2..f607525 100755 --- a/ppt-dump.py +++ b/ppt-dump.py @@ -20,7 +20,7 @@ Options: --no-raw-dumps suppress raw hex dumps of uninterpreted areas --id-select=id1[,id2 ...] limit output to selected record Ids """ % exname - print msg + print(msg) class PPTDumper(object): @@ -46,25 +46,26 @@ class PPTDumper(object): dirnames = strm.getDirectoryNames() result = True for dirname in dirnames: - if len(dirname) == 0 or dirname == 'Root Entry': + sdirname = globals.nulltrunc(dirname) + if len(sdirname) == 0 or sdirname == b"Root Entry": continue try: dirstrm = strm.getDirectoryStreamByName(dirname) - except Exception, err: + except Exception as err: error("getDirectoryStreamByName(%s): %s\n" % (dirname,str(err))) # The previous version was killed by the exception # here, so the equivalent is to break, but maybe there # is no reason to do so. 
break self.__printDirHeader(dirname, len(dirstrm.bytes)) - if dirname == "PowerPoint Document": + if sdirname == b"PowerPoint Document": if not self.__readSubStream(dirstrm): result = False - elif dirname == "Current User": + elif sdirname == b"Current User": if not self.__readSubStream(dirstrm): result = False - elif dirname == "\x05DocumentSummaryInformation": + elif sdirname == b"\x05DocumentSummaryInformation": strm = olestream.PropertySetStream(dirstrm.bytes) strm.read() else: @@ -119,7 +120,7 @@ def main (args): if not dumper.dump(): error("FAILURE\n") if globals.params.dumpText: - print(globals.textdump.replace("\r", "\n")) + globals.dumptext() if __name__ == '__main__': main(sys.argv) diff --git a/test/doc/test.py b/test/doc/test.py index e38f6a9..2d6751e 100755 --- a/test/doc/test.py +++ b/test/doc/test.py @@ -242,6 +242,7 @@ class Test(unittest.TestCase): actual = self.root.findall(xpath)[0].attrib['value'] self.assertEqual(expected, actual) + if __name__ == '__main__': unittest.main() diff --git a/test/vsd-test.py b/test/vsd-test.py index ee7741c..b5d5a8f 100755 --- a/test/vsd-test.py +++ b/test/vsd-test.py @@ -23,7 +23,7 @@ class OLEStream(msodumper.docdirstream.DOCDirStream): msodumper.docdirstream.DOCDirStream.__init__(self, bytes) def dump(self): - print '<stream type="OLE" size="%d">' % self.size + print('<stream type="OLE" size="%d">' % self.size) header = Header(self) header.dump() @@ -32,25 +32,25 @@ class OLEStream(msodumper.docdirstream.DOCDirStream): self.pos = (header.FirstDirSectorLocation + 1) * sectorSize DirectoryEntryName = msodumper.globals.getUTF8FromUTF16(self.readBytes(64)) - print '<DirectoryEntryName value="%s"/>' % DirectoryEntryName + print('<DirectoryEntryName value="%s"/>' % DirectoryEntryName) DirectoryEntryNameLength = self.readuInt16() - print '<DirectoryEntryNameLength value="%s"/>' % DirectoryEntryNameLength + print('<DirectoryEntryNameLength value="%s"/>' % DirectoryEntryNameLength) ObjectType = self.readuInt8() - print 
'<ObjectType value="%s"/>' % ObjectType + print('<ObjectType value="%s"/>' % ObjectType) ColorFlag = self.readuInt8() - print '<ColorFlag value="%s"/>' % ColorFlag + print('<ColorFlag value="%s"/>' % ColorFlag) LeftSiblingID = self.readuInt32() - print '<LeftSiblingID value="0x%x"/>' % LeftSiblingID + print('<LeftSiblingID value="0x%x"/>' % LeftSiblingID) RightSiblingID = self.readuInt32() - print '<RightSiblingID value="0x%x"/>' % RightSiblingID + print('<RightSiblingID value="0x%x"/>' % RightSiblingID) ChildID = self.readuInt32() - print '<ChildID value="0x%x"/>' % ChildID + print('<ChildID value="0x%x"/>' % ChildID) msodumper.msometa.GUID(self, "CLSID").dump() StateBits = self.readuInt32() - print '<StateBits value="0x%x"/>' % StateBits + print('<StateBits value="0x%x"/>' % StateBits) msodumper.msometa.FILETIME(self, "CreationTime").dump() msodumper.msometa.FILETIME(self, "ModifiedTime").dump() - print '</stream>' + print('</stream>') class Header(msodumper.msometa.OLERecord): @@ -58,7 +58,7 @@ class Header(msodumper.msometa.OLERecord): msodumper.msometa.OLERecord.__init__(self, parent) def dump(self): - print '<CFHeader>' + print('<CFHeader>') self.printAndSet("HeaderSignature", self.readuInt64()) self.printAndSet("HeaderCLSID0", self.readuInt64()) self.printAndSet("HeaderCLSID1", self.readuInt64()) @@ -79,16 +79,16 @@ class Header(msodumper.msometa.OLERecord): self.printAndSet("NumMiniFATSectors", self.readuInt32()) self.printAndSet("FirstDIFATSectorLocation", self.readuInt32()) self.printAndSet("NumDIFATSectors", self.readuInt32()) - print '<DIFAT>' + print('<DIFAT>') self.DIFAT = [] for i in range(109): n = self.readuInt32() if n == 0xffffffff: break - print '<DIFAT index="%d" value="%x"/>' % (i, n) + print('<DIFAT index="%d" value="%x"/>' % (i, n)) self.DIFAT.append(n) - print '</DIFAT>' - print '</CFHeader>' + print('</DIFAT>') + print('</CFHeader>') class OLEDumper: @@ -99,7 +99,7 @@ class OLEDumper: file = open(self.filepath, 'rb') strm = 
OLEStream(file.read()) file.close() - print '<?xml version="1.0"?>' + print('<?xml version="1.0"?>') strm.dump() @@ -107,6 +107,7 @@ def main(args): dumper = OLEDumper(args[1]) dumper.dump() + if __name__ == '__main__': main(sys.argv) @@ -4,7 +4,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - +from builtins import range import sys, os.path, optparse, math from msodumper import ole, globals, node, olestream, vbahelper @@ -106,7 +106,7 @@ class CodePageReader(StdReader): #default codepage to something reasonable self.reader.codepageName = "cp1252" - if codePageMap.has_key( self.reader.codepage ): + if self.reader.codepage in codePageMap: self.reader.codepageName = codePageMap[ self.reader.codepage ] print(" codepage: %i"%self.reader.codepage) @@ -340,7 +340,7 @@ class ReferenceControlReaderPart2(StdReader): self.reader.readBytes( 6 ) origTypeLib = self.reader.readBytes( 16 ) sys.stdout.write(" GUID: " ) - for i in xrange( 0, 16 ): + for i in range( 0, 16 ): if i: sys.stdout.write(" ") sys.stdout.write("0x%x"%origTypeLib[ i ]) @@ -417,15 +417,15 @@ class DirStreamReader( globals.ByteStream ): pos = self.getCurrentPos() recordID = self.readUnsignedInt( 2 ) name = "Unknown" - if dirRecordData.has_key( recordID ): + if recordID in dirRecordData: name = dirRecordData[ recordID ][0] # if we have a handler let it deal with the record labelWidth = int(math.ceil(math.log(len(self.bytes), 10))) fmt = "0x%%%d.%dx: "%(labelWidth, labelWidth) sys.stdout.write(fmt%pos) # print ("%s [0x%x] "%(name,recordID)) - print '[0x{0:0>4x}] {1}'.format(recordID,name) - if ( dirRecordData.has_key( recordID ) and len( dirRecordData[ recordID ] ) > 2 ): + print('[0x{0:0>4x}] {1}'.format(recordID,name)) + if ( recordID in dirRecordData and len( dirRecordData[ recordID ] ) > 2 ): reader = dirRecordData[ recordID ][2]( self ) reader.parse() else: diff --git a/vsd-dump.py b/vsd-dump.py index 0ec0e7b..f7d5bd3 
100755 --- a/vsd-dump.py +++ b/vsd-dump.py @@ -21,12 +21,12 @@ class VSDDumper: strm = vsdstream.createVSDFile(file.read(), self.params) file.close() dirnames = strm.getDirectoryNames() - print '<?xml version="1.0"?>\n<streams ole-type="%s">' % strm.getName() + print('<?xml version="1.0"?>\n<streams ole-type="%s">' % strm.getName()) if strm.error: - print '<error what="%s"/>' % strm.error + print('<error what="%s"/>' % strm.error) for dirname in dirnames: strm.getDirectoryStreamByName(dirname).dump() - print '</streams>' + print('</streams>') def main(args): @@ -34,6 +34,7 @@ def main(args): dumper = VSDDumper(args[1], params) dumper.dump() + if __name__ == '__main__': main(sys.argv) diff --git a/xls-dump.py b/xls-dump.py index 2f76996..61c4d36 100755 --- a/xls-dump.py +++ b/xls-dump.py @@ -4,7 +4,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # - +from builtins import range import sys, os.path, optparse from msodumper import ole, xlsstream, globals, node, xlsmodel, olestream @@ -16,8 +16,8 @@ def equalsName (name, array): if len(name) != len(array): return False - for i in xrange(0, len(name)): - if ord(name[i]) != array[i]: + for i in range(0, len(name)): + if globals.indexbytes(name, i) != array[i]: return False return True @@ -45,13 +45,13 @@ class XLDumper(object): def __printDirHeader (self, direntry, byteLen): dirname = direntry.Name dirname = globals.encodeName(dirname) - print("") - print("="*globals.OutputWidth) + globals.outputln("") + globals.outputln("="*globals.OutputWidth) if direntry.isStorage(): - print("%s (storage)"%dirname) + globals.outputln("%s (storage)"%dirname) else: - print("%s (stream, size: %d bytes)"%(dirname, byteLen)) - print("-"*globals.OutputWidth) + globals.outputln("%s (stream, size: %d bytes)"%(dirname, byteLen)) + globals.outputln("-"*globals.OutputWidth) def __parseFile (self): file = open(self.filepath, 'rb') @@ -66,14 +66,15 @@ class 
XLDumper(object): root = docroot.appendElement('xls-dump') for d in dirs: - if d.Name != "Workbook": + if d.Name != b"Workbook": # for now, we only dump the Workbook directory stream. continue dirstrm = self.strm.getDirectoryStream(d) data = self.__readSubStreamXML(dirstrm) self.__dumpDataAsXML(data, root) - node.prettyPrint(sys.stdout, docroot, utf8 = self.params.utf8) + + node.prettyPrint(globals.utfwriter(), docroot, utf8 = self.params.utf8) def dumpCanonicalXML (self): self.__parseFile() @@ -83,7 +84,7 @@ class XLDumper(object): dirEntries = self.strm.getDirectoryEntries() for entry in dirEntries: dirname = entry.Name - if dirname != "Workbook": + if dirname != b"Workbook": # for now, we only dump the Workbook directory stream. continue @@ -92,7 +93,7 @@ class XLDumper(object): wbmodel.encrypted = self.strmData.encrypted root.appendChild(wbmodel.createDOM()) - node.prettyPrint(sys.stdout, docroot, utf8 = self.params.utf8) + node.prettyPrint(globals.utfwriter(), docroot, utf8 = self.params.utf8) def dump (self): self.__parseFile() @@ -113,18 +114,18 @@ class XLDumper(object): if entry.isStorage(): continue - elif dirname == "Workbook": + elif dirname == b"Workbook": success = True while success: success = self.__readSubStream(dirstrm) - elif dirname == "Revision Log": + elif dirname == b"Revision Log": dirstrm.type = xlsstream.DirType.RevisionLog self.__readSubStream(dirstrm) - elif dirname == "EncryptionInfo": + elif dirname == b"EncryptionInfo": globals.dumpBytes(dirstrm.bytes, 512) - print("-"*globals.OutputWidth) + globals.outputln("-"*globals.OutputWidth) info = msocrypto.EncryptionInfo(dirstrm.bytes) info.read() info.output() |