diff options
author | Sergey Kishchenko <voidwrk@gmail.com> | 2011-09-12 10:06:32 +0300 |
---|---|---|
committer | Sergey Kishchenko <voidwrk@gmail.com> | 2011-09-12 10:06:32 +0300 |
commit | 74a24e29cdc0329c9e96dad87d8f122c81c459eb (patch) | |
tree | 32b18f3c440a431b4886f5d401d2e46a9a4a581e | |
parent | b23fad259dbe9b633e00906b8a9e91d5ed16d11b (diff) |
xls-dump canonical xml dump fix
-rw-r--r-- | src/xlsmodel.py | 44 |
-rw-r--r-- | src/xlsrecord.py | 73 |
-rwxr-xr-x | xls-dump.py | 16 |
3 files changed, 79 insertions, 54 deletions
diff --git a/src/xlsmodel.py b/src/xlsmodel.py index f47a088..5c10cad 100644 --- a/src/xlsmodel.py +++ b/src/xlsmodel.py @@ -50,17 +50,25 @@ class Workbook(ModelBase): # private members self.__sheets = [] - def appendSheet (self): - n = len(self.__sheets) - if n == 0: - self.__sheets.append(WorkbookGlobal()) - else: - self.__sheets.append(Worksheet(n-1)) + + def appendSheet (self, sheetType): + def raiseError(cause): + def errorFunc(): + raise Exception(cause) + + HANDLERS = { 0x0005: WorkbookGlobal, + 0x0006: raiseError("Unsupported sheet type: Visual Basic module"), + 0x0010: lambda: Worksheet(len(self.__sheets)), + 0x0020: Chart, + 0x0040: raiseError("Unsupported sheet type: Excel 4.0 macro sheet"), + 0x0100: raiseError("Unsupported sheet type: Workspace file") + } + self.__sheets.append(HANDLERS[sheetType]()) - return self.getCurrentSheet() + return self.__sheets[-1] def getWorkbookGlobal (self): - return self.__sheets[0] + return filter(lambda x: isinstance(x, WorkbookGlobal), self.__sheets)[0] def getCurrentSheet (self): return self.__sheets[-1] @@ -68,20 +76,19 @@ class Workbook(ModelBase): def createDOM (self): nd = node.Element('workbook') nd.setAttr('encrypted', self.encrypted) - n = len(self.__sheets) + sheets = filter(lambda x: isinstance(x, Worksheet), self.__sheets) + n = len(sheets) if n == 0: return - wbglobal = self.__sheets[0] + wbglobal = self.getWorkbookGlobal() nd.appendChild(wbglobal.createDOM(self)) - for i in xrange(1, n): - sheet = self.__sheets[i] + for (i,sheet) in enumerate(sheets): sheetNode = sheet.createDOM(self) nd.appendChild(sheetNode) - if i > 0: - data = wbglobal.getSheetData(i-1) - sheetNode.setAttr('name', data.name) - sheetNode.setAttr('visible', data.visible) + data = wbglobal.getSheetData(i-1) + sheetNode.setAttr('name', data.name) + sheetNode.setAttr('visible', data.visible) return nd @@ -91,6 +98,7 @@ class SheetBase(object): class Type: WorkbookGlobal = 0 Worksheet = 1 + Chart=2 def __init__ (self, modelType): 
self.modelType = modelType @@ -202,6 +210,10 @@ class SupbookExternal(Supbook): return nd +class Chart(SheetBase): + def __init__(self): + super(Chart, self).__init__(SheetBase.Type.Chart) + class WorkbookGlobal(SheetBase): class SheetData: def __init__ (self): diff --git a/src/xlsrecord.py b/src/xlsrecord.py index 3df50a1..c69cabf 100644 --- a/src/xlsrecord.py +++ b/src/xlsrecord.py @@ -436,52 +436,64 @@ class BOF(BaseRecordHandler): else: return '(unknown)' + def __parseBytes (self): + # BIFF version + self.ver = self.readUnsignedInt(2) + + # Substream type + self.dataType = self.readUnsignedInt(2) + + # build ID and year + self.buildID = self.readUnsignedInt(2) + self.buildYear = self.readUnsignedInt(2) + + # file history flags + self.flags = self.readUnsignedInt(4) + self.win = (self.flags & 0x00000001) + self.risc = (self.flags & 0x00000002) + self.beta = (self.flags & 0x00000004) + self.winAny = (self.flags & 0x00000008) + self.macAny = (self.flags & 0x00000010) + self.betaAny = (self.flags & 0x00000020) + self.riscAny = (self.flags & 0x00000100) + self.lowestExcelVer = self.readSignedInt(4) + def parseBytes (self): + self.__parseBytes() # BIFF version ver = self.readUnsignedInt(2) s = 'not BIFF8' - if ver == 0x0600: + if self.ver == 0x0600: s = 'BIFF8' self.appendLine("BIFF version: %s"%s) # Substream type - dataType = self.readUnsignedInt(2) - self.appendLine("type: %s"%BOF.Type[dataType]) + self.appendLine("type: %s"%BOF.Type[self.dataType]) # build ID and year - buildID = self.readUnsignedInt(2) - self.appendLine("build ID: %s (%4.4Xh)"%(self.getBuildIdName(buildID), buildID)) - buildYear = self.readUnsignedInt(2) - self.appendLine("build year: %d"%buildYear) + self.appendLine("build ID: %s (%4.4Xh)"%(self.getBuildIdName(self.buildID), self.buildID)) + self.appendLine("build year: %d"%self.buildYear) # file history flags - flags = self.readUnsignedInt(4) - win = (flags & 0x00000001) - risc = (flags & 0x00000002) - beta = (flags & 0x00000004) - winAny = 
(flags & 0x00000008) - macAny = (flags & 0x00000010) - betaAny = (flags & 0x00000020) - riscAny = (flags & 0x00000100) - self.appendLine("last edited by Excel on Windows: %s"%self.getYesNo(win)) - self.appendLine("last edited by Excel on RISC: %s"%self.getYesNo(risc)) - self.appendLine("last edited by beta version of Excel: %s"%self.getYesNo(beta)) - self.appendLine("has ever been edited by Excel for Windows: %s"%self.getYesNo(winAny)) - self.appendLine("has ever been edited by Excel for Macintosh: %s"%self.getYesNo(macAny)) - self.appendLine("has ever been edited by beta version of Excel: %s"%self.getYesNo(betaAny)) - self.appendLine("has ever been edited by Excel on RISC: %s"%self.getYesNo(riscAny)) - - lowestExcelVer = self.readSignedInt(4) - self.appendLine("earliest Excel version that can read all records: %d"%lowestExcelVer) + self.appendLine("last edited by Excel on Windows: %s"%self.getYesNo(self.win)) + self.appendLine("last edited by Excel on RISC: %s"%self.getYesNo(self.risc)) + self.appendLine("last edited by beta version of Excel: %s"%self.getYesNo(self.beta)) + self.appendLine("has ever been edited by Excel for Windows: %s"%self.getYesNo(self.winAny)) + self.appendLine("has ever been edited by Excel for Macintosh: %s"%self.getYesNo(self.macAny)) + self.appendLine("has ever been edited by beta version of Excel: %s"%self.getYesNo(self.betaAny)) + self.appendLine("has ever been edited by Excel on RISC: %s"%self.getYesNo(self.riscAny)) + + self.appendLine("earliest Excel version that can read all records: %d"%self.lowestExcelVer) def fillModel (self, model): + if model.modelType != xlsmodel.ModelType.Workbook: return + self.__parseBytes() - sheet = model.appendSheet() - ver = self.readUnsignedInt(2) + sheet = model.appendSheet(self.dataType) s = 'not BIFF8' - if ver == 0x0600: + if self.ver == 0x0600: s = 'BIFF8' sheet.version = s @@ -745,8 +757,9 @@ class Dimensions(BaseRecordHandler): def fillModel (self, model): self.__parseBytes() sh = 
model.getCurrentSheet() - sh.setFirstDefinedCell(self.colMin, self.rowMin) - sh.setFirstFreeCell(self.colMax, self.rowMax) + if not isinstance(sh, xlsmodel.Chart): + sh.setFirstDefinedCell(self.colMin, self.rowMin) + sh.setFirstFreeCell(self.colMax, self.rowMax) class Dv(BaseRecordHandler): diff --git a/xls-dump.py b/xls-dump.py index 0bb9ef3..00d5c4a 100755 --- a/xls-dump.py +++ b/xls-dump.py @@ -81,27 +81,27 @@ class XLDumper(object): def dumpXML (self): self.__parseFile() - dirnames = self.strm.getDirectoryNames() - for dirname in dirnames: - if dirname != "Workbook": + dirs = self.strm.getDirectoryEntries() + for d in dirs: + if d.Name != "Workbook": # for now, we only dump the Workbook directory stream. continue - dirstrm = self.strm.getDirectoryStreamByName(dirname) + dirstrm = self.strm.getDirectoryStream(d) self.__readSubStreamXML(dirstrm) def dumpCanonicalXML (self): self.__parseFile() - dirnames = self.strm.getDirectoryNames() + dirs = self.strm.getDirectoryEntries() docroot = node.Root() root = docroot.appendElement('xls-dump') - for dirname in dirnames: - if dirname != "Workbook": + for d in dirs: + if d.Name != "Workbook": # for now, we only dump the Workbook directory stream. continue - dirstrm = self.strm.getDirectoryStreamByName(dirname) + dirstrm = self.strm.getDirectoryStream(d) wbmodel = self.__buildWorkbookModel(dirstrm) wbmodel.encrypted = self.strmData.encrypted root.appendChild(wbmodel.createDOM()) |