summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey Kishchenko <voidwrk@gmail.com> 2011-09-12 10:06:32 +0300
committer: Sergey Kishchenko <voidwrk@gmail.com> 2011-09-12 10:06:32 +0300
commit: 74a24e29cdc0329c9e96dad87d8f122c81c459eb (patch)
tree: 32b18f3c440a431b4886f5d401d2e46a9a4a581e
parent: b23fad259dbe9b633e00906b8a9e91d5ed16d11b (diff)
xls-dump canonical xml dump fix
-rw-r--r-- src/xlsmodel.py 44
-rw-r--r-- src/xlsrecord.py 73
-rwxr-xr-x xls-dump.py 16
3 files changed, 79 insertions, 54 deletions
diff --git a/src/xlsmodel.py b/src/xlsmodel.py
index f47a088..5c10cad 100644
--- a/src/xlsmodel.py
+++ b/src/xlsmodel.py
@@ -50,17 +50,25 @@ class Workbook(ModelBase):
# private members
self.__sheets = []
- def appendSheet (self):
- n = len(self.__sheets)
- if n == 0:
- self.__sheets.append(WorkbookGlobal())
- else:
- self.__sheets.append(Worksheet(n-1))
+
+ def appendSheet (self, sheetType):
+ def raiseError(cause):
+ def errorFunc():
+ raise Exception(cause)
+
+ HANDLERS = { 0x0005: WorkbookGlobal,
+ 0x0006: raiseError("Unsupported sheet type: Visual Basic module"),
+ 0x0010: lambda: Worksheet(len(self.__sheets)),
+ 0x0020: Chart,
+ 0x0040: raiseError("Unsupported sheet type: Excel 4.0 macro sheet"),
+ 0x0100: raiseError("Unsupported sheet type: Workspace file")
+ }
+ self.__sheets.append(HANDLERS[sheetType]())
- return self.getCurrentSheet()
+ return self.__sheets[-1]
def getWorkbookGlobal (self):
- return self.__sheets[0]
+ return filter(lambda x: isinstance(x, WorkbookGlobal), self.__sheets)[0]
def getCurrentSheet (self):
return self.__sheets[-1]
@@ -68,20 +76,19 @@ class Workbook(ModelBase):
def createDOM (self):
nd = node.Element('workbook')
nd.setAttr('encrypted', self.encrypted)
- n = len(self.__sheets)
+ sheets = filter(lambda x: isinstance(x, Worksheet), self.__sheets)
+ n = len(sheets)
if n == 0:
return
- wbglobal = self.__sheets[0]
+ wbglobal = self.getWorkbookGlobal()
nd.appendChild(wbglobal.createDOM(self))
- for i in xrange(1, n):
- sheet = self.__sheets[i]
+ for (i,sheet) in enumerate(sheets):
sheetNode = sheet.createDOM(self)
nd.appendChild(sheetNode)
- if i > 0:
- data = wbglobal.getSheetData(i-1)
- sheetNode.setAttr('name', data.name)
- sheetNode.setAttr('visible', data.visible)
+ data = wbglobal.getSheetData(i-1)
+ sheetNode.setAttr('name', data.name)
+ sheetNode.setAttr('visible', data.visible)
return nd
@@ -91,6 +98,7 @@ class SheetBase(object):
class Type:
WorkbookGlobal = 0
Worksheet = 1
+ Chart=2
def __init__ (self, modelType):
self.modelType = modelType
@@ -202,6 +210,10 @@ class SupbookExternal(Supbook):
return nd
+class Chart(SheetBase):
+ def __init__(self):
+ super(Chart, self).__init__(SheetBase.Type.Chart)
+
class WorkbookGlobal(SheetBase):
class SheetData:
def __init__ (self):
diff --git a/src/xlsrecord.py b/src/xlsrecord.py
index 3df50a1..c69cabf 100644
--- a/src/xlsrecord.py
+++ b/src/xlsrecord.py
@@ -436,52 +436,64 @@ class BOF(BaseRecordHandler):
else:
return '(unknown)'
+ def __parseBytes (self):
+ # BIFF version
+ self.ver = self.readUnsignedInt(2)
+
+ # Substream type
+ self.dataType = self.readUnsignedInt(2)
+
+ # build ID and year
+ self.buildID = self.readUnsignedInt(2)
+ self.buildYear = self.readUnsignedInt(2)
+
+ # file history flags
+ self.flags = self.readUnsignedInt(4)
+ self.win = (self.flags & 0x00000001)
+ self.risc = (self.flags & 0x00000002)
+ self.beta = (self.flags & 0x00000004)
+ self.winAny = (self.flags & 0x00000008)
+ self.macAny = (self.flags & 0x00000010)
+ self.betaAny = (self.flags & 0x00000020)
+ self.riscAny = (self.flags & 0x00000100)
+ self.lowestExcelVer = self.readSignedInt(4)
+
def parseBytes (self):
+ self.__parseBytes()
# BIFF version
ver = self.readUnsignedInt(2)
s = 'not BIFF8'
- if ver == 0x0600:
+ if self.ver == 0x0600:
s = 'BIFF8'
self.appendLine("BIFF version: %s"%s)
# Substream type
- dataType = self.readUnsignedInt(2)
- self.appendLine("type: %s"%BOF.Type[dataType])
+ self.appendLine("type: %s"%BOF.Type[self.dataType])
# build ID and year
- buildID = self.readUnsignedInt(2)
- self.appendLine("build ID: %s (%4.4Xh)"%(self.getBuildIdName(buildID), buildID))
- buildYear = self.readUnsignedInt(2)
- self.appendLine("build year: %d"%buildYear)
+ self.appendLine("build ID: %s (%4.4Xh)"%(self.getBuildIdName(self.buildID), self.buildID))
+ self.appendLine("build year: %d"%self.buildYear)
# file history flags
- flags = self.readUnsignedInt(4)
- win = (flags & 0x00000001)
- risc = (flags & 0x00000002)
- beta = (flags & 0x00000004)
- winAny = (flags & 0x00000008)
- macAny = (flags & 0x00000010)
- betaAny = (flags & 0x00000020)
- riscAny = (flags & 0x00000100)
- self.appendLine("last edited by Excel on Windows: %s"%self.getYesNo(win))
- self.appendLine("last edited by Excel on RISC: %s"%self.getYesNo(risc))
- self.appendLine("last edited by beta version of Excel: %s"%self.getYesNo(beta))
- self.appendLine("has ever been edited by Excel for Windows: %s"%self.getYesNo(winAny))
- self.appendLine("has ever been edited by Excel for Macintosh: %s"%self.getYesNo(macAny))
- self.appendLine("has ever been edited by beta version of Excel: %s"%self.getYesNo(betaAny))
- self.appendLine("has ever been edited by Excel on RISC: %s"%self.getYesNo(riscAny))
-
- lowestExcelVer = self.readSignedInt(4)
- self.appendLine("earliest Excel version that can read all records: %d"%lowestExcelVer)
+ self.appendLine("last edited by Excel on Windows: %s"%self.getYesNo(self.win))
+ self.appendLine("last edited by Excel on RISC: %s"%self.getYesNo(self.risc))
+ self.appendLine("last edited by beta version of Excel: %s"%self.getYesNo(self.beta))
+ self.appendLine("has ever been edited by Excel for Windows: %s"%self.getYesNo(self.winAny))
+ self.appendLine("has ever been edited by Excel for Macintosh: %s"%self.getYesNo(self.macAny))
+ self.appendLine("has ever been edited by beta version of Excel: %s"%self.getYesNo(self.betaAny))
+ self.appendLine("has ever been edited by Excel on RISC: %s"%self.getYesNo(self.riscAny))
+
+ self.appendLine("earliest Excel version that can read all records: %d"%self.lowestExcelVer)
def fillModel (self, model):
+
if model.modelType != xlsmodel.ModelType.Workbook:
return
+ self.__parseBytes()
- sheet = model.appendSheet()
- ver = self.readUnsignedInt(2)
+ sheet = model.appendSheet(self.dataType)
s = 'not BIFF8'
- if ver == 0x0600:
+ if self.ver == 0x0600:
s = 'BIFF8'
sheet.version = s
@@ -745,8 +757,9 @@ class Dimensions(BaseRecordHandler):
def fillModel (self, model):
self.__parseBytes()
sh = model.getCurrentSheet()
- sh.setFirstDefinedCell(self.colMin, self.rowMin)
- sh.setFirstFreeCell(self.colMax, self.rowMax)
+ if not isinstance(sh, xlsmodel.Chart):
+ sh.setFirstDefinedCell(self.colMin, self.rowMin)
+ sh.setFirstFreeCell(self.colMax, self.rowMax)
class Dv(BaseRecordHandler):
diff --git a/xls-dump.py b/xls-dump.py
index 0bb9ef3..00d5c4a 100755
--- a/xls-dump.py
+++ b/xls-dump.py
@@ -81,27 +81,27 @@ class XLDumper(object):
def dumpXML (self):
self.__parseFile()
- dirnames = self.strm.getDirectoryNames()
- for dirname in dirnames:
- if dirname != "Workbook":
+ dirs = self.strm.getDirectoryEntries()
+ for d in dirs:
+ if d.Name != "Workbook":
# for now, we only dump the Workbook directory stream.
continue
- dirstrm = self.strm.getDirectoryStreamByName(dirname)
+ dirstrm = self.strm.getDirectoryStream(d)
self.__readSubStreamXML(dirstrm)
def dumpCanonicalXML (self):
self.__parseFile()
- dirnames = self.strm.getDirectoryNames()
+ dirs = self.strm.getDirectoryEntries()
docroot = node.Root()
root = docroot.appendElement('xls-dump')
- for dirname in dirnames:
- if dirname != "Workbook":
+ for d in dirs:
+ if d.Name != "Workbook":
# for now, we only dump the Workbook directory stream.
continue
- dirstrm = self.strm.getDirectoryStreamByName(dirname)
+ dirstrm = self.strm.getDirectoryStream(d)
wbmodel = self.__buildWorkbookModel(dirstrm)
wbmodel.encrypted = self.strmData.encrypted
root.appendChild(wbmodel.createDOM())