diff options
author | Sergey Kishchenko <voidwrk@gmail.com> | 2011-09-16 15:38:30 +0300 |
---|---|---|
committer | Sergey Kishchenko <voidwrk@gmail.com> | 2011-09-16 15:38:30 +0300 |
commit | 2465c4bb135fa9ef701bb81f0ceaf64baa233bd6 (patch) | |
tree | 84b7ddb30d4ee6df0f01200ab80f16a503e209c0 | |
parent | d5cb3bdc85ad3e46938a357d3903980309ada171 (diff) |
xls-dump.py was tested using LibreOffice's set of test files; several small issues were fixed
-rw-r--r-- | .gitignore | 1 | ||||
-rwxr-xr-x | misc/test-files.sh | 15 | ||||
-rw-r--r-- | src/xlsmodel.py | 2 | ||||
-rw-r--r-- | src/xlsparser.py | 45 | ||||
-rw-r--r-- | src/xlsrecord.py | 71 | ||||
-rw-r--r-- | src/xlsstream.py | 4 |
6 files changed, 114 insertions, 24 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/misc/test-files.sh b/misc/test-files.sh new file mode 100755 index 0000000..164952d --- /dev/null +++ b/misc/test-files.sh @@ -0,0 +1,15 @@ +#! /bin/sh + +test_dir=$1 + +if [ ! -d $test_dir ] +then + echo "Usage: test-files.sh TEST_DIR" + exit 1 +fi + +for x in `find $test_dir -name \*.xls`; do + (python xls-dump.py $x | grep -v "rror inter" > /dev/null) || echo "Flat dump failed for" $x + python xls-dump.py --dump-mode=xml $x > /dev/null || echo "Xml dump failed for" $x + python xls-dump.py --dump-mode=cxml $x > /dev/null || echo "CXml dump failed for" $x +done diff --git a/src/xlsmodel.py b/src/xlsmodel.py index 5c10cad..5094ee5 100644 --- a/src/xlsmodel.py +++ b/src/xlsmodel.py @@ -79,7 +79,7 @@ class Workbook(ModelBase): sheets = filter(lambda x: isinstance(x, Worksheet), self.__sheets) n = len(sheets) if n == 0: - return + return nd wbglobal = self.getWorkbookGlobal() nd.appendChild(wbglobal.createDOM(self)) diff --git a/src/xlsparser.py b/src/xlsparser.py index b9d5204..d105f24 100644 --- a/src/xlsparser.py +++ b/src/xlsparser.py @@ -119,8 +119,11 @@ class Req(BaseParser): def parse(self, stream): parsed = safeParse(self.__parser, stream) if parsed is None: + currentToken = "<<<End Of Token Stream>>>" + if stream.currentIndex < len(stream.tokens): + currentToken = stream.tokens[stream.currentIndex] raise ParseException("%s failed but it is required, next token is [%s]" % - (str(self.__parser), str(stream.tokens[stream.currentIndex]))) + (str(self.__parser), str(currentToken))) return parsed def __str__(self): @@ -334,8 +337,14 @@ class PlotGrowth(BaseParser): class Series(BaseParser): PARSER = Term(xlsrecord.Series) +class SeriesText(BaseParser): + PARSER = Term(xlsrecord.SeriesText) + +class BRAI(BaseParser): + PARSER = Term(xlsrecord.Brai) + class AI(BaseParser): - PARSER = Term(xlsrecord.Brai) # TODO: we use Brai 
instead of AI now, fix it + PARSER = Req(BRAI()) << SeriesText() class SerParent(BaseParser): pass class SerAuxTrend(BaseParser): pass @@ -351,9 +360,13 @@ class DataFormat(BaseParser): class Chart3DBarShape(BaseParser): PARSER = Term(xlsrecord.Chart3DBarShape) -class PieFormat(BaseParser): pass -class SerFmt(BaseParser): pass -class MarkerFormat(BaseParser): pass +class PieFormat(BaseParser): + PARSER = Term(xlsrecord.PieFormat) + +class SerFmt(BaseParser): pass + +class MarkerFormat(BaseParser): + PARSER = Term(xlsrecord.MarkerFormat) class Text(BaseParser): PARSER = Term(xlsrecord.Text) @@ -487,7 +500,9 @@ class BobPopCustom(BaseParser): pass class Bar(BaseParser): PARSER = Term(xlsrecord.CHBar) -class Line(BaseParser): pass +class Line(BaseParser): + PARSER = Term(xlsrecord.CHLine) + class Pie(BaseParser): pass class Area(BaseParser): pass class Scatter(BaseParser): pass @@ -538,15 +553,15 @@ class CHARTFORMATS(BaseParser): #*2DFTTEXT AxesUsed 1*2AXISPARENT [CrtLayout12A] [DAT] *ATTACHEDLABEL [CRTMLFRT] #*([DataLabExt StartObject] ATTACHEDLABEL [EndObject]) [TEXTPROPS] *2CRTMLFRT End PARSER = Group('chart-fmt', Req(Chart()) << Req(Begin()) << Many('font-lists', FONTLIST(), max=2) << - Req(Scl()) << Req(PlotGrowth()) << FRAME() << Many('series-fmt-list', SERIESFORMAT()) << + Req(Scl()) << Req(PlotGrowth()) << Opt(FRAME()) << Many('series-fmt-list', SERIESFORMAT()) << Many('ss-list', SS()) << Req(ShtProps()) << Many('dft-texts', DFTTEXT(), max=2) << Req(AxesUsed()) << Many('axis-roots', AXISPARENT(), min=1, max=2) << - CrtLayout12A() << DAT() << Many('attached-labels', ATTACHEDLABEL()) << - CRTMLFRT() << Many('datalab-exts', Seq(Seq(Req(DataLabExt()), - Req(StartObject())), - Req(ATTACHEDLABEL()), - EndObject())) << - TEXTPROPS() << Many('crtmlfrt-list', CRTMLFRT()) << Req(End())) + CrtLayout12A() << Opt(DAT()) << Many('attached-labels', ATTACHEDLABEL()) << + Opt(CRTMLFRT()) << Many('datalab-exts', Seq(Opt(Seq(Req(DataLabExt()), + Req(StartObject()))), + 
Req(ATTACHEDLABEL()), + EndObject())) << + Opt(TEXTPROPS()) << Many('crtmlfrt-list', CRTMLFRT()) << Req(End())) class Dimensions(BaseParser): PARSER = Term(xlsrecord.Dimensions) @@ -560,7 +575,9 @@ class Number(BaseParser): class BoolErr(BaseParser): pass class Blank(BaseParser): pass -class Label(BaseParser): pass + +class Label(BaseParser): + PARSER = Term(xlsrecord.Label) class SERIESDATA(BaseParser): #SERIESDATA = Dimensions 3(SIIndex *(Number / BoolErr / Blank / Label)) diff --git a/src/xlsrecord.py b/src/xlsrecord.py index 1e5b0e6..21f0beb 100644 --- a/src/xlsrecord.py +++ b/src/xlsrecord.py @@ -1127,6 +1127,12 @@ class Label(BaseRecordHandler): self.appendLine("XF record ID: %d"%self.xfIdx) self.appendLine("label text: %s"%self.text) + def dumpData(self): + self.__parseBytes() + return ('label', {'col': self.col, + 'row': self.row, + 'xf-idx': self.xfIdx, + 'text': self.text}) class LabelSST(BaseRecordHandler): @@ -3531,6 +3537,48 @@ class AreaFormat(BaseRecordHandler): ('icv-fore', dumpIcv(self.icvFore)), ('icv-back', dumpIcv(self.icvBack))]) +class PieFormat(BaseRecordHandler): + def __parseBytes(self): + self.pcExplode = self.readSignedInt(2) + + def parseBytes (self): + self.__parseBytes() + # TODO: dump all data + + def dumpData(self): + self.__parseBytes() + return ('pie-format', {'pc-explode': self.pcExplode}) + +class MarkerFormat(BaseRecordHandler): + def __parseBytes(self): + self.rgbFore = self.readLongRGB() + self.rgbBack = self.readLongRGB() + self.imk = self.readUnsignedInt(2) + flags = self.readUnsignedInt(2) + self.auto = (flags & 0x001) != 0 # A + # next 3 bits are reserved + self.notShowInt = (flags & 0x010) != 0 # C + self.notShowBrd = (flags & 0x020) != 0 # D + self.icvFore = self.readICV() + self.icvBack = self.readICV() + self.miSize = self.readUnsignedInt(4) + + def parseBytes (self): + self.__parseBytes() + # TODO: dump all data + + def dumpData(self): + self.__parseBytes() + return ('marker-format', {'imk': self.imk, + 'auto': 
self.auto, + 'not-show-int': self.notShowInt, + 'not-show-brd': self.notShowBrd, + 'mi-size': self.miSize}, + [('rgb-fore', dumpRgb(self.rgbFore)), + ('rgb-back', dumpRgb(self.rgbBack)), + ('icv-fore', dumpIcv(self.icvFore)), + ('icv-back', dumpIcv(self.icvBack))]) + class DataFormat(BaseRecordHandler): def __parseBytes(self): self.xi = self.readUnsignedInt(2) @@ -4104,15 +4152,24 @@ class CHBar(BaseRecordHandler): class CHLine(BaseRecordHandler): - def parseBytes (self): + def __parseBytes (self): flags = globals.getUnsignedInt(self.readBytes(2)) - stacked = (flags & 0x0001) - percent = (flags & 0x0002) - shadow = (flags & 0x0004) + self.stacked = (flags & 0x0001) + self.percent = (flags & 0x0002) + self.shadow = (flags & 0x0004) - self.appendLine("stacked: %s"%self.getYesNo(stacked)) - self.appendLine("percent: %s"%self.getYesNo(percent)) - self.appendLine("shadow: %s"%self.getYesNo(shadow)) + def parseBytes (self): + self.__parseBytes() + self.appendLine("stacked: %s"%self.getYesNo(self.stacked)) + self.appendLine("percent: %s"%self.getYesNo(self.percent)) + self.appendLine("shadow: %s"%self.getYesNo(self.shadow)) + + def dumpData(self): + self.__parseBytes() + return ('line', {'stacked': self.stacked, + 'percent': self.percent, + 'shadow': self.shadow}) + class Brai(BaseRecordHandler): diff --git a/src/xlsstream.py b/src/xlsstream.py index 5c569ad..ceb6eeb 100644 --- a/src/xlsstream.py +++ b/src/xlsstream.py @@ -243,9 +243,9 @@ recData = { 0x1003: ["SERIES", "Data Properties for Series, Trendlines or Error Bars", xlsrecord.Series], 0x1006: ["CHDATAFORMAT", "Data point or series that the formatting information that follows applies to (2.4.74)", xlsrecord.DataFormat], 0x1007: ["LINEFORMAT", "Appearance of A Line", xlsrecord.LineFormat], - 0x1009: ["CHMARKERFORMAT", "?"], + 0x1009: ["CHMARKERFORMAT", "Color, size, and shape of the markers", xlsrecord.MarkerFormat], 0x100A: ["AREAFORMAT", "Patterns and Colors in Filled Region of Chart", xlsrecord.AreaFormat], - 
0x100B: ["CHPIEFORMAT", "?"], + 0x100B: ["CHPIEFORMAT", "Distance of a data point from the center", xlsrecord.PieFormat], 0x100C: ["CHATTACHEDLABEL", "?"], 0x100D: ["SERIESTEXT", "Series Category Name or Title Text in Chart", xlsrecord.SeriesText], 0x1014: ["CHTYPEGROUP", "Properties of a chart group", xlsrecord.ChartFormat], |