summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey Kishchenko <voidwrk@gmail.com> 2011-09-16 15:38:30 +0300
committer Sergey Kishchenko <voidwrk@gmail.com> 2011-09-16 15:38:30 +0300
commit 2465c4bb135fa9ef701bb81f0ceaf64baa233bd6 (patch)
tree 84b7ddb30d4ee6df0f01200ab80f16a503e209c0
parent d5cb3bdc85ad3e46938a357d3903980309ada171 (diff)
xls-dump.py was tested using libreoffice's set of test files; several small issues were fixed
-rw-r--r-- .gitignore 1
-rwxr-xr-x misc/test-files.sh 15
-rw-r--r-- src/xlsmodel.py 2
-rw-r--r-- src/xlsparser.py 45
-rw-r--r-- src/xlsrecord.py 71
-rw-r--r-- src/xlsstream.py 4
6 files changed, 114 insertions, 24 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0d20b64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/misc/test-files.sh b/misc/test-files.sh
new file mode 100755
index 0000000..164952d
--- /dev/null
+++ b/misc/test-files.sh
@@ -0,0 +1,15 @@
+#! /bin/sh
+# Run xls-dump.py in flat, xml and cxml dump modes over every *.xls file found below TEST_DIR.
+test_dir=$1
+
+if [ ! -d "$test_dir" ]  # quoted: unquoted, a missing argument collapses to `[ ! -d ]`, which is false
+then
+    echo "Usage: test-files.sh TEST_DIR"
+    exit 1
+fi
+# NOTE: the backtick expansion below still word-splits, so paths containing whitespace are not supported.
+for x in `find "$test_dir" -name \*.xls`; do
+    (python xls-dump.py "$x" | grep -v "rror inter" > /dev/null) || echo "Flat dump failed for" "$x"
+    python xls-dump.py --dump-mode=xml "$x" > /dev/null || echo "Xml dump failed for" "$x"
+    python xls-dump.py --dump-mode=cxml "$x" > /dev/null || echo "CXml dump failed for" "$x"
+done
diff --git a/src/xlsmodel.py b/src/xlsmodel.py
index 5c10cad..5094ee5 100644
--- a/src/xlsmodel.py
+++ b/src/xlsmodel.py
@@ -79,7 +79,7 @@ class Workbook(ModelBase):
sheets = filter(lambda x: isinstance(x, Worksheet), self.__sheets)
n = len(sheets)
if n == 0:
- return
+ return nd
wbglobal = self.getWorkbookGlobal()
nd.appendChild(wbglobal.createDOM(self))
diff --git a/src/xlsparser.py b/src/xlsparser.py
index b9d5204..d105f24 100644
--- a/src/xlsparser.py
+++ b/src/xlsparser.py
@@ -119,8 +119,11 @@ class Req(BaseParser):
def parse(self, stream):
parsed = safeParse(self.__parser, stream)
if parsed is None:
+ currentToken = "<<<End Of Token Stream>>>"  # placeholder shown in the error when no token is left
+ if stream.currentIndex < len(stream.tokens):  # guard: currentIndex may already be past the last token
+ currentToken = stream.tokens[stream.currentIndex]  # otherwise report the token the parser stopped at
raise ParseException("%s failed but it is required, next token is [%s]" %
- (str(self.__parser), str(stream.tokens[stream.currentIndex])))
+ (str(self.__parser), str(currentToken)))  # no direct indexing here, so end-of-stream cannot raise IndexError
return parsed
def __str__(self):
@@ -334,8 +337,14 @@ class PlotGrowth(BaseParser):
class Series(BaseParser):
PARSER = Term(xlsrecord.Series)
+class SeriesText(BaseParser):
+ PARSER = Term(xlsrecord.SeriesText)
+
+class BRAI(BaseParser):
+ PARSER = Term(xlsrecord.Brai)
+
class AI(BaseParser):
- PARSER = Term(xlsrecord.Brai) # TODO: we use Brai instead of AI now, fix it
+ PARSER = Req(BRAI()) << SeriesText()
class SerParent(BaseParser): pass
class SerAuxTrend(BaseParser): pass
@@ -351,9 +360,13 @@ class DataFormat(BaseParser):
class Chart3DBarShape(BaseParser):
PARSER = Term(xlsrecord.Chart3DBarShape)
-class PieFormat(BaseParser): pass
-class SerFmt(BaseParser): pass
-class MarkerFormat(BaseParser): pass
+class PieFormat(BaseParser):
+ PARSER = Term(xlsrecord.PieFormat)
+
+class SerFmt(BaseParser): pass
+
+class MarkerFormat(BaseParser):
+ PARSER = Term(xlsrecord.MarkerFormat)
class Text(BaseParser):
PARSER = Term(xlsrecord.Text)
@@ -487,7 +500,9 @@ class BobPopCustom(BaseParser): pass
class Bar(BaseParser):
PARSER = Term(xlsrecord.CHBar)
-class Line(BaseParser): pass
+class Line(BaseParser):
+ PARSER = Term(xlsrecord.CHLine)
+
class Pie(BaseParser): pass
class Area(BaseParser): pass
class Scatter(BaseParser): pass
@@ -538,15 +553,15 @@ class CHARTFORMATS(BaseParser):
#*2DFTTEXT AxesUsed 1*2AXISPARENT [CrtLayout12A] [DAT] *ATTACHEDLABEL [CRTMLFRT]
#*([DataLabExt StartObject] ATTACHEDLABEL [EndObject]) [TEXTPROPS] *2CRTMLFRT End
PARSER = Group('chart-fmt', Req(Chart()) << Req(Begin()) << Many('font-lists', FONTLIST(), max=2) <<
- Req(Scl()) << Req(PlotGrowth()) << FRAME() << Many('series-fmt-list', SERIESFORMAT()) <<
+ Req(Scl()) << Req(PlotGrowth()) << Opt(FRAME()) << Many('series-fmt-list', SERIESFORMAT()) <<
Many('ss-list', SS()) << Req(ShtProps()) << Many('dft-texts', DFTTEXT(), max=2) <<
Req(AxesUsed()) << Many('axis-roots', AXISPARENT(), min=1, max=2) <<
- CrtLayout12A() << DAT() << Many('attached-labels', ATTACHEDLABEL()) <<
- CRTMLFRT() << Many('datalab-exts', Seq(Seq(Req(DataLabExt()),
- Req(StartObject())),
- Req(ATTACHEDLABEL()),
- EndObject())) <<
- TEXTPROPS() << Many('crtmlfrt-list', CRTMLFRT()) << Req(End()))
+ CrtLayout12A() << Opt(DAT()) << Many('attached-labels', ATTACHEDLABEL()) <<
+ Opt(CRTMLFRT()) << Many('datalab-exts', Seq(Opt(Seq(Req(DataLabExt()),
+ Req(StartObject()))),
+ Req(ATTACHEDLABEL()),
+ EndObject())) <<
+ Opt(TEXTPROPS()) << Many('crtmlfrt-list', CRTMLFRT()) << Req(End()))
class Dimensions(BaseParser):
PARSER = Term(xlsrecord.Dimensions)
@@ -560,7 +575,9 @@ class Number(BaseParser):
class BoolErr(BaseParser): pass
class Blank(BaseParser): pass
-class Label(BaseParser): pass
+
+class Label(BaseParser):
+ PARSER = Term(xlsrecord.Label)
class SERIESDATA(BaseParser):
#SERIESDATA = Dimensions 3(SIIndex *(Number / BoolErr / Blank / Label))
diff --git a/src/xlsrecord.py b/src/xlsrecord.py
index 1e5b0e6..21f0beb 100644
--- a/src/xlsrecord.py
+++ b/src/xlsrecord.py
@@ -1127,6 +1127,12 @@ class Label(BaseRecordHandler):
self.appendLine("XF record ID: %d"%self.xfIdx)
self.appendLine("label text: %s"%self.text)
+ def dumpData(self):  # structured dump: returns an (element name, attribute dict) tuple
+ self.__parseBytes()  # presumably fills col/row/xfIdx/text from the record bytes — defined outside this hunk
+ return ('label', {'col': self.col,
+ 'row': self.row,
+ 'xf-idx': self.xfIdx,
+ 'text': self.text})
class LabelSST(BaseRecordHandler):
@@ -3531,6 +3537,48 @@ class AreaFormat(BaseRecordHandler):
('icv-fore', dumpIcv(self.icvFore)),
('icv-back', dumpIcv(self.icvBack))])
+class PieFormat(BaseRecordHandler):  # handler registered for record 0x100B (CHPIEFORMAT) in xlsstream.recData
+ def __parseBytes(self):
+ self.pcExplode = self.readSignedInt(2)  # presumably a signed 2-byte integer — confirm against readSignedInt
+
+ def parseBytes (self):
+ self.__parseBytes()
+ # TODO: dump all data (no appendLine() output is produced for this record yet)
+
+ def dumpData(self):
+ self.__parseBytes()
+ return ('pie-format', {'pc-explode': self.pcExplode})  # (element name, attribute dict) tuple
+
+class MarkerFormat(BaseRecordHandler):  # handler registered for record 0x1009 (CHMARKERFORMAT) in xlsstream.recData
+ def __parseBytes(self):
+ self.rgbFore = self.readLongRGB()
+ self.rgbBack = self.readLongRGB()
+ self.imk = self.readUnsignedInt(2)
+ flags = self.readUnsignedInt(2)  # bit field decoded into the booleans below
+ self.auto = (flags & 0x001) != 0 # A: bit 0
+ # next 3 bits are reserved
+ self.notShowInt = (flags & 0x010) != 0 # C: bit 4
+ self.notShowBrd = (flags & 0x020) != 0 # D: bit 5
+ self.icvFore = self.readICV()
+ self.icvBack = self.readICV()
+ self.miSize = self.readUnsignedInt(4)
+
+ def parseBytes (self):
+ self.__parseBytes()
+ # TODO: dump all data (no appendLine() output is produced for this record yet)
+
+ def dumpData(self):
+ self.__parseBytes()
+ return ('marker-format', {'imk': self.imk,  # 3-tuple: element name, attribute dict, list of child entries
+ 'auto': self.auto,
+ 'not-show-int': self.notShowInt,
+ 'not-show-brd': self.notShowBrd,
+ 'mi-size': self.miSize},
+ [('rgb-fore', dumpRgb(self.rgbFore)),  # children are (name, value) pairs built by dumpRgb / dumpIcv
+ ('rgb-back', dumpRgb(self.rgbBack)),
+ ('icv-fore', dumpIcv(self.icvFore)),
+ ('icv-back', dumpIcv(self.icvBack))])
+
class DataFormat(BaseRecordHandler):
def __parseBytes(self):
self.xi = self.readUnsignedInt(2)
@@ -4104,15 +4152,24 @@ class CHBar(BaseRecordHandler):
class CHLine(BaseRecordHandler):
- def parseBytes (self):
+ def __parseBytes (self):  # shared decoder used by both parseBytes() and dumpData()
flags = globals.getUnsignedInt(self.readBytes(2))
- stacked = (flags & 0x0001)
- percent = (flags & 0x0002)
- shadow = (flags & 0x0004)
+ self.stacked = (flags & 0x0001) != 0  # stored as bools so dumpData() emits True/False, not 0/1, 0/2, 0/4
+ self.percent = (flags & 0x0002) != 0  # same `!= 0` convention as MarkerFormat's flag fields
+ self.shadow = (flags & 0x0004) != 0
- self.appendLine("stacked: %s"%self.getYesNo(stacked))
- self.appendLine("percent: %s"%self.getYesNo(percent))
- self.appendLine("shadow: %s"%self.getYesNo(shadow))
+ def parseBytes (self):
+ self.__parseBytes()
+ self.appendLine("stacked: %s"%self.getYesNo(self.stacked))
+ self.appendLine("percent: %s"%self.getYesNo(self.percent))
+ self.appendLine("shadow: %s"%self.getYesNo(self.shadow))
+
+ def dumpData(self):
+ self.__parseBytes()
+ return ('line', {'stacked': self.stacked,  # (element name, attribute dict) tuple
+ 'percent': self.percent,
+ 'shadow': self.shadow})
+
class Brai(BaseRecordHandler):
diff --git a/src/xlsstream.py b/src/xlsstream.py
index 5c569ad..ceb6eeb 100644
--- a/src/xlsstream.py
+++ b/src/xlsstream.py
@@ -243,9 +243,9 @@ recData = {
0x1003: ["SERIES", "Data Properties for Series, Trendlines or Error Bars", xlsrecord.Series],
0x1006: ["CHDATAFORMAT", "Data point or series that the formatting information that follows applies to (2.4.74)", xlsrecord.DataFormat],
0x1007: ["LINEFORMAT", "Appearance of A Line", xlsrecord.LineFormat],
- 0x1009: ["CHMARKERFORMAT", "?"],
+ 0x1009: ["CHMARKERFORMAT", "Color, size, and shape of the markers", xlsrecord.MarkerFormat],
0x100A: ["AREAFORMAT", "Patterns and Colors in Filled Region of Chart", xlsrecord.AreaFormat],
- 0x100B: ["CHPIEFORMAT", "?"],
+ 0x100B: ["CHPIEFORMAT", "Distance of a data point from the center", xlsrecord.PieFormat],
0x100C: ["CHATTACHEDLABEL", "?"],
0x100D: ["SERIESTEXT", "Series Category Name or Title Text in Chart", xlsrecord.SeriesText],
0x1014: ["CHTYPEGROUP", "Properties of a chart group", xlsrecord.ChartFormat],