diff options
author | Igor Slepchin <igor.slepchin@gmail.com> | 2011-11-15 21:53:40 +0100 |
---|---|---|
committer | Albert Astals Cid <aacid@kde.org> | 2011-11-15 21:53:40 +0100 |
commit | 65388b1aaf9a78efcf9486d5e2d4bdce76f11194 (patch) | |
tree | 158194668887c03809163738df4ec865e81ed1ce /utils | |
parent | a4c78946bc1fc3d52152af2e319051050ab05e28 (diff) |
Output images in pdftohtml -xml mode if no -i option is specified.
Comes with an attached update to pdf2xml.dtd
Diffstat (limited to 'utils')
-rw-r--r-- | utils/HtmlOutputDev.cc | 48 | ||||
-rw-r--r-- | utils/pdf2xml.dtd | 22 |
2 files changed, 57 insertions, 13 deletions
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc index 43e4ec41..c1447ad7 100644 --- a/utils/HtmlOutputDev.cc +++ b/utils/HtmlOutputDev.cc @@ -30,6 +30,7 @@ // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) // Copyright (C) 2011 Joshua Richardson <jric@chegg.com> // Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> +// Copyright (C) 2011 Igor Slepchin <igor.slepchin@gmail.com> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -64,6 +65,21 @@ #define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: " +class HtmlImage +{ +public: + HtmlImage(GooString *_fName, GfxState *state) + : fName(_fName) { + state->transform(0, 0, &xMin, &yMax); + state->transform(1, 1, &xMax, &yMin); + } + ~HtmlImage() { delete fName; } + + double xMin, xMax; // image x coordinates + double yMin, yMax; // image y coordinates + GooString *fName; // image file name +}; + // returns true if x is closer to y than x is to z static inline bool IS_CLOSER(float x, float y, float z) { return fabs((x)-(y)) < fabs((x)-(z)); } @@ -725,6 +741,15 @@ void HtmlPage::dumpAsXML(FILE* f,int page){ delete fontCSStyle; } + int listlen=HtmlOutputDev::imgList->getLength(); + for (int i = 0; i < listlen; i++) { + HtmlImage *img = (HtmlImage*)HtmlOutputDev::imgList->del(0); + fprintf(f,"<image top=\"%d\" left=\"%d\" ",xoutRound(img->yMin),xoutRound(img->xMin)); + fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(img->xMax-img->xMin),xoutRound(img->yMax-img->yMin)); + fprintf(f,"src=\"%s\"/>\n",img->fName->getCString()); + delete img; + } + for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){ if (tmp->htext){ fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin)); @@ -864,9 +889,9 @@ void HtmlPage::dump(FILE *f, int pageNum) // Loop over the list of image names on this page int listlen=HtmlOutputDev::imgList->getLength(); for (int i = 0; i < listlen; i++) { - GooString *fName= (GooString *)HtmlOutputDev::imgList->del(0); - fprintf(f,"<IMG src=\"%s\"/><br/>\n",fName->getCString()); - delete fName; + HtmlImage *img = (HtmlImage*)HtmlOutputDev::imgList->del(0); + fprintf(f,"<IMG src=\"%s\"/><br/>\n",img->fName->getCString()); + delete img; } HtmlOutputDev::imgNum=1; @@ -1236,7 +1261,7 @@ void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, GBool invert, GBool interpolate, GBool inlineImg) { - if (ignore||complexMode) { + if (ignore||(complexMode && !xml)) { OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg); return; } @@ -1272,7 +1297,10 @@ void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, fclose(f1); - if (fName) imgList->append(fName); + if (fName) { + HtmlImage *img = new HtmlImage(fName, state); + imgList->append(img); + } } else { OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg); @@ -1283,7 +1311,7 @@ void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, GBool interpolate, int *maskColors, GBool inlineImg) { - if (ignore||complexMode) { + if (ignore||(complexMode && !xml)) { OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg); return; @@ -1324,7 +1352,10 @@ void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, fclose(f1); - if (fName) imgList->append(fName); + if (fName) { + HtmlImage *img = new HtmlImage(fName, state); + imgList->append(img); + } } else { #ifdef ENABLE_LIBPNG @@ -1390,7 +1421,8 @@ void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, fclose(f1); free(row); - imgList->append(fName); + HtmlImage *img = new HtmlImage(fName, state); + imgList->append(img); ++imgNum; imgStr->close(); delete imgStr; diff --git a/utils/pdf2xml.dtd b/utils/pdf2xml.dtd index 1afa4fe9..9cd38801 100644 --- a/utils/pdf2xml.dtd +++ b/utils/pdf2xml.dtd @@ -1,6 +1,6 @@ -<?xml version="1.0"?> +<?xml version="1.0" encoding="UTF-8"?> <!ELEMENT pdf2xml (page+)> -<!ELEMENT page (fontspec*, text*)> +<!ELEMENT page (fontspec*, image*, text*)> <!ATTLIST page number CDATA #REQUIRED position CDATA #REQUIRED @@ -16,7 +16,7 @@ family CDATA #REQUIRED color CDATA #REQUIRED > -<!ELEMENT text (#PCDATA | b | i)*> +<!ELEMENT text (#PCDATA | b | i | a)*> <!ATTLIST text top CDATA #REQUIRED left CDATA #REQUIRED @@ -24,5 +24,17 @@ height CDATA #REQUIRED font CDATA #REQUIRED > -<!ELEMENT b (#PCDATA)> -<!ELEMENT i (#PCDATA)> +<!ELEMENT b (#PCDATA | i)*> +<!ELEMENT i (#PCDATA | b)*> +<!ELEMENT a (#PCDATA)> +<!ATTLIST a + href CDATA #REQUIRED +> +<!ELEMENT image EMPTY> +<!ATTLIST image + top CDATA #REQUIRED + left CDATA #REQUIRED + width CDATA #REQUIRED + height CDATA #REQUIRED + src CDATA #REQUIRED +> |