diff options
author | Joshua Richardson <jric@chegg.com> | 2011-08-18 18:48:40 +0200 |
---|---|---|
committer | Albert Astals Cid <aacid@kde.org> | 2011-08-18 18:48:40 +0200 |
commit | 86271e4810f714d4ba7a2a6651a9b1d04f653262 (patch) | |
tree | 0245a9b7b41a984be0163f405e603c813a240874 /utils | |
parent | 56248b84e2185483dd54704c13838e8f78029d49 (diff) |
pdftohtml: Support text rotation
Includes a few other fixlets.
See bug 38586 for more info
Diffstat (limited to 'utils')
-rw-r--r-- | utils/HtmlFonts.cc | 64 |
-rw-r--r-- | utils/HtmlFonts.h | 10 |
-rw-r--r-- | utils/HtmlOutputDev.cc | 170 |
-rw-r--r-- | utils/HtmlOutputDev.h | 8 |
-rw-r--r-- | utils/HtmlUtils.h | 51 |
5 files changed, 218 insertions, 85 deletions
diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc index 2ae92220..c9b558e9 100644 --- a/utils/HtmlFonts.cc +++ b/utils/HtmlFonts.cc @@ -21,7 +21,8 @@ // Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru> // Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com> // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) -// Copyright (C) 2011 Joshua Richardson <joshuarbox-junk1@yahoo.com> +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> +// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -29,6 +30,7 @@ //======================================================================== #include "HtmlFonts.h" +#include "HtmlUtils.h" #include "GlobalParams.h" #include "UnicodeMap.h" #include <stdio.h> @@ -120,6 +122,7 @@ HtmlFont::HtmlFont(GooString* ftname,int _size, GfxRGB rgb){ size=(_size-1); italic = gFalse; bold = gFalse; + rotOrSkewed = gFalse; if (fontname){ if (strstr(fontname->lowerCase()->getCString(),"bold")) bold=gTrue; @@ -148,6 +151,8 @@ HtmlFont::HtmlFont(const HtmlFont& x){ pos=x.pos; color=x.color; if (x.FontName) FontName=new GooString(x.FontName); + rotOrSkewed = x.rotOrSkewed; + memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat)); } @@ -176,14 +181,15 @@ void HtmlFont::clear(){ /* - This function is used to compare font uniquily for insertion into + This function is used to compare font uniquely for insertion into the list of all encountered fonts */ GBool HtmlFont::isEqual(const HtmlFont& x) const{ - return ((size==x.size) && + return (size==x.size) && (lineSize==x.lineSize) && (pos==x.pos) && (bold==x.bold) && (italic==x.italic) && - (color.isEqual(x.getColor()))); + (color.isEqual(x.getColor())) && isRotOrSkewed() == x.isRotOrSkewed() && + (!isRotOrSkewed() || rot_matrices_equal(getRotMat(), x.getRotMat())); } /* 
@@ -232,11 +238,11 @@ GooString* HtmlFont::HtmlFilter(Unicode* u, int uLen) { for (int i = 0; i < uLen; ++i) { switch (u[i]) { - case '"': tmp->append("""); break; + case '"': tmp->append("""); break; case '&': tmp->append("&"); break; case '<': tmp->append("<"); break; case '>': tmp->append(">"); break; - case ' ': tmp->append( !xml && ( i+1 >= uLen || !tmp->getLength() || tmp->getChar( tmp->getLength()-1 ) == ' ' ) ? " " : " " ); + case ' ': tmp->append( !xml && ( i+1 >= uLen || !tmp->getLength() || tmp->getChar( tmp->getLength()-1 ) == ' ' ) ? " " : " " ); break; default: { @@ -289,29 +295,6 @@ int HtmlFontAccu::AddFont(const HtmlFont& font){ return (accu->size()-1); } -// get CSS font name for font #i -GooString* HtmlFontAccu::getCSStyle(int i, GooString* content, int j){ - GooString *tmp; - GooString *iStr=GooString::fromInt(i); - GooString *jStr=GooString::fromInt(j); - - if (!xml) { - tmp = new GooString("<span class=\"ft"); - tmp->append(jStr); - tmp->append(iStr); - tmp->append("\">"); - tmp->append(content); - tmp->append("</span>"); - } else { - tmp = new GooString(""); - tmp->append(content); - } - - delete jStr; - delete iStr; - return tmp; -} - // get CSS font definition for font #i GooString* HtmlFontAccu::CSStyle(int i, int j){ GooString *tmp=new GooString(); @@ -343,6 +326,29 @@ GooString* HtmlFontAccu::CSStyle(int i, int j){ tmp->append(fontName); //font.getFontName()); tmp->append(";color:"); tmp->append(colorStr); + // if there is rotation or skew, include the matrix + if (font.isRotOrSkewed()) { + const double * const text_mat = font.getRotMat(); + GooString matrix_str(" matrix("); + matrix_str.appendf("{0:10.10g}, {1:10.10g}, {2:10.10g}, {3:10.10g}, 0, 0)", + text_mat[0], text_mat[1], text_mat[2], text_mat[3]); + tmp->append(";-moz-transform:"); + tmp->append(&matrix_str); + tmp->append(";-webkit-transform:"); + tmp->append(&matrix_str); + tmp->append(";-o-transform:"); + tmp->append(&matrix_str); + tmp->append(";-ms-transform:"); + 
tmp->append(&matrix_str); + // Todo: 75% is a wild guess that seems to work pretty well; + // We probably need to calculate the real percentage + // Based on the characteristic baseline and bounding box of current font + // PDF origin is at baseline + tmp->append(";-moz-transform-origin: left 75%"); + tmp->append(";-webkit-transform-origin: left 75%"); + tmp->append(";-o-transform-origin: left 75%"); + tmp->append(";-ms-transform-origin: left 75%"); + } tmp->append(";}"); } if (xml) { diff --git a/utils/HtmlFonts.h b/utils/HtmlFonts.h index 2cdea4bd..3e3b0282 100644 --- a/utils/HtmlFonts.h +++ b/utils/HtmlFonts.h @@ -20,6 +20,7 @@ // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) // Copyright (C) 2010 Albert Astals Cid <aacid@kde.org> // Copyright (C) 2011 Steven Murdoch <Steven.Murdoch@cl.cam.ac.uk> +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -62,13 +63,15 @@ class HtmlFont{ int lineSize; GBool italic; GBool bold; + GBool rotOrSkewed; int pos; // position of the font name in the fonts array static GooString *DefaultFont; GooString *FontName; HtmlFontColor color; + double rotSkewMat[4]; // only four values needed for rotation and skew public: - HtmlFont(){FontName=NULL;}; + HtmlFont(){FontName=NULL; rotOrSkewed = gFalse;} HtmlFont(GooString* fontname,int _size, GfxRGB rgb); HtmlFont(const HtmlFont& x); HtmlFont& operator=(const HtmlFont& x); @@ -78,9 +81,13 @@ public: GooString* getFullName(); GBool isItalic() const {return italic;} GBool isBold() const {return bold;} + GBool isRotOrSkewed() const { return rotOrSkewed; } unsigned int getSize() const {return size;} int getLineSize() const {return lineSize;} void setLineSize(int _lineSize) { lineSize = _lineSize; } + void setRotMat(const double * const mat) + { rotOrSkewed = gTrue; 
memcpy(rotSkewMat, mat, sizeof(rotSkewMat)); } + const double *getRotMat() const { return rotSkewMat; } GooString* getFontName(); static GooString* getDefaultFont(); static void setDefaultFont(GooString* defaultFont); @@ -102,7 +109,6 @@ public: HtmlFont *Get(int i){ return &(*accu)[i]; } - GooString* getCSStyle (int i,GooString* content, int j = 0); GooString* CSStyle(int i, int j = 0); int size() const {return accu->size();} diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc index a71bb963..615cf5f7 100644 --- a/utils/HtmlOutputDev.cc +++ b/utils/HtmlOutputDev.cc @@ -28,7 +28,8 @@ // Copyright (C) 2010 Adrian Johnson <ajohnson@redneon.com> // Copyright (C) 2010 Hib Eris <hib@hiberis.nl> // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) -// Copyright (C) 2011 Joshua Richardson <joshuarbox-junk1@yahoo.com> +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> +// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -46,6 +47,7 @@ #include <stddef.h> #include <ctype.h> #include <math.h> +#include <iostream> #include "goo/GooString.h" #include "goo/GooList.h" #include "UnicodeMap.h" @@ -61,6 +63,9 @@ #include "GlobalParams.h" #include "HtmlOutputDev.h" #include "HtmlFonts.h" +#include "HtmlUtils.h" + +#define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: " // returns true if x is closer to y than x is to z static inline bool IS_CLOSER(float x, float y, float z) { return fabs((x)-(y)) < fabs((x)-(z)); } @@ -80,6 +85,9 @@ extern GBool xml; extern GBool showHidden; extern GBool noMerge; +static GBool debug = gFalse; +static GooString *gstr_buff0 = NULL; // a workspace in which I format strings + static GooString* basename(GooString* str){ char *p=str->getCString(); @@ -102,11 +110,37 @@ static GooString* Dirname(GooString* 
str){ } #endif +static const char *print_matrix(const double *mat) { + delete gstr_buff0; + + gstr_buff0 = GooString::format("[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]", + *mat, mat[1], mat[2], mat[3], mat[4], mat[5]); + return gstr_buff0->getCString(); +} + +static const char *print_uni_str(const Unicode *u, const unsigned uLen) { + GooString *gstr_buff1 = NULL; + + delete gstr_buff0; + + if (!uLen) return ""; + gstr_buff0 = GooString::format("{0:c}", (*u < 0x7F ? *u & 0xFF : '?')); + for (unsigned i = 1; i < uLen; i++) { + if (u[i] < 0x7F) { + gstr_buff1 = gstr_buff0->append(u[i] < 0x7F ? static_cast<char>(u[i]) & 0xFF : '?'); + delete gstr_buff0; + gstr_buff0 = gstr_buff1; + } + } + + return gstr_buff0->getCString(); +} + //------------------------------------------------------------------------ // HtmlString //------------------------------------------------------------------------ -HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts) { +HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* _fonts) : fonts(_fonts) { GfxFont *font; double x, y; @@ -129,6 +163,22 @@ HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts) { GooString *name = state->getFont()->getName(); if (!name) name = HtmlFont::getDefaultFont(); //new GooString("default"); HtmlFont hfont=HtmlFont(name, static_cast<int>(fontSize-1), rgb); + if (isMatRotOrSkew(state->getTextMat())) { + double normalizedMatrix[4]; + memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix)); + // browser rotates the opposite way + // so flip the sign of the angle -> sin() components change sign + if (debug) + std::cerr << DEBUG << "before transform: " << print_matrix(normalizedMatrix) << std::endl; + normalizedMatrix[1] *= -1; + normalizedMatrix[2] *= -1; + if (debug) + std::cerr << DEBUG << "after reflecting angle: " << print_matrix(normalizedMatrix) << std::endl; + normalizeRotMat(normalizedMatrix); + if (debug) + std::cerr << DEBUG << 
"after norm: " << print_matrix(normalizedMatrix) << std::endl; + hfont.setRotMat(normalizedMatrix); + } fontpos = fonts->AddFont(hfont); } else { // this means that the PDF file draws text without a current font, @@ -301,9 +351,27 @@ void HtmlPage::addChar(GfxState *state, double x, double y, // and is not too far away from it before adding //if ((UnicodeMap::getDirection(u[0]) != curStr->dir) || // XXX - if ( - (n > 0 && - fabs(x1 - curStr->xRight[n-1]) > 0.1 * (curStr->yMax - curStr->yMin))) { + if (debug) { + double *text_mat = state->getTextMat(); + // rotation is (cos q, sin q, -sin q, cos q, 0, 0) + // sin q is zero iff there is no rotation, or 180 deg. rotation; + // for 180 rotation, cos q will be negative + if (text_mat[0] < 0 || !is_within(text_mat[1], .1, 0)) { + std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen) << '"' << std::endl; + std::cerr << "text " << print_matrix(state->getTextMat()); + } + } + if (n > 0 && // don't start a new string, unless there is already a string + // TODO: the following line assumes that text is flowing left to + // right, which will not necessarily be the case, e.g. if rotated; + // It assesses whether or not two characters are close enough to + // be part of the same string + fabs(x1 - curStr->xRight[n-1]) > 0.1 * (curStr->yMax - curStr->yMin) && + // rotation is (cos q, sin q, -sin q, cos q, 0, 0) + // sin q is zero iff there is no rotation, or 180 deg. rotation; + // for 180 rotation, cos q will be negative + !rot_matrices_equal(curStr->getFont().getRotMat(), state->getTextMat())) + { endString(); beginString(state, NULL); } @@ -546,13 +614,13 @@ void HtmlPage::coalesce() { str1->size * sizeof(double)); if (addSpace) { str1->text[str1->len] = 0x20; - str1->htext->append(xml?" ":" "); + str1->htext->append(xml?" 
":" "); str1->xRight[str1->len] = str2->xMin; ++str1->len; } if (addLineBreak) { str1->text[str1->len] = '\n'; - str1->htext->append("<br>"); + str1->htext->append("<br/>"); str1->xRight[str1->len] = str2->xMin; ++str1->len; str1->yMin = str2->yMin; @@ -660,31 +728,22 @@ void HtmlPage::dumpAsXML(FILE* f,int page){ delete fontCSStyle; } - GooString *str, *str1 = NULL; for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){ if (tmp->htext){ - str=new GooString(tmp->htext); fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin)); fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin)); fprintf(f,"font=\"%d\">", tmp->fontpos); - str1=fonts->getCSStyle(tmp->fontpos, str); - fputs(str1->getCString(),f); - delete str; - delete str1; + fputs(tmp->htext->getCString(),f); fputs("</text>\n",f); } } fputs("</page>\n",f); } - -void HtmlPage::dumpComplex(FILE *file, int page){ - FILE* pageFile; +int HtmlPage::dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page) { GooString* tmp; char* htmlEncoding; - if( firstPage == -1 ) firstPage = page; - if( !noframes ) { GooString* pgNum=GooString::fromInt(page); @@ -700,22 +759,21 @@ void HtmlPage::dumpComplex(FILE *file, int page){ if (!pageFile) { error(-1, "Couldn't open html file '%s'", tmp->getCString()); delete tmp; - return; + return 1; } if (!singleHtml) - fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page); + fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page); else - fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString()); + fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString()); delete tmp; - htmlEncoding = HtmlOutputDev::mapEncodingToHtml - (globalParams->getTextEncodingName()); + htmlEncoding 
= HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName()); if (!singleHtml) - fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding); + fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding); else - fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n <br>\n", htmlEncoding); + fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding); } else { @@ -724,12 +782,21 @@ void HtmlPage::dumpComplex(FILE *file, int page){ fprintf(pageFile,"<a name=\"%d\"></a>\n", page); } - fprintf(pageFile,"<DIV style=\"position:relative;width:%d;height:%d;\">\n", - pageWidth, pageHeight); + return 0; +} + +void HtmlPage::dumpComplex(FILE *file, int page){ + FILE* pageFile; + GooString* tmp; + + if( firstPage == -1 ) firstPage = page; + + if (dumpComplexHeaders(file, pageFile, page)) { error(-1, "Couldn't write headers."); return; } tmp=basename(DocName); fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile); + fputs("\tp {margin: 0; padding: 0;}",pageFile); for(int i=fontsPageMarker;i!=fonts->size();i++) { GooString *fontCSStyle; if (!singleHtml) @@ -747,33 +814,33 @@ void HtmlPage::dumpComplex(FILE *file, int page){ fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile); } + fprintf(pageFile,"<DIV id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n", + page, pageWidth, pageHeight); + if( !ignore ) { fprintf(pageFile, - "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\">\n", + "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\"/>\n", pageWidth, pageHeight, tmp->getCString(), (page-firstPage+1), imgExt->getCString()); } delete tmp; - GooString *str, *str1 = NULL; for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){ if (tmp1->htext){ - str=new GooString(tmp1->htext); 
fprintf(pageFile, - "<DIV style=\"position:absolute;top:%d;left:%d\">", + "<P style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft", xoutRound(tmp1->yMin), xoutRound(tmp1->xMin)); - fputs("<nobr>",pageFile); - if (!singleHtml) - str1=fonts->getCSStyle(tmp1->fontpos, str); - else - str1=fonts->getCSStyle(tmp1->fontpos, str, page); - fputs(str1->getCString(),pageFile); - delete str; - delete str1; - fputs("</nobr></DIV>\n",pageFile); + if (!singleHtml) { + fputc('0', pageFile); + } else { + fprintf(pageFile, "%d", page); + } + fprintf(pageFile,"%d\">", tmp1->fontpos); + fputs(tmp1->htext->getCString(), pageFile); + fputs("</P>\n", pageFile); } } @@ -801,7 +868,7 @@ void HtmlPage::dump(FILE *f, int pageNum) int listlen=HtmlOutputDev::imgList->getLength(); for (int i = 0; i < listlen; i++) { GooString *fName= (GooString *)HtmlOutputDev::imgList->del(0); - fprintf(f,"<IMG src=\"%s\"><br>\n",fName->getCString()); + fprintf(f,"<IMG src=\"%s\"/><br/>\n",fName->getCString()); delete fName; } HtmlOutputDev::imgNum=1; @@ -812,7 +879,7 @@ void HtmlPage::dump(FILE *f, int pageNum) str=new GooString(tmp->htext); fputs(str->getCString(),f); delete str; - fputs("<br>\n",f); + fputs("<br/>\n",f); } } fputs("<hr>\n",f); @@ -879,7 +946,7 @@ GooString* HtmlMetaVar::toString() result->append(name); result->append("\" content=\""); result->append(content); - result->append("\">"); + result->append("\"/>"); return result; } @@ -920,12 +987,12 @@ void HtmlOutputDev::doFrame(int firstPage){ delete fName; fName=basename(Docname); - fputs(DOCTYPE_FRAMES, fContentsFrame); + fputs(DOCTYPE, fContentsFrame); fputs("\n<HTML>",fContentsFrame); fputs("\n<HEAD>",fContentsFrame); fprintf(fContentsFrame,"\n<TITLE>%s</TITLE>",docTitle->getCString()); htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName()); - fprintf(fContentsFrame, "\n<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding); + fprintf(fContentsFrame, "\n<META 
http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding); dumpMetaVars(fContentsFrame); fprintf(fContentsFrame, "</HEAD>\n"); fputs("<FRAMESET cols=\"100,*\">\n",fContentsFrame); @@ -995,12 +1062,12 @@ HtmlOutputDev::HtmlOutputDev(char *fileName, char *title, } delete left; fputs(DOCTYPE, fContentsFrame); - fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",fContentsFrame); + fputs("<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n", fContentsFrame); if (doOutline) { GooString *str = basename(Docname); - fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline"); + fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br/>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline"); delete str; } } @@ -1044,10 +1111,9 @@ HtmlOutputDev::HtmlOutputDev(char *fileName, char *title, } else { - fprintf(page,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n", - DOCTYPE, docTitle->getCString()); + fprintf(page,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n", DOCTYPE, docTitle->getCString()); - fprintf(page, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding); + fprintf(page, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding); dumpMetaVars(page); fprintf(page,"</HEAD>\n"); @@ -1113,7 +1179,7 @@ void HtmlOutputDev::startPage(int pageNum, GfxState *state) { fprintf(fContentsFrame,"<A href=\"%s-%d.html\"",str->getCString(),pageNum); else fprintf(fContentsFrame,"<A href=\"%ss.html#%d\"",str->getCString(),pageNum); - fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br>\n",pageNum); + fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br/>\n",pageNum); } } @@ -1520,7 +1586,7 @@ GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog) 
return gFalse; delete str; bClose = gTrue; - fputs("<HTML>\n<HEAD>\n<TITLE>Document Outline</TITLE>\n</HEAD>\n<BODY>\n", output); + fputs("<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>Document Outline</TITLE>\n</HEAD>\n<BODY>\n", output); } } diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h index c268ce73..474e3af9 100644 --- a/utils/HtmlOutputDev.h +++ b/utils/HtmlOutputDev.h @@ -19,6 +19,8 @@ // Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org> // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net> // Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> +// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -52,8 +54,7 @@ #define xoutRound(x) ((int)(x + 0.5)) -#define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">" -#define DOCTYPE_FRAMES "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\"\n\"http://www.w3.org/TR/html4/frameset.dtd\">" +#define DOCTYPE "<!DOCTYPE html>" class GfxState; class GooString; @@ -83,6 +84,7 @@ public: double dx, double dy, Unicode u); HtmlLink* getLink() { return link; } + const HtmlFont &getFont() const { return *fonts->Get(fontpos); } void endString(); // postprocessing private: @@ -100,6 +102,7 @@ private: int len; // length of text and xRight int size; // size of text and xRight arrays UnicodeTextDirection dir; // direction (left to right/right to left) + HtmlFontAccu *fonts; friend class HtmlPage; @@ -171,6 +174,7 @@ private: void setDocName(char* fname); void dumpAsXML(FILE* f,int page); void dumpComplex(FILE* f, int page); + int dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page); // marks the position of the fonts that belong to current page (for noframes) int fontsPageMarker; diff --git 
a/utils/HtmlUtils.h b/utils/HtmlUtils.h new file mode 100644 index 00000000..bdb89b9a --- /dev/null +++ b/utils/HtmlUtils.h @@ -0,0 +1,51 @@ +// +// HtmlUtils.h +// +// Created on: Jun 8, 2011 +// Author: Joshua Richardson <jric@chegg.com> +// Copyright 2011 +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#ifndef HTMLUTILS_H_ +#define HTMLUTILS_H_ + +#include <math.h> // fabs +#include "goo/gtypes.h" // GBool + +// Returns true iff the difference between a and b is less than the threshold +// We always use fuzzy math when comparing decimal numbers due to imprecision +inline GBool is_within(double a, double thresh, double b) { + return fabs(a-b) < thresh; +} + +inline GBool rot_matrices_equal(const double * const mat0, const double * const mat1) { + return is_within(mat0[0], .1, mat1[0]) && is_within(mat0[1], .1, mat1[1]) && + is_within(mat0[2], .1, mat1[2]) && is_within(mat0[3], .1, mat1[3]); +} + +// rotation is (cos q, sin q, -sin q, cos q, 0, 0) +// sin q is zero iff there is no rotation, or 180 deg. rotation; +// for 180 rotation, cos q will be negative +inline GBool isMatRotOrSkew(const double * const mat) { + return mat[0] < 0 || !is_within(mat[1], .1, 0); +} + +// Alters the matrix so that it does not scale a vector's x component; +// If the matrix does not skew, then that will also normalize the y +// component, keeping any rotation, but removing scaling. +inline void normalizeRotMat(double *mat) { + double scale = fabs(mat[0] + mat[1]); + if (!scale) return; + for (int i = 0; i < 4; i++) mat[i] /= scale; +} + +#endif /* HTMLUTILS_H_ */ |