From e865f320891f6e1f97fcc4f612119311aa029a63 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Mon, 6 Feb 2012 00:26:34 +0100 Subject: all done! --- ALL_DIFF | 890 --------------------------------------------------------------- 1 file changed, 890 deletions(-) diff --git a/ALL_DIFF b/ALL_DIFF index 0bff244..e69de29 100644 --- a/ALL_DIFF +++ b/ALL_DIFF @@ -1,890 +0,0 @@ -diff -ru xpdf-3.02/doc/pdftotext.1 xpdf-3.03/doc/pdftotext.1 ---- xpdf-3.02/doc/pdftotext.1 2007-02-27 23:05:51.000000000 +0100 -+++ xpdf-3.03/doc/pdftotext.1 2011-08-15 23:08:53.000000000 +0200 -@@ -49,6 +49,10 @@ - text. The default is to \'undo' physical layout (columns, - hyphenation, etc.) and output the text in reading order. - .TP -+.BI \-fixed " number" -+Assume fixed-pitch (or tabular) text, with the specified character -+width (in points). This forces physical layout mode. -+.TP - .B \-raw - Keep the text in content stream order. This is a hack which often - "undoes" column formatting, etc. Use of raw mode is no longer -diff -ru xpdf-3.02/xpdf/pdftotext.cc xpdf-3.03/xpdf/pdftotext.cc ---- xpdf-3.02/xpdf/pdftotext.cc 2007-02-27 23:05:52.000000000 +0100 -+++ xpdf-3.03/xpdf/pdftotext.cc 2011-08-15 23:08:53.000000000 +0200 -@@ -35,7 +35,8 @@ - - static int firstPage = 1; - static int lastPage = 0; - static GBool physLayout = gFalse; -+static double fixedPitch = 0; - static GBool rawOrder = gFalse; - static GBool htmlMeta = gFalse; - static char textEncName[128] = ""; -@@ -55,6 +58,8 @@ - "last page to convert"}, - {"-layout", argFlag, &physLayout, 0, - "maintain original physical layout"}, -+ {"-fixed", argFP, &fixedPitch, 0, -+ "assume fixed-pitch (or tabular) text"}, - {"-raw", argFlag, &rawOrder, 0, - "keep strings in content stream order"}, - {"-htmlmeta", argFlag, &htmlMeta, 0, -@@ -112,6 +117,9 @@ - goto err0; - } - fileName = new GString(argv[1]); -+ if (fixedPitch) { -+ physLayout = gTrue; -+ } - - // read config file - globalParams = new GlobalParams(cfgFileName); -@@ -232,7 +241,7 @@ - - // write text file - textOut = new TextOutputDev(textFileName->getCString(), -- physLayout, rawOrder, htmlMeta); -+ physLayout, fixedPitch, rawOrder, htmlMeta); - if (textOut->isOk()) { - doc->displayPages(textOut, firstPage, lastPage, 72, 72, 0, - gFalse, gTrue, gFalse); -diff -ru xpdf-3.02/xpdf/TextOutputDev.cc xpdf-3.03/xpdf/TextOutputDev.cc ---- xpdf-3.02/xpdf/TextOutputDev.cc 2007-02-27 23:05:52.000000000 +0100 -+++ xpdf-3.03/xpdf/TextOutputDev.cc 2011-08-15 23:08:53.000000000 +0200 -@@ -545,7 +618,7 @@ - - // insert the new word - if (cursor && wordBaseIdx == cursorBaseIdx && -- word->primaryCmp(cursor) > 0) { -+ word->primaryCmp(cursor) >= 0) { - w0 = cursor; - w1 = cursor->next; - } else { -@@ -928,7 +1001,7 @@ - xMax = blk->xMin + d1 * (blk->xMax - blk->xMin); - yMin = blk->yMin + d2 * (blk->yMax - blk->yMin); - yMax = blk->yMin + d3 * (blk->yMax - blk->yMin); -- base = blk->yMin + base * (blk->yMax - blk->yMin); -+ base = blk->yMin + d4 * (blk->yMax - blk->yMin); - break; - case 1: - xMin = blk->xMax - d3 * (blk->xMax - blk->xMin); -@@ -1150,15 +1223,15 @@ - } - } - --void TextBlock::coalesce(UnicodeMap *uMap) { -+void TextBlock::coalesce(UnicodeMap *uMap, double fixedPitch) { - TextWord *word0, *word1, *word2, *bestWord0, *bestWord1, *lastWord; - TextLine *line, *line0, *line1; - int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx; - int baseIdx, bestWordBaseIdx, idx0, idx1; - double minBase, maxBase; -- double fontSize, delta, priDelta, secDelta; -+ double fontSize, wordSpacing, delta, priDelta, secDelta; - TextLine **lineArray; -- GBool found; -+ GBool found, overlap; - int col1, col2; - int i, j, k; - -@@ -1168,11 +1241,7 @@ - while (word0) { - priDelta = dupMaxPriDelta * word0->fontSize; - secDelta = dupMaxSecDelta * word0->fontSize; -- if (rot == 0 || rot == 3) { -- maxBaseIdx = pool->getBaseIdx(word0->base + secDelta); -- } else { -- maxBaseIdx = pool->getBaseIdx(word0->base - secDelta); -- } -+ maxBaseIdx = pool->getBaseIdx(word0->base + secDelta); - found = gFalse; - word1 = word2 = NULL; // make gcc happy - for (idx1 = idx0; idx1 <= maxBaseIdx; ++idx1) { -@@ -1269,6 +1338,7 @@ - maxBase = word0->base + maxIntraLineDelta * fontSize; - minBaseIdx = pool->getBaseIdx(minBase); - maxBaseIdx = pool->getBaseIdx(maxBase); -+ wordSpacing = fixedPitch ? fixedPitch : maxWordSpacing * fontSize; - - // find the rest of the words in this line - while (1) { -@@ -1277,25 +1347,32 @@ - // this line - bestWordBaseIdx = 0; - bestWord0 = bestWord1 = NULL; -- for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) { -+ overlap = gFalse; -+ for (baseIdx = minBaseIdx; -+ !overlap && baseIdx <= maxBaseIdx; -+ ++baseIdx) { - for (word0 = NULL, word1 = pool->getPool(baseIdx); - word1; - word0 = word1, word1 = word1->next) { - if (word1->base >= minBase && -- word1->base <= maxBase && -- (delta = lastWord->primaryDelta(word1)) >= -- minCharSpacing * fontSize) { -- if (delta < maxWordSpacing * fontSize && -- (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) { -- bestWordBaseIdx = baseIdx; -- bestWord0 = word0; -- bestWord1 = word1; -+ word1->base <= maxBase) { -+ delta = lastWord->primaryDelta(word1); -+ if (delta < minCharSpacing * fontSize) { -+ overlap = gTrue; -+ break; -+ } else { -+ if (delta < wordSpacing && -+ (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) { -+ bestWordBaseIdx = baseIdx; -+ bestWord0 = word0; -+ bestWord1 = word1; -+ } -+ break; - } -- break; - } - } - } -- if (!bestWord1) { -+ if (overlap || !bestWord1) { - break; - } - -@@ -1342,52 +1419,79 @@ - - // column assignment - nColumns = 0; -- for (i = 0; i < nLines; ++i) { -- line0 = lineArray[i]; -- col1 = 0; -- for (j = 0; j < i; ++j) { -- line1 = lineArray[j]; -- if (line1->primaryDelta(line0) >= 0) { -- col2 = line1->col[line1->len] + 1; -- } else { -- k = 0; // make gcc happy -- switch (rot) { -- case 0: -- for (k = 0; -- k < line1->len && -- line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]); -- ++k) ; -- break; -- case 1: -- for (k = 0; -- k < line1->len && -- line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]); -- ++k) ; -- break; -- case 2: -- for (k = 0; -- k < line1->len && -- line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]); -- ++k) ; -- break; -- case 3: -- for (k = 0; -- k < line1->len && -- line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]); -- ++k) ; -- break; -- } -- col2 = line1->col[k]; -+ if (fixedPitch) { -+ for (i = 0; i < nLines; ++i) { -+ line0 = lineArray[i]; -+ col1 = 0; // make gcc happy -+ switch (rot) { -+ case 0: -+ col1 = (int)((line0->xMin - xMin) / fixedPitch + 0.5); -+ break; -+ case 1: -+ col1 = (int)((line0->yMin - yMin) / fixedPitch + 0.5); -+ break; -+ case 2: -+ col1 = (int)((xMax - line0->xMax) / fixedPitch + 0.5); -+ break; -+ case 3: -+ col1 = (int)((yMax - line0->yMax) / fixedPitch + 0.5); -+ break; - } -- if (col2 > col1) { -- col1 = col2; -+ for (k = 0; k <= line0->len; ++k) { -+ line0->col[k] += col1; -+ } -+ if (line0->col[line0->len] > nColumns) { -+ nColumns = line0->col[line0->len]; - } - } -- for (k = 0; k <= line0->len; ++k) { -- line0->col[k] += col1; -- } -- if (line0->col[line0->len] > nColumns) { -- nColumns = line0->col[line0->len]; -+ } else { -+ for (i = 0; i < nLines; ++i) { -+ line0 = lineArray[i]; -+ col1 = 0; -+ for (j = 0; j < i; ++j) { -+ line1 = lineArray[j]; -+ if (line1->primaryDelta(line0) >= 0) { -+ col2 = line1->col[line1->len] + 1; -+ } else { -+ k = 0; // make gcc happy -+ switch (rot) { -+ case 0: -+ for (k = 0; -+ k < line1->len && -+ line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]); -+ ++k) ; -+ break; -+ case 1: -+ for (k = 0; -+ k < line1->len && -+ line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]); -+ ++k) ; -+ break; -+ case 2: -+ for (k = 0; -+ k < line1->len && -+ line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]); -+ ++k) ; -+ break; -+ case 3: -+ for (k = 0; -+ k < line1->len && -+ line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]); -+ ++k) ; -+ break; -+ } -+ col2 = line1->col[k]; -+ } -+ if (col2 > col1) { -+ col1 = col2; -+ } -+ } -+ for (k = 0; k <= line0->len; ++k) { -+ line0->col[k] += col1; -+ } -+ if (line0->col[line0->len] > nColumns) { -+ nColumns = line0->col[line0->len]; -+ } - } - } - gfree(lineArray); -@@ -1744,6 +1848,9 @@ - nest = 0; - nTinyChars = 0; - lastCharOverlap = gFalse; -+ actualText = NULL; -+ actualTextLen = 0; -+ actualTextNBytes = 0; - if (!rawOrder) { - for (rot = 0; rot < 4; ++rot) { - pools[rot] = new TextPool(); -@@ -1799,6 +1906,7 @@ - delete curWord; - curWord = NULL; - } -+ gfree(actualText); - if (rawOrder) { - while (rawWords) { - word = rawWords; -@@ -1817,6 +1925,8 @@ - gfree(blocks); - } - deleteGList(fonts, TextFontInfo); -+ deleteGList(underlines, TextUnderline); -+ deleteGList(links, TextLink); - - curWord = NULL; - charPos = 0; -@@ -1824,6 +1934,9 @@ - curFontSize = 0; - nest = 0; - nTinyChars = 0; -+ actualText = NULL; -+ actualTextLen = 0; -+ actualTextNBytes = 0; - if (!rawOrder) { - for (rot = 0; rot < 4; ++rot) { - pools[rot] = new TextPool(); -@@ -1834,6 +1947,8 @@ - rawWords = NULL; - rawLastWord = NULL; - fonts = new GList(); -+ underlines = new GList(); -+ links = new GList(); - } - - void TextPage::updateFont(GfxState *state) { -@@ -1993,7 +2124,7 @@ - // (2) this character overlaps the previous one (duplicated text), or - // (3) the previous character was an overlap (we want each duplicated - // character to be in a word by itself at this stage), -- // (4) the font size has changed -+ // (4) the font or font size has changed - if (curWord && curWord->len > 0) { - base = sp = delta = 0; // make gcc happy - switch (curWord->rot) { -@@ -2024,6 +2155,7 @@ - sp < -minDupBreakOverlap * curWord->fontSize || - sp > minWordBreakSpace * curWord->fontSize || - fabs(base - curWord->base) > 0.5 || -+ curFont != curWord->font || - curFontSize != curWord->fontSize) { - endWord(); - } -@@ -2109,7 +2272,7 @@ - links->append(new TextLink(xMin, yMin, xMax, yMax, link)); - } - --void TextPage::coalesce(GBool physLayout, GBool doHTML) { -+void TextPage::coalesce(GBool physLayout, double fixedPitch, GBool doHTML) { - UnicodeMap *uMap; - TextPool *pool; - TextWord *word0, *word1, *word2; -@@ -2139,7 +2302,7 @@ - blkList = NULL; - lastBlk = NULL; - nBlocks = 0; -- primaryRot = -1; -+ primaryRot = 0; - - #if 0 // for debugging - printf("*** initial words ***\n"); -@@ -2603,7 +2766,7 @@ - //~ addition to primary rotation - - // coalesce the block, and add it to the list -- blk->coalesce(uMap); -+ blk->coalesce(uMap, fixedPitch); - if (lastBlk) { - lastBlk->next = blk; - } else { -@@ -2611,11 +2774,12 @@ - } - lastBlk = blk; - count[rot] += blk->charCount; -- if (primaryRot < 0 || count[rot] > count[primaryRot]) { -- primaryRot = rot; -- } - ++nBlocks; - } -+ -+ if (count[rot] > count[primaryRot]) { -+ primaryRot = rot; -+ } - } - - #if 0 // for debugging -@@ -2674,76 +2838,108 @@ - - //----- column assignment - -- // sort blocks into xy order for column assignment -- blocks = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *)); -- for (blk = blkList, i = 0; blk; blk = blk->next, ++i) { -- blocks[i] = blk; -- } -- qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpXYPrimaryRot); -+ if (physLayout && fixedPitch) { - -- // column assignment -- for (i = 0; i < nBlocks; ++i) { -- blk0 = blocks[i]; -- col1 = 0; -- for (j = 0; j < i; ++j) { -- blk1 = blocks[j]; -- col2 = 0; // make gcc happy -+ blocks = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *)); -+ for (blk = blkList, i = 0; blk; blk = blk->next, ++i) { -+ blocks[i] = blk; -+ col1 = 0; // make gcc happy - switch (primaryRot) { - case 0: -- if (blk0->xMin > blk1->xMax) { -- col2 = blk1->col + blk1->nColumns + 3; -- } else if (blk1->xMax == blk1->xMin) { -- col2 = blk1->col; -- } else { -- col2 = blk1->col + (int)(((blk0->xMin - blk1->xMin) / -- (blk1->xMax - blk1->xMin)) * -- blk1->nColumns); -- } -+ col1 = (int)(blk->xMin / fixedPitch + 0.5); - break; - case 1: -- if (blk0->yMin > blk1->yMax) { -- col2 = blk1->col + blk1->nColumns + 3; -- } else if (blk1->yMax == blk1->yMin) { -- col2 = blk1->col; -- } else { -- col2 = blk1->col + (int)(((blk0->yMin - blk1->yMin) / -- (blk1->yMax - blk1->yMin)) * -- blk1->nColumns); -- } -+ col1 = (int)(blk->yMin / fixedPitch + 0.5); - break; - case 2: -- if (blk0->xMax < blk1->xMin) { -- col2 = blk1->col + blk1->nColumns + 3; -- } else if (blk1->xMin == blk1->xMax) { -- col2 = blk1->col; -- } else { -- col2 = blk1->col + (int)(((blk0->xMax - blk1->xMax) / -- (blk1->xMin - blk1->xMax)) * -- blk1->nColumns); -- } -+ col1 = (int)((pageWidth - blk->xMax) / fixedPitch + 0.5); - break; - case 3: -- if (blk0->yMax < blk1->yMin) { -- col2 = blk1->col + blk1->nColumns + 3; -- } else if (blk1->yMin == blk1->yMax) { -- col2 = blk1->col; -- } else { -- col2 = blk1->col + (int)(((blk0->yMax - blk1->yMax) / -- (blk1->yMin - blk1->yMax)) * -- blk1->nColumns); -- } -+ col1 = (int)((pageHeight - blk->yMax) / fixedPitch + 0.5); - break; - } -- if (col2 > col1) { -- col1 = col2; -+ blk->col = col1; -+ for (line = blk->lines; line; line = line->next) { -+ for (j = 0; j <= line->len; ++j) { -+ line->col[j] += col1; -+ } - } - } -- blk0->col = col1; -- for (line = blk0->lines; line; line = line->next) { -- for (j = 0; j <= line->len; ++j) { -- line->col[j] += col1; -+ -+ } else { -+ -+ // sort blocks into xy order for column assignment -+ blocks = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *)); -+ for (blk = blkList, i = 0; blk; blk = blk->next, ++i) { -+ blocks[i] = blk; -+ } -+ qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpXYPrimaryRot); -+ -+ // column assignment -+ for (i = 0; i < nBlocks; ++i) { -+ blk0 = blocks[i]; -+ col1 = 0; -+ for (j = 0; j < i; ++j) { -+ blk1 = blocks[j]; -+ col2 = 0; // make gcc happy -+ switch (primaryRot) { -+ case 0: -+ if (blk0->xMin > blk1->xMax) { -+ col2 = blk1->col + blk1->nColumns + 3; -+ } else if (blk1->xMax == blk1->xMin) { -+ col2 = blk1->col; -+ } else { -+ col2 = blk1->col + (int)(((blk0->xMin - blk1->xMin) / -+ (blk1->xMax - blk1->xMin)) * -+ blk1->nColumns); -+ } -+ break; -+ case 1: -+ if (blk0->yMin > blk1->yMax) { -+ col2 = blk1->col + blk1->nColumns + 3; -+ } else if (blk1->yMax == blk1->yMin) { -+ col2 = blk1->col; -+ } else { -+ col2 = blk1->col + (int)(((blk0->yMin - blk1->yMin) / -+ (blk1->yMax - blk1->yMin)) * -+ blk1->nColumns); -+ } -+ break; -+ case 2: -+ if (blk0->xMax < blk1->xMin) { -+ col2 = blk1->col + blk1->nColumns + 3; -+ } else if (blk1->xMin == blk1->xMax) { -+ col2 = blk1->col; -+ } else { -+ col2 = blk1->col + (int)(((blk0->xMax - blk1->xMax) / -+ (blk1->xMin - blk1->xMax)) * -+ blk1->nColumns); -+ } -+ break; -+ case 3: -+ if (blk0->yMax < blk1->yMin) { -+ col2 = blk1->col + blk1->nColumns + 3; -+ } else if (blk1->yMin == blk1->yMax) { -+ col2 = blk1->col; -+ } else { -+ col2 = blk1->col + (int)(((blk0->yMax - blk1->yMax) / -+ (blk1->yMin - blk1->yMax)) * -+ blk1->nColumns); -+ } -+ break; -+ } -+ if (col2 > col1) { -+ col1 = col2; -+ } -+ } -+ blk0->col = col1; -+ for (line = blk0->lines; line; line = line->next) { -+ for (j = 0; j <= line->len; ++j) { -+ line->col[j] += col1; -+ } - } - } -+ - } - - #if 0 // for debugging -@@ -2753,7 +2949,7 @@ - blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, blk->col, - blk->nColumns); - for (line = blk->lines; line; line = line->next) { -- printf(" line:\n"); -+ printf(" line: col[0]=%d\n", line->col[0]); - for (word0 = line->words; word0; word0 = word0->next) { - printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '", - word0->xMin, word0->xMax, word0->yMin, word0->yMax, -@@ -2932,6 +3128,7 @@ - GBool startAtTop, GBool stopAtBottom, - GBool startAtLast, GBool stopAtLast, - GBool caseSensitive, GBool backward, -+ GBool wholeWord, - double *xMin, double *yMin, - double *xMax, double *yMax) { - TextBlock *blk; -@@ -2989,25 +3186,35 @@ - blk = blocks[i]; - - // check: is the block above the top limit? -- if (!startAtTop && (backward ? blk->yMin > yStart : blk->yMax < yStart)) { -+ // (this only works if the page's primary rotation is zero -- -+ // otherwise the blocks won't be sorted in the useful order) -+ if (!startAtTop && primaryRot == 0 && -+ (backward ? blk->yMin > yStart : blk->yMax < yStart)) { - continue; - } - - // check: is the block below the bottom limit? -- if (!stopAtBottom && (backward ? blk->yMax < yStop : blk->yMin > yStop)) { -+ // (this only works if the page's primary rotation is zero -- -+ // otherwise the blocks won't be sorted in the useful order) -+ if (!stopAtBottom && primaryRot == 0 && -+ (backward ? blk->yMax < yStop : blk->yMin > yStop)) { - break; - } - - for (line = blk->lines; line; line = line->next) { - - // check: is the line above the top limit? -- if (!startAtTop && -+ // (this only works if the page's primary rotation is zero -- -+ // otherwise the lines won't be sorted in the useful order) -+ if (!startAtTop && primaryRot == 0 && - (backward ? line->yMin > yStart : line->yMin < yStart)) { - continue; - } - - // check: is the line below the bottom limit? -- if (!stopAtBottom && -+ // (this only works if the page's primary rotation is zero -- -+ // otherwise the lines won't be sorted in the useful order) -+ if (!stopAtBottom && primaryRot == 0 && - (backward ? line->yMin < yStop : line->yMin > yStop)) { - continue; - } -@@ -3030,68 +3237,72 @@ - j = backward ? m - len : 0; - p = txt + j; - while (backward ? j >= 0 : j <= m - len) { -- -- // compare the strings -- for (k = 0; k < len; ++k) { -- if (p[k] != s2[k]) { -- break; -+ if (!wholeWord || -+ ((j == 0 || !unicodeTypeAlphaNum(txt[j - 1])) && -+ (j + len == m || !unicodeTypeAlphaNum(txt[j + len])))) { -+ -+ // compare the strings -+ for (k = 0; k < len; ++k) { -+ if (p[k] != s2[k]) { -+ break; -+ } - } -- } - -- // found it -- if (k == len) { -- switch (line->rot) { -- case 0: -- xMin1 = line->edge[j]; -- xMax1 = line->edge[j + len]; -- yMin1 = line->yMin; -- yMax1 = line->yMax; -- break; -- case 1: -- xMin1 = line->xMin; -- xMax1 = line->xMax; -- yMin1 = line->edge[j]; -- yMax1 = line->edge[j + len]; -- break; -- case 2: -- xMin1 = line->edge[j + len]; -- xMax1 = line->edge[j]; -- yMin1 = line->yMin; -- yMax1 = line->yMax; -- break; -- case 3: -- xMin1 = line->xMin; -- xMax1 = line->xMax; -- yMin1 = line->edge[j + len]; -- yMax1 = line->edge[j]; -- break; -- } -- if (backward) { -- if ((startAtTop || -- yMin1 < yStart || (yMin1 == yStart && xMin1 < xStart)) && -- (stopAtBottom || -- yMin1 > yStop || (yMin1 == yStop && xMin1 > xStop))) { -- if (!found || -- yMin1 > yMin0 || (yMin1 == yMin0 && xMin1 > xMin0)) { -- xMin0 = xMin1; -- xMax0 = xMax1; -- yMin0 = yMin1; -- yMax0 = yMax1; -- found = gTrue; -- } -+ // found it -+ if (k == len) { -+ switch (line->rot) { -+ case 0: -+ xMin1 = line->edge[j]; -+ xMax1 = line->edge[j + len]; -+ yMin1 = line->yMin; -+ yMax1 = line->yMax; -+ break; -+ case 1: -+ xMin1 = line->xMin; -+ xMax1 = line->xMax; -+ yMin1 = line->edge[j]; -+ yMax1 = line->edge[j + len]; -+ break; -+ case 2: -+ xMin1 = line->edge[j + len]; -+ xMax1 = line->edge[j]; -+ yMin1 = line->yMin; -+ yMax1 = line->yMax; -+ break; -+ case 3: -+ xMin1 = line->xMin; -+ xMax1 = line->xMax; -+ yMin1 = line->edge[j + len]; -+ yMax1 = line->edge[j]; -+ break; - } -- } else { -- if ((startAtTop || -- yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) && -- (stopAtBottom || -- yMin1 < yStop || (yMin1 == yStop && xMin1 < xStop))) { -- if (!found || -- yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) { -- xMin0 = xMin1; -- xMax0 = xMax1; -- yMin0 = yMin1; -- yMax0 = yMax1; -- found = gTrue; -+ if (backward) { -+ if ((startAtTop || -+ yMin1 < yStart || (yMin1 == yStart && xMin1 < xStart)) && -+ (stopAtBottom || -+ yMin1 > yStop || (yMin1 == yStop && xMin1 > xStop))) { -+ if (!found || -+ yMin1 > yMin0 || (yMin1 == yMin0 && xMin1 > xMin0)) { -+ xMin0 = xMin1; -+ xMax0 = xMax1; -+ yMin0 = yMin1; -+ yMax0 = yMax1; -+ found = gTrue; -+ } -+ } -+ } else { -+ if ((startAtTop || -+ yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) && -+ (stopAtBottom || -+ yMin1 < yStop || (yMin1 == yStop && xMin1 < xStop))) { -+ if (!found || -+ yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) { -+ xMin0 = xMin1; -+ xMax0 = xMax1; -+ yMin0 = yMin1; -+ yMax0 = yMax1; -+ found = gTrue; -+ } - } - } - } -@@ -3820,10 +4038,20 @@ - fwrite(text, 1, len, (FILE *)stream); - } - - TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA, -- GBool rawOrderA, GBool append) { -+ double fixedPitchA, GBool rawOrderA, -+ GBool append) { - text = NULL; - physLayout = physLayoutA; -+ fixedPitch = physLayout ? fixedPitchA : 0; - rawOrder = rawOrderA; - doHTML = gFalse; - ok = gTrue; -@@ -3854,11 +4074,13 @@ - } - - TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream, -- GBool physLayoutA, GBool rawOrderA) { -+ GBool physLayoutA, double fixedPitchA, -+ GBool rawOrderA) { - outputFunc = func; - outputStream = stream; - needClose = gFalse; - physLayout = physLayoutA; -+ fixedPitch = physLayout ? fixedPitchA : 0; - rawOrder = rawOrderA; - doHTML = gFalse; - text = new TextPage(rawOrderA); -@@ -3883,12 +4105,16 @@ - - void TextOutputDev::endPage() { - text->endPage(); -- text->coalesce(physLayout, doHTML); -+ text->coalesce(physLayout, fixedPitch, doHTML); - if (outputStream) { - text->dump(outputStream, outputFunc, physLayout); - } - } - -+void TextOutputDev::restoreState(GfxState *state) { -+ text->updateFont(state); -+} -+ - void TextOutputDev::updateFont(GfxState *state) { - text->updateFont(state); - } -@@ -3903,7 +4129,19 @@ - double dx, double dy, - double originX, double originY, - CharCode c, int nBytes, Unicode *u, int uLen) { -- text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen); -+ text->addChar(state, x - originX, y - originY, dx, dy, c, nBytes, u, uLen); -+} -+ -@@ -4057,10 +4295,12 @@ - GBool startAtTop, GBool stopAtBottom, - GBool startAtLast, GBool stopAtLast, - GBool caseSensitive, GBool backward, -+ GBool wholeWord, - double *xMin, double *yMin, - double *xMax, double *yMax) { - return text->findText(s, len, startAtTop, stopAtBottom, -- startAtLast, stopAtLast, caseSensitive, backward, -+ startAtLast, stopAtLast, -+ caseSensitive, backward, wholeWord, - xMin, yMin, xMax, yMax); - } - -diff -ru xpdf-3.02/xpdf/TextOutputDev.h xpdf-3.03/xpdf/TextOutputDev.h ---- xpdf-3.02/xpdf/TextOutputDev.h 2007-02-27 23:05:52.000000000 +0100 -+++ xpdf-3.03/xpdf/TextOutputDev.h 2011-08-15 23:08:53.000000000 +0200 -@@ -280,7 +281,7 @@ - - void addWord(TextWord *word); - -- void coalesce(UnicodeMap *uMap); -+ void coalesce(UnicodeMap *uMap, double fixedPitch); - - // Update this block's priMin and priMax values, looking at . - void updatePriMinMax(TextBlock *blk); -@@ -442,7 +452,7 @@ - void addLink(int xMin, int yMin, int xMax, int yMax, Link *link); - - // Coalesce strings that look like parts of the same line. -- void coalesce(GBool physLayout, GBool doHTML); -+ void coalesce(GBool physLayout, double fixedPitch, GBool doHTML); - - // Find a string. If is true, starts looking at the - // top of the page; else if is true, starts looking -@@ -455,6 +465,7 @@ - GBool startAtTop, GBool stopAtBottom, - GBool startAtLast, GBool stopAtLast, - GBool caseSensitive, GBool backward, -+ GBool wholeWord, - double *xMin, double *yMin, - double *xMax, double *yMax); - -@@ -502,6 +513,13 @@ - int nTinyChars; // number of "tiny" chars seen so far - GBool lastCharOverlap; // set if the last added char overlapped the - // previous char -@@ -544,14 +562,16 @@ - // is maintained. If is true, the text is kept in - // content stream order. - TextOutputDev(char *fileName, GBool physLayoutA, -- GBool rawOrderA, GBool append); -+ double fixedPitchA, GBool rawOrderA, -+ GBool append); - - // Create a TextOutputDev which will write to a generic stream. If - // is true, the original physical layout of the text - // is maintained. If is true, the text is kept in - // content stream order. - TextOutputDev(TextOutputFunc func, void *stream, -- GBool physLayoutA, GBool rawOrderA); -+ GBool physLayoutA, double fixedPitchA, -+ GBool rawOrderA); - - // Destructor. - virtual ~TextOutputDev(); -@@ -583,6 +607,9 @@ - // End a page. - virtual void endPage(); - -+ //----- save/restore graphics state -+ virtual void restoreState(GfxState *state); -+ - //----- update text state - virtual void updateFont(GfxState *state); - -@@ -615,6 +645,7 @@ - GBool startAtTop, GBool stopAtBottom, - GBool startAtLast, GBool stopAtLast, - GBool caseSensitive, GBool backward, -+ GBool wholeWord, - double *xMin, double *yMin, - double *xMax, double *yMax); - -@@ -653,6 +684,9 @@ - TextPage *text; // text for the current page - GBool physLayout; // maintain original physical layout when - // dumping text -+ double fixedPitch; // if physLayout is true and this is non-zero, -+ // assume fixed-pitch characters with this -+ // width - GBool rawOrder; // keep text in content stream order - GBool doHTML; // extra processing for HTML conversion - GBool ok; // set up ok? \ No newline at end of file -- cgit v1.2.3