summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlbert Astals Cid <aacid@kde.org>2012-02-06 00:26:34 +0100
committerAlbert Astals Cid <aacid@kde.org>2012-02-06 00:26:34 +0100
commite865f320891f6e1f97fcc4f612119311aa029a63 (patch)
treed0b717785163dad529995ded228ad2ad6ef64d19
parent9d997e664b8d903a81e5b9c178a014d681faf475 (diff)
all done!
-rw-r--r--ALL_DIFF890
1 files changed, 0 insertions, 890 deletions
diff --git a/ALL_DIFF b/ALL_DIFF
index 0bff244..e69de29 100644
--- a/ALL_DIFF
+++ b/ALL_DIFF
@@ -1,890 +0,0 @@
-diff -ru xpdf-3.02/doc/pdftotext.1 xpdf-3.03/doc/pdftotext.1
---- xpdf-3.02/doc/pdftotext.1 2007-02-27 23:05:51.000000000 +0100
-+++ xpdf-3.03/doc/pdftotext.1 2011-08-15 23:08:53.000000000 +0200
-@@ -49,6 +49,10 @@
- text. The default is to \'undo' physical layout (columns,
- hyphenation, etc.) and output the text in reading order.
- .TP
-+.BI \-fixed " number"
-+Assume fixed-pitch (or tabular) text, with the specified character
-+width (in points). This forces physical layout mode.
-+.TP
- .B \-raw
- Keep the text in content stream order. This is a hack which often
- "undoes" column formatting, etc. Use of raw mode is no longer
-diff -ru xpdf-3.02/xpdf/pdftotext.cc xpdf-3.03/xpdf/pdftotext.cc
---- xpdf-3.02/xpdf/pdftotext.cc 2007-02-27 23:05:52.000000000 +0100
-+++ xpdf-3.03/xpdf/pdftotext.cc 2011-08-15 23:08:53.000000000 +0200
-@@ -35,7 +35,8 @@
-
- static int firstPage = 1;
- static int lastPage = 0;
- static GBool physLayout = gFalse;
-+static double fixedPitch = 0;
- static GBool rawOrder = gFalse;
- static GBool htmlMeta = gFalse;
- static char textEncName[128] = "";
-@@ -55,6 +58,8 @@
- "last page to convert"},
- {"-layout", argFlag, &physLayout, 0,
- "maintain original physical layout"},
-+ {"-fixed", argFP, &fixedPitch, 0,
-+ "assume fixed-pitch (or tabular) text"},
- {"-raw", argFlag, &rawOrder, 0,
- "keep strings in content stream order"},
- {"-htmlmeta", argFlag, &htmlMeta, 0,
-@@ -112,6 +117,9 @@
- goto err0;
- }
- fileName = new GString(argv[1]);
-+ if (fixedPitch) {
-+ physLayout = gTrue;
-+ }
-
- // read config file
- globalParams = new GlobalParams(cfgFileName);
-@@ -232,7 +241,7 @@
-
- // write text file
- textOut = new TextOutputDev(textFileName->getCString(),
-- physLayout, rawOrder, htmlMeta);
-+ physLayout, fixedPitch, rawOrder, htmlMeta);
- if (textOut->isOk()) {
- doc->displayPages(textOut, firstPage, lastPage, 72, 72, 0,
- gFalse, gTrue, gFalse);
-diff -ru xpdf-3.02/xpdf/TextOutputDev.cc xpdf-3.03/xpdf/TextOutputDev.cc
---- xpdf-3.02/xpdf/TextOutputDev.cc 2007-02-27 23:05:52.000000000 +0100
-+++ xpdf-3.03/xpdf/TextOutputDev.cc 2011-08-15 23:08:53.000000000 +0200
-@@ -545,7 +618,7 @@
-
- // insert the new word
- if (cursor && wordBaseIdx == cursorBaseIdx &&
-- word->primaryCmp(cursor) > 0) {
-+ word->primaryCmp(cursor) >= 0) {
- w0 = cursor;
- w1 = cursor->next;
- } else {
-@@ -928,7 +1001,7 @@
- xMax = blk->xMin + d1 * (blk->xMax - blk->xMin);
- yMin = blk->yMin + d2 * (blk->yMax - blk->yMin);
- yMax = blk->yMin + d3 * (blk->yMax - blk->yMin);
-- base = blk->yMin + base * (blk->yMax - blk->yMin);
-+ base = blk->yMin + d4 * (blk->yMax - blk->yMin);
- break;
- case 1:
- xMin = blk->xMax - d3 * (blk->xMax - blk->xMin);
-@@ -1150,15 +1223,15 @@
- }
- }
-
--void TextBlock::coalesce(UnicodeMap *uMap) {
-+void TextBlock::coalesce(UnicodeMap *uMap, double fixedPitch) {
- TextWord *word0, *word1, *word2, *bestWord0, *bestWord1, *lastWord;
- TextLine *line, *line0, *line1;
- int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx;
- int baseIdx, bestWordBaseIdx, idx0, idx1;
- double minBase, maxBase;
-- double fontSize, delta, priDelta, secDelta;
-+ double fontSize, wordSpacing, delta, priDelta, secDelta;
- TextLine **lineArray;
-- GBool found;
-+ GBool found, overlap;
- int col1, col2;
- int i, j, k;
-
-@@ -1168,11 +1241,7 @@
- while (word0) {
- priDelta = dupMaxPriDelta * word0->fontSize;
- secDelta = dupMaxSecDelta * word0->fontSize;
-- if (rot == 0 || rot == 3) {
-- maxBaseIdx = pool->getBaseIdx(word0->base + secDelta);
-- } else {
-- maxBaseIdx = pool->getBaseIdx(word0->base - secDelta);
-- }
-+ maxBaseIdx = pool->getBaseIdx(word0->base + secDelta);
- found = gFalse;
- word1 = word2 = NULL; // make gcc happy
- for (idx1 = idx0; idx1 <= maxBaseIdx; ++idx1) {
-@@ -1269,6 +1338,7 @@
- maxBase = word0->base + maxIntraLineDelta * fontSize;
- minBaseIdx = pool->getBaseIdx(minBase);
- maxBaseIdx = pool->getBaseIdx(maxBase);
-+ wordSpacing = fixedPitch ? fixedPitch : maxWordSpacing * fontSize;
-
- // find the rest of the words in this line
- while (1) {
-@@ -1277,25 +1347,32 @@
- // this line
- bestWordBaseIdx = 0;
- bestWord0 = bestWord1 = NULL;
-- for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) {
-+ overlap = gFalse;
-+ for (baseIdx = minBaseIdx;
-+ !overlap && baseIdx <= maxBaseIdx;
-+ ++baseIdx) {
- for (word0 = NULL, word1 = pool->getPool(baseIdx);
- word1;
- word0 = word1, word1 = word1->next) {
- if (word1->base >= minBase &&
-- word1->base <= maxBase &&
-- (delta = lastWord->primaryDelta(word1)) >=
-- minCharSpacing * fontSize) {
-- if (delta < maxWordSpacing * fontSize &&
-- (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) {
-- bestWordBaseIdx = baseIdx;
-- bestWord0 = word0;
-- bestWord1 = word1;
-+ word1->base <= maxBase) {
-+ delta = lastWord->primaryDelta(word1);
-+ if (delta < minCharSpacing * fontSize) {
-+ overlap = gTrue;
-+ break;
-+ } else {
-+ if (delta < wordSpacing &&
-+ (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) {
-+ bestWordBaseIdx = baseIdx;
-+ bestWord0 = word0;
-+ bestWord1 = word1;
-+ }
-+ break;
- }
-- break;
- }
- }
- }
-- if (!bestWord1) {
-+ if (overlap || !bestWord1) {
- break;
- }
-
-@@ -1342,52 +1419,79 @@
-
- // column assignment
- nColumns = 0;
-- for (i = 0; i < nLines; ++i) {
-- line0 = lineArray[i];
-- col1 = 0;
-- for (j = 0; j < i; ++j) {
-- line1 = lineArray[j];
-- if (line1->primaryDelta(line0) >= 0) {
-- col2 = line1->col[line1->len] + 1;
-- } else {
-- k = 0; // make gcc happy
-- switch (rot) {
-- case 0:
-- for (k = 0;
-- k < line1->len &&
-- line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
-- ++k) ;
-- break;
-- case 1:
-- for (k = 0;
-- k < line1->len &&
-- line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
-- ++k) ;
-- break;
-- case 2:
-- for (k = 0;
-- k < line1->len &&
-- line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
-- ++k) ;
-- break;
-- case 3:
-- for (k = 0;
-- k < line1->len &&
-- line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
-- ++k) ;
-- break;
-- }
-- col2 = line1->col[k];
-+ if (fixedPitch) {
-+ for (i = 0; i < nLines; ++i) {
-+ line0 = lineArray[i];
-+ col1 = 0; // make gcc happy
-+ switch (rot) {
-+ case 0:
-+ col1 = (int)((line0->xMin - xMin) / fixedPitch + 0.5);
-+ break;
-+ case 1:
-+ col1 = (int)((line0->yMin - yMin) / fixedPitch + 0.5);
-+ break;
-+ case 2:
-+ col1 = (int)((xMax - line0->xMax) / fixedPitch + 0.5);
-+ break;
-+ case 3:
-+ col1 = (int)((yMax - line0->yMax) / fixedPitch + 0.5);
-+ break;
- }
-- if (col2 > col1) {
-- col1 = col2;
-+ for (k = 0; k <= line0->len; ++k) {
-+ line0->col[k] += col1;
-+ }
-+ if (line0->col[line0->len] > nColumns) {
-+ nColumns = line0->col[line0->len];
- }
- }
-- for (k = 0; k <= line0->len; ++k) {
-- line0->col[k] += col1;
-- }
-- if (line0->col[line0->len] > nColumns) {
-- nColumns = line0->col[line0->len];
-+ } else {
-+ for (i = 0; i < nLines; ++i) {
-+ line0 = lineArray[i];
-+ col1 = 0;
-+ for (j = 0; j < i; ++j) {
-+ line1 = lineArray[j];
-+ if (line1->primaryDelta(line0) >= 0) {
-+ col2 = line1->col[line1->len] + 1;
-+ } else {
-+ k = 0; // make gcc happy
-+ switch (rot) {
-+ case 0:
-+ for (k = 0;
-+ k < line1->len &&
-+ line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
-+ ++k) ;
-+ break;
-+ case 1:
-+ for (k = 0;
-+ k < line1->len &&
-+ line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
-+ ++k) ;
-+ break;
-+ case 2:
-+ for (k = 0;
-+ k < line1->len &&
-+ line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
-+ ++k) ;
-+ break;
-+ case 3:
-+ for (k = 0;
-+ k < line1->len &&
-+ line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
-+ ++k) ;
-+ break;
-+ }
-+ col2 = line1->col[k];
-+ }
-+ if (col2 > col1) {
-+ col1 = col2;
-+ }
-+ }
-+ for (k = 0; k <= line0->len; ++k) {
-+ line0->col[k] += col1;
-+ }
-+ if (line0->col[line0->len] > nColumns) {
-+ nColumns = line0->col[line0->len];
-+ }
- }
- }
- gfree(lineArray);
-@@ -1744,6 +1848,9 @@
- nest = 0;
- nTinyChars = 0;
- lastCharOverlap = gFalse;
-+ actualText = NULL;
-+ actualTextLen = 0;
-+ actualTextNBytes = 0;
- if (!rawOrder) {
- for (rot = 0; rot < 4; ++rot) {
- pools[rot] = new TextPool();
-@@ -1799,6 +1906,7 @@
- delete curWord;
- curWord = NULL;
- }
-+ gfree(actualText);
- if (rawOrder) {
- while (rawWords) {
- word = rawWords;
-@@ -1817,6 +1925,8 @@
- gfree(blocks);
- }
- deleteGList(fonts, TextFontInfo);
-+ deleteGList(underlines, TextUnderline);
-+ deleteGList(links, TextLink);
-
- curWord = NULL;
- charPos = 0;
-@@ -1824,6 +1934,9 @@
- curFontSize = 0;
- nest = 0;
- nTinyChars = 0;
-+ actualText = NULL;
-+ actualTextLen = 0;
-+ actualTextNBytes = 0;
- if (!rawOrder) {
- for (rot = 0; rot < 4; ++rot) {
- pools[rot] = new TextPool();
-@@ -1834,6 +1947,8 @@
- rawWords = NULL;
- rawLastWord = NULL;
- fonts = new GList();
-+ underlines = new GList();
-+ links = new GList();
- }
-
- void TextPage::updateFont(GfxState *state) {
-@@ -1993,7 +2124,7 @@
- // (2) this character overlaps the previous one (duplicated text), or
- // (3) the previous character was an overlap (we want each duplicated
- // character to be in a word by itself at this stage),
-- // (4) the font size has changed
-+ // (4) the font or font size has changed
- if (curWord && curWord->len > 0) {
- base = sp = delta = 0; // make gcc happy
- switch (curWord->rot) {
-@@ -2024,6 +2155,7 @@
- sp < -minDupBreakOverlap * curWord->fontSize ||
- sp > minWordBreakSpace * curWord->fontSize ||
- fabs(base - curWord->base) > 0.5 ||
-+ curFont != curWord->font ||
- curFontSize != curWord->fontSize) {
- endWord();
- }
-@@ -2109,7 +2272,7 @@
- links->append(new TextLink(xMin, yMin, xMax, yMax, link));
- }
-
--void TextPage::coalesce(GBool physLayout, GBool doHTML) {
-+void TextPage::coalesce(GBool physLayout, double fixedPitch, GBool doHTML) {
- UnicodeMap *uMap;
- TextPool *pool;
- TextWord *word0, *word1, *word2;
-@@ -2139,7 +2302,7 @@
- blkList = NULL;
- lastBlk = NULL;
- nBlocks = 0;
-- primaryRot = -1;
-+ primaryRot = 0;
-
- #if 0 // for debugging
- printf("*** initial words ***\n");
-@@ -2603,7 +2766,7 @@
- //~ addition to primary rotation
-
- // coalesce the block, and add it to the list
-- blk->coalesce(uMap);
-+ blk->coalesce(uMap, fixedPitch);
- if (lastBlk) {
- lastBlk->next = blk;
- } else {
-@@ -2611,11 +2774,12 @@
- }
- lastBlk = blk;
- count[rot] += blk->charCount;
-- if (primaryRot < 0 || count[rot] > count[primaryRot]) {
-- primaryRot = rot;
-- }
- ++nBlocks;
- }
-+
-+ if (count[rot] > count[primaryRot]) {
-+ primaryRot = rot;
-+ }
- }
-
- #if 0 // for debugging
-@@ -2674,76 +2838,108 @@
-
- //----- column assignment
-
-- // sort blocks into xy order for column assignment
-- blocks = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *));
-- for (blk = blkList, i = 0; blk; blk = blk->next, ++i) {
-- blocks[i] = blk;
-- }
-- qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpXYPrimaryRot);
-+ if (physLayout && fixedPitch) {
-
-- // column assignment
-- for (i = 0; i < nBlocks; ++i) {
-- blk0 = blocks[i];
-- col1 = 0;
-- for (j = 0; j < i; ++j) {
-- blk1 = blocks[j];
-- col2 = 0; // make gcc happy
-+ blocks = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *));
-+ for (blk = blkList, i = 0; blk; blk = blk->next, ++i) {
-+ blocks[i] = blk;
-+ col1 = 0; // make gcc happy
- switch (primaryRot) {
- case 0:
-- if (blk0->xMin > blk1->xMax) {
-- col2 = blk1->col + blk1->nColumns + 3;
-- } else if (blk1->xMax == blk1->xMin) {
-- col2 = blk1->col;
-- } else {
-- col2 = blk1->col + (int)(((blk0->xMin - blk1->xMin) /
-- (blk1->xMax - blk1->xMin)) *
-- blk1->nColumns);
-- }
-+ col1 = (int)(blk->xMin / fixedPitch + 0.5);
- break;
- case 1:
-- if (blk0->yMin > blk1->yMax) {
-- col2 = blk1->col + blk1->nColumns + 3;
-- } else if (blk1->yMax == blk1->yMin) {
-- col2 = blk1->col;
-- } else {
-- col2 = blk1->col + (int)(((blk0->yMin - blk1->yMin) /
-- (blk1->yMax - blk1->yMin)) *
-- blk1->nColumns);
-- }
-+ col1 = (int)(blk->yMin / fixedPitch + 0.5);
- break;
- case 2:
-- if (blk0->xMax < blk1->xMin) {
-- col2 = blk1->col + blk1->nColumns + 3;
-- } else if (blk1->xMin == blk1->xMax) {
-- col2 = blk1->col;
-- } else {
-- col2 = blk1->col + (int)(((blk0->xMax - blk1->xMax) /
-- (blk1->xMin - blk1->xMax)) *
-- blk1->nColumns);
-- }
-+ col1 = (int)((pageWidth - blk->xMax) / fixedPitch + 0.5);
- break;
- case 3:
-- if (blk0->yMax < blk1->yMin) {
-- col2 = blk1->col + blk1->nColumns + 3;
-- } else if (blk1->yMin == blk1->yMax) {
-- col2 = blk1->col;
-- } else {
-- col2 = blk1->col + (int)(((blk0->yMax - blk1->yMax) /
-- (blk1->yMin - blk1->yMax)) *
-- blk1->nColumns);
-- }
-+ col1 = (int)((pageHeight - blk->yMax) / fixedPitch + 0.5);
- break;
- }
-- if (col2 > col1) {
-- col1 = col2;
-+ blk->col = col1;
-+ for (line = blk->lines; line; line = line->next) {
-+ for (j = 0; j <= line->len; ++j) {
-+ line->col[j] += col1;
-+ }
- }
- }
-- blk0->col = col1;
-- for (line = blk0->lines; line; line = line->next) {
-- for (j = 0; j <= line->len; ++j) {
-- line->col[j] += col1;
-+
-+ } else {
-+
-+ // sort blocks into xy order for column assignment
-+ blocks = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *));
-+ for (blk = blkList, i = 0; blk; blk = blk->next, ++i) {
-+ blocks[i] = blk;
-+ }
-+ qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpXYPrimaryRot);
-+
-+ // column assignment
-+ for (i = 0; i < nBlocks; ++i) {
-+ blk0 = blocks[i];
-+ col1 = 0;
-+ for (j = 0; j < i; ++j) {
-+ blk1 = blocks[j];
-+ col2 = 0; // make gcc happy
-+ switch (primaryRot) {
-+ case 0:
-+ if (blk0->xMin > blk1->xMax) {
-+ col2 = blk1->col + blk1->nColumns + 3;
-+ } else if (blk1->xMax == blk1->xMin) {
-+ col2 = blk1->col;
-+ } else {
-+ col2 = blk1->col + (int)(((blk0->xMin - blk1->xMin) /
-+ (blk1->xMax - blk1->xMin)) *
-+ blk1->nColumns);
-+ }
-+ break;
-+ case 1:
-+ if (blk0->yMin > blk1->yMax) {
-+ col2 = blk1->col + blk1->nColumns + 3;
-+ } else if (blk1->yMax == blk1->yMin) {
-+ col2 = blk1->col;
-+ } else {
-+ col2 = blk1->col + (int)(((blk0->yMin - blk1->yMin) /
-+ (blk1->yMax - blk1->yMin)) *
-+ blk1->nColumns);
-+ }
-+ break;
-+ case 2:
-+ if (blk0->xMax < blk1->xMin) {
-+ col2 = blk1->col + blk1->nColumns + 3;
-+ } else if (blk1->xMin == blk1->xMax) {
-+ col2 = blk1->col;
-+ } else {
-+ col2 = blk1->col + (int)(((blk0->xMax - blk1->xMax) /
-+ (blk1->xMin - blk1->xMax)) *
-+ blk1->nColumns);
-+ }
-+ break;
-+ case 3:
-+ if (blk0->yMax < blk1->yMin) {
-+ col2 = blk1->col + blk1->nColumns + 3;
-+ } else if (blk1->yMin == blk1->yMax) {
-+ col2 = blk1->col;
-+ } else {
-+ col2 = blk1->col + (int)(((blk0->yMax - blk1->yMax) /
-+ (blk1->yMin - blk1->yMax)) *
-+ blk1->nColumns);
-+ }
-+ break;
-+ }
-+ if (col2 > col1) {
-+ col1 = col2;
-+ }
-+ }
-+ blk0->col = col1;
-+ for (line = blk0->lines; line; line = line->next) {
-+ for (j = 0; j <= line->len; ++j) {
-+ line->col[j] += col1;
-+ }
- }
- }
-+
- }
-
- #if 0 // for debugging
-@@ -2753,7 +2949,7 @@
- blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, blk->col,
- blk->nColumns);
- for (line = blk->lines; line; line = line->next) {
-- printf(" line:\n");
-+ printf(" line: col[0]=%d\n", line->col[0]);
- for (word0 = line->words; word0; word0 = word0->next) {
- printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '",
- word0->xMin, word0->xMax, word0->yMin, word0->yMax,
-@@ -2932,6 +3128,7 @@
- GBool startAtTop, GBool stopAtBottom,
- GBool startAtLast, GBool stopAtLast,
- GBool caseSensitive, GBool backward,
-+ GBool wholeWord,
- double *xMin, double *yMin,
- double *xMax, double *yMax) {
- TextBlock *blk;
-@@ -2989,25 +3186,35 @@
- blk = blocks[i];
-
- // check: is the block above the top limit?
-- if (!startAtTop && (backward ? blk->yMin > yStart : blk->yMax < yStart)) {
-+ // (this only works if the page's primary rotation is zero --
-+ // otherwise the blocks won't be sorted in the useful order)
-+ if (!startAtTop && primaryRot == 0 &&
-+ (backward ? blk->yMin > yStart : blk->yMax < yStart)) {
- continue;
- }
-
- // check: is the block below the bottom limit?
-- if (!stopAtBottom && (backward ? blk->yMax < yStop : blk->yMin > yStop)) {
-+ // (this only works if the page's primary rotation is zero --
-+ // otherwise the blocks won't be sorted in the useful order)
-+ if (!stopAtBottom && primaryRot == 0 &&
-+ (backward ? blk->yMax < yStop : blk->yMin > yStop)) {
- break;
- }
-
- for (line = blk->lines; line; line = line->next) {
-
- // check: is the line above the top limit?
-- if (!startAtTop &&
-+ // (this only works if the page's primary rotation is zero --
-+ // otherwise the lines won't be sorted in the useful order)
-+ if (!startAtTop && primaryRot == 0 &&
- (backward ? line->yMin > yStart : line->yMin < yStart)) {
- continue;
- }
-
- // check: is the line below the bottom limit?
-- if (!stopAtBottom &&
-+ // (this only works if the page's primary rotation is zero --
-+ // otherwise the lines won't be sorted in the useful order)
-+ if (!stopAtBottom && primaryRot == 0 &&
- (backward ? line->yMin < yStop : line->yMin > yStop)) {
- continue;
- }
-@@ -3030,68 +3237,72 @@
- j = backward ? m - len : 0;
- p = txt + j;
- while (backward ? j >= 0 : j <= m - len) {
--
-- // compare the strings
-- for (k = 0; k < len; ++k) {
-- if (p[k] != s2[k]) {
-- break;
-+ if (!wholeWord ||
-+ ((j == 0 || !unicodeTypeAlphaNum(txt[j - 1])) &&
-+ (j + len == m || !unicodeTypeAlphaNum(txt[j + len])))) {
-+
-+ // compare the strings
-+ for (k = 0; k < len; ++k) {
-+ if (p[k] != s2[k]) {
-+ break;
-+ }
- }
-- }
-
-- // found it
-- if (k == len) {
-- switch (line->rot) {
-- case 0:
-- xMin1 = line->edge[j];
-- xMax1 = line->edge[j + len];
-- yMin1 = line->yMin;
-- yMax1 = line->yMax;
-- break;
-- case 1:
-- xMin1 = line->xMin;
-- xMax1 = line->xMax;
-- yMin1 = line->edge[j];
-- yMax1 = line->edge[j + len];
-- break;
-- case 2:
-- xMin1 = line->edge[j + len];
-- xMax1 = line->edge[j];
-- yMin1 = line->yMin;
-- yMax1 = line->yMax;
-- break;
-- case 3:
-- xMin1 = line->xMin;
-- xMax1 = line->xMax;
-- yMin1 = line->edge[j + len];
-- yMax1 = line->edge[j];
-- break;
-- }
-- if (backward) {
-- if ((startAtTop ||
-- yMin1 < yStart || (yMin1 == yStart && xMin1 < xStart)) &&
-- (stopAtBottom ||
-- yMin1 > yStop || (yMin1 == yStop && xMin1 > xStop))) {
-- if (!found ||
-- yMin1 > yMin0 || (yMin1 == yMin0 && xMin1 > xMin0)) {
-- xMin0 = xMin1;
-- xMax0 = xMax1;
-- yMin0 = yMin1;
-- yMax0 = yMax1;
-- found = gTrue;
-- }
-+ // found it
-+ if (k == len) {
-+ switch (line->rot) {
-+ case 0:
-+ xMin1 = line->edge[j];
-+ xMax1 = line->edge[j + len];
-+ yMin1 = line->yMin;
-+ yMax1 = line->yMax;
-+ break;
-+ case 1:
-+ xMin1 = line->xMin;
-+ xMax1 = line->xMax;
-+ yMin1 = line->edge[j];
-+ yMax1 = line->edge[j + len];
-+ break;
-+ case 2:
-+ xMin1 = line->edge[j + len];
-+ xMax1 = line->edge[j];
-+ yMin1 = line->yMin;
-+ yMax1 = line->yMax;
-+ break;
-+ case 3:
-+ xMin1 = line->xMin;
-+ xMax1 = line->xMax;
-+ yMin1 = line->edge[j + len];
-+ yMax1 = line->edge[j];
-+ break;
- }
-- } else {
-- if ((startAtTop ||
-- yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) &&
-- (stopAtBottom ||
-- yMin1 < yStop || (yMin1 == yStop && xMin1 < xStop))) {
-- if (!found ||
-- yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) {
-- xMin0 = xMin1;
-- xMax0 = xMax1;
-- yMin0 = yMin1;
-- yMax0 = yMax1;
-- found = gTrue;
-+ if (backward) {
-+ if ((startAtTop ||
-+ yMin1 < yStart || (yMin1 == yStart && xMin1 < xStart)) &&
-+ (stopAtBottom ||
-+ yMin1 > yStop || (yMin1 == yStop && xMin1 > xStop))) {
-+ if (!found ||
-+ yMin1 > yMin0 || (yMin1 == yMin0 && xMin1 > xMin0)) {
-+ xMin0 = xMin1;
-+ xMax0 = xMax1;
-+ yMin0 = yMin1;
-+ yMax0 = yMax1;
-+ found = gTrue;
-+ }
-+ }
-+ } else {
-+ if ((startAtTop ||
-+ yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) &&
-+ (stopAtBottom ||
-+ yMin1 < yStop || (yMin1 == yStop && xMin1 < xStop))) {
-+ if (!found ||
-+ yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) {
-+ xMin0 = xMin1;
-+ xMax0 = xMax1;
-+ yMin0 = yMin1;
-+ yMax0 = yMax1;
-+ found = gTrue;
-+ }
- }
- }
- }
-@@ -3820,10 +4038,20 @@
- fwrite(text, 1, len, (FILE *)stream);
- }
-
- TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA,
-- GBool rawOrderA, GBool append) {
-+ double fixedPitchA, GBool rawOrderA,
-+ GBool append) {
- text = NULL;
- physLayout = physLayoutA;
-+ fixedPitch = physLayout ? fixedPitchA : 0;
- rawOrder = rawOrderA;
- doHTML = gFalse;
- ok = gTrue;
-@@ -3854,11 +4074,13 @@
- }
-
- TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
-- GBool physLayoutA, GBool rawOrderA) {
-+ GBool physLayoutA, double fixedPitchA,
-+ GBool rawOrderA) {
- outputFunc = func;
- outputStream = stream;
- needClose = gFalse;
- physLayout = physLayoutA;
-+ fixedPitch = physLayout ? fixedPitchA : 0;
- rawOrder = rawOrderA;
- doHTML = gFalse;
- text = new TextPage(rawOrderA);
-@@ -3883,12 +4105,16 @@
-
- void TextOutputDev::endPage() {
- text->endPage();
-- text->coalesce(physLayout, doHTML);
-+ text->coalesce(physLayout, fixedPitch, doHTML);
- if (outputStream) {
- text->dump(outputStream, outputFunc, physLayout);
- }
- }
-
-+void TextOutputDev::restoreState(GfxState *state) {
-+ text->updateFont(state);
-+}
-+
- void TextOutputDev::updateFont(GfxState *state) {
- text->updateFont(state);
- }
-@@ -3903,7 +4129,19 @@
- double dx, double dy,
- double originX, double originY,
- CharCode c, int nBytes, Unicode *u, int uLen) {
-- text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen);
-+ text->addChar(state, x - originX, y - originY, dx, dy, c, nBytes, u, uLen);
-+}
-+
-@@ -4057,10 +4295,12 @@
- GBool startAtTop, GBool stopAtBottom,
- GBool startAtLast, GBool stopAtLast,
- GBool caseSensitive, GBool backward,
-+ GBool wholeWord,
- double *xMin, double *yMin,
- double *xMax, double *yMax) {
- return text->findText(s, len, startAtTop, stopAtBottom,
-- startAtLast, stopAtLast, caseSensitive, backward,
-+ startAtLast, stopAtLast,
-+ caseSensitive, backward, wholeWord,
- xMin, yMin, xMax, yMax);
- }
-
-diff -ru xpdf-3.02/xpdf/TextOutputDev.h xpdf-3.03/xpdf/TextOutputDev.h
---- xpdf-3.02/xpdf/TextOutputDev.h 2007-02-27 23:05:52.000000000 +0100
-+++ xpdf-3.03/xpdf/TextOutputDev.h 2011-08-15 23:08:53.000000000 +0200
-@@ -280,7 +281,7 @@
-
- void addWord(TextWord *word);
-
-- void coalesce(UnicodeMap *uMap);
-+ void coalesce(UnicodeMap *uMap, double fixedPitch);
-
- // Update this block's priMin and priMax values, looking at <blk>.
- void updatePriMinMax(TextBlock *blk);
-@@ -442,7 +452,7 @@
- void addLink(int xMin, int yMin, int xMax, int yMax, Link *link);
-
- // Coalesce strings that look like parts of the same line.
-- void coalesce(GBool physLayout, GBool doHTML);
-+ void coalesce(GBool physLayout, double fixedPitch, GBool doHTML);
-
- // Find a string. If <startAtTop> is true, starts looking at the
- // top of the page; else if <startAtLast> is true, starts looking
-@@ -455,6 +465,7 @@
- GBool startAtTop, GBool stopAtBottom,
- GBool startAtLast, GBool stopAtLast,
- GBool caseSensitive, GBool backward,
-+ GBool wholeWord,
- double *xMin, double *yMin,
- double *xMax, double *yMax);
-
-@@ -502,6 +513,13 @@
- int nTinyChars; // number of "tiny" chars seen so far
- GBool lastCharOverlap; // set if the last added char overlapped the
- // previous char
-@@ -544,14 +562,16 @@
- // is maintained. If <rawOrder> is true, the text is kept in
- // content stream order.
- TextOutputDev(char *fileName, GBool physLayoutA,
-- GBool rawOrderA, GBool append);
-+ double fixedPitchA, GBool rawOrderA,
-+ GBool append);
-
- // Create a TextOutputDev which will write to a generic stream. If
- // <physLayoutA> is true, the original physical layout of the text
- // is maintained. If <rawOrder> is true, the text is kept in
- // content stream order.
- TextOutputDev(TextOutputFunc func, void *stream,
-- GBool physLayoutA, GBool rawOrderA);
-+ GBool physLayoutA, double fixedPitchA,
-+ GBool rawOrderA);
-
- // Destructor.
- virtual ~TextOutputDev();
-@@ -583,6 +607,9 @@
- // End a page.
- virtual void endPage();
-
-+ //----- save/restore graphics state
-+ virtual void restoreState(GfxState *state);
-+
- //----- update text state
- virtual void updateFont(GfxState *state);
-
-@@ -615,6 +645,7 @@
- GBool startAtTop, GBool stopAtBottom,
- GBool startAtLast, GBool stopAtLast,
- GBool caseSensitive, GBool backward,
-+ GBool wholeWord,
- double *xMin, double *yMin,
- double *xMax, double *yMax);
-
-@@ -653,6 +684,9 @@
- TextPage *text; // text for the current page
- GBool physLayout; // maintain original physical layout when
- // dumping text
-+ double fixedPitch; // if physLayout is true and this is non-zero,
-+ // assume fixed-pitch characters with this
-+ // width
- GBool rawOrder; // keep text in content stream order
- GBool doHTML; // extra processing for HTML conversion
- GBool ok; // set up ok? \ No newline at end of file