summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--poppler/TextOutputDev.cc378
-rw-r--r--poppler/TextOutputDev.h42
2 files changed, 197 insertions, 223 deletions
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 034209eb..03b68bc2 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -419,13 +419,6 @@ TextWord::TextWord(const GfxState *state, int rotA, double fontSizeA)
{
rot = rotA;
fontSize = fontSizeA;
- text = nullptr;
- charcode = nullptr;
- edge = nullptr;
- charPos = nullptr;
- font = nullptr;
- textMat = nullptr;
- len = size = 0;
spaceAfter = false;
next = nullptr;
invisible = state->getRender() == 3;
@@ -447,27 +440,14 @@ TextWord::TextWord(const GfxState *state, int rotA, double fontSizeA)
link = nullptr;
}
-TextWord::~TextWord()
-{
- gfree(text);
- gfree(charcode);
- gfree(edge);
- gfree(charPos);
- gfree(font);
- gfree(textMat);
-}
+TextWord::~TextWord() { }
void TextWord::addChar(const GfxState *state, TextFontInfo *fontA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA)
{
- ensureCapacity(len + 1);
- text[len] = u;
- charcode[len] = c;
- charPos[len] = charPosA;
- charPos[len + 1] = charPosA + charLen;
- font[len] = fontA;
- textMat[len] = textMatA;
+ chars.push_back(CharInfo { u, c, charPosA, 0.0, fontA, textMatA });
+ charPosEnd = charPosA + charLen;
- if (len == 0) {
+ if (len() == 1) {
setInitialBounds(fontA, x, y);
}
@@ -476,43 +456,42 @@ void TextWord::addChar(const GfxState *state, TextFontInfo *fontA, double x, dou
// TextPage::beginWord()) for vertical writing mode
switch (rot) {
case 0:
- edge[len] = x - fontSize;
- xMax = edge[len + 1] = x;
+ chars.back().edge = x - fontSize;
+ xMax = edgeEnd = x;
break;
case 1:
- edge[len] = y - fontSize;
- yMax = edge[len + 1] = y;
+ chars.back().edge = y - fontSize;
+ yMax = edgeEnd = y;
break;
case 2:
- edge[len] = x + fontSize;
- xMin = edge[len + 1] = x;
+ chars.back().edge = x + fontSize;
+ xMin = edgeEnd = x;
break;
case 3:
- edge[len] = y + fontSize;
- yMin = edge[len + 1] = y;
+ chars.back().edge = y + fontSize;
+ yMin = edgeEnd = y;
break;
}
} else { // horizontal writing mode
switch (rot) {
case 0:
- edge[len] = x;
- xMax = edge[len + 1] = x + dx;
+ chars.back().edge = x;
+ xMax = edgeEnd = x + dx;
break;
case 1:
- edge[len] = y;
- yMax = edge[len + 1] = y + dy;
+ chars.back().edge = y;
+ yMax = edgeEnd = y + dy;
break;
case 2:
- edge[len] = x;
- xMin = edge[len + 1] = x + dx;
+ chars.back().edge = x;
+ xMin = edgeEnd = x + dx;
break;
case 3:
- edge[len] = y;
- yMin = edge[len + 1] = y + dy;
+ chars.back().edge = y;
+ yMin = edgeEnd = y + dy;
break;
}
}
- ++len;
}
void TextWord::setInitialBounds(TextFontInfo *fontA, double x, double y)
@@ -604,19 +583,6 @@ void TextWord::setInitialBounds(TextFontInfo *fontA, double x, double y)
}
}
-void TextWord::ensureCapacity(int capacity)
-{
- if (capacity > size) {
- size = std::max(size + 16, capacity);
- text = (Unicode *)greallocn(text, size, sizeof(Unicode));
- charcode = (CharCode *)greallocn(charcode, (size + 1), sizeof(CharCode));
- edge = (double *)greallocn(edge, (size + 1), sizeof(double));
- charPos = (int *)greallocn(charPos, size + 1, sizeof(int));
- font = (TextFontInfo **)greallocn(font, size, sizeof(TextFontInfo *));
- textMat = (Matrix *)greallocn(textMat, size, sizeof(Matrix));
- }
-}
-
struct CombiningTable
{
Unicode base;
@@ -651,19 +617,15 @@ static Unicode getCombiningChar(Unicode u)
bool TextWord::addCombining(const GfxState *state, TextFontInfo *fontA, double fontSizeA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA)
{
- if (len == 0 || wMode != 0 || fontA->getWMode() != 0) {
+ if (chars.empty() || wMode != 0 || fontA->getWMode() != 0) {
return false;
}
Unicode cCurrent = getCombiningChar(u);
- Unicode cPrev = getCombiningChar(text[len - 1]);
- double edgeMid = (edge[len - 1] + edge[len]) / 2;
- double charMid, maxScaledMidDelta, charBase, maxScaledBaseDelta;
-
- if (cCurrent != 0 && unicodeTypeAlphaNum(text[len - 1])) {
+ if (cCurrent != 0 && unicodeTypeAlphaNum(chars.back().text)) {
// Current is a combining character, previous is base character
- maxScaledMidDelta = fabs(edge[len] - edge[len - 1]) * combMaxMidDelta;
- charMid = charBase = maxScaledBaseDelta = 0;
+ double maxScaledMidDelta = fabs(edgeEnd - chars.back().edge) * combMaxMidDelta;
+ double charMid, charBase, maxScaledBaseDelta;
// Test if characters overlap
if (rot == 0 || rot == 2) {
@@ -676,29 +638,24 @@ bool TextWord::addCombining(const GfxState *state, TextFontInfo *fontA, double f
maxScaledBaseDelta = (xMax - xMin) * combMaxBaseDelta;
}
+ double edgeMid = (chars.back().edge + edgeEnd) / 2;
if (fabs(charMid - edgeMid) >= maxScaledMidDelta || fabs(charBase - base) >= maxScaledBaseDelta) {
return false;
}
// Add character, but don't adjust edge / bounding box because
// combining character's positioning could be odd.
- ensureCapacity(len + 1);
- text[len] = cCurrent;
- charcode[len] = c;
- charPos[len] = charPosA;
- charPos[len + 1] = charPosA + charLen;
- font[len] = fontA;
- textMat[len] = textMatA;
- edge[len + 1] = edge[len];
- edge[len] = (edge[len + 1] + edge[len - 1]) / 2;
- ++len;
+ chars.emplace_back(CharInfo { cCurrent, c, charPosA, edgeMid, fontA, textMatA });
+ charPosEnd = charPosA + charLen;
+
return true;
}
+ Unicode cPrev = getCombiningChar(chars.back().text);
if (cPrev != 0 && unicodeTypeAlphaNum(u)) {
// Previous is a combining character, current is base character
- maxScaledBaseDelta = (fontA->getAscent() - fontA->getDescent()) * fontSizeA * combMaxBaseDelta;
- charMid = charBase = maxScaledMidDelta = 0;
+ double maxScaledBaseDelta = (fontA->getAscent() - fontA->getDescent()) * fontSizeA * combMaxBaseDelta;
+ double charMid, charBase, maxScaledMidDelta;
// Test if characters overlap
if (rot == 0 || rot == 2) {
@@ -711,73 +668,71 @@ bool TextWord::addCombining(const GfxState *state, TextFontInfo *fontA, double f
maxScaledMidDelta = fabs(dy * combMaxMidDelta);
}
+ double edgeMid = (chars.back().edge + edgeEnd) / 2;
if (fabs(charMid - edgeMid) >= maxScaledMidDelta || fabs(charBase - base) >= maxScaledBaseDelta) {
return false;
}
- // move combining character to after base character
- ensureCapacity(len + 1);
fontSize = fontSizeA;
- text[len] = cPrev;
- charcode[len] = charcode[len - 1];
- charPos[len] = charPosA;
- charPos[len + 1] = charPosA + charLen;
- font[len] = font[len - 1];
- textMat[len] = textMat[len - 1];
-
- text[len - 1] = u;
- charcode[len - 1] = c;
- font[len - 1] = fontA;
- textMat[len - 1] = textMatA;
-
- if (len == 1) {
+ // move combining character to after base character
+ chars.emplace_back(CharInfo { cPrev, chars.back().charcode, charPosA, edgeMid, chars.back().font, chars.back().textMat });
+
+ auto &lastChar = chars[chars.size() - 2];
+
+ charPosEnd = charPosA + charLen;
+ lastChar.text = u;
+ lastChar.charcode = c;
+ lastChar.font = fontA;
+ lastChar.textMat = textMatA;
+
+ if (len() == 2) {
setInitialBounds(fontA, x, y);
}
// Updated edges / bounding box because we changed the base
// character.
if (wMode) {
+ // FIXME unreachable, wMode == 0
switch (rot) {
case 0:
- edge[len - 1] = x - fontSize;
- xMax = edge[len + 1] = x;
+ lastChar.edge = x - fontSize;
+ xMax = edgeEnd = x;
break;
case 1:
- edge[len - 1] = y - fontSize;
- yMax = edge[len + 1] = y;
+ lastChar.edge = y - fontSize;
+ yMax = edgeEnd = y;
break;
case 2:
- edge[len - 1] = x + fontSize;
- xMin = edge[len + 1] = x;
+ lastChar.edge = x + fontSize;
+ xMin = edgeEnd = x;
break;
case 3:
- edge[len - 1] = y + fontSize;
- yMin = edge[len + 1] = y;
+ lastChar.edge = y + fontSize;
+ yMin = edgeEnd = y;
break;
}
} else {
switch (rot) {
case 0:
- edge[len - 1] = x;
- xMax = edge[len + 1] = x + dx;
+ lastChar.edge = x;
+ xMax = edgeEnd = x + dx;
break;
case 1:
- edge[len - 1] = y;
- yMax = edge[len + 1] = y + dy;
+ lastChar.edge = y;
+ yMax = edgeEnd = y + dy;
break;
case 2:
- edge[len - 1] = x;
- xMin = edge[len + 1] = x + dx;
+ lastChar.edge = x;
+ xMin = edgeEnd = x + dx;
break;
case 3:
- edge[len - 1] = y;
- yMin = edge[len + 1] = y + dy;
+ lastChar.edge = y;
+ yMin = edgeEnd = y + dy;
break;
}
}
- edge[len] = (edge[len + 1] + edge[len - 1]) / 2;
- ++len;
+ chars.back().edge = (edgeEnd + lastChar.edge) / 2;
return true;
}
return false;
@@ -785,8 +740,6 @@ bool TextWord::addCombining(const GfxState *state, TextFontInfo *fontA, double f
void TextWord::merge(TextWord *word)
{
- int i;
-
if (word->xMin < xMin) {
xMin = word->xMin;
}
@@ -799,18 +752,9 @@ void TextWord::merge(TextWord *word)
if (word->yMax > yMax) {
yMax = word->yMax;
}
- ensureCapacity(len + word->len);
- for (i = 0; i < word->len; ++i) {
- text[len + i] = word->text[i];
- charcode[len + i] = word->charcode[i];
- edge[len + i] = word->edge[i];
- charPos[len + i] = word->charPos[i];
- font[len + i] = word->font[i];
- textMat[len + i] = word->textMat[i];
- }
- edge[len + word->len] = word->edge[word->len];
- charPos[len + word->len] = word->charPos[word->len];
- len += word->len;
+ chars.insert(chars.end(), word->chars.begin(), word->chars.end());
+ edgeEnd = word->edgeEnd;
+ charPosEnd = word->charPosEnd;
}
inline int TextWord::primaryCmp(const TextWord *word) const
@@ -877,14 +821,13 @@ GooString *TextWord::getText() const
GooString *s;
const UnicodeMap *uMap;
char buf[8];
- int n, i;
s = new GooString();
if (!(uMap = globalParams->getTextEncoding())) {
return s;
}
- for (i = 0; i < len; ++i) {
- n = uMap->mapUnicode(text[i], buf, sizeof(buf));
+ for (size_t i = 0; i < len(); ++i) {
+ auto n = uMap->mapUnicode(chars[i].text, buf, sizeof(buf));
s->append(buf, n);
}
return s;
@@ -892,33 +835,39 @@ GooString *TextWord::getText() const
void TextWord::getCharBBox(int charIdx, double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const
{
- if (charIdx < 0 || charIdx >= len) {
+ if (charIdx < 0) {
return;
}
+ size_t uCharIdx = charIdx;
+ if (uCharIdx >= len()) {
+ return;
+ }
+ auto startingEdge = chars[uCharIdx].edge;
+ auto endingEdge = (uCharIdx + 1 == len()) ? edgeEnd : chars[charIdx + 1].edge;
switch (rot) {
case 0:
- *xMinA = edge[charIdx];
- *xMaxA = edge[charIdx + 1];
+ *xMinA = startingEdge;
+ *xMaxA = endingEdge;
*yMinA = yMin;
*yMaxA = yMax;
break;
case 1:
*xMinA = xMin;
*xMaxA = xMax;
- *yMinA = edge[charIdx];
- *yMaxA = edge[charIdx + 1];
+ *yMinA = startingEdge;
+ *yMaxA = endingEdge;
break;
case 2:
- *xMinA = edge[charIdx + 1];
- *xMaxA = edge[charIdx];
+ *xMinA = endingEdge;
+ *xMaxA = startingEdge;
*yMinA = yMin;
*yMaxA = yMax;
break;
case 3:
*xMinA = xMin;
*xMaxA = xMax;
- *yMinA = edge[charIdx + 1];
- *yMaxA = edge[charIdx];
+ *yMinA = endingEdge;
+ *yMaxA = startingEdge;
break;
}
}
@@ -1189,21 +1138,19 @@ int TextLine::cmpXY(const void *p1, const void *p2)
void TextLine::coalesce(const UnicodeMap *uMap)
{
- TextWord *word0, *word1;
double space, delta, minSpace;
bool isUnicode;
char buf[8];
- int i, j;
if (words->next) {
// compute the inter-word space threshold
- if (words->len > 1 || words->next->len > 1) {
+ if (words->len() > 1 || words->next->len() > 1) {
minSpace = 0;
} else {
minSpace = words->primaryDelta(words->next);
- for (word0 = words->next, word1 = word0->next; word1 && minSpace > 0; word0 = word1, word1 = word0->next) {
- if (word1->len > 1) {
+ for (auto word0 = words->next, word1 = word0->next; word1 && minSpace > 0; word0 = word1, word1 = word0->next) {
+ if (word1->len() > 1) {
minSpace = 0;
}
delta = word0->primaryDelta(word1);
@@ -1222,15 +1169,17 @@ void TextLine::coalesce(const UnicodeMap *uMap)
}
// merge words
- word0 = words;
- word1 = words->next;
+ auto word0 = words;
+ auto word1 = words->next;
while (word1) {
if (word0->primaryDelta(word1) >= space) {
word0->spaceAfter = true;
word0 = word1;
word1 = word1->next;
- } else if (word0->font[word0->len - 1] == word1->font[0] && word0->underlined == word1->underlined && fabs(word0->fontSize - word1->fontSize) < maxWordFontSizeDelta * words->fontSize
- && word1->charPos[0] == word0->charPos[word0->len]) {
+ } else if (word0->chars.back().font == word1->chars.front().font //
+ && word0->underlined == word1->underlined //
+ && fabs(word0->fontSize - word1->fontSize) < maxWordFontSizeDelta * words->fontSize //
+ && word1->chars.front().charPos == word0->charPosEnd) {
word0->merge(word1);
word0->next = word1->next;
delete word1;
@@ -1245,22 +1194,22 @@ void TextLine::coalesce(const UnicodeMap *uMap)
// build the line text
isUnicode = uMap ? uMap->isUnicode() : false;
len = 0;
- for (word1 = words; word1; word1 = word1->next) {
- len += word1->len;
+ for (auto word1 = words; word1; word1 = word1->next) {
+ len += word1->len();
if (word1->spaceAfter) {
++len;
}
}
text = (Unicode *)gmallocn(len, sizeof(Unicode));
edge = (double *)gmallocn(len + 1, sizeof(double));
- i = 0;
- for (word1 = words; word1; word1 = word1->next) {
- for (j = 0; j < word1->len; ++j) {
- text[i] = word1->text[j];
- edge[i] = word1->edge[j];
+ size_t i = 0;
+ for (auto word1 = words; word1; word1 = word1->next) {
+ for (size_t j = 0; j < word1->len(); ++j) {
+ text[i] = word1->chars[j].text;
+ edge[i] = word1->chars[j].edge;
++i;
}
- edge[i] = word1->edge[word1->len];
+ edge[i] = word1->edgeEnd;
if (word1->spaceAfter) {
text[i] = (Unicode)0x0020;
++i;
@@ -1270,12 +1219,12 @@ void TextLine::coalesce(const UnicodeMap *uMap)
// compute convertedLen and set up the col array
col = (int *)gmallocn(len + 1, sizeof(int));
convertedLen = 0;
- for (i = 0; i < len; ++i) {
- col[i] = convertedLen;
+ for (int ci = 0; ci < len; ++ci) {
+ col[ci] = convertedLen;
if (isUnicode) {
++convertedLen;
} else if (uMap) {
- convertedLen += uMap->mapUnicode(text[i], buf, sizeof(buf));
+ convertedLen += uMap->mapUnicode(text[ci], buf, sizeof(buf));
}
}
col[len] = convertedLen;
@@ -1690,8 +1639,14 @@ void TextBlock::coalesce(const UnicodeMap *uMap, double fixedPitch)
word1 = nullptr;
word2 = pool->getPool(idx1);
}
+ TextWord *word1 = prevWord->next;
+
+ auto equalText = [](const TextWord &w1, const TextWord &w2) -> bool { //
+ return std::equal(w1.chars.begin(), w1.chars.end(), w2.chars.begin(), w2.chars.end(), //
+ [](auto c1, auto c2) { return c1.text == c2.text; });
+ };
for (; word2; word1 = word2, word2 = word2->next) {
- if (word2->len == word0->len && !memcmp(word2->text, word0->text, word0->len * sizeof(Unicode))) {
+ if (equalText(*word0, *word2)) {
switch (rot) {
case 0:
case 2:
@@ -2713,28 +2668,28 @@ void TextPage::addChar(const GfxState *state, double x, double y, double dx, dou
// character to be in a word by itself at this stage),
// (4) the font size has changed
// (5) the WMode changed
- if (curWord && curWord->len > 0) {
+ if (curWord && curWord->len() > 0) {
base = sp = delta = 0; // make gcc happy
switch (curWord->rot) {
case 0:
base = y1;
sp = x1 - curWord->xMax;
- delta = x1 - curWord->edge[curWord->len - 1];
+ delta = x1 - curWord->chars.back().edge;
break;
case 1:
base = x1;
sp = y1 - curWord->yMax;
- delta = y1 - curWord->edge[curWord->len - 1];
+ delta = y1 - curWord->chars.back().edge;
break;
case 2:
base = y1;
sp = curWord->xMin - x1;
- delta = curWord->edge[curWord->len - 1] - x1;
+ delta = curWord->chars.back().edge - x1;
break;
case 3:
base = x1;
sp = curWord->yMin - y1;
- delta = curWord->edge[curWord->len - 1] - y1;
+ delta = curWord->chars.back().edge - y1;
break;
}
overlap = fabs(delta) < dupMaxPriDelta * curWord->fontSize && fabs(base - curWord->base) < dupMaxSecDelta * curWord->fontSize;
@@ -2813,7 +2768,7 @@ void TextPage::addWord(TextWord *word)
{
// throw away zero-length words -- they don't have valid xMin/xMax
// values, and they're useless anyway
- if (word->len == 0) {
+ if (word->len() == 0) {
delete word;
return;
}
@@ -3309,10 +3264,10 @@ void TextPage::coalesce(bool physLayout, double fixedPitch, bool doHTML, double
for (blk = blkList; blk; blk = blk->next) {
for (line = blk->lines; line; line = line->next) {
for (word0 = line->words; word0; word0 = word0->next) {
- for (int i = 0; i < word0->len; ++i) {
- if (unicodeTypeL(word0->text[i])) {
+ for (size_t i = 0; i < word0->len(); ++i) {
+ if (unicodeTypeL(word0->chars[i].text)) {
++lrCount;
- } else if (unicodeTypeR(word0->text[i])) {
+ } else if (unicodeTypeR(word0->chars[i].text)) {
--lrCount;
}
}
@@ -4587,12 +4542,16 @@ GooString *TextSelectionDumper::getText()
spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
+ std::vector<Unicode> uText;
for (i = 0; i < nLines; i++) {
std::vector<TextWordSelection *> *lineWords = lines[i];
for (std::size_t j = 0; j < lineWords->size(); j++) {
TextWordSelection *sel = (*lineWords)[j];
- page->dumpFragment(sel->word->text + sel->begin, sel->end - sel->begin, uMap, text);
+ uText.resize(sel->end - sel->begin);
+ std::transform(sel->word->chars.begin() + sel->begin, sel->word->chars.begin() + sel->end, uText.begin(), [](auto &c) { return c.text; });
+ page->dumpFragment(uText.data(), uText.size(), uMap, text);
+
if (j < lineWords->size() - 1 && sel->word->spaceAfter) {
text->append(space, spaceLen);
}
@@ -4838,36 +4797,39 @@ void TextSelectionPainter::endPage()
out->updateFillColor(state);
+ GooString string;
for (const TextWordSelection *sel : *selectionList) {
int begin = sel->begin;
while (begin < sel->end) {
- TextFontInfo *font = sel->word->font[begin];
- Matrix *mat = &sel->word->textMat[begin];
+ TextFontInfo *font = sel->word->chars[begin].font;
+ const Matrix *mat = &sel->word->chars[begin].textMat;
state->setTextMat(mat->m[0], mat->m[1], mat->m[2], mat->m[3], 0, 0);
state->setFont(font->gfxFont, 1);
out->updateFont(state);
int fEnd = begin + 1;
- while (fEnd < sel->end && font->matches(sel->word->font[fEnd]) && mat->m[0] == sel->word->textMat[fEnd].m[0] && mat->m[1] == sel->word->textMat[fEnd].m[1] && mat->m[2] == sel->word->textMat[fEnd].m[2]
- && mat->m[3] == sel->word->textMat[fEnd].m[3]) {
+ while (fEnd < sel->end && font->matches(sel->word->chars[fEnd].font) //
+ && mat->m[0] == sel->word->chars[fEnd].textMat.m[0] && mat->m[1] == sel->word->chars[fEnd].textMat.m[1] //
+ && mat->m[2] == sel->word->chars[fEnd].textMat.m[2] && mat->m[3] == sel->word->chars[fEnd].textMat.m[3]) {
fEnd++;
}
/* The only purpose of this string is to let the output device query
* it's length. Might want to change this interface later. */
- GooString *string = new GooString((char *)sel->word->charcode, fEnd - begin);
- out->beginString(state, string);
+ string.clear();
+ std::for_each(sel->word->chars.begin() + begin, sel->word->chars.begin() + fEnd, [&string](const auto c) { string.append(c.charcode); });
+ out->beginString(state, &string);
for (int j = begin; j < fEnd; j++) {
- if (j != begin && sel->word->charPos[j] == sel->word->charPos[j - 1]) {
+ const auto &charJ = sel->word->chars[j];
+ if (j != begin && charJ.charPos == sel->word->chars[j - 1].charPos) {
continue;
}
- out->drawChar(state, sel->word->textMat[j].m[4], sel->word->textMat[j].m[5], 0, 0, 0, 0, sel->word->charcode[j], 1, nullptr, 0);
+ out->drawChar(state, charJ.textMat.m[4], charJ.textMat.m[5], 0, 0, 0, 0, charJ.charcode, 1, nullptr, 0);
}
out->endString(state);
- delete string;
begin = fEnd;
}
}
@@ -4878,7 +4840,6 @@ void TextSelectionPainter::endPage()
void TextWord::visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style)
{
- int i, begin, end;
double mid, s1, s2;
if (rot == 0 || rot == 2) {
@@ -4889,10 +4850,14 @@ void TextWord::visitSelection(TextSelectionVisitor *visitor, const PDFRectangle
s2 = selection->y2;
}
- begin = len;
- end = 0;
- for (i = 0; i < len; i++) {
- mid = (edge[i] + edge[i + 1]) / 2;
+ size_t begin = len();
+ size_t end = 0;
+ for (size_t i = 0; i < len(); i++) {
+ if (i + 1 < len()) {
+ mid = (chars[i].edge + chars[i + 1].edge) / 2;
+ } else {
+ mid = (chars[i].edge + edgeEnd) / 2;
+ }
if (XBetweenAB(mid, s1, s2)) {
if (i < begin) {
begin = i;
@@ -5279,7 +5244,6 @@ bool TextPage::findCharRange(int pos, int length, double *xMin, double *yMin, do
double xMin0, xMax0, yMin0, yMax0;
double xMin1, xMax1, yMin1, yMax1;
bool first;
- int i, j0, j1;
if (rawOrder) {
return false;
@@ -5291,41 +5255,44 @@ bool TextPage::findCharRange(int pos, int length, double *xMin, double *yMin, do
first = true;
xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy
- for (i = 0; i < nBlocks; ++i) {
+ for (int i = 0; i < nBlocks; ++i) {
blk = blocks[i];
for (line = blk->lines; line; line = line->next) {
for (word = line->words; word; word = word->next) {
- if (pos < word->charPos[word->len] && pos + length > word->charPos[0]) {
- for (j0 = 0; j0 < word->len && pos >= word->charPos[j0 + 1]; ++j0) {
+ if (pos < word->charPosEnd && pos + length > word->chars.front().charPos) {
+ size_t j0, j1;
+ for (j0 = 0; (j0 + 1) < word->len() && pos >= word->chars[j0 + 1].charPos; ++j0) {
;
}
- for (j1 = word->len - 1; j1 > j0 && pos + length <= word->charPos[j1]; --j1) {
+ for (j1 = word->len(); j1 > j0 && pos + length <= word->chars[j1].charPos; --j1) {
;
}
+ auto startingEdge = word->chars[j0].edge;
+ auto endingEdge = (j1 + 1 == word->len()) ? word->edgeEnd : word->chars[j1 + 1].edge;
switch (line->rot) {
case 0:
- xMin1 = word->edge[j0];
- xMax1 = word->edge[j1 + 1];
+ xMin1 = startingEdge;
+ xMax1 = endingEdge;
yMin1 = word->yMin;
yMax1 = word->yMax;
break;
case 1:
xMin1 = word->xMin;
xMax1 = word->xMax;
- yMin1 = word->edge[j0];
- yMax1 = word->edge[j1 + 1];
+ yMin1 = startingEdge;
+ yMax1 = endingEdge;
break;
case 2:
- xMin1 = word->edge[j1 + 1];
- xMax1 = word->edge[j0];
+ xMin1 = endingEdge;
+ xMax1 = startingEdge;
yMin1 = word->yMin;
yMax1 = word->yMax;
break;
case 3:
xMin1 = word->xMin;
xMax1 = word->xMax;
- yMin1 = word->edge[j1 + 1];
- yMax1 = word->edge[j0];
+ yMin1 = endingEdge;
+ yMax1 = startingEdge;
break;
}
if (first || xMin1 < xMin0) {
@@ -5367,7 +5334,6 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo
TextLineFrag *frag;
char space[8], eol[16], eop[8];
int spaceLen, eolLen, eopLen;
- GooString *s;
double delta;
int col, i, j, d, n;
@@ -5396,11 +5362,16 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo
// output the page in raw (content stream) order
if (rawOrder) {
+ GooString s;
+ std::vector<Unicode> uText;
+
for (word = rawWords; word; word = word->next) {
- s = new GooString();
- dumpFragment(word->text, word->len, uMap, s);
- (*outputFunc)(outputStream, s->c_str(), s->getLength());
- delete s;
+ s.clear();
+ uText.resize(word->len());
+ std::transform(word->chars.begin(), word->chars.end(), uText.begin(), [](auto &c) { return c.text; });
+ dumpFragment(uText.data(), uText.size(), uMap, &s);
+ (*outputFunc)(outputStream, s.c_str(), s.getLength());
+
if (word->next && fabs(word->next->base - word->base) < maxIntraLineDelta * word->fontSize && word->next->xMin > word->xMax - minDupBreakOverlap * word->fontSize) {
if (word->next->xMin > word->xMax + minWordSpacing * word->fontSize) {
(*outputFunc)(outputStream, space, spaceLen);
@@ -5454,6 +5425,7 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo
printf("\n");
#endif
+ GooString s;
// generate output
col = 0;
for (i = 0; i < nFrags; ++i) {
@@ -5465,10 +5437,9 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo
}
// print the line
- s = new GooString();
- col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, s);
- (*outputFunc)(outputStream, s->c_str(), s->getLength());
- delete s;
+ s.clear();
+ col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, &s);
+ (*outputFunc)(outputStream, s.c_str(), s.getLength());
// print one or more returns if necessary
if (i == nFrags - 1 || frags[i + 1].col < col || fabs(frags[i + 1].base - frag->base) > maxIntraLineDelta * frag->line->words->fontSize) {
@@ -5500,10 +5471,9 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayo
if (line->hyphenated && (line->next || blk->next)) {
--n;
}
- s = new GooString();
- dumpFragment(line->text, n, uMap, s);
- (*outputFunc)(outputStream, s->c_str(), s->getLength());
- delete s;
+ GooString s;
+ dumpFragment(line->text, n, uMap, &s);
+ (*outputFunc)(outputStream, s.c_str(), s.getLength());
// output a newline when a hyphen is not suppressed
if (n == line->len) {
(*outputFunc)(outputStream, eol, eolLen);
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index af007fa0..98e1df18 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -171,16 +171,16 @@ public:
void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);
// Get the TextFontInfo object associated with a character.
- const TextFontInfo *getFontInfo(int idx) const { return font[idx]; }
+ const TextFontInfo *getFontInfo(int idx) const { return chars[idx].font; }
// Get the next TextWord on the linked list.
const TextWord *getNext() const { return next; }
#ifdef TEXTOUT_WORD_LIST
- int getLength() const { return len; }
- const Unicode *getChar(int idx) const { return &text[idx]; }
+ int getLength() const { return chars.size(); }
+ const Unicode *getChar(int idx) const { return &chars[idx].text; }
GooString *getText() const;
- const GooString *getFontName(int idx) const { return font[idx]->fontName; }
+ const GooString *getFontName(int idx) const { return chars[idx].font->fontName; }
void getColor(double *r, double *g, double *b) const
{
*r = colorR;
@@ -197,19 +197,19 @@ public:
void getCharBBox(int charIdx, double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const;
double getFontSize() const { return fontSize; }
int getRotation() const { return rot; }
- int getCharPos() const { return charPos[0]; }
- int getCharLen() const { return charPos[len] - charPos[0]; }
+ int getCharPos() const { return chars.empty() ? 0 : chars.front().charPos; }
+ int getCharLen() const { return chars.empty() ? 0 : chars.back().charPos - chars.front().charPos; }
bool getSpaceAfter() const { return spaceAfter; }
#endif
bool isUnderlined() const { return underlined; }
const AnnotLink *getLink() const { return link; }
- double getEdge(int i) const { return edge[i]; }
+ double getEdge(int i) const { return chars[i].edge; }
double getBaseline() const { return base; }
bool hasSpaceAfter() const { return spaceAfter; }
const TextWord *nextWord() const { return next; };
+ auto len() const { return chars.size(); }
private:
- void ensureCapacity(int capacity);
void setInitialBounds(TextFontInfo *fontA, double x, double y);
int rot; // rotation, multiple of 90 degrees
@@ -218,18 +218,22 @@ private:
double xMin, xMax; // bounding box x coordinates
double yMin, yMax; // bounding box y coordinates
double base; // baseline x or y coordinate
- Unicode *text; // the text
- CharCode *charcode; // glyph indices
- double *edge; // "near" edge x or y coord of each char
- // (plus one extra entry for the last char)
- int *charPos; // character position (within content stream)
- // of each char (plus one extra entry for
- // the last char)
- int len; // length of text/edge/charPos/font arrays
- int size; // size of text/edge/charPos/font arrays
- TextFontInfo **font; // font information for each char
- Matrix *textMat; // transformation matrix for each char
+
double fontSize; // font size
+
+ struct CharInfo
+ {
+ Unicode text;
+ CharCode charcode;
+ int charPos;
+ double edge;
+ TextFontInfo *font;
+ Matrix textMat;
+ };
+ std::vector<CharInfo> chars;
+ int charPosEnd = 0;
+ double edgeEnd = 0;
+
bool spaceAfter; // set if there is a space between this
// word and the next word on the line
bool underlined;