summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- i18npool/qa/cppunit/test_textsearch.cxx 88
-rw-r--r-- i18npool/source/search/textsearch.cxx 40
2 files changed, 120 insertions, 8 deletions
diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx
index 269380a90910..38cc099b7c95 100644
--- a/i18npool/qa/cppunit/test_textsearch.cxx
+++ b/i18npool/qa/cppunit/test_textsearch.cxx
@@ -38,6 +38,7 @@ public:
void testSearches();
void testWildcardSearch();
void testApostropheSearch();
+ void testQuotationMarkSearch();
void testTdf138410();
CPPUNIT_TEST_SUITE(TestTextSearch);
@@ -45,6 +46,7 @@ public:
CPPUNIT_TEST(testSearches);
CPPUNIT_TEST(testWildcardSearch);
CPPUNIT_TEST(testApostropheSearch);
+ CPPUNIT_TEST(testQuotationMarkSearch);
CPPUNIT_TEST(testTdf138410);
CPPUNIT_TEST_SUITE_END();
private:
@@ -404,6 +406,92 @@ void TestTextSearch::testApostropheSearch()
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
}
+void TestTextSearch::testQuotationMarkSearch()
+{
+ // Checks that ASCII quotes in the search string match typographic ones in the text. A) "x" must match U+201C x U+201D at offsets 0..3
+ OUString str( u"“x”, „y‟, ‘z’, ‚a‛"_ustr );
+ sal_Int32 startPos = 0, endPos = str.getLength();
+
+ // set options: ABSOLUTE algorithm, IGNORE_CASE | IGNORE_WIDTH transliteration, search string is ASCII "x" including the quotes
+ util::SearchOptions aOptions;
+ aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
+ aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
+ aOptions.searchString = "\"x\"";
+ aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
+ | TransliterationFlags::IGNORE_WIDTH);
+ m_xSearch->setOptions( aOptions );
+
+ util::SearchResult aRes;
+
+ // search forward over the whole string (startPos = 0, endPos = str.getLength())
+ aRes = m_xSearch->searchForward( str, startPos, endPos );
+ // subRegExpressions was 0 here (presumably: no match) before the change under test.
+ CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.startOffset[0] );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
+
+ // search backwards from endPos down to startPos; the asserts expect startOffset/endOffset swapped relative to the forward result
+ aRes = m_xSearch->searchBackward( str, endPos, startPos );
+ // subRegExpressions was 0 here (presumably: no match) before the change under test.
+ CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.endOffset[0] );
+
+ // B) ASCII "y" must match U+201E y U+201F at offsets 5..8
+ aOptions.searchString = "\"y\"";
+ m_xSearch->setOptions( aOptions );
+
+ // search forward over the whole string
+ aRes = m_xSearch->searchForward( str, startPos, endPos );
+ // subRegExpressions was 0 here (presumably: no match) before the change under test.
+ CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.startOffset[0] );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.endOffset[0] );
+
+ // search backwards; same match expected, with start and end offsets swapped
+ aRes = m_xSearch->searchBackward( str, endPos, startPos );
+ // subRegExpressions was 0 here (presumably: no match) before the change under test.
+ CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.startOffset[0] );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.endOffset[0] );
+
+ // C) ASCII 'z' (apostrophes) must match U+2018 z U+2019 at offsets 10..13
+ aOptions.searchString = "'z'";
+ m_xSearch->setOptions( aOptions );
+
+ // search forward over the whole string
+ aRes = m_xSearch->searchForward( str, startPos, endPos );
+ // subRegExpressions was 0 here (presumably: no match) before the change under test.
+ CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.startOffset[0] );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.endOffset[0] );
+
+ // search backwards; same match expected, with start and end offsets swapped
+ aRes = m_xSearch->searchBackward( str, endPos, startPos );
+ // subRegExpressions was 0 here (presumably: no match) before the change under test.
+ CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.startOffset[0] );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.endOffset[0] );
+
+ // D) ASCII 'a' (apostrophes) must match U+201A a U+201B at offsets 15..18
+ aOptions.searchString = "'a'";
+ m_xSearch->setOptions( aOptions );
+
+ // search forward over the whole string
+ aRes = m_xSearch->searchForward( str, startPos, endPos );
+ // subRegExpressions was 0 here (presumably: no match) before the change under test.
+ CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.startOffset[0] );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.endOffset[0] );
+
+ // search backwards; same match expected, with start and end offsets swapped
+ aRes = m_xSearch->searchBackward( str, endPos, startPos );
+ // subRegExpressions was 0 here (presumably: no match) before the change under test.
+ CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.startOffset[0] );
+ CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.endOffset[0] );
+}
+
void TestTextSearch::testTdf138410()
{
OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628"_ustr);
diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx
index 816e162c1e6e..dbb49f494781 100644
--- a/i18npool/source/search/textsearch.cxx
+++ b/i18npool/source/search/textsearch.cxx
@@ -93,6 +93,30 @@ bool isSimpleRegexTrans( TransliterationFlags n )
{
return bool(maskSimpleRegexTrans(n));
}
+
+/** Tells whether a string contains a typographic quotation mark.
+ *
+ * Looks for the single quotation marks U+2018, U+2019, U+201A, U+201B and
+ * the double quotation marks U+201C, U+201D, U+201E, U+201F, i.e. exactly
+ * the characters that replacePunctuation() maps to ASCII.
+ *
+ * @param rStr  string to inspect; it is only read, never modified
+ * @return true if at least one of the eight characters occurs in rStr
+ */
+bool isReplacePunctuation( const OUString &rStr )
+{
+    return rStr.indexOf(u'\u2018') > -1 ||
+           rStr.indexOf(u'\u2019') > -1 ||
+           rStr.indexOf(u'\u201A') > -1 ||
+           rStr.indexOf(u'\u201B') > -1 ||
+           rStr.indexOf(u'\u201C') > -1 ||
+           rStr.indexOf(u'\u201D') > -1 ||
+           rStr.indexOf(u'\u201E') > -1 ||
+           rStr.indexOf(u'\u201F') > -1;
+}
+
+/** Maps typographic quotation marks to their ASCII counterparts.
+ *
+ * U+2018, U+2019, U+201A and U+201B become the ASCII apostrophe ('),
+ * U+201C, U+201D, U+201E and U+201F become the ASCII quotation mark (").
+ * Each replacement is character-for-character.
+ *
+ * @param rStr  source string; it is only read, never modified
+ * @return a new string with the eight characters above replaced
+ */
+OUString replacePunctuation( const OUString &rStr )
+{
+    return rStr.replace(u'\u2018', '\'')
+               .replace(u'\u2019', '\'')
+               .replace(u'\u201A', '\'')
+               .replace(u'\u201B', '\'')
+               .replace(u'\u201C', '"')
+               .replace(u'\u201D', '"')
+               .replace(u'\u201E', '"')
+               .replace(u'\u201F', '"');
+}
};
TextSearch::TextSearch(const Reference < XComponentContext > & rxContext)
@@ -139,10 +163,10 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions )
// match is not case-altered, leave case-(in)sensitive to regex engine.
transliterateFlags &= ~TransliterationFlags::IGNORE_CASE;
}
- else if ( aSrchPara.searchString.indexOf('\'') > - 1 )
+ else if ( aSrchPara.searchString.indexOf('\'') > - 1 || aSrchPara.searchString.indexOf('"') > - 1 )
{
bSearchApostrophe = true;
- bReplaceApostrophe = aSrchPara.searchString.indexOf(u'\u2019') > -1;
+ bReplaceApostrophe = isReplacePunctuation(aSrchPara.searchString);
}
// Create Transliteration class
@@ -215,7 +239,7 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions )
}
if ( bReplaceApostrophe )
- sSrchStr = sSrchStr.replace(u'\u2019', '\'');
+ sSrchStr = replacePunctuation(sSrchStr);
// Take the new SearchOptions2::AlgorithmType2 field and ignore
// SearchOptions::algorithmType
@@ -308,7 +332,7 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
// in non-regex mode, allow searching typographical apostrophe with the ASCII one
// to avoid regression after using automatic conversion to U+2019 during typing in Writer
- bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1;
+ bool bReplaceApostrophe = bSearchApostrophe && isReplacePunctuation(in_str);
bUsePrimarySrchStr = true;
@@ -340,7 +364,7 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
in_str = xTranslit->transliterate(searchStr, nInStartPos, nInEndPos - nInStartPos, offset);
if ( bReplaceApostrophe )
- in_str = in_str.replace(u'\u2019', '\'');
+ in_str = replacePunctuation(in_str);
// JP 20.6.2001: also the start and end positions must be corrected!
sal_Int32 newStartPos =
@@ -447,7 +471,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
// in non-regex mode, allow searching typographical apostrophe with the ASCII one
// to avoid regression after using automatic conversion to U+2019 during typing in Writer
- bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1;
+ bool bReplaceApostrophe = bSearchApostrophe && isReplacePunctuation(in_str);
bUsePrimarySrchStr = true;
@@ -458,7 +482,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
in_str = xTranslit->transliterate( searchStr, endPos, startPos - endPos, offset );
if ( bReplaceApostrophe )
- in_str = in_str.replace(u'\u2019', '\'');
+ in_str = replacePunctuation(in_str);
// JP 20.6.2001: also the start and end positions must be corrected!
sal_Int32 const newStartPos = (startPos < searchStr.getLength())
@@ -508,7 +532,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
else
{
if ( bReplaceApostrophe )
- in_str = in_str.replace(u'\u2019', '\'');
+ in_str = replacePunctuation(in_str);
sres = (this->*fnBackward)( in_str, startPos, endPos );
}