diff options
-rw-r--r-- | i18npool/qa/cppunit/test_textsearch.cxx | 88 | ||||
-rw-r--r-- | i18npool/source/search/textsearch.cxx | 40 |
2 files changed, 120 insertions, 8 deletions
diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx index 269380a90910..38cc099b7c95 100644 --- a/i18npool/qa/cppunit/test_textsearch.cxx +++ b/i18npool/qa/cppunit/test_textsearch.cxx @@ -38,6 +38,7 @@ public: void testSearches(); void testWildcardSearch(); void testApostropheSearch(); + void testQuotationMarkSearch(); void testTdf138410(); CPPUNIT_TEST_SUITE(TestTextSearch); @@ -45,6 +46,7 @@ public: CPPUNIT_TEST(testSearches); CPPUNIT_TEST(testWildcardSearch); CPPUNIT_TEST(testApostropheSearch); + CPPUNIT_TEST(testQuotationMarkSearch); CPPUNIT_TEST(testTdf138410); CPPUNIT_TEST_SUITE_END(); private: @@ -404,6 +406,92 @@ void TestTextSearch::testApostropheSearch() CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); } +void TestTextSearch::testQuotationMarkSearch() +{ + // A) find typographic quotation marks also by using ASCII ones + OUString str( u"“x”, „y‟, ‘z’, ‚a‛"_ustr ); + sal_Int32 startPos = 0, endPos = str.getLength(); + + // set options + util::SearchOptions aOptions; + aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE; + aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE; + aOptions.searchString = "\"x\""; + aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE + | TransliterationFlags::IGNORE_WIDTH); + m_xSearch->setOptions( aOptions ); + + util::SearchResult aRes; + + // search forward + aRes = m_xSearch->searchForward( str, startPos, endPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] ); + + // search backwards + aRes = m_xSearch->searchBackward( str, endPos, startPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.endOffset[0] ); + + // B) + aOptions.searchString = "\"y\""; + m_xSearch->setOptions( aOptions ); + + // search forward + aRes = m_xSearch->searchForward( str, startPos, endPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.endOffset[0] ); + + // search backwards + aRes = m_xSearch->searchBackward( str, endPos, startPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.endOffset[0] ); + + // C) + aOptions.searchString = "'z'"; + m_xSearch->setOptions( aOptions ); + + // search forward + aRes = m_xSearch->searchForward( str, startPos, endPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.endOffset[0] ); + + // search backwards + aRes = m_xSearch->searchBackward( str, endPos, startPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.endOffset[0] ); + + // D) + aOptions.searchString = "'a'"; + m_xSearch->setOptions( aOptions ); + + // search forward + aRes = m_xSearch->searchForward( str, startPos, endPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.endOffset[0] ); + + // search backwards + aRes = m_xSearch->searchBackward( str, endPos, startPos ); + // This was 0. + CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.startOffset[0] ); + CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.endOffset[0] ); +} + void TestTextSearch::testTdf138410() { OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628"_ustr); diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index 816e162c1e6e..dbb49f494781 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -93,6 +93,30 @@ bool isSimpleRegexTrans( TransliterationFlags n ) { return bool(maskSimpleRegexTrans(n)); } + +bool isReplacePunctuation( OUString &rStr ) +{ + return rStr.indexOf(u'\u2018') > -1 || + rStr.indexOf(u'\u2019') > -1 || + rStr.indexOf(u'\u201A') > -1 || + rStr.indexOf(u'\u201B') > -1 || + rStr.indexOf(u'\u201C') > -1 || + rStr.indexOf(u'\u201D') > -1 || + rStr.indexOf(u'\u201E') > -1 || + rStr.indexOf(u'\u201F') > -1; +} + +OUString replacePunctuation( OUString &rStr ) +{ + return rStr.replace(u'\u2018', '\'') + .replace(u'\u2019', '\'') + .replace(u'\u201A', '\'') + .replace(u'\u201B', '\'') + .replace(u'\u201C', '"') + .replace(u'\u201D', '"') + .replace(u'\u201E', '"') + .replace(u'\u201F', '"'); +} }; TextSearch::TextSearch(const Reference < XComponentContext > & rxContext) @@ -139,10 +163,10 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions ) // match is not case-altered, leave case-(in)sensitive to regex engine. transliterateFlags &= ~TransliterationFlags::IGNORE_CASE; } - else if ( aSrchPara.searchString.indexOf('\'') > - 1 ) + else if ( aSrchPara.searchString.indexOf('\'') > - 1 || aSrchPara.searchString.indexOf('"') > - 1 ) { bSearchApostrophe = true; - bReplaceApostrophe = aSrchPara.searchString.indexOf(u'\u2019') > -1; + bReplaceApostrophe = isReplacePunctuation(aSrchPara.searchString); } // Create Transliteration class @@ -215,7 +239,7 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions ) } if ( bReplaceApostrophe ) - sSrchStr = sSrchStr.replace(u'\u2019', '\''); + sSrchStr = replacePunctuation(sSrchStr); // Take the new SearchOptions2::AlgorithmType2 field and ignore // SearchOptions::algorithmType @@ -308,7 +332,7 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta // in non-regex mode, allow searching typographical apostrophe with the ASCII one // to avoid regression after using automatic conversion to U+2019 during typing in Writer - bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1; + bool bReplaceApostrophe = bSearchApostrophe && isReplacePunctuation(in_str); bUsePrimarySrchStr = true; @@ -340,7 +364,7 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta in_str = xTranslit->transliterate(searchStr, nInStartPos, nInEndPos - nInStartPos, offset); if ( bReplaceApostrophe ) - in_str = in_str.replace(u'\u2019', '\''); + in_str = replacePunctuation(in_str); // JP 20.6.2001: also the start and end positions must be corrected! sal_Int32 newStartPos = @@ -447,7 +471,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st // in non-regex mode, allow searching typographical apostrophe with the ASCII one // to avoid regression after using automatic conversion to U+2019 during typing in Writer - bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1; + bool bReplaceApostrophe = bSearchApostrophe && isReplacePunctuation(in_str); bUsePrimarySrchStr = true; @@ -458,7 +482,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st in_str = xTranslit->transliterate( searchStr, endPos, startPos - endPos, offset ); if ( bReplaceApostrophe ) - in_str = in_str.replace(u'\u2019', '\''); + in_str = replacePunctuation(in_str); // JP 20.6.2001: also the start and end positions must be corrected! sal_Int32 const newStartPos = (startPos < searchStr.getLength()) @@ -508,7 +532,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st else { if ( bReplaceApostrophe ) - in_str = in_str.replace(u'\u2019', '\''); + in_str = replacePunctuation(in_str); sres = (this->*fnBackward)( in_str, startPos, endPos ); } |