diff options
author | Caolán McNamara <caolanm@redhat.com> | 2012-03-30 12:21:14 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2012-03-30 14:07:21 +0100 |
commit | 417823d4c23100713813171865d70f5e2719ace2 (patch) | |
tree | 02021a89e8cc755957947916dfb51b18ce05d977 /sw | |
parent | 45e4f9e99b75ef8a125228bc42df7fb98af9d7bc (diff) |
count words in text with mixed script punctuation correctly
Diffstat (limited to 'sw')
-rw-r--r-- | sw/qa/core/swdoc-test.cxx | 89 | ||||
-rw-r--r-- | sw/source/core/txtnode/txtedt.cxx | 4 |
2 files changed, 76 insertions, 17 deletions
diff --git a/sw/qa/core/swdoc-test.cxx b/sw/qa/core/swdoc-test.cxx index d7d453f4f9c6..6106da20c34f 100644 --- a/sw/qa/core/swdoc-test.cxx +++ b/sw/qa/core/swdoc-test.cxx @@ -36,6 +36,8 @@ #include <tools/urlobj.hxx> #include <unotools/tempfile.hxx> +#include <editeng/langitem.hxx> + #include <sfx2/app.hxx> #include <sfx2/docfilt.hxx> #include <sfx2/docfile.hxx> @@ -214,7 +216,6 @@ void SwDocTest::testUserPerceivedCharCount() CPPUNIT_ASSERT_MESSAGE("Surrogate Pair should be counted as single character", nCount == 1); } -//See https://bugs.freedesktop.org/show_bug.cgi?id=40449 for motivation void SwDocTest::testSwScanner() { SwNodeIndex aIdx(m_pDoc->GetNodes().GetEndOfContent(), -1); @@ -224,26 +225,80 @@ void SwDocTest::testSwScanner() CPPUNIT_ASSERT_MESSAGE("Has Text Node", pTxtNode); + //See https://bugs.freedesktop.org/show_bug.cgi?id=40449 for motivation + //See https://bugs.freedesktop.org/show_bug.cgi?id=39365 for motivation //Use a temporary rtl::OUString as the arg, as that's the trouble behind //fdo#40449 and fdo#39365 - SwScanner aScanner(*pTxtNode, - rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Hello World")), - 0, 0, i18n::WordType::DICTIONARY_WORD, 0, - RTL_CONSTASCII_LENGTH("Hello World")); - - bool bFirstOk = aScanner.NextWord(); - CPPUNIT_ASSERT_MESSAGE("First Token", bFirstOk); - const rtl::OUString &rHello = aScanner.GetWord(); - CPPUNIT_ASSERT_MESSAGE("Should be Hello", - rHello.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("Hello"))); - - bool bSecondOk = aScanner.NextWord(); - CPPUNIT_ASSERT_MESSAGE("Second Token", bSecondOk); - const rtl::OUString &rWorld = aScanner.GetWord(); - CPPUNIT_ASSERT_MESSAGE("Should be World", - rWorld.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("World"))); + { + SwScanner aScanner(*pTxtNode, + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Hello World")), + 0, 0, i18n::WordType::DICTIONARY_WORD, 0, + RTL_CONSTASCII_LENGTH("Hello World")); + + bool bFirstOk = aScanner.NextWord(); + CPPUNIT_ASSERT_MESSAGE("First Token", bFirstOk); + 
const rtl::OUString &rHello = aScanner.GetWord(); + CPPUNIT_ASSERT_MESSAGE("Should be Hello", + rHello.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("Hello"))); + + bool bSecondOk = aScanner.NextWord(); + CPPUNIT_ASSERT_MESSAGE("Second Token", bSecondOk); + const rtl::OUString &rWorld = aScanner.GetWord(); + CPPUNIT_ASSERT_MESSAGE("Should be World", + rWorld.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("World"))); + } + + //See https://www.libreoffice.org/bugzilla/show_bug.cgi?id=45271 for motivation + { + const sal_Unicode IDEOGRAPHICFULLSTOP_D[] = { 0x3002, 'D' }; + + m_pDoc->InsertString(aPaM, rtl::OUString(IDEOGRAPHICFULLSTOP_D, + SAL_N_ELEMENTS(IDEOGRAPHICFULLSTOP_D))); + + SvxLanguageItem aCJKLangItem( LANGUAGE_CHINESE_SIMPLIFIED, RES_CHRATR_CJK_LANGUAGE ); + SvxLanguageItem aWestLangItem( LANGUAGE_ENGLISH_US, RES_CHRATR_LANGUAGE ); + m_pDoc->InsertPoolItem(aPaM, aCJKLangItem, 0 ); + m_pDoc->InsertPoolItem(aPaM, aWestLangItem, 0 ); + + SwDocStat aDocStat; + pTxtNode->CountWords(aDocStat, 0, SAL_N_ELEMENTS(IDEOGRAPHICFULLSTOP_D)); + + CPPUNIT_ASSERT_MESSAGE("Should be 2", aDocStat.nChar == 2); + CPPUNIT_ASSERT_MESSAGE("Should be 2", aDocStat.nCharExcludingSpaces == 2); + } + + //See https://issues.apache.org/ooo/show_bug.cgi?id=89042 for motivation + { + SwDocStat aDocStat; + + const sal_Unicode aShouldBeThree[] = { + 0x0053, 0x0068, 0x006F, 0x0075, 0x006C, 0x0064, 0x0020, + 0x2018, 0x0062, 0x0065, 0x0020, 0x0074, 0x0068, 0x0072, + 0x0065, 0x0065, 0x2019 + }; + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, rtl::OUString(aShouldBeThree, SAL_N_ELEMENTS(aShouldBeThree))); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, SAL_N_ELEMENTS(aShouldBeThree)); + CPPUNIT_ASSERT_MESSAGE("Should be 3", aDocStat.nWord == 3); + + const sal_Unicode aShouldBeFive[] = { + 0x0046, 0x0072, 0x0065, 0x006E, 0x0063, 0x0068, 0x0020, + 0x00AB, 0x00A0, 0x0073, 0x0061, 0x0076, 0x006F, 0x0069, + 0x0072, 0x0020, 0x0063, 0x0061, 0x006C, 0x0063, 
0x0075, + 0x006C, 0x0065, 0x0072, 0x00A0, 0x00BB + }; + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, rtl::OUString(aShouldBeFive, SAL_N_ELEMENTS(aShouldBeFive))); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, SAL_N_ELEMENTS(aShouldBeFive)); + CPPUNIT_ASSERT_MESSAGE("Should be 5", aDocStat.nWord == 5); + } } + //See https://bugs.freedesktop.org/show_bug.cgi?id=40599 for motivation void SwDocTest::testGraphicAnchorDeletion() { diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx index 71749fdeb1bc..a186cc646857 100644 --- a/sw/source/core/txtnode/txtedt.cxx +++ b/sw/source/core/txtnode/txtedt.cxx @@ -723,6 +723,10 @@ sal_Bool SwScanner::NextWord() pBreakIt->GetLocale( aCurrLang ), nWordType, sal_True ); OSL_ENSURE( aBound.endPos >= aBound.startPos, "broken aBound result" ); + // we don't want to include preceding text + if (aBound.startPos < nBegin) + aBound.startPos = nBegin; + //no word boundaries could be found if(aBound.endPos == aBound.startPos) return sal_False; |