diff options
author | Caolán McNamara <caolanm@redhat.com> | 2012-04-05 11:09:35 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2012-04-05 13:59:07 +0100 |
commit | 01028864214a1b1ee6bf3f00fca142baf7b1d40c (patch) | |
tree | 03fe353f18740a59da87f55e0884afcda7e09405 | |
parent | 53600134ec737d50808c0f630b536229ad3310bb (diff) |
Resolves: fdo#45271, i#17964 count CJK words the way that's expected by users
-rw-r--r-- | sw/inc/docstat.hxx | 1 |
-rw-r--r-- | sw/inc/ndtxt.hxx | 2 |
-rw-r--r-- | sw/qa/core/swdoc-test.cxx | 51 |
-rw-r--r-- | sw/source/core/doc/docstat.cxx | 2 |
-rw-r--r-- | sw/source/core/txtnode/txtedt.cxx | 76 |
-rw-r--r-- | sw/source/ui/dialog/wordcountdialog.cxx | 2 |
6 files changed, 124 insertions, 10 deletions
diff --git a/sw/inc/docstat.hxx b/sw/inc/docstat.hxx index 554d506be414..1869120b15ee 100644 --- a/sw/inc/docstat.hxx +++ b/sw/inc/docstat.hxx @@ -43,6 +43,7 @@ struct SW_DLLPUBLIC SwDocStat /// all paragraphs, including empty/hidden ones sal_uLong nAllPara; sal_uLong nWord; + sal_uLong nAsianWord; sal_uLong nChar; sal_uLong nCharExcludingSpaces; sal_Bool bModified; diff --git a/sw/inc/ndtxt.hxx b/sw/inc/ndtxt.hxx index 21e9fb4aa71e..83b3e2f35a4a 100644 --- a/sw/inc/ndtxt.hxx +++ b/sw/inc/ndtxt.hxx @@ -173,6 +173,8 @@ class SW_DLLPUBLIC SwTxtNode: public SwCntntNode, public ::sfx2::Metadatable // SW_DLLPRIVATE void SetParaNumberOfWords( sal_uLong nTmpWords ) const; SW_DLLPRIVATE sal_uLong GetParaNumberOfWords() const; + SW_DLLPRIVATE void SetParaNumberOfAsianWords( sal_uLong nTmpAsianWords ) const; + SW_DLLPRIVATE sal_uLong GetParaNumberOfAsianWords() const; SW_DLLPRIVATE void SetParaNumberOfChars( sal_uLong nTmpChars ) const; SW_DLLPRIVATE sal_uLong GetParaNumberOfChars() const; SW_DLLPRIVATE void SetParaNumberOfCharsExcludingSpaces( sal_uLong nTmpChars ) const; diff --git a/sw/qa/core/swdoc-test.cxx b/sw/qa/core/swdoc-test.cxx index 72c5ad32e276..8469c0d38c7c 100644 --- a/sw/qa/core/swdoc-test.cxx +++ b/sw/qa/core/swdoc-test.cxx @@ -113,7 +113,7 @@ void SwDocTest::testPageDescName() CPPUNIT_ASSERT_MESSAGE("GetPageDescName results must be unique", aResults.size() == 3); } -//See https://bugs.freedesktop.org/show_bug.cgi?id=32463 for motivation +//See https://bugs.freedesktop.org/show_bug.cgi?id=32463 void SwDocTest::testFileNameFields() { //Here's a file name with some chars in it that will be %% encoded, when expanding @@ -225,8 +225,8 @@ void SwDocTest::testSwScanner() CPPUNIT_ASSERT_MESSAGE("Has Text Node", pTxtNode); - //See https://bugs.freedesktop.org/show_bug.cgi?id=40449 for motivation - //See https://bugs.freedesktop.org/show_bug.cgi?id=39365 for motivation + //See https://bugs.freedesktop.org/show_bug.cgi?id=40449 + //See 
https://bugs.freedesktop.org/show_bug.cgi?id=39365 //Use a temporary rtl::OUString as the arg, as that's the trouble behind //fdo#40449 and fdo#39365 { @@ -248,7 +248,7 @@ void SwDocTest::testSwScanner() rWorld.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("World"))); } - //See https://www.libreoffice.org/bugzilla/show_bug.cgi?id=45271 for motivation + //See https://www.libreoffice.org/bugzilla/show_bug.cgi?id=45271 { const sal_Unicode IDEOGRAPHICFULLSTOP_D[] = { 0x3002, 'D' }; @@ -261,13 +261,51 @@ void SwDocTest::testSwScanner() m_pDoc->InsertPoolItem(aPaM, aWestLangItem, 0 ); SwDocStat aDocStat; + pTxtNode = aPaM.GetNode()->GetTxtNode(); pTxtNode->CountWords(aDocStat, 0, SAL_N_ELEMENTS(IDEOGRAPHICFULLSTOP_D)); CPPUNIT_ASSERT_MESSAGE("Should be 2", aDocStat.nChar == 2); CPPUNIT_ASSERT_MESSAGE("Should be 2", aDocStat.nCharExcludingSpaces == 2); } + { + const sal_Unicode test[] = + { + 0x3053, 0x306E, 0x65E5, 0x672C, 0x8A9E, 0x306F, 0x6B63, 0x3057, + 0x304F, 0x6570, 0x3048, 0x3089, 0x308C, 0x308B, 0x3067, 0x3057, + 0x3087, 0x3046, 0x304B, 0x3002, 0x0041, 0x006E, 0x0064, 0x0020, + 0x006C, 0x0065, 0x0074, 0x0027, 0x0073, 0x0020, 0x0074, 0x0068, + 0x0072, 0x006F, 0x0077, 0x0020, 0x0073, 0x006F, 0x006D, 0x0065, + 0x0020, 0x0045, 0x006E, 0x0067, 0x006C, 0x0069, 0x0073, 0x0068, + 0x0020, 0x0069, 0x006E, 0x0020, 0x0074, 0x006F, 0x0020, 0x006D, + 0x0061, 0x006B, 0x0065, 0x0020, 0x0069, 0x0074, 0x0020, 0x0069, + 0x006E, 0x0074, 0x0065, 0x0072, 0x0065, 0x0073, 0x0074, 0x0069, + 0x006E, 0x0067, 0x002E, 0x0020, 0x0020, 0x305D, 0x3057, 0x3066, + 0x3001, 0x307E, 0x305F, 0x65E5, 0x672C, 0x8A9E, 0x3000, 0x3000, + 0x3067, 0x3082, 0x4ECA, 0x56DE, 0x306F, 0x7A7A, 0x767D, 0x3092, + 0x3000, 0x3000, 0x5165, 0x308C, 0x307E, 0x3057, 0x305F, 0x3002, + 0x0020, 0x0020, 0x0053, 0x006F, 0x0020, 0x0068, 0x006F, 0x0077, + 0x0020, 0x0064, 0x006F, 0x0065, 0x0073, 0x0020, 0x0074, 0x0068, + 0x0069, 0x0073, 0x0020, 0x0064, 0x006F, 0x003F, 0x0020, 0x0020 + }; + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + 
m_pDoc->InsertString(aPaM, rtl::OUString(test, + SAL_N_ELEMENTS(test))); + + SvxLanguageItem aCJKLangItem( LANGUAGE_JAPANESE, RES_CHRATR_CJK_LANGUAGE ); + SvxLanguageItem aWestLangItem( LANGUAGE_ENGLISH_US, RES_CHRATR_LANGUAGE ); + m_pDoc->InsertPoolItem(aPaM, aCJKLangItem, 0 ); + m_pDoc->InsertPoolItem(aPaM, aWestLangItem, 0 ); + + SwDocStat aDocStat; + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, SAL_N_ELEMENTS(test)); + CPPUNIT_ASSERT_MESSAGE("58 words", aDocStat.nWord == 58); + CPPUNIT_ASSERT_MESSAGE("43 Asian characters and Korean syllables", aDocStat.nAsianWord == 43); + CPPUNIT_ASSERT_MESSAGE("105 non-whitespace chars", aDocStat.nCharExcludingSpaces == 105); + CPPUNIT_ASSERT_MESSAGE("128 characters", aDocStat.nChar == 128); + } - //See https://issues.apache.org/ooo/show_bug.cgi?id=89042 for motivation + //See https://issues.apache.org/ooo/show_bug.cgi?id=89042 { SwDocStat aDocStat; @@ -298,8 +336,7 @@ void SwDocTest::testSwScanner() } } - -//See https://bugs.freedesktop.org/show_bug.cgi?id=40599 for motivation +//See https://bugs.freedesktop.org/show_bug.cgi?id=40599 void SwDocTest::testGraphicAnchorDeletion() { CPPUNIT_ASSERT_MESSAGE("Expected initial 0 count", m_pDoc->GetDocStat().nChar == 0); diff --git a/sw/source/core/doc/docstat.cxx b/sw/source/core/doc/docstat.cxx index c84f943b8516..fd0c6e65aa57 100644 --- a/sw/source/core/doc/docstat.cxx +++ b/sw/source/core/doc/docstat.cxx @@ -43,6 +43,7 @@ SwDocStat::SwDocStat() : nPara(1), nAllPara(1), nWord(0), + nAsianWord(0), nChar(0), nCharExcludingSpaces(0), bModified(sal_True) @@ -61,6 +62,7 @@ void SwDocStat::Reset() nPara = 1; nAllPara= 1; nWord = 0; + nAsianWord = 0; nChar = 0; nCharExcludingSpaces = 0; bModified = sal_True; diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx index a186cc646857..cb5e4724fe8c 100644 --- a/sw/source/core/txtnode/txtedt.cxx +++ b/sw/source/core/txtnode/txtedt.cxx @@ -680,6 +680,44 @@ SwScanner::SwScanner( const 
SwTxtNode& rNd, const rtl::OUString& rTxt, } } +namespace +{ + //fdo#45271 for Asian words count characters instead of words + sal_Int32 forceEachAsianCodePointToWord(const rtl::OUString &rText, sal_Int32 nBegin, sal_Int32 nLen) + { + if (nLen > 1) + { + const uno::Reference< XBreakIterator > &rxBreak = pBreakIt->GetBreakIter(); + + sal_uInt16 nCurrScript = rxBreak->getScriptType( rText, nBegin ); + + sal_Int32 indexUtf16 = nBegin; + rText.iterateCodePoints(&indexUtf16, 1); + + //First character is Asian, consider it a word :-( + if (nCurrScript == i18n::ScriptType::ASIAN) + { + nLen = indexUtf16 - nBegin; + return nLen; + } + + //First character was not Asian, consider appearance of any Asian character + //to be the end of the word + while (indexUtf16 < nBegin + nLen) + { + nCurrScript = rxBreak->getScriptType( rText, indexUtf16 ); + if (nCurrScript == i18n::ScriptType::ASIAN) + { + nLen = indexUtf16 - nBegin; + return nLen; + } + rText.iterateCodePoints(&indexUtf16, 1); + } + } + return nLen; + } +} + sal_Bool SwScanner::NextWord() { nBegin = nBegin + nLen; @@ -802,6 +840,9 @@ sal_Bool SwScanner::NextWord() if( ! nLen ) return sal_False; + if ( nWordType == i18n::WordType::WORD_COUNT ) + nLen = forceEachAsianCodePointToWord(aText, nBegin, nLen); + aWord = aText.copy( nBegin, nLen ); return sal_True; @@ -1812,6 +1853,7 @@ void SwTxtNode::CountWords( SwDocStat& rStat, { // accumulate into DocStat record to return the values rStat.nWord += GetParaNumberOfWords(); + rStat.nAsianWord += GetParaNumberOfAsianWords(); rStat.nChar += GetParaNumberOfChars(); rStat.nCharExcludingSpaces += GetParaNumberOfCharsExcludingSpaces(); return; @@ -1842,7 +1884,8 @@ void SwTxtNode::CountWords( SwDocStat& rStat, // all counts exclude hidden paras and hidden+redlined within para // definition of space/white chars in SwScanner (and BreakIter!) 
// uses both lcl_IsSkippableWhiteSpace and BreakIter getWordBoundary in SwScanner - sal_uInt32 nTmpWords = 0; // count of all contiguous blocks of non-white chars + sal_uInt32 nTmpWords = 0; // count of all words + sal_uInt32 nTmpAsianWords = 0; //count of all Asian codepoints sal_uInt32 nTmpChars = 0; // count of all chars sal_uInt32 nTmpCharsExcludingSpaces = 0; // all non-white chars @@ -1862,7 +1905,10 @@ void SwTxtNode::CountWords( SwDocStat& rStat, if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() )) { ++nTmpWords; - nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(aScanner.GetWord()); + const rtl::OUString &rWord = aScanner.GetWord(); + if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN) + ++nTmpAsianWords; + nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); } } } @@ -1890,7 +1936,10 @@ void SwTxtNode::CountWords( SwDocStat& rStat, while ( aScanner.NextWord() ) { ++nTmpWords; - nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(aScanner.GetWord()); + const rtl::OUString &rWord = aScanner.GetWord(); + if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN) + ++nTmpAsianWords; + nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); } nTmpChars = pBreakIt->getGraphemeCount(aNumString); @@ -1909,12 +1958,14 @@ void SwTxtNode::CountWords( SwDocStat& rStat, if ( isCountAll ) { SetParaNumberOfWords( nTmpWords ); + SetParaNumberOfAsianWords( nTmpAsianWords ); SetParaNumberOfChars( nTmpChars ); SetParaNumberOfCharsExcludingSpaces( nTmpCharsExcludingSpaces ); SetWordCountDirty( false ); } // accumulate into DocStat record to return the values rStat.nWord += nTmpWords; + rStat.nAsianWord += nTmpAsianWords; rStat.nChar += nTmpChars; rStat.nCharExcludingSpaces += nTmpCharsExcludingSpaces; } @@ -1928,6 +1979,7 @@ struct SwParaIdleData_Impl SwGrammarMarkUp* pGrammarCheck; // for grammar checking / proof reading SwWrongList* pSmartTags; sal_uLong nNumberOfWords; + sal_uLong 
nNumberOfAsianWords; sal_uLong nNumberOfChars; sal_uLong nNumberOfCharsExcludingSpaces; bool bWordCountDirty; @@ -1941,6 +1993,7 @@ struct SwParaIdleData_Impl pGrammarCheck ( 0 ), pSmartTags ( 0 ), nNumberOfWords ( 0 ), + nNumberOfAsianWords ( 0 ), nNumberOfChars ( 0 ), nNumberOfCharsExcludingSpaces ( 0 ), bWordCountDirty ( true ), @@ -2033,10 +2086,25 @@ void SwTxtNode::SetParaNumberOfWords( sal_uLong nNew ) const m_pParaIdleData_Impl->nNumberOfWords = nNew; } } + sal_uLong SwTxtNode::GetParaNumberOfWords() const { return m_pParaIdleData_Impl ? m_pParaIdleData_Impl->nNumberOfWords : 0; } + +void SwTxtNode::SetParaNumberOfAsianWords( sal_uLong nNew ) const +{ + if ( m_pParaIdleData_Impl ) + { + m_pParaIdleData_Impl->nNumberOfAsianWords = nNew; + } +} + +sal_uLong SwTxtNode::GetParaNumberOfAsianWords() const +{ + return m_pParaIdleData_Impl ? m_pParaIdleData_Impl->nNumberOfAsianWords : 0; +} + void SwTxtNode::SetParaNumberOfChars( sal_uLong nNew ) const { if ( m_pParaIdleData_Impl ) @@ -2044,10 +2112,12 @@ void SwTxtNode::SetParaNumberOfChars( sal_uLong nNew ) const m_pParaIdleData_Impl->nNumberOfChars = nNew; } } + sal_uLong SwTxtNode::GetParaNumberOfChars() const { return m_pParaIdleData_Impl ? m_pParaIdleData_Impl->nNumberOfChars : 0; } + void SwTxtNode::SetWordCountDirty( bool bNew ) const { if ( m_pParaIdleData_Impl ) diff --git a/sw/source/ui/dialog/wordcountdialog.cxx b/sw/source/ui/dialog/wordcountdialog.cxx index 3fdc8958ddc0..f5b1c3b98824 100644 --- a/sw/source/ui/dialog/wordcountdialog.cxx +++ b/sw/source/ui/dialog/wordcountdialog.cxx @@ -42,6 +42,8 @@ #include <swwait.hxx> #include <wrtsh.hxx> +//TODO, add asian/non-asian word count to UI when CJK mode is enabled. + SwWordCountDialog::SwWordCountDialog(Window* pParent) : #if defined _MSC_VER #pragma warning (disable : 4355) |