summary refs log tree commit diff
path: root/sw
diff options
context:
space:
mode:
author Caolán McNamara <caolanm@redhat.com> 2012-04-05 11:09:35 +0100
committer Caolán McNamara <caolanm@redhat.com> 2012-04-05 13:59:07 +0100
commit 01028864214a1b1ee6bf3f00fca142baf7b1d40c (patch)
tree 03fe353f18740a59da87f55e0884afcda7e09405 /sw
parent 53600134ec737d50808c0f630b536229ad3310bb (diff)
Resolves: fdo#45271, i#17964 count CJK words the way that's expected by users
Diffstat (limited to 'sw')
-rw-r--r-- sw/inc/docstat.hxx 1
-rw-r--r-- sw/inc/ndtxt.hxx 2
-rw-r--r-- sw/qa/core/swdoc-test.cxx 51
-rw-r--r-- sw/source/core/doc/docstat.cxx 2
-rw-r--r-- sw/source/core/txtnode/txtedt.cxx 76
-rw-r--r-- sw/source/ui/dialog/wordcountdialog.cxx 2
6 files changed, 124 insertions, 10 deletions
diff --git a/sw/inc/docstat.hxx b/sw/inc/docstat.hxx
index 554d506be414..1869120b15ee 100644
--- a/sw/inc/docstat.hxx
+++ b/sw/inc/docstat.hxx
@@ -43,6 +43,7 @@ struct SW_DLLPUBLIC SwDocStat
/// all paragraphs, including empty/hidden ones
sal_uLong nAllPara;
sal_uLong nWord;
+ sal_uLong nAsianWord;
sal_uLong nChar;
sal_uLong nCharExcludingSpaces;
sal_Bool bModified;
diff --git a/sw/inc/ndtxt.hxx b/sw/inc/ndtxt.hxx
index 21e9fb4aa71e..83b3e2f35a4a 100644
--- a/sw/inc/ndtxt.hxx
+++ b/sw/inc/ndtxt.hxx
@@ -173,6 +173,8 @@ class SW_DLLPUBLIC SwTxtNode: public SwCntntNode, public ::sfx2::Metadatable
//
SW_DLLPRIVATE void SetParaNumberOfWords( sal_uLong nTmpWords ) const;
SW_DLLPRIVATE sal_uLong GetParaNumberOfWords() const;
+ SW_DLLPRIVATE void SetParaNumberOfAsianWords( sal_uLong nTmpAsianWords ) const;
+ SW_DLLPRIVATE sal_uLong GetParaNumberOfAsianWords() const;
SW_DLLPRIVATE void SetParaNumberOfChars( sal_uLong nTmpChars ) const;
SW_DLLPRIVATE sal_uLong GetParaNumberOfChars() const;
SW_DLLPRIVATE void SetParaNumberOfCharsExcludingSpaces( sal_uLong nTmpChars ) const;
diff --git a/sw/qa/core/swdoc-test.cxx b/sw/qa/core/swdoc-test.cxx
index 72c5ad32e276..8469c0d38c7c 100644
--- a/sw/qa/core/swdoc-test.cxx
+++ b/sw/qa/core/swdoc-test.cxx
@@ -113,7 +113,7 @@ void SwDocTest::testPageDescName()
CPPUNIT_ASSERT_MESSAGE("GetPageDescName results must be unique", aResults.size() == 3);
}
-//See https://bugs.freedesktop.org/show_bug.cgi?id=32463 for motivation
+//See https://bugs.freedesktop.org/show_bug.cgi?id=32463
void SwDocTest::testFileNameFields()
{
//Here's a file name with some chars in it that will be %% encoded, when expanding
@@ -225,8 +225,8 @@ void SwDocTest::testSwScanner()
CPPUNIT_ASSERT_MESSAGE("Has Text Node", pTxtNode);
- //See https://bugs.freedesktop.org/show_bug.cgi?id=40449 for motivation
- //See https://bugs.freedesktop.org/show_bug.cgi?id=39365 for motivation
+ //See https://bugs.freedesktop.org/show_bug.cgi?id=40449
+ //See https://bugs.freedesktop.org/show_bug.cgi?id=39365
//Use a temporary rtl::OUString as the arg, as that's the trouble behind
//fdo#40449 and fdo#39365
{
@@ -248,7 +248,7 @@ void SwDocTest::testSwScanner()
rWorld.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("World")));
}
- //See https://www.libreoffice.org/bugzilla/show_bug.cgi?id=45271 for motivation
+ //See https://www.libreoffice.org/bugzilla/show_bug.cgi?id=45271
{
const sal_Unicode IDEOGRAPHICFULLSTOP_D[] = { 0x3002, 'D' };
@@ -261,13 +261,51 @@ void SwDocTest::testSwScanner()
m_pDoc->InsertPoolItem(aPaM, aWestLangItem, 0 );
SwDocStat aDocStat;
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
pTxtNode->CountWords(aDocStat, 0, SAL_N_ELEMENTS(IDEOGRAPHICFULLSTOP_D));
CPPUNIT_ASSERT_MESSAGE("Should be 2", aDocStat.nChar == 2);
CPPUNIT_ASSERT_MESSAGE("Should be 2", aDocStat.nCharExcludingSpaces == 2);
}
+ {
+ const sal_Unicode test[] =
+ {
+ 0x3053, 0x306E, 0x65E5, 0x672C, 0x8A9E, 0x306F, 0x6B63, 0x3057,
+ 0x304F, 0x6570, 0x3048, 0x3089, 0x308C, 0x308B, 0x3067, 0x3057,
+ 0x3087, 0x3046, 0x304B, 0x3002, 0x0041, 0x006E, 0x0064, 0x0020,
+ 0x006C, 0x0065, 0x0074, 0x0027, 0x0073, 0x0020, 0x0074, 0x0068,
+ 0x0072, 0x006F, 0x0077, 0x0020, 0x0073, 0x006F, 0x006D, 0x0065,
+ 0x0020, 0x0045, 0x006E, 0x0067, 0x006C, 0x0069, 0x0073, 0x0068,
+ 0x0020, 0x0069, 0x006E, 0x0020, 0x0074, 0x006F, 0x0020, 0x006D,
+ 0x0061, 0x006B, 0x0065, 0x0020, 0x0069, 0x0074, 0x0020, 0x0069,
+ 0x006E, 0x0074, 0x0065, 0x0072, 0x0065, 0x0073, 0x0074, 0x0069,
+ 0x006E, 0x0067, 0x002E, 0x0020, 0x0020, 0x305D, 0x3057, 0x3066,
+ 0x3001, 0x307E, 0x305F, 0x65E5, 0x672C, 0x8A9E, 0x3000, 0x3000,
+ 0x3067, 0x3082, 0x4ECA, 0x56DE, 0x306F, 0x7A7A, 0x767D, 0x3092,
+ 0x3000, 0x3000, 0x5165, 0x308C, 0x307E, 0x3057, 0x305F, 0x3002,
+ 0x0020, 0x0020, 0x0053, 0x006F, 0x0020, 0x0068, 0x006F, 0x0077,
+ 0x0020, 0x0064, 0x006F, 0x0065, 0x0073, 0x0020, 0x0074, 0x0068,
+ 0x0069, 0x0073, 0x0020, 0x0064, 0x006F, 0x003F, 0x0020, 0x0020
+ };
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, rtl::OUString(test,
+ SAL_N_ELEMENTS(test)));
+
+ SvxLanguageItem aCJKLangItem( LANGUAGE_JAPANESE, RES_CHRATR_CJK_LANGUAGE );
+ SvxLanguageItem aWestLangItem( LANGUAGE_ENGLISH_US, RES_CHRATR_LANGUAGE );
+ m_pDoc->InsertPoolItem(aPaM, aCJKLangItem, 0 );
+ m_pDoc->InsertPoolItem(aPaM, aWestLangItem, 0 );
+
+ SwDocStat aDocStat;
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, SAL_N_ELEMENTS(test));
+ CPPUNIT_ASSERT_MESSAGE("58 words", aDocStat.nWord == 58);
+ CPPUNIT_ASSERT_MESSAGE("43 Asian characters and Korean syllables", aDocStat.nAsianWord == 43);
+ CPPUNIT_ASSERT_MESSAGE("105 non-whitespace chars", aDocStat.nCharExcludingSpaces == 105);
+ CPPUNIT_ASSERT_MESSAGE("128 characters", aDocStat.nChar == 128);
+ }
- //See https://issues.apache.org/ooo/show_bug.cgi?id=89042 for motivation
+ //See https://issues.apache.org/ooo/show_bug.cgi?id=89042
{
SwDocStat aDocStat;
@@ -298,8 +336,7 @@ void SwDocTest::testSwScanner()
}
}
-
-//See https://bugs.freedesktop.org/show_bug.cgi?id=40599 for motivation
+//See https://bugs.freedesktop.org/show_bug.cgi?id=40599
void SwDocTest::testGraphicAnchorDeletion()
{
CPPUNIT_ASSERT_MESSAGE("Expected initial 0 count", m_pDoc->GetDocStat().nChar == 0);
diff --git a/sw/source/core/doc/docstat.cxx b/sw/source/core/doc/docstat.cxx
index c84f943b8516..fd0c6e65aa57 100644
--- a/sw/source/core/doc/docstat.cxx
+++ b/sw/source/core/doc/docstat.cxx
@@ -43,6 +43,7 @@ SwDocStat::SwDocStat() :
nPara(1),
nAllPara(1),
nWord(0),
+ nAsianWord(0),
nChar(0),
nCharExcludingSpaces(0),
bModified(sal_True)
@@ -61,6 +62,7 @@ void SwDocStat::Reset()
nPara = 1;
nAllPara= 1;
nWord = 0;
+ nAsianWord = 0;
nChar = 0;
nCharExcludingSpaces = 0;
bModified = sal_True;
diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx
index a186cc646857..cb5e4724fe8c 100644
--- a/sw/source/core/txtnode/txtedt.cxx
+++ b/sw/source/core/txtnode/txtedt.cxx
@@ -680,6 +680,44 @@ SwScanner::SwScanner( const SwTxtNode& rNd, const rtl::OUString& rTxt,
}
}
+namespace
+{
+ //fdo#45271: for word counting, make every Asian code point a word of its own; given the word [nBegin, nBegin + nLen) in rText, returns the possibly shortened length to use for it
+ sal_Int32 forceEachAsianCodePointToWord(const rtl::OUString &rText, sal_Int32 nBegin, sal_Int32 nLen)
+ {
+ if (nLen > 1) // words of length 0 or 1 cannot be split further and are returned unchanged
+ {
+ const uno::Reference< XBreakIterator > &rxBreak = pBreakIt->GetBreakIter();
+
+ sal_uInt16 nCurrScript = rxBreak->getScriptType( rText, nBegin );
+
+ sal_Int32 indexUtf16 = nBegin;
+ rText.iterateCodePoints(&indexUtf16, 1); // step indexUtf16 over the first code point (may be more than one UTF-16 unit)
+
+ //First character is Asian: the word is cut down to just that one code point
+ if (nCurrScript == i18n::ScriptType::ASIAN)
+ {
+ nLen = indexUtf16 - nBegin;
+ return nLen;
+ }
+
+ //First character was not Asian: scan forward code point by code point and treat the
+ //first Asian character found as the end of the word (it is left for the next word)
+ while (indexUtf16 < nBegin + nLen)
+ {
+ nCurrScript = rxBreak->getScriptType( rText, indexUtf16 );
+ if (nCurrScript == i18n::ScriptType::ASIAN)
+ {
+ nLen = indexUtf16 - nBegin;
+ return nLen;
+ }
+ rText.iterateCodePoints(&indexUtf16, 1);
+ }
+ }
+ return nLen; // no Asian code point found inside the word (or nLen <= 1): length is unchanged
+ }
+}
+
sal_Bool SwScanner::NextWord()
{
nBegin = nBegin + nLen;
@@ -802,6 +840,9 @@ sal_Bool SwScanner::NextWord()
if( ! nLen )
return sal_False;
+ if ( nWordType == i18n::WordType::WORD_COUNT )
+ nLen = forceEachAsianCodePointToWord(aText, nBegin, nLen);
+
aWord = aText.copy( nBegin, nLen );
return sal_True;
@@ -1812,6 +1853,7 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
{
// accumulate into DocStat record to return the values
rStat.nWord += GetParaNumberOfWords();
+ rStat.nAsianWord += GetParaNumberOfAsianWords();
rStat.nChar += GetParaNumberOfChars();
rStat.nCharExcludingSpaces += GetParaNumberOfCharsExcludingSpaces();
return;
@@ -1842,7 +1884,8 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
// all counts exclude hidden paras and hidden+redlined within para
// definition of space/white chars in SwScanner (and BreakIter!)
// uses both lcl_IsSkippableWhiteSpace and BreakIter getWordBoundary in SwScanner
- sal_uInt32 nTmpWords = 0; // count of all contiguous blocks of non-white chars
+ sal_uInt32 nTmpWords = 0; // count of all words
+ sal_uInt32 nTmpAsianWords = 0; //count of all Asian codepoints
sal_uInt32 nTmpChars = 0; // count of all chars
sal_uInt32 nTmpCharsExcludingSpaces = 0; // all non-white chars
@@ -1862,7 +1905,10 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() ))
{
++nTmpWords;
- nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(aScanner.GetWord());
+ const rtl::OUString &rWord = aScanner.GetWord();
+ if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN)
+ ++nTmpAsianWords;
+ nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord);
}
}
}
@@ -1890,7 +1936,10 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
while ( aScanner.NextWord() )
{
++nTmpWords;
- nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(aScanner.GetWord());
+ const rtl::OUString &rWord = aScanner.GetWord();
+ if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN)
+ ++nTmpAsianWords;
+ nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord);
}
nTmpChars = pBreakIt->getGraphemeCount(aNumString);
@@ -1909,12 +1958,14 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
if ( isCountAll )
{
SetParaNumberOfWords( nTmpWords );
+ SetParaNumberOfAsianWords( nTmpAsianWords );
SetParaNumberOfChars( nTmpChars );
SetParaNumberOfCharsExcludingSpaces( nTmpCharsExcludingSpaces );
SetWordCountDirty( false );
}
// accumulate into DocStat record to return the values
rStat.nWord += nTmpWords;
+ rStat.nAsianWord += nTmpAsianWords;
rStat.nChar += nTmpChars;
rStat.nCharExcludingSpaces += nTmpCharsExcludingSpaces;
}
@@ -1928,6 +1979,7 @@ struct SwParaIdleData_Impl
SwGrammarMarkUp* pGrammarCheck; // for grammar checking / proof reading
SwWrongList* pSmartTags;
sal_uLong nNumberOfWords;
+ sal_uLong nNumberOfAsianWords;
sal_uLong nNumberOfChars;
sal_uLong nNumberOfCharsExcludingSpaces;
bool bWordCountDirty;
@@ -1941,6 +1993,7 @@ struct SwParaIdleData_Impl
pGrammarCheck ( 0 ),
pSmartTags ( 0 ),
nNumberOfWords ( 0 ),
+ nNumberOfAsianWords ( 0 ),
nNumberOfChars ( 0 ),
nNumberOfCharsExcludingSpaces ( 0 ),
bWordCountDirty ( true ),
@@ -2033,10 +2086,25 @@ void SwTxtNode::SetParaNumberOfWords( sal_uLong nNew ) const
m_pParaIdleData_Impl->nNumberOfWords = nNew;
}
}
+
sal_uLong SwTxtNode::GetParaNumberOfWords() const
{
return m_pParaIdleData_Impl ? m_pParaIdleData_Impl->nNumberOfWords : 0; // stored word count, or 0 when m_pParaIdleData_Impl is null
}
+
+void SwTxtNode::SetParaNumberOfAsianWords( sal_uLong nNew ) const // stores nNew as this paragraph's Asian word count
+{
+ if ( m_pParaIdleData_Impl ) // silently does nothing when m_pParaIdleData_Impl is null
+ {
+ m_pParaIdleData_Impl->nNumberOfAsianWords = nNew;
+ }
+}
+
+sal_uLong SwTxtNode::GetParaNumberOfAsianWords() const
+{
+ return m_pParaIdleData_Impl ? m_pParaIdleData_Impl->nNumberOfAsianWords : 0; // stored Asian word count, or 0 when m_pParaIdleData_Impl is null
+}
+
void SwTxtNode::SetParaNumberOfChars( sal_uLong nNew ) const
{
if ( m_pParaIdleData_Impl )
@@ -2044,10 +2112,12 @@ void SwTxtNode::SetParaNumberOfChars( sal_uLong nNew ) const
m_pParaIdleData_Impl->nNumberOfChars = nNew;
}
}
+
sal_uLong SwTxtNode::GetParaNumberOfChars() const
{
return m_pParaIdleData_Impl ? m_pParaIdleData_Impl->nNumberOfChars : 0; // stored character count, or 0 when m_pParaIdleData_Impl is null
}
+
void SwTxtNode::SetWordCountDirty( bool bNew ) const
{
if ( m_pParaIdleData_Impl )
diff --git a/sw/source/ui/dialog/wordcountdialog.cxx b/sw/source/ui/dialog/wordcountdialog.cxx
index 3fdc8958ddc0..f5b1c3b98824 100644
--- a/sw/source/ui/dialog/wordcountdialog.cxx
+++ b/sw/source/ui/dialog/wordcountdialog.cxx
@@ -42,6 +42,8 @@
#include <swwait.hxx>
#include <wrtsh.hxx>
+//TODO, add asian/non-asian word count to UI when CJK mode is enabled.
+
SwWordCountDialog::SwWordCountDialog(Window* pParent) :
#if defined _MSC_VER
#pragma warning (disable : 4355)