diff options
author | Caolán McNamara <caolanm@redhat.com> | 2012-08-28 17:10:35 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2012-08-29 09:02:50 +0100 |
commit | 42a15f45ff4e02f98229de02efd0d8c19f10bcd5 (patch) | |
tree | 67031948d50d251825c1d05d5547a499a1c5e51b /sw | |
parent | 02f6e55231c8b1646cbafc0e3e591da8122e2bf1 (diff) |
Resolves: fdo#38983 allow extra word boundary characters
i.e. Word overrides emdash and endash to be word boundary characters
for the purposes of counting words. And there are some who want
to treat '=', '-', etc. similarly.
Default to a configuration that gives the same results as Word for
word counting.
Change-Id: Ia8ce6ac12011a1d6e547f11644c76163c4c993c5
Diffstat (limited to 'sw')
-rw-r--r-- | sw/inc/swscanner.hxx | 6 | ||||
-rw-r--r-- | sw/qa/core/swdoc-test.cxx | 110 | ||||
-rw-r--r-- | sw/source/core/txtnode/txtedt.cxx | 79 | ||||
-rw-r--r-- | sw/source/ui/config/optload.cxx | 24 | ||||
-rw-r--r-- | sw/source/ui/config/optload.hrc | 5 | ||||
-rw-r--r-- | sw/source/ui/config/optload.src | 20 | ||||
-rw-r--r-- | sw/source/ui/inc/optload.hxx | 3 |
7 files changed, 219 insertions, 28 deletions
diff --git a/sw/inc/swscanner.hxx b/sw/inc/swscanner.hxx index f657f238c2ca..7a71c0a1e9e9 100644 --- a/sw/inc/swscanner.hxx +++ b/sw/inc/swscanner.hxx @@ -43,13 +43,15 @@ class SwScanner { rtl::OUString aWord; const SwTxtNode& rNode; - const rtl::OUString aText; + const rtl::OUString aPreDashReplacementText; + rtl::OUString aText; const LanguageType* pLanguage; const ModelToViewHelper& rConversionMap; sal_Int32 nStartPos; sal_Int32 nEndPos; sal_Int32 nBegin; sal_Int32 nLen; + sal_Int32 nOverriddenDashCount; LanguageType aCurrLang; sal_uInt16 nWordType; sal_Bool bClip; @@ -74,6 +76,8 @@ public: sal_Int32 GetLen() const { return nLen; } LanguageType GetCurrentLanguage() const {return aCurrLang;} + + sal_Int32 getOverriddenDashCount() const {return nOverriddenDashCount; } }; #endif diff --git a/sw/qa/core/swdoc-test.cxx b/sw/qa/core/swdoc-test.cxx index 54a1cffcd4cd..2f1bf8668824 100644 --- a/sw/qa/core/swdoc-test.cxx +++ b/sw/qa/core/swdoc-test.cxx @@ -567,6 +567,116 @@ void SwDocTest::testSwScanner() CPPUNIT_ASSERT_EQUAL(aDocStat.nWord, static_cast<sal_uLong>(0)); CPPUNIT_ASSERT_EQUAL(aDocStat.nChar, static_cast<sal_uLong>(0)); } + + //See https://bugs.freedesktop.org/show_bug.cgi?id=38983 + { + SwDocStat aDocStat; + + rtl::OUString sTemplate("ThisXis a test."); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', ' ')); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 4 && + aDocStat.nCharExcludingSpaces == 12 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" = "))); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 5 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 17); + aDocStat.Reset(); + + 
m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" _ "))); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 5 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 17); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" -- "))); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 5 && + aDocStat.nCharExcludingSpaces == 14 && + aDocStat.nChar == 18); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', '_')); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 3 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', '-')); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 3 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2012)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 3 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2015)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 3 && + aDocStat.nCharExcludingSpaces == 13 && + 
aDocStat.nChar == 15); + aDocStat.Reset(); + + //But default configuration should, msword-alike treak emdash + //and endash as word seperators for word-counting + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2013)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 4 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2014)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 4 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + const sal_Unicode aChunk[] = {' ', 0x2013, ' '}; + rtl::OUString sChunk(aChunk, SAL_N_ELEMENTS(aChunk)); + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), sChunk)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 4 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 17); + aDocStat.Reset(); + } } //See https://bugs.freedesktop.org/show_bug.cgi?id=40599 diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx index e37dada60149..40e6dc28f42a 100644 --- a/sw/source/core/txtnode/txtedt.cxx +++ b/sw/source/core/txtnode/txtedt.cxx @@ -37,6 +37,7 @@ #include <editeng/hangulhanja.hxx> #include <SwSmartTagMgr.hxx> #include <linguistic/lngprops.hxx> +#include <officecfg/Office/Writer.hxx> #include <unotools/transliterationwrapper.hxx> #include <unotools/charclass.hxx> #include <dlelstnr.hxx> @@ -655,12 +656,44 @@ XubString SwTxtNode::GetCurWord( xub_StrLen nPos ) const SwScanner::SwScanner( const SwTxtNode& rNd, const rtl::OUString& rTxt, const LanguageType* pLang, const 
ModelToViewHelper& rConvMap, sal_uInt16 nType, sal_Int32 nStart, sal_Int32 nEnde, sal_Bool bClp ) - : rNode( rNd ), aText( rTxt), pLanguage( pLang ), rConversionMap( rConvMap ), nLen( 0 ), nWordType( nType ), bClip( bClp ) + : rNode( rNd ) + , aPreDashReplacementText(rTxt) + , pLanguage( pLang ) + , rConversionMap( rConvMap ) + , nLen( 0 ) + , nOverriddenDashCount( 0 ) + , nWordType( nType ) + , bClip( bClp ) { - OSL_ENSURE( !aText.isEmpty(), "SwScanner: EmptyString" ); + OSL_ENSURE( !aPreDashReplacementText.isEmpty(), "SwScanner: EmptyString" ); nStartPos = nBegin = nStart; nEndPos = nEnde; + //MSWord f.e has special emdash and endash behaviour in that they break + //words for the purposes of word counting, while a hyphen etc. doesn't. + // + //The default configuration treats emdash/endash as a word break, but + //additional ones can be added in under tools->options + if (nWordType == i18n::WordType::WORD_COUNT) + { + rtl::OUString sDashes = officecfg::Office::Writer::WordCount::AdditionalSeperators::get(); + rtl::OUStringBuffer aBuf(aPreDashReplacementText); + for (sal_Int32 i = nStartPos; i < nEndPos; ++i) + { + sal_Unicode cChar = aBuf[i]; + if (sDashes.indexOf(cChar) != -1) + { + aBuf[i] = ' '; + ++nOverriddenDashCount; + } + } + aText = aBuf.makeStringAndClear(); + } + else + aText = aPreDashReplacementText; + + assert(aPreDashReplacementText.getLength() == aText.getLength()); + if ( pLanguage ) { aCurrLang = *pLanguage; @@ -836,7 +869,7 @@ sal_Bool SwScanner::NextWord() if ( nWordType == i18n::WordType::WORD_COUNT ) nLen = forceEachAsianCodePointToWord(aText, nBegin, nLen); - aWord = aText.copy( nBegin, nLen ); + aWord = aPreDashReplacementText.copy( nBegin, nLen ); return sal_True; } @@ -1892,30 +1925,35 @@ void SwTxtNode::CountWords( SwDocStat& rStat, sal_uInt32 nTmpCharsExcludingSpaces = 0; // all non-white chars // count words in masked and expanded text: - if (!aExpandText.isEmpty() && pBreakIt->GetBreakIter().is()) + if (!aExpandText.isEmpty()) { - // 
zero is NULL for pLanguage -----------v last param = true for clipping - SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT, - nExpandBegin, nExpandEnd, true ); + if (pBreakIt->GetBreakIter().is()) + { + // zero is NULL for pLanguage -----------v last param = true for clipping + SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT, + nExpandBegin, nExpandEnd, true ); - // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001) - const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD ); + // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001) + const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD ); - while ( aScanner.NextWord() ) - { - // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match - if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() )) + while ( aScanner.NextWord() ) { - ++nTmpWords; - const rtl::OUString &rWord = aScanner.GetWord(); - if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN) - ++nTmpAsianWords; - nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); + // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match + if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() )) + { + ++nTmpWords; + const rtl::OUString &rWord = aScanner.GetWord(); + if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN) + ++nTmpAsianWords; + nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); + } } + + nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount(); } - } - nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd); + nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd); + } // no nTmpCharsExcludingSpaces adjust needed neither for blanked out MaskedChars // nor for mid-word selection - set scanner bClip = true at creation @@ -1938,6 +1976,7 @@ void SwTxtNode::CountWords( SwDocStat& rStat, 
nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); } + nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount(); nTmpChars += pBreakIt->getGraphemeCount(sNumString); } else if ( bHasBullet ) diff --git a/sw/source/ui/config/optload.cxx b/sw/source/ui/config/optload.cxx index e2d519a62651..760cf2035103 100644 --- a/sw/source/ui/config/optload.cxx +++ b/sw/source/ui/config/optload.cxx @@ -26,6 +26,7 @@ * ************************************************************************/ +#include <officecfg/Office/Writer.hxx> #include <comphelper/string.hxx> #include <tools/shl.hxx> #include <swtypes.hxx> @@ -88,7 +89,10 @@ SwLoadOptPage::SwLoadOptPage( Window* pParent, const SfxItemSet& rSet ) : aTabFT ( this, SW_RES( FT_TAB ) ), aTabMF ( this, SW_RES( MF_TAB ) ), aUseSquaredPageMode ( this, SW_RES( CB_USE_SQUARE_PAGE_MODE ) ), - aUseCharUnit ( this , SW_RES( CB_USE_CHAR_UNIT ) ), + aUseCharUnit ( this , SW_RES( CB_USE_CHAR_UNIT ) ), + aWordCountFL ( this , SW_RES( FL_WORDCOUNT ) ), + aWordCountFT ( this , SW_RES( FT_WORDCOUNT ) ), + aWordCountED ( this , SW_RES( ED_WORDCOUNT ) ), pWrtShell ( NULL ), bHTMLMode ( sal_False ), @@ -131,10 +135,10 @@ SwLoadOptPage::SwLoadOptPage( Window* pParent, const SfxItemSet& rSet ) : SvtCJKOptions aCJKOptions; if(!aCJKOptions.IsAsianTypographyEnabled()) - { + { aUseSquaredPageMode.Hide(); - aUseCharUnit.Hide(); - } + aUseCharUnit.Hide(); + } } SwLoadOptPage::~SwLoadOptPage() @@ -209,6 +213,15 @@ sal_Bool SwLoadOptPage::FillItemSet( SfxItemSet& rSet ) bRet = sal_True; } + if (aWordCountED.GetText() != aWordCountED.GetSavedValue()) + { + boost::shared_ptr< comphelper::ConfigurationChanges > batch( + comphelper::ConfigurationChanges::create()); + officecfg::Office::Writer::WordCount::AdditionalSeperators::set(aWordCountED.GetText(), batch); + batch->commit(); + bRet = sal_True; + } + sal_Bool bIsSquaredPageModeFlag = aUseSquaredPageMode.IsChecked(); if ( bIsSquaredPageModeFlag != aUseSquaredPageMode.GetSavedValue() ) { @@ 
-304,6 +317,9 @@ void SwLoadOptPage::Reset( const SfxItemSet& rSet) aUseCharUnit.Check(pUsrPref->IsApplyCharUnit()); } aUseCharUnit.SaveValue(); + + aWordCountED.SetText(officecfg::Office::Writer::WordCount::AdditionalSeperators::get()); + aWordCountED.SaveValue(); } IMPL_LINK_NOARG(SwLoadOptPage, MetricHdl) diff --git a/sw/source/ui/config/optload.hrc b/sw/source/ui/config/optload.hrc index 2d123c07ffa1..8ee917e93875 100644 --- a/sw/source/ui/config/optload.hrc +++ b/sw/source/ui/config/optload.hrc @@ -32,7 +32,10 @@ #define FT_TAB 21 #define MF_TAB 22 #define CB_USE_SQUARE_PAGE_MODE 23 -#define CB_USE_CHAR_UNIT 24 +#define CB_USE_CHAR_UNIT 24 +#define FL_WORDCOUNT 25 +#define FT_WORDCOUNT 26 +#define ED_WORDCOUNT 27 // SwCaptionOptPage ----------------------------- diff --git a/sw/source/ui/config/optload.src b/sw/source/ui/config/optload.src index 8c7089a83968..5dc8dd05fd2c 100644 --- a/sw/source/ui/config/optload.src +++ b/sw/source/ui/config/optload.src @@ -142,14 +142,30 @@ TabPage TP_OPTLOAD_PAGE Size = MAP_APPFONT ( 248 , 10 ) ; Text [ en-US ] = "Use square page mode for text grid"; }; - CheckBox CB_USE_CHAR_UNIT { Pos = MAP_APPFONT ( 12 , 130) ; Size = MAP_APPFONT ( 109 , 10 ) ; Text [ en-US ] = "Enable char unit"; }; - + FixedLine FL_WORDCOUNT + { + Pos = MAP_APPFONT ( 6 , 144 ) ; + Size = MAP_APPFONT ( 248 , 8 ) ; + Text [ en-US ] = "Word Count"; + }; + FixedText FT_WORDCOUNT + { + Pos = MAP_APPFONT ( 12 , 157 ) ; + Size = MAP_APPFONT ( 80 , 8 ) ; + Text [ en-US ] = "Additional separators"; + }; + Edit ED_WORDCOUNT + { + Pos = MAP_APPFONT ( 95 , 155 ) ; + Size = MAP_APPFONT ( 159 , 12 ) ; + Border = TRUE ; + }; }; TabPage TP_OPTCAPTION_PAGE diff --git a/sw/source/ui/inc/optload.hxx b/sw/source/ui/inc/optload.hxx index 88c04b0fe310..6889c227d3fb 100644 --- a/sw/source/ui/inc/optload.hxx +++ b/sw/source/ui/inc/optload.hxx @@ -55,6 +55,9 @@ private: MetricField aTabMF; CheckBox aUseSquaredPageMode; CheckBox aUseCharUnit; + FixedLine aWordCountFL; + 
FixedText aWordCountFT; + Edit aWordCountED; SwWrtShell* pWrtShell; sal_Bool bHTMLMode; |