diff options
-rw-r--r-- | i18npool/source/breakiterator/data/README | 7 | ||||
-rw-r--r-- | officecfg/registry/data/org/openoffice/Office/Writer.xcu | 10 | ||||
-rw-r--r-- | officecfg/registry/schema/org/openoffice/Office/Writer.xcs | 13 | ||||
-rw-r--r-- | sw/inc/swscanner.hxx | 6 | ||||
-rw-r--r-- | sw/qa/core/swdoc-test.cxx | 110 | ||||
-rw-r--r-- | sw/source/core/txtnode/txtedt.cxx | 79 | ||||
-rw-r--r-- | sw/source/ui/config/optload.cxx | 24 | ||||
-rw-r--r-- | sw/source/ui/config/optload.hrc | 5 | ||||
-rw-r--r-- | sw/source/ui/config/optload.src | 20 | ||||
-rw-r--r-- | sw/source/ui/inc/optload.hxx | 3 |
10 files changed, 238 insertions, 39 deletions
diff --git a/i18npool/source/breakiterator/data/README b/i18npool/source/breakiterator/data/README index 7d67cf0c6766..6858f7a538da 100644 --- a/i18npool/source/breakiterator/data/README +++ b/i18npool/source/breakiterator/data/README @@ -22,12 +22,6 @@ Date: Sat Jan 29 12:51:52 2011 +0000 Resolves: fdo#31271 wrong line break with ( -commit 109fa8224194edfc4ca75ee5cc5e760e54d76a3f -Author: Thomas Lange [tl] <tl@openoffice.org> -Date: Wed Dec 8 14:39:09 2010 +0100 - - cws tl84: #i89042# word count fix - commit 42be5541baf18e3292a14a9d478eda33f61e10ab Author: Mattias Johnsson <m.t.johnsson@gmail.com> Date: Thu Nov 4 23:25:02 2010 +1100 @@ -585,6 +579,7 @@ Date: Mon Mar 8 16:17:05 2004 +0000 done, regression tests added: +#i89042# word count fix (regression test is in writer) #i58513# add break iterator rules for Finish #i19716# fix wrong line break on bracket characters #i21290# extend Greek script type diff --git a/officecfg/registry/data/org/openoffice/Office/Writer.xcu b/officecfg/registry/data/org/openoffice/Office/Writer.xcu index 55ab299cb1db..861b777f29b6 100644 --- a/officecfg/registry/data/org/openoffice/Office/Writer.xcu +++ b/officecfg/registry/data/org/openoffice/Office/Writer.xcu @@ -735,9 +735,9 @@ </prop> </node> </node> -<node oor:name="Notes"> - <prop oor:name="ShowAnkor"> - <value>false</value> - </prop> -</node> + <node oor:name="Notes"> + <prop oor:name="ShowAnkor"> + <value>false</value> + </prop> + </node> </oor:component-data> diff --git a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs index e79798885a83..cffc7406f643 100644 --- a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs +++ b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs @@ -5736,6 +5736,19 @@ <value>false</value> </prop> </group> + <group oor:name="WordCount"> + <info> + <desc>Contains settings for word counting</desc> + </info> + <prop oor:name="AdditionalSeperators" oor:type="xs:string" 
oor:nillable="false"> + <info> + <author>cmc</author> + <desc>configures additional word separators for word counting</desc> + <label>Additional Word Separators</label> + </info> + <value>—–</value> + </prop> + </group> <group oor:name="Navigator"> <info> <desc>Contains settings for the Navigator.</desc> diff --git a/sw/inc/swscanner.hxx b/sw/inc/swscanner.hxx index f657f238c2ca..7a71c0a1e9e9 100644 --- a/sw/inc/swscanner.hxx +++ b/sw/inc/swscanner.hxx @@ -43,13 +43,15 @@ class SwScanner { rtl::OUString aWord; const SwTxtNode& rNode; - const rtl::OUString aText; + const rtl::OUString aPreDashReplacementText; + rtl::OUString aText; const LanguageType* pLanguage; const ModelToViewHelper& rConversionMap; sal_Int32 nStartPos; sal_Int32 nEndPos; sal_Int32 nBegin; sal_Int32 nLen; + sal_Int32 nOverriddenDashCount; LanguageType aCurrLang; sal_uInt16 nWordType; sal_Bool bClip; @@ -74,6 +76,8 @@ public: sal_Int32 GetLen() const { return nLen; } LanguageType GetCurrentLanguage() const {return aCurrLang;} + + sal_Int32 getOverriddenDashCount() const {return nOverriddenDashCount; } }; #endif diff --git a/sw/qa/core/swdoc-test.cxx b/sw/qa/core/swdoc-test.cxx index 54a1cffcd4cd..2f1bf8668824 100644 --- a/sw/qa/core/swdoc-test.cxx +++ b/sw/qa/core/swdoc-test.cxx @@ -567,6 +567,116 @@ void SwDocTest::testSwScanner() CPPUNIT_ASSERT_EQUAL(aDocStat.nWord, static_cast<sal_uLong>(0)); CPPUNIT_ASSERT_EQUAL(aDocStat.nChar, static_cast<sal_uLong>(0)); } + + //See https://bugs.freedesktop.org/show_bug.cgi?id=38983 + { + SwDocStat aDocStat; + + rtl::OUString sTemplate("ThisXis a test."); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', ' ')); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 4 && + aDocStat.nCharExcludingSpaces == 12 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, 
sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" = "))); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 5 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 17); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" _ "))); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 5 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 17); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" -- "))); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 5 && + aDocStat.nCharExcludingSpaces == 14 && + aDocStat.nChar == 18); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', '_')); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 3 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', '-')); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 3 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2012)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 3 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + 
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2015)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 3 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + //But default configuration should, msword-alike, treat emdash + //and endash as word separators for word-counting + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2013)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 4 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2014)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 4 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 15); + aDocStat.Reset(); + + const sal_Unicode aChunk[] = {' ', 0x2013, ' '}; + rtl::OUString sChunk(aChunk, SAL_N_ELEMENTS(aChunk)); + m_pDoc->AppendTxtNode(*aPaM.GetPoint()); + m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), sChunk)); + pTxtNode = aPaM.GetNode()->GetTxtNode(); + pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len()); + CPPUNIT_ASSERT(aDocStat.nWord == 4 && + aDocStat.nCharExcludingSpaces == 13 && + aDocStat.nChar == 17); + aDocStat.Reset(); + } } //See https://bugs.freedesktop.org/show_bug.cgi?id=40599 diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx index e37dada60149..40e6dc28f42a 100644 --- a/sw/source/core/txtnode/txtedt.cxx +++ b/sw/source/core/txtnode/txtedt.cxx @@ -37,6 +37,7 @@ #include <editeng/hangulhanja.hxx> #include <SwSmartTagMgr.hxx> #include <linguistic/lngprops.hxx> +#include <officecfg/Office/Writer.hxx> #include 
<unotools/transliterationwrapper.hxx> #include <unotools/charclass.hxx> #include <dlelstnr.hxx> @@ -655,12 +656,44 @@ XubString SwTxtNode::GetCurWord( xub_StrLen nPos ) const SwScanner::SwScanner( const SwTxtNode& rNd, const rtl::OUString& rTxt, const LanguageType* pLang, const ModelToViewHelper& rConvMap, sal_uInt16 nType, sal_Int32 nStart, sal_Int32 nEnde, sal_Bool bClp ) - : rNode( rNd ), aText( rTxt), pLanguage( pLang ), rConversionMap( rConvMap ), nLen( 0 ), nWordType( nType ), bClip( bClp ) + : rNode( rNd ) + , aPreDashReplacementText(rTxt) + , pLanguage( pLang ) + , rConversionMap( rConvMap ) + , nLen( 0 ) + , nOverriddenDashCount( 0 ) + , nWordType( nType ) + , bClip( bClp ) { - OSL_ENSURE( !aText.isEmpty(), "SwScanner: EmptyString" ); + OSL_ENSURE( !aPreDashReplacementText.isEmpty(), "SwScanner: EmptyString" ); nStartPos = nBegin = nStart; nEndPos = nEnde; + //MSWord f.e has special emdash and endash behaviour in that they break + //words for the purposes of word counting, while a hyphen etc. doesn't. 
+ // + //The default configuration treats emdash/endash as a word break, but + //additional ones can be added in under tools->options + if (nWordType == i18n::WordType::WORD_COUNT) + { + rtl::OUString sDashes = officecfg::Office::Writer::WordCount::AdditionalSeperators::get(); + rtl::OUStringBuffer aBuf(aPreDashReplacementText); + for (sal_Int32 i = nStartPos; i < nEndPos; ++i) + { + sal_Unicode cChar = aBuf[i]; + if (sDashes.indexOf(cChar) != -1) + { + aBuf[i] = ' '; + ++nOverriddenDashCount; + } + } + aText = aBuf.makeStringAndClear(); + } + else + aText = aPreDashReplacementText; + + assert(aPreDashReplacementText.getLength() == aText.getLength()); + if ( pLanguage ) { aCurrLang = *pLanguage; @@ -836,7 +869,7 @@ sal_Bool SwScanner::NextWord() if ( nWordType == i18n::WordType::WORD_COUNT ) nLen = forceEachAsianCodePointToWord(aText, nBegin, nLen); - aWord = aText.copy( nBegin, nLen ); + aWord = aPreDashReplacementText.copy( nBegin, nLen ); return sal_True; } @@ -1892,30 +1925,35 @@ void SwTxtNode::CountWords( SwDocStat& rStat, sal_uInt32 nTmpCharsExcludingSpaces = 0; // all non-white chars // count words in masked and expanded text: - if (!aExpandText.isEmpty() && pBreakIt->GetBreakIter().is()) + if (!aExpandText.isEmpty()) { - // zero is NULL for pLanguage -----------v last param = true for clipping - SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT, - nExpandBegin, nExpandEnd, true ); + if (pBreakIt->GetBreakIter().is()) + { + // zero is NULL for pLanguage -----------v last param = true for clipping + SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT, + nExpandBegin, nExpandEnd, true ); - // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001) - const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD ); + // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001) + const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD ); - while ( 
aScanner.NextWord() ) - { - // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match - if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() )) + while ( aScanner.NextWord() ) { - ++nTmpWords; - const rtl::OUString &rWord = aScanner.GetWord(); - if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN) - ++nTmpAsianWords; - nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); + // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match + if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() )) + { + ++nTmpWords; + const rtl::OUString &rWord = aScanner.GetWord(); + if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN) + ++nTmpAsianWords; + nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); + } } + + nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount(); } - } - nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd); + nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd); + } // no nTmpCharsExcludingSpaces adjust needed neither for blanked out MaskedChars // nor for mid-word selection - set scanner bClip = true at creation @@ -1938,6 +1976,7 @@ void SwTxtNode::CountWords( SwDocStat& rStat, nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); } + nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount(); nTmpChars += pBreakIt->getGraphemeCount(sNumString); } else if ( bHasBullet ) diff --git a/sw/source/ui/config/optload.cxx b/sw/source/ui/config/optload.cxx index e2d519a62651..760cf2035103 100644 --- a/sw/source/ui/config/optload.cxx +++ b/sw/source/ui/config/optload.cxx @@ -26,6 +26,7 @@ * ************************************************************************/ +#include <officecfg/Office/Writer.hxx> #include <comphelper/string.hxx> #include <tools/shl.hxx> #include <swtypes.hxx> @@ -88,7 +89,10 @@ SwLoadOptPage::SwLoadOptPage( Window* pParent, const SfxItemSet& rSet ) : aTabFT ( this, SW_RES( FT_TAB 
) ), aTabMF ( this, SW_RES( MF_TAB ) ), aUseSquaredPageMode ( this, SW_RES( CB_USE_SQUARE_PAGE_MODE ) ), - aUseCharUnit ( this , SW_RES( CB_USE_CHAR_UNIT ) ), + aUseCharUnit ( this , SW_RES( CB_USE_CHAR_UNIT ) ), + aWordCountFL ( this , SW_RES( FL_WORDCOUNT ) ), + aWordCountFT ( this , SW_RES( FT_WORDCOUNT ) ), + aWordCountED ( this , SW_RES( ED_WORDCOUNT ) ), pWrtShell ( NULL ), bHTMLMode ( sal_False ), @@ -131,10 +135,10 @@ SwLoadOptPage::SwLoadOptPage( Window* pParent, const SfxItemSet& rSet ) : SvtCJKOptions aCJKOptions; if(!aCJKOptions.IsAsianTypographyEnabled()) - { + { aUseSquaredPageMode.Hide(); - aUseCharUnit.Hide(); - } + aUseCharUnit.Hide(); + } } SwLoadOptPage::~SwLoadOptPage() @@ -209,6 +213,15 @@ sal_Bool SwLoadOptPage::FillItemSet( SfxItemSet& rSet ) bRet = sal_True; } + if (aWordCountED.GetText() != aWordCountED.GetSavedValue()) + { + boost::shared_ptr< comphelper::ConfigurationChanges > batch( + comphelper::ConfigurationChanges::create()); + officecfg::Office::Writer::WordCount::AdditionalSeperators::set(aWordCountED.GetText(), batch); + batch->commit(); + bRet = sal_True; + } + sal_Bool bIsSquaredPageModeFlag = aUseSquaredPageMode.IsChecked(); if ( bIsSquaredPageModeFlag != aUseSquaredPageMode.GetSavedValue() ) { @@ -304,6 +317,9 @@ void SwLoadOptPage::Reset( const SfxItemSet& rSet) aUseCharUnit.Check(pUsrPref->IsApplyCharUnit()); } aUseCharUnit.SaveValue(); + + aWordCountED.SetText(officecfg::Office::Writer::WordCount::AdditionalSeperators::get()); + aWordCountED.SaveValue(); } IMPL_LINK_NOARG(SwLoadOptPage, MetricHdl) diff --git a/sw/source/ui/config/optload.hrc b/sw/source/ui/config/optload.hrc index 2d123c07ffa1..8ee917e93875 100644 --- a/sw/source/ui/config/optload.hrc +++ b/sw/source/ui/config/optload.hrc @@ -32,7 +32,10 @@ #define FT_TAB 21 #define MF_TAB 22 #define CB_USE_SQUARE_PAGE_MODE 23 -#define CB_USE_CHAR_UNIT 24 +#define CB_USE_CHAR_UNIT 24 +#define FL_WORDCOUNT 25 +#define FT_WORDCOUNT 26 +#define ED_WORDCOUNT 27 // 
SwCaptionOptPage ----------------------------- diff --git a/sw/source/ui/config/optload.src b/sw/source/ui/config/optload.src index 8c7089a83968..5dc8dd05fd2c 100644 --- a/sw/source/ui/config/optload.src +++ b/sw/source/ui/config/optload.src @@ -142,14 +142,30 @@ TabPage TP_OPTLOAD_PAGE Size = MAP_APPFONT ( 248 , 10 ) ; Text [ en-US ] = "Use square page mode for text grid"; }; - CheckBox CB_USE_CHAR_UNIT { Pos = MAP_APPFONT ( 12 , 130) ; Size = MAP_APPFONT ( 109 , 10 ) ; Text [ en-US ] = "Enable char unit"; }; - + FixedLine FL_WORDCOUNT + { + Pos = MAP_APPFONT ( 6 , 144 ) ; + Size = MAP_APPFONT ( 248 , 8 ) ; + Text [ en-US ] = "Word Count"; + }; + FixedText FT_WORDCOUNT + { + Pos = MAP_APPFONT ( 12 , 157 ) ; + Size = MAP_APPFONT ( 80 , 8 ) ; + Text [ en-US ] = "Additional separators"; + }; + Edit ED_WORDCOUNT + { + Pos = MAP_APPFONT ( 95 , 155 ) ; + Size = MAP_APPFONT ( 159 , 12 ) ; + Border = TRUE ; + }; }; TabPage TP_OPTCAPTION_PAGE diff --git a/sw/source/ui/inc/optload.hxx b/sw/source/ui/inc/optload.hxx index 88c04b0fe310..6889c227d3fb 100644 --- a/sw/source/ui/inc/optload.hxx +++ b/sw/source/ui/inc/optload.hxx @@ -55,6 +55,9 @@ private: MetricField aTabMF; CheckBox aUseSquaredPageMode; CheckBox aUseCharUnit; + FixedLine aWordCountFL; + FixedText aWordCountFT; + Edit aWordCountED; SwWrtShell* pWrtShell; sal_Bool bHTMLMode; |