summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--i18npool/source/breakiterator/data/README7
-rw-r--r--officecfg/registry/data/org/openoffice/Office/Writer.xcu10
-rw-r--r--officecfg/registry/schema/org/openoffice/Office/Writer.xcs13
-rw-r--r--sw/inc/swscanner.hxx6
-rw-r--r--sw/qa/core/swdoc-test.cxx110
-rw-r--r--sw/source/core/txtnode/txtedt.cxx79
-rw-r--r--sw/source/ui/config/optload.cxx24
-rw-r--r--sw/source/ui/config/optload.hrc5
-rw-r--r--sw/source/ui/config/optload.src20
-rw-r--r--sw/source/ui/inc/optload.hxx3
10 files changed, 238 insertions, 39 deletions
diff --git a/i18npool/source/breakiterator/data/README b/i18npool/source/breakiterator/data/README
index 7d67cf0c6766..6858f7a538da 100644
--- a/i18npool/source/breakiterator/data/README
+++ b/i18npool/source/breakiterator/data/README
@@ -22,12 +22,6 @@ Date: Sat Jan 29 12:51:52 2011 +0000
Resolves: fdo#31271 wrong line break with (
-commit 109fa8224194edfc4ca75ee5cc5e760e54d76a3f
-Author: Thomas Lange [tl] <tl@openoffice.org>
-Date: Wed Dec 8 14:39:09 2010 +0100
-
- cws tl84: #i89042# word count fix
-
commit 42be5541baf18e3292a14a9d478eda33f61e10ab
Author: Mattias Johnsson <m.t.johnsson@gmail.com>
Date: Thu Nov 4 23:25:02 2010 +1100
@@ -585,6 +579,7 @@ Date: Mon Mar 8 16:17:05 2004 +0000
done, regression tests added:
+#i89042# word count fix (regression test is in writer)
#i58513# add break iterator rules for Finish
#i19716# fix wrong line break on bracket characters
#i21290# extend Greek script type
diff --git a/officecfg/registry/data/org/openoffice/Office/Writer.xcu b/officecfg/registry/data/org/openoffice/Office/Writer.xcu
index 55ab299cb1db..861b777f29b6 100644
--- a/officecfg/registry/data/org/openoffice/Office/Writer.xcu
+++ b/officecfg/registry/data/org/openoffice/Office/Writer.xcu
@@ -735,9 +735,9 @@
</prop>
</node>
</node>
-<node oor:name="Notes">
- <prop oor:name="ShowAnkor">
- <value>false</value>
- </prop>
-</node>
+ <node oor:name="Notes">
+ <prop oor:name="ShowAnkor">
+ <value>false</value>
+ </prop>
+ </node>
</oor:component-data>
diff --git a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
index e79798885a83..cffc7406f643 100644
--- a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
+++ b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
@@ -5736,6 +5736,19 @@
<value>false</value>
</prop>
</group>
+ <group oor:name="WordCount">
+ <info>
+ <desc>Contains settings for word counting</desc>
+ </info>
+ <prop oor:name="AdditionalSeperators" oor:type="xs:string" oor:nillable="false">
+ <info>
+ <author>cmc</author>
+ <desc>Configures additional word separators for word counting</desc>
+ <label>Additional Word Separators</label>
+ </info>
+ <value>—–</value>
+ </prop>
+ </group>
<group oor:name="Navigator">
<info>
<desc>Contains settings for the Navigator.</desc>
diff --git a/sw/inc/swscanner.hxx b/sw/inc/swscanner.hxx
index f657f238c2ca..7a71c0a1e9e9 100644
--- a/sw/inc/swscanner.hxx
+++ b/sw/inc/swscanner.hxx
@@ -43,13 +43,15 @@ class SwScanner
{
rtl::OUString aWord;
const SwTxtNode& rNode;
- const rtl::OUString aText;
+ const rtl::OUString aPreDashReplacementText;
+ rtl::OUString aText;
const LanguageType* pLanguage;
const ModelToViewHelper& rConversionMap;
sal_Int32 nStartPos;
sal_Int32 nEndPos;
sal_Int32 nBegin;
sal_Int32 nLen;
+ sal_Int32 nOverriddenDashCount;
LanguageType aCurrLang;
sal_uInt16 nWordType;
sal_Bool bClip;
@@ -74,6 +76,8 @@ public:
sal_Int32 GetLen() const { return nLen; }
LanguageType GetCurrentLanguage() const {return aCurrLang;}
+
+ sal_Int32 getOverriddenDashCount() const {return nOverriddenDashCount; }
};
#endif
diff --git a/sw/qa/core/swdoc-test.cxx b/sw/qa/core/swdoc-test.cxx
index 54a1cffcd4cd..2f1bf8668824 100644
--- a/sw/qa/core/swdoc-test.cxx
+++ b/sw/qa/core/swdoc-test.cxx
@@ -567,6 +567,116 @@ void SwDocTest::testSwScanner()
CPPUNIT_ASSERT_EQUAL(aDocStat.nWord, static_cast<sal_uLong>(0));
CPPUNIT_ASSERT_EQUAL(aDocStat.nChar, static_cast<sal_uLong>(0));
}
+
+ //See https://bugs.freedesktop.org/show_bug.cgi?id=38983
+ {
+ SwDocStat aDocStat;
+
+ rtl::OUString sTemplate("ThisXis a test.");
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', ' '));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 4 &&
+ aDocStat.nCharExcludingSpaces == 12 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" = ")));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 5 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 17);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" _ ")));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 5 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 17);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" -- ")));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 5 &&
+ aDocStat.nCharExcludingSpaces == 14 &&
+ aDocStat.nChar == 18);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', '_'));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 3 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', '-'));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 3 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2012));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 3 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2015));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 3 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ //But default configuration should, msword-alike, treat emdash
+ //and endash as word separators for word-counting
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2013));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 4 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2014));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 4 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ const sal_Unicode aChunk[] = {' ', 0x2013, ' '};
+ rtl::OUString sChunk(aChunk, SAL_N_ELEMENTS(aChunk));
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), sChunk));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 4 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 17);
+ aDocStat.Reset();
+ }
}
//See https://bugs.freedesktop.org/show_bug.cgi?id=40599
diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx
index e37dada60149..40e6dc28f42a 100644
--- a/sw/source/core/txtnode/txtedt.cxx
+++ b/sw/source/core/txtnode/txtedt.cxx
@@ -37,6 +37,7 @@
#include <editeng/hangulhanja.hxx>
#include <SwSmartTagMgr.hxx>
#include <linguistic/lngprops.hxx>
+#include <officecfg/Office/Writer.hxx>
#include <unotools/transliterationwrapper.hxx>
#include <unotools/charclass.hxx>
#include <dlelstnr.hxx>
@@ -655,12 +656,44 @@ XubString SwTxtNode::GetCurWord( xub_StrLen nPos ) const
SwScanner::SwScanner( const SwTxtNode& rNd, const rtl::OUString& rTxt,
const LanguageType* pLang, const ModelToViewHelper& rConvMap,
sal_uInt16 nType, sal_Int32 nStart, sal_Int32 nEnde, sal_Bool bClp )
- : rNode( rNd ), aText( rTxt), pLanguage( pLang ), rConversionMap( rConvMap ), nLen( 0 ), nWordType( nType ), bClip( bClp )
+ : rNode( rNd )
+ , aPreDashReplacementText(rTxt)
+ , pLanguage( pLang )
+ , rConversionMap( rConvMap )
+ , nLen( 0 )
+ , nOverriddenDashCount( 0 )
+ , nWordType( nType )
+ , bClip( bClp )
{
- OSL_ENSURE( !aText.isEmpty(), "SwScanner: EmptyString" );
+ OSL_ENSURE( !aPreDashReplacementText.isEmpty(), "SwScanner: EmptyString" );
nStartPos = nBegin = nStart;
nEndPos = nEnde;
+ //MSWord, for example, has special emdash and endash behaviour in that they
+ //break words for the purposes of word counting, while a hyphen etc. doesn't.
+ //
+ //The default configuration treats emdash/endash as a word break, but
+ //additional ones can be added under tools->options
+ if (nWordType == i18n::WordType::WORD_COUNT)
+ {
+ rtl::OUString sDashes = officecfg::Office::Writer::WordCount::AdditionalSeperators::get();
+ rtl::OUStringBuffer aBuf(aPreDashReplacementText);
+ for (sal_Int32 i = nStartPos; i < nEndPos; ++i)
+ {
+ sal_Unicode cChar = aBuf[i];
+ if (sDashes.indexOf(cChar) != -1)
+ {
+ aBuf[i] = ' ';
+ ++nOverriddenDashCount;
+ }
+ }
+ aText = aBuf.makeStringAndClear();
+ }
+ else
+ aText = aPreDashReplacementText;
+
+ assert(aPreDashReplacementText.getLength() == aText.getLength());
+
if ( pLanguage )
{
aCurrLang = *pLanguage;
@@ -836,7 +869,7 @@ sal_Bool SwScanner::NextWord()
if ( nWordType == i18n::WordType::WORD_COUNT )
nLen = forceEachAsianCodePointToWord(aText, nBegin, nLen);
- aWord = aText.copy( nBegin, nLen );
+ aWord = aPreDashReplacementText.copy( nBegin, nLen );
return sal_True;
}
@@ -1892,30 +1925,35 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
sal_uInt32 nTmpCharsExcludingSpaces = 0; // all non-white chars
// count words in masked and expanded text:
- if (!aExpandText.isEmpty() && pBreakIt->GetBreakIter().is())
+ if (!aExpandText.isEmpty())
{
- // zero is NULL for pLanguage -----------v last param = true for clipping
- SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT,
- nExpandBegin, nExpandEnd, true );
+ if (pBreakIt->GetBreakIter().is())
+ {
+ // zero is NULL for pLanguage -----------v last param = true for clipping
+ SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT,
+ nExpandBegin, nExpandEnd, true );
- // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001)
- const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD );
+ // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001)
+ const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD );
- while ( aScanner.NextWord() )
- {
- // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match
- if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() ))
+ while ( aScanner.NextWord() )
{
- ++nTmpWords;
- const rtl::OUString &rWord = aScanner.GetWord();
- if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN)
- ++nTmpAsianWords;
- nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord);
+ // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match
+ if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() ))
+ {
+ ++nTmpWords;
+ const rtl::OUString &rWord = aScanner.GetWord();
+ if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN)
+ ++nTmpAsianWords;
+ nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord);
+ }
}
+
+ nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount();
}
- }
- nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd);
+ nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd);
+ }
// no nTmpCharsExcludingSpaces adjust needed neither for blanked out MaskedChars
// nor for mid-word selection - set scanner bClip = true at creation
@@ -1938,6 +1976,7 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord);
}
+ nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount();
nTmpChars += pBreakIt->getGraphemeCount(sNumString);
}
else if ( bHasBullet )
diff --git a/sw/source/ui/config/optload.cxx b/sw/source/ui/config/optload.cxx
index e2d519a62651..760cf2035103 100644
--- a/sw/source/ui/config/optload.cxx
+++ b/sw/source/ui/config/optload.cxx
@@ -26,6 +26,7 @@
*
************************************************************************/
+#include <officecfg/Office/Writer.hxx>
#include <comphelper/string.hxx>
#include <tools/shl.hxx>
#include <swtypes.hxx>
@@ -88,7 +89,10 @@ SwLoadOptPage::SwLoadOptPage( Window* pParent, const SfxItemSet& rSet ) :
aTabFT ( this, SW_RES( FT_TAB ) ),
aTabMF ( this, SW_RES( MF_TAB ) ),
aUseSquaredPageMode ( this, SW_RES( CB_USE_SQUARE_PAGE_MODE ) ),
- aUseCharUnit ( this , SW_RES( CB_USE_CHAR_UNIT ) ),
+ aUseCharUnit ( this , SW_RES( CB_USE_CHAR_UNIT ) ),
+ aWordCountFL ( this , SW_RES( FL_WORDCOUNT ) ),
+ aWordCountFT ( this , SW_RES( FT_WORDCOUNT ) ),
+ aWordCountED ( this , SW_RES( ED_WORDCOUNT ) ),
pWrtShell ( NULL ),
bHTMLMode ( sal_False ),
@@ -131,10 +135,10 @@ SwLoadOptPage::SwLoadOptPage( Window* pParent, const SfxItemSet& rSet ) :
SvtCJKOptions aCJKOptions;
if(!aCJKOptions.IsAsianTypographyEnabled())
- {
+ {
aUseSquaredPageMode.Hide();
- aUseCharUnit.Hide();
- }
+ aUseCharUnit.Hide();
+ }
}
SwLoadOptPage::~SwLoadOptPage()
@@ -209,6 +213,15 @@ sal_Bool SwLoadOptPage::FillItemSet( SfxItemSet& rSet )
bRet = sal_True;
}
+ if (aWordCountED.GetText() != aWordCountED.GetSavedValue())
+ {
+ boost::shared_ptr< comphelper::ConfigurationChanges > batch(
+ comphelper::ConfigurationChanges::create());
+ officecfg::Office::Writer::WordCount::AdditionalSeperators::set(aWordCountED.GetText(), batch);
+ batch->commit();
+ bRet = sal_True;
+ }
+
sal_Bool bIsSquaredPageModeFlag = aUseSquaredPageMode.IsChecked();
if ( bIsSquaredPageModeFlag != aUseSquaredPageMode.GetSavedValue() )
{
@@ -304,6 +317,9 @@ void SwLoadOptPage::Reset( const SfxItemSet& rSet)
aUseCharUnit.Check(pUsrPref->IsApplyCharUnit());
}
aUseCharUnit.SaveValue();
+
+ aWordCountED.SetText(officecfg::Office::Writer::WordCount::AdditionalSeperators::get());
+ aWordCountED.SaveValue();
}
IMPL_LINK_NOARG(SwLoadOptPage, MetricHdl)
diff --git a/sw/source/ui/config/optload.hrc b/sw/source/ui/config/optload.hrc
index 2d123c07ffa1..8ee917e93875 100644
--- a/sw/source/ui/config/optload.hrc
+++ b/sw/source/ui/config/optload.hrc
@@ -32,7 +32,10 @@
#define FT_TAB 21
#define MF_TAB 22
#define CB_USE_SQUARE_PAGE_MODE 23
-#define CB_USE_CHAR_UNIT 24
+#define CB_USE_CHAR_UNIT 24
+#define FL_WORDCOUNT 25
+#define FT_WORDCOUNT 26
+#define ED_WORDCOUNT 27
// SwCaptionOptPage -----------------------------
diff --git a/sw/source/ui/config/optload.src b/sw/source/ui/config/optload.src
index 8c7089a83968..5dc8dd05fd2c 100644
--- a/sw/source/ui/config/optload.src
+++ b/sw/source/ui/config/optload.src
@@ -142,14 +142,30 @@ TabPage TP_OPTLOAD_PAGE
Size = MAP_APPFONT ( 248 , 10 ) ;
Text [ en-US ] = "Use square page mode for text grid";
};
-
CheckBox CB_USE_CHAR_UNIT
{
Pos = MAP_APPFONT ( 12 , 130) ;
Size = MAP_APPFONT ( 109 , 10 ) ;
Text [ en-US ] = "Enable char unit";
};
-
+ FixedLine FL_WORDCOUNT
+ {
+ Pos = MAP_APPFONT ( 6 , 144 ) ;
+ Size = MAP_APPFONT ( 248 , 8 ) ;
+ Text [ en-US ] = "Word Count";
+ };
+ FixedText FT_WORDCOUNT
+ {
+ Pos = MAP_APPFONT ( 12 , 157 ) ;
+ Size = MAP_APPFONT ( 80 , 8 ) ;
+ Text [ en-US ] = "Additional separators";
+ };
+ Edit ED_WORDCOUNT
+ {
+ Pos = MAP_APPFONT ( 95 , 155 ) ;
+ Size = MAP_APPFONT ( 159 , 12 ) ;
+ Border = TRUE ;
+ };
};
TabPage TP_OPTCAPTION_PAGE
diff --git a/sw/source/ui/inc/optload.hxx b/sw/source/ui/inc/optload.hxx
index 88c04b0fe310..6889c227d3fb 100644
--- a/sw/source/ui/inc/optload.hxx
+++ b/sw/source/ui/inc/optload.hxx
@@ -55,6 +55,9 @@ private:
MetricField aTabMF;
CheckBox aUseSquaredPageMode;
CheckBox aUseCharUnit;
+ FixedLine aWordCountFL;
+ FixedText aWordCountFT;
+ Edit aWordCountED;
SwWrtShell* pWrtShell;
sal_Bool bHTMLMode;