summaryrefslogtreecommitdiff
path: root/sw
diff options
context:
space:
mode:
authorCaolán McNamara <caolanm@redhat.com>2012-08-28 17:10:35 +0100
committerCaolán McNamara <caolanm@redhat.com>2012-08-29 09:02:50 +0100
commit42a15f45ff4e02f98229de02efd0d8c19f10bcd5 (patch)
tree67031948d50d251825c1d05d5547a499a1c5e51b /sw
parent02f6e55231c8b1646cbafc0e3e591da8122e2bf1 (diff)
Resolves: fdo#38983 allow extra word boundary characters
i.e. MS Word treats emdash and endash as word boundary characters for the purposes of counting words, and there are some who want to treat =, - etc. similarly. Default to a configuration that gives the same results as Word for word counting. Change-Id: Ia8ce6ac12011a1d6e547f11644c76163c4c993c5
Diffstat (limited to 'sw')
-rw-r--r--sw/inc/swscanner.hxx6
-rw-r--r--sw/qa/core/swdoc-test.cxx110
-rw-r--r--sw/source/core/txtnode/txtedt.cxx79
-rw-r--r--sw/source/ui/config/optload.cxx24
-rw-r--r--sw/source/ui/config/optload.hrc5
-rw-r--r--sw/source/ui/config/optload.src20
-rw-r--r--sw/source/ui/inc/optload.hxx3
7 files changed, 219 insertions, 28 deletions
diff --git a/sw/inc/swscanner.hxx b/sw/inc/swscanner.hxx
index f657f238c2ca..7a71c0a1e9e9 100644
--- a/sw/inc/swscanner.hxx
+++ b/sw/inc/swscanner.hxx
@@ -43,13 +43,15 @@ class SwScanner
{
rtl::OUString aWord;
const SwTxtNode& rNode;
- const rtl::OUString aText;
+ const rtl::OUString aPreDashReplacementText;
+ rtl::OUString aText;
const LanguageType* pLanguage;
const ModelToViewHelper& rConversionMap;
sal_Int32 nStartPos;
sal_Int32 nEndPos;
sal_Int32 nBegin;
sal_Int32 nLen;
+ sal_Int32 nOverriddenDashCount;
LanguageType aCurrLang;
sal_uInt16 nWordType;
sal_Bool bClip;
@@ -74,6 +76,8 @@ public:
sal_Int32 GetLen() const { return nLen; }
LanguageType GetCurrentLanguage() const {return aCurrLang;}
+
+ sal_Int32 getOverriddenDashCount() const {return nOverriddenDashCount; }
};
#endif
diff --git a/sw/qa/core/swdoc-test.cxx b/sw/qa/core/swdoc-test.cxx
index 54a1cffcd4cd..2f1bf8668824 100644
--- a/sw/qa/core/swdoc-test.cxx
+++ b/sw/qa/core/swdoc-test.cxx
@@ -567,6 +567,116 @@ void SwDocTest::testSwScanner()
CPPUNIT_ASSERT_EQUAL(aDocStat.nWord, static_cast<sal_uLong>(0));
CPPUNIT_ASSERT_EQUAL(aDocStat.nChar, static_cast<sal_uLong>(0));
}
+
+ //See https://bugs.freedesktop.org/show_bug.cgi?id=38983
+ {
+ SwDocStat aDocStat;
+
+ rtl::OUString sTemplate("ThisXis a test.");
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', ' '));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 4 &&
+ aDocStat.nCharExcludingSpaces == 12 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" = ")));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 5 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 17);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" _ ")));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 5 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 17);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), rtl::OUString(" -- ")));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 5 &&
+ aDocStat.nCharExcludingSpaces == 14 &&
+ aDocStat.nChar == 18);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', '_'));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 3 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', '-'));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 3 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2012));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 3 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2015));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 3 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ //But the default configuration should, like MS Word, treat emdash
+ //and endash as word separators for word-counting
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2013));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 4 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replace('X', 0x2014));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 4 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 15);
+ aDocStat.Reset();
+
+ const sal_Unicode aChunk[] = {' ', 0x2013, ' '};
+ rtl::OUString sChunk(aChunk, SAL_N_ELEMENTS(aChunk));
+ m_pDoc->AppendTxtNode(*aPaM.GetPoint());
+ m_pDoc->InsertString(aPaM, sTemplate.replaceAll(rtl::OUString('X'), sChunk));
+ pTxtNode = aPaM.GetNode()->GetTxtNode();
+ pTxtNode->CountWords(aDocStat, 0, pTxtNode->Len());
+ CPPUNIT_ASSERT(aDocStat.nWord == 4 &&
+ aDocStat.nCharExcludingSpaces == 13 &&
+ aDocStat.nChar == 17);
+ aDocStat.Reset();
+ }
}
//See https://bugs.freedesktop.org/show_bug.cgi?id=40599
diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx
index e37dada60149..40e6dc28f42a 100644
--- a/sw/source/core/txtnode/txtedt.cxx
+++ b/sw/source/core/txtnode/txtedt.cxx
@@ -37,6 +37,7 @@
#include <editeng/hangulhanja.hxx>
#include <SwSmartTagMgr.hxx>
#include <linguistic/lngprops.hxx>
+#include <officecfg/Office/Writer.hxx>
#include <unotools/transliterationwrapper.hxx>
#include <unotools/charclass.hxx>
#include <dlelstnr.hxx>
@@ -655,12 +656,44 @@ XubString SwTxtNode::GetCurWord( xub_StrLen nPos ) const
SwScanner::SwScanner( const SwTxtNode& rNd, const rtl::OUString& rTxt,
const LanguageType* pLang, const ModelToViewHelper& rConvMap,
sal_uInt16 nType, sal_Int32 nStart, sal_Int32 nEnde, sal_Bool bClp )
- : rNode( rNd ), aText( rTxt), pLanguage( pLang ), rConversionMap( rConvMap ), nLen( 0 ), nWordType( nType ), bClip( bClp )
+ : rNode( rNd )
+ , aPreDashReplacementText(rTxt)
+ , pLanguage( pLang )
+ , rConversionMap( rConvMap )
+ , nLen( 0 )
+ , nOverriddenDashCount( 0 )
+ , nWordType( nType )
+ , bClip( bClp )
{
- OSL_ENSURE( !aText.isEmpty(), "SwScanner: EmptyString" );
+ OSL_ENSURE( !aPreDashReplacementText.isEmpty(), "SwScanner: EmptyString" );
nStartPos = nBegin = nStart;
nEndPos = nEnde;
+ //MS Word, for example, has special emdash and endash behaviour in that they break
+ //words for the purposes of word counting, while a hyphen etc. doesn't.
+ //
+ //The default configuration treats emdash/endash as a word break, but
+ //additional ones can be added in under tools->options
+ if (nWordType == i18n::WordType::WORD_COUNT)
+ {
+ rtl::OUString sDashes = officecfg::Office::Writer::WordCount::AdditionalSeperators::get();
+ rtl::OUStringBuffer aBuf(aPreDashReplacementText);
+ for (sal_Int32 i = nStartPos; i < nEndPos; ++i)
+ {
+ sal_Unicode cChar = aBuf[i];
+ if (sDashes.indexOf(cChar) != -1)
+ {
+ aBuf[i] = ' ';
+ ++nOverriddenDashCount;
+ }
+ }
+ aText = aBuf.makeStringAndClear();
+ }
+ else
+ aText = aPreDashReplacementText;
+
+ assert(aPreDashReplacementText.getLength() == aText.getLength());
+
if ( pLanguage )
{
aCurrLang = *pLanguage;
@@ -836,7 +869,7 @@ sal_Bool SwScanner::NextWord()
if ( nWordType == i18n::WordType::WORD_COUNT )
nLen = forceEachAsianCodePointToWord(aText, nBegin, nLen);
- aWord = aText.copy( nBegin, nLen );
+ aWord = aPreDashReplacementText.copy( nBegin, nLen );
return sal_True;
}
@@ -1892,30 +1925,35 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
sal_uInt32 nTmpCharsExcludingSpaces = 0; // all non-white chars
// count words in masked and expanded text:
- if (!aExpandText.isEmpty() && pBreakIt->GetBreakIter().is())
+ if (!aExpandText.isEmpty())
{
- // zero is NULL for pLanguage -----------v last param = true for clipping
- SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT,
- nExpandBegin, nExpandEnd, true );
+ if (pBreakIt->GetBreakIter().is())
+ {
+ // zero is NULL for pLanguage -----------v last param = true for clipping
+ SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT,
+ nExpandBegin, nExpandEnd, true );
- // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001)
- const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD );
+ // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001)
+ const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD );
- while ( aScanner.NextWord() )
- {
- // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match
- if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() ))
+ while ( aScanner.NextWord() )
{
- ++nTmpWords;
- const rtl::OUString &rWord = aScanner.GetWord();
- if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN)
- ++nTmpAsianWords;
- nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord);
+ // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match
+ if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() ))
+ {
+ ++nTmpWords;
+ const rtl::OUString &rWord = aScanner.GetWord();
+ if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN)
+ ++nTmpAsianWords;
+ nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord);
+ }
}
+
+ nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount();
}
- }
- nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd);
+ nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd);
+ }
// no nTmpCharsExcludingSpaces adjust needed neither for blanked out MaskedChars
// nor for mid-word selection - set scanner bClip = true at creation
@@ -1938,6 +1976,7 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord);
}
+ nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount();
nTmpChars += pBreakIt->getGraphemeCount(sNumString);
}
else if ( bHasBullet )
diff --git a/sw/source/ui/config/optload.cxx b/sw/source/ui/config/optload.cxx
index e2d519a62651..760cf2035103 100644
--- a/sw/source/ui/config/optload.cxx
+++ b/sw/source/ui/config/optload.cxx
@@ -26,6 +26,7 @@
*
************************************************************************/
+#include <officecfg/Office/Writer.hxx>
#include <comphelper/string.hxx>
#include <tools/shl.hxx>
#include <swtypes.hxx>
@@ -88,7 +89,10 @@ SwLoadOptPage::SwLoadOptPage( Window* pParent, const SfxItemSet& rSet ) :
aTabFT ( this, SW_RES( FT_TAB ) ),
aTabMF ( this, SW_RES( MF_TAB ) ),
aUseSquaredPageMode ( this, SW_RES( CB_USE_SQUARE_PAGE_MODE ) ),
- aUseCharUnit ( this , SW_RES( CB_USE_CHAR_UNIT ) ),
+ aUseCharUnit ( this , SW_RES( CB_USE_CHAR_UNIT ) ),
+ aWordCountFL ( this , SW_RES( FL_WORDCOUNT ) ),
+ aWordCountFT ( this , SW_RES( FT_WORDCOUNT ) ),
+ aWordCountED ( this , SW_RES( ED_WORDCOUNT ) ),
pWrtShell ( NULL ),
bHTMLMode ( sal_False ),
@@ -131,10 +135,10 @@ SwLoadOptPage::SwLoadOptPage( Window* pParent, const SfxItemSet& rSet ) :
SvtCJKOptions aCJKOptions;
if(!aCJKOptions.IsAsianTypographyEnabled())
- {
+ {
aUseSquaredPageMode.Hide();
- aUseCharUnit.Hide();
- }
+ aUseCharUnit.Hide();
+ }
}
SwLoadOptPage::~SwLoadOptPage()
@@ -209,6 +213,15 @@ sal_Bool SwLoadOptPage::FillItemSet( SfxItemSet& rSet )
bRet = sal_True;
}
+ if (aWordCountED.GetText() != aWordCountED.GetSavedValue())
+ {
+ boost::shared_ptr< comphelper::ConfigurationChanges > batch(
+ comphelper::ConfigurationChanges::create());
+ officecfg::Office::Writer::WordCount::AdditionalSeperators::set(aWordCountED.GetText(), batch);
+ batch->commit();
+ bRet = sal_True;
+ }
+
sal_Bool bIsSquaredPageModeFlag = aUseSquaredPageMode.IsChecked();
if ( bIsSquaredPageModeFlag != aUseSquaredPageMode.GetSavedValue() )
{
@@ -304,6 +317,9 @@ void SwLoadOptPage::Reset( const SfxItemSet& rSet)
aUseCharUnit.Check(pUsrPref->IsApplyCharUnit());
}
aUseCharUnit.SaveValue();
+
+ aWordCountED.SetText(officecfg::Office::Writer::WordCount::AdditionalSeperators::get());
+ aWordCountED.SaveValue();
}
IMPL_LINK_NOARG(SwLoadOptPage, MetricHdl)
diff --git a/sw/source/ui/config/optload.hrc b/sw/source/ui/config/optload.hrc
index 2d123c07ffa1..8ee917e93875 100644
--- a/sw/source/ui/config/optload.hrc
+++ b/sw/source/ui/config/optload.hrc
@@ -32,7 +32,10 @@
#define FT_TAB 21
#define MF_TAB 22
#define CB_USE_SQUARE_PAGE_MODE 23
-#define CB_USE_CHAR_UNIT 24
+#define CB_USE_CHAR_UNIT 24
+#define FL_WORDCOUNT 25
+#define FT_WORDCOUNT 26
+#define ED_WORDCOUNT 27
// SwCaptionOptPage -----------------------------
diff --git a/sw/source/ui/config/optload.src b/sw/source/ui/config/optload.src
index 8c7089a83968..5dc8dd05fd2c 100644
--- a/sw/source/ui/config/optload.src
+++ b/sw/source/ui/config/optload.src
@@ -142,14 +142,30 @@ TabPage TP_OPTLOAD_PAGE
Size = MAP_APPFONT ( 248 , 10 ) ;
Text [ en-US ] = "Use square page mode for text grid";
};
-
CheckBox CB_USE_CHAR_UNIT
{
Pos = MAP_APPFONT ( 12 , 130) ;
Size = MAP_APPFONT ( 109 , 10 ) ;
Text [ en-US ] = "Enable char unit";
};
-
+ FixedLine FL_WORDCOUNT
+ {
+ Pos = MAP_APPFONT ( 6 , 144 ) ;
+ Size = MAP_APPFONT ( 248 , 8 ) ;
+ Text [ en-US ] = "Word Count";
+ };
+ FixedText FT_WORDCOUNT
+ {
+ Pos = MAP_APPFONT ( 12 , 157 ) ;
+ Size = MAP_APPFONT ( 80 , 8 ) ;
+ Text [ en-US ] = "Additional separators";
+ };
+ Edit ED_WORDCOUNT
+ {
+ Pos = MAP_APPFONT ( 95 , 155 ) ;
+ Size = MAP_APPFONT ( 159 , 12 ) ;
+ Border = TRUE ;
+ };
};
TabPage TP_OPTCAPTION_PAGE
diff --git a/sw/source/ui/inc/optload.hxx b/sw/source/ui/inc/optload.hxx
index 88c04b0fe310..6889c227d3fb 100644
--- a/sw/source/ui/inc/optload.hxx
+++ b/sw/source/ui/inc/optload.hxx
@@ -55,6 +55,9 @@ private:
MetricField aTabMF;
CheckBox aUseSquaredPageMode;
CheckBox aUseCharUnit;
+ FixedLine aWordCountFL;
+ FixedText aWordCountFT;
+ Edit aWordCountED;
SwWrtShell* pWrtShell;
sal_Bool bHTMLMode;