summary | refs | log | tree | commit | diff
path: root/i18npool
diff options
context:
space:
mode:
Diffstat (limited to 'i18npool')
-rw-r--r-- i18npool/qa/cppunit/test_breakiterator.cxx 16
-rw-r--r-- i18npool/source/breakiterator/breakiterator_cjk.cxx 19
2 files changed, 35 insertions, 0 deletions
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 98a0bca96a77..552274864035 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -158,6 +158,22 @@ void TestBreakIterator::testLineBreaking()
(void)m_xBreak->getLineBreak(aTest, 0, aLocale, 0, aHyphOptions, aUserOptions);
}
}
+
+ //See https://bugs.documentfoundation.org/show_bug.cgi?id=96197
+ {
+ const sal_Unicode HANGUL[] = { 0xc560, 0xad6D, 0xac00, 0xc758, 0x0020, 0xac00,
+ 0xc0ac, 0xb294};
+ OUString aTest(HANGUL, SAL_N_ELEMENTS(HANGUL));
+
+ aLocale.Language = "ko";
+ aLocale.Country = "KR";
+
+ {
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0,
+ aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break don't split the Korean word!", static_cast<sal_Int32>(5), aResult.breakIndex);
+ }
+ }
}
//See https://bugs.libreoffice.org/show_bug.cgi?id=49629
diff --git a/i18npool/source/breakiterator/breakiterator_cjk.cxx b/i18npool/source/breakiterator/breakiterator_cjk.cxx
index 98115e6c1d32..8a4244631759 100644
--- a/i18npool/source/breakiterator/breakiterator_cjk.cxx
+++ b/i18npool/source/breakiterator/breakiterator_cjk.cxx
@@ -86,6 +86,8 @@ BreakIterator_CJK::getWordBoundary( const OUString& text, sal_Int32 anyPos,
return BreakIterator_Unicode::getWordBoundary(text, anyPos, nLocale, wordType, bDirection);
}
+#define isHangul(cCh) (((cCh)>=0xAC00&&(cCh)<=0xD7AF)||((cCh)>=0x1100&&(cCh)<=0x11FF)) // true for code units in 0xAC00..0xD7AF (Hangul Syllables block) or 0x1100..0x11FF (Hangul Jamo block); cCh is evaluated more than once, so pass a side-effect-free expression
+
LineBreakResults SAL_CALL BreakIterator_CJK::getLineBreak(
const OUString& Text, sal_Int32 nStartPos,
const css::lang::Locale& /*rLocale*/, sal_Int32 /*nMinBreakPos*/,
@@ -94,17 +96,34 @@ LineBreakResults SAL_CALL BreakIterator_CJK::getLineBreak(
{
LineBreakResults lbr;
+ const sal_Int32 nOldStartPos = nStartPos;
+
if (bOptions.allowPunctuationOutsideMargin &&
hangingCharacters.indexOf(Text[nStartPos]) != -1 &&
(Text.iterateCodePoints( &nStartPos ), nStartPos == Text.getLength())) {
; // do nothing
} else if (bOptions.applyForbiddenRules && 0 < nStartPos && nStartPos < Text.getLength()) {
+
while (nStartPos > 0 &&
(bOptions.forbiddenBeginCharacters.indexOf(Text[nStartPos]) != -1 ||
bOptions.forbiddenEndCharacters.indexOf(Text[nStartPos-1]) != -1))
Text.iterateCodePoints( &nStartPos, -1);
}
+ // Prevent cutting Korean words in the middle.
+ if ( nOldStartPos == nStartPos && isHangul( Text[nStartPos] ) )
+ {
+ while ( nStartPos >= 0 && isHangul( Text[nStartPos] ) )
+ --nStartPos;
+
+ // Step forward again so nStartPos is at the beginning of the last Korean word.
+ if ( nStartPos < nOldStartPos )
+ ++nStartPos;
+
+ if ( nStartPos == 0 )
+ nStartPos = nOldStartPos;
+ }
+
lbr.breakIndex = nStartPos;
lbr.breakType = BreakType::WORDBOUNDARY;
return lbr;