From a10df00bd4a2ae293b077701d32c720a63080ebf Mon Sep 17 00:00:00 2001
From: Eike Rathke <erack@redhat.com>
Date: Tue, 19 Feb 2019 22:37:55 +0100
Subject: Resolves: tdf#123575 treat Katakana middle dots as midletter

Treat
U+30FB KATAKANA MIDDLE DOT
U+FF65 HALFWIDTH KATAKANA MIDDLE DOT
like
U+00B7 MIDDLE DOT
and other U_WB_MIDLETTER characters.

Change-Id: Iea18e7582f29edf5d8c5bad047f97ed4088edf7c
Reviewed-on: https://gerrit.libreoffice.org/68032
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
---
 i18npool/source/characterclassification/cclass_unicode_parser.cxx | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'i18npool')

diff --git a/i18npool/source/characterclassification/cclass_unicode_parser.cxx b/i18npool/source/characterclassification/cclass_unicode_parser.cxx
index 29d69521954e..d3f18e7bbe27 100644
--- a/i18npool/source/characterclassification/cclass_unicode_parser.cxx
+++ b/i18npool/source/characterclassification/cclass_unicode_parser.cxx
@@ -654,8 +654,12 @@ ParserFlags cclass_Unicode::getFlagsExtended(sal_uInt32 const c)
         case U_OTHER_PUNCTUATION:
             // fdo#61754 Lets see (if we not at the start) if this is midletter
             // punctuation and allow it in a word if it is similarly to
-            // U_NON_SPACING_MARK
-            if (bStart || U_WB_MIDLETTER != u_getIntPropertyValue(c, UCHAR_WORD_BREAK))
+            // U_NON_SPACING_MARK, for example U+00B7 MIDDLE DOT.
+            // tdf#123575 The word break property of U+30FB KATAKANA MIDDLE DOT
+            // is not U_WB_MIDLETTER but U_WB_KATAKANA, so explicitly test for
+            // it and for U+FF65 HALFWIDTH KATAKANA MIDDLE DOT.
+            if (bStart || (U_WB_MIDLETTER != u_getIntPropertyValue(c, UCHAR_WORD_BREAK)
+                        && c != 0x30FB && c != 0xFF65))
                 return ParserFlags::ILLEGAL;
             else
             {
-- 
cgit