#104501# add support for the Non-Spacing Mark Unicode type

author: Karl Hong <khong@openoffice.org> 2002-11-05 22:35:42 +0000
committer: Karl Hong <khong@openoffice.org> 2002-11-05 22:35:42 +0000
commit: 99f7cc47069e7a71ab015e5348071058cf78633d (patch)
tree: 0c2c34f4af60d51cd9e92fa74adcd29c21eafad8 /i18npool/source/characterclassification/cclass_unicode_parser.cxx
parent: 87f8f9d0d48ee6f9519f76882f7a8e4ed1b98af5 (diff)
1 files changed, 27 insertions, 11 deletions
diff --git a/i18npool/source/characterclassification/cclass_unicode_parser.cxx b/i18npool/source/characterclassification/cclass_unicode_parser.cxx
index 1564ab191fa0..cf1cf1f2be03 100644
--- a/i18npool/source/characterclassification/cclass_unicode_parser.cxx
+++ b/i18npool/source/characterclassification/cclass_unicode_parser.cxx
@@ -2,9 +2,9 @@
  *
  *  $RCSfile: cclass_unicode_parser.cxx,v $
  *
- *  $Revision: 1.5 $
+ *  $Revision: 1.6 $
  *
- *  last change: $Author: er $ $Date: 2002-09-20 16:12:55 $
+ *  last change: $Author: khong $ $Date: 2002-11-05 23:35:42 $
  *
  *  The Contents of this file are made available subject to the terms of
  *  either of the following licenses
@@ -409,8 +409,9 @@ const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode
 }
 
 
-sal_Int32 cclass_Unicode::getParseTokensType( sal_Unicode c )
+sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
 {
+    sal_Unicode c = aStr[nPos];
     if ( c < nDefCnt )
         return pParseTokensType[ sal_uInt8(c) ];
     else
@@ -432,6 +433,10 @@ sal_Int32 cclass_Unicode::getParseTokensType( sal_Unicode c )
                 return KParseTokens::UNI_MODIFIER_LETTER;
             break;
             case UnicodeType::OTHER_LETTER :
+                // A Non_Spacing_Mark cannot be the leading character
+                if (nPos == 0) break;
+                // fall through, treat it as Other_Letter.
+            case UnicodeType::NON_SPACING_MARK :
                 return KParseTokens::UNI_OTHER_LETTER;
             break;
             case UnicodeType::DECIMAL_DIGIT_NUMBER :
@@ -640,13 +645,14 @@ void cclass_Unicode::destroyParserTable()
 }
 
 
-UPT_FLAG_TYPE cclass_Unicode::getFlags( sal_Unicode c )
+UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
 {
     UPT_FLAG_TYPE nMask;
+    sal_Unicode c = aStr[nPos];
     if ( c < nDefCnt )
         nMask = pTable[ sal_uInt8(c) ];
     else
-        nMask = getFlagsExtended( c );
+        nMask = getFlagsExtended( aStr, nPos );
     switch ( eState )
     {
         case ssGetChar :
@@ -674,8 +680,9 @@ UPT_FLAG_TYPE cclass_Unicode::getFlags( sal_Unicode c )
 }
 
 
-UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( sal_Unicode c )
+UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
 {
+    sal_Unicode c = aStr[nPos];
     if ( c == cGroupSep )
         return TOKEN_VALUE;
     else if ( c == cDecimalSep )
@@ -708,6 +715,10 @@ UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( sal_Unicode c )
                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
                 TOKEN_ILLEGAL;
         break;
+        case UnicodeType::NON_SPACING_MARK :
+            // A Non_Spacing_Mark cannot be the leading character
+            if (nPos == 0) break;
+            // fall through, treat it as Other_Letter.
         case UnicodeType::OTHER_LETTER :
             return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
@@ -775,6 +786,7 @@ void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32
     const sal_Unicode* pSrc = pSym;
     OUString aSymbol;
     sal_Unicode c = *pSrc;
+    sal_Int32 nPosition = 0;
     sal_Unicode cLast = 0;
     int nDecSeps = 0;
     BOOL bQuote = FALSE;
@@ -784,8 +796,7 @@ void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32
 
     while ( (c != 0) && (eState != ssStop) )
     {
-        pSrc++;
-        UPT_FLAG_TYPE nMask = getFlags( c );
+        UPT_FLAG_TYPE nMask = getFlags( pStart, nPosition );
         if ( nMask & TOKEN_EXCLUDED )
             eState = ssBounce;
         if ( bMightBeWord )
@@ -795,7 +806,9 @@ void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32
             else
                 bMightBeWord = ((nMask & TOKEN_WORD) != 0);
         }
-        sal_Int32 nParseTokensType = getParseTokensType( c );
+        sal_Int32 nParseTokensType = getParseTokensType( pStart, nPosition );
+        pSrc++;
+        nPosition++;
         switch (eState)
         {
             case ssGetChar :
@@ -889,7 +902,7 @@ void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32
                 }
                 else if ( c == 'E' || c == 'e' )
                 {
-                    UPT_FLAG_TYPE nNext = getFlags( *pSrc );
+                    UPT_FLAG_TYPE nNext = getFlags( pStart, nPosition );
                     if ( nNext & TOKEN_VALUE_EXP )
                         ;   // keep it going
                     else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
@@ -904,7 +917,7 @@ void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32
                 {
                     if ( (cLast == 'E') || (cLast == 'e') )
                     {
-                        UPT_FLAG_TYPE nNext = getFlags( *pSrc );
+                        UPT_FLAG_TYPE nNext = getFlags( pStart, nPosition );
                         if ( nNext & TOKEN_VALUE_EXP_VALUE )
                             ;   // keep it going
                         else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
@@ -978,6 +991,7 @@ void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32
                     {   // "" => literal " escaped
                         aSymbol += OUString( pSym, pSrc - pSym );
                         pSrc++;
+                        nPosition++;
                     }
                     else
                     {
@@ -1004,6 +1018,7 @@ void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32
             pSrc = pSym;
             aSymbol = OUString();
             c = *pSrc;
+        nPosition = 0;
             cLast = 0;
             nDecSeps = 0;
             bQuote = FALSE;
@@ -1032,6 +1047,7 @@ void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32
             if ( eState == ssStopBack )
             {   // put back
                 pSrc--;
+        nPosition--;
                 bMightBeWord = bMightBeWordLast;
                 eState = ssStop;
             }
author	Karl Hong <khong@openoffice.org>	2002-11-05 22:35:42 +0000
committer	Karl Hong <khong@openoffice.org>	2002-11-05 22:35:42 +0000
commit	99f7cc47069e7a71ab015e5348071058cf78633d (patch)
tree	0c2c34f4af60d51cd9e92fa74adcd29c21eafad8 /i18npool/source/characterclassification/cclass_unicode_parser.cxx
parent	87f8f9d0d48ee6f9519f76882f7a8e4ed1b98af5 (diff)