diff options
author | Andras Timar <atimar@suse.com> | 2013-02-12 15:39:09 +0100 |
---|---|---|
committer | Andras Timar <atimar@suse.com> | 2013-02-13 10:19:17 +0100 |
commit | 6e4fd7ba9f4a02e130e817aadf0b977b8b8b6262 (patch) | |
tree | 96b75ef5c5c98414477c1adc30a3e2d5c0a9d5d5 | |
parent | 4ab3d5bb6f6f095375c2eaf200dd285be516feda (diff) |
Use u_isalpha() from ICU instead of the home-grown solution
Also translate the German comments to English
Change-Id: Id9ff5d4835e4ea224c9e6232a1762822aa833d37
-rw-r--r-- | svtools/source/edit/syntaxhighlight.cxx | 217 |
1 files changed, 53 insertions, 164 deletions
diff --git a/svtools/source/edit/syntaxhighlight.cxx b/svtools/source/edit/syntaxhighlight.cxx index 7db7b722b660..ce3fcf153377 100644 --- a/svtools/source/edit/syntaxhighlight.cxx +++ b/svtools/source/edit/syntaxhighlight.cxx @@ -18,13 +18,12 @@ */ +#include <unicode/uchar.h> #include <svtools/syntaxhighlight.hxx> - -#include <unotools/charclass.hxx> #include <comphelper/string.hxx> // ########################################################################## -// ATTENTION: all these words needs to be in small caps +// ATTENTION: all these words need to be in lower case // ########################################################################## static const char* strListBasicKeyWords[] = { "access", @@ -232,111 +231,15 @@ extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 ) namespace { - - class LetterTable - { - bool IsLetterTab[256]; - - public: - LetterTable( void ); - - inline bool isLetter( sal_Unicode c ) - { - bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c ); - return bRet; - } - bool isLetterUnicode( sal_Unicode c ); - }; - static bool isAlpha(sal_Unicode c) { if (comphelper::string::isalphaAscii(c)) return true; - static LetterTable aLetterTable; - return aLetterTable.isLetter(c); + return u_isalpha(c); } } -LetterTable::LetterTable( void ) -{ - for( int i = 0 ; i < 256 ; ++i ) - IsLetterTab[i] = false; - - IsLetterTab[0xC0] = true; // ?, CAPITAL LETTER A WITH GRAVE ACCENT - IsLetterTab[0xC1] = true; // ?, CAPITAL LETTER A WITH ACUTE ACCENT - IsLetterTab[0xC2] = true; // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT - IsLetterTab[0xC3] = true; // ?, CAPITAL LETTER A WITH TILDE - IsLetterTab[0xC4] = true; // ?, CAPITAL LETTER A WITH DIAERESIS - IsLetterTab[0xC5] = true; // ?, CAPITAL LETTER A WITH RING ABOVE - IsLetterTab[0xC6] = true; // ?, CAPITAL LIGATURE AE - IsLetterTab[0xC7] = true; // ?, CAPITAL LETTER C WITH CEDILLA - IsLetterTab[0xC8] = true; // ?, CAPITAL LETTER E WITH GRAVE ACCENT - IsLetterTab[0xC9] = true; // 
?, CAPITAL LETTER E WITH ACUTE ACCENT - IsLetterTab[0xCA] = true; // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT - IsLetterTab[0xCB] = true; // ?, CAPITAL LETTER E WITH DIAERESIS - IsLetterTab[0xCC] = true; // ?, CAPITAL LETTER I WITH GRAVE ACCENT - IsLetterTab[0xCD] = true; // ?, CAPITAL LETTER I WITH ACUTE ACCENT - IsLetterTab[0xCE] = true; // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT - IsLetterTab[0xCF] = true; // ?, CAPITAL LETTER I WITH DIAERESIS - IsLetterTab[0xD0] = true; // ?, CAPITAL LETTER ETH - IsLetterTab[0xD1] = true; // ?, CAPITAL LETTER N WITH TILDE - IsLetterTab[0xD2] = true; // ?, CAPITAL LETTER O WITH GRAVE ACCENT - IsLetterTab[0xD3] = true; // ?, CAPITAL LETTER O WITH ACUTE ACCENT - IsLetterTab[0xD4] = true; // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT - IsLetterTab[0xD5] = true; // ?, CAPITAL LETTER O WITH TILDE - IsLetterTab[0xD6] = true; // ?, CAPITAL LETTER O WITH DIAERESIS - IsLetterTab[0xD8] = true; // ?, CAPITAL LETTER O WITH STROKE - IsLetterTab[0xD9] = true; // ?, CAPITAL LETTER U WITH GRAVE ACCENT - IsLetterTab[0xDA] = true; // ?, CAPITAL LETTER U WITH ACUTE ACCENT - IsLetterTab[0xDB] = true; // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT - IsLetterTab[0xDC] = true; // ?, CAPITAL LETTER U WITH DIAERESIS - IsLetterTab[0xDD] = true; // ?, CAPITAL LETTER Y WITH ACUTE ACCENT - IsLetterTab[0xDE] = true; // ?, CAPITAL LETTER THORN - IsLetterTab[0xDF] = true; // ?, SMALL LETTER SHARP S - IsLetterTab[0xE0] = true; // ?, SMALL LETTER A WITH GRAVE ACCENT - IsLetterTab[0xE1] = true; // ?, SMALL LETTER A WITH ACUTE ACCENT - IsLetterTab[0xE2] = true; // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT - IsLetterTab[0xE3] = true; // ?, SMALL LETTER A WITH TILDE - IsLetterTab[0xE4] = true; // ?, SMALL LETTER A WITH DIAERESIS - IsLetterTab[0xE5] = true; // ?, SMALL LETTER A WITH RING ABOVE - IsLetterTab[0xE6] = true; // ?, SMALL LIGATURE AE - IsLetterTab[0xE7] = true; // ?, SMALL LETTER C WITH CEDILLA - IsLetterTab[0xE8] = true; // ?, SMALL LETTER E WITH GRAVE 
ACCENT - IsLetterTab[0xE9] = true; // ?, SMALL LETTER E WITH ACUTE ACCENT - IsLetterTab[0xEA] = true; // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT - IsLetterTab[0xEB] = true; // ?, SMALL LETTER E WITH DIAERESIS - IsLetterTab[0xEC] = true; // ?, SMALL LETTER I WITH GRAVE ACCENT - IsLetterTab[0xED] = true; // ?, SMALL LETTER I WITH ACUTE ACCENT - IsLetterTab[0xEE] = true; // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT - IsLetterTab[0xEF] = true; // ?, SMALL LETTER I WITH DIAERESIS - IsLetterTab[0xF0] = true; // ?, SMALL LETTER ETH - IsLetterTab[0xF1] = true; // ?, SMALL LETTER N WITH TILDE - IsLetterTab[0xF2] = true; // ?, SMALL LETTER O WITH GRAVE ACCENT - IsLetterTab[0xF3] = true; // ?, SMALL LETTER O WITH ACUTE ACCENT - IsLetterTab[0xF4] = true; // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT - IsLetterTab[0xF5] = true; // ?, SMALL LETTER O WITH TILDE - IsLetterTab[0xF6] = true; // ?, SMALL LETTER O WITH DIAERESIS - IsLetterTab[0xF8] = true; // ?, SMALL LETTER O WITH OBLIQUE BAR - IsLetterTab[0xF9] = true; // ?, SMALL LETTER U WITH GRAVE ACCENT - IsLetterTab[0xFA] = true; // ?, SMALL LETTER U WITH ACUTE ACCENT - IsLetterTab[0xFB] = true; // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT - IsLetterTab[0xFC] = true; // ?, SMALL LETTER U WITH DIAERESIS - IsLetterTab[0xFD] = true; // ?, SMALL LETTER Y WITH ACUTE ACCENT - IsLetterTab[0xFE] = true; // ?, SMALL LETTER THORN - IsLetterTab[0xFF] = true; // � , SMALL LETTER Y WITH DIAERESIS -} - -bool LetterTable::isLetterUnicode( sal_Unicode c ) -{ - static CharClass* pCharClass = NULL; - if( pCharClass == NULL ) - pCharClass = new CharClass( Application::GetSettings().GetLanguageTag() ); - rtl::OUString aStr( c ); - bool bRet = pCharClass->isLetter( aStr, 0 ); - return bRet; -} - -// Hilfsfunktion: Zeichen-Flag Testen +// Helper function: test character flag sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags ) { bool bRet = false; @@ -358,24 +261,20 @@ void SimpleTokenizer_Impl::setKeyWords( const char** 
ppKeyWords, sal_uInt16 nCou nKeyWordCount = nCount; } -// Neues Token holen sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos ) { reType = TT_UNKNOWN; - // Position merken rpStartPos = mpActualPos; - // Zeichen untersuchen sal_Unicode c = peekChar(); if( c == CHAR_EOF ) return sal_False; - // Zeichen lesen getChar(); - //*** Alle Moeglichkeiten durchgehen *** + //*** Go through all possibilities *** // Space? if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) ) { @@ -401,7 +300,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, reType = TT_IDENTIFIER; - // Schluesselwort-Tabelle + // Keyword table if (ppListKeyWords != NULL) { int nCount = mpActualPos - rpStartPos; @@ -429,7 +328,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, if (aByteStr.equalsL(RTL_CONSTASCII_STRINGPARAM("rem"))) { - // Alle Zeichen bis Zeilen-Ende oder EOF entfernen + // Remove all characters until end of line or EOF sal_Unicode cPeek = peekChar(); while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) { @@ -456,7 +355,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, sal_Bool bIdentifierChar; do { - // Naechstes Zeichen holen + // Get next character c = peekChar(); bIdentifierChar = isAlpha(c); if( bIdentifierChar ) @@ -471,7 +370,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, sal_Unicode cPeekNext = peekChar(); if (cPeekNext=='-') { - // Alle Zeichen bis Zeilen-Ende oder EOF entfernen + // Remove all characters until end of line or EOF while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) { getChar(); @@ -485,7 +384,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, sal_Unicode cPeekNext = peekChar(); if (cPeekNext=='/') { - // Alle Zeichen bis Zeilen-Ende oder EOF entfernen + // Remove all characters until end 
of line or EOF while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) { getChar(); @@ -496,12 +395,12 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, } else { - // Kommentar ? + // Comment? if ( c == '\'' ) { - c = getChar(); // '/' entfernen + c = getChar(); - // Alle Zeichen bis Zeilen-Ende oder EOF entfernen + // Remove all characters until end of line or EOF sal_Unicode cPeek = c; while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) { @@ -529,36 +428,36 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, reType = TT_OPERATOR; } - // Zahl? + // Number? else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True ) { reType = TT_NUMBER; - // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert + // Number system, 10 = normal, it is changed for Oct/Hex int nRadix = 10; - // Ist es eine Hex- oder Oct-Zahl? + // Is it an Oct or a Hex number? if( c == '&' ) { // Octal? if( peekChar() == 'o' || peekChar() == 'O' ) { - // o entfernen + // remove o getChar(); - nRadix = 8; // Octal-Basis + nRadix = 8; // Octal base - // Alle Ziffern einlesen + // Read all numbers while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) ) c = getChar(); } - // Hex? + // Hexadecimal? 
else if( peekChar() == 'h' || peekChar() == 'H' ) { - // x entfernen + // remove x getChar(); - nRadix = 16; // Hex-Basis + nRadix = 16; // Hexadecimal base - // Alle Ziffern einlesen und puffern + // Read all numbers while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) ) c = getChar(); } @@ -568,38 +467,36 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, } } - // Wenn nicht Oct oder Hex als double ansehen + // When it is not Oct or Hex, then it is double if( reType == TT_NUMBER && nRadix == 10 ) { - // Flag, ob das letzte Zeichen ein Exponent war + // Flag if the last character is an exponent sal_Bool bAfterExpChar = sal_False; - // Alle Ziffern einlesen + // Read all numbers while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) || (bAfterExpChar && peekChar() == '+' ) || (bAfterExpChar && peekChar() == '-' ) ) - // Nach Exponent auch +/- OK + // After exponent +/- are OK, too { - c = getChar(); // Zeichen lesen + c = getChar(); bAfterExpChar = ( c == 'e' || c == 'E' ); } } - - // reType = TT_NUMBER; } // String? else if( testCharFlags( c, CHAR_START_STRING ) == sal_True ) { - // Merken, welches Zeichen den String eroeffnet hat + // Remember which character has opened the string sal_Unicode cEndString = c; if( c == '[' ) cEndString = ']'; - // Alle Ziffern einlesen und puffern + // Read all characters while( peekChar() != cEndString ) { - // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht + // Detect EOF before getChar(), so we do not loose EOF if( peekChar() == CHAR_EOF ) { // ERROR: unterminated string literal @@ -615,7 +512,6 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, } } - // Zeichen lesen if( reType != TT_ERROR ) { getChar(); @@ -626,25 +522,24 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, } } - // Zeilenende? + // End of line? 
else if( testCharFlags( c, CHAR_EOL ) == sal_True ) { - // Falls ein weiteres anderes EOL-Char folgt, weg damit + // If another EOL character comes, read it sal_Unicode cNext = peekChar(); if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True ) getChar(); - // Positions-Daten auf Zeilen-Beginn setzen + // Set position data at the line start nCol = 0; nLine++; reType = TT_EOL; } - // Alles andere bleibt TT_UNKNOWN + // All other will remain TT_UNKNOWN - - // End-Position eintragen + // Save end position rpEndPos = mpActualPos; return sal_True; } @@ -653,49 +548,47 @@ SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLangua { memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) ); - // Zeichen-Tabelle fuellen + // Fill character table sal_uInt16 i; - // Zulaessige Zeichen fuer Identifier + // Allowed characters for identifiers sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ); for( i = 'a' ; i <= 'z' ; i++ ) aCharTypeTab[i] |= nHelpMask; for( i = 'A' ; i <= 'Z' ; i++ ) aCharTypeTab[i] |= nHelpMask; - // '_' extra eintragen aCharTypeTab[(int)'_'] |= nHelpMask; - // AB 23.6.97: '$' ist auch erlaubt aCharTypeTab[(int)'$'] |= nHelpMask; - // Ziffern (Identifier und Number ist moeglich) + // Digit (can be identifier and number) nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER | CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER ); for( i = '0' ; i <= '9' ; i++ ) aCharTypeTab[i] |= nHelpMask; - // e und E sowie . von Hand ergaenzen + // Add e, E, . 
and & here manually aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER; aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER; aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER ); aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER; - // Hex-Ziffern + // Hexadecimal digit for( i = 'a' ; i <= 'f' ; i++ ) aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; for( i = 'A' ; i <= 'F' ; i++ ) aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; - // Oct-Ziffern + // Octal digit for( i = '0' ; i <= '7' ; i++ ) aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER; - // String-Beginn/End-Zeichen + // String literal start/end characters aCharTypeTab[(int)'\''] |= CHAR_START_STRING; aCharTypeTab[(int)'\"'] |= CHAR_START_STRING; aCharTypeTab[(int)'['] |= CHAR_START_STRING; aCharTypeTab[(int)'`'] |= CHAR_START_STRING; - // Operator-Zeichen + // Operator characters aCharTypeTab[(int)'!'] |= CHAR_OPERATOR; aCharTypeTab[(int)'%'] |= CHAR_OPERATOR; // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140 @@ -724,7 +617,7 @@ SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLangua aCharTypeTab[(int)' ' ] |= CHAR_SPACE; aCharTypeTab[(int)'\t'] |= CHAR_SPACE; - // Zeilen-Ende-Zeichen + // End of line characters aCharTypeTab[(int)'\r'] |= CHAR_EOL; aCharTypeTab[(int)'\n'] |= CHAR_EOL; @@ -743,22 +636,21 @@ SimpleTokenizer_Impl* getSimpleTokenizer( void ) return pSimpleTokenizer; } -// Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource ) { - // Position auf den Anfang des Source-Strings setzen + // Set the position to the beginning of the source string mpStringBegin = mpActualPos = aSource->GetBuffer(); - // Zeile und Spalte initialisieren + // Initialize row and column nLine = nParseLine; nCol = 0L; - // Variablen fuer die Out-Parameter + // Variables for the out parameter TokenTypes eType; const sal_Unicode* pStartPos; const sal_Unicode* pEndPos; - // Schleife ueber alle Tokens + // Loop 
over all the tokens sal_uInt16 nTokenCount = 0; while( getNextToken( eType, pStartPos, pEndPos ) ) nTokenCount++; @@ -769,19 +661,19 @@ sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine, /*out*/HighlightPortions& portions ) { - // Position auf den Anfang des Source-Strings setzen + // Set the position to the beginning of the source string mpStringBegin = mpActualPos = rLine.GetBuffer(); - // Zeile und Spalte initialisieren + // Initialize row and column nLine = nParseLine; nCol = 0L; - // Variablen fuer die Out-Parameter + // Variables for the out parameter TokenTypes eType; const sal_Unicode* pStartPos; const sal_Unicode* pEndPos; - // Schleife ueber alle Tokens + // Loop over all the tokens while( getNextToken( eType, pStartPos, pEndPos ) ) { HighlightPortion portion; @@ -795,9 +687,6 @@ void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const St } -////////////////////////////////////////////////////////////////////////// -// Implementierung des SyntaxHighlighter - SyntaxHighlighter::SyntaxHighlighter() { m_pSimpleTokenizer = 0; |