1 files changed, 906 insertions, 0 deletions
diff --git a/svtools/source/edit/syntaxhighlight.cxx b/svtools/source/edit/syntaxhighlight.cxx
new file mode 100644
index 000000000000..87585f5b2587
--- /dev/null
+++ b/svtools/source/edit/syntaxhighlight.cxx
@@ -0,0 +1,906 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org.  If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+// MARKER(update_precomp.py): autogen include statement, do not remove
+#include "precompiled_svtools.hxx"
+
+#include <svtools/syntaxhighlight.hxx>
+
+#include <unotools/charclass.hxx>
+#include <tools/debug.hxx>
+
+
+// ##########################################################################
+// ATTENTION: all these words needs to be in small caps
+// ##########################################################################
+static const char* strListBasicKeyWords[] = {
+    "access",
+    "alias",
+    "and",
+    "any",
+    "append",
+    "as",
+    "base",
+    "binary",
+    "boolean",
+    "byref",
+    "byte",
+    "byval",
+    "call",
+    "case",
+    "cdecl",
+    "classmodule",
+    "close",
+    "compare",
+    "compatible",
+    "const",
+    "currency",
+    "date",
+    "declare",
+    "defbool",
+    "defcur",
+    "defdate",
+    "defdbl",
+    "deferr",
+    "defint",
+    "deflng",
+    "defobj",
+    "defsng",
+    "defstr",
+    "defvar",
+    "dim",
+    "do",
+    "double",
+    "each",
+    "else",
+    "elseif",
+    "end",
+    "end enum",
+    "end function",
+    "end if",
+    "end select",
+    "end sub",
+    "end type",
+    "endif",
+    "enum",
+    "eqv",
+    "erase",
+    "error",
+    "exit",
+    "explicit",
+    "for",
+    "function",
+    "get",
+    "global",
+    "gosub",
+    "goto",
+    "if",
+    "imp",
+    "implements",
+    "in",
+    "input",
+    "integer",
+    "is",
+    "let",
+    "lib",
+    "like",
+    "line",
+    "line input",
+    "local",
+    "lock",
+    "long",
+    "loop",
+    "lprint",
+    "lset",
+    "mod",
+    "name",
+    "new",
+    "next",
+    "not",
+    "object",
+    "on",
+    "open",
+    "option",
+    "optional",
+    "or",
+    "output",
+    "preserve",
+    "print",
+    "private",
+    "property",
+    "public",
+    "random",
+    "read",
+    "redim",
+    "rem",
+    "resume",
+    "return",
+    "rset",
+    "select",
+    "set",
+    "shared",
+    "single",
+    "static",
+    "step",
+    "stop",
+    "string",
+    "sub",
+    "system",
+    "text",
+    "then",
+    "to",
+    "type",
+    "typeof",
+    "until",
+    "variant",
+    "wend",
+    "while",
+    "with",
+    "write",
+    "xor"
+};
+
+
+static const char* strListSqlKeyWords[] = {
+    "all",
+    "and",
+    "any",
+    "as",
+    "asc",
+    "avg",
+    "between",
+    "by",
+    "cast",
+    "corresponding",
+    "count",
+    "create",
+    "cross",
+    "delete",
+    "desc",
+    "distinct",
+    "drop",
+    "escape",
+    "except",
+    "exists",
+    "false",
+    "from",
+    "full",
+    "global",
+    "group",
+    "having",
+    "in",
+    "inner",
+    "insert",
+    "intersect",
+    "into",
+    "is",
+    "join",
+    "left",
+    "like",
+    "local",
+    "match",
+    "max",
+    "min",
+    "natural",
+    "not",
+    "null",
+    "on",
+    "or",
+    "order",
+    "outer",
+    "right",
+    "select",
+    "set",
+    "some",
+    "sum",
+    "table",
+    "temporary",
+    "true",
+    "union",
+    "unique",
+    "unknown",
+    "update",
+    "using",
+    "values",
+    "where"
+};
+
+
+extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
+{
+    return strcmp( (char *)arg1, *(char **)arg2 );
+}
+
+
+class LetterTable
+{
+    bool        IsLetterTab[256];
+
+public:
+    LetterTable( void );
+
+    inline bool isLetter( sal_Unicode c )
+    {
+        bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c );
+        return bRet;
+    }
+    bool isLetterUnicode( sal_Unicode c );
+};
+
+class BasicSimpleCharClass
+{
+    static LetterTable aLetterTable;
+
+public:
+    static BOOL isAlpha( sal_Unicode c, bool bCompatible )
+    {
+        BOOL bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+                    || (bCompatible && aLetterTable.isLetter( c ));
+        return bRet;
+    }
+
+    static BOOL isDigit( sal_Unicode c )
+    {
+        BOOL bRet = (c >= '0' && c <= '9');
+        return bRet;
+    }
+
+    static BOOL isAlphaNumeric( sal_Unicode c, bool bCompatible )
+    {
+        BOOL bRet = isDigit( c ) || isAlpha( c, bCompatible );
+        return bRet;
+    }
+};
+
+LetterTable BasicSimpleCharClass::aLetterTable;
+
+LetterTable::LetterTable( void )
+{
+    for( int i = 0 ; i < 256 ; ++i )
+        IsLetterTab[i] = false;
+
+    IsLetterTab[0xC0] = true;   // � , CAPITAL LETTER A WITH GRAVE ACCENT
+    IsLetterTab[0xC1] = true;   // � , CAPITAL LETTER A WITH ACUTE ACCENT
+    IsLetterTab[0xC2] = true;   // � , CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xC3] = true;   // � , CAPITAL LETTER A WITH TILDE
+    IsLetterTab[0xC4] = true;   // � , CAPITAL LETTER A WITH DIAERESIS
+    IsLetterTab[0xC5] = true;   // � , CAPITAL LETTER A WITH RING ABOVE
+    IsLetterTab[0xC6] = true;   // � , CAPITAL LIGATURE AE
+    IsLetterTab[0xC7] = true;   // � , CAPITAL LETTER C WITH CEDILLA
+    IsLetterTab[0xC8] = true;   // � , CAPITAL LETTER E WITH GRAVE ACCENT
+    IsLetterTab[0xC9] = true;   // � , CAPITAL LETTER E WITH ACUTE ACCENT
+    IsLetterTab[0xCA] = true;   // � , CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xCB] = true;   // � , CAPITAL LETTER E WITH DIAERESIS
+    IsLetterTab[0xCC] = true;   // � , CAPITAL LETTER I WITH GRAVE ACCENT
+    IsLetterTab[0xCD] = true;   // � , CAPITAL LETTER I WITH ACUTE ACCENT
+    IsLetterTab[0xCE] = true;   // � , CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xCF] = true;   // � , CAPITAL LETTER I WITH DIAERESIS
+    IsLetterTab[0xD0] = true;   // � , CAPITAL LETTER ETH
+    IsLetterTab[0xD1] = true;   // � , CAPITAL LETTER N WITH TILDE
+    IsLetterTab[0xD2] = true;   // � , CAPITAL LETTER O WITH GRAVE ACCENT
+    IsLetterTab[0xD3] = true;   // � , CAPITAL LETTER O WITH ACUTE ACCENT
+    IsLetterTab[0xD4] = true;   // � , CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xD5] = true;   // � , CAPITAL LETTER O WITH TILDE
+    IsLetterTab[0xD6] = true;   // � , CAPITAL LETTER O WITH DIAERESIS
+    IsLetterTab[0xD8] = true;   // � , CAPITAL LETTER O WITH STROKE
+    IsLetterTab[0xD9] = true;   // � , CAPITAL LETTER U WITH GRAVE ACCENT
+    IsLetterTab[0xDA] = true;   // � , CAPITAL LETTER U WITH ACUTE ACCENT
+    IsLetterTab[0xDB] = true;   // � , CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xDC] = true;   // � , CAPITAL LETTER U WITH DIAERESIS
+    IsLetterTab[0xDD] = true;   // � , CAPITAL LETTER Y WITH ACUTE ACCENT
+    IsLetterTab[0xDE] = true;   // � , CAPITAL LETTER THORN
+    IsLetterTab[0xDF] = true;   // � , SMALL LETTER SHARP S
+    IsLetterTab[0xE0] = true;   // � , SMALL LETTER A WITH GRAVE ACCENT
+    IsLetterTab[0xE1] = true;   // � , SMALL LETTER A WITH ACUTE ACCENT
+    IsLetterTab[0xE2] = true;   // � , SMALL LETTER A WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xE3] = true;   // � , SMALL LETTER A WITH TILDE
+    IsLetterTab[0xE4] = true;   // � , SMALL LETTER A WITH DIAERESIS
+    IsLetterTab[0xE5] = true;   // � , SMALL LETTER A WITH RING ABOVE
+    IsLetterTab[0xE6] = true;   // � , SMALL LIGATURE AE
+    IsLetterTab[0xE7] = true;   // � , SMALL LETTER C WITH CEDILLA
+    IsLetterTab[0xE8] = true;   // � , SMALL LETTER E WITH GRAVE ACCENT
+    IsLetterTab[0xE9] = true;   // � , SMALL LETTER E WITH ACUTE ACCENT
+    IsLetterTab[0xEA] = true;   // � , SMALL LETTER E WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xEB] = true;   // � , SMALL LETTER E WITH DIAERESIS
+    IsLetterTab[0xEC] = true;   // � , SMALL LETTER I WITH GRAVE ACCENT
+    IsLetterTab[0xED] = true;   // � , SMALL LETTER I WITH ACUTE ACCENT
+    IsLetterTab[0xEE] = true;   // � , SMALL LETTER I WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xEF] = true;   // � , SMALL LETTER I WITH DIAERESIS
+    IsLetterTab[0xF0] = true;   // � , SMALL LETTER ETH
+    IsLetterTab[0xF1] = true;   // � , SMALL LETTER N WITH TILDE
+    IsLetterTab[0xF2] = true;   // � , SMALL LETTER O WITH GRAVE ACCENT
+    IsLetterTab[0xF3] = true;   // � , SMALL LETTER O WITH ACUTE ACCENT
+    IsLetterTab[0xF4] = true;   // � , SMALL LETTER O WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xF5] = true;   // � , SMALL LETTER O WITH TILDE
+    IsLetterTab[0xF6] = true;   // � , SMALL LETTER O WITH DIAERESIS
+    IsLetterTab[0xF8] = true;   // � , SMALL LETTER O WITH OBLIQUE BAR
+    IsLetterTab[0xF9] = true;   // � , SMALL LETTER U WITH GRAVE ACCENT
+    IsLetterTab[0xFA] = true;   // � , SMALL LETTER U WITH ACUTE ACCENT
+    IsLetterTab[0xFB] = true;   // � , SMALL LETTER U WITH CIRCUMFLEX ACCENT
+    IsLetterTab[0xFC] = true;   // � , SMALL LETTER U WITH DIAERESIS
+    IsLetterTab[0xFD] = true;   // � , SMALL LETTER Y WITH ACUTE ACCENT
+    IsLetterTab[0xFE] = true;   // � , SMALL LETTER THORN
+    IsLetterTab[0xFF] = true;   // � , SMALL LETTER Y WITH DIAERESIS
+}
+
+bool LetterTable::isLetterUnicode( sal_Unicode c )
+{
+    static CharClass* pCharClass = NULL;
+    if( pCharClass == NULL )
+        pCharClass = new CharClass( Application::GetSettings().GetLocale() );
+    String aStr( c );
+    bool bRet = pCharClass->isLetter( aStr, 0 );
+    return bRet;
+}
+
+// Hilfsfunktion: Zeichen-Flag Testen
+BOOL SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, USHORT nTestFlags )
+{
+    bool bRet = false;
+    if( c != 0 && c <= 255 )
+    {
+        bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
+    }
+    else if( c > 255 )
+    {
+        bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
+            ? BasicSimpleCharClass::isAlpha( c, true ) : false;
+    }
+    return bRet;
+}
+
+void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, UINT16 nCount )
+{
+    ppListKeyWords = ppKeyWords;
+    nKeyWordCount = nCount;
+}
+
+// Neues Token holen
+BOOL SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
+    /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
+{
+    reType = TT_UNKNOWN;
+
+    // Position merken
+    rpStartPos = mpActualPos;
+
+    // Zeichen untersuchen
+    sal_Unicode c = peekChar();
+    if( c == CHAR_EOF )
+        return FALSE;
+
+    // Zeichen lesen
+    getChar();
+
+    //*** Alle Moeglichkeiten durchgehen ***
+    // Space?
+    if ( (testCharFlags( c, CHAR_SPACE ) == TRUE) )
+    {
+        while( testCharFlags( peekChar(), CHAR_SPACE ) == TRUE )
+            getChar();
+
+        reType = TT_WHITESPACE;
+    }
+
+    // Identifier?
+    else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == TRUE) )
+    {
+        BOOL bIdentifierChar;
+        do
+        {
+            // Naechstes Zeichen holen
+            c = peekChar();
+            bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
+            if( bIdentifierChar )
+                getChar();
+        }
+        while( bIdentifierChar );
+
+        reType = TT_IDENTIFIER;
+
+        // Schluesselwort-Tabelle
+        if (ppListKeyWords != NULL)
+        {
+            int nCount = mpActualPos - rpStartPos;
+
+            // No keyword if string contains char > 255
+            bool bCanBeKeyword = true;
+            for( int i = 0 ; i < nCount ; i++ )
+            {
+                if( rpStartPos[i] > 255 )
+                {
+                    bCanBeKeyword = false;
+                    break;
+                }
+            }
+
+            if( bCanBeKeyword )
+            {
+                String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) );
+                ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US );
+                aByteStr.ToLowerAscii();
+                if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
+                                                                        compare_strings ) )
+                {
+                    reType = TT_KEYWORDS;
+
+                    if ( aByteStr.Equals( "rem" ) )
+                    {
+                        // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
+                        sal_Unicode cPeek = peekChar();
+                        while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == FALSE )
+                        {
+                            c = getChar();
+                            cPeek = peekChar();
+                        }
+
+                        reType = TT_COMMENT;
+                    }
+                }
+            }
+        }
+    }
+
+    // Operator?
+    // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
+    else if ( ( testCharFlags( c, CHAR_OPERATOR ) == TRUE ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
+    {
+        // paramters for SQL view
+        if ( (c==':') || (c=='?'))
+        {
+            if (c!='?')
+            {
+                BOOL bIdentifierChar;
+                do
+                {
+                    // Naechstes Zeichen holen
+                    c = peekChar();
+                    bIdentifierChar =  BasicSimpleCharClass::isAlpha( c, true );
+                    if( bIdentifierChar )
+                        getChar();
+                }
+                while( bIdentifierChar );
+            }
+            reType = TT_PARAMETER;
+        }
+        else if ((c=='-'))
+        {
+            sal_Unicode cPeekNext = peekChar();
+            if (cPeekNext=='-')
+            {
+                // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
+                while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == FALSE )
+                {
+                    getChar();
+                    cPeekNext = peekChar();
+                }
+                reType = TT_COMMENT;
+            }
+        }
+       else if (c=='/')
+       {
+           sal_Unicode cPeekNext = peekChar();
+           if (cPeekNext=='/')
+           {
+               // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
+               while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == FALSE )
+               {
+                   getChar();
+                   cPeekNext = peekChar();
+               }
+               reType = TT_COMMENT;
+           }
+       }
+        else
+        {
+            // Kommentar ?
+            if ( c == '\'' )
+            {
+                c = getChar();  // '/' entfernen
+
+                // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
+                sal_Unicode cPeek = peekChar();
+                while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == FALSE )
+                {
+                    getChar();
+                    cPeek = peekChar();
+                }
+
+                reType = TT_COMMENT;
+            }
+
+            // Echter Operator, kann hier einfach behandelt werden,
+            // da nicht der wirkliche Operator, wie z.B. += interessiert,
+            // sondern nur die Tatsache, dass es sich um einen handelt.
+            if( reType != TT_COMMENT )
+            {
+                reType = TT_OPERATOR;
+            }
+
+        }
+    }
+
+    // Objekt-Trenner? Muss vor Number abgehandelt werden
+    else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
+    {
+        reType = TT_OPERATOR;
+    }
+
+    // Zahl?
+    else if( testCharFlags( c, CHAR_START_NUMBER ) == TRUE )
+    {
+        reType = TT_NUMBER;
+
+        // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert
+        int nRadix = 10;
+
+        // Ist es eine Hex- oder Oct-Zahl?
+        if( c == '&' )
+        {
+            // Octal?
+            if( peekChar() == 'o' || peekChar() == 'O' )
+            {
+                // o entfernen
+                getChar();
+                nRadix = 8;     // Octal-Basis
+
+                // Alle Ziffern einlesen
+                while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
+                    c = getChar();
+            }
+            // Hex?
+            else if( peekChar() == 'h' || peekChar() == 'H' )
+            {
+                // x entfernen
+                getChar();
+                nRadix = 16;     // Hex-Basis
+
+                // Alle Ziffern einlesen und puffern
+                while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
+                    c = getChar();
+            }
+            else
+            {
+                reType = TT_OPERATOR;
+            }
+        }
+
+        // Wenn nicht Oct oder Hex als double ansehen
+        if( reType == TT_NUMBER && nRadix == 10 )
+        {
+            // Flag, ob das letzte Zeichen ein Exponent war
+            BOOL bAfterExpChar = FALSE;
+
+            // Alle Ziffern einlesen
+            while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
+                    (bAfterExpChar && peekChar() == '+' ) ||
+                    (bAfterExpChar && peekChar() == '-' ) )
+                    // Nach Exponent auch +/- OK
+            {
+                c = getChar();                  // Zeichen lesen
+                bAfterExpChar = ( c == 'e' || c == 'E' );
+            }
+        }
+
+        // reType = TT_NUMBER;
+    }
+
+    // String?
+    else if( testCharFlags( c, CHAR_START_STRING ) == TRUE )
+    {
+        // Merken, welches Zeichen den String eroeffnet hat
+        sal_Unicode cEndString = c;
+        if( c == '[' )
+            cEndString = ']';
+
+        // Alle Ziffern einlesen und puffern
+        while( peekChar() != cEndString )
+        {
+            // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht
+            if( peekChar() == CHAR_EOF )
+            {
+                // ERROR: unterminated string literal
+                reType = TT_ERROR;
+                break;
+            }
+            c = getChar();
+            if( testCharFlags( c, CHAR_EOL ) == TRUE )
+            {
+                // ERROR: unterminated string literal
+                reType = TT_ERROR;
+                break;
+            }
+        }
+
+        //  Zeichen lesen
+        if( reType != TT_ERROR )
+        {
+            getChar();
+            if( cEndString == ']' )
+                reType = TT_IDENTIFIER;
+            else
+                reType = TT_STRING;
+        }
+    }
+
+    // Zeilenende?
+    else if( testCharFlags( c, CHAR_EOL ) == TRUE )
+    {
+        // Falls ein weiteres anderes EOL-Char folgt, weg damit
+        sal_Unicode cNext = peekChar();
+        if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == TRUE )
+            getChar();
+
+        // Positions-Daten auf Zeilen-Beginn setzen
+        nCol = 0;
+        nLine++;
+
+        reType = TT_EOL;
+    }
+
+    // Alles andere bleibt TT_UNKNOWN
+
+
+    // End-Position eintragen
+    rpEndPos = mpActualPos;
+    return TRUE;
+}
+
+String SimpleTokenizer_Impl::getTokStr
+    ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
+{
+    return String( pStartPos, (USHORT)( pEndPos - pStartPos ) );
+}
+
+#ifdef DBG_UTIL
+// TEST: Token ausgeben
+String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType,
+    /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
+{
+    String aOut;
+    switch( eType )
+    {
+        case TT_UNKNOWN:    aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break;
+        case TT_IDENTIFIER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break;
+        case TT_WHITESPACE: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break;
+        case TT_NUMBER:     aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break;
+        case TT_STRING:     aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break;
+        case TT_EOL:        aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break;
+        case TT_COMMENT:    aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break;
+        case TT_ERROR:      aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break;
+        case TT_OPERATOR:   aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break;
+        case TT_KEYWORDS:   aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break;
+        case TT_PARAMETER:  aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break;
+    }
+    if( eType != TT_EOL )
+    {
+        aOut += String( pStartPos, (USHORT)( pEndPos - pStartPos ) );
+    }
+    aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") );
+    return aOut;
+}
+#endif
+
+SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
+{
+    memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
+
+    // Zeichen-Tabelle fuellen
+    USHORT i;
+
+    // Zulaessige Zeichen fuer Identifier
+    USHORT nHelpMask = (USHORT)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
+    for( i = 'a' ; i <= 'z' ; i++ )
+        aCharTypeTab[i] |= nHelpMask;
+    for( i = 'A' ; i <= 'Z' ; i++ )
+        aCharTypeTab[i] |= nHelpMask;
+    // '_' extra eintragen
+    aCharTypeTab[(int)'_'] |= nHelpMask;
+    // AB 23.6.97: '$' ist auch erlaubt
+    aCharTypeTab[(int)'$'] |= nHelpMask;
+
+    // Ziffern (Identifier und Number ist moeglich)
+    nHelpMask = (USHORT)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
+                         CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
+    for( i = '0' ; i <= '9' ; i++ )
+        aCharTypeTab[i] |= nHelpMask;
+
+    // e und E sowie . von Hand ergaenzen
+    aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
+    aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
+    aCharTypeTab[(int)'.'] |= (USHORT)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
+    aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
+
+    // Hex-Ziffern
+    for( i = 'a' ; i <= 'f' ; i++ )
+        aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
+    for( i = 'A' ; i <= 'F' ; i++ )
+        aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
+
+    // Oct-Ziffern
+    for( i = '0' ; i <= '7' ; i++ )
+        aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
+
+    // String-Beginn/End-Zeichen
+    aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
+    aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
+    aCharTypeTab[(int)'[']  |= CHAR_START_STRING;
+    aCharTypeTab[(int)'`']  |= CHAR_START_STRING;
+
+    // Operator-Zeichen
+    aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
+    // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR;     Removed because of #i14140
+    aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)','] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
+    // aCharTypeTab[(int)'['] |= CHAR_OPERATOR;     Removed because of #i17826
+    aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
+
+    // Space
+    aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
+    aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
+
+    // Zeilen-Ende-Zeichen
+    aCharTypeTab[(int)'\r'] |= CHAR_EOL;
+    aCharTypeTab[(int)'\n'] |= CHAR_EOL;
+
+    ppListKeyWords = NULL;
+}
+
+SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
+{
+}
+
+SimpleTokenizer_Impl* getSimpleTokenizer( void )
+{
+    static SimpleTokenizer_Impl* pSimpleTokenizer = NULL;
+    if( !pSimpleTokenizer )
+        pSimpleTokenizer = new SimpleTokenizer_Impl();
+    return pSimpleTokenizer;
+}
+
+// Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul
+UINT16 SimpleTokenizer_Impl::parseLine( UINT32 nParseLine, const String* aSource )
+{
+    // Position auf den Anfang des Source-Strings setzen
+    mpStringBegin = mpActualPos = aSource->GetBuffer();
+
+    // Zeile und Spalte initialisieren
+    nLine = nParseLine;
+    nCol = 0L;
+
+    // Variablen fuer die Out-Parameter
+    TokenTypes eType;
+    const sal_Unicode* pStartPos;
+    const sal_Unicode* pEndPos;
+
+    // Schleife ueber alle Tokens
+    UINT16 nTokenCount = 0;
+    while( getNextToken( eType, pStartPos, pEndPos ) )
+        nTokenCount++;
+
+    return nTokenCount;
+}
+
+void SimpleTokenizer_Impl::getHighlightPortions( UINT32 nParseLine, const String& rLine,
+                                                    /*out*/HighlightPortions& portions  )
+{
+    // Position auf den Anfang des Source-Strings setzen
+    mpStringBegin = mpActualPos = rLine.GetBuffer();
+
+    // Zeile und Spalte initialisieren
+    nLine = nParseLine;
+    nCol = 0L;
+
+    // Variablen fuer die Out-Parameter
+    TokenTypes eType;
+    const sal_Unicode* pStartPos;
+    const sal_Unicode* pEndPos;
+
+    // Schleife ueber alle Tokens
+    while( getNextToken( eType, pStartPos, pEndPos ) )
+    {
+        HighlightPortion portion;
+
+        portion.nBegin = (UINT16)(pStartPos - mpStringBegin);
+        portion.nEnd = (UINT16)(pEndPos - mpStringBegin);
+        portion.tokenType = eType;
+
+        portions.push_back(portion);
+    }
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Implementierung des SyntaxHighlighter
+
+SyntaxHighlighter::SyntaxHighlighter()
+{
+    m_pSimpleTokenizer = 0;
+    m_pKeyWords = NULL;
+    m_nKeyWordCount = 0;
+}
+
+SyntaxHighlighter::~SyntaxHighlighter()
+{
+    delete m_pSimpleTokenizer;
+    delete m_pKeyWords;
+}
+
+void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
+{
+    eLanguage = eLanguage_;
+    delete m_pSimpleTokenizer;
+    m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
+
+    switch (eLanguage)
+    {
+        case HIGHLIGHT_BASIC:
+            m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords,
+                                            sizeof( strListBasicKeyWords ) / sizeof( char* ));
+            break;
+        case HIGHLIGHT_SQL:
+            m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords,
+                                            sizeof( strListSqlKeyWords ) / sizeof( char* ));
+            break;
+        default:
+            m_pSimpleTokenizer->setKeyWords( NULL, 0 );
+    }
+}
+
+const Range SyntaxHighlighter::notifyChange( UINT32 nLine, INT32 nLineCountDifference,
+                                const String* pChangedLines, UINT32 nArrayLength)
+{
+    (void)nLineCountDifference;
+
+    for( UINT32 i=0 ; i < nArrayLength ; i++ )
+        m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]);
+
+    return Range( nLine, nLine + nArrayLength-1 );
+}
+
+void SyntaxHighlighter::getHighlightPortions( UINT32 nLine, const String& rLine,
+                                            /*out*/HighlightPortions& portions )
+{
+    m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );
+}