Move SyntaxHighlighter class from svtools to comphelper

We use this class in helpcompiler, and it is not desirable to compile svtools (thus half of LibreOffice) for a build tool in a cross-compiling environment. Change-Id: I5e6bc3e576af41eb03c1420dd347c542306f69fa
author: Andras Timar <atimar@suse.com> 2013-02-15 13:02:10 +0100
committer: Andras Timar <atimar@suse.com> 2013-02-16 12:55:03 +0100
commit: c16e9f4ed97f65357e9986f46ad88ee9f2237997 (patch)
tree: 9da5c0056d4aad772a72d57f7bbf07d24ec2478a /comphelper
parent: c4aa2c4d7eb1cef0f3b172d1dbc4e51e9b379b80 (diff)
4 files changed, 911 insertions, 1 deletions
diff --git a/comphelper/Library_comphelper.mk b/comphelper/Library_comphelper.mk
index 5c7fcd5a8c9a..93b0f3b48499 100644
--- a/comphelper/Library_comphelper.mk
+++ b/comphelper/Library_comphelper.mk
@@ -35,7 +35,11 @@ $(eval $(call gb_Library_add_defs,comphelper,\
        -DCOMPHELPER_DLLIMPLEMENTATION \
 ))
 
-$(eval $(call gb_Library_use_external,comphelper,boost_headers))
+$(eval $(call gb_Library_use_externals,comphelper,\
+    boost_headers \
+    icuuc \
+    icu_headers \
+))
 
 $(eval $(call gb_Library_use_libraries,comphelper,\
     cppu \
@@ -108,6 +112,7 @@ $(eval $(call gb_Library_add_exception_objects,comphelper,\
     comphelper/source/misc/storagehelper \
     comphelper/source/misc/string \
     comphelper/source/misc/synchronousdispatch \
+    comphelper/source/misc/syntaxhighlight \
     comphelper/source/misc/types \
     comphelper/source/misc/weak \
     comphelper/source/misc/weakeventlistener \
diff --git a/comphelper/Package_inc.mk b/comphelper/Package_inc.mk
index 195a5c97c4a7..35552f6d58fc 100644
--- a/comphelper/Package_inc.mk
+++ b/comphelper/Package_inc.mk
@@ -124,5 +124,6 @@ $(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/sequenceashashma
 $(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/configuration.hxx,comphelper/configuration.hxx))
 $(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/configurationhelper.hxx,comphelper/configurationhelper.hxx))
 $(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/newarray.hxx,comphelper/newarray.hxx))
+$(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/syntaxhighlight.hxx,comphelper/syntaxhighlight.hxx))
 
 # vim: set noet sw=4 ts=4:
diff --git a/comphelper/inc/comphelper/syntaxhighlight.hxx b/comphelper/inc/comphelper/syntaxhighlight.hxx
new file mode 100644
index 000000000000..11a57db6ff05
--- /dev/null
+++ b/comphelper/inc/comphelper/syntaxhighlight.hxx
@@ -0,0 +1,165 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+#ifndef _COMPHELPER_SYNTAXHIGHLIGHT_HXX
+#define _COMPHELPER_SYNTAXHIGHLIGHT_HXX
+
+#include <vector>
+#include <rtl/ustring.hxx>
+
+#include <comphelper/comphelperdllapi.h>
+
+
+#if defined CDECL
+#undef CDECL
+#endif
+
+// for the bsearch
+#ifdef WNT
+#define CDECL _cdecl
+#endif
+#if defined(UNX)
+#define CDECL
+#endif
+#ifdef UNX
+#include <sys/resource.h>
+#endif
+
+
+// Token types (TT_...) reported by the tokenizer for each portion of a line
+enum TokenTypes
+{
+    TT_UNKNOWN,     // no tokenizer rule matched
+    TT_IDENTIFIER,  // identifier, including [bracketed] names
+    TT_WHITESPACE,  // run of blanks and tabs
+    TT_NUMBER,      // decimal, &O octal or &H hexadecimal literal
+    TT_STRING,      // quoted string literal
+    TT_EOL,         // line end (CR or LF, optionally followed by the other one)
+    TT_COMMENT,     // comment running to the end of the line
+    TT_ERROR,       // unterminated string literal
+    TT_OPERATOR,    // operator or punctuation character
+    TT_KEYWORDS,    // identifier found in the active keyword table
+    TT_PARAMETER    // '?' or ':name' parameter marker
+};
+
+// One token of a line: offsets into that line, nEnd is one past the last character
+struct HighlightPortion { sal_uInt16 nBegin; sal_uInt16 nEnd; TokenTypes tokenType; };
+
+typedef std::vector<HighlightPortion> HighlightPortions;
+
+/////////////////////////////////////////////////////////////////////////
+// Helper class for examining JavaScript modules, initially for looking
+// up the functions, later to be used for syntax highlighting as well
+
+//  Flags for character properties (one bit each, may be combined)
+#define CHAR_START_IDENTIFIER   0x0001
+#define CHAR_IN_IDENTIFIER      0x0002
+#define CHAR_START_NUMBER       0x0004
+#define CHAR_IN_NUMBER          0x0008
+#define CHAR_IN_HEX_NUMBER      0x0010
+#define CHAR_IN_OCT_NUMBER      0x0020
+#define CHAR_START_STRING       0x0040
+#define CHAR_OPERATOR           0x0080
+#define CHAR_SPACE              0x0100
+#define CHAR_EOL                0x0200
+
+#define CHAR_EOF                0x00    // character value (NUL) ending the input, not a flag
+
+
+// Language mode of the highlighter (may later be differentiated more
+// finely with a keyword list, C comment flag)
+enum HighlighterLanguage
+{
+    HIGHLIGHT_BASIC,
+    HIGHLIGHT_SQL
+};
+
+class SimpleTokenizer_Impl
+{   // Tokenizer that splits one line of text into TokenTypes-classified portions
+    HighlighterLanguage aLanguage;
+    // Character info table: CHAR_* flags for the characters 0..255
+    sal_uInt16 aCharTypeTab[256];
+    // Start of the line being tokenized and the current read position
+    const sal_Unicode* mpStringBegin;
+    const sal_Unicode* mpActualPos;
+
+    // Line and column
+    sal_uInt32 nLine;
+    sal_uInt32 nCol;
+    // Look at / consume the character at mpActualPos (no bounds checking)
+    sal_Unicode peekChar( void )    { return *mpActualPos; }
+    sal_Unicode getChar( void )     { nCol++; return *mpActualPos++; }
+
+    // Helper function: test character flags
+    sal_Bool testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags );
+
+    // Fetch the next token; returns sal_False when nothing is left
+    sal_Bool getNextToken( /*out*/TokenTypes& reType,
+        /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos );
+    // Keyword table (pointer only, set via setKeyWords) and its entry count
+    const char** ppListKeyWords;
+    sal_uInt16 nKeyWordCount;
+
+public:
+    SimpleTokenizer_Impl( HighlighterLanguage aLang = HIGHLIGHT_BASIC );
+    ~SimpleTokenizer_Impl( void );
+
+    sal_uInt16 parseLine( sal_uInt32 nLine, const OUString* aSource );
+    void getHighlightPortions( sal_uInt32 nParseLine, const OUString& rLine,
+                                                    /*out*/HighlightPortions& portions );
+    void setKeyWords( const char** ppKeyWords, sal_uInt16 nCount );
+};
+
+
+//*** SyntaxHighlighter class ***
+// Concept: the highlighter is informed about all changes in the
+// source (notifyChange) and reports back to the caller which line
+// range of the source code has to be re-highlighted because of the
+// change. To do so, the highlighter remembers internally for each
+// line whether C comments begin or end there.
+// NOTE(review): notifyChange() is declared void below - this text looks outdated.
+class COMPHELPER_DLLPUBLIC SyntaxHighlighter
+{
+    HighlighterLanguage eLanguage;
+    SimpleTokenizer_Impl* m_pSimpleTokenizer;
+    char* m_pKeyWords;
+    sal_uInt16 m_nKeyWordCount;
+
+//  void initializeKeyWords( HighlighterLanguage eLanguage );
+
+public:
+    SyntaxHighlighter( void );
+    ~SyntaxHighlighter( void );
+
+    // (Re)initialise the highlighter; the line table is cleared
+    // completely in the process, i.e. afterwards an empty source
+    // is assumed. In notifyChange() only line 0 can then be
+    // specified.
+    void initialize( HighlighterLanguage eLanguage_ );
+
+    void notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
+                                const OUString* pChangedLines, sal_uInt32 nArrayLength);
+
+    void getHighlightPortions( sal_uInt32 nLine, const OUString& rLine,
+                                            HighlightPortions& pPortions );
+
+    HighlighterLanguage GetLanguage() { return eLanguage;}
+};
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/comphelper/source/misc/syntaxhighlight.cxx b/comphelper/source/misc/syntaxhighlight.cxx
new file mode 100644
index 000000000000..2064c0c76a11
--- /dev/null
+++ b/comphelper/source/misc/syntaxhighlight.cxx
@@ -0,0 +1,739 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <unicode/uchar.h>
+#include <comphelper/syntaxhighlight.hxx>
+#include <comphelper/string.hxx>
+
+// ##########################################################################
+// ATTENTION: all these words need to be in lower case and sorted in
+// strcmp() order - they are looked up with bsearch() (see getNextToken)
+static const char* strListBasicKeyWords[] = {
+    "access",
+    "alias",
+    "and",
+    "any",
+    "append",
+    "as",
+    "attribute",
+    "base",
+    "binary",
+    "boolean",
+    "byref",
+    "byte",
+    "byval",
+    "call",
+    "case",
+    "cdecl",
+    "classmodule",
+    "close",
+    "compare",
+    "compatible",
+    "const",
+    "currency",
+    "date",
+    "declare",
+    "defbool",
+    "defcur",
+    "defdate",
+    "defdbl",
+    "deferr",
+    "defint",
+    "deflng",
+    "defobj",
+    "defsng",
+    "defstr",
+    "defvar",
+    "dim",
+    "do",
+    "double",
+    "each",
+    "else",
+    "elseif",
+    "end",
+    "end enum",
+    "end function",
+    "end if",
+    "end property",
+    "end select",
+    "end sub",
+    "end type",
+    "endif",
+    "enum",
+    "eqv",
+    "erase",
+    "error",
+    "exit",
+    "explicit",
+    "for",
+    "function",
+    "get",
+    "global",
+    "gosub",
+    "goto",
+    "if",
+    "imp",
+    "implements",
+    "in",
+    "input",
+    "integer",
+    "is",
+    "let",
+    "lib",
+    "like",
+    "line",
+    "line input",
+    "local",
+    "lock",
+    "long",
+    "loop",
+    "lprint",
+    "lset",
+    "mod",
+    "name",
+    "new",
+    "next",
+    "not",
+    "object",
+    "on",
+    "open",
+    "option",
+    "optional",
+    "or",
+    "output",
+    "paramarray",
+    "preserve",
+    "print",
+    "private",
+    "property",
+    "public",
+    "random",
+    "read",
+    "redim",
+    "rem",
+    "resume",
+    "return",
+    "rset",
+    "select",
+    "set",
+    "shared",
+    "single",
+    "static",
+    "step",
+    "stop",
+    "string",
+    "sub",
+    "system",
+    "text",
+    "then",
+    "to",
+    "type",
+    "typeof",
+    "until",
+    "variant",
+    "vbasupport",
+    "wend",
+    "while",
+    "with",
+    "withevents",
+    "write",
+    "xor"
+};
+
+
+static const char* strListSqlKeyWords[] = {  // lower case, strcmp() order (searched with bsearch)
+    "all",
+    "and",
+    "any",
+    "as",
+    "asc",
+    "avg",
+    "between",
+    "by",
+    "cast",
+    "corresponding",
+    "count",
+    "create",
+    "cross",
+    "delete",
+    "desc",
+    "distinct",
+    "drop",
+    "escape",
+    "except",
+    "exists",
+    "false",
+    "from",
+    "full",
+    "global",
+    "group",
+    "having",
+    "in",
+    "inner",
+    "insert",
+    "intersect",
+    "into",
+    "is",
+    "join",
+    "left",
+    "like",
+    "local",
+    "match",
+    "max",
+    "min",
+    "natural",
+    "not",
+    "null",
+    "on",
+    "or",
+    "order",
+    "outer",
+    "right",
+    "select",
+    "set",
+    "some",
+    "sum",
+    "table",
+    "temporary",
+    "true",
+    "union",
+    "unique",
+    "unknown",
+    "update",
+    "using",
+    "values",
+    "where"
+};
+
+
+extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
+{   // bsearch() callback: arg1 is the key string, arg2 points at a table entry
+    return strcmp( static_cast<const char*>(arg1), *static_cast<const char* const*>(arg2) );
+}
+
+
+namespace
+{
+    // Letter test: ASCII letters are answered by comphelper directly,
+    // everything else is delegated to ICU's u_isalpha()
+    static bool isAlpha(sal_Unicode c)
+    {
+        return comphelper::string::isalphaAscii(c) || u_isalpha(c);
+    }
+}
+
+// Tests whether character c carries at least one of the flags in nTestFlags
+sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
+{
+    // NUL never matches anything
+    if( c == 0 )
+        return false;
+
+    // Characters up to 255 are looked up in the flag table
+    if( c <= 255 )
+        return ( aCharTypeTab[c] & nTestFlags ) != 0;
+
+    // Beyond the table only letters qualify, and only as identifier characters
+    const sal_uInt16 nIdentifierFlags = CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER;
+    return ( nTestFlags & nIdentifierFlags ) != 0 && isAlpha(c);
+}
+
+void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
+{   // Only the pointer is kept - the table itself is neither copied nor owned
+    nKeyWordCount = nCount;
+    ppListKeyWords = ppKeyWords;
+}
+
+// Scans one token starting at the current read position (mpActualPos).
+//
+//   reType      out: type of the token; TT_UNKNOWN if no rule below matches
+//   rpStartPos  out: first character of the token
+//   rpEndPos    out: position directly behind the token's last character;
+//                    not written when the function returns sal_False
+//
+// Returns sal_False once the terminating NUL (CHAR_EOF) is reached and
+// sal_True after a token has been consumed. Keyword lookup relies on the
+// table set via setKeyWords() being lower case and sorted for bsearch().
+sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
+    /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
+{
+    reType = TT_UNKNOWN;
+    rpStartPos = mpActualPos;
+
+    sal_Unicode c = peekChar();
+    if( c == CHAR_EOF )
+        return sal_False;
+    getChar();
+
+    //*** Go through all possibilities ***
+    // Space?
+    if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) )
+    {
+        while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True )
+            getChar();
+
+        reType = TT_WHITESPACE;
+    }
+
+    // Identifier?
+    else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) )
+    {
+        sal_Bool bIdentifierChar;
+        do
+        {
+            // Get next character
+            c = peekChar();
+            bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
+            if( bIdentifierChar )
+                getChar();
+        }
+        while( bIdentifierChar );
+
+        reType = TT_IDENTIFIER;
+
+        // Keyword table
+        if (ppListKeyWords != NULL)
+        {
+            int nCount = mpActualPos - rpStartPos;
+
+            // No keyword if string contains char > 255
+            bool bCanBeKeyword = true;
+            for( int i = 0 ; i < nCount ; i++ )
+            {
+                if( rpStartPos[i] > 255 )
+                {
+                    bCanBeKeyword = false;
+                    break;
+                }
+            }
+
+            if( bCanBeKeyword )
+            {
+                OUString aKWString(rpStartPos, nCount);
+                OString aByteStr = OUStringToOString(aKWString,
+                    RTL_TEXTENCODING_ASCII_US).toAsciiLowerCase();
+                if ( bsearch( aByteStr.getStr(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
+                                                                        compare_strings ) )
+                {
+                    reType = TT_KEYWORDS;
+
+                    if (aByteStr.equalsL(RTL_CONSTASCII_STRINGPARAM("rem")))
+                    {
+                        // Remove all characters until end of line or EOF
+                        sal_Unicode cPeek = peekChar();
+                        while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
+                        {
+                            c = getChar();
+                            cPeek = peekChar();
+                        }
+
+                        reType = TT_COMMENT;
+                    }
+                }
+            }
+        }
+    }
+
+    // Operator?
+    // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
+    else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
+    {
+        // parameters for SQL view
+        if ( (c==':') || (c=='?'))
+        {
+            if (c!='?')
+            {
+                sal_Bool bIdentifierChar;
+                do
+                {
+                    // Get next character
+                    c = peekChar();
+                    bIdentifierChar = isAlpha(c);
+                    if( bIdentifierChar )
+                        getChar();
+                }
+                while( bIdentifierChar );
+            }
+            reType = TT_PARAMETER;
+        }
+        else if ( c == '-' || c == '/' )
+        {
+            // "--" and "//" start a comment that runs to the end of the line;
+            // a single '-' or '/' is an ordinary operator
+            if ( peekChar() == c )
+            {
+                // Remove all characters until end of line or EOF
+                sal_Unicode cPeekNext = peekChar();
+                while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
+                {
+                    getChar();
+                    cPeekNext = peekChar();
+                }
+                reType = TT_COMMENT;
+            }
+            else
+            {
+                reType = TT_OPERATOR;
+            }
+        }
+        else
+        {
+            // Comment? (Only reachable for BASIC, see the branch condition.)
+            if ( c == '\'' )
+            {
+                // Remove all characters until end of line or EOF. The
+                // apostrophe itself has already been consumed above; peek
+                // before consuming so the terminating NUL is never skipped.
+                sal_Unicode cPeek = peekChar();
+                while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
+                {
+                    getChar();
+                    cPeek = peekChar();
+                }
+
+                reType = TT_COMMENT;
+            }
+
+            // Real operator; can be handled simply here, because it is not
+            // the actual operator (e.g. +=) that is of interest, but only
+            // the fact that it is one.
+            if( reType != TT_COMMENT )
+            {
+                reType = TT_OPERATOR;
+            }
+
+        }
+    }
+
+    // Object separator? Must be handled before Number
+    else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
+    {
+        reType = TT_OPERATOR;
+    }
+
+    // Number?
+    else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True )
+    {
+        reType = TT_NUMBER;
+
+        // Number system, 10 = normal, it is changed for Oct/Hex
+        int nRadix = 10;
+
+        // Is it an Oct or a Hex number?
+        if( c == '&' )
+        {
+            // Octal?
+            if( peekChar() == 'o' || peekChar() == 'O' )
+            {
+                // remove o
+                getChar();
+                nRadix = 8;     // Octal base
+
+                // Read all numbers
+                while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
+                    c = getChar();
+            }
+            // Hexadecimal?
+            else if( peekChar() == 'h' || peekChar() == 'H' )
+            {
+                // remove h
+                getChar();
+                nRadix = 16;     // Hexadecimal base
+
+                // Read all numbers
+                while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
+                    c = getChar();
+            }
+            else
+            {
+                reType = TT_OPERATOR;
+            }
+        }
+
+        // When it is not Oct or Hex, then it is double
+        if( reType == TT_NUMBER && nRadix == 10 )
+        {
+            // Flag if the last character is an exponent
+            sal_Bool bAfterExpChar = sal_False;
+
+            // Read all numbers
+            while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
+                    (bAfterExpChar && peekChar() == '+' ) ||
+                    (bAfterExpChar && peekChar() == '-' ) )
+                    // After exponent +/- are OK, too
+            {
+                c = getChar();
+                bAfterExpChar = ( c == 'e' || c == 'E' );
+            }
+        }
+    }
+
+    // String?
+    else if( testCharFlags( c, CHAR_START_STRING ) == sal_True )
+    {
+        // Remember which character has opened the string
+        sal_Unicode cEndString = c;
+        if( c == '[' )
+            cEndString = ']';
+
+        // Read all characters
+        while( peekChar() != cEndString )
+        {
+            // Detect EOF before getChar(), so we do not lose EOF
+            if( peekChar() == CHAR_EOF )
+            {
+                // ERROR: unterminated string literal
+                reType = TT_ERROR;
+                break;
+            }
+            c = getChar();
+            if( testCharFlags( c, CHAR_EOL ) == sal_True )
+            {
+                // ERROR: unterminated string literal
+                reType = TT_ERROR;
+                break;
+            }
+        }
+
+        if( reType != TT_ERROR )
+        {
+            getChar();
+            if( cEndString == ']' )
+                reType = TT_IDENTIFIER;
+            else
+                reType = TT_STRING;
+        }
+    }
+
+    // End of line?
+    else if( testCharFlags( c, CHAR_EOL ) == sal_True )
+    {
+        // If another EOL character comes, read it
+        sal_Unicode cNext = peekChar();
+        if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True )
+            getChar();
+
+        // Set position data at the line start
+        nCol = 0;
+        nLine++;
+
+        reType = TT_EOL;
+    }
+
+    // All other will remain TT_UNKNOWN
+
+    // Save end position
+    rpEndPos = mpActualPos;
+    return sal_True;
+}
+
+SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
+{
+    memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
+
+    // Fill the character table: one set of CHAR_* flags per character 0..255
+    sal_uInt16 i;
+
+    // Allowed characters for identifiers (ASCII letters, '_' and '$')
+    sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
+    for( i = 'a' ; i <= 'z' ; i++ )
+        aCharTypeTab[i] |= nHelpMask;
+    for( i = 'A' ; i <= 'Z' ; i++ )
+        aCharTypeTab[i] |= nHelpMask;
+    aCharTypeTab[(int)'_'] |= nHelpMask;
+    aCharTypeTab[(int)'$'] |= nHelpMask;
+
+    // Digit (can continue an identifier and can start/continue a number)
+    nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
+                         CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
+    for( i = '0' ; i <= '9' ; i++ )
+        aCharTypeTab[i] |= nHelpMask;
+
+    // e/E (exponent), '.' and '&' (prefix of &O / &H literals) are added manually
+    aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
+    aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
+    aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
+    aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
+
+    // Hexadecimal digit (letters; 0-9 were flagged with the digits above)
+    for( i = 'a' ; i <= 'f' ; i++ )
+        aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
+    for( i = 'A' ; i <= 'F' ; i++ )
+        aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
+
+    // Octal digit
+    for( i = '0' ; i <= '7' ; i++ )
+        aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
+
+    // String literal start characters ('[' opens a bracketed name closed by ']')
+    aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
+    aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
+    aCharTypeTab[(int)'[']  |= CHAR_START_STRING;
+    aCharTypeTab[(int)'`']  |= CHAR_START_STRING;
+
+    // Operator characters
+    aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
+    // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR;     Removed because of #i14140
+    aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)','] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
+    // aCharTypeTab[(int)'['] |= CHAR_OPERATOR;     Removed because of #i17826
+    aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
+    aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
+
+    // Space
+    aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
+    aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
+
+    // End of line characters
+    aCharTypeTab[(int)'\r'] |= CHAR_EOL;
+    aCharTypeTab[(int)'\n'] |= CHAR_EOL;
+
+    ppListKeyWords = NULL;  // no keyword lookup until setKeyWords() supplies a table
+}
+
+SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
+{   // nothing to release: the keyword table pointer is not deleted here
+}
+
+// Returns a shared tokenizer for the default language, created on first
+// use and never deleted.
+SimpleTokenizer_Impl* getSimpleTokenizer( void )
+{
+    static SimpleTokenizer_Impl* pInstance = new SimpleTokenizer_Impl();
+    return pInstance;
+}
+
+// Tokenises *aSource as line nParseLine and returns the number of tokens
+// found; the tokens themselves are discarded.
+sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const OUString* aSource )
+{
+    // Rewind the scanner to the first character of the line
+    const sal_Unicode* const pFirstChar = aSource->getStr();
+    mpStringBegin = pFirstChar;
+    mpActualPos = pFirstChar;
+
+    // Position bookkeeping starts at column 0 of the requested line
+    nCol = 0;
+    nLine = nParseLine;
+    // getNextToken() reports through these; only the count is of interest
+    TokenTypes eTokenType;
+    const sal_Unicode* pTokenBegin;
+    const sal_Unicode* pTokenEnd;
+    sal_uInt16 nFound = 0;
+    while( getNextToken( eTokenType, pTokenBegin, pTokenEnd ) )
+        ++nFound;
+    return nFound;
+}
+
+// Tokenises rLine (as line nParseLine) and appends one HighlightPortion per
+// token to portions; offsets are relative to rLine and cut to sal_uInt16.
+void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const OUString& rLine,
+                                                    /*out*/HighlightPortions& portions  )
+{
+    // Rewind the scanner to the first character of the line
+    const sal_Unicode* const pFirstChar = rLine.getStr();
+    mpStringBegin = pFirstChar;
+    mpActualPos = pFirstChar;
+
+    // Position bookkeeping starts at column 0 of the requested line
+    nCol = 0;
+    nLine = nParseLine;
+
+    // Filled in by getNextToken() for every token it finds
+    TokenTypes eTokenType;
+    const sal_Unicode* pTokenBegin;
+    const sal_Unicode* pTokenEnd;
+    while( getNextToken( eTokenType, pTokenBegin, pTokenEnd ) )
+    {
+        HighlightPortion aEntry;
+        aEntry.nBegin = static_cast<sal_uInt16>(pTokenBegin - mpStringBegin);
+        aEntry.nEnd = static_cast<sal_uInt16>(pTokenEnd - mpStringBegin);
+        aEntry.tokenType = eTokenType;
+        portions.push_back(aEntry);
+    }
+}
+
+
+// No tokenizer yet (initialize() creates it); eLanguage gets a defined default
+SyntaxHighlighter::SyntaxHighlighter()
+    : eLanguage( HIGHLIGHT_BASIC ), m_pSimpleTokenizer( NULL ),
+      m_pKeyWords( NULL ), m_nKeyWordCount( 0 )
+{
+}
+
+SyntaxHighlighter::~SyntaxHighlighter()
+{
+    delete m_pSimpleTokenizer;  // created in initialize(); may still be null
+    delete m_pKeyWords;         // only ever set to NULL in this file, so a no-op here
+}
+
+// (Re)creates the tokenizer for eLanguage_ and installs its keyword table
+void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
+{
+    eLanguage = eLanguage_;
+    delete m_pSimpleTokenizer;
+    m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
+    const char** ppTable = NULL;
+    sal_uInt16 nEntries = 0;
+    if( eLanguage == HIGHLIGHT_BASIC )
+    {
+        ppTable = strListBasicKeyWords;
+        nEntries = sizeof( strListBasicKeyWords ) / sizeof( char* );
+    }
+    else if( eLanguage == HIGHLIGHT_SQL )
+    {
+        ppTable = strListSqlKeyWords;
+        nEntries = sizeof( strListSqlKeyWords ) / sizeof( char* );
+    }
+    m_pSimpleTokenizer->setKeyWords( ppTable, nEntries );
+}
+
+void SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
+                                const OUString* pChangedLines, sal_uInt32 nArrayLength)
+{
+    // Each changed line is simply re-tokenised; the line-count delta is unused
+    (void)nLineCountDifference;
+    for( sal_uInt32 nOffset = 0 ; nOffset != nArrayLength ; ++nOffset )
+        m_pSimpleTokenizer->parseLine( nLine + nOffset, pChangedLines + nOffset );
+}
+
+void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const OUString& rLine,
+                                            /*out*/HighlightPortions& portions )
+{   // Plain forwarder; m_pSimpleTokenizer is null until initialize() has been called
+    m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
author	Andras Timar <atimar@suse.com>	2013-02-15 13:02:10 +0100
committer	Andras Timar <atimar@suse.com>	2013-02-16 12:55:03 +0100
commit	c16e9f4ed97f65357e9986f46ad88ee9f2237997 (patch)
tree	9da5c0056d4aad772a72d57f7bbf07d24ec2478a /comphelper
parent	c4aa2c4d7eb1cef0f3b172d1dbc4e51e9b379b80 (diff)