diff options
Diffstat (limited to 'svtools/source/edit/syntaxhighlight.cxx')
-rw-r--r-- | svtools/source/edit/syntaxhighlight.cxx | 906 |
1 files changed, 906 insertions, 0 deletions
diff --git a/svtools/source/edit/syntaxhighlight.cxx b/svtools/source/edit/syntaxhighlight.cxx new file mode 100644 index 000000000000..87585f5b2587 --- /dev/null +++ b/svtools/source/edit/syntaxhighlight.cxx @@ -0,0 +1,906 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +// MARKER(update_precomp.py): autogen include statement, do not remove +#include "precompiled_svtools.hxx" + +#include <svtools/syntaxhighlight.hxx> + +#include <unotools/charclass.hxx> +#include <tools/debug.hxx> + + +// ########################################################################## +// ATTENTION: all these words needs to be in small caps +// ########################################################################## +static const char* strListBasicKeyWords[] = { + "access", + "alias", + "and", + "any", + "append", + "as", + "base", + "binary", + "boolean", + "byref", + "byte", + "byval", + "call", + "case", + "cdecl", + "classmodule", + "close", + "compare", + "compatible", + "const", + "currency", + "date", + "declare", + "defbool", + "defcur", + "defdate", + "defdbl", + "deferr", + "defint", + "deflng", + "defobj", + "defsng", + "defstr", + "defvar", + "dim", + "do", + "double", + "each", + "else", + "elseif", + "end", + "end enum", + "end function", + "end if", + "end select", + "end sub", + "end type", + "endif", + "enum", + "eqv", + "erase", + "error", + "exit", + "explicit", + "for", + "function", + "get", + "global", + "gosub", + "goto", + "if", + "imp", + "implements", + "in", + "input", + "integer", + "is", + "let", + "lib", + "like", + "line", + "line input", + "local", + "lock", + "long", + "loop", + "lprint", + "lset", + "mod", + "name", + "new", + "next", + "not", + "object", + "on", + "open", + "option", + "optional", + "or", + "output", + "preserve", + "print", + "private", + "property", + "public", + "random", + "read", + "redim", + "rem", + "resume", + "return", + "rset", + "select", + "set", + "shared", + "single", + "static", + "step", + "stop", + "string", + "sub", + "system", + "text", + "then", + "to", + "type", + "typeof", + "until", + "variant", + "wend", + "while", + "with", + "write", + "xor" +}; + + +static const char* strListSqlKeyWords[] = { + "all", + "and", + "any", + "as", + "asc", + "avg", + "between", + "by", + "cast", + "corresponding", + "count", + "create", + "cross", + "delete", + "desc", + "distinct", + "drop", + "escape", + "except", + "exists", + "false", + "from", + "full", + "global", + "group", + "having", + "in", + "inner", + "insert", + "intersect", + "into", + "is", + "join", + "left", + "like", + "local", + "match", + "max", + "min", + "natural", + "not", + "null", + "on", + "or", + "order", + "outer", + "right", + "select", + "set", + "some", + "sum", + "table", + "temporary", + "true", + "union", + "unique", + "unknown", + "update", + "using", + "values", + "where" +}; + + +extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 ) +{ + return strcmp( (char *)arg1, *(char **)arg2 ); +} + + +class LetterTable +{ + bool IsLetterTab[256]; + +public: + LetterTable( void ); + + inline bool isLetter( sal_Unicode c ) + { + bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c ); + return bRet; + } + bool isLetterUnicode( sal_Unicode c ); +}; + +class BasicSimpleCharClass +{ + static LetterTable aLetterTable; + +public: + static BOOL isAlpha( sal_Unicode c, bool bCompatible ) + { + BOOL bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') + || (bCompatible && aLetterTable.isLetter( c )); + return bRet; + } + + static BOOL isDigit( sal_Unicode c ) + { + BOOL bRet = (c >= '0' && c <= '9'); + return bRet; + } + + static BOOL isAlphaNumeric( sal_Unicode c, bool bCompatible ) + { + BOOL bRet = isDigit( c ) || isAlpha( c, bCompatible ); + return bRet; + } +}; + +LetterTable BasicSimpleCharClass::aLetterTable; + +LetterTable::LetterTable( void ) +{ + for( int i = 0 ; i < 256 ; ++i ) + IsLetterTab[i] = false; + + IsLetterTab[0xC0] = true; // , CAPITAL LETTER A WITH GRAVE ACCENT + IsLetterTab[0xC1] = true; // , CAPITAL LETTER A WITH ACUTE ACCENT + IsLetterTab[0xC2] = true; // , CAPITAL LETTER A WITH CIRCUMFLEX ACCENT + IsLetterTab[0xC3] = true; // , CAPITAL LETTER A WITH TILDE + IsLetterTab[0xC4] = true; // , CAPITAL LETTER A WITH DIAERESIS + IsLetterTab[0xC5] = true; // , CAPITAL LETTER A WITH RING ABOVE + IsLetterTab[0xC6] = true; // , CAPITAL LIGATURE AE + IsLetterTab[0xC7] = true; // , CAPITAL LETTER C WITH CEDILLA + IsLetterTab[0xC8] = true; // , CAPITAL LETTER E WITH GRAVE ACCENT + IsLetterTab[0xC9] = true; // , CAPITAL LETTER E WITH ACUTE ACCENT + IsLetterTab[0xCA] = true; // , CAPITAL LETTER E WITH CIRCUMFLEX ACCENT + IsLetterTab[0xCB] = true; // , CAPITAL LETTER E WITH DIAERESIS + IsLetterTab[0xCC] = true; // , CAPITAL LETTER I WITH GRAVE ACCENT + IsLetterTab[0xCD] = true; // , CAPITAL LETTER I WITH ACUTE ACCENT + IsLetterTab[0xCE] = true; // , CAPITAL LETTER I WITH CIRCUMFLEX ACCENT + IsLetterTab[0xCF] = true; // , CAPITAL LETTER I WITH DIAERESIS + IsLetterTab[0xD0] = true; // , CAPITAL LETTER ETH + IsLetterTab[0xD1] = true; // , CAPITAL LETTER N WITH TILDE + IsLetterTab[0xD2] = true; // , CAPITAL LETTER O WITH GRAVE ACCENT + IsLetterTab[0xD3] = true; // , CAPITAL LETTER O WITH ACUTE ACCENT + IsLetterTab[0xD4] = true; // , CAPITAL LETTER O WITH CIRCUMFLEX ACCENT + IsLetterTab[0xD5] = true; // , CAPITAL LETTER O WITH TILDE + IsLetterTab[0xD6] = true; // , CAPITAL LETTER O WITH DIAERESIS + IsLetterTab[0xD8] = true; // , CAPITAL LETTER O WITH STROKE + IsLetterTab[0xD9] = true; // , CAPITAL LETTER U WITH GRAVE ACCENT + IsLetterTab[0xDA] = true; // , CAPITAL LETTER U WITH ACUTE ACCENT + IsLetterTab[0xDB] = true; // , CAPITAL LETTER U WITH CIRCUMFLEX ACCENT + IsLetterTab[0xDC] = true; // , CAPITAL LETTER U WITH DIAERESIS + IsLetterTab[0xDD] = true; // , CAPITAL LETTER Y WITH ACUTE ACCENT + IsLetterTab[0xDE] = true; // , CAPITAL LETTER THORN + IsLetterTab[0xDF] = true; // , SMALL LETTER SHARP S + IsLetterTab[0xE0] = true; // , SMALL LETTER A WITH GRAVE ACCENT + IsLetterTab[0xE1] = true; // , SMALL LETTER A WITH ACUTE ACCENT + IsLetterTab[0xE2] = true; // , SMALL LETTER A WITH CIRCUMFLEX ACCENT + IsLetterTab[0xE3] = true; // , SMALL LETTER A WITH TILDE + IsLetterTab[0xE4] = true; // , SMALL LETTER A WITH DIAERESIS + IsLetterTab[0xE5] = true; // , SMALL LETTER A WITH RING ABOVE + IsLetterTab[0xE6] = true; // , SMALL LIGATURE AE + IsLetterTab[0xE7] = true; // , SMALL LETTER C WITH CEDILLA + IsLetterTab[0xE8] = true; // , SMALL LETTER E WITH GRAVE ACCENT + IsLetterTab[0xE9] = true; // , SMALL LETTER E WITH ACUTE ACCENT + IsLetterTab[0xEA] = true; // , SMALL LETTER E WITH CIRCUMFLEX ACCENT + IsLetterTab[0xEB] = true; // , SMALL LETTER E WITH DIAERESIS + IsLetterTab[0xEC] = true; // , SMALL LETTER I WITH GRAVE ACCENT + IsLetterTab[0xED] = true; // , SMALL LETTER I WITH ACUTE ACCENT + IsLetterTab[0xEE] = true; // , SMALL LETTER I WITH CIRCUMFLEX ACCENT + IsLetterTab[0xEF] = true; // , SMALL LETTER I WITH DIAERESIS + IsLetterTab[0xF0] = true; // , SMALL LETTER ETH + IsLetterTab[0xF1] = true; // , SMALL LETTER N WITH TILDE + IsLetterTab[0xF2] = true; // , SMALL LETTER O WITH GRAVE ACCENT + IsLetterTab[0xF3] = true; // , SMALL LETTER O WITH ACUTE ACCENT + IsLetterTab[0xF4] = true; // , SMALL LETTER O WITH CIRCUMFLEX ACCENT + IsLetterTab[0xF5] = true; // , SMALL LETTER O WITH TILDE + IsLetterTab[0xF6] = true; // , SMALL LETTER O WITH DIAERESIS + IsLetterTab[0xF8] = true; // , SMALL LETTER O WITH OBLIQUE BAR + IsLetterTab[0xF9] = true; // , SMALL LETTER U WITH GRAVE ACCENT + IsLetterTab[0xFA] = true; // , SMALL LETTER U WITH ACUTE ACCENT + IsLetterTab[0xFB] = true; // , SMALL LETTER U WITH CIRCUMFLEX ACCENT + IsLetterTab[0xFC] = true; // , SMALL LETTER U WITH DIAERESIS + IsLetterTab[0xFD] = true; // , SMALL LETTER Y WITH ACUTE ACCENT + IsLetterTab[0xFE] = true; // , SMALL LETTER THORN + IsLetterTab[0xFF] = true; // , SMALL LETTER Y WITH DIAERESIS +} + +bool LetterTable::isLetterUnicode( sal_Unicode c ) +{ + static CharClass* pCharClass = NULL; + if( pCharClass == NULL ) + pCharClass = new CharClass( Application::GetSettings().GetLocale() ); + String aStr( c ); + bool bRet = pCharClass->isLetter( aStr, 0 ); + return bRet; +} + +// Hilfsfunktion: Zeichen-Flag Testen +BOOL SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, USHORT nTestFlags ) +{ + bool bRet = false; + if( c != 0 && c <= 255 ) + { + bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 ); + } + else if( c > 255 ) + { + bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0 + ? BasicSimpleCharClass::isAlpha( c, true ) : false; + } + return bRet; +} + +void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, UINT16 nCount ) +{ + ppListKeyWords = ppKeyWords; + nKeyWordCount = nCount; +} + +// Neues Token holen +BOOL SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, + /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos ) +{ + reType = TT_UNKNOWN; + + // Position merken + rpStartPos = mpActualPos; + + // Zeichen untersuchen + sal_Unicode c = peekChar(); + if( c == CHAR_EOF ) + return FALSE; + + // Zeichen lesen + getChar(); + + //*** Alle Moeglichkeiten durchgehen *** + // Space? + if ( (testCharFlags( c, CHAR_SPACE ) == TRUE) ) + { + while( testCharFlags( peekChar(), CHAR_SPACE ) == TRUE ) + getChar(); + + reType = TT_WHITESPACE; + } + + // Identifier? + else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == TRUE) ) + { + BOOL bIdentifierChar; + do + { + // Naechstes Zeichen holen + c = peekChar(); + bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER ); + if( bIdentifierChar ) + getChar(); + } + while( bIdentifierChar ); + + reType = TT_IDENTIFIER; + + // Schluesselwort-Tabelle + if (ppListKeyWords != NULL) + { + int nCount = mpActualPos - rpStartPos; + + // No keyword if string contains char > 255 + bool bCanBeKeyword = true; + for( int i = 0 ; i < nCount ; i++ ) + { + if( rpStartPos[i] > 255 ) + { + bCanBeKeyword = false; + break; + } + } + + if( bCanBeKeyword ) + { + String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) ); + ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US ); + aByteStr.ToLowerAscii(); + if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ), + compare_strings ) ) + { + reType = TT_KEYWORDS; + + if ( aByteStr.Equals( "rem" ) ) + { + // Alle Zeichen bis Zeilen-Ende oder EOF entfernen + sal_Unicode cPeek = peekChar(); + while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == FALSE ) + { + c = getChar(); + cPeek = peekChar(); + } + + reType = TT_COMMENT; + } + } + } + } + } + + // Operator? + // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there + else if ( ( testCharFlags( c, CHAR_OPERATOR ) == TRUE ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) ) + { + // paramters for SQL view + if ( (c==':') || (c=='?')) + { + if (c!='?') + { + BOOL bIdentifierChar; + do + { + // Naechstes Zeichen holen + c = peekChar(); + bIdentifierChar = BasicSimpleCharClass::isAlpha( c, true ); + if( bIdentifierChar ) + getChar(); + } + while( bIdentifierChar ); + } + reType = TT_PARAMETER; + } + else if ((c=='-')) + { + sal_Unicode cPeekNext = peekChar(); + if (cPeekNext=='-') + { + // Alle Zeichen bis Zeilen-Ende oder EOF entfernen + while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == FALSE ) + { + getChar(); + cPeekNext = peekChar(); + } + reType = TT_COMMENT; + } + } + else if (c=='/') + { + sal_Unicode cPeekNext = peekChar(); + if (cPeekNext=='/') + { + // Alle Zeichen bis Zeilen-Ende oder EOF entfernen + while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == FALSE ) + { + getChar(); + cPeekNext = peekChar(); + } + reType = TT_COMMENT; + } + } + else + { + // Kommentar ? + if ( c == '\'' ) + { + c = getChar(); // '/' entfernen + + // Alle Zeichen bis Zeilen-Ende oder EOF entfernen + sal_Unicode cPeek = peekChar(); + while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == FALSE ) + { + getChar(); + cPeek = peekChar(); + } + + reType = TT_COMMENT; + } + + // Echter Operator, kann hier einfach behandelt werden, + // da nicht der wirkliche Operator, wie z.B. += interessiert, + // sondern nur die Tatsache, dass es sich um einen handelt. + if( reType != TT_COMMENT ) + { + reType = TT_OPERATOR; + } + + } + } + + // Objekt-Trenner? Muss vor Number abgehandelt werden + else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) ) + { + reType = TT_OPERATOR; + } + + // Zahl? + else if( testCharFlags( c, CHAR_START_NUMBER ) == TRUE ) + { + reType = TT_NUMBER; + + // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert + int nRadix = 10; + + // Ist es eine Hex- oder Oct-Zahl? + if( c == '&' ) + { + // Octal? + if( peekChar() == 'o' || peekChar() == 'O' ) + { + // o entfernen + getChar(); + nRadix = 8; // Octal-Basis + + // Alle Ziffern einlesen + while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) ) + c = getChar(); + } + // Hex? + else if( peekChar() == 'h' || peekChar() == 'H' ) + { + // x entfernen + getChar(); + nRadix = 16; // Hex-Basis + + // Alle Ziffern einlesen und puffern + while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) ) + c = getChar(); + } + else + { + reType = TT_OPERATOR; + } + } + + // Wenn nicht Oct oder Hex als double ansehen + if( reType == TT_NUMBER && nRadix == 10 ) + { + // Flag, ob das letzte Zeichen ein Exponent war + BOOL bAfterExpChar = FALSE; + + // Alle Ziffern einlesen + while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) || + (bAfterExpChar && peekChar() == '+' ) || + (bAfterExpChar && peekChar() == '-' ) ) + // Nach Exponent auch +/- OK + { + c = getChar(); // Zeichen lesen + bAfterExpChar = ( c == 'e' || c == 'E' ); + } + } + + // reType = TT_NUMBER; + } + + // String? + else if( testCharFlags( c, CHAR_START_STRING ) == TRUE ) + { + // Merken, welches Zeichen den String eroeffnet hat + sal_Unicode cEndString = c; + if( c == '[' ) + cEndString = ']'; + + // Alle Ziffern einlesen und puffern + while( peekChar() != cEndString ) + { + // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht + if( peekChar() == CHAR_EOF ) + { + // ERROR: unterminated string literal + reType = TT_ERROR; + break; + } + c = getChar(); + if( testCharFlags( c, CHAR_EOL ) == TRUE ) + { + // ERROR: unterminated string literal + reType = TT_ERROR; + break; + } + } + + // Zeichen lesen + if( reType != TT_ERROR ) + { + getChar(); + if( cEndString == ']' ) + reType = TT_IDENTIFIER; + else + reType = TT_STRING; + } + } + + // Zeilenende? + else if( testCharFlags( c, CHAR_EOL ) == TRUE ) + { + // Falls ein weiteres anderes EOL-Char folgt, weg damit + sal_Unicode cNext = peekChar(); + if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == TRUE ) + getChar(); + + // Positions-Daten auf Zeilen-Beginn setzen + nCol = 0; + nLine++; + + reType = TT_EOL; + } + + // Alles andere bleibt TT_UNKNOWN + + + // End-Position eintragen + rpEndPos = mpActualPos; + return TRUE; +} + +String SimpleTokenizer_Impl::getTokStr + ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) +{ + return String( pStartPos, (USHORT)( pEndPos - pStartPos ) ); +} + +#ifdef DBG_UTIL +// TEST: Token ausgeben +String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType, + /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) +{ + String aOut; + switch( eType ) + { + case TT_UNKNOWN: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break; + case TT_IDENTIFIER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break; + case TT_WHITESPACE: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break; + case TT_NUMBER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break; + case TT_STRING: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break; + case TT_EOL: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break; + case TT_COMMENT: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break; + case TT_ERROR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break; + case TT_OPERATOR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break; + case TT_KEYWORDS: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break; + case TT_PARAMETER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break; + } + if( eType != TT_EOL ) + { + aOut += String( pStartPos, (USHORT)( pEndPos - pStartPos ) ); + } + aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") ); + return aOut; +} +#endif + +SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang) +{ + memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) ); + + // Zeichen-Tabelle fuellen + USHORT i; + + // Zulaessige Zeichen fuer Identifier + USHORT nHelpMask = (USHORT)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ); + for( i = 'a' ; i <= 'z' ; i++ ) + aCharTypeTab[i] |= nHelpMask; + for( i = 'A' ; i <= 'Z' ; i++ ) + aCharTypeTab[i] |= nHelpMask; + // '_' extra eintragen + aCharTypeTab[(int)'_'] |= nHelpMask; + // AB 23.6.97: '$' ist auch erlaubt + aCharTypeTab[(int)'$'] |= nHelpMask; + + // Ziffern (Identifier und Number ist moeglich) + nHelpMask = (USHORT)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER | + CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER ); + for( i = '0' ; i <= '9' ; i++ ) + aCharTypeTab[i] |= nHelpMask; + + // e und E sowie . von Hand ergaenzen + aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER; + aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER; + aCharTypeTab[(int)'.'] |= (USHORT)( CHAR_IN_NUMBER | CHAR_START_NUMBER ); + aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER; + + // Hex-Ziffern + for( i = 'a' ; i <= 'f' ; i++ ) + aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; + for( i = 'A' ; i <= 'F' ; i++ ) + aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; + + // Oct-Ziffern + for( i = '0' ; i <= '7' ; i++ ) + aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER; + + // String-Beginn/End-Zeichen + aCharTypeTab[(int)'\''] |= CHAR_START_STRING; + aCharTypeTab[(int)'\"'] |= CHAR_START_STRING; + aCharTypeTab[(int)'['] |= CHAR_START_STRING; + aCharTypeTab[(int)'`'] |= CHAR_START_STRING; + + // Operator-Zeichen + aCharTypeTab[(int)'!'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'%'] |= CHAR_OPERATOR; + // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140 + aCharTypeTab[(int)'('] |= CHAR_OPERATOR; + aCharTypeTab[(int)')'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'*'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'+'] |= CHAR_OPERATOR; + aCharTypeTab[(int)','] |= CHAR_OPERATOR; + aCharTypeTab[(int)'-'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'/'] |= CHAR_OPERATOR; + aCharTypeTab[(int)':'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'<'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'='] |= CHAR_OPERATOR; + aCharTypeTab[(int)'>'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'?'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'^'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'|'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'~'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'{'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'}'] |= CHAR_OPERATOR; + // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826 + aCharTypeTab[(int)']'] |= CHAR_OPERATOR; + aCharTypeTab[(int)';'] |= CHAR_OPERATOR; + + // Space + aCharTypeTab[(int)' ' ] |= CHAR_SPACE; + aCharTypeTab[(int)'\t'] |= CHAR_SPACE; + + // Zeilen-Ende-Zeichen + aCharTypeTab[(int)'\r'] |= CHAR_EOL; + aCharTypeTab[(int)'\n'] |= CHAR_EOL; + + ppListKeyWords = NULL; +} + +SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void ) +{ +} + +SimpleTokenizer_Impl* getSimpleTokenizer( void ) +{ + static SimpleTokenizer_Impl* pSimpleTokenizer = NULL; + if( !pSimpleTokenizer ) + pSimpleTokenizer = new SimpleTokenizer_Impl(); + return pSimpleTokenizer; +} + +// Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul +UINT16 SimpleTokenizer_Impl::parseLine( UINT32 nParseLine, const String* aSource ) +{ + // Position auf den Anfang des Source-Strings setzen + mpStringBegin = mpActualPos = aSource->GetBuffer(); + + // Zeile und Spalte initialisieren + nLine = nParseLine; + nCol = 0L; + + // Variablen fuer die Out-Parameter + TokenTypes eType; + const sal_Unicode* pStartPos; + const sal_Unicode* pEndPos; + + // Schleife ueber alle Tokens + UINT16 nTokenCount = 0; + while( getNextToken( eType, pStartPos, pEndPos ) ) + nTokenCount++; + + return nTokenCount; +} + +void SimpleTokenizer_Impl::getHighlightPortions( UINT32 nParseLine, const String& rLine, + /*out*/HighlightPortions& portions ) +{ + // Position auf den Anfang des Source-Strings setzen + mpStringBegin = mpActualPos = rLine.GetBuffer(); + + // Zeile und Spalte initialisieren + nLine = nParseLine; + nCol = 0L; + + // Variablen fuer die Out-Parameter + TokenTypes eType; + const sal_Unicode* pStartPos; + const sal_Unicode* pEndPos; + + // Schleife ueber alle Tokens + while( getNextToken( eType, pStartPos, pEndPos ) ) + { + HighlightPortion portion; + + portion.nBegin = (UINT16)(pStartPos - mpStringBegin); + portion.nEnd = (UINT16)(pEndPos - mpStringBegin); + portion.tokenType = eType; + + portions.push_back(portion); + } +} + + +////////////////////////////////////////////////////////////////////////// +// Implementierung des SyntaxHighlighter + +SyntaxHighlighter::SyntaxHighlighter() +{ + m_pSimpleTokenizer = 0; + m_pKeyWords = NULL; + m_nKeyWordCount = 0; +} + +SyntaxHighlighter::~SyntaxHighlighter() +{ + delete m_pSimpleTokenizer; + delete m_pKeyWords; +} + +void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ ) +{ + eLanguage = eLanguage_; + delete m_pSimpleTokenizer; + m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage); + + switch (eLanguage) + { + case HIGHLIGHT_BASIC: + m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords, + sizeof( strListBasicKeyWords ) / sizeof( char* )); + break; + case HIGHLIGHT_SQL: + m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords, + sizeof( strListSqlKeyWords ) / sizeof( char* )); + break; + default: + m_pSimpleTokenizer->setKeyWords( NULL, 0 ); + } +} + +const Range SyntaxHighlighter::notifyChange( UINT32 nLine, INT32 nLineCountDifference, + const String* pChangedLines, UINT32 nArrayLength) +{ + (void)nLineCountDifference; + + for( UINT32 i=0 ; i < nArrayLength ; i++ ) + m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]); + + return Range( nLine, nLine + nArrayLength-1 ); +} + +void SyntaxHighlighter::getHighlightPortions( UINT32 nLine, const String& rLine, + /*out*/HighlightPortions& portions ) +{ + m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions ); +} |