diff options
author | Andras Timar <atimar@suse.com> | 2013-02-15 13:02:10 +0100 |
---|---|---|
committer | Andras Timar <atimar@suse.com> | 2013-02-16 12:55:03 +0100 |
commit | c16e9f4ed97f65357e9986f46ad88ee9f2237997 (patch) | |
tree | 9da5c0056d4aad772a72d57f7bbf07d24ec2478a /comphelper | |
parent | c4aa2c4d7eb1cef0f3b172d1dbc4e51e9b379b80 (diff) |
Move SyntaxHighlighter class from svtools to comphelper
We use this class in helpcompiler, and it is not desirable to
compile svtools (and thus half of LibreOffice) for a build tool
in a cross-compiling environment.
Change-Id: I5e6bc3e576af41eb03c1420dd347c542306f69fa
Diffstat (limited to 'comphelper')
-rw-r--r-- | comphelper/Library_comphelper.mk | 7 | ||||
-rw-r--r-- | comphelper/Package_inc.mk | 1 | ||||
-rw-r--r-- | comphelper/inc/comphelper/syntaxhighlight.hxx | 165 | ||||
-rw-r--r-- | comphelper/source/misc/syntaxhighlight.cxx | 739 |
4 files changed, 911 insertions, 1 deletions
diff --git a/comphelper/Library_comphelper.mk b/comphelper/Library_comphelper.mk index 5c7fcd5a8c9a..93b0f3b48499 100644 --- a/comphelper/Library_comphelper.mk +++ b/comphelper/Library_comphelper.mk @@ -35,7 +35,11 @@ $(eval $(call gb_Library_add_defs,comphelper,\ -DCOMPHELPER_DLLIMPLEMENTATION \ )) -$(eval $(call gb_Library_use_external,comphelper,boost_headers)) +$(eval $(call gb_Library_use_externals,comphelper,\ + boost_headers \ + icuuc \ + icu_headers \ +)) $(eval $(call gb_Library_use_libraries,comphelper,\ cppu \ @@ -108,6 +112,7 @@ $(eval $(call gb_Library_add_exception_objects,comphelper,\ comphelper/source/misc/storagehelper \ comphelper/source/misc/string \ comphelper/source/misc/synchronousdispatch \ + comphelper/source/misc/syntaxhighlight \ comphelper/source/misc/types \ comphelper/source/misc/weak \ comphelper/source/misc/weakeventlistener \ diff --git a/comphelper/Package_inc.mk b/comphelper/Package_inc.mk index 195a5c97c4a7..35552f6d58fc 100644 --- a/comphelper/Package_inc.mk +++ b/comphelper/Package_inc.mk @@ -124,5 +124,6 @@ $(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/sequenceashashma $(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/configuration.hxx,comphelper/configuration.hxx)) $(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/configurationhelper.hxx,comphelper/configurationhelper.hxx)) $(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/newarray.hxx,comphelper/newarray.hxx)) +$(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/syntaxhighlight.hxx,comphelper/syntaxhighlight.hxx)) # vim: set noet sw=4 ts=4: diff --git a/comphelper/inc/comphelper/syntaxhighlight.hxx b/comphelper/inc/comphelper/syntaxhighlight.hxx new file mode 100644 index 000000000000..11a57db6ff05 --- /dev/null +++ b/comphelper/inc/comphelper/syntaxhighlight.hxx @@ -0,0 +1,165 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice 
project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#ifndef _COMPHELPER_SYNTAXHIGHLIGHT_HXX +#define _COMPHELPER_SYNTAXHIGHLIGHT_HXX + +#include <vector> +#include <rtl/ustring.hxx> + +#include <comphelper/comphelperdllapi.h> + + +#if defined CDECL +#undef CDECL +#endif + +// for the bsearch +#ifdef WNT +#define CDECL _cdecl +#endif +#if defined(UNX) +#define CDECL +#endif +#ifdef UNX +#include <sys/resource.h> +#endif + + +// Token-Typen TT_... 
+enum TokenTypes +{ + TT_UNKNOWN, + TT_IDENTIFIER, + TT_WHITESPACE, + TT_NUMBER, + TT_STRING, + TT_EOL, + TT_COMMENT, + TT_ERROR, + TT_OPERATOR, + TT_KEYWORDS, + TT_PARAMETER +}; + +struct HighlightPortion { sal_uInt16 nBegin; sal_uInt16 nEnd; TokenTypes tokenType; }; + + +typedef std::vector<HighlightPortion> HighlightPortions; + +///////////////////////////////////////////////////////////////////////// +// Hilfsklasse zur Untersuchung von JavaScript-Modulen, zunaechst zum +// Heraussuchen der Funktionen, spaeter auch zum Syntax-Highlighting verwenden + +// Flags fuer Zeichen-Eigenschaften +#define CHAR_START_IDENTIFIER 0x0001 +#define CHAR_IN_IDENTIFIER 0x0002 +#define CHAR_START_NUMBER 0x0004 +#define CHAR_IN_NUMBER 0x0008 +#define CHAR_IN_HEX_NUMBER 0x0010 +#define CHAR_IN_OCT_NUMBER 0x0020 +#define CHAR_START_STRING 0x0040 +#define CHAR_OPERATOR 0x0080 +#define CHAR_SPACE 0x0100 +#define CHAR_EOL 0x0200 + +#define CHAR_EOF 0x00 + + +// Sprachmodus des HighLighters (spaeter eventuell feiner +// differenzieren mit Keyword-Liste, C-Kommentar-Flag) +enum HighlighterLanguage +{ + HIGHLIGHT_BASIC, + HIGHLIGHT_SQL +}; + +class SimpleTokenizer_Impl +{ + HighlighterLanguage aLanguage; + // Zeichen-Info-Tabelle + sal_uInt16 aCharTypeTab[256]; + + const sal_Unicode* mpStringBegin; + const sal_Unicode* mpActualPos; + + // Zeile und Spalte + sal_uInt32 nLine; + sal_uInt32 nCol; + + sal_Unicode peekChar( void ) { return *mpActualPos; } + sal_Unicode getChar( void ) { nCol++; return *mpActualPos++; } + + // Hilfsfunktion: Zeichen-Flag Testen + sal_Bool testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags ); + + // Neues Token holen, Leerstring == nix mehr da + sal_Bool getNextToken( /*out*/TokenTypes& reType, + /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos ); + + const char** ppListKeyWords; + sal_uInt16 nKeyWordCount; + +public: + SimpleTokenizer_Impl( HighlighterLanguage aLang = HIGHLIGHT_BASIC ); + ~SimpleTokenizer_Impl( void ); + + sal_uInt16 
parseLine( sal_uInt32 nLine, const OUString* aSource ); + void getHighlightPortions( sal_uInt32 nParseLine, const OUString& rLine, + /*out*/HighlightPortions& portions ); + void setKeyWords( const char** ppKeyWords, sal_uInt16 nCount ); +}; + + +//*** SyntaxHighlighter-Klasse *** +// Konzept: Der Highlighter wird ueber alle Aenderungen im Source +// informiert (notifyChange) und liefert dem Aufrufer jeweils die +// Information zurueck, welcher Zeilen-Bereich des Source-Codes +// aufgrund dieser Aenderung neu gehighlighted werden muss. +// Dazu merkt sich Highlighter intern fuer jede Zeile, ob dort +// C-Kommentare beginnen oder enden. +class COMPHELPER_DLLPUBLIC SyntaxHighlighter +{ + HighlighterLanguage eLanguage; + SimpleTokenizer_Impl* m_pSimpleTokenizer; + char* m_pKeyWords; + sal_uInt16 m_nKeyWordCount; + +// void initializeKeyWords( HighlighterLanguage eLanguage ); + +public: + SyntaxHighlighter( void ); + ~SyntaxHighlighter( void ); + + // HighLighter (neu) initialisieren, die Zeilen-Tabelle wird + // dabei komplett geloescht, d.h. im Abschluss wird von einem + // leeren Source ausgegangen. In notifyChange() kann dann + // nur Zeile 0 angegeben werden. + void initialize( HighlighterLanguage eLanguage_ ); + + void notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference, + const OUString* pChangedLines, sal_uInt32 nArrayLength); + + void getHighlightPortions( sal_uInt32 nLine, const OUString& rLine, + HighlightPortions& pPortions ); + + HighlighterLanguage GetLanguage() { return eLanguage;} +}; +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/comphelper/source/misc/syntaxhighlight.cxx b/comphelper/source/misc/syntaxhighlight.cxx new file mode 100644 index 000000000000..2064c0c76a11 --- /dev/null +++ b/comphelper/source/misc/syntaxhighlight.cxx @@ -0,0 +1,739 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + + +#include <unicode/uchar.h> +#include <comphelper/syntaxhighlight.hxx> +#include <comphelper/string.hxx> + +// ########################################################################## +// ATTENTION: all these words need to be in lower case +// ########################################################################## +static const char* strListBasicKeyWords[] = { + "access", + "alias", + "and", + "any", + "append", + "as", + "attribute", + "base", + "binary", + "boolean", + "byref", + "byte", + "byval", + "call", + "case", + "cdecl", + "classmodule", + "close", + "compare", + "compatible", + "const", + "currency", + "date", + "declare", + "defbool", + "defcur", + "defdate", + "defdbl", + "deferr", + "defint", + "deflng", + "defobj", + "defsng", + "defstr", + "defvar", + "dim", + "do", + "double", + "each", + "else", + "elseif", + "end", + "end enum", + "end function", + "end if", + "end property", + "end select", + "end sub", + "end type", + "endif", + "enum", + "eqv", + "erase", + "error", + "exit", + "explicit", + "for", + "function", + "get", + "global", + "gosub", + "goto", + "if", + "imp", + "implements", + "in", + "input", + "integer", + "is", + "let", + "lib", + "like", + "line", + "line input", + 
"local", + "lock", + "long", + "loop", + "lprint", + "lset", + "mod", + "name", + "new", + "next", + "not", + "object", + "on", + "open", + "option", + "optional", + "or", + "output", + "paramarray", + "preserve", + "print", + "private", + "property", + "public", + "random", + "read", + "redim", + "rem", + "resume", + "return", + "rset", + "select", + "set", + "shared", + "single", + "static", + "step", + "stop", + "string", + "sub", + "system", + "text", + "then", + "to", + "type", + "typeof", + "until", + "variant", + "vbasupport", + "wend", + "while", + "with", + "withevents", + "write", + "xor" +}; + + +static const char* strListSqlKeyWords[] = { + "all", + "and", + "any", + "as", + "asc", + "avg", + "between", + "by", + "cast", + "corresponding", + "count", + "create", + "cross", + "delete", + "desc", + "distinct", + "drop", + "escape", + "except", + "exists", + "false", + "from", + "full", + "global", + "group", + "having", + "in", + "inner", + "insert", + "intersect", + "into", + "is", + "join", + "left", + "like", + "local", + "match", + "max", + "min", + "natural", + "not", + "null", + "on", + "or", + "order", + "outer", + "right", + "select", + "set", + "some", + "sum", + "table", + "temporary", + "true", + "union", + "unique", + "unknown", + "update", + "using", + "values", + "where" +}; + + +extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 ) +{ + return strcmp( (char *)arg1, *(char **)arg2 ); +} + + +namespace +{ + static bool isAlpha(sal_Unicode c) + { + if (comphelper::string::isalphaAscii(c)) + return true; + return u_isalpha(c); + } +} + +// Helper function: test character flag +sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags ) +{ + bool bRet = false; + if( c != 0 && c <= 255 ) + { + bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 ); + } + else if( c > 255 ) + { + bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0 + ? 
isAlpha(c) : false; + } + return bRet; +} + +void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount ) +{ + ppListKeyWords = ppKeyWords; + nKeyWordCount = nCount; +} + +sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, + /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos ) +{ + reType = TT_UNKNOWN; + + rpStartPos = mpActualPos; + + sal_Unicode c = peekChar(); + if( c == CHAR_EOF ) + return sal_False; + + getChar(); + + //*** Go through all possibilities *** + // Space? + if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) ) + { + while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True ) + getChar(); + + reType = TT_WHITESPACE; + } + + // Identifier? + else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) ) + { + sal_Bool bIdentifierChar; + do + { + // Naechstes Zeichen holen + c = peekChar(); + bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER ); + if( bIdentifierChar ) + getChar(); + } + while( bIdentifierChar ); + + reType = TT_IDENTIFIER; + + // Keyword table + if (ppListKeyWords != NULL) + { + int nCount = mpActualPos - rpStartPos; + + // No keyword if string contains char > 255 + bool bCanBeKeyword = true; + for( int i = 0 ; i < nCount ; i++ ) + { + if( rpStartPos[i] > 255 ) + { + bCanBeKeyword = false; + break; + } + } + + if( bCanBeKeyword ) + { + OUString aKWString(rpStartPos, nCount); + OString aByteStr = OUStringToOString(aKWString, + RTL_TEXTENCODING_ASCII_US).toAsciiLowerCase(); + if ( bsearch( aByteStr.getStr(), ppListKeyWords, nKeyWordCount, sizeof( char* ), + compare_strings ) ) + { + reType = TT_KEYWORDS; + + if (aByteStr.equalsL(RTL_CONSTASCII_STRINGPARAM("rem"))) + { + // Remove all characters until end of line or EOF + sal_Unicode cPeek = peekChar(); + while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) + { + c = getChar(); + cPeek = peekChar(); + } + + reType = TT_COMMENT; + } + } + } + } + } + + // Operator? 
+ // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there + else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) ) + { + // parameters for SQL view + if ( (c==':') || (c=='?')) + { + if (c!='?') + { + sal_Bool bIdentifierChar; + do + { + // Get next character + c = peekChar(); + bIdentifierChar = isAlpha(c); + if( bIdentifierChar ) + getChar(); + } + while( bIdentifierChar ); + } + reType = TT_PARAMETER; + } + else if (c=='-') + { + sal_Unicode cPeekNext = peekChar(); + if (cPeekNext=='-') + { + // Remove all characters until end of line or EOF + while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) + { + getChar(); + cPeekNext = peekChar(); + } + reType = TT_COMMENT; + } + } + else if (c=='/') + { + sal_Unicode cPeekNext = peekChar(); + if (cPeekNext=='/') + { + // Remove all characters until end of line or EOF + while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) + { + getChar(); + cPeekNext = peekChar(); + } + reType = TT_COMMENT; + } + } + else + { + // Comment? + if ( c == '\'' ) + { + c = getChar(); + + // Remove all characters until end of line or EOF + sal_Unicode cPeek = c; + while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) + { + getChar(); + cPeek = peekChar(); + } + + reType = TT_COMMENT; + } + + // Echter Operator, kann hier einfach behandelt werden, + // da nicht der wirkliche Operator, wie z.B. += interessiert, + // sondern nur die Tatsache, dass es sich um einen handelt. + if( reType != TT_COMMENT ) + { + reType = TT_OPERATOR; + } + + } + } + + // Objekt-Trenner? Muss vor Number abgehandelt werden + else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) ) + { + reType = TT_OPERATOR; + } + + // Number? 
+ else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True ) + { + reType = TT_NUMBER; + + // Number system, 10 = normal, it is changed for Oct/Hex + int nRadix = 10; + + // Is it an Oct or a Hex number? + if( c == '&' ) + { + // Octal? + if( peekChar() == 'o' || peekChar() == 'O' ) + { + // remove o + getChar(); + nRadix = 8; // Octal base + + // Read all numbers + while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) ) + c = getChar(); + } + // Hexadecimal? + else if( peekChar() == 'h' || peekChar() == 'H' ) + { + // remove x + getChar(); + nRadix = 16; // Hexadecimal base + + // Read all numbers + while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) ) + c = getChar(); + } + else + { + reType = TT_OPERATOR; + } + } + + // When it is not Oct or Hex, then it is double + if( reType == TT_NUMBER && nRadix == 10 ) + { + // Flag if the last character is an exponent + sal_Bool bAfterExpChar = sal_False; + + // Read all numbers + while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) || + (bAfterExpChar && peekChar() == '+' ) || + (bAfterExpChar && peekChar() == '-' ) ) + // After exponent +/- are OK, too + { + c = getChar(); + bAfterExpChar = ( c == 'e' || c == 'E' ); + } + } + } + + // String? + else if( testCharFlags( c, CHAR_START_STRING ) == sal_True ) + { + // Remember which character has opened the string + sal_Unicode cEndString = c; + if( c == '[' ) + cEndString = ']'; + + // Read all characters + while( peekChar() != cEndString ) + { + // Detect EOF before getChar(), so we do not loose EOF + if( peekChar() == CHAR_EOF ) + { + // ERROR: unterminated string literal + reType = TT_ERROR; + break; + } + c = getChar(); + if( testCharFlags( c, CHAR_EOL ) == sal_True ) + { + // ERROR: unterminated string literal + reType = TT_ERROR; + break; + } + } + + if( reType != TT_ERROR ) + { + getChar(); + if( cEndString == ']' ) + reType = TT_IDENTIFIER; + else + reType = TT_STRING; + } + } + + // End of line? 
+ else if( testCharFlags( c, CHAR_EOL ) == sal_True ) + { + // If another EOL character comes, read it + sal_Unicode cNext = peekChar(); + if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True ) + getChar(); + + // Set position data at the line start + nCol = 0; + nLine++; + + reType = TT_EOL; + } + + // All other will remain TT_UNKNOWN + + // Save end position + rpEndPos = mpActualPos; + return sal_True; +} + +SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang) +{ + memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) ); + + // Fill character table + sal_uInt16 i; + + // Allowed characters for identifiers + sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ); + for( i = 'a' ; i <= 'z' ; i++ ) + aCharTypeTab[i] |= nHelpMask; + for( i = 'A' ; i <= 'Z' ; i++ ) + aCharTypeTab[i] |= nHelpMask; + aCharTypeTab[(int)'_'] |= nHelpMask; + aCharTypeTab[(int)'$'] |= nHelpMask; + + // Digit (can be identifier and number) + nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER | + CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER ); + for( i = '0' ; i <= '9' ; i++ ) + aCharTypeTab[i] |= nHelpMask; + + // Add e, E, . 
and & here manually + aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER; + aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER; + aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER ); + aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER; + + // Hexadecimal digit + for( i = 'a' ; i <= 'f' ; i++ ) + aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; + for( i = 'A' ; i <= 'F' ; i++ ) + aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; + + // Octal digit + for( i = '0' ; i <= '7' ; i++ ) + aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER; + + // String literal start/end characters + aCharTypeTab[(int)'\''] |= CHAR_START_STRING; + aCharTypeTab[(int)'\"'] |= CHAR_START_STRING; + aCharTypeTab[(int)'['] |= CHAR_START_STRING; + aCharTypeTab[(int)'`'] |= CHAR_START_STRING; + + // Operator characters + aCharTypeTab[(int)'!'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'%'] |= CHAR_OPERATOR; + // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140 + aCharTypeTab[(int)'('] |= CHAR_OPERATOR; + aCharTypeTab[(int)')'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'*'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'+'] |= CHAR_OPERATOR; + aCharTypeTab[(int)','] |= CHAR_OPERATOR; + aCharTypeTab[(int)'-'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'/'] |= CHAR_OPERATOR; + aCharTypeTab[(int)':'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'<'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'='] |= CHAR_OPERATOR; + aCharTypeTab[(int)'>'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'?'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'^'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'|'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'~'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'{'] |= CHAR_OPERATOR; + aCharTypeTab[(int)'}'] |= CHAR_OPERATOR; + // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826 + aCharTypeTab[(int)']'] |= CHAR_OPERATOR; + aCharTypeTab[(int)';'] |= CHAR_OPERATOR; + + // Space + aCharTypeTab[(int)' ' ] |= CHAR_SPACE; + aCharTypeTab[(int)'\t'] |= CHAR_SPACE; + + // End of line characters + aCharTypeTab[(int)'\r'] |= CHAR_EOL; + 
aCharTypeTab[(int)'\n'] |= CHAR_EOL; + + ppListKeyWords = NULL; +} + +SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void ) +{ +} + +SimpleTokenizer_Impl* getSimpleTokenizer( void ) +{ + static SimpleTokenizer_Impl* pSimpleTokenizer = NULL; + if( !pSimpleTokenizer ) + pSimpleTokenizer = new SimpleTokenizer_Impl(); + return pSimpleTokenizer; +} + +sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const OUString* aSource ) +{ + // Set the position to the beginning of the source string + mpStringBegin = mpActualPos = aSource->getStr(); + + // Initialize row and column + nLine = nParseLine; + nCol = 0L; + + // Variables for the out parameter + TokenTypes eType; + const sal_Unicode* pStartPos; + const sal_Unicode* pEndPos; + + // Loop over all the tokens + sal_uInt16 nTokenCount = 0; + while( getNextToken( eType, pStartPos, pEndPos ) ) + nTokenCount++; + + return nTokenCount; +} + +void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const OUString& rLine, + /*out*/HighlightPortions& portions ) +{ + // Set the position to the beginning of the source string + mpStringBegin = mpActualPos = rLine.getStr(); + + // Initialize row and column + nLine = nParseLine; + nCol = 0L; + + // Variables for the out parameter + TokenTypes eType; + const sal_Unicode* pStartPos; + const sal_Unicode* pEndPos; + + // Loop over all the tokens + while( getNextToken( eType, pStartPos, pEndPos ) ) + { + HighlightPortion portion; + + portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin); + portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin); + portion.tokenType = eType; + + portions.push_back(portion); + } +} + + +SyntaxHighlighter::SyntaxHighlighter() +{ + m_pSimpleTokenizer = 0; + m_pKeyWords = NULL; + m_nKeyWordCount = 0; +} + +SyntaxHighlighter::~SyntaxHighlighter() +{ + delete m_pSimpleTokenizer; + delete m_pKeyWords; +} + +void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ ) +{ + eLanguage = eLanguage_; + delete m_pSimpleTokenizer; + 
m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage); + + switch (eLanguage) + { + case HIGHLIGHT_BASIC: + m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords, + sizeof( strListBasicKeyWords ) / sizeof( char* )); + break; + case HIGHLIGHT_SQL: + m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords, + sizeof( strListSqlKeyWords ) / sizeof( char* )); + break; + default: + m_pSimpleTokenizer->setKeyWords( NULL, 0 ); + } +} + +void SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference, + const OUString* pChangedLines, sal_uInt32 nArrayLength) +{ + (void)nLineCountDifference; + + for( sal_uInt32 i=0 ; i < nArrayLength ; i++ ) + m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]); +} + +void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const OUString& rLine, + /*out*/HighlightPortions& portions ) +{ + m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions ); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |