summaryrefslogtreecommitdiff
path: root/svtools/source
diff options
context:
space:
mode:
author Andras Timar <atimar@suse.com> 2013-02-15 13:02:10 +0100
committer Andras Timar <atimar@suse.com> 2013-02-16 12:55:03 +0100
commit c16e9f4ed97f65357e9986f46ad88ee9f2237997 (patch)
tree 9da5c0056d4aad772a72d57f7bbf07d24ec2478a /svtools/source
parent c4aa2c4d7eb1cef0f3b172d1dbc4e51e9b379b80 (diff)
Move SyntaxHighlighter class from svtools to comphelper
We use this class in helpcompiler, and it is not desirable to compile svtools (thus half of LibreOffice) for a build tool in a cross-compiling environment. Change-Id: I5e6bc3e576af41eb03c1420dd347c542306f69fa
Diffstat (limited to 'svtools/source')
-rw-r--r-- svtools/source/edit/syntaxhighlight.cxx 741
1 files changed, 0 insertions, 741 deletions
diff --git a/svtools/source/edit/syntaxhighlight.cxx b/svtools/source/edit/syntaxhighlight.cxx
deleted file mode 100644
index 920bac56892c..000000000000
--- a/svtools/source/edit/syntaxhighlight.cxx
+++ /dev/null
@@ -1,741 +0,0 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*
- * This file is part of the LibreOffice project.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * This file incorporates work covered by the following license notice:
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed
- * with this work for additional information regarding copyright
- * ownership. The ASF licenses this file to you under the Apache
- * License, Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of
- * the License at http://www.apache.org/licenses/LICENSE-2.0 .
- */
-
-
-#include <unicode/uchar.h>
-#include <svtools/syntaxhighlight.hxx>
-#include <comphelper/string.hxx>
-
-// ATTENTION: all these words need to be in lower case, because getNextToken()
-// lower-cases an identifier before looking it up; the entries must also stay in
-// strcmp() order, since that lookup is a bsearch() through compare_strings().
-static const char* strListBasicKeyWords[] = { // keyword table that SyntaxHighlighter::initialize() installs for HIGHLIGHT_BASIC
- "access",
- "alias",
- "and",
- "any",
- "append",
- "as",
- "attribute",
- "base",
- "binary",
- "boolean",
- "byref",
- "byte",
- "byval",
- "call",
- "case",
- "cdecl",
- "classmodule",
- "close",
- "compare",
- "compatible",
- "const",
- "currency",
- "date",
- "declare",
- "defbool",
- "defcur",
- "defdate",
- "defdbl",
- "deferr",
- "defint",
- "deflng",
- "defobj",
- "defsng",
- "defstr",
- "defvar",
- "dim",
- "do",
- "double",
- "each",
- "else",
- "elseif",
- "end",
- "end enum", // NOTE(review): entries containing a space look unreachable here - getNextToken() only looks up a single identifier, and ' ' is not an identifier character
- "end function",
- "end if",
- "end property",
- "end select",
- "end sub",
- "end type",
- "endif",
- "enum",
- "eqv",
- "erase",
- "error",
- "exit",
- "explicit",
- "for",
- "function",
- "get",
- "global",
- "gosub",
- "goto",
- "if",
- "imp",
- "implements",
- "in",
- "input",
- "integer",
- "is",
- "let",
- "lib",
- "like",
- "line",
- "line input",
- "local",
- "lock",
- "long",
- "loop",
- "lprint",
- "lset",
- "mod",
- "name",
- "new",
- "next",
- "not",
- "object",
- "on",
- "open",
- "option",
- "optional",
- "or",
- "output",
- "paramarray",
- "preserve",
- "print",
- "private",
- "property",
- "public",
- "random",
- "read",
- "redim",
- "rem",
- "resume",
- "return",
- "rset",
- "select",
- "set",
- "shared",
- "single",
- "static",
- "step",
- "stop",
- "string",
- "sub",
- "system",
- "text",
- "then",
- "to",
- "type",
- "typeof",
- "until",
- "variant",
- "vbasupport",
- "wend",
- "while",
- "with",
- "withevents",
- "write",
- "xor"
-};
-
-
-static const char* strListSqlKeyWords[] = { // keyword table that SyntaxHighlighter::initialize() installs for HIGHLIGHT_SQL; entries are lower case and in strcmp() order because getNextToken() finds them with bsearch()
- "all",
- "and",
- "any",
- "as",
- "asc",
- "avg",
- "between",
- "by",
- "cast",
- "corresponding",
- "count",
- "create",
- "cross",
- "delete",
- "desc",
- "distinct",
- "drop",
- "escape",
- "except",
- "exists",
- "false",
- "from",
- "full",
- "global",
- "group",
- "having",
- "in",
- "inner",
- "insert",
- "intersect",
- "into",
- "is",
- "join",
- "left",
- "like",
- "local",
- "match",
- "max",
- "min",
- "natural",
- "not",
- "null",
- "on",
- "or",
- "order",
- "outer",
- "right",
- "select",
- "set",
- "some",
- "sum",
- "table",
- "temporary",
- "true",
- "union",
- "unique",
- "unknown",
- "update",
- "using",
- "values",
- "where"
-};
-
-
-/**
- * bsearch() comparison callback used for the keyword tables.
- *
- * @param arg1 the search key: a NUL-terminated C string
- * @param arg2 the table slot being probed: a pointer to a C string pointer
- * @return the strcmp() ordering of the key relative to the table entry
- */
-extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
-{
-    // Named casts keep the const qualifier that the former C-style casts dropped.
-    const char* pKey = static_cast< const char* >( arg1 );
-    const char* pEntry = *static_cast< const char* const* >( arg2 );
-    return strcmp( pKey, pEntry );
-}
-
-
-namespace
-{
-    /// True for ASCII letters and for every character u_isalpha() accepts.
-    static bool isAlpha(sal_Unicode c)
-    {
-        // The ASCII test runs first; u_isalpha() is only consulted when it fails.
-        return comphelper::string::isalphaAscii(c) || u_isalpha(c);
-    }
-}
-
-/**
- * Helper function: tests whether a character carries any of the given flags.
- *
- * @param c          the character to classify
- * @param nTestFlags one or more CHAR_* flags combined with |
- * @return true when c matches at least one of the flags
- */
-sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
-{
-    // Character 0 never matches anything
-    if( c == 0 )
-        return false;
-
-    // 1..255: the answer comes straight out of the classification table
-    if( c <= 255 )
-        return ( aCharTypeTab[c] & nTestFlags ) != 0;
-
-    // Above 255 a character can only qualify as part of an identifier,
-    // and only when it is alphabetic
-    if( ( ( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags ) == 0 )
-        return false;
-    return isAlpha(c);
-}
-
-/**
- * Installs the keyword table that getNextToken() searches.
- *
- * @param ppKeyWords table of keyword strings, or NULL to switch the keyword
- *                   lookup off; the pointer is stored, not copied
- * @param nCount     number of entries in ppKeyWords
- */
-void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
-{
-    nKeyWordCount = nCount;
-    ppListKeyWords = ppKeyWords;
-}
-
-/**
- * Reads the next token, starting at mpActualPos.
- *
- * @param reType     receives the token class; it stays TT_UNKNOWN when the
- *                   first character fits none of the categories handled below
- * @param rpStartPos receives the position of the first character of the token
- * @param rpEndPos   receives the position just behind the token; only written
- *                   when a token was read
- * @return sal_False when the current character is CHAR_EOF (nothing is
- *         consumed then), sal_True otherwise
- */
-sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
-    /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
-{
-    reType = TT_UNKNOWN;
-
-    rpStartPos = mpActualPos;
-
-    sal_Unicode c = peekChar();
-    if( c == CHAR_EOF )
-        return sal_False;
-
-    getChar();
-
-    //*** Go through all possibilities ***
-    // Space?
-    if( testCharFlags( c, CHAR_SPACE ) )
-    {
-        while( testCharFlags( peekChar(), CHAR_SPACE ) )
-            getChar();
-
-        reType = TT_WHITESPACE;
-    }
-
-    // Identifier?
-    else if( testCharFlags( c, CHAR_START_IDENTIFIER ) )
-    {
-        sal_Bool bIdentifierChar;
-        do
-        {
-            // Get next character
-            c = peekChar();
-            bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
-            if( bIdentifierChar )
-                getChar();
-        }
-        while( bIdentifierChar );
-
-        reType = TT_IDENTIFIER;
-
-        // Keyword table
-        if( ppListKeyWords != NULL )
-        {
-            int nCount = mpActualPos - rpStartPos;
-
-            // No keyword if string contains char > 255
-            bool bCanBeKeyword = true;
-            for( int i = 0 ; i < nCount ; i++ )
-            {
-                if( rpStartPos[i] > 255 )
-                {
-                    bCanBeKeyword = false;
-                    break;
-                }
-            }
-
-            if( bCanBeKeyword )
-            {
-                // The tables are lower case, so the lookup key is lower-cased too
-                OUString aKWString(rpStartPos, nCount);
-                OString aByteStr = OUStringToOString(aKWString,
-                    RTL_TEXTENCODING_ASCII_US).toAsciiLowerCase();
-                if( bsearch( aByteStr.getStr(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
-                             compare_strings ) )
-                {
-                    reType = TT_KEYWORDS;
-
-                    if( aByteStr.equalsL(RTL_CONSTASCII_STRINGPARAM("rem")) )
-                    {
-                        // Remove all characters until end of line or EOF
-                        sal_Unicode cPeek = peekChar();
-                        while( cPeek != CHAR_EOF && !testCharFlags( cPeek, CHAR_EOL ) )
-                        {
-                            c = getChar();
-                            cPeek = peekChar();
-                        }
-
-                        reType = TT_COMMENT;
-                    }
-                }
-            }
-        }
-    }
-
-    // Operator?
-    // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
-    else if( testCharFlags( c, CHAR_OPERATOR ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC) ) )
-    {
-        // parameters for SQL view
-        if( (c==':') || (c=='?') )
-        {
-            if( c!='?' )
-            {
-                sal_Bool bIdentifierChar;
-                do
-                {
-                    // Get next character
-                    c = peekChar();
-                    bIdentifierChar = isAlpha(c);
-                    if( bIdentifierChar )
-                        getChar();
-                }
-                while( bIdentifierChar );
-            }
-            reType = TT_PARAMETER;
-        }
-        else if( c=='-' )
-        {
-            sal_Unicode cPeekNext = peekChar();
-            if( cPeekNext=='-' )
-            {
-                // "--" comment: remove all characters until end of line or EOF
-                while( cPeekNext != CHAR_EOF && !testCharFlags( cPeekNext, CHAR_EOL ) )
-                {
-                    getChar();
-                    cPeekNext = peekChar();
-                }
-                reType = TT_COMMENT;
-            }
-            else
-            {
-                // A lone '-' is an ordinary operator; it used to fall through
-                // as TT_UNKNOWN
-                reType = TT_OPERATOR;
-            }
-        }
-        else if( c=='/' )
-        {
-            sal_Unicode cPeekNext = peekChar();
-            if( cPeekNext=='/' )
-            {
-                // "//" comment: remove all characters until end of line or EOF
-                while( cPeekNext != CHAR_EOF && !testCharFlags( cPeekNext, CHAR_EOL ) )
-                {
-                    getChar();
-                    cPeekNext = peekChar();
-                }
-                reType = TT_COMMENT;
-            }
-            else
-            {
-                // A lone '/' is an ordinary operator; it used to fall through
-                // as TT_UNKNOWN
-                reType = TT_OPERATOR;
-            }
-        }
-        else
-        {
-            // Comment?
-            if( c == '\'' )
-            {
-                // Remove all characters until end of line or EOF.
-                // Only peek before consuming: an unchecked getChar() here
-                // stepped over the terminating CHAR_EOF when the comment was
-                // empty or one character long. The string branch below checks
-                // for CHAR_EOF before getChar() for the same reason.
-                sal_Unicode cPeek = peekChar();
-                while( cPeek != CHAR_EOF && !testCharFlags( cPeek, CHAR_EOL ) )
-                {
-                    getChar();
-                    cPeek = peekChar();
-                }
-
-                reType = TT_COMMENT;
-            }
-
-            // A real operator. It can be handled this simply because the
-            // concrete operator (e.g. +=) is of no interest here, only the
-            // fact that it is one.
-            if( reType != TT_COMMENT )
-            {
-                reType = TT_OPERATOR;
-            }
-
-        }
-    }
-
-    // Object separator? Must be handled before Number
-    else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
-    {
-        reType = TT_OPERATOR;
-    }
-
-    // Number?
-    else if( testCharFlags( c, CHAR_START_NUMBER ) )
-    {
-        reType = TT_NUMBER;
-
-        // Number system, 10 = normal, it is changed for Oct/Hex
-        int nRadix = 10;
-
-        // Is it an Oct or a Hex number?
-        if( c == '&' )
-        {
-            // Octal?
-            if( peekChar() == 'o' || peekChar() == 'O' )
-            {
-                // remove o
-                getChar();
-                nRadix = 8;     // Octal base
-
-                // Read all numbers
-                while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
-                    c = getChar();
-            }
-            // Hexadecimal?
-            else if( peekChar() == 'h' || peekChar() == 'H' )
-            {
-                // remove h
-                getChar();
-                nRadix = 16;    // Hexadecimal base
-
-                // Read all numbers
-                while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
-                    c = getChar();
-            }
-            else
-            {
-                // '&' without a radix letter is treated as an operator
-                reType = TT_OPERATOR;
-            }
-        }
-
-        // When it is not Oct or Hex, then it is double
-        if( reType == TT_NUMBER && nRadix == 10 )
-        {
-            // Flag if the last character is an exponent
-            sal_Bool bAfterExpChar = sal_False;
-
-            // Read all numbers; directly after an exponent +/- are OK, too
-            while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
-                   (bAfterExpChar && peekChar() == '+' ) ||
-                   (bAfterExpChar && peekChar() == '-' ) )
-            {
-                c = getChar();
-                bAfterExpChar = ( c == 'e' || c == 'E' );
-            }
-        }
-    }
-
-    // String?
-    else if( testCharFlags( c, CHAR_START_STRING ) )
-    {
-        // Remember which character has opened the string
-        sal_Unicode cEndString = c;
-        if( c == '[' )
-            cEndString = ']';
-
-        // Read all characters
-        while( peekChar() != cEndString )
-        {
-            // Detect EOF before getChar(), so we do not lose EOF
-            if( peekChar() == CHAR_EOF )
-            {
-                // ERROR: unterminated string literal
-                reType = TT_ERROR;
-                break;
-            }
-            c = getChar();
-            if( testCharFlags( c, CHAR_EOL ) )
-            {
-                // ERROR: unterminated string literal
-                reType = TT_ERROR;
-                break;
-            }
-        }
-
-        if( reType != TT_ERROR )
-        {
-            // Consume the closing delimiter; [...] counts as an identifier
-            getChar();
-            if( cEndString == ']' )
-                reType = TT_IDENTIFIER;
-            else
-                reType = TT_STRING;
-        }
-    }
-
-    // End of line?
-    else if( testCharFlags( c, CHAR_EOL ) )
-    {
-        // If another, different EOL character comes, read it
-        sal_Unicode cNext = peekChar();
-        if( cNext != c && testCharFlags( cNext, CHAR_EOL ) )
-            getChar();
-
-        // Set position data at the line start
-        nCol = 0;
-        nLine++;
-
-        reType = TT_EOL;
-    }
-
-    // All other will remain TT_UNKNOWN
-
-    // Save end position
-    rpEndPos = mpActualPos;
-    return sal_True;
-}
-
-/**
- * Creates a tokenizer for the given language and fills the character
- * classification table that testCharFlags() consults for characters 1..255.
- *
- * No keyword table is installed; see setKeyWords().
- *
- * @param aLang the language to tokenize; getNextToken() checks it when
- *              deciding whether an apostrophe starts a comment
- */
-SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
-{
-    memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
-
-    // Fill character table
-    sal_uInt16 i;
-    const char* p;
-
-    // Allowed characters for identifiers
-    sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
-    for( i = 'a' ; i <= 'z' ; i++ )
-        aCharTypeTab[i] |= nHelpMask;
-    for( i = 'A' ; i <= 'Z' ; i++ )
-        aCharTypeTab[i] |= nHelpMask;
-    for( p = "_$" ; *p != '\0' ; ++p )
-        aCharTypeTab[ static_cast< unsigned char >( *p ) ] |= nHelpMask;
-
-    // Digit (can be identifier and number)
-    nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
-                              CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
-    for( i = '0' ; i <= '9' ; i++ )
-        aCharTypeTab[i] |= nHelpMask;
-
-    // Add e, E, . and & here manually
-    aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
-    aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
-    aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
-    aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
-
-    // Hexadecimal digit
-    for( i = 'a' ; i <= 'f' ; i++ )
-        aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
-    for( i = 'A' ; i <= 'F' ; i++ )
-        aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
-
-    // Octal digit
-    for( i = '0' ; i <= '7' ; i++ )
-        aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
-
-    // String literal start/end characters: ' " [ `
-    for( p = "'\"[`" ; *p != '\0' ; ++p )
-        aCharTypeTab[ static_cast< unsigned char >( *p ) ] |= CHAR_START_STRING;
-
-    // Operator characters. '&' is deliberately absent (removed because of
-    // #i14140) and so is '[' (removed because of #i17826).
-    for( p = "!%()*+,-/:<=>?^|~{}];" ; *p != '\0' ; ++p )
-        aCharTypeTab[ static_cast< unsigned char >( *p ) ] |= CHAR_OPERATOR;
-
-    // Space
-    aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
-    aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
-
-    // End of line characters
-    aCharTypeTab[(int)'\r'] |= CHAR_EOL;
-    aCharTypeTab[(int)'\n'] |= CHAR_EOL;
-
-    // No keyword table yet. The count and the scan state used to be left
-    // indeterminate until setKeyWords() / parseLine() / getHighlightPortions()
-    // ran; give them defined values.
-    ppListKeyWords = NULL;
-    nKeyWordCount = 0;
-    mpStringBegin = NULL;
-    mpActualPos = NULL;
-    nLine = 0;
-    nCol = 0;
-}
-
-/// Nothing to release: the keyword table pointer is only borrowed (see setKeyWords()).
-SimpleTokenizer_Impl::~SimpleTokenizer_Impl()
-{
-}
-
-/**
- * Returns a shared tokenizer instance, creating it on the first call.
- * The instance is never deleted by this function.
- */
-SimpleTokenizer_Impl* getSimpleTokenizer()
-{
-    static SimpleTokenizer_Impl* pSharedInstance = NULL;
-    if( pSharedInstance == NULL )
-    {
-        pSharedInstance = new SimpleTokenizer_Impl();
-    }
-    return pSharedInstance;
-}
-
-/**
- * Tokenizes one source string and counts its tokens.
- *
- * @param nParseLine line number the scan starts with
- * @param aSource    the text to scan; must not be NULL
- * @return the number of tokens getNextToken() delivered
- */
-sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const OUString* aSource )
-{
-    // Row and column bookkeeping starts over
-    nCol = 0L;
-    nLine = nParseLine;
-
-    // Scanning begins at the first character of the source string
-    mpActualPos = aSource->getStr();
-    mpStringBegin = mpActualPos;
-
-    // Out parameters of getNextToken(); their values are not used here
-    TokenTypes eTokenType;
-    const sal_Unicode* pTokenBegin;
-    const sal_Unicode* pTokenEnd;
-
-    // Count until the tokenizer reports the end of the input
-    sal_uInt16 nFound = 0;
-    for( ;; )
-    {
-        if( !getNextToken( eTokenType, pTokenBegin, pTokenEnd ) )
-            break;
-        nFound++;
-    }
-
-    return nFound;
-}
-
-/**
- * Tokenizes one line and appends a portion for every token found.
- *
- * @param nParseLine line number the scan starts with
- * @param rLine      the text to scan
- * @param portions   receives one entry per token; existing entries are kept
- */
-void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const OUString& rLine,
-                                                    /*out*/HighlightPortions& portions )
-{
-    // Row and column bookkeeping starts over
-    nCol = 0L;
-    nLine = nParseLine;
-
-    // Scanning begins at the first character of the line
-    mpActualPos = rLine.getStr();
-    mpStringBegin = mpActualPos;
-
-    // Out parameters of getNextToken()
-    TokenTypes eTokenType;
-    const sal_Unicode* pTokenBegin;
-    const sal_Unicode* pTokenEnd;
-
-    for( ;; )
-    {
-        if( !getNextToken( eTokenType, pTokenBegin, pTokenEnd ) )
-            break;
-
-        // Positions are stored as offsets from the start of the line
-        HighlightPortion aEntry;
-        aEntry.tokenType = eTokenType;
-        aEntry.nBegin = (sal_uInt16)(pTokenBegin - mpStringBegin);
-        aEntry.nEnd = (sal_uInt16)(pTokenEnd - mpStringBegin);
-
-        portions.push_back(aEntry);
-    }
-}
-
-
-/// Creates a highlighter without a tokenizer; initialize() supplies one.
-SyntaxHighlighter::SyntaxHighlighter()
-{
-    m_nKeyWordCount = 0;
-    m_pKeyWords = NULL;
-    m_pSimpleTokenizer = NULL;
-}
-
-/// Releases the keyword storage and the tokenizer created by initialize().
-SyntaxHighlighter::~SyntaxHighlighter()
-{
-    delete m_pKeyWords;
-    delete m_pSimpleTokenizer;
-}
-
-/**
- * Selects the language to highlight.
- *
- * Any tokenizer from an earlier call is deleted and a new one is created
- * with the keyword table of the chosen language.
- *
- * @param eLanguage_ the language to highlight
- */
-void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
-{
-    eLanguage = eLanguage_;
-
-    // Replace the tokenizer of a previous initialize() call, if any
-    delete m_pSimpleTokenizer;
-    m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
-
-    // Pick the keyword table; every other language gets none
-    const char** ppTable = NULL;
-    sal_uInt16 nEntries = 0;
-    if( eLanguage == HIGHLIGHT_BASIC )
-    {
-        ppTable = strListBasicKeyWords;
-        nEntries = sizeof( strListBasicKeyWords ) / sizeof( char* );
-    }
-    else if( eLanguage == HIGHLIGHT_SQL )
-    {
-        ppTable = strListSqlKeyWords;
-        nEntries = sizeof( strListSqlKeyWords ) / sizeof( char* );
-    }
-
-    m_pSimpleTokenizer->setKeyWords( ppTable, nEntries );
-}
-
-/**
- * Runs the tokenizer over a run of changed lines.
- *
- * @param nLine               number of the first changed line
- * @param nLineCountDifference unused
- * @param pChangedLines       array with the text of the changed lines
- * @param nArrayLength        number of entries in pChangedLines
- * @return the range nLine .. nLine + nArrayLength - 1
- */
-const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
-                                const OUString* pChangedLines, sal_uInt32 nArrayLength)
-{
-    (void)nLineCountDifference;
-
-    sal_uInt32 nOffset = 0;
-    while( nOffset < nArrayLength )
-    {
-        m_pSimpleTokenizer->parseLine(nLine + nOffset, pChangedLines + nOffset);
-        nOffset++;
-    }
-
-    return Range( nLine, nLine + nArrayLength-1 );
-}
-
-/**
- * Appends the highlight portions of one line; the work is forwarded to the
- * tokenizer created by initialize().
- *
- * @param nLine    number of the line
- * @param rLine    text of the line
- * @param portions receives the portions
- */
-void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const OUString& rLine,
-                                            /*out*/HighlightPortions& portions )
-{
-    SimpleTokenizer_Impl* const pTokenizer = m_pSimpleTokenizer;
-    pTokenizer->getHighlightPortions( nLine, rLine, portions );
-}
-
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */