diff options
Diffstat (limited to 'toolkit/src2xml/source/srclexer.py')
-rw-r--r-- | toolkit/src2xml/source/srclexer.py | 488 |
1 files changed, 488 insertions, 0 deletions
"""Lexer for the .src format.

Splits source text into a flat token list and tracks the preprocessor state
(#define, #include and conditional blocks) encountered on the way.

NOTE(review): this module was recovered from whitespace-collapsed text, so
runs of spaces inside string literals (for example the indent unit used by
dumpTokens and the debug output of handleMacroInclude) may originally have
been longer -- confirm against the upstream file.
"""
import sys, os.path
# progress(), error() and ParseError used below are expected to be provided
# by this star import -- TODO confirm against globals.py.
from globals import *
import macroparser


class EOF(Exception):
    """Raised by SrcLexer.nextPos when stepping past the end of the buffer."""

    def __init__(self):
        pass

    def __str__(self):
        return "end of file"

    # The method used to be spelled 'str'; keep that name working.
    str = __str__


class BOF(Exception):
    """Raised by SrcLexer.prevPos when stepping before the buffer start."""

    def __init__(self):
        pass

    def __str__(self):
        return "beginning of file"

    # The method used to be spelled 'str'; keep that name working.
    str = __str__


def removeHeaderQuotes(orig):
    """Strip the surrounding "..." or <...> from an #include operand.

    Strings of length two or less, and strings without a matching pair of
    delimiters, are returned unchanged.
    """
    if len(orig) <= 2:
        return orig
    if orig[0] == orig[-1] == '"':
        return orig[1:-1]
    if orig[0] == '<' and orig[-1] == '>':
        return orig[1:-1]
    return orig


def dumpTokens(tokens, toError=False):
    """Write an indented, human-readable rendering of a token list.

    '{' and '<' open a scope, '}' and '>' close one, ';' ends the current
    line; every other non-empty token is appended to the current line.

    tokens  -- iterable of token strings.
    toError -- write to sys.stderr instead of sys.stdout when true.
    """
    scope = 0
    indent = " "
    line = ''
    out = []

    for token in tokens:
        # Compare against the exact tokens: a substring test such as
        # "token in '{<'" is also true for the empty string.
        if token in ('{', '<'):
            if line:
                out.append(indent * scope + line + "\n")
                line = ''
            out.append(indent * scope + token + "\n")
            scope += 1
        elif token in ('}', '>'):
            if line:
                out.append(indent * scope + line + "\n")
                line = ''
            scope -= 1
            # No newline here: a following ';' is emitted right after it.
            out.append(indent * scope + token)
        elif token == ';':
            if line:
                out.append(indent * scope + line + ";\n")
                line = ''
            else:
                out.append(";\n")
        elif token:
            line += token + ' '

    if line:
        out.append(line)
    out.append("\n")

    stream = sys.stderr if toError else sys.stdout
    stream.write(''.join(out))


class HeaderData(object):
    """Tokens and macro definitions collected from one included header."""

    def __init__(self):
        self.defines = {}
        self.tokens = []


class SrcLexer(object):
    """Lexical analyzer for .src format.

    The role of a lexer is to parse the source file and break it into
    appropriate tokens.  Such tokens are later passed to a parser to
    build the syntax tree.
    """
    # Class-wide cache: header path -> HeaderData of an already lexed header.
    headerCache = {}

    # States stored on the conditional-compilation stack.
    VISIBLE = 0         # the current branch is active
    INVISIBLE_PRE = 1   # inactive, no branch of this conditional taken yet
    INVISIBLE_POST = 2  # inactive, an earlier branch was already taken

    def __init__(self, chars, filepath = None):
        """Create a lexer over the text chars.

        chars    -- the source text to tokenize.
        filepath -- path the text came from; used to avoid a file including
                    itself and printed in diagnostics.
        """
        self.filepath = filepath
        self.parentLexer = None
        self.chars = chars
        self.bufsize = len(self.chars)

        # TODO: use parameters for this
        # Properties that can be copied.
        self.headerDict = dict()
        self.debug = False
        self.debugMacro = False
        self.includeDirs = list()
        self.expandHeaders = True
        self.inMacroDefine = False
        self.stopOnHeader = False

        # Start with empty results so getTokens()/getDefines() are usable
        # before tokenize() has been called.
        self.init()

    def copyProperties(self, other):
        """Copy properties from another instance of SrcLexer.

        headerDict is shared by reference, while includeDirs is copied.
        """
        # TODO: use parameters for this
        self.headerDict = other.headerDict
        self.debug = other.debug
        self.debugMacro = other.debugMacro
        self.includeDirs = other.includeDirs[:]
        self.expandHeaders = other.expandHeaders
        self.inMacroDefine = other.inMacroDefine
        self.stopOnHeader = other.stopOnHeader

    def init(self):
        """Reset the per-run state; tokenize() calls this first."""
        self.firstNonBlank = ''
        self.token = ''
        self.tokens = []
        self.defines = {}
        self.visibilityStack = []

    def getTokens(self):
        """Return the list of tokens produced so far."""
        return self.tokens

    def getDefines(self):
        """Return the dictionary of macros collected so far, keyed by name."""
        return self.defines

    def nextPos(self, i):
        """Return the index of the next character after i, skipping CRs.

        Raises EOF when there is no such character.
        """
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF()
            if c != "\r":
                return i

    def prevPos(self, i):
        """Return the index of the previous character before i, skipping CRs.

        Raises BOF when there is no such character.
        """
        while True:
            i -= 1
            # A negative index would silently wrap around to the end of the
            # buffer instead of raising IndexError, so test it explicitly.
            if i < 0:
                raise BOF()
            try:
                c = self.chars[i]
            except IndexError:
                raise BOF()
            if c != "\r":
                return i

    def isCodeVisible(self):
        """Return True when every enclosing conditional block is active."""
        return all(state == SrcLexer.VISIBLE for state in self.visibilityStack)

    def tokenize(self):
        """Tokenize self.chars into self.tokens and collect self.defines.

        All previous results are discarded first.  An empty buffer yields no
        tokens.
        """
        self.init()

        i = -1
        while True:
            # Advancing at the top of the loop handles an empty buffer and a
            # leading CR the same way as everywhere else.
            try:
                i = self.nextPos(i)
            except EOF:
                break
            c = self.chars[i]

            if self.firstNonBlank == '' and c not in (' ', "\n", "\t"):
                # Store the first non-blank in a line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''

            if c == '#':
                i = self.pound(i)
            elif c == '/':
                i = self.slash(i)
            elif c == "\n":
                i = self.lineBreak(i)
            elif c == '"':
                i = self.doubleQuote(i)
            elif c in (' ', "\t"):
                i = self.blank(i)
            elif c in ";()[]{}<>,=+-*":
                # Any outstanding single-character token.
                i = self.anyToken(i, c)
            elif self.isCodeVisible():
                self.token += c

        if len(self.token):
            self.tokens.append(self.token)

        if not self.parentLexer and self.debug:
            progress("-"*68 + "\n")
            progress("All defines found in this translation unit:\n")
            for key in sorted(self.defines):
                progress("@ %s\n"%key)

    def dumpTokens(self, toError=False):
        """Pretty-print the current token list; see the module-level dumpTokens."""
        dumpTokens(self.tokens, toError)

    def maybeAddToken(self):
        """Append the pending token, if any, to the token list and clear it."""
        if len(self.token) > 0:
            self.tokens.append(self.token)
            self.token = ''

    #--------------------------------------------------------------------
    # character handlers
    #
    # Each handler receives the index of the character being handled and
    # returns the index of the last character it consumed.

    def blank(self, i):
        """Handle a space or tab: end the pending token in visible code."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        return i

    def pound(self, i):
        """Handle '#': process a preprocessor directive.

        The '#' only starts a directive when it is the first non-blank
        character of its line and the lexer is not inside a macro
        definition; otherwise it is skipped.  Returns the index of the last
        character belonging to the directive.

        NOTE(review): directives, including #define and #include, are acted
        upon even inside an inactive conditional block.
        """
        if self.inMacroDefine:
            return i

        if not self.firstNonBlank == '#':
            return i

        self.maybeAddToken()
        # We are in preprocessing mode.
        i, command, buf = self._scanDirective(i)
        self._applyDirective(command, buf)
        return i

    def _scanDirective(self, i):
        """Read the text of the directive whose '#' is at index i.

        Returns (i, command, buf): the index of the last consumed character,
        the directive name and the accumulated remainder of the line.
        """
        # Get the macro command name '#<command> .....'
        command, buf = '', ''
        seenNonBlank = False
        while True:
            try:
                i = self.nextPos(i)
                c = self.chars[i]
                if c == '\\' and self.chars[self.nextPos(i)] == "\n":
                    # Backslash-newline: the directive continues on the
                    # next line.
                    i = self.nextPos(i)
                    continue
            except EOF:
                break

            if c == "\n":
                # Step back so that the main loop handles the newline.
                i = self.prevPos(i)
                break
            elif c in (' ', "\t"):
                if not seenNonBlank:
                    # Ignore any leading blanks after the '#'.
                    continue

                if len(command) == 0:
                    command = buf
                    buf = ''
                else:
                    buf += ' '
            elif c == '(':
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
            else:
                seenNonBlank = True
                buf += c

        # A directive without operand (e.g. '#endif') ends at the newline or,
        # when it is the last thing in the buffer, at the end of input.
        if len(buf) > 0 and len(command) == 0:
            command = buf

        return i, command, buf

    def _applyDirective(self, command, buf):
        """Act on one preprocessor directive.

        Raises ParseError for #elif, #else or #endif without an open
        conditional.  An unrecognised directive is reported on stdout and
        terminates the process.
        """
        stack = self.visibilityStack

        if command == 'define':
            self.handleMacroDefine(buf)
        elif command == 'include':
            self.handleMacroInclude(buf)
        elif command == 'ifdef':
            if buf.strip() in self.defines:
                stack.append(SrcLexer.VISIBLE)
            else:
                stack.append(SrcLexer.INVISIBLE_PRE)
        elif command == 'ifndef':
            if buf.strip() in self.defines:
                stack.append(SrcLexer.INVISIBLE_PRE)
            else:
                stack.append(SrcLexer.VISIBLE)
        elif command == 'if':
            if self.evalCodeVisibility(buf):
                stack.append(SrcLexer.VISIBLE)
            else:
                stack.append(SrcLexer.INVISIBLE_PRE)
        elif command == 'elif':
            if len(stack) == 0:
                raise ParseError('')

            if stack[-1] == SrcLexer.VISIBLE:
                stack[-1] = SrcLexer.INVISIBLE_POST
            elif stack[-1] == SrcLexer.INVISIBLE_PRE:
                # Evaluate only if the current visibility is false.
                if self.evalCodeVisibility(buf):
                    stack[-1] = SrcLexer.VISIBLE
        elif command == 'else':
            if len(stack) == 0:
                raise ParseError('')

            if stack[-1] == SrcLexer.VISIBLE:
                stack[-1] = SrcLexer.INVISIBLE_POST
            elif stack[-1] == SrcLexer.INVISIBLE_PRE:
                stack[-1] = SrcLexer.VISIBLE
        elif command == 'endif':
            if len(stack) == 0:
                raise ParseError('')
            stack.pop()
        elif command == 'undef':
            # Not implemented: the macro stays in self.defines.
            pass
        elif command in ('error', 'pragma'):
            pass
        else:
            sys.stdout.write("'%s' '%s'\n" % (command, buf))
            sys.stdout.write("%s\n" % (self.filepath,))
            # NOTE(review): exits with status 0 although this is an error
            # path; left unchanged because callers may rely on it.
            sys.exit(0)

    def evalCodeVisibility(self, buf):
        """Evaluate the condition of an #if/#elif directive.

        The text is evaluated as a Python expression; anything that fails to
        evaluate counts as true.
        """
        # SECURITY: eval() executes arbitrary code taken from the input
        # file.  Only run this lexer on trusted sources.
        try:
            return eval(buf)
        except Exception:
            return True

    def handleMacroDefine(self, buf):
        """Parse the body of a #define and record the macro by its name."""
        mparser = macroparser.MacroParser(buf)
        mparser.debug = self.debugMacro
        mparser.parse()
        macro = mparser.getMacro()
        if macro:
            self.defines[macro.name] = macro

    def handleMacroInclude(self, buf):
        """Process an #include: lex the header and import its defines.

        The header is searched for in self.includeDirs.  A header that was
        already included is skipped; one found in the class-wide cache only
        contributes its cached defines.  Nothing happens when
        self.expandHeaders is false.
        """
        # Strip excess string if any.
        pos = buf.find(' ')
        if pos >= 0:
            buf = buf[:pos]
        headerSub = removeHeaderQuotes(buf)

        if not self.expandHeaders:
            # We don't want to expand headers. Bail out.
            if self.debug:
                progress("%s ignored\n"%headerSub)
            return

        headerPath = None
        for includeDir in self.includeDirs:
            hpath = includeDir + '/' + headerSub
            # Never let a file include itself.
            if os.path.isfile(hpath) and hpath != self.filepath:
                headerPath = hpath
                break

        if not headerPath:
            error("included header file " + headerSub + " not found\n", self.stopOnHeader)
            return

        if self.debug:
            progress("%s found\n"%headerPath)

        if headerPath in self.headerDict:
            if self.debug:
                progress("%s already included\n"%headerPath)
            return

        if headerPath in SrcLexer.headerCache:
            if self.debug:
                progress("%s in cache\n"%headerPath)
            self.defines.update(SrcLexer.headerCache[headerPath].defines)
            return

        # Close the file deterministically instead of leaking the handle.
        with open(headerPath, 'r') as headerFile:
            chars = headerFile.read()

        mclexer = SrcLexer(chars, headerPath)
        mclexer.copyProperties(self)
        mclexer.parentLexer = self
        mclexer.tokenize()

        headerDefines = mclexer.getDefines()
        hdrData = HeaderData()
        hdrData.tokens = mclexer.getTokens()
        hdrData.defines.update(headerDefines)

        self.headerDict[headerPath] = True
        SrcLexer.headerCache[headerPath] = hdrData

        # Update the list of headers that have already been expanded.
        for key in list(mclexer.headerDict):
            self.headerDict[key] = True

        if self.debug:
            progress("defines found in header %s:\n"%headerSub)
            for key in headerDefines:
                progress(" '%s'\n"%key)

        self.defines.update(headerDefines)

    def slash(self, i):
        """Handle '/': skip a comment or emit '/' as a token.

        Returns the index of the last character consumed.  Comments are not
        recognised inside inactive conditional blocks.
        """
        if not self.isCodeVisible():
            return i

        follower = ''
        if i < self.bufsize - 1:
            follower = self.chars[i+1]

        if follower == '/':
            # Line comment: stop just before the line terminator so that the
            # main loop still sees it.
            j = i + 2
            while j < self.bufsize:
                if self.chars[j] in ("\n", "\r"):
                    return j - 1
                j += 1
            # The comment runs to the end of input.  The pending token is
            # kept, exactly as for a comment that ends at a newline.
            return j

        if follower == '*':
            # Block comment: search for the terminator after the opening
            # '/*', so that the '*' of the opener cannot close the comment
            # (as in '/*/').
            end = self.chars.find('*/', i + 2)
            if end < 0:
                # Unterminated comment: swallow the rest of the input.
                return self.bufsize
            return end + 1

        return self.anyToken(i, '/')

    def lineBreak(self, i):
        """Handle a newline: end the pending token in visible code."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()

        return i

    def doubleQuote(self, i):
        """Handle '"': emit the string literal, quotes included, as a token.

        The literal ends at the next double quote; escape sequences are not
        interpreted.  An unterminated literal is dropped and the rest of the
        input is consumed.  Returns the index of the closing quote.
        """
        if not self.isCodeVisible():
            return i

        end = self.chars.find('"', i + 1)
        if end < 0:
            return self.bufsize

        self.tokens.append('"' + self.chars[i+1:end] + '"')
        return end

    def anyToken(self, i, token):
        """Emit a single-character token, ending any pending token first."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        self.token = token
        self.maybeAddToken()
        return i