summaryrefslogtreecommitdiff
path: root/toolkit/src2xml/source/srclexer.py
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/src2xml/source/srclexer.py')
-rw-r--r--toolkit/src2xml/source/srclexer.py488
1 files changed, 488 insertions, 0 deletions
diff --git a/toolkit/src2xml/source/srclexer.py b/toolkit/src2xml/source/srclexer.py
new file mode 100644
index 000000000000..5a5a3319b0d1
--- /dev/null
+++ b/toolkit/src2xml/source/srclexer.py
@@ -0,0 +1,488 @@
+import sys, os.path
+from globals import *
+import macroparser
+
class EOF(Exception):
    """Raised by SrcLexer.nextPos when the end of the input buffer is reached."""

    def __init__ (self):
        pass

    def __str__ (self):
        return "end of file"

    # BUG FIX: the original defined a plain method named ``str``, which does
    # not hook into Python's string conversion (str(exc) ignored it).  The
    # message now lives in __str__; ``str`` is kept as an alias so any caller
    # using the old ``exc.str()`` spelling keeps working.
    str = __str__
+
class BOF(Exception):
    """Raised by SrcLexer.prevPos when the beginning of the buffer is passed."""

    def __init__ (self):
        pass

    def __str__ (self):
        return "beginning of file"

    # BUG FIX: as with EOF, the message belongs in __str__ so that str(exc)
    # produces it; ``str`` remains as a backward-compatible alias for the
    # original ``exc.str()`` call style.
    str = __str__
+
+
def removeHeaderQuotes (orig):
    """Strip a matching pair of "..." or <...> delimiters from a header name.

    Strings of length 2 or less, and strings whose first/last characters do
    not form a recognized delimiter pair, are returned unchanged.
    """
    if len(orig) > 2:
        first, last = orig[0], orig[-1]
        if (first == last == '"') or (first, last) == ('<', '>'):
            return orig[1:-1]
    return orig
+
+
+def dumpTokens (tokens, toError=False):
+
+ scope = 0
+ indent = " "
+ line = ''
+ chars = ''
+
+ for token in tokens:
+ if token in '{<':
+ if len(line) > 0:
+ chars += indent*scope + line + "\n"
+ line = ''
+ chars += indent*scope + token + "\n"
+ scope += 1
+
+ elif token in '}>':
+ if len(line) > 0:
+ chars += indent*scope + line + "\n"
+ line = ''
+ scope -= 1
+ chars += indent*scope + token
+
+ elif token == ';':
+ if len(line) > 0:
+ chars += indent*scope + line + ";\n"
+ line = ''
+ else:
+ chars += ";\n"
+ elif len(token) > 0:
+ line += token + ' '
+
+ if len(line) > 0:
+ chars += line
+ chars += "\n"
+ if toError:
+ sys.stderr.write(chars)
+ else:
+ sys.stdout.write(chars)
+
+
class HeaderData(object):
    """Per-header cache record holding the macros and tokens of one header."""

    def __init__ (self):
        # Token stream produced by lexing the header.
        self.tokens = []
        # Macro objects (#define) discovered in the header, keyed by name.
        self.defines = {}
+
+
class SrcLexer(object):
    """Lexicographical analyzer for .src format.

The role of a lexer is to parse the source file and break it into
appropriate tokens. Such tokens are later passed to a parser to
build the syntax tree.

NOTE(review): this module is Python 2 code ('print' statements,
dict.has_key); it will not run unmodified under Python 3.
"""
    # Process-wide cache of already-lexed headers, keyed by header path,
    # shared by every SrcLexer instance (class attribute).
    headerCache = {}

    # Conditional-compilation visibility states kept on visibilityStack:
    VISIBLE = 0         # tokens in this region are collected
    INVISIBLE_PRE = 1   # suppressed, but a later #elif/#else may enable it
    INVISIBLE_POST = 2  # suppressed for good; a true branch already ran

    def __init__ (self, chars, filepath = None):
        # chars: full text of the translation unit to lex.
        # filepath: where chars came from; None for in-memory input.
        self.filepath = filepath
        self.parentLexer = None   # set when this lexer expands an #include
        self.chars = chars
        self.bufsize = len(self.chars)

        # TODO: use parameters for this
        # Properties that can be copied.
        self.headerDict = dict ()    # header path -> True once expanded
        self.debug = False
        self.debugMacro = False
        self.includeDirs = list ()   # search path for #include resolution
        self.expandHeaders = True    # False: record but do not expand includes
        self.inMacroDefine = False   # True while lexing a macro body
        self.stopOnHeader = False    # True: treat a missing header as fatal

    def copyProperties (self, other):
        """Copy properties from another instance of SrcLexer.

        NOTE(review): headerDict is shared by reference (parent and child
        lexers see each other's entries), while includeDirs is copied.
        """

        # TODO: use parameters for this
        self.headerDict = other.headerDict
        self.debug = other.debug
        self.debugMacro = other.debugMacro
        self.includeDirs = other.includeDirs[:]
        self.expandHeaders = other.expandHeaders
        self.inMacroDefine = other.inMacroDefine
        self.stopOnHeader = other.stopOnHeader

    def init (self):
        """Reset all per-run state; called at the start of tokenize()."""
        self.firstNonBlank = ''    # first non-blank char of the current line
        self.token = ''            # token currently being accumulated
        self.tokens = []           # completed tokens, in input order
        self.defines = {}          # macro name -> macro object
        self.visibilityStack = []  # nested #if/#ifdef visibility states

    def getTokens (self):
        """Return the token list produced by the last tokenize() run."""
        return self.tokens

    def getDefines (self):
        """Return the macro dict produced by the last tokenize() run."""
        return self.defines

    def nextPos (self, i):
        """Return the next buffer index after i, skipping CR (0x0D) bytes.

        Raises EOF when the end of the buffer is reached.
        """
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def prevPos (self, i):
        """Return the previous buffer index before i, skipping CR bytes.

        NOTE(review): negative indices wrap around in Python, so BOF is only
        raised once i drops below -len(chars); callers appear to use this
        only to step back one position, where that never happens.
        """
        while True:
            i -= 1
            try:
                c = self.chars[i]
            except IndexError:
                raise BOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def isCodeVisible (self):
        """True if tokens should currently be collected, i.e. every
        enclosing conditional-compilation region is VISIBLE."""
        if len(self.visibilityStack) == 0:
            return True
        for item in self.visibilityStack:
            if item != SrcLexer.VISIBLE:
                return False
        return True

    def tokenize (self):
        """Main entry point: scan self.chars, filling self.tokens and
        self.defines.  Dispatches each character to a handler that returns
        the index to resume from."""
        self.init()

        i = 0
        while True:
            c = self.chars[i]

            if self.firstNonBlank == '' and not c in [' ', "\n", "\t"]:
                # Store the first non-blank in a line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''

            if c == '#':
                i = self.pound(i)
            elif c == '/':
                i = self.slash(i)
            elif c == "\n":
                i = self.lineBreak(i)
            elif c == '"':
                i = self.doubleQuote(i)
            elif c in [' ', "\t"]:
                i = self.blank(i)
            elif c in ";()[]{}<>,=+-*":
                # Any outstanding single-character token.
                i = self.anyToken(i, c)
            elif self.isCodeVisible():
                # Ordinary character: grow the current token.
                self.token += c

            try:
                i = self.nextPos(i)
            except EOF:
                break

        # Flush whatever token was still being accumulated at EOF.
        if len(self.token):
            self.tokens.append(self.token)

        if not self.parentLexer and self.debug:
            progress ("-"*68 + "\n")
            progress ("All defines found in this translation unit:\n")
            keys = self.defines.keys()
            keys.sort()
            for key in keys:
                progress ("@ %s\n"%key)

    def dumpTokens (self, toError=False):
        """Dump this lexer's tokens via the module-level dumpTokens()."""
        dumpTokens(self.tokens, toError)


    def maybeAddToken (self):
        """Finish the token being accumulated, if any, and reset it."""
        if len(self.token) > 0:
            self.tokens.append(self.token)
            self.token = ''


    #--------------------------------------------------------------------
    # character handlers
    #
    # Each handler receives the index of the character that triggered it
    # and returns the index from which tokenize() should resume scanning
    # (tokenize() then advances one position past the returned index).

    def blank (self, i):
        # A blank terminates the current token.
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        return i


    def pound (self, i):
        """Handle '#': parse and act on a preprocessor directive."""

        if self.inMacroDefine:
            return i

        # Only honor '#' when it is the first non-blank char on the line.
        if not self.firstNonBlank == '#':
            return i

        self.maybeAddToken()
        # We are in preprocessing mode.

        # Get the macro command name '#<command> .....'

        # (``define`` is assigned but never used below.)
        command, define, buf = '', '', ''
        firstNonBlank = False
        while True:
            try:
                i = self.nextPos(i)
                c = self.chars[i]
                # A backslash-newline continues the directive on the next line.
                if c == '\\' and self.chars[self.nextPos(i)] == "\n":
                    i = self.nextPos(i)
                    continue
            except EOF:
                break

            if c == "\n":
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                # Step back so tokenize() sees the newline again.
                i = self.prevPos(i)
                break
            elif c in [' ', "\t"]:
                if not firstNonBlank:
                    # Ignore any leading blanks after the '#'.
                    continue

                # First blank separates the command from its argument text.
                if len(command) == 0:
                    command = buf
                    buf = ''
                else:
                    buf += ' '
            elif c == '(':
                # '(' also ends the command name (e.g. '#define FOO(x)').
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
            else:
                if not firstNonBlank:
                    firstNonBlank = True
                buf += c

        if command == 'define':
            self.handleMacroDefine(buf)
        elif command == 'include':
            self.handleMacroInclude(buf)
        elif command == 'ifdef':
            defineName = buf.strip()
            if self.defines.has_key(defineName):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'ifndef':
            defineName = buf.strip()
            if self.defines.has_key(defineName):
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
            else:
                self.visibilityStack.append(SrcLexer.VISIBLE)

        elif command == 'if':
            if self.evalCodeVisibility(buf):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'elif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                # Evaluate only if the current visibility is false.
                if self.evalCodeVisibility(buf):
                    self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'else':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            # The two 'if's cannot both fire: the first rewrites VISIBLE to
            # INVISIBLE_POST, which the second test does not match, so this
            # behaves like the elif chain used for '#elif' above.
            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            if self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'endif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')
            self.visibilityStack.pop()

        elif command == 'undef':
            # '#undef' is deliberately ignored.
            pass
        elif command in ['error', 'pragma']:
            pass
        else:
            # Unknown directive: dump it and abort the whole run.
            print "'%s' '%s'"%(command, buf)
            print self.filepath
            sys.exit(0)

        return i


    def evalCodeVisibility (self, buf):
        """Evaluate an #if/#elif expression; unparseable input counts as true.

        NOTE(review): eval() on source text is only safe for trusted input,
        and the bare except hides every evaluation error.
        """
        try:
            return eval(buf)
        except:
            return True

    def handleMacroDefine (self, buf):
        """Parse '#define ...' text and record the macro in self.defines."""

        mparser = macroparser.MacroParser(buf)
        mparser.debug = self.debugMacro
        mparser.parse()
        macro = mparser.getMacro()
        if macro:
            self.defines[macro.name] = macro

    def handleMacroInclude (self, buf):
        """Resolve an '#include' target, lex it, and merge its #defines.

        Results are memoized in SrcLexer.headerCache so each header file is
        lexed at most once per process.
        """

        # Strip excess string if any.
        pos = buf.find(' ')
        if pos >= 0:
            buf = buf[:pos]
        headerSub = removeHeaderQuotes(buf)

        if not self.expandHeaders:
            # We don't want to expand headers.  Bail out.
            if self.debug:
                progress ("%s ignored\n"%headerSub)
            return

        defines = {}
        headerPath = None
        # First match in includeDirs wins; never re-include this very file.
        for includeDir in self.includeDirs:
            hpath = includeDir + '/' + headerSub
            if os.path.isfile(hpath) and hpath != self.filepath:
                headerPath = hpath
                break

        if not headerPath:
            error("included header file " + headerSub + " not found\n", self.stopOnHeader)
            return

        if self.debug:
            progress ("%s found\n"%headerPath)

        # Already expanded somewhere in this lexer tree?
        if headerPath in self.headerDict:
            if self.debug:
                progress ("%s already included\n"%headerPath)
            return

        # Lexed earlier in this process?  Reuse the cached defines.
        if SrcLexer.headerCache.has_key(headerPath):
            if self.debug:
                progress ("%s in cache\n"%headerPath)
            for key in SrcLexer.headerCache[headerPath].defines.keys():
                self.defines[key] = SrcLexer.headerCache[headerPath].defines[key]
            return

        # Not cached: lex the header with a child lexer.
        chars = open(headerPath, 'r').read()
        mclexer = SrcLexer(chars, headerPath)
        mclexer.copyProperties(self)
        mclexer.parentLexer = self
        mclexer.tokenize()
        hdrData = HeaderData()
        hdrData.tokens = mclexer.getTokens()
        headerDefines = mclexer.getDefines()
        for key in headerDefines.keys():
            defines[key] = headerDefines[key]
            hdrData.defines[key] = headerDefines[key]

        self.headerDict[headerPath] = True
        SrcLexer.headerCache[headerPath] = hdrData

        # Update the list of headers that have already been expanded.
        for key in mclexer.headerDict.keys():
            self.headerDict[key] = True

        if self.debug:
            progress ("defines found in header %s:\n"%headerSub)
            for key in defines.keys():
                progress (" '%s'\n"%key)

        for key in defines.keys():
            self.defines[key] = defines[key]


    def slash (self, i):
        """Handle '/': line comment, block comment, or a lone '/' token."""
        if not self.isCodeVisible():
            return i

        if i < self.bufsize - 1 and self.chars[i+1] == '/':
            # Parse line comment.
            line = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if ord(c) in [0x0A, 0x0D]:
                    # Return before the newline so tokenize() reprocesses it.
                    return i - 1
                line += c
                i += 1
            # Comment ran to end of buffer; drop any half-built token.
            self.token = ''
        elif i < self.bufsize - 1 and self.chars[i+1] == '*':
            # Parse block comment; its contents are discarded.
            comment = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if c == '/' and self.chars[i-1] == '*':
                    return i
                comment += c
                i += 1
        else:
            # A lone '/' is an ordinary single-character token.
            return self.anyToken(i, '/')

        return i


    def lineBreak (self, i):
        # A newline terminates the current token.
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()

        return i


    def doubleQuote (self, i):
        """Collect a double-quoted string literal as one token (quotes kept).

        NOTE(review): a literal left unterminated at end of buffer is
        silently dropped rather than reported.
        """
        if not self.isCodeVisible():
            return i

        literal = ''
        i += 1
        while i < self.bufsize:
            c = self.chars[i]
            if c == '"':
                self.tokens.append('"'+literal+'"')
                break
            literal += c
            i += 1

        return i


    def anyToken (self, i, token):
        """Flush any pending token, then emit *token* as its own token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        self.token = token
        self.maybeAddToken()
        return i