diff options
author | Chris Larson <chris_larson@mentor.com> | 2010-08-02 13:42:23 -0700 |
---|---|---|
committer | Richard Purdie <rpurdie@linux.intel.com> | 2010-08-03 14:06:07 +0100 |
commit | 191a2883492841f30bbc21ab7bf4e4a0810d0760 (patch) | |
tree | f91e66f57380a47ea31e95bac28a93dd1e9e6370 | |
parent | e6b6767369f6d0caa7a9efe32ccd4ed514bb3148 (diff) | |
download | openembedded-core-191a2883492841f30bbc21ab7bf4e4a0810d0760.tar.gz openembedded-core-191a2883492841f30bbc21ab7bf4e4a0810d0760.tar.bz2 openembedded-core-191a2883492841f30bbc21ab7bf4e4a0810d0760.zip |
Add pysh, ply, and codegen to lib/ to prepare for future work
(Bitbake rev: d0a6e9c5c1887a885e0e73eba264ca66801f5ed0)
Signed-off-by: Chris Larson <chris_larson@mentor.com>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
-rw-r--r-- | bitbake/lib/codegen.py | 570 | ||||
-rw-r--r-- | bitbake/lib/ply/__init__.py | 4 | ||||
-rw-r--r-- | bitbake/lib/ply/lex.py | 1058 | ||||
-rw-r--r-- | bitbake/lib/ply/yacc.py | 3276 | ||||
-rw-r--r-- | bitbake/lib/pysh/__init__.py | 0 | ||||
-rw-r--r-- | bitbake/lib/pysh/builtin.py | 710 | ||||
-rw-r--r-- | bitbake/lib/pysh/interp.py | 1367 | ||||
-rw-r--r-- | bitbake/lib/pysh/lsprof.py | 116 | ||||
-rw-r--r-- | bitbake/lib/pysh/pysh.py | 167 | ||||
-rw-r--r-- | bitbake/lib/pysh/pyshlex.py | 888 | ||||
-rw-r--r-- | bitbake/lib/pysh/pyshyacc.py | 772 | ||||
-rw-r--r-- | bitbake/lib/pysh/sherrors.py | 41 | ||||
-rw-r--r-- | bitbake/lib/pysh/subprocess_fix.py | 77 |
13 files changed, 9046 insertions, 0 deletions
# diff --git a/bitbake/lib/codegen.py b/bitbake/lib/codegen.py (new file mode 100644, index 0000000000..be772d5107)
# -*- coding: utf-8 -*-
"""
    codegen
    ~~~~~~~

    Extension to ast that allow ast -> python code generation.

    :copyright: Copyright 2008 by Armin Ronacher.
    :license: BSD.
"""
from ast import *

BOOLOP_SYMBOLS = {
    And:        'and',
    Or:         'or'
}

BINOP_SYMBOLS = {
    Add:        '+',
    Sub:        '-',
    Mult:       '*',
    Div:        '/',
    FloorDiv:   '//',
    Mod:        '%',
    Pow:        '**',
    LShift:     '<<',
    RShift:     '>>',
    BitOr:      '|',
    BitAnd:     '&',
    BitXor:     '^'
}

CMPOP_SYMBOLS = {
    Eq:         '==',
    Gt:         '>',
    GtE:        '>=',
    In:         'in',
    Is:         'is',
    IsNot:      'is not',
    Lt:         '<',
    LtE:        '<=',
    NotEq:      '!=',
    NotIn:      'not in'
}

UNARYOP_SYMBOLS = {
    Invert:     '~',
    Not:        'not',
    UAdd:       '+',
    USub:       '-'
}

ALL_SYMBOLS = {}
ALL_SYMBOLS.update(BOOLOP_SYMBOLS)
ALL_SYMBOLS.update(BINOP_SYMBOLS)
ALL_SYMBOLS.update(CMPOP_SYMBOLS)
ALL_SYMBOLS.update(UNARYOP_SYMBOLS)


def to_source(node, indent_with=' ' * 4, add_line_information=False):
    """This function can convert a node tree back into python sourcecode.
    This is useful for debugging purposes, especially if you're dealing with
    custom asts not generated by python itself.

    It could be that the sourcecode is evaluable when the AST itself is not
    compilable / evaluable.  The reason for this is that the AST contains some
    more data than regular sourcecode does, which is dropped during
    conversion.

    Each level of indentation is replaced with `indent_with`.  Per default this
    parameter is equal to four spaces as suggested by PEP 8, but it might be
    adjusted to match the application's styleguide.

    If `add_line_information` is set to `True` comments for the line numbers
    of the nodes are added to the output.  This can be used to spot wrong line
    number information of statement nodes.
    """
    generator = SourceGenerator(indent_with, add_line_information)
    generator.visit(node)
    return ''.join(generator.result)


class SourceGenerator(NodeVisitor):
    """This visitor is able to transform a well formed syntax tree into python
    sourcecode.  For more details have a look at the docstring of the
    `to_source` function.

    The generated text is accumulated as string fragments in `result`.
    Binary, boolean, comparison, unary and conditional expressions are always
    parenthesised so that the grouping of the tree survives the round trip.
    """

    def __init__(self, indent_with, add_line_information=False):
        self.result = []
        self.indent_with = indent_with
        self.add_line_information = add_line_information
        self.indentation = 0
        # Number of line breaks to emit before the next fragment is written.
        self.new_lines = 0

    def write(self, x):
        """Append the fragment `x`, flushing pending newlines/indentation."""
        if self.new_lines:
            # No leading blank lines at the very start of the output.
            if self.result:
                self.result.append('\n' * self.new_lines)
            self.result.append(self.indent_with * self.indentation)
            self.new_lines = 0
        self.result.append(x)

    def newline(self, node=None, extra=0):
        """Request a line break (plus `extra` blank lines) before the next
        write; optionally emit a ``# line: N`` comment for `node`."""
        self.new_lines = max(self.new_lines, 1 + extra)
        if node is not None and self.add_line_information:
            self.write('# line: %s' % node.lineno)
            self.new_lines = 1

    def body(self, statements):
        """Emit `statements` one indentation level deeper."""
        self.indentation += 1
        for stmt in statements:
            self.visit(stmt)
        self.indentation -= 1

    def body_or_else(self, node):
        """Emit the body of a loop node followed by its optional else block."""
        self.body(node.body)
        if node.orelse:
            self.newline()
            self.write('else:')
            self.body(node.orelse)

    def signature(self, node):
        """Emit the parameter list described by an `arguments` node."""
        want_comma = []
        def write_comma():
            if want_comma:
                self.write(', ')
            else:
                want_comma.append(True)

        def write_star(prefix, value):
            # Older ASTs store the name as a plain string, newer ones wrap it
            # in an ``arg`` node carrying the name in its ``arg`` attribute.
            write_comma()
            self.write(prefix + getattr(value, 'arg', value))

        # Defaults belong to the trailing positional parameters.
        padding = [None] * (len(node.args) - len(node.defaults))
        for arg, default in zip(node.args, padding + node.defaults):
            write_comma()
            self.visit(arg)
            if default is not None:
                self.write('=')
                self.visit(default)
        if node.vararg is not None:
            write_star('*', node.vararg)
        if node.kwarg is not None:
            write_star('**', node.kwarg)

    def decorators(self, node):
        """Emit one ``@decorator`` line per entry of `decorator_list`."""
        for decorator in node.decorator_list:
            self.newline(decorator)
            self.write('@')
            self.visit(decorator)

    # Statements

    def visit_Assign(self, node):
        self.newline(node)
        # Several targets mean a chained assignment: ``a = b = value``.
        for target in node.targets:
            self.visit(target)
            self.write(' = ')
        self.visit(node.value)

    def visit_AugAssign(self, node):
        self.newline(node)
        self.visit(node.target)
        self.write(BINOP_SYMBOLS[type(node.op)] + '=')
        self.visit(node.value)

    def visit_ImportFrom(self, node):
        self.newline(node)
        # `module` is None for ``from . import x``.
        self.write('from %s%s import ' % ('.' * (node.level or 0),
                                          node.module or ''))
        for idx, item in enumerate(node.names):
            if idx:
                self.write(', ')
            self.visit(item)

    def visit_Import(self, node):
        self.newline(node)
        self.write('import ')
        for idx, item in enumerate(node.names):
            if idx:
                self.write(', ')
            self.visit(item)

    def visit_Expr(self, node):
        self.newline(node)
        self.generic_visit(node)

    def visit_FunctionDef(self, node):
        self.newline(extra=1)
        self.decorators(node)
        self.newline(node)
        self.write('def %s(' % node.name)
        self.signature(node.args)
        self.write('):')
        self.body(node.body)

    def visit_ClassDef(self, node):
        have_args = []
        def paren_or_comma():
            if have_args:
                self.write(', ')
            else:
                have_args.append(True)
                self.write('(')

        self.newline(extra=2)
        self.decorators(node)
        self.newline(node)
        self.write('class %s' % node.name)
        for base in node.bases:
            paren_or_comma()
            self.visit(base)
        # XXX: getattr() is used to keep this module compatible with ASTs
        # whose ClassDef lacks keywords / starargs / kwargs.
        for keyword in getattr(node, 'keywords', ()):
            paren_or_comma()
            self.visit(keyword)
        starargs = getattr(node, 'starargs', None)
        if starargs is not None:
            paren_or_comma()
            self.write('*')
            self.visit(starargs)
        kwargs = getattr(node, 'kwargs', None)
        if kwargs is not None:
            paren_or_comma()
            self.write('**')
            self.visit(kwargs)
        if have_args:
            self.write('):')
        else:
            self.write(':')
        self.body(node.body)

    def visit_If(self, node):
        self.newline(node)
        self.write('if ')
        self.visit(node.test)
        self.write(':')
        self.body(node.body)
        # An else block consisting of a single If is rendered as ``elif``.
        else_ = node.orelse
        while len(else_) == 1 and isinstance(else_[0], If):
            node = else_[0]
            self.newline()
            self.write('elif ')
            self.visit(node.test)
            self.write(':')
            self.body(node.body)
            else_ = node.orelse
        # Only emit ``else:`` when there is something to put in it.
        if else_:
            self.newline()
            self.write('else:')
            self.body(else_)

    def visit_For(self, node):
        self.newline(node)
        self.write('for ')
        self.visit(node.target)
        self.write(' in ')
        self.visit(node.iter)
        self.write(':')
        self.body_or_else(node)

    def visit_While(self, node):
        self.newline(node)
        self.write('while ')
        self.visit(node.test)
        self.write(':')
        self.body_or_else(node)

    def visit_With(self, node):
        # NOTE: reads the single context_expr / optional_vars pair of the
        # older With node layout.
        self.newline(node)
        self.write('with ')
        self.visit(node.context_expr)
        if node.optional_vars is not None:
            self.write(' as ')
            self.visit(node.optional_vars)
        self.write(':')
        self.body(node.body)

    def visit_Pass(self, node):
        self.newline(node)
        self.write('pass')

    def visit_Print(self, node):
        # XXX: python 2.6 only
        self.newline(node)
        self.write('print ')
        want_comma = False
        if node.dest is not None:
            self.write('>> ')
            self.visit(node.dest)
            want_comma = True
        for value in node.values:
            if want_comma:
                self.write(', ')
            self.visit(value)
            want_comma = True
        if not node.nl:
            self.write(',')

    def visit_Delete(self, node):
        self.newline(node)
        self.write('del ')
        for idx, target in enumerate(node.targets):
            if idx:
                self.write(', ')
            self.visit(target)

    def visit_TryExcept(self, node):
        self.newline(node)
        self.write('try:')
        self.body(node.body)
        for handler in node.handlers:
            self.visit(handler)
        if node.orelse:
            self.newline()
            self.write('else:')
            self.body(node.orelse)

    def visit_TryFinally(self, node):
        self.newline(node)
        self.write('try:')
        self.body(node.body)
        self.newline(node)
        self.write('finally:')
        self.body(node.finalbody)

    def visit_Try(self, node):
        # Unified try node (handlers, orelse and finalbody on one node).
        self.newline(node)
        self.write('try:')
        self.body(node.body)
        for handler in node.handlers:
            self.visit(handler)
        if node.orelse:
            self.newline()
            self.write('else:')
            self.body(node.orelse)
        if node.finalbody:
            self.newline()
            self.write('finally:')
            self.body(node.finalbody)

    def visit_Global(self, node):
        self.newline(node)
        self.write('global ' + ', '.join(node.names))

    def visit_Nonlocal(self, node):
        self.newline(node)
        self.write('nonlocal ' + ', '.join(node.names))

    def visit_Return(self, node):
        self.newline(node)
        self.write('return')
        # A bare ``return`` carries no value node.
        if node.value is not None:
            self.write(' ')
            self.visit(node.value)

    def visit_Break(self, node):
        self.newline(node)
        self.write('break')

    def visit_Continue(self, node):
        self.newline(node)
        self.write('continue')

    def visit_Raise(self, node):
        # XXX: Python 2.6 / 3.0 compatibility
        self.newline(node)
        self.write('raise')
        if hasattr(node, 'exc') and node.exc is not None:
            self.write(' ')
            self.visit(node.exc)
            if node.cause is not None:
                self.write(' from ')
                self.visit(node.cause)
        elif hasattr(node, 'type') and node.type is not None:
            self.write(' ')
            self.visit(node.type)
            if node.inst is not None:
                self.write(', ')
                self.visit(node.inst)
            if node.tback is not None:
                self.write(', ')
                self.visit(node.tback)

    # Expressions

    def visit_Attribute(self, node):
        self.visit(node.value)
        self.write('.' + node.attr)

    def visit_Call(self, node):
        want_comma = []
        def write_comma():
            if want_comma:
                self.write(', ')
            else:
                want_comma.append(True)

        self.visit(node.func)
        self.write('(')
        for arg in node.args:
            write_comma()
            self.visit(arg)
        for keyword in node.keywords:
            write_comma()
            self.visit(keyword)
        # starargs / kwargs are not present on every Call node layout.
        starargs = getattr(node, 'starargs', None)
        if starargs is not None:
            write_comma()
            self.write('*')
            self.visit(starargs)
        kwargs = getattr(node, 'kwargs', None)
        if kwargs is not None:
            write_comma()
            self.write('**')
            self.visit(kwargs)
        self.write(')')

    def visit_Name(self, node):
        self.write(node.id)

    def visit_Str(self, node):
        self.write(repr(node.s))

    def visit_Bytes(self, node):
        self.write(repr(node.s))

    def visit_Num(self, node):
        self.write(repr(node.n))

    def visit_Constant(self, node):
        # Literal node carrying its Python value directly.
        self.write(repr(node.value))

    visit_NameConstant = visit_Constant

    def visit_Tuple(self, node):
        self.write('(')
        idx = -1
        for idx, item in enumerate(node.elts):
            if idx:
                self.write(', ')
            self.visit(item)
        # idx == 0 means exactly one element, which needs a trailing comma;
        # the empty tuple (idx == -1) and longer tuples just close.
        if idx:
            self.write(')')
        else:
            self.write(',)')

    def sequence_visit(left, right):
        def visit(self, node):
            self.write(left)
            for idx, item in enumerate(node.elts):
                if idx:
                    self.write(', ')
                self.visit(item)
            self.write(right)
        return visit

    visit_List = sequence_visit('[', ']')
    visit_Set = sequence_visit('{', '}')
    del sequence_visit

    def visit_Dict(self, node):
        self.write('{')
        for idx, (key, value) in enumerate(zip(node.keys, node.values)):
            if idx:
                self.write(', ')
            self.visit(key)
            self.write(': ')
            self.visit(value)
        self.write('}')

    def visit_BinOp(self, node):
        # Parenthesised so nested operations keep the tree's grouping.
        self.write('(')
        self.visit(node.left)
        self.write(' %s ' % BINOP_SYMBOLS[type(node.op)])
        self.visit(node.right)
        self.write(')')

    def visit_BoolOp(self, node):
        self.write('(')
        for idx, value in enumerate(node.values):
            if idx:
                self.write(' %s ' % BOOLOP_SYMBOLS[type(node.op)])
            self.visit(value)
        self.write(')')

    def visit_Compare(self, node):
        self.write('(')
        self.visit(node.left)
        for op, right in zip(node.ops, node.comparators):
            self.write(' %s ' % CMPOP_SYMBOLS[type(op)])
            self.visit(right)
        self.write(')')

    def visit_UnaryOp(self, node):
        self.write('(')
        op = UNARYOP_SYMBOLS[type(node.op)]
        self.write(op)
        if op == 'not':
            self.write(' ')
        self.visit(node.operand)
        self.write(')')

    def visit_Subscript(self, node):
        self.visit(node.value)
        self.write('[')
        self.visit(node.slice)
        self.write(']')

    def visit_Slice(self, node):
        if node.lower is not None:
            self.visit(node.lower)
        self.write(':')
        if node.upper is not None:
            self.visit(node.upper)
        if node.step is not None:
            self.write(':')
            if not (isinstance(node.step, Name) and node.step.id == 'None'):
                self.visit(node.step)

    def visit_ExtSlice(self, node):
        for idx, item in enumerate(node.dims):
            if idx:
                self.write(', ')
            self.visit(item)

    def visit_Yield(self, node):
        self.write('yield')
        # A bare ``yield`` carries no value node.
        if node.value is not None:
            self.write(' ')
            self.visit(node.value)

    def visit_Lambda(self, node):
        self.write('lambda ')
        self.signature(node.args)
        self.write(': ')
        self.visit(node.body)

    def visit_Ellipsis(self, node):
        self.write('Ellipsis')

    def generator_visit(left, right):
        def visit(self, node):
            self.write(left)
            self.visit(node.elt)
            for comprehension in node.generators:
                self.visit(comprehension)
            self.write(right)
        return visit

    visit_ListComp = generator_visit('[', ']')
    visit_GeneratorExp = generator_visit('(', ')')
    visit_SetComp = generator_visit('{', '}')
    del generator_visit

    def visit_DictComp(self, node):
        self.write('{')
        self.visit(node.key)
        self.write(': ')
        self.visit(node.value)
        for comprehension in node.generators:
            self.visit(comprehension)
        self.write('}')

    def visit_IfExp(self, node):
        # Parenthesised so the expression can be embedded in a larger one.
        self.write('(')
        self.visit(node.body)
        self.write(' if ')
        self.visit(node.test)
        self.write(' else ')
        self.visit(node.orelse)
        self.write(')')

    def visit_Starred(self, node):
        self.write('*')
        self.visit(node.value)

    def visit_Repr(self, node):
        # XXX: python 2.6 only
        self.write('`')
        self.visit(node.value)
        self.write('`')

    # Helper Nodes

    def visit_alias(self, node):
        self.write(node.name)
        if node.asname is not None:
            self.write(' as ' + node.asname)

    def visit_arg(self, node):
        # Parameter node; only the name is emitted.
        self.write(node.arg)

    def visit_keyword(self, node):
        # ``name=value``; a keyword without a name is a ``**mapping`` unpack.
        if node.arg is None:
            self.write('**')
        else:
            self.write(node.arg + '=')
        self.visit(node.value)

    def visit_comprehension(self, node):
        self.write(' for ')
        self.visit(node.target)
        self.write(' in ')
        self.visit(node.iter)
        if node.ifs:
            for if_ in node.ifs:
                self.write(' if ')
                self.visit(if_)

    def visit_ExceptHandler(self, node):
        self.newline(node)
        self.write('except')
        if node.type is not None:
            self.write(' ')
            self.visit(node.type)
            if node.name is not None:
                self.write(' as ')
                # The bound name is either a node or a plain string.
                if isinstance(node.name, AST):
                    self.visit(node.name)
                else:
                    self.write(node.name)
        self.write(':')
        self.body(node.body)

    # NodeVisitor dispatches on the node class name (``ExceptHandler``); the
    # lowercase spelling is kept as an alias for existing callers.
    visit_excepthandler = visit_ExceptHandler


# ---------------------------------------------------------------------------
# The remainder of this span is patch text that belongs to other files of the
# same commit.  It is preserved verbatim (one patch line per comment line).
# ---------------------------------------------------------------------------
# diff --git a/bitbake/lib/ply/__init__.py b/bitbake/lib/ply/__init__.py
# new file mode 100644
# index 0000000000..853a985542
# --- /dev/null
# +++ b/bitbake/lib/ply/__init__.py
# @@ -0,0 +1,4 @@
# +# PLY package
# +# Author: David Beazley (dave@dabeaz.com)
# +
# +__all__ = ['lex','yacc']
# diff --git a/bitbake/lib/ply/lex.py b/bitbake/lib/ply/lex.py
# new file mode 100644
# index 0000000000..267ec100fc
# --- /dev/null
# +++ b/bitbake/lib/ply/lex.py
# @@ -0,0 +1,1058 @@
# +# -----------------------------------------------------------------------------
# +# ply: lex.py
# +#
# +# Copyright (C) 2001-2009,
# +# David M. Beazley (Dabeaz LLC)
# +# All rights reserved.
# +#
# +# Redistribution and use in source and binary forms, with or without
# +# modification, are permitted provided that the following conditions are
# +# met:
# +#
# +# * Redistributions of source code must retain the above copyright notice,
# +#   this list of conditions and the following disclaimer.
# +# * Redistributions in binary form must reproduce the above copyright notice,
# +#   this list of conditions and the following disclaimer in the documentation
# +#   and/or other materials provided with the distribution.
# +# * Neither the name of the David Beazley or Dabeaz LLC may be used to
# +#   endorse or promote products derived from this software without
# +#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------

__version__ = "3.3"
__tabversion__ = "3.2"       # Version of table file used

import re, sys, types, copy, os

# Tuple of the string types known to the running interpreter.
try:
    # Python 2.6
    StringTypes = (types.StringType, types.UnicodeType)
except AttributeError:
    # Python 3.0
    StringTypes = (str, bytes)

# Return the code object of a function.  The attribute holding it is named
# differently on Python 2 and Python 3, hence the two definitions.

if sys.version_info[0] < 3:
    def func_code(f):
        """Return the code object of function `f`."""
        return f.func_code
else:
    def func_code(f):
        """Return the code object of function `f`."""
        return f.__code__

# Pattern that a valid token name has to match.
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')

# Raised when an invalid token is encountered and no default error
# handler is defined.

class LexError(Exception):
    """Lexing failure; `text` holds the input that was being scanned."""

    def __init__(self, message, s):
        self.text = s
        self.args = (message,)

# Token class.  This class is used to represent the tokens produced.
class LexToken(object):
    """A single token.  The lexer assigns `type`, `value`, `lineno` and
    `lexpos` on every instance it creates."""

    def __str__(self):
        return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos)

    def __repr__(self):
        return str(self)

# This object is a stand-in for a logging object created by the
# logging module.

class PlyLogger(object):
    """Minimal logger that writes %-formatted messages to the file `f`."""

    def __init__(self,f):
        self.f = f

    def critical(self,msg,*args,**kwargs):
        self.f.write((msg % args) + "\n")

    def warning(self,msg,*args,**kwargs):
        self.f.write("WARNING: "+ (msg % args) + "\n")

    def error(self,msg,*args,**kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    info = critical
    debug = critical

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Logger stand-in: every attribute access and every call returns the
    logger itself, so any logging call chain is a no-op."""

    def __getattribute__(self,name):
        return self

    def __call__(self,*args,**kwargs):
        return self

# -----------------------------------------------------------------------------
#                        === Lexing Engine ===
#
# The following Lexer class implements the lexer runtime.   There are only
# a few public methods and attributes:
#
#    input()          -  Store a new string in the lexer
#    token()          -  Get the next token
#    clone()          -  Clone the lexer
#
#    lineno           -  Current line number
#    lexpos           -  Current position in the input string
# -----------------------------------------------------------------------------

class Lexer:
    def __init__(self):
        self.lexre = None             # Master regular expression. This is a list of
                                      # tuples (re,findex) where re is a compiled
                                      # regular expression and findex is a list
                                      # mapping regex group numbers to rules
        self.lexretext = None         # Current regular expression strings
        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
        self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names
        self.lexstate = "INITIAL"     # Current lexer state
        self.lexstatestack = []       # Stack of lexer states
        self.lexstateinfo = None      # State information
        self.lexstateignore = {}      # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}      # Dictionary of error functions for each state
        self.lexreflags = 0           # Optional re compile flags
        self.lexdata = None           # Actual input data (as a string)
        self.lexpos = 0               # Current position in input text
        self.lexlen = 0               # Length of the input text
        self.lexerrorf = None         # Error rule (if any)
        self.lextokens = None         # List of valid tokens
        self.lexignore = ""           # Ignored characters
        self.lexliterals = ""         # Literal characters that can be passed through
        self.lexmodule = None         # Module
        self.lineno = 1               # Current line number
        self.lexoptimize = 0          # Optimized mode

    def clone(self,object=None):
        """Return a shallow copy of the lexer.

        When `object` is given, every rule function and error function is
        re-looked-up by name on `object`, and `object` becomes the copy's
        `lexmodule`.
        """
        c = copy.copy(self)

        # If the object parameter has been supplied, it means we are attaching the
        # lexer to a new object.  In this case, we have to rebind all methods in
        # the lexstatere and lexstateerrorf tables.

        if object:
            newtab = { }
            for key, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        if not f or not f[0]:
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object,f[0].__name__),f[1]))
                    newre.append((cre,newfindex))
                newtab[key] = newre
            c.lexstatere = newtab
            c.lexstateerrorf = { }
            for key, ef in self.lexstateerrorf.items():
                # A state may carry no error function; keep that as is.
                if ef:
                    c.lexstateerrorf[key] = getattr(object,ef.__name__)
                else:
                    c.lexstateerrorf[key] = ef
            c.lexmodule = object
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self,tabfile,outputdir=""):
        """Write the lexer tables to ``<outputdir>/<last component of
        tabfile>.py``.  Does nothing when `tabfile` is a module object."""
        if isinstance(tabfile,types.ModuleType):
            return
        basetabfilename = tabfile.split(".")[-1]
        filename = os.path.join(outputdir,basetabfilename)+".py"

        # Build everything that is going to be written before the file is
        # opened, so a failure here cannot leave a truncated table behind.
        tabre = { }
        for key, lre in self.lexstatere.items():
            titem = []
            for i in range(len(lre)):
                titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i])))
            tabre[key] = titem

        taberr = { }
        for key, ef in self.lexstateerrorf.items():
            if ef:
                taberr[key] = ef.__name__
            else:
                taberr[key] = None

        # The with-block closes the file even when a write fails.
        with open(filename,"w") as tf:
            tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
            tf.write("_tabversion = %s\n" % repr(__version__))
            tf.write("_lextokens = %s\n" % repr(self.lextokens))
            tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
            tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
            tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
            tf.write("_lexstatere = %s\n" % repr(tabre))
            tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
            tf.write("_lexstateerrorf = %s\n" % repr(taberr))

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self,tabfile,fdict):
        """Load lexer tables from `tabfile` (a module object or an importable
        module name), resolving function names through `fdict`.

        Raises ImportError when the table was written by a different PLY
        version.
        """
        if isinstance(tabfile,types.ModuleType):
            lextab = tabfile
        else:
            if sys.version_info[0] < 3:
                exec("import %s as lextab" % tabfile)
            else:
                env = { }
                exec("import %s as lextab" % tabfile, env,env)
                lextab = env['lextab']

        if getattr(lextab,"_tabversion","0.0") != __version__:
            raise ImportError("Inconsistent PLY version")

        self.lextokens = lextab._lextokens
        self.lexreflags = lextab._lexreflags
        self.lexliterals = lextab._lexliterals
        self.lexstateinfo = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere = { }
        self.lexstateretext = { }
        for key,lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for i in range(len(lre)):
                titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict)))
                txtitem.append(lre[i][0])
            self.lexstatere[key] = titem
            self.lexstateretext[key] = txtitem
        self.lexstateerrorf = { }
        for key,ef in lextab._lexstateerrorf.items():
            # writetab() stores None for a state without an error function.
            if ef:
                self.lexstateerrorf[key] = fdict[ef]
            else:
                self.lexstateerrorf[key] = None
        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self,s):
        """Set `s` as the text to tokenize and rewind to position 0.

        Raises ValueError when `s` does not look like a string.
        """
        # Pull off the first character to see if s looks like a string
        c = s[:1]
        if not isinstance(c,StringTypes):
            raise ValueError("Expected a string")
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self,state):
        """Switch to lexing state `state`; ValueError if it is unknown."""
        if state not in self.lexstatere:
            raise ValueError("Undefined state")
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state,"")
        self.lexerrorf = self.lexstateerrorf.get(state,None)
        self.lexstate = state

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self,state):
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        self.begin(self.lexstatestack.pop())

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        return self.lexstate

    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self,n):
        self.lexpos += n

    # ------------------------------------------------------------
    # token() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible.  Don't make changes unless you really know what
    # you are doing
    # ------------------------------------------------------------
    def token(self):
        """Return the next token, or None once the input is exhausted.

        Raises LexError on input no rule accepts (unless the error rule
        handles it) and RuntimeError when no input was ever supplied.
        """
        # Make local copies of frequently referenced attributes
        lexpos = self.lexpos
        lexlen = self.lexlen
        lexignore = self.lexignore
        lexdata = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre,lexindexfunc in self.lexre:
                m = lexre.match(lexdata,lexpos)
                if not m: continue

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos

                i = m.lastindex
                func,tok.type = lexindexfunc[i]

                if not func:
                    # If no token type was set, it's an ignored token
                    if tok.type:
                        self.lexpos = m.end()
                        return tok
                    else:
                        lexpos = m.end()
                        break

                lexpos = m.end()

                # If token is processed by a function, call it

                tok.lexer = self      # Set additional attributes useful in token rules
                self.lexmatch = m
                self.lexpos = lexpos

                newtok = func(tok)

                # Every function must return a token, if nothing, we just move to next token
                if not newtok:
                    lexpos = self.lexpos         # This is here in case user has updated lexpos.
                    lexignore = self.lexignore   # This is here in case there was a state change
                    break

                # Verify type of the token.  If not in the token map, raise an error
                if not self.lexoptimize:
                    if not newtok.type in self.lextokens:
                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func_code(func).co_filename, func_code(func).co_firstlineno,
                            func.__name__, newtok.type),lexdata[lexpos:])

                return newtok
            else:
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = "error"
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok: continue
                    return newtok

                self.lexpos = lexpos
                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError("No input string given with input()")
        return None

    # Iterator interface
    def __iter__(self):
        return self

    def next(self):
        t = self.token()
        if t is None:
            raise StopIteration
        return t

    __next__ = next

# -----------------------------------------------------------------------------
#                           ==== Lex Builder ===
#
# The functions and classes below are used to collect lexing information
# and build a Lexer object from it.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack.  This is used to get the environment
# associated with the yacc() call if none was provided.
# -----------------------------------------------------------------------------

def get_caller_module_dict(levels):
    """Return a copy of the globals of the frame `levels` calls above this
    function, overlaid with that frame's locals when they differ."""
    # Ask for the frame directly instead of raising and catching an exception
    # just to reach it through the traceback.
    f = sys._getframe(levels)
    ldict = f.f_globals.copy()
    if f.f_globals != f.f_locals:
        ldict.update(f.f_locals)

    return ldict

# -----------------------------------------------------------------------------
# _funcs_to_names()
#
# Given a list of regular expression functions, this converts it to a list
# suitable for output to a table file
# -----------------------------------------------------------------------------

def _funcs_to_names(funclist,namelist):
    """Replace the function in each ``(function, tokentype)`` entry by the
    matching name from `namelist`; other entries are passed through."""
    result = []
    for f,name in zip(funclist,namelist):
        if f and f[0]:
            result.append((name, f[1]))
        else:
            result.append(f)
    return result

# -----------------------------------------------------------------------------
# _names_to_funcs()
#
# Given a list of regular expression function names, this converts it back to
# functions.
# -----------------------------------------------------------------------------

def _names_to_funcs(namelist,fdict):
    """Replace the name in each ``(name, tokentype)`` entry by ``fdict[name]``;
    other entries are passed through."""
    result = []
    for n in namelist:
        if n and n[0]:
            result.append((fdict[n[0]],n[1]))
        else:
            result.append(n)
    return result

# -----------------------------------------------------------------------------
# _form_master_re()
#
# This function takes a list of all of the regex components and attempts to
# form the master regular expression.
# Given limitations in the Python re
# module, it may be necessary to break the master regex into separate expressions.
# -----------------------------------------------------------------------------

def _form_master_re(relist,reflags,ldict,toknames):
    """Combine the regex strings in `relist` into master expressions.

    Returns three parallel lists: ``(compiled_re, index_to_rule)`` pairs, the
    regex source strings, and the per-group rule names.  When the joined
    expression cannot be built, the list is halved and each half is processed
    separately.  An empty `relist` yields ``[]``.

    An error for a single remaining expression is re-raised, because it
    cannot be split any further.
    """
    if not relist: return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex,re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
        lexindexnames = lexindexfunc[:]

        for f,i in lexre.groupindex.items():
            handle = ldict.get(f,None)
            if isinstance(handle,(types.FunctionType, types.MethodType)):
                lexindexfunc[i] = (handle,toknames[f])
                lexindexnames[i] = f
            elif handle is not None:
                lexindexnames[i] = f
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None,None)
                else:
                    lexindexfunc[i] = (None, toknames[f])

        return [(lexre,lexindexfunc)],[regex],[lexindexnames]
    except Exception:
        # Splitting a one-element list would hand the very same list to the
        # recursive call again, so give up and report the real error.
        if len(relist) == 1:
            raise
        m = int(len(relist)/2)
        llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
        rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
        return llist+rlist, lre+rre, lnames+rnames

# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token.
# For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------

def _statetoken(s,names):
    """Split rule name `s` into ``(states, tokenname)``.

    Components following the leading ``t`` are consumed as state names for
    as long as they are keys of `names` or the literal ``ANY``.  If no state
    is named the rule belongs to ``('INITIAL',)``; if ``ANY`` is named it
    belongs to every state in `names`.  `s` must contain at least one
    underscore.
    """
    parts = s.split("_")
    for i in range(1,len(parts)):
        if parts[i] not in names and parts[i] != 'ANY':
            break
    # NOTE: when every component is a state name the loop ends without a
    # break and the last component is still used as the token name.
    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    if 'ANY' in states:
        states = tuple(names)

    tokenname = "_".join(parts[i:])
    return (states,tokenname)


# -----------------------------------------------------------------------------
# LexerReflect()
#
# This class represents information needed to build a lexer as extracted from a
# user's input file.
# -----------------------------------------------------------------------------
class LexerReflect(object):
    """Collects and validates a lexer specification held in a dictionary.

    `ldict` maps names to the objects of the user's lexer definition
    (``tokens``, ``literals``, ``states`` and the ``t_`` rules).  Problems are
    reported through `log` and flagged by setting ``self.error`` to 1.
    """

    def __init__(self,ldict,log=None,reflags=0):
        self.ldict      = ldict
        self.error_func = None
        self.tokens     = []
        self.reflags    = reflags
        self.stateinfo  = { 'INITIAL' : 'inclusive'}
        self.files      = {}      # Files in which rule functions were found
        self.error      = 0

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        """Read tokens, literals, states and rules out of the dictionary."""
        self.get_tokens()
        self.get_literals()
        self.get_states()
        self.get_rules()

    # Validate all of the information
    def validate_all(self):
        """Run every validation pass and return the error flag (0 or 1)."""
        self.validate_tokens()
        self.validate_literals()
        self.validate_rules()
        return self.error

    # Get the tokens map
    def get_tokens(self):
        """Fetch ``tokens`` from the dictionary; it must be a non-empty list or tuple."""
        tokens = self.ldict.get("tokens",None)
        # Test for "missing" with `is None` so that an empty list reaches
        # the dedicated "tokens is empty" diagnostic below.
        if tokens is None:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        """Report malformed token names (error) and duplicates (warning)."""
        seen = {}
        for name in self.tokens:
            if not _is_identifier.match(name):
                self.log.error("Bad token name '%s'",name)
                self.error = 1
            if name in seen:
                self.log.warning("Token '%s' multiply defined", name)
            seen[name] = 1

    # Get the literals specifier
    def get_literals(self):
        """Fetch ``literals`` from the dictionary (default: empty string)."""
        self.literals = self.ldict.get("literals","")

    # Validate literals
    def validate_literals(self):
        """Check that ``literals`` is an iterable of single-character strings."""
        try:
            for c in self.literals:
                if not isinstance(c,StringTypes) or len(c) > 1:
                    self.log.error("Invalid literal %s. Must be a single character", repr(c))
                    self.error = 1

        except TypeError:
            # self.literals is not iterable at all
            self.log.error("Invalid literals specification. literals must be a sequence of characters")
            self.error = 1

    def get_states(self):
        """Fetch ``states`` and record each valid ``(name, type)`` pair in stateinfo."""
        self.states = self.ldict.get("states",None)
        if not self.states:
            return

        # Build statemap
        if not isinstance(self.states,(tuple,list)):
            self.log.error("states must be defined as a tuple or list")
            self.error = 1
            return

        for spec in self.states:
            if not isinstance(spec,tuple) or len(spec) != 2:
                self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(spec))
                self.error = 1
                continue
            name, statetype = spec
            if not isinstance(name,StringTypes):
                self.log.error("State name %s must be a string", repr(name))
                self.error = 1
                continue
            if statetype not in ('inclusive', 'exclusive'):
                self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name)
                self.error = 1
                continue
            if name in self.stateinfo:
                self.log.error("State '%s' already defined",name)
                self.error = 1
                continue
            self.stateinfo[name] = statetype

    # Get all of the symbols with a t_ prefix and sort them into various
    # categories (functions, strings, error functions, and ignore characters)

    def get_rules(self):
        """Sort every ``t_`` symbol into function, string, ignore and error rules."""
        tsymbols = [f for f in self.ldict if f[:2] == 't_' ]

        # Now build up a list of functions and a list of strings

        self.toknames = { }        # Mapping of symbols to token names
        self.funcsym  = { }        # Symbols defined as functions
        self.strsym   = { }        # Symbols defined as strings
        self.ignore   = { }        # Ignore strings by state
        self.errorf   = { }        # Error functions by state

        for s in self.stateinfo:
            self.funcsym[s] = []
            self.strsym[s] = []

        if not tsymbols:
            self.log.error("No rules of the form t_rulename are defined")
            self.error = 1
            return

        for f in tsymbols:
            t = self.ldict[f]
            states, tokname = _statetoken(f,self.stateinfo)
            self.toknames[f] = tokname

            if hasattr(t,"__call__"):
                if tokname == 'error':
                    for s in states:
                        self.errorf[s] = t
                elif tokname == 'ignore':
                    code = func_code(t)
                    self.log.error("%s:%d: Rule '%s' must be defined as a string",code.co_filename,code.co_firstlineno,t.__name__)
                    self.error = 1
                else:
                    for s in states:
                        self.funcsym[s].append((f,t))
            elif isinstance(t, StringTypes):
                if tokname == 'ignore':
                    for s in states:
                        self.ignore[s] = t
                    if "\\" in t:
                        self.log.warning("%s contains a literal backslash '\\'",f)

                elif tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", f)
                    self.error = 1
                else:
                    for s in states:
                        self.strsym[s].append((f,t))
            else:
                self.log.error("%s not defined as a function or string", f)
                self.error = 1

        # Sort the functions by line number.  A key function works on
        # both Python 2 and 3, so no version switch is needed.
        for rules in self.funcsym.values():
            rules.sort(key=lambda x: func_code(x[1]).co_firstlineno)

        # Sort the strings by decreasing regular expression length
        # (stable, so equal-length rules keep their relative order)
        for rules in self.strsym.values():
            rules.sort(key=lambda x: len(x[1]),reverse=True)

    # Validate all of the t_rules collected
    def validate_rules(self):
        """Check argument counts and regexes of all rules, then scan the source files."""
        for state in self.stateinfo:
            # Validate all rules defined by functions
            for fname, f in self.funcsym[state]:
                line = func_code(f).co_firstlineno
                filename = func_code(f).co_filename
                self.files[filename] = 1

                # Bound methods carry an extra `self` argument
                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = func_code(f).co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments",filename,line,f.__name__)
                    self.error = 1
                    continue

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", filename,line,f.__name__)
                    self.error = 1
                    continue

                if not f.__doc__:
                    self.log.error("%s:%d: No regular expression defined for rule '%s'",filename,line,f.__name__)
                    self.error = 1
                    continue

                try:
                    c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags)
                    if c.match(""):
                        self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", filename,line,f.__name__)
                        self.error = 1
                except re.error:
                    # sys.exc_info() keeps the syntax valid on Python 2 and 3
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", filename,line,f.__name__,e)
                    if '#' in f.__doc__:
                        self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",filename,line, f.__name__)
                    self.error = 1

            # Validate all rules defined by strings
            for name,r in self.strsym[state]:
                tokname = self.toknames[name]
                if tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", name)
                    self.error = 1
                    continue

                if tokname not in self.tokens and tokname.find("ignore_") < 0:
                    self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname)
                    self.error = 1
                    continue

                try:
                    c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags)
                    if c.match(""):
                        self.log.error("Regular expression for rule '%s' matches empty string",name)
                        self.error = 1
                except re.error:
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("Invalid regular expression for rule '%s'. %s",name,e)
                    if '#' in r:
                        self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name)
                    self.error = 1

            if not self.funcsym[state] and not self.strsym[state]:
                self.log.error("No rules defined for state '%s'",state)
                self.error = 1

            # Validate the error function
            efunc = self.errorf.get(state,None)
            if efunc:
                line = func_code(efunc).co_firstlineno
                filename = func_code(efunc).co_filename
                self.files[filename] = 1

                if isinstance(efunc, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = func_code(efunc).co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments",filename,line,efunc.__name__)
                    self.error = 1

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", filename,line,efunc.__name__)
                    self.error = 1

        for filename in self.files:
            self.validate_file(filename)


    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This checks to see if there are duplicated t_rulename() functions or strings
    # in the parser input file.  This is done using a simple regular expression
    # match on each line in the given file.
    # -----------------------------------------------------------------------------

    def validate_file(self,filename):
        """Report every ``t_`` rule that is defined more than once in `filename`.

        Files without a ``.py`` extension and files that cannot be opened or
        read (IOError) are silently skipped.
        """
        import os.path
        base,ext = os.path.splitext(filename)
        if ext != '.py': return         # No idea what the file is. Return OK

        try:
            f = open(filename)
            try:
                lines = f.readlines()
            finally:
                # Close the handle even if reading fails
                f.close()
        except IOError:
            return                      # Couldn't find the file.  Don't worry about it

        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')

        first_seen = { }                # rule name -> line of first definition
        for linen, text in enumerate(lines, 1):
            m = fre.match(text) or sre.match(text)
            if not m:
                continue
            name = m.group(1)
            prev = first_seen.get(name)
            if not prev:
                first_seen[name] = linen
            else:
                self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev)
                self.error = 1

# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
    """Build a lexer from the rules found in `module`, `object` or the caller's namespace.

    module/object -- source of the definitions; `object` wins if both are
                     given.  If neither is given, the namespace of the code
                     calling lex() is used.
    debug         -- if true, log the collected rules and master regexes
    optimize      -- if true, skip validation and read/write the `lextab` table
    lextab        -- name of the table handed to readtab()/writetab()
    reflags       -- extra flags for the regular expressions
    nowarn        -- accepted but not used by this function
    outputdir     -- directory handed to writetab()
    debuglog, errorlog -- logger objects; default to PlyLogger(sys.stderr)

    Returns the Lexer object and also publishes it through the module
    globals ``lexer``, ``token`` and ``input``.  Raises SyntaxError when the
    specification does not validate.
    """
    global lexer, token, input

    lexobj = Lexer()
    lexobj.lexoptimize = optimize

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    if debug and debuglog is None:
        debuglog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the lexer
    if object: module = object

    if module:
        ldict = dict([(k,getattr(module,k)) for k in dir(module)])
    else:
        # Must be called directly from lex(): the frame count depends on it.
        ldict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
    linfo.get_all()
    if not optimize:
        if linfo.validate_all():
            raise SyntaxError("Can't build lexer")

    if optimize and lextab:
        try:
            lexobj.readtab(lextab,ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj

        except ImportError:
            # No usable table yet: fall through and build one
            pass

    # Dump some basic debugging information
    if debug:
        debuglog.info("lex: tokens   = %r", linfo.tokens)
        debuglog.info("lex: literals = %r", linfo.literals)
        debuglog.info("lex: states   = %r", linfo.stateinfo)

    # Build a dictionary of valid token names
    lexobj.lextokens = dict.fromkeys(linfo.tokens, 1)

    # Get literals specification
    literals = linfo.literals
    if isinstance(literals,(list,tuple)):
        if literals:
            # Join with an empty string of the same type as the elements
            lexobj.lexliterals = type(literals[0])().join(literals)
        else:
            # An empty list/tuple has no first element to take the type from
            lexobj.lexliterals = ""
    else:
        lexobj.lexliterals = literals

    # Get the stateinfo dictionary
    stateinfo = linfo.stateinfo

    regexs = { }
    # Build the master regular expressions
    for state in stateinfo:
        regex_list = []

        # Add rules defined by functions first
        for fname, f in linfo.funcsym[state]:
            regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)

        # Now add all of the simple rules
        for name,r in linfo.strsym[state]:
            regex_list.append("(?P<%s>%s)" % (name,r))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)

        regexs[state] = regex_list

    # Build the master regular expressions

    if debug:
        debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")

    for state in regexs:
        lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        lexobj.lexstaterenames[state] = re_names
        if debug:
            for i, text in enumerate(re_text):
                debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, text)

    # For inclusive states, we need to add the regular expressions from the INITIAL state
    for state,stype in stateinfo.items():
        if state != "INITIAL" and stype == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
            lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere["INITIAL"]
    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
    lexobj.lexreflags = reflags

    # Set up ignore variables
    lexobj.lexstateignore = linfo.ignore
    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")

    # Set up error functions
    lexobj.lexstateerrorf = linfo.errorf
    lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
    if not lexobj.lexerrorf:
        errorlog.warning("No t_error rule is defined")

    # Check state information for ignore and error rules
    for s,stype in stateinfo.items():
        if stype == 'exclusive':
            if s not in linfo.errorf:
                errorlog.warning("No error rule is defined for exclusive state '%s'", s)
            if s not in linfo.ignore and lexobj.lexignore:
                errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
        elif stype == 'inclusive':
            # Inclusive states inherit missing rules from INITIAL.  These
            # are the same dict objects stored on lexobj above.
            if s not in linfo.errorf:
                linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
            if s not in linfo.ignore:
                linfo.ignore[s] = linfo.ignore.get("INITIAL","")

    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        lexobj.writetab(lextab,outputdir)

    return lexobj

# -----------------------------------------------------------------------------
# runmain()
#
# This runs the lexer as a main program
# -----------------------------------------------------------------------------

def runmain(lexer=None,data=None):
    """Tokenize `data` and print one ``(type,value,lineno,lexpos)`` line per token.

    If `data` is empty it is read from the file named by ``sys.argv[1]``, or
    from standard input when no argument was given.  If `lexer` is not
    supplied, the module-level ``input``/``token`` names are used.
    """
    if not data:
        try:
            filename = sys.argv[1]
        except IndexError:
            sys.stdout.write("Reading from standard input (type EOF to end):\n")
            data = sys.stdin.read()
        else:
            f = open(filename)
            try:
                data = f.read()
            finally:
                # Close the handle even if reading fails
                f.close()

    if lexer:
        _input = lexer.input
        _token = lexer.token
    else:
        _input = input
        _token = token
    _input(data)

    while 1:
        tok = _token()
        if not tok: break
        sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos))

# -----------------------------------------------------------------------------
# @TOKEN(regex)
#
# This decorator function can be used to set the regex expression on a function
# when its docstring might need to be set in an alternative way
# -----------------------------------------------------------------------------

def TOKEN(r):
    """Return a decorator that sets the decorated function's docstring.

    `r` is either the docstring text itself or a callable whose ``__doc__``
    is copied.
    """
    def set_doc(f):
        if hasattr(r,"__call__"):
            f.__doc__ = r.__doc__
        else:
            f.__doc__ = r
        return f
    return set_doc

# Alternative spelling of
the TOKEN decorator +Token = TOKEN + diff --git a/bitbake/lib/ply/yacc.py b/bitbake/lib/ply/yacc.py new file mode 100644 index 0000000000..6168fd9a03 --- /dev/null +++ b/bitbake/lib/ply/yacc.py @@ -0,0 +1,3276 @@ +# ----------------------------------------------------------------------------- +# ply: yacc.py +# +# Copyright (C) 2001-2009, +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ----------------------------------------------------------------------------- +# +# This implements an LR parser that is constructed from grammar rules defined +# as Python functions. The grammar is specified by supplying the BNF inside +# Python documentation strings. The inspiration for this technique was borrowed +# from John Aycock's Spark parsing system. PLY might be viewed as a cross between +# Spark and the GNU bison utility. +# +# The current implementation is only somewhat object-oriented. The +# LR parser itself is defined in terms of an object (which allows multiple +# parsers to co-exist). However, most of the variables used during table +# construction are defined in terms of global variables. Users shouldn't +# notice unless they are trying to define multiple parsers at the same +# time using threads (in which case they should have their head examined). +# +# This implementation supports both SLR and LALR(1) parsing. LALR(1) +# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), +# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, +# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced +# by the more efficient DeRemer and Pennello algorithm. +# +# :::::::: WARNING ::::::: +# +# Construction of LR parsing tables is fairly complicated and expensive. +# To make this module run fast, a *LOT* of work has been put into +# optimization---often at the expense of readability and what one might +# consider to be good Python "coding style." Modify the code at your +# own risk! 
# ----------------------------------------------------------------------------

__version__    = "3.3"
__tabversion__ = "3.2"       # Table version

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

# Debugging mode.  If set, yacc generates a 'parser.out' file in the
# current directory.
yaccdebug = 0

# Default name of the debugging file
debug_file = 'parser.out'

# Default name of the table module
tab_module = 'parsetab'

# Default LR table generation method
default_lr = 'LALR'

# Number of symbols that must be shifted to leave recovery mode
error_count = 3

# Set to True if developing yacc.  This turns off optimized
# implementations of certain functions.
yaccdevel = 0

# Size limit of results when running in debug mode.
resultlimit = 40

# Protocol to use when writing pickle files
pickle_protocol = 0

import re
import types
import sys
import os.path

# Compatibility function for python 2.6/3.0
def func_code(f):
    """Return the code object of function `f` on both Python 2 and Python 3."""
    if sys.version_info[0] < 3:
        return f.func_code
    return f.__code__

# Compatibility: sys.maxint does not exist on Python 3
if hasattr(sys, "maxint"):
    MAXINT = sys.maxint
else:
    MAXINT = sys.maxsize

# Python 2.x/3.0 compatibility.
def load_ply_lex():
    """Import and return the companion lex module.

    Python 3 imports it as ``ply.lex``; Python 2 uses a plain ``import lex``.
    """
    if sys.version_info[0] >= 3:
        import ply.lex as lex
    else:
        import lex
    return lex

# This object is a stand-in for a logging object created by the
# logging module.   PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    """Minimal logger that writes %-formatted messages to a file-like object.

    Only the positional arguments are used for formatting; keyword
    arguments are accepted and ignored.
    """
    def __init__(self,f):
        self.f = f

    def debug(self,msg,*args,**kwargs):
        text = msg % args
        self.f.write(text + "\n")

    # info() and critical() share debug()'s un-prefixed output
    info = debug
    critical = debug

    def warning(self,msg,*args,**kwargs):
        text = msg % args
        self.f.write("WARNING: " + text + "\n")

    def error(self,msg,*args,**kwargs):
        text = msg % args
        self.f.write("ERROR: " + text + "\n")

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Logger stand-in: every attribute access and every call returns the logger itself."""
    def __getattribute__(self,name):
        return self

    def __call__(self,*args,**kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    """Describe `r` as ``<type @ 0xid> (repr)``, truncating the repr at `resultlimit` characters."""
    text = repr(r)
    if '\n' in text:
        # Re-quote so the message stays on a single line
        text = repr(text)
    if len(text) > resultlimit:
        text = text[:resultlimit] + " ..."
    return "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),text)


# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    """Return the repr of `r` if it is shorter than 16 characters, else ``<type @ 0xid>``."""
    text = repr(r)
    if '\n' in text:
        text = repr(text)
    if len(text) >= 16:
        return "<%s @ 0x%x>" % (type(r).__name__,id(r))
    return text

#-----------------------------------------------------------------------------
#                        ===  LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):    return self.type
    def __repr__(self):   return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.   Index lookup and assignment read and write the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).   The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    """Sequence-like view over the symbols of one production.

    `s` is the list of symbols of the production; `stack` is the list used
    for negative indices.
    """
    def __init__(self,s,stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser= None

    def __getitem__(self,n):
        """Return the value at index `n`, or a list of values for a slice.

        Non-negative indices address the production's own symbols;
        negative indices address `self.stack`.
        """
        # Python 3 never calls __getslice__: p[i:j] arrives here as a
        # slice object, which must not be compared with 0.
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        if n >= 0: return self.slice[n].value
        else: return self.stack[n].value

    def __setitem__(self,n,v):
        self.slice[n].value = v

    # Python 2 only: simple (non-extended) slices are routed here.
    def __getslice__(self,i,j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        """Return the line number of symbol `n`, or 0 if it has none."""
        return getattr(self.slice[n],"lineno",0)

    def set_lineno(self,n,lineno):
        self.slice[n].lineno = lineno

    def linespan(self,n):
        """Return ``(startline, endline)`` of symbol `n`; endline defaults to startline."""
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def lexpos(self,n):
        """Return the lex position of symbol `n`, or 0 if it has none."""
        return getattr(self.slice[n],"lexpos",0)

    def lexspan(self,n):
        """Return ``(lexpos, endlexpos)`` of symbol `n`; endlexpos defaults to lexpos."""
        startpos = getattr(self.slice[n],"lexpos",0)
        endpos = getattr(self.slice[n],"endlexpos",startpos)
        return startpos,endpos

    def error(self):
        """Raise SyntaxError."""
        raise SyntaxError


# -----------------------------------------------------------------------------
#                               == LRParser ==
#
+# The LR Parsing engine. +# ----------------------------------------------------------------------------- + +class LRParser: + def __init__(self,lrtab,errorf): + self.productions = lrtab.lr_productions + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + + def errok(self): + self.errorok = 1 + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$end' + self.symstack.append(sym) + self.statestack.append(0) + + def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): + if debug or yaccdevel: + if isinstance(debug,int): + debug = PlyLogger(sys.stderr) + return self.parsedebug(input,lexer,debug,tracking,tokenfunc) + elif tracking: + return self.parseopt(input,lexer,debug,tracking,tokenfunc) + else: + return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) + + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # parsedebug(). + # + # This is the debugging enabled version of parse(). All changes made to the + # parsing engine should be made here. For the non-debugging version, + # copy this code to a method parseopt() and delete all of the sections + # enclosed in: + # + # #--! DEBUG + # statements + # #--! DEBUG + # + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): + lookahead = None # Current lookahead symbol + lookaheadstack = [ ] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + # --! DEBUG + debug.info("PLY: PARSE DEBUG START") + # --! 
DEBUG + + # If no lexer was given, we will try to use the lex module + if not lexer: + lex = load_ply_lex() + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set up the state and symbol stacks + + statestack = [ ] # Stack of parsing states + self.statestack = statestack + symstack = [ ] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = "$end" + symstack.append(sym) + state = 0 + while 1: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + # --! DEBUG + debug.debug('') + debug.debug('State : %s', state) + # --! DEBUG + + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = "$end" + + # --! DEBUG + debug.debug('Stack : %s', + ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + # --! DEBUG + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + # --! DEBUG + debug.debug("Action : Shift and goto state %s", t) + # --! 
DEBUG + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: errorcount -=1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + # --! DEBUG + if plen: + debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) + else: + debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) + + # --! DEBUG + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + # --! TRACKING + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1,"endlineno",t1.lineno) + sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) + + # --! TRACKING + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + del statestack[-plen:] + p.callable(pslice) + # --! DEBUG + debug.info("Result : %s", format_result(pslice[0])) + # --! DEBUG + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) + symstack.pop() + statestack.pop() + state = statestack[-1] + sym.type = 'error' + lookahead = sym + errorcount = error_count + self.errorok = 0 + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + # --! TRACKING + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + # --! TRACKING + + targ = [ sym ] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + p.callable(pslice) + # --! DEBUG + debug.info("Result : %s", format_result(pslice[0])) + # --! DEBUG + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) + symstack.pop() + statestack.pop() + state = statestack[-1] + sym.type = 'error' + lookahead = sym + errorcount = error_count + self.errorok = 0 + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + result = getattr(n,"value",None) + # --! DEBUG + debug.info("Done : Returning %s", format_result(result)) + debug.info("PLY: PARSE DEBUG END") + # --! DEBUG + return result + + if t == None: + + # --! DEBUG + debug.error('Error : %s', + ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + # --! DEBUG + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = 0 + errtoken = lookahead + if errtoken.type == "$end": + errtoken = None # End of file! 
+ if self.errorfunc: + global errok,token,restart + errok = self.errok # Set some special functions available in error recovery + token = get_token + restart = self.restart + if errtoken and not hasattr(errtoken,'lexer'): + errtoken.lexer = lexer + tok = self.errorfunc(errtoken) + del errok, token, restart # Delete special functions + + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken,"lineno"): lineno = lookahead.lineno + else: lineno = 0 + if lineno: + sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + else: + sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + else: + sys.stderr.write("yacc: Parse error in input. EOF\n") + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != "$end": + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == "$end": + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. 
                        # Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
    # Edit the debug version above, then copy any modifications to the method
    # below while removing #--! DEBUG sections.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Run the LR parsing loop without the debug logging sections.

        input     -- if not None, handed to lexer.input() before parsing.
        lexer     -- lexer object; when false, load_ply_lex().lexer is used.
        debug     -- accepted for signature parity; not referenced in this variant.
        tracking  -- when true, lineno/lexpos (and endlineno/endlexpos) are
                     copied onto every reduced symbol.
        tokenfunc -- optional callable used instead of lexer.token to fetch
                     the next token.

        Returns the 'value' attribute (or None if absent) of the symbol on top
        of the symbol stack when the accept action (0) is reached, and None
        when error recovery gives up.  Raises RuntimeError if control falls
        through every action case.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # Slice of the right-hand-side symbols, with slot 0
                        # replaced by the new left-hand-side symbol.
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: hand back the value of the top-of-stack symbol
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            # NOTE(review): the test is on errtoken but the
                            # value is read from lookahead; they are the same
                            # object here (errtoken = lookahead above).
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                # NOTE(review): unlike the sibling messages this
                                # one has no trailing newline.
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token in an 'error' symbol and push
                    # the original token back for later re-reading.
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
    # code in the #--! TRACKING sections
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Run the LR parsing loop with neither debug logging nor position tracking.

        input     -- if not None, handed to lexer.input() before parsing.
        lexer     -- lexer object; when false, load_ply_lex().lexer is used.
        debug     -- accepted for signature parity; not referenced here.
        tracking  -- accepted for signature parity; not referenced here.
        tokenfunc -- optional callable used instead of lexer.token to fetch
                     the next token.

        Returns the 'value' attribute (or None if absent) of the symbol on top
        of the symbol stack when the accept action (0) is reached, and None
        when error recovery gives up.  Raises RuntimeError if control falls
        through every action case.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # Slice of the right-hand-side symbols, with slot 0
                        # replaced by the new left-hand-side symbol.
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: hand back the value of the top-of-stack symbol
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            # NOTE(review): the test is on errtoken but the
                            # value is read from lookahead; they are the same
                            # object here (errtoken = lookahead above).
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                # NOTE(review): unlike the sibling messages this
                                # one has no trailing newline.
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token in an 'error' symbol and push
                    # the original token back for later re-reading.
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

import re

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len       - Length of the production (number of symbols on right hand side)
#       usyms     - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    """A single grammar rule 'name -> prod[0] prod[1] ...'.

    Instances are always truthy, even when the right-hand side is empty, so
    that 'if not p' can be used to tell a production apart from None.
    """
    reduced = 0
    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None        # Filled in later by bind()
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len  = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        # (first-occurrence order is preserved)
        self.usyms = [ ]
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    # Because __len__ is defined, truth testing would otherwise fall back to
    # len() and make empty productions falsy.  Python 3 looks up __bool__,
    # Python 2 looks up __nonzero__; provide both.
    def __bool__(self):
        return True

    __nonzero__ = __bool__

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = LRItem(self,n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        # NOTE(review): 'Prodnames' is read as a bare (module-level) name and
        # is not defined anywhere in the visible code; if it does not exist
        # the NameError is not caught by the handler below -- confirm.
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            p.lr_after = []
        # NOTE(review): for n == 0 the index n-1 is -1, which selects the last
        # symbol instead of raising IndexError.
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal standin for Production objects when
# reading table data from files.   It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    """Lightweight stand-in for Production holding only name, len, func,
    file, line and the preformatted string form."""
    def __init__(self,str,name,len,func,file,line):
        self.name     = name
        self.len      = len
        self.func     = func
        self.callable = None        # Filled in later by bind()
        self.file     = file
        self.line     = line
        self.str      = str
    def __str__(self):
        return self.str
    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]


# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here
# are the basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next      Next LR item. Example, if we are ' expr -> expr .
PLUS term' +# then lr_next refers to 'expr -> expr PLUS . term' +# lr_index - LR item index (location of the ".") in the prod list. +# lookaheads - LALR lookahead symbols for this item +# len - Length of the production (number of symbols on right hand side) +# lr_after - List of all productions that immediately follow +# lr_before - Grammar symbol immediately before +# ----------------------------------------------------------------------------- + +class LRItem(object): + def __init__(self,p,n): + self.name = p.name + self.prod = list(p.prod) + self.number = p.number + self.lr_index = n + self.lookaheads = { } + self.prod.insert(n,".") + self.prod = tuple(self.prod) + self.len = len(self.prod) + self.usyms = p.usyms + + def __str__(self): + if self.prod: + s = "%s -> %s" % (self.name," ".join(self.prod)) + else: + s = "%s -> <empty>" % self.name + return s + + def __repr__(self): + return "LRItem("+str(self)+")" + +# ----------------------------------------------------------------------------- +# rightmost_terminal() +# +# Return the rightmost terminal from a list of symbols. Used in add_production() +# ----------------------------------------------------------------------------- +def rightmost_terminal(symbols, terminals): + i = len(symbols) - 1 + while i >= 0: + if symbols[i] in terminals: + return symbols[i] + i -= 1 + return None + +# ----------------------------------------------------------------------------- +# === GRAMMAR CLASS === +# +# The following class represents the contents of the specified grammar along +# with various computed properties such as first sets, follow sets, LR items, etc. +# This data is used for critical parts of the table generation process later. +# ----------------------------------------------------------------------------- + +class GrammarError(YaccError): pass + +class Grammar(object): + def __init__(self,terminals): + self.Productions = [None] # A list of all of the productions. 
The first + # entry is always reserved for the purpose of + # building an augmented grammar + + self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + self.Prodmap = { } # A dictionary that is only used to detect duplicate + # productions. + + self.Terminals = { } # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + for term in terminals: + self.Terminals[term] = [] + + self.Terminals['error'] = [] + + self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + self.First = { } # A dictionary of precomputed FIRST(x) symbols + + self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + + self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. + + self.Start = None # Starting symbol for the grammar + + + def __len__(self): + return len(self.Productions) + + def __getitem__(self,index): + return self.Productions[index] + + # ----------------------------------------------------------------------------- + # set_precedence() + # + # Sets the precedence for a given terminal. assoc is the associativity such as + # 'left','right', or 'nonassoc'. level is a numeric level. 
+ # + # ----------------------------------------------------------------------------- + + def set_precedence(self,term,assoc,level): + assert self.Productions == [None],"Must call set_precedence() before add_production()" + if term in self.Precedence: + raise GrammarError("Precedence already specified for terminal '%s'" % term) + if assoc not in ['left','right','nonassoc']: + raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") + self.Precedence[term] = (assoc,level) + + # ----------------------------------------------------------------------------- + # add_production() + # + # Given an action function, this function assembles a production rule and + # computes its precedence level. + # + # The production rule is supplied as a list of symbols. For example, + # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and + # symbols ['expr','PLUS','term']. + # + # Precedence is determined by the precedence of the right-most non-terminal + # or the precedence of a terminal specified by %prec. + # + # A variety of error checks are performed to make sure production symbols + # are valid and that %prec is used correctly. + # ----------------------------------------------------------------------------- + + def add_production(self,prodname,syms,func=None,file='',line=0): + + if prodname in self.Terminals: + raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) + if prodname == 'error': + raise GrammarError("%s:%d: Illegal rule name '%s'. 
error is a reserved word" % (file,line,prodname)) + if not _is_identifier.match(prodname): + raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) + + # Look for literal tokens + for n,s in enumerate(syms): + if s[0] in "'\"": + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) + if not c in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass + if not _is_identifier.match(s) and s != '%prec': + raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) + + # Determine the precedence level + if '%prec' in syms: + if syms[-1] == '%prec': + raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) + if syms[-2] != '%prec': + raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) + precname = syms[-1] + prodprec = self.Precedence.get(precname,None) + if not prodprec: + raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) + else: + self.UsedPrecedence[precname] = 1 + del syms[-2:] # Drop %prec from the rule + else: + # If no %prec, precedence is determined by the rightmost terminal symbol + precname = rightmost_terminal(syms,self.Terminals) + prodprec = self.Precedence.get(precname,('right',0)) + + # See if the rule is already in the rulemap + map = "%s -> %s" % (prodname,syms) + if map in self.Prodmap: + m = self.Prodmap[map] + raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + + "Previous definition at %s:%d" % (m.file, m.line)) + + # From this point on, everything is valid. 
Create a new Production instance + pnumber = len(self.Productions) + if not prodname in self.Nonterminals: + self.Nonterminals[prodname] = [ ] + + # Add the production number to Terminals and Nonterminals + for t in syms: + if t in self.Terminals: + self.Terminals[t].append(pnumber) + else: + if not t in self.Nonterminals: + self.Nonterminals[t] = [ ] + self.Nonterminals[t].append(pnumber) + + # Create a production and add it to the list of productions + p = Production(pnumber,prodname,syms,prodprec,func,file,line) + self.Productions.append(p) + self.Prodmap[map] = p + + # Add to the global productions list + try: + self.Prodnames[prodname].append(p) + except KeyError: + self.Prodnames[prodname] = [ p ] + return 0 + + # ----------------------------------------------------------------------------- + # set_start() + # + # Sets the starting symbol and creates the augmented grammar. Production + # rule 0 is S' -> start where start is the start symbol. + # ----------------------------------------------------------------------------- + + def set_start(self,start=None): + if not start: + start = self.Productions[1].name + if start not in self.Nonterminals: + raise GrammarError("start symbol %s undefined" % start) + self.Productions[0] = Production(0,"S'",[start]) + self.Nonterminals[start].append(0) + self.Start = start + + # ----------------------------------------------------------------------------- + # find_unreachable() + # + # Find all of the nonterminal symbols that can't be reached from the starting + # symbol. Returns a list of nonterminals that can't be reached. + # ----------------------------------------------------------------------------- + + def find_unreachable(self): + + # Mark all symbols that are reachable from a symbol s + def mark_reachable_from(s): + if reachable[s]: + # We've already reached symbol s. 
+ return + reachable[s] = 1 + for p in self.Prodnames.get(s,[]): + for r in p.prod: + mark_reachable_from(r) + + reachable = { } + for s in list(self.Terminals) + list(self.Nonterminals): + reachable[s] = 0 + + mark_reachable_from( self.Productions[0].prod[0] ) + + return [s for s in list(self.Nonterminals) + if not reachable[s]] + + # ----------------------------------------------------------------------------- + # infinite_cycles() + # + # This function looks at the various parsing rules and tries to detect + # infinite recursion cycles (grammar rules where there is no possible way + # to derive a string of only terminals). + # ----------------------------------------------------------------------------- + + def infinite_cycles(self): + terminates = {} + + # Terminals: + for t in self.Terminals: + terminates[t] = 1 + + terminates['$end'] = 1 + + # Nonterminals: + + # Initialize to false: + for n in self.Nonterminals: + terminates[n] = 0 + + # Then propagate termination until no change: + while 1: + some_change = 0 + for (n,pl) in self.Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = 0 + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = 1 + + if p_terminates: + # symbol n terminates! + if not terminates[n]: + terminates[n] = 1 + some_change = 1 + # Don't need to consider any more productions for this n. + break + + if not some_change: + break + + infinite = [] + for (s,term) in terminates.items(): + if not term: + if not s in self.Prodnames and not s in self.Terminals and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. 
+ pass + else: + infinite.append(s) + + return infinite + + + # ----------------------------------------------------------------------------- + # undefined_symbols() + # + # Find all symbols that were used the grammar, but not defined as tokens or + # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol + # and prod is the production where the symbol was used. + # ----------------------------------------------------------------------------- + def undefined_symbols(self): + result = [] + for p in self.Productions: + if not p: continue + + for s in p.prod: + if not s in self.Prodnames and not s in self.Terminals and s != 'error': + result.append((s,p)) + return result + + # ----------------------------------------------------------------------------- + # unused_terminals() + # + # Find all terminals that were defined, but not used by the grammar. Returns + # a list of all symbols. + # ----------------------------------------------------------------------------- + def unused_terminals(self): + unused_tok = [] + for s,v in self.Terminals.items(): + if s != 'error' and not v: + unused_tok.append(s) + + return unused_tok + + # ------------------------------------------------------------------------------ + # unused_rules() + # + # Find all grammar rules that were defined, but not used (maybe not reachable) + # Returns a list of productions. + # ------------------------------------------------------------------------------ + + def unused_rules(self): + unused_prod = [] + for s,v in self.Nonterminals.items(): + if not v: + p = self.Prodnames[s][0] + unused_prod.append(p) + return unused_prod + + # ----------------------------------------------------------------------------- + # unused_precedence() + # + # Returns a list of tuples (term,precedence) corresponding to precedence + # rules that were never used by the grammar. 
    # term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        """Return (termname, assoc) pairs for precedence entries never used.

        An entry counts as unused when its name is neither a key of
        self.Terminals nor present in self.UsedPrecedence.  The second tuple
        element is the first field of the stored precedence record.
        """
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname,self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first1, the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self,beta):
        """Return FIRST(beta) as a list, built from the current self.First table.

        beta is a sequence of grammar symbols; every symbol must already be a
        key of self.First.  '<empty>' is appended only when every symbol of
        beta can produce empty (which includes the case of an empty beta).
        """

        # We are computing First(x1,x2,x3,...,xn)
        result = [ ]
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result: result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        """Fill in and return self.First, mapping each symbol to its FIRST list.

        The table is computed once: if self.First is already non-empty it is
        returned unchanged.
        """
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    # _first() reads the partially built table, hence the
                    # repeated passes until nothing new is added.
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append( f )
                            some_change = 1
            if not some_change:
                break

        return self.First

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self,start=None):
        """Fill in and return self.Follow, mapping each nonterminal to a list.

        start names the start symbol; when it is falsy the name of
        self.Productions[1] is used.  self.First is computed first if it is
        still empty.  A non-empty self.Follow is returned as-is.
        """
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = [ '$end' ]

        # Iterate to a fixed point: stop after a full pass that adds nothing.
        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Here is the production set
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow


    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #   E -> E PLUS E
    #
    # Creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        """Attach LR items to every production in self.Productions.

        For each production p this sets p.lr_items (list of LRItem objects)
        and chains them through lr_next, starting from p itself; the last
        item's lr_next is None.  Each item also gets lr_after and lr_before.
        """
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while 1:
                if i > len(p):
                    # Past the final dot position: terminate the chain.
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    # NOTE(review): for i == 0 the index i-1 is -1, which reads
                    # the last element instead of raising IndexError -- confirm
                    # callers never rely on lr_before of the first item.
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items

# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This basic class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here.  They are defined
# in the derived class LRGeneratedTable.
# -----------------------------------------------------------------------------

class VersionError(YaccError): pass

class LRTable(object):
    """Container for LR parsing tables loaded from a module or a pickle file.

    Attributes (all None until one of the read_* methods succeeds):
      lr_action       - action table
      lr_goto         - goto table
      lr_productions  - list of MiniProduction objects
      lr_method       - parsing method recorded in the table file
    """

    def __init__(self):
        self.lr_action = None
        self.lr_goto = None
        self.lr_productions = None
        self.lr_method = None

    def read_table(self,module):
        """Load tables from a generated table module.

        module is either a module object or a module name to import.
        Returns the signature stored in the table module.  Raises
        VersionError if the stored table version differs from __tabversion__.
        """
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                # exec() cannot bind function locals on Python 3, so import
                # into an explicit namespace and fetch the module from it.
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = [MiniProduction(*p) for p in parsetab._lr_productions]

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self,filename):
        """Load tables from a pickle file written by pickle_table().

        Returns the signature stored in the file.  Raises VersionError if the
        stored table version differs from __tabversion__.  The file is closed
        on every exit path, including when loading fails part-way.

        NOTE: unpickling can execute arbitrary code; filename must refer to a
        table file from a trusted location.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        in_f = open(filename,"rb")
        try:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError("yacc table file version is out of date")
            self.lr_method = pickle.load(in_f)
            signature = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto = pickle.load(in_f)
            productions = pickle.load(in_f)
        finally:
            # Previously the handle leaked whenever an exception was raised.
            in_f.close()

        self.lr_productions = [MiniProduction(*p) for p in productions]
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        """Call bind(pdict) on every entry of self.lr_productions."""
        for p in self.lr_productions:
            p.bind(pdict)

# -----------------------------------------------------------------------------
#                           === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X,R,FP):
    """Return a dict mapping every x in X to its computed list F(x).

    R(x) yields the elements related to x; FP(x) yields the initial list
    F'(x).  The lists returned by FP are extended in place.
    """
    N = dict.fromkeys(X, 0)     # 0 marks "not visited yet"
    stack = []
    F = { }
    for x in X:
        if N[x] == 0:
            traverse(x,N,stack,F,X,R,FP)
    return F

def traverse(x,N,stack,F,X,R,FP):
    """Depth-first helper for digraph(); updates N, stack and F in place."""
    stack.append(x)
    depth = len(stack)
    N[x] = depth
    F[x] = FP(x)             # F(X) <- F'(x)

    for y in R(x):           # Visit every y related to x
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)

    if N[x] != depth:
        return

    # x closes a group: everything stacked above it (and x itself) is marked
    # finished and shares the very same result list as x.
    while 1:
        top = stack.pop()
        N[top] = MAXINT
        F[top] = F[x]
        if top == x:
            break

class LALRError(YaccError): pass

# -----------------------------------------------------------------------------
#                             == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.
# There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
        """Build the LR tables for grammar using method ('SLR' or 'LALR').

        Raises LALRError for any other method.  A NullLogger is used when no
        (truthy) log object is supplied.  Table construction runs
        immediately, as the last step of the constructor.
        """
        if method not in ('SLR','LALR'):
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        self.log = log or NullLogger()

        # Internal attributes
        self.lr_action = {}                         # Action table
        self.lr_goto = {}                           # Goto table
        self.lr_productions = grammar.Productions   # Copy of grammar Production array
        self.lr_goto_cache = {}                     # Cache of computed gotos
        self.lr0_cidhash = {}                       # Cache of closures

        self._add_count = 0                         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []                         # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        grammar = self.grammar
        grammar.build_lritems()
        grammar.compute_first()
        grammar.compute_follow()
        self.lr_parse_table()

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self,I):
        """Return a new list holding I followed by the items its closure adds.

        Each call bumps self._add_count and stamps every production it pulls
        in (attribute lr0_added) so the same production is added only once
        per call.
        """
        self._add_count += 1

        # Start from a copy of I and keep sweeping until a pass adds nothing.
        # The list grows while it is being iterated; that is intentional.
        closure = I[:]
        while 1:
            grew = 0
            for item in closure:
                for prod in item.lr_after:
                    if getattr(prod,"lr0_added",0) == self._add_count:
                        continue
                    # Add B --> .G to the closure
                    closure.append(prod.lr_next)
                    prod.lr0_added = self._add_count
                    grew = 1
            if not grew:
                break

        return closure

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.   This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self,I,x):
        """Return the goto set for item set I on symbol x (cached).

        Results are cached in self.lr_goto_cache under (id(I), x).  A second
        level, keyed by x and then by the id() of each advanced item in turn,
        ensures that the same sequence of advanced items always yields the
        same list object.
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                # Descend one level of the per-symbol dictionary for each
                # item that can advance over x.
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        # The '$end' key of the final level holds the finished goto set.
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g

    # Compute the LR(0) sets of item function
    def lr0_items(self):
        """Return the list C of LR(0) item sets reachable from the start item.

        Also records each set's index in self.lr0_cidhash, keyed by id(set).
        """

        C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        # (C grows inside the loop, so an index is used instead of iteration.)
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = { }
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I,x)
                if not g: continue
                if id(g) in self.lr0_cidhash: continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C

    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    #  J.
Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", + # McGraw-Hill Book Company, (1985). + # + # ----------------------------------------------------------------------------- + + # ----------------------------------------------------------------------------- + # compute_nullable_nonterminals() + # + # Creates a dictionary containing all of the non-terminals that might produce + # an empty production. + # ----------------------------------------------------------------------------- + + def compute_nullable_nonterminals(self): + nullable = {} + num_nullable = 0 + while 1: + for p in self.grammar.Productions[1:]: + if p.len == 0: + nullable[p.name] = 1 + continue + for t in p.prod: + if not t in nullable: break + else: + nullable[p.name] = 1 + if len(nullable) == num_nullable: break + num_nullable = len(nullable) + return nullable + + # ----------------------------------------------------------------------------- + # find_nonterminal_trans(C) + # + # Given a set of LR(0) items, this functions finds all of the non-terminal + # transitions. These are transitions in which a dot appears immediately before + # a non-terminal. Returns a list of tuples of the form (state,N) where state + # is the state number and N is the nonterminal symbol. + # + # The input C is the set of LR(0) items. + # ----------------------------------------------------------------------------- + + def find_nonterminal_transitions(self,C): + trans = [] + for state in range(len(C)): + for p in C[state]: + if p.lr_index < p.len - 1: + t = (state,p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: trans.append(t) + state = state + 1 + return trans + + # ----------------------------------------------------------------------------- + # dr_relation() + # + # Computes the DR(p,A) relationships for non-terminal transitions. The input + # is a tuple (state,N) where state is a number and N is a nonterminal symbol. + # + # Returns a list of terminals. 
+ # ----------------------------------------------------------------------------- + + def dr_relation(self,C,trans,nullable): + dr_set = { } + state,N = trans + terms = [] + + g = self.lr0_goto(C[state],N) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: terms.append(a) + + # This extra bit is to handle the start state + if state == 0 and N == self.grammar.Productions[0].prod[0]: + terms.append('$end') + + return terms + + # ----------------------------------------------------------------------------- + # reads_relation() + # + # Computes the READS() relation (p,A) READS (t,C). + # ----------------------------------------------------------------------------- + + def reads_relation(self,C, trans, empty): + # Look for empty transitions + rel = [] + state, N = trans + + g = self.lr0_goto(C[state],N) + j = self.lr0_cidhash.get(id(g),-1) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j,a)) + + return rel + + # ----------------------------------------------------------------------------- + # compute_lookback_includes() + # + # Determines the lookback and includes relations + # + # LOOKBACK: + # + # This relation is determined by running the LR(0) state machine forward. + # For example, starting with a production "N : . A B C", we run it forward + # to obtain "N : A B C ." We then build a relationship between this final + # state and the starting state. These relationships are stored in a dictionary + # lookdict. + # + # INCLUDES: + # + # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). + # + # This relation is used to determine non-terminal transitions that occur + # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) + # if the following holds: + # + # B -> LAT, where T -> epsilon and p' -L-> p + # + # L is essentially a prefix (which may be empty), T is a suffix that must be + # able to derive an empty string. 
    # State p' must lead to state p with the string L.
    #
    # -----------------------------------------------------------------------------

    def compute_lookback_includes(self,C,trans,nullable):
        """Return (lookdict, includedict) for the nonterminal transitions.

        C is the list of LR(0) item sets, trans the list of (state, N)
        transitions and nullable a container of nullable symbols.

        lookdict maps each (state, N) to a list of (final_state, item) pairs.
        includedict maps a transition (j, t) to the list of (state, N)
        transitions recorded for it.
        """

        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j,t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals: break      # No forget it
                            if not p.prod[li] in nullable: break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j,t))

                    g = self.lr0_goto(C[j],t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g),-1)     # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name: continue
                    if r.len != p.len:   continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]: break
                        i = i + 1
                    else:
                        # No mismatch found: r is p with the dot moved to the end.
                        lookb.append((j,r))
            for i in includes:
                if not i in includedict: includedict[i] = []
                includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict

    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs:  C        =  Set of LR(0) items
    #          ntrans   = Set of nonterminal transitions
    #          nullable = Set of empty transitions
    #
    # Returns a set containing the read sets
    # -----------------------------------------------------------------------------

    def compute_read_sets(self,C, ntrans, nullable):
        """Return the read sets: digraph() over dr_relation and reads_relation."""
        FP = lambda x: self.dr_relation(C,x,nullable)
        R = lambda x: self.reads_relation(C,x,nullable)
        F = digraph(ntrans,R,FP)
        return F

    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
    #
    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
    #
    # Inputs:
    #            ntrans     = Set of nonterminal transitions
    #            readsets   = Readset (previously computed)
    #            inclsets   = Include sets (previously computed)
    #
    # Returns a set containing the follow sets
    # -----------------------------------------------------------------------------

    def compute_follow_sets(self,ntrans,readsets,inclsets):
        """Return the follow sets: digraph() seeded by readsets over inclsets.

        Transitions missing from inclsets are treated as including nothing.
        """
        FP = lambda x: readsets[x]
        R = lambda x: inclsets.get(x,[])
        F = digraph(ntrans,R,FP)
        return F

    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead symbols to grammar rules.
+ # + # Inputs: lookbacks - Set of lookback relations + # followset - Computed follow set + # + # This function directly attaches the lookaheads to productions contained + # in the lookbacks set + # ----------------------------------------------------------------------------- + + def add_lookaheads(self,lookbacks,followset): + for trans,lb in lookbacks.items(): + # Loop over productions in lookback + for state,p in lb: + if not state in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans,[]) + for a in f: + if a not in p.lookaheads[state]: p.lookaheads[state].append(a) + + # ----------------------------------------------------------------------------- + # add_lalr_lookaheads() + # + # This function does all of the work of adding lookahead information for use + # with LALR parsing + # ----------------------------------------------------------------------------- + + def add_lalr_lookaheads(self,C): + # Determine all of the nullable nonterminals + nullable = self.compute_nullable_nonterminals() + + # Find all non-terminal transitions + trans = self.find_nonterminal_transitions(C) + + # Compute read sets + readsets = self.compute_read_sets(C,trans,nullable) + + # Compute lookback/includes relations + lookd, included = self.compute_lookback_includes(C,trans,nullable) + + # Compute LALR FOLLOW sets + followsets = self.compute_follow_sets(trans,readsets,included) + + # Add all of the lookaheads + self.add_lookaheads(lookd,followsets) + + # ----------------------------------------------------------------------------- + # lr_parse_table() + # + # This function constructs the parse tables for SLR or LALR + # ----------------------------------------------------------------------------- + def lr_parse_table(self): + Productions = self.grammar.Productions + Precedence = self.grammar.Precedence + goto = self.lr_goto # Goto array + action = self.lr_action # Action array + log = self.log # Logger for output + + actionp = { } # Action production array (temporary) + + 
log.info("Parsing method: %s", self.lr_method) + + # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items + # This determines the number of states + + C = self.lr0_items() + + if self.lr_method == 'LALR': + self.add_lalr_lookaheads(C) + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [ ] # List of actions + st_action = { } + st_actionp = { } + st_goto = { } + log.info("") + log.info("state %d", st) + log.info("") + for p in I: + log.info(" (%d) %s", p.number, str(p)) + log.info("") + + for p in I: + if p.len == p.lr_index + 1: + if p.name == "S'": + # Start symbol. Accept! + st_action["$end"] = 0 + st_actionp["$end"] = p + else: + # We are at the end of a production. Reduce! + if self.lr_method == 'LALR': + laheads = p.lookaheads[st] + else: + laheads = self.grammar.Follow[p.name] + for a in laheads: + actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) + r = st_action.get(a,None) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + sprec,slevel = Productions[st_actionp[a].number].prec + rprec,rlevel = Precedence.get(a,('right',0)) + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + st_action[a] = -p.number + st_actionp[a] = p + if not slevel and not rlevel: + log.info(" ! shift/reduce conflict for %s resolved as reduce",a) + self.sr_conflicts.append((st,a,'reduce')) + Productions[p.number].reduced += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + log.info(" ! shift/reduce conflict for %s resolved as shift",a) + self.sr_conflicts.append((st,a,'shift')) + elif r < 0: + # Reduce/reduce conflict. 
In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + st_action[a] = -p.number + st_actionp[a] = p + chosenp,rejectp = pp,oldp + Productions[p.number].reduced += 1 + Productions[oldp.number].reduced -= 1 + else: + chosenp,rejectp = oldp,pp + self.rr_conflicts.append((st,chosenp,rejectp)) + log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) + else: + raise LALRError("Unknown conflict in state %d" % st) + else: + st_action[a] = -p.number + st_actionp[a] = p + Productions[p.number].reduced += 1 + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if a in self.grammar.Terminals: + g = self.lr0_goto(I,a) + j = self.lr0_cidhash.get(id(g),-1) + if j >= 0: + # We are in a shift state + actlist.append((a,p,"shift and go to state %d" % j)) + r = st_action.get(a,None) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + raise LALRError("Shift/shift conflict in state %d" % st) + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + rprec,rlevel = Productions[st_actionp[a].number].prec + sprec,slevel = Precedence.get(a,('right',0)) + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + # We decide to shift here... highest precedence to shift + Productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + if not rlevel: + log.info(" ! shift/reduce conflict for %s resolved as shift",a) + self.sr_conflicts.append((st,a,'shift')) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + log.info(" ! 
shift/reduce conflict for %s resolved as reduce",a) + self.sr_conflicts.append((st,a,'reduce')) + + else: + raise LALRError("Unknown conflict in state %d" % st) + else: + st_action[a] = j + st_actionp[a] = p + + # Print the actions associated with each terminal + _actprint = { } + for a,p,m in actlist: + if a in st_action: + if p is st_actionp[a]: + log.info(" %-15s %s",a,m) + _actprint[(a,m)] = 1 + log.info("") + # Print the actions that were not used. (debugging) + not_used = 0 + for a,p,m in actlist: + if a in st_action: + if p is not st_actionp[a]: + if not (a,m) in _actprint: + log.debug(" ! %-15s [ %s ]",a,m) + not_used = 1 + _actprint[(a,m)] = 1 + if not_used: + log.debug("") + + # Construct the goto table for this state + + nkeys = { } + for ii in I: + for s in ii.usyms: + if s in self.grammar.Nonterminals: + nkeys[s] = None + for n in nkeys: + g = self.lr0_goto(I,n) + j = self.lr0_cidhash.get(id(g),-1) + if j >= 0: + st_goto[n] = j + log.info(" %-30s shift and go to state %d",n,j) + + action[st] = st_action + actionp[st] = st_actionp + goto[st] = st_goto + st += 1 + + + # ----------------------------------------------------------------------------- + # write() + # + # This function writes the LR parsing tables to a file + # ----------------------------------------------------------------------------- + + def write_table(self,modulename,outputdir='',signature=""): + basemodulename = modulename.split(".")[-1] + filename = os.path.join(outputdir,basemodulename) + ".py" + try: + f = open(filename,"w") + + f.write(""" +# %s +# This file is automatically generated. Do not edit. 
+_tabversion = %r + +_lr_method = %r + +_lr_signature = %r + """ % (filename, __tabversion__, self.lr_method, signature)) + + # Change smaller to 0 to go back to original tables + smaller = 1 + + # Factor out names to try and make smaller + if smaller: + items = { } + + for s,nd in self.lr_action.items(): + for name,v in nd.items(): + i = items.get(name) + if not i: + i = ([],[]) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write("\n_lr_action_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_action = { } +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = { } + _lr_action[_x][_k] = _y +del _lr_action_items +""") + + else: + f.write("\n_lr_action = { "); + for k,v in self.lr_action.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + if smaller: + # Factor out names to try and make smaller + items = { } + + for s,nd in self.lr_goto.items(): + for name,v in nd.items(): + i = items.get(name) + if not i: + i = ([],[]) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write("\n_lr_goto_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_goto = { } +for _k, _v in _lr_goto_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_goto: _lr_goto[_x] = { } + _lr_goto[_x][_k] = _y +del _lr_goto_items +""") + else: + f.write("\n_lr_goto = { "); + for k,v in self.lr_goto.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + # Write production table + f.write("_lr_productions = [\n") + for p in self.lr_productions: + if p.func: + f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) + else: + f.write(" 
(%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) + f.write("]\n") + f.close() + + except IOError: + e = sys.exc_info()[1] + sys.stderr.write("Unable to create '%s'\n" % filename) + sys.stderr.write(str(e)+"\n") + return + + + # ----------------------------------------------------------------------------- + # pickle_table() + # + # This function pickles the LR parsing tables to a supplied file object + # ----------------------------------------------------------------------------- + + def pickle_table(self,filename,signature=""): + try: + import cPickle as pickle + except ImportError: + import pickle + outf = open(filename,"wb") + pickle.dump(__tabversion__,outf,pickle_protocol) + pickle.dump(self.lr_method,outf,pickle_protocol) + pickle.dump(signature,outf,pickle_protocol) + pickle.dump(self.lr_action,outf,pickle_protocol) + pickle.dump(self.lr_goto,outf,pickle_protocol) + + outp = [] + for p in self.lr_productions: + if p.func: + outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) + else: + outp.append((str(p),p.name,p.len,None,None,None)) + pickle.dump(outp,outf,pickle_protocol) + outf.close() + +# ----------------------------------------------------------------------------- +# === INTROSPECTION === +# +# The following functions and classes are used to implement the PLY +# introspection features followed by the yacc() function itself. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. 
# -----------------------------------------------------------------------------

def get_caller_module_dict(levels):
    """Return a copy of the globals, overlaid with the locals, of a caller.

    levels is the number of frames to walk back from this function's own
    frame (1 is the immediate caller).
    """
    try:
        raise RuntimeError
    except RuntimeError:
        # The traceback of the exception just raised gives us our own frame.
        frame = sys.exc_info()[2].tb_frame
        remaining = levels
        while remaining > 0:
            frame = frame.f_back
            remaining -= 1
        symbols = frame.f_globals.copy()
        if frame.f_globals != frame.f_locals:
            symbols.update(frame.f_locals)

        return symbols

# -----------------------------------------------------------------------------
# parse_grammar()
#
# This takes a raw grammar rule string and parses it into production data
# -----------------------------------------------------------------------------
def parse_grammar(doc,file,line):
    """Parse a grammar docstring into (file, lineno, prodname, syms) tuples.

    doc is the raw rule text, file and line locate it for error messages;
    the first line of doc is reported as line + 1.  Blank lines are skipped.
    Lines starting with '|' continue the previous rule.  Raises SyntaxError
    for a misplaced '|', a missing ':' / '::=' or any other malformed line.
    """
    rules = []
    previous = None
    lineno = line
    # Walk the doc string one line at a time
    for text in doc.splitlines():
        lineno += 1
        fields = text.split()
        if not fields:
            continue
        try:
            if fields[0] == '|':
                # This is a continuation of a previous rule
                if not previous:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,lineno))
                name = previous
                symbols = fields[1:]
            else:
                name = previous = fields[0]
                symbols = fields[2:]
                # A line with a lone name has no fields[1]; the IndexError is
                # turned into a SyntaxError by the generic handler below.
                if fields[1] not in (':', '::='):
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,lineno))

            rules.append((file,lineno,name,symbols))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,lineno,text.strip()))

    return rules

# -----------------------------------------------------------------------------
# ParserReflect()
#
# This class represents information extracted for building a parser including
# start symbol, error function, tokens, precedence list, action functions,
# etc.
# -----------------------------------------------------------------------------
class ParserReflect(object):
    def __init__(self,pdict,log=None):
        """Remember the symbol dictionary pdict and reset all collected data.

        log receives diagnostics; when it is None a PlyLogger writing to
        sys.stderr is created.
        """
        self.pdict = pdict
        self.start = None
        self.error_func = None
        self.tokens = None
        self.files = {}
        self.grammar = []
        self.error = 0

        self.log = PlyLogger(sys.stderr) if log is None else log

    # Get all of the basic information
    def get_all(self):
        """Run every get_* collection step, in order."""
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        """Run every validate_* step, in order, and return self.error."""
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        """Return an MD5 digest over start, prec, tokens and rule docstrings.

        Each non-empty piece is latin-1 encoded and fed to the hash in that
        order.  A TypeError or ValueError stops the feeding silently; the
        digest of whatever was hashed up to that point is returned.
        """
        try:
            from hashlib import md5
        except ImportError:
            from md5 import md5
        try:
            sig = md5()
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                prec_text = "".join(["".join(entry) for entry in self.prec])
                sig.update(prec_text.encode('latin-1'))
            if self.tokens:
                token_text = " ".join(self.tokens)
                sig.update(token_text.encode('latin-1'))
            for func in self.pfuncs:
                rule_doc = func[3]
                if rule_doc:
                    sig.update(rule_doc.encode('latin-1'))
        except (TypeError,ValueError):
            pass
        return sig.digest()

    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
+ # ----------------------------------------------------------------------------- + + def validate_files(self): + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + + for filename in self.files.keys(): + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea. Assume it's okay. + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + continue + + counthash = { } + for linen,l in enumerate(lines): + linen += 1 + m = fre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev) + + # Get the start symbol + def get_start(self): + self.start = self.pdict.get('start') + + # Validate the start symbol + def validate_start(self): + if self.start is not None: + if not isinstance(self.start,str): + self.log.error("'start' must be a string") + + # Look for error handler + def get_error_func(self): + self.error_func = self.pdict.get('p_error') + + # Validate the error function + def validate_error_func(self): + if self.error_func: + if isinstance(self.error_func,types.FunctionType): + ismethod = 0 + elif isinstance(self.error_func, types.MethodType): + ismethod = 1 + else: + self.log.error("'p_error' defined, but is not a function or method") + self.error = 1 + return + + eline = func_code(self.error_func).co_firstlineno + efile = func_code(self.error_func).co_filename + self.files[efile] = 1 + + if (func_code(self.error_func).co_argcount != 1+ismethod): + self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) + self.error = 1 + + # Get the tokens map + def get_tokens(self): + tokens = self.pdict.get("tokens",None) + if not tokens: + self.log.error("No token list is defined") + self.error = 1 + return + + if not isinstance(tokens,(list, tuple)): + self.log.error("tokens must be a list or tuple") + self.error = 1 + return + + if 
not tokens: + self.log.error("tokens is empty") + self.error = 1 + return + + self.tokens = tokens + + # Validate the tokens + def validate_tokens(self): + # Validate the tokens. + if 'error' in self.tokens: + self.log.error("Illegal token name 'error'. Is a reserved word") + self.error = 1 + return + + terminals = {} + for n in self.tokens: + if n in terminals: + self.log.warning("Token '%s' multiply defined", n) + terminals[n] = 1 + + # Get the precedence map (if any) + def get_precedence(self): + self.prec = self.pdict.get("precedence",None) + + # Validate and parse the precedence map + def validate_precedence(self): + preclist = [] + if self.prec: + if not isinstance(self.prec,(list,tuple)): + self.log.error("precedence must be a list or tuple") + self.error = 1 + return + for level,p in enumerate(self.prec): + if not isinstance(p,(list,tuple)): + self.log.error("Bad precedence table") + self.error = 1 + return + + if len(p) < 2: + self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) + self.error = 1 + return + assoc = p[0] + if not isinstance(assoc,str): + self.log.error("precedence associativity must be a string") + self.error = 1 + return + for term in p[1:]: + if not isinstance(term,str): + self.log.error("precedence items must be strings") + self.error = 1 + return + preclist.append((term,assoc,level+1)) + self.preclist = preclist + + # Get all p_functions from the grammar + def get_pfunctions(self): + p_functions = [] + for name, item in self.pdict.items(): + if name[:2] != 'p_': continue + if name == 'p_error': continue + if isinstance(item,(types.FunctionType,types.MethodType)): + line = func_code(item).co_firstlineno + file = func_code(item).co_filename + p_functions.append((line,file,name,item.__doc__)) + + # Sort all of the actions by line number + p_functions.sort() + self.pfuncs = p_functions + + + # Validate all of the p_functions + def validate_pfunctions(self): + grammar = [] + # Check for non-empty symbols + if 
len(self.pfuncs) == 0: + self.log.error("no rules of the form p_rulename are defined") + self.error = 1 + return + + for line, file, name, doc in self.pfuncs: + func = self.pdict[name] + if isinstance(func, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + if func_code(func).co_argcount > reqargs: + self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) + self.error = 1 + elif func_code(func).co_argcount < reqargs: + self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) + self.error = 1 + elif not func.__doc__: + self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) + else: + try: + parsed_g = parse_grammar(doc,file,line) + for g in parsed_g: + grammar.append((name, g)) + except SyntaxError: + e = sys.exc_info()[1] + self.log.error(str(e)) + self.error = 1 + + # Looks like a valid grammar rule + # Mark the file in which defined. + self.files[file] = 1 + + # Secondary validation step that looks for p_ definitions that are not functions + # or functions that look like they might be grammar rules. 
+ + for n,v in self.pdict.items(): + if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue + if n[0:2] == 't_': continue + if n[0:2] == 'p_' and n != 'p_error': + self.log.warning("'%s' not defined as a function", n) + if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or + (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): + try: + doc = v.__doc__.split(" ") + if doc[1] == ':': + self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", + func_code(v).co_filename, func_code(v).co_firstlineno,n) + except Exception: + pass + + self.grammar = grammar + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build a parser +# ----------------------------------------------------------------------------- + +def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, + check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', + debuglog=None, errorlog = None, picklefile=None): + + global parse # Reference to the parsing method of the last built parser + + # If pickling is enabled, table files are not created + + if picklefile: + write_tables = 0 + + if errorlog is None: + errorlog = PlyLogger(sys.stderr) + + # Get the module dictionary used for the parser + if module: + _items = [(k,getattr(module,k)) for k in dir(module)] + pdict = dict(_items) + else: + pdict = get_caller_module_dict(2) + + # Collect parser information from the dictionary + pinfo = ParserReflect(pdict,log=errorlog) + pinfo.get_all() + + if pinfo.error: + raise YaccError("Unable to build parser") + + # Check signature against table files (if any) + signature = pinfo.signature() + + # Read the tables + try: + lr = LRTable() + if picklefile: + read_signature = lr.read_pickle(picklefile) + else: + read_signature = lr.read_table(tabmodule) + if optimize or (read_signature == signature): + try: + 
lr.bind_callables(pinfo.pdict) + parser = LRParser(lr,pinfo.error_func) + parse = parser.parse + return parser + except Exception: + e = sys.exc_info()[1] + errorlog.warning("There was a problem loading the table file: %s", repr(e)) + except VersionError: + e = sys.exc_info() + errorlog.warning(str(e)) + except Exception: + pass + + if debuglog is None: + if debug: + debuglog = PlyLogger(open(debugfile,"w")) + else: + debuglog = NullLogger() + + debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) + + + errors = 0 + + # Validate the parser information + if pinfo.validate_all(): + raise YaccError("Unable to build parser") + + if not pinfo.error_func: + errorlog.warning("no p_error() function is defined") + + # Create a grammar object + grammar = Grammar(pinfo.tokens) + + # Set precedence level for terminals + for term, assoc, level in pinfo.preclist: + try: + grammar.set_precedence(term,assoc,level) + except GrammarError: + e = sys.exc_info()[1] + errorlog.warning("%s",str(e)) + + # Add productions to the grammar + for funcname, gram in pinfo.grammar: + file, line, prodname, syms = gram + try: + grammar.add_production(prodname,syms,funcname,file,line) + except GrammarError: + e = sys.exc_info()[1] + errorlog.error("%s",str(e)) + errors = 1 + + # Set the grammar start symbols + try: + if start is None: + grammar.set_start(pinfo.start) + else: + grammar.set_start(start) + except GrammarError: + e = sys.exc_info()[1] + errorlog.error(str(e)) + errors = 1 + + if errors: + raise YaccError("Unable to build parser") + + # Verify the grammar structure + undefined_symbols = grammar.undefined_symbols() + for sym, prod in undefined_symbols: + errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) + errors = 1 + + unused_terminals = grammar.unused_terminals() + if unused_terminals: + debuglog.info("") + debuglog.info("Unused terminals:") + debuglog.info("") + for term in unused_terminals: + 
errorlog.warning("Token '%s' defined, but not used", term) + debuglog.info(" %s", term) + + # Print out all productions to the debug log + if debug: + debuglog.info("") + debuglog.info("Grammar") + debuglog.info("") + for n,p in enumerate(grammar.Productions): + debuglog.info("Rule %-5d %s", n, p) + + # Find unused non-terminals + unused_rules = grammar.unused_rules() + for prod in unused_rules: + errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) + + if len(unused_terminals) == 1: + errorlog.warning("There is 1 unused token") + if len(unused_terminals) > 1: + errorlog.warning("There are %d unused tokens", len(unused_terminals)) + + if len(unused_rules) == 1: + errorlog.warning("There is 1 unused rule") + if len(unused_rules) > 1: + errorlog.warning("There are %d unused rules", len(unused_rules)) + + if debug: + debuglog.info("") + debuglog.info("Terminals, with rules where they appear") + debuglog.info("") + terms = list(grammar.Terminals) + terms.sort() + for term in terms: + debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info("") + debuglog.info("Nonterminals, with rules where they appear") + debuglog.info("") + nonterms = list(grammar.Nonterminals) + nonterms.sort() + for nonterm in nonterms: + debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info("") + + if check_recursion: + unreachable = grammar.find_unreachable() + for u in unreachable: + errorlog.warning("Symbol '%s' is unreachable",u) + + infinite = grammar.infinite_cycles() + for inf in infinite: + errorlog.error("Infinite recursion detected for symbol '%s'", inf) + errors = 1 + + unused_prec = grammar.unused_precedence() + for term, assoc in unused_prec: + errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) + errors = 1 + + if errors: + raise YaccError("Unable to build parser") + + # Run the LRGeneratedTable on the grammar 
+ if debug: + errorlog.debug("Generating %s tables", method) + + lr = LRGeneratedTable(grammar,method,debuglog) + + if debug: + num_sr = len(lr.sr_conflicts) + + # Report shift/reduce and reduce/reduce conflicts + if num_sr == 1: + errorlog.warning("1 shift/reduce conflict") + elif num_sr > 1: + errorlog.warning("%d shift/reduce conflicts", num_sr) + + num_rr = len(lr.rr_conflicts) + if num_rr == 1: + errorlog.warning("1 reduce/reduce conflict") + elif num_rr > 1: + errorlog.warning("%d reduce/reduce conflicts", num_rr) + + # Write out conflicts to the output file + if debug and (lr.sr_conflicts or lr.rr_conflicts): + debuglog.warning("") + debuglog.warning("Conflicts:") + debuglog.warning("") + + for state, tok, resolution in lr.sr_conflicts: + debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) + + already_reported = {} + for state, rule, rejected in lr.rr_conflicts: + if (state,id(rule),id(rejected)) in already_reported: + continue + debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) + debuglog.warning("rejected rule (%s) in state %d", rejected,state) + errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) + errorlog.warning("rejected rule (%s) in state %d", rejected, state) + already_reported[state,id(rule),id(rejected)] = 1 + + warned_never = [] + for state, rule, rejected in lr.rr_conflicts: + if not rejected.reduced and (rejected not in warned_never): + debuglog.warning("Rule (%s) is never reduced", rejected) + errorlog.warning("Rule (%s) is never reduced", rejected) + warned_never.append(rejected) + + # Write the table file if requested + if write_tables: + lr.write_table(tabmodule,outputdir,signature) + + # Write a pickled version of the tables + if picklefile: + lr.pickle_table(picklefile,signature) + + # Build the parser + lr.bind_callables(pinfo.pdict) + parser = LRParser(lr,pinfo.error_func) + + parse = parser.parse + return 
parser diff --git a/bitbake/lib/pysh/__init__.py b/bitbake/lib/pysh/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/bitbake/lib/pysh/__init__.py diff --git a/bitbake/lib/pysh/builtin.py b/bitbake/lib/pysh/builtin.py new file mode 100644 index 0000000000..25ad22eb74 --- /dev/null +++ b/bitbake/lib/pysh/builtin.py @@ -0,0 +1,710 @@ +# builtin.py - builtins and utilities definitions for pysh. +# +# Copyright 2007 Patrick Mezard +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. + +"""Builtin and internal utilities implementations. + +- Beware not to use python interpreter environment as if it were the shell +environment. For instance, commands working directory must be explicitely handled +through env['PWD'] instead of relying on python working directory. +""" +import errno +import optparse +import os +import re +import subprocess +import sys +import time + +def has_subprocess_bug(): + return getattr(subprocess, 'list2cmdline') and \ + ( subprocess.list2cmdline(['']) == '' or \ + subprocess.list2cmdline(['foo|bar']) == 'foo|bar') + +# Detect python bug 1634343: "subprocess swallows empty arguments under win32" +# <http://sourceforge.net/tracker/index.php?func=detail&aid=1634343&group_id=5470&atid=105470> +# Also detect: "[ 1710802 ] subprocess must escape redirection characters under win32" +# <http://sourceforge.net/tracker/index.php?func=detail&aid=1710802&group_id=5470&atid=105470> +if has_subprocess_bug(): + import subprocess_fix + subprocess.list2cmdline = subprocess_fix.list2cmdline + +from sherrors import * + +class NonExitingParser(optparse.OptionParser): + """OptionParser default behaviour upon error is to print the error message and + exit. Raise a utility error instead. 
+ """ + def error(self, msg): + raise UtilityError(msg) + +#------------------------------------------------------------------------------- +# set special builtin +#------------------------------------------------------------------------------- +OPT_SET = NonExitingParser(usage="set - set or unset options and positional parameters") +OPT_SET.add_option( '-f', action='store_true', dest='has_f', default=False, + help='The shell shall disable pathname expansion.') +OPT_SET.add_option('-e', action='store_true', dest='has_e', default=False, + help="""When this option is on, if a simple command fails for any of the \ + reasons listed in Consequences of Shell Errors or returns an exit status \ + value >0, and is not part of the compound list following a while, until, \ + or if keyword, and is not a part of an AND or OR list, and is not a \ + pipeline preceded by the ! reserved word, then the shell shall immediately \ + exit.""") +OPT_SET.add_option('-x', action='store_true', dest='has_x', default=False, + help="""The shell shall write to standard error a trace for each command \ + after it expands the command and before it executes it. 
It is unspecified \ + whether the command that turns tracing off is traced.""") + +def builtin_set(name, args, interp, env, stdin, stdout, stderr, debugflags): + if 'debug-utility' in debugflags: + print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n') + + option, args = OPT_SET.parse_args(args) + env = interp.get_env() + + if option.has_f: + env.set_opt('-f') + if option.has_e: + env.set_opt('-e') + if option.has_x: + env.set_opt('-x') + return 0 + +#------------------------------------------------------------------------------- +# shift special builtin +#------------------------------------------------------------------------------- +def builtin_shift(name, args, interp, env, stdin, stdout, stderr, debugflags): + if 'debug-utility' in debugflags: + print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n') + + params = interp.get_env().get_positional_args() + if args: + try: + n = int(args[0]) + if n > len(params): + raise ValueError() + except ValueError: + return 1 + else: + n = 1 + + params[:n] = [] + interp.get_env().set_positional_args(params) + return 0 + +#------------------------------------------------------------------------------- +# export special builtin +#------------------------------------------------------------------------------- +OPT_EXPORT = NonExitingParser(usage="set - set or unset options and positional parameters") +OPT_EXPORT.add_option('-p', action='store_true', dest='has_p', default=False) + +def builtin_export(name, args, interp, env, stdin, stdout, stderr, debugflags): + if 'debug-utility' in debugflags: + print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n') + + option, args = OPT_EXPORT.parse_args(args) + if option.has_p: + raise NotImplementedError() + + for arg in args: + try: + name, value = arg.split('=', 1) + except ValueError: + name, value = arg, None + env = interp.get_env().export(name, value) + + return 0 + +#------------------------------------------------------------------------------- 
# return special builtin
#-------------------------------------------------------------------------------
def builtin_return(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Raise ReturnSignal with the status taken from ``args[0]``.

    A missing, non-integer or out-of-range (not 0..255) argument yields 0.
    """
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))
    res = 0
    if args:
        try:
            res = int(args[0])
        except ValueError:
            res = 0
        if not 0<=res<=255:
            res = 0

    # BUG: should be last executed command exit code
    raise ReturnSignal(res)

#-------------------------------------------------------------------------------
# trap special builtin
#-------------------------------------------------------------------------------
def builtin_trap(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Record ``args[0]`` as the action for every signal spec in ``args[1:]``.

    Returns 2 after writing a usage line when fewer than two arguments are
    given, 0 otherwise.  A failure to store one trap is reported on
    ``stderr`` and does not stop the remaining ones.
    """
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))
    if len(args) < 2:
        stderr.write('trap: usage: trap [[arg] signal_spec ...]\n')
        return 2

    action = args[0]
    for sig in args[1:]:
        try:
            env.traps[sig] = action
        except Exception as e:
            stderr.write('trap: %s\n' % str(e))
    return 0

#-------------------------------------------------------------------------------
# unset special builtin
#-------------------------------------------------------------------------------
OPT_UNSET = NonExitingParser("unset - unset values and attributes of variables and functions")
OPT_UNSET.add_option( '-f', action='store_true', dest='has_f', default=False)
OPT_UNSET.add_option( '-v', action='store_true', dest='has_v', default=False)

def builtin_unset(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Remove variables (or, with -f, functions) from the interpreter env.

    Unknown names are ignored.  Returns 1 if any removal raised
    VarAssignmentError, else 0.  The -v flag is parsed but not consulted.
    """
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    option, args = OPT_UNSET.parse_args(args)

    status = 0
    env = interp.get_env()
    for arg in args:
        try:
            if option.has_f:
                env.remove_function(arg)
            else:
                del env[arg]
        except KeyError:
            pass
        except VarAssignmentError:
            status = 1

    return status

#-------------------------------------------------------------------------------
# wait special builtin
#-------------------------------------------------------------------------------
def builtin_wait(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Return ``interp.wait()`` for the integer ids given in ``args``."""
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    return interp.wait([int(arg) for arg in args])

#-------------------------------------------------------------------------------
# cat utility
#-------------------------------------------------------------------------------
def utility_cat(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Copy each named file (``-`` or no argument means ``stdin``) to ``stdout``.

    File names are resolved against ``env['PWD']``.  A file that does not
    exist is skipped and makes the return status 1; any other IOError is
    re-raised.
    """
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    if not args:
        args = ['-']

    status = 0
    for arg in args:
        if arg == '-':
            data = stdin.read()
        else:
            path = os.path.join(env['PWD'], arg)
            try:
                f = open(path, 'rb')
                try:
                    data = f.read()
                finally:
                    f.close()
            except IOError as e:
                if e.errno != errno.ENOENT:
                    raise
                status = 1
                continue
        stdout.write(data)
        stdout.flush()
    return status

#-------------------------------------------------------------------------------
# cd utility
#-------------------------------------------------------------------------------
OPT_CD = NonExitingParser("cd - change the working directory")

def utility_cd(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Update PWD/OLDPWD in the interpreter environment.

    With no argument the target is ``$HOME`` (nothing happens when HOME is
    unset or empty).  ``-`` switches to ``$OLDPWD`` and echoes the new
    directory.  More than one argument, or ``-`` without OLDPWD, raises
    UtilityError.  Only the shell variables are changed here.
    """
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    option, args = OPT_CD.parse_args(args)
    env = interp.get_env()

    directory = None
    printdir = False
    if not args:
        home = env.get('HOME')
        if not home:
            # Unspecified, do nothing
            return 0
        directory = home
    elif len(args)==1:
        directory = args[0]
        if directory=='-':
            if 'OLDPWD' not in env:
                raise UtilityError("OLDPWD not set")
            printdir = True
            directory = env['OLDPWD']
    else:
        raise UtilityError("too many arguments")

    curpath = None
    # Absolute directories will be handled correctly by the os.path.join call.
    # (A leading '.' also covers '..'.)
    if not directory.startswith('.'):
        # NOTE(review): CDPATH is split on ';' here, not ':' - looks like a
        # Windows-oriented choice; confirm before changing.
        cdpaths = env.get('CDPATH', '.').split(';')
        for cdpath in cdpaths:
            p = os.path.join(cdpath, directory)
            if os.path.isdir(p):
                curpath = p
                break

    if curpath is None:
        curpath = directory
    # Join the selected candidate, so a CDPATH hit is not thrown away.
    curpath = os.path.join(env['PWD'], curpath)

    env['OLDPWD'] = env['PWD']
    env['PWD'] = curpath
    if printdir:
        stdout.write('%s\n' % curpath)
    return 0

#-------------------------------------------------------------------------------
# colon utility
#-------------------------------------------------------------------------------
def utility_colon(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Do nothing and return 0."""
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))
    return 0

#-------------------------------------------------------------------------------
# echo utility
#-------------------------------------------------------------------------------
def utility_echo(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Write the arguments joined by single spaces, plus a newline."""
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    # Echo only takes arguments, no options. Use printf if you need fancy stuff.
    output = ' '.join(args) + '\n'
    stdout.write(output)
    stdout.flush()
    return 0

#-------------------------------------------------------------------------------
# egrep utility
#-------------------------------------------------------------------------------
# egrep is usually a shell script.
# Unfortunately, pysh does not support shell scripts *with arguments* right now,
# so the redirection is implemented here, assuming grep is available.
def utility_egrep(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Run ``grep -E`` with the given arguments and return its status."""
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    return run_command('grep', ['-E'] + args, interp, env, stdin, stdout,
        stderr, debugflags)

#-------------------------------------------------------------------------------
# env utility
#-------------------------------------------------------------------------------
def utility_env(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Apply leading ``NAME=value`` arguments to ``env``, then run a command.

    With nothing left after the assignments, every variable is written to
    ``stdout`` as ``NAME=value`` and 0 is returned.  Otherwise the next
    argument is looked up with ``env.find_in_path``: 127 when it is not
    found, 126 when running it raises UtilityError, else the command's own
    status.  ``-i`` raises NotImplementedError.
    """
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    if args and args[0]=='-i':
        raise NotImplementedError('env: -i option is not implemented')

    i = 0
    for arg in args:
        if '=' not in arg:
            break
        # Update the current environment
        name, value = arg.split('=', 1)
        env[name] = value
        i += 1

    if args[i:]:
        # Find then execute the specified interpreter
        utility = env.find_in_path(args[i])
        if not utility:
            return 127
        # NOTE(review): slice assignment expects find_in_path to return a
        # list of words; a plain string would be spread one character per
        # element - confirm against its definition.
        args[i:i+1] = utility
        name = args[i]
        args = args[i+1:]
        try:
            return run_command(name, args, interp, env, stdin, stdout, stderr,
                debugflags)
        except UtilityError:
            stderr.write('env: failed to execute %s\n' % ' '.join([name]+args))
            return 126
    else:
        for pair in env.get_variables().iteritems():
            stdout.write('%s=%s\n' % pair)
    return 0

#-------------------------------------------------------------------------------
# exit utility
#-------------------------------------------------------------------------------
def utility_exit(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Raise ExitSignal with the status taken from ``args[0]``.

    A missing, non-integer or out-of-range (not 0..255) argument yields 0.
    """
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    # BUG: should be last executed command exit code
    res = 0
    if args:
        try:
            requested = int(args[0])
        except ValueError:
            requested = None
        # Range-check only a real integer instead of leaning on how None
        # orders against numbers.
        if requested is not None and 0 <= requested <= 255:
            res = requested

    raise ExitSignal(res)

#-------------------------------------------------------------------------------
# fgrep utility
#-------------------------------------------------------------------------------
# see egrep
def utility_fgrep(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Run ``grep -F`` with the given arguments and return its status."""
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    return run_command('grep', ['-F'] + args, interp, env, stdin, stdout,
        stderr, debugflags)

#-------------------------------------------------------------------------------
# gunzip utility
#-------------------------------------------------------------------------------
# see egrep
def utility_gunzip(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Run ``gzip -d`` with the given arguments and return its status."""
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    return run_command('gzip', ['-d'] + args, interp, env, stdin, stdout,
        stderr, debugflags)

#-------------------------------------------------------------------------------
# kill utility
#-------------------------------------------------------------------------------
def utility_kill(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Kill each pid in ``args`` with ``pskill /T`` and wait until ``pslist``
    no longer reports it.

    Returns the status of the last ``pskill`` call, or 0 when no pid was
    given.
    """
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    # Defined up front so an empty argument list returns 0 instead of
    # failing on an unbound name.
    status = 0
    for arg in args:
        pid = int(arg)
        status = subprocess.call(['pskill', '/T', str(pid)],
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # pskill is asynchronous, hence the stupid polling loop
        while 1:
            p = subprocess.Popen(['pslist', str(pid)],
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
            output = p.communicate()[0]
            if ('process %d was not' % pid) in output:
                break
            time.sleep(1)
    return status

#-------------------------------------------------------------------------------
# mkdir utility
#-------------------------------------------------------------------------------
OPT_MKDIR = NonExitingParser("mkdir - make directories.")
OPT_MKDIR.add_option('-p', action='store_true', dest='has_p', default=False)

def utility_mkdir(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Create each directory named in ``args`` under ``env['PWD']``.

    With -p missing parents are created and an already existing path is
    not an error.  Always returns 0; other failures propagate as
    exceptions.
    """
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    # TODO: implement umask
    # TODO: implement proper utility error report
    option, args = OPT_MKDIR.parse_args(args)
    for arg in args:
        path = os.path.join(env['PWD'], arg)
        if option.has_p:
            try:
                os.makedirs(path)
            except OSError as e:
                # os.makedirs reports an existing path as OSError(EEXIST)
                if e.errno != errno.EEXIST:
                    raise
        else:
            os.mkdir(path)
    return 0

#-------------------------------------------------------------------------------
# netstat utility
#-------------------------------------------------------------------------------
def utility_netstat(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Placeholder that produces no output and returns 0."""
    # Do you really expect me to implement netstat ?
    # This empty form is enough for Mercurial tests since it's
    # supposed to generate nothing upon success. Faking this test
    # is not a big deal either.
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))
    return 0

#-------------------------------------------------------------------------------
# pwd utility
#-------------------------------------------------------------------------------
OPT_PWD = NonExitingParser("pwd - return working directory name")
OPT_PWD.add_option('-L', action='store_true', dest='has_L', default=True,
    help="""If the PWD environment variable contains an absolute pathname of \
    the current directory that does not contain the filenames dot or dot-dot, \
    pwd shall write this pathname to standard output. Otherwise, the -L option \
    shall behave as the -P option.""")
# -P clears the shared flag; the default (logical mode) is declared once, on -L.
OPT_PWD.add_option('-P', action='store_false', dest='has_L',
    help="""The absolute pathname written shall not contain filenames that, in \
    the context of the pathname, refer to files of type symbolic link.""")

def utility_pwd(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Write ``env['PWD']`` followed by a newline; -L/-P are accepted but
    do not change the output."""
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    option, args = OPT_PWD.parse_args(args)
    stdout.write('%s\n' % env['PWD'])
    return 0

#-------------------------------------------------------------------------------
# printf utility
#-------------------------------------------------------------------------------
RE_UNESCAPE = re.compile(r'(\\x[a-zA-Z0-9]{2}|\\[0-7]{1,3}|\\.)')

def utility_printf(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Expand backslash escapes in ``args[0]`` and write it, %-formatted
    with the remaining arguments, to ``stdout``."""
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))

    def replace(m):
        # Map one matched escape sequence to the character it denotes.
        assert m.group()
        g = m.group()[1:]
        if g.startswith('x'):
            return chr(int(g[1:], 16))
        if len(g) <= 3 and len([c for c in g if c in '01234567']) == len(g):
            # Yay, an octal number
            return chr(int(g, 8))
        # Any other escape is absent from the table, so the lookup yields None.
        return {
            'a': '\a',
            'b': '\b',
            'f': '\f',
            'n': '\n',
            'r': '\r',
            't': '\t',
            'v': '\v',
            '\\': '\\',
        }.get(g)

    # Convert escape sequences
    format = re.sub(RE_UNESCAPE, replace, args[0])
    stdout.write(format % tuple(args[1:]))
    return 0

#-------------------------------------------------------------------------------
# true utility
#-------------------------------------------------------------------------------
def utility_true(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Return 0."""
    if 'debug-utility' in debugflags:
        print(interp.log(' '.join([name, str(args), interp['PWD']]) + '\n'))
    return 0

#-------------------------------------------------------------------------------
# sed utility
#-------------------------------------------------------------------------------
# Matches a sed substitution script: 's', any delimiter character, anything,
# the same delimiter again, then optional trailing letter flags.
RE_SED = re.compile(r'^s(.).*\1[a-zA-Z]*$')

# cygwin sed fails with some expressions when they do not end with a single
# space. See unit tests for details. Interestingly, the same expressions work
# perfectly in cygwin shell.
def utility_sed(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Run the external sed after padding substitution scripts.

    Every argument matching RE_SED gets one trailing space appended; the
    'args' list is modified in place. The command is then delegated to
    run_command() and its return value is returned unchanged.
    """
    if 'debug-utility' in debugflags:
        # log() returns None; printing its result emitted a stray "None".
        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')

    # Scan pattern arguments and append a space if necessary
    for i, arg in enumerate(args):
        if RE_SED.search(arg):
            args[i] = arg + ' '

    return run_command(name, args, interp, env, stdin, stdout,
                       stderr, debugflags)

#-------------------------------------------------------------------------------
# sleep utility
#-------------------------------------------------------------------------------
def utility_sleep(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Suspend execution for int(args[0]) seconds and return 0."""
    if 'debug-utility' in debugflags:
        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
    time.sleep(int(args[0]))
    return 0

#-------------------------------------------------------------------------------
# sort utility
#-------------------------------------------------------------------------------
OPT_SORT = NonExitingParser("sort - sort, merge, or sequence check text files")

def utility_sort(name, args, interp, env, stdin, stdout, stderr, debugflags):
    """Sort the lines of the given files and write them to stdout.

    Operands are file names; '-' (or no operand at all) means 'stdin'.
    Relative names are resolved against env['PWD'] by temporarily changing
    the process working directory, which is always restored. Lines from all
    inputs are merged, ordered with list.sort() and written to 'stdout'.

    Return 0 on success. If a file cannot be read, the error is written to
    'stderr', nothing is written to 'stdout' and 1 is returned.
    """

    def read_lines(path):
        """Return the lines of 'path', or None after reporting an IOError."""
        if path == '-':
            lines = stdin.readlines()
        else:
            try:
                f = open(path)
                try:
                    lines = f.readlines()
                finally:
                    f.close()
            except IOError as e:
                stderr.write(str(e) + '\n')
                # The original returned the int 1 here, which the caller then
                # tried to concatenate to a list (TypeError). Signal the
                # failure explicitly instead.
                return None

        # Make sure the last line is newline-terminated so that merged inputs
        # never end up glued together.
        if lines and not lines[-1].endswith('\n'):
            lines[-1] = lines[-1] + '\n'
        return lines

    if 'debug-utility' in debugflags:
        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')

    option, args = OPT_SORT.parse_args(args)
    alllines = []

    if len(args) <= 0:
        args += ['-']

    # Load all files lines
    curdir = os.getcwd()
    try:
        os.chdir(env['PWD'])
        for path in args:
            lines = read_lines(path)
            if lines is None:
                # Unreadable input: fail the utility (cwd restored below).
                return 1
            alllines += lines
    finally:
        os.chdir(curdir)

    alllines.sort()
    for line in alllines:
        stdout.write(line)
    return 0

#-------------------------------------------------------------------------------
# hg utility
#-------------------------------------------------------------------------------

# hg sub-commands whose output is known to contain file paths.
hgcommands = [
    'add',
    'addremove',
    'commit', 'ci',
    'debugrename',
    'debugwalk',
    'falabala', # Dummy command used in a mercurial test
    'incoming',
    'locate',
    'pull',
    'push',
    'qinit',
    'remove', 'rm',
    'rename', 'mv',
    'revert',
    'showconfig',
    'status', 'st',
    'strip',
    ]

def rewriteslashes(name, args):
    """Tell whether path separators in the command output must be rewritten.

    Return True for "python .../hg <cmd>" invocations where <cmd> is one of
    'hgcommands', and for any executable whose base name starts with 'svn'.
    Return False otherwise.
    """
    # Several hg commands output file paths, rewrite the separators
    if len(args) > 1 and name.lower().endswith('python') \
       and args[0].endswith('hg'):
        for cmd in hgcommands:
            if cmd in args[1:]:
                return True

    # svn output contains many paths with OS specific separators.
    # Normalize these to unix paths.
    base = os.path.basename(name)
    if base.startswith('svn'):
        return True

    return False

def rewritehg(output):
    """Replace Windows-specific error messages in 'output' by their unix
    counterparts. Empty or None output is returned unchanged.
    """
    if not output:
        return output
    # Rewrite os specific messages
    output = output.replace(': The system cannot find the file specified',
                            ': No such file or directory')
    # NOTE(review): the pattern is applied without re.MULTILINE, so '$' only
    # anchors at the very end of the output; an "Access is denied" message on
    # an earlier line is left as is. Confirm whether that is intended.
    output = re.sub(': Access is denied.*$', ': Permission denied', output)
    output = output.replace(': No connection could be made because the target machine actively refused it',
                            ': Connection refused')
    return output


def run_command(name, args, interp, env, stdin, stdout,
    stderr, debugflags):
    """Execute an external command and forward its output.

    The process is started with [name] + args, env['PWD'] as working
    directory and env.get_variables() as environment. Its standard output
    (and standard error, unless both streams share the same file descriptor)
    is captured, optionally normalized, then written to 'stdout'/'stderr'.

    Normalization steps, applied in this order when they are enabled:
      - CR-LF to LF conversion, unless the command is a cygwin one or an hg
        one (with the 'hgbinary' option set), or the data contains NUL.
      - backslash to slash conversion when rewriteslashes() says so.
      - Windows to unix message rewriting (rewritehg) for hg commands.

    Return the process exit status. Raise UtilityError when the process
    cannot be started.
    """
    # Execute the command
    if 'debug-utility' in debugflags:
        # log() returns None; printing its result emitted a stray "None".
        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')

    hgbin = interp.options().hgbinary
    ishg = hgbin and ('hg' in name or args and 'hg' in args[0])
    unixoutput = 'cygwin' in name or ishg

    exec_env = env.get_variables()
    try:
        # BUG: comparing file descriptor is clearly not a reliable way to tell
        # whether they point on the same underlying object. But in pysh limited
        # scope this is usually right, we do not expect complicated redirections
        # besides usual 2>&1.
        # Still there is one case we have but cannot deal with is when stdout
        # and stderr are redirected *by pysh caller*. This is the reason for the
        # --redirect pysh() option.
        # Now, we want to know they are the same because we sometimes need to
        # transform the command output, mostly remove CR-LF to ensure that
        # command output is unix-like. Cygwin utilities are a special case
        # because they explicitly set their output streams to binary mode, so
        # we have nothing to do. For all others commands, we have to guess
        # whether they are sending text data, in which case the transformation
        # must be done. Again, the NUL character test is unreliable but should
        # be enough for hg tests.
        redirected = stdout.fileno()==stderr.fileno()
        if not redirected:
            p = subprocess.Popen([name] + args, cwd=env['PWD'], env=exec_env,
                    stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        else:
            p = subprocess.Popen([name] + args, cwd=env['PWD'], env=exec_env,
                    stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, err = p.communicate()
    except OSError as e:
        # WindowsError only exists on Windows and is a subclass of OSError;
        # naming it here raised NameError on any other platform.
        raise UtilityError(str(e))

    if not unixoutput:
        def encode(s):
            # Data containing NUL is assumed to be binary: leave it alone.
            if '\0' in s:
                return s
            return s.replace('\r\n', '\n')
    else:
        encode = lambda s: s

    if rewriteslashes(name, args):
        encode1_ = encode
        def encode(s):
            s = encode1_(s)
            s = s.replace('\\\\', '\\')
            s = s.replace('\\', '/')
            return s

    if ishg:
        encode2_ = encode
        def encode(s):
            return rewritehg(encode2_(s))

    stdout.write(encode(out))
    if not redirected:
        stderr.write(encode(err))
    return p.returncode

# ------------------------------------------------------------------------------
# Patch boundary: builtin.py ends above, the next file of the commit starts
# below. Original patch header, kept verbatim:
#
#   diff --git a/bitbake/lib/pysh/interp.py b/bitbake/lib/pysh/interp.py
#   new file mode 100644
#   index 0000000000..efe5181e1e
#   --- /dev/null
#   +++ b/bitbake/lib/pysh/interp.py
#   @@ -0,0 +1,1367 @@
# ------------------------------------------------------------------------------

# interp.py - shell interpreter for pysh.
#
# Copyright 2007 Patrick Mezard
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

"""Implement the shell interpreter.

Most references are made to "The Open Group Base Specifications Issue 6".
<http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html>
"""
# TODO: document the fact input streams must implement fileno() so Popen will work correctly.
#       it requires non-stdin stream to be implemented as files. Still to be tested...
# DOC: pathsep is used in PATH instead of ':'. Clearly, there are path syntax issues here.
# TODO: stop command execution upon error.
# TODO: sort out the filename/io_number mess. It should be possible to use filenames only.
+# TODO: review subshell implementation +# TODO: test environment cloning for non-special builtins +# TODO: set -x should not rebuild commands from tokens, assignments/redirections are lost +# TODO: unit test for variable assignment +# TODO: test error management wrt error type/utility type +# TODO: test for binary output everywhere +# BUG: debug-parsing does not pass log file to PLY. Maybe a PLY upgrade is necessary. +import base64 +import cPickle as pickle +import errno +import glob +import os +import re +import subprocess +import sys +import tempfile + +try: + s = set() + del s +except NameError: + from Set import Set as set + +import builtin +from sherrors import * +import pyshlex +import pyshyacc + +def mappend(func, *args, **kargs): + """Like map but assume func returns a list. Returned lists are merged into + a single one. + """ + return reduce(lambda a,b: a+b, map(func, *args, **kargs), []) + +class FileWrapper: + """File object wrapper to ease debugging. + + Allow mode checking and implement file duplication through a simple + reference counting scheme. Not sure the latter is really useful since + only real file descriptors can be used. 
+ """ + def __init__(self, mode, file, close=True): + if mode not in ('r', 'w', 'a'): + raise IOError('invalid mode: %s' % mode) + self._mode = mode + self._close = close + if isinstance(file, FileWrapper): + if file._refcount[0] <= 0: + raise IOError(0, 'Error') + self._refcount = file._refcount + self._refcount[0] += 1 + self._file = file._file + else: + self._refcount = [1] + self._file = file + + def dup(self): + return FileWrapper(self._mode, self, self._close) + + def fileno(self): + """fileno() should be only necessary for input streams.""" + return self._file.fileno() + + def read(self, size=-1): + if self._mode!='r': + raise IOError(0, 'Error') + return self._file.read(size) + + def readlines(self, *args, **kwargs): + return self._file.readlines(*args, **kwargs) + + def write(self, s): + if self._mode not in ('w', 'a'): + raise IOError(0, 'Error') + return self._file.write(s) + + def flush(self): + self._file.flush() + + def close(self): + if not self._refcount: + return + assert self._refcount[0] > 0 + + self._refcount[0] -= 1 + if self._refcount[0] == 0: + self._mode = 'c' + if self._close: + self._file.close() + self._refcount = None + + def mode(self): + return self._mode + + def __getattr__(self, name): + if name == 'name': + self.name = getattr(self._file, name) + return self.name + else: + raise AttributeError(name) + + def __del__(self): + self.close() + + +def win32_open_devnull(mode): + return open('NUL', mode) + + +class Redirections: + """Stores open files and their mapping to pseudo-sh file descriptor. 
+ """ + # BUG: redirections are not handled correctly: 1>&3 2>&3 3>&4 does + # not make 1 to redirect to 4 + def __init__(self, stdin=None, stdout=None, stderr=None): + self._descriptors = {} + if stdin is not None: + self._add_descriptor(0, stdin) + if stdout is not None: + self._add_descriptor(1, stdout) + if stderr is not None: + self._add_descriptor(2, stderr) + + def add_here_document(self, interp, name, content, io_number=None): + if io_number is None: + io_number = 0 + + if name==pyshlex.unquote_wordtree(name): + content = interp.expand_here_document(('TOKEN', content)) + + # Write document content in a temporary file + tmp = tempfile.TemporaryFile() + try: + tmp.write(content) + tmp.flush() + tmp.seek(0) + self._add_descriptor(io_number, FileWrapper('r', tmp)) + except: + tmp.close() + raise + + def add(self, interp, op, filename, io_number=None): + if op not in ('<', '>', '>|', '>>', '>&'): + # TODO: add descriptor duplication and here_documents + raise RedirectionError('Unsupported redirection operator "%s"' % op) + + if io_number is not None: + io_number = int(io_number) + + if (op == '>&' and filename.isdigit()) or filename=='-': + # No expansion for file descriptors, quote them if you want a filename + fullname = filename + else: + if filename.startswith('/'): + # TODO: win32 kludge + if filename=='/dev/null': + fullname = 'NUL' + else: + # TODO: handle absolute pathnames, they are unlikely to exist on the + # current platform (win32 for instance). 
+ raise NotImplementedError() + else: + fullname = interp.expand_redirection(('TOKEN', filename)) + if not fullname: + raise RedirectionError('%s: ambiguous redirect' % filename) + # Build absolute path based on PWD + fullname = os.path.join(interp.get_env()['PWD'], fullname) + + if op=='<': + return self._add_input_redirection(interp, fullname, io_number) + elif op in ('>', '>|'): + clobber = ('>|'==op) + return self._add_output_redirection(interp, fullname, io_number, clobber) + elif op=='>>': + return self._add_output_appending(interp, fullname, io_number) + elif op=='>&': + return self._dup_output_descriptor(fullname, io_number) + + def close(self): + if self._descriptors is not None: + for desc in self._descriptors.itervalues(): + desc.flush() + desc.close() + self._descriptors = None + + def stdin(self): + return self._descriptors[0] + + def stdout(self): + return self._descriptors[1] + + def stderr(self): + return self._descriptors[2] + + def clone(self): + clone = Redirections() + for desc, fileobj in self._descriptors.iteritems(): + clone._descriptors[desc] = fileobj.dup() + return clone + + def _add_output_redirection(self, interp, filename, io_number, clobber): + if io_number is None: + # io_number default to standard output + io_number = 1 + + if not clobber and interp.get_env().has_opt('-C') and os.path.isfile(filename): + # File already exist in no-clobber mode, bail out + raise RedirectionError('File "%s" already exists' % filename) + + # Open and register + self._add_file_descriptor(io_number, filename, 'w') + + def _add_output_appending(self, interp, filename, io_number): + if io_number is None: + io_number = 1 + self._add_file_descriptor(io_number, filename, 'a') + + def _add_input_redirection(self, interp, filename, io_number): + if io_number is None: + io_number = 0 + self._add_file_descriptor(io_number, filename, 'r') + + def _add_file_descriptor(self, io_number, filename, mode): + try: + if filename.startswith('/'): + if filename=='/dev/null': 
+ f = win32_open_devnull(mode+'b') + else: + # TODO: handle absolute pathnames, they are unlikely to exist on the + # current platform (win32 for instance). + raise NotImplementedError('cannot open absolute path %s' % repr(filename)) + else: + f = file(filename, mode+'b') + except IOError, e: + raise RedirectionError(str(e)) + + wrapper = None + try: + wrapper = FileWrapper(mode, f) + f = None + self._add_descriptor(io_number, wrapper) + except: + if f: f.close() + if wrapper: wrapper.close() + raise + + def _dup_output_descriptor(self, source_fd, dest_fd): + if source_fd is None: + source_fd = 1 + self._dup_file_descriptor(source_fd, dest_fd, 'w') + + def _dup_file_descriptor(self, source_fd, dest_fd, mode): + source_fd = int(source_fd) + if source_fd not in self._descriptors: + raise RedirectionError('"%s" is not a valid file descriptor' % str(source_fd)) + source = self._descriptors[source_fd] + + if source.mode()!=mode: + raise RedirectionError('Descriptor %s cannot be duplicated in mode "%s"' % (str(source), mode)) + + if dest_fd=='-': + # Close the source descriptor + del self._descriptors[source_fd] + source.close() + else: + dest_fd = int(dest_fd) + if dest_fd not in self._descriptors: + raise RedirectionError('Cannot replace file descriptor %s' % str(dest_fd)) + + dest = self._descriptors[dest_fd] + if dest.mode()!=mode: + raise RedirectionError('Descriptor %s cannot be cannot be redirected in mode "%s"' % (str(dest), mode)) + + self._descriptors[dest_fd] = source.dup() + dest.close() + + def _add_descriptor(self, io_number, file): + io_number = int(io_number) + + if io_number in self._descriptors: + # Close the current descriptor + d = self._descriptors[io_number] + del self._descriptors[io_number] + d.close() + + self._descriptors[io_number] = file + + def __str__(self): + names = [('%d=%r' % (k, getattr(v, 'name', None))) for k,v + in self._descriptors.iteritems()] + names = ','.join(names) + return 'Redirections(%s)' % names + + def __del__(self): + 
self.close() + +def cygwin_to_windows_path(path): + """Turn /cygdrive/c/foo into c:/foo, or return path if it + is not a cygwin path. + """ + if not path.startswith('/cygdrive/'): + return path + path = path[len('/cygdrive/'):] + path = path[:1] + ':' + path[1:] + return path + +def win32_to_unix_path(path): + if path is not None: + path = path.replace('\\', '/') + return path + +_RE_SHEBANG = re.compile(r'^\#!\s?([^\s]+)(?:\s([^\s]+))?') +_SHEBANG_CMDS = { + '/usr/bin/env': 'env', + '/bin/sh': 'pysh', + 'python': 'python', +} + +def resolve_shebang(path, ignoreshell=False): + """Return a list of arguments as shebang interpreter call or an empty list + if path does not refer to an executable script. + See <http://www.opengroup.org/austin/docs/austin_51r2.txt>. + + ignoreshell - set to True to ignore sh shebangs. Return an empty list instead. + """ + try: + f = file(path) + try: + # At most 80 characters in the first line + header = f.read(80).splitlines()[0] + finally: + f.close() + + m = _RE_SHEBANG.search(header) + if not m: + return [] + cmd, arg = m.group(1,2) + if os.path.isfile(cmd): + # Keep this one, the hg script for instance contains a weird windows + # shebang referencing the current python install. 
+ cmdfile = os.path.basename(cmd).lower() + if cmdfile == 'python.exe': + cmd = 'python' + pass + elif cmd not in _SHEBANG_CMDS: + raise CommandNotFound('Unknown interpreter "%s" referenced in '\ + 'shebang' % header) + cmd = _SHEBANG_CMDS.get(cmd) + if cmd is None or (ignoreshell and cmd == 'pysh'): + return [] + if arg is None: + return [cmd, win32_to_unix_path(path)] + return [cmd, arg, win32_to_unix_path(path)] + except IOError, e: + if e.errno!=errno.ENOENT and \ + (e.errno!=errno.EPERM and not os.path.isdir(path)): # Opening a directory raises EPERM + raise + return [] + +def win32_find_in_path(name, path): + if isinstance(path, str): + path = path.split(os.pathsep) + + exts = os.environ.get('PATHEXT', '').lower().split(os.pathsep) + for p in path: + p_name = os.path.join(p, name) + + prefix = resolve_shebang(p_name) + if prefix: + return prefix + + for ext in exts: + p_name_ext = p_name + ext + if os.path.exists(p_name_ext): + return [win32_to_unix_path(p_name_ext)] + return [] + +class Traps(dict): + def __setitem__(self, key, value): + if key not in ('EXIT',): + raise NotImplementedError() + super(Traps, self).__setitem__(key, value) + +# IFS white spaces character class +_IFS_WHITESPACES = (' ', '\t', '\n') + +class Environment: + """Environment holds environment variables, export table, function + definitions and whatever is defined in 2.12 "Shell Execution Environment", + redirection excepted. 
+ """ + def __init__(self, pwd): + self._opt = set() #Shell options + + self._functions = {} + self._env = {'?': '0', '#': '0'} + self._exported = set([ + 'HOME', 'IFS', 'PATH' + ]) + + # Set environment vars with side-effects + self._ifs_ws = None # Set of IFS whitespace characters + self._ifs_re = None # Regular expression used to split between words using IFS classes + self['IFS'] = ''.join(_IFS_WHITESPACES) #Default environment values + self['PWD'] = pwd + self.traps = Traps() + + def clone(self, subshell=False): + env = Environment(self['PWD']) + env._opt = set(self._opt) + for k,v in self.get_variables().iteritems(): + if k in self._exported: + env.export(k,v) + elif subshell: + env[k] = v + + if subshell: + env._functions = dict(self._functions) + + return env + + def __getitem__(self, key): + if key in ('@', '*', '-', '$'): + raise NotImplementedError('%s is not implemented' % repr(key)) + return self._env[key] + + def get(self, key, defval=None): + try: + return self[key] + except KeyError: + return defval + + def __setitem__(self, key, value): + if key=='IFS': + # Update the whitespace/non-whitespace classes + self._update_ifs(value) + elif key=='PWD': + pwd = os.path.abspath(value) + if not os.path.isdir(pwd): + raise VarAssignmentError('Invalid directory %s' % value) + value = pwd + elif key in ('?', '!'): + value = str(int(value)) + self._env[key] = value + + def __delitem__(self, key): + if key in ('IFS', 'PWD', '?'): + raise VarAssignmentError('%s cannot be unset' % key) + del self._env[key] + + def __contains__(self, item): + return item in self._env + + def set_positional_args(self, args): + """Set the content of 'args' as positional argument from 1 to len(args). + Return previous argument as a list of strings. 
+ """ + # Save and remove previous arguments + prevargs = [] + for i in xrange(int(self._env['#'])): + i = str(i+1) + prevargs.append(self._env[i]) + del self._env[i] + self._env['#'] = '0' + + #Set new ones + for i,arg in enumerate(args): + self._env[str(i+1)] = str(arg) + self._env['#'] = str(len(args)) + + return prevargs + + def get_positional_args(self): + return [self._env[str(i+1)] for i in xrange(int(self._env['#']))] + + def get_variables(self): + return dict(self._env) + + def export(self, key, value=None): + if value is not None: + self[key] = value + self._exported.add(key) + + def get_exported(self): + return [(k,self._env.get(k)) for k in self._exported] + + def split_fields(self, word): + if not self._ifs_ws or not word: + return [word] + return re.split(self._ifs_re, word) + + def _update_ifs(self, value): + """Update the split_fields related variables when IFS character set is + changed. + """ + # TODO: handle NULL IFS + + # Separate characters in whitespace and non-whitespace + chars = set(value) + ws = [c for c in chars if c in _IFS_WHITESPACES] + nws = [c for c in chars if c not in _IFS_WHITESPACES] + + # Keep whitespaces in a string for left and right stripping + self._ifs_ws = ''.join(ws) + + # Build a regexp to split fields + trailing = '[' + ''.join([re.escape(c) for c in ws]) + ']' + if nws: + # First, the single non-whitespace occurence. 
+ nws = '[' + ''.join([re.escape(c) for c in nws]) + ']' + nws = '(?:' + trailing + '*' + nws + trailing + '*' + '|' + trailing + '+)' + else: + # Then mix all parts with quantifiers + nws = trailing + '+' + self._ifs_re = re.compile(nws) + + def has_opt(self, opt, val=None): + return (opt, val) in self._opt + + def set_opt(self, opt, val=None): + self._opt.add((opt, val)) + + def find_in_path(self, name, pwd=False): + path = self._env.get('PATH', '').split(os.pathsep) + if pwd: + path[:0] = [self['PWD']] + if os.name == 'nt': + return win32_find_in_path(name, self._env.get('PATH', '')) + else: + raise NotImplementedError() + + def define_function(self, name, body): + if not is_name(name): + raise ShellSyntaxError('%s is not a valid function name' % repr(name)) + self._functions[name] = body + + def remove_function(self, name): + del self._functions[name] + + def is_function(self, name): + return name in self._functions + + def get_function(self, name): + return self._functions.get(name) + + +name_charset = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_' +name_charset = dict(zip(name_charset,name_charset)) + +def match_name(s): + """Return the length in characters of the longest prefix made of name + allowed characters in s. + """ + for i,c in enumerate(s): + if c not in name_charset: + return s[:i] + return s + +def is_name(s): + return len([c for c in s if c not in name_charset])<=0 + +def is_special_param(c): + return len(c)==1 and c in ('@','*','#','?','-','$','!','0') + +def utility_not_implemented(name, *args, **kwargs): + raise NotImplementedError('%s utility is not implemented' % name) + + +class Utility: + """Define utilities properties: + func -- utility callable. See builtin module for utility samples. + is_special -- see XCU 2.8. 
+ """ + def __init__(self, func, is_special=0): + self.func = func + self.is_special = bool(is_special) + + +def encodeargs(args): + def encodearg(s): + lines = base64.encodestring(s) + lines = [l.splitlines()[0] for l in lines] + return ''.join(lines) + + s = pickle.dumps(args) + return encodearg(s) + +def decodeargs(s): + s = base64.decodestring(s) + return pickle.loads(s) + + +class GlobError(Exception): + pass + +class Options: + def __init__(self): + # True if Mercurial operates with binary streams + self.hgbinary = True + +class Interpreter: + # Implementation is very basic: the execute() method just makes a DFS on the + # AST and execute nodes one by one. Nodes are tuple (name,obj) where name + # is a string identifier and obj the AST element returned by the parser. + # + # Handler are named after the node identifiers. + # TODO: check node names and remove the switch in execute with some + # dynamic getattr() call to find node handlers. + """Shell interpreter. + + The following debugging flags can be passed: + debug-parsing - enable PLY debugging. + debug-tree - print the generated AST. + debug-cmd - trace command execution before word expansion, plus exit status. + debug-utility - trace utility execution. + """ + + # List supported commands. 
+ COMMANDS = { + 'cat': Utility(builtin.utility_cat,), + 'cd': Utility(builtin.utility_cd,), + ':': Utility(builtin.utility_colon,), + 'echo': Utility(builtin.utility_echo), + 'env': Utility(builtin.utility_env), + 'exit': Utility(builtin.utility_exit), + 'export': Utility(builtin.builtin_export, is_special=1), + 'egrep': Utility(builtin.utility_egrep), + 'fgrep': Utility(builtin.utility_fgrep), + 'gunzip': Utility(builtin.utility_gunzip), + 'kill': Utility(builtin.utility_kill), + 'mkdir': Utility(builtin.utility_mkdir), + 'netstat': Utility(builtin.utility_netstat), + 'printf': Utility(builtin.utility_printf), + 'pwd': Utility(builtin.utility_pwd), + 'return': Utility(builtin.builtin_return, is_special=1), + 'sed': Utility(builtin.utility_sed,), + 'set': Utility(builtin.builtin_set,), + 'shift': Utility(builtin.builtin_shift,), + 'sleep': Utility(builtin.utility_sleep,), + 'sort': Utility(builtin.utility_sort,), + 'trap': Utility(builtin.builtin_trap, is_special=1), + 'true': Utility(builtin.utility_true), + 'unset': Utility(builtin.builtin_unset, is_special=1), + 'wait': Utility(builtin.builtin_wait, is_special=1), + } + + def __init__(self, pwd, debugflags = [], env=None, redirs=None, stdin=None, + stdout=None, stderr=None, opts=Options()): + self._env = env + if self._env is None: + self._env = Environment(pwd) + self._children = {} + + self._redirs = redirs + self._close_redirs = False + + if self._redirs is None: + if stdin is None: + stdin = sys.stdin + if stdout is None: + stdout = sys.stdout + if stderr is None: + stderr = sys.stderr + stdin = FileWrapper('r', stdin, False) + stdout = FileWrapper('w', stdout, False) + stderr = FileWrapper('w', stderr, False) + self._redirs = Redirections(stdin, stdout, stderr) + self._close_redirs = True + + self._debugflags = list(debugflags) + self._logfile = sys.stderr + self._options = opts + + def close(self): + """Must be called when the interpreter is no longer used.""" + script = self._env.traps.get('EXIT') + if 
script: + try: + self.execute_script(script=script) + except: + pass + + if self._redirs is not None and self._close_redirs: + self._redirs.close() + self._redirs = None + + def log(self, s): + self._logfile.write(s) + self._logfile.flush() + + def __getitem__(self, key): + return self._env[key] + + def __setitem__(self, key, value): + self._env[key] = value + + def options(self): + return self._options + + def redirect(self, redirs, ios): + def add_redir(io): + if isinstance(io, pyshyacc.IORedirect): + redirs.add(self, io.op, io.filename, io.io_number) + else: + redirs.add_here_document(self, io.name, io.content, io.io_number) + + map(add_redir, ios) + return redirs + + def execute_script(self, script=None, ast=None, sourced=False, + scriptpath=None): + """If script is not None, parse the input. Otherwise takes the supplied + AST. Then execute the AST. + Return the script exit status. + """ + try: + if scriptpath is not None: + self._env['0'] = os.path.abspath(scriptpath) + + if script is not None: + debug_parsing = ('debug-parsing' in self._debugflags) + cmds, script = pyshyacc.parse(script, True, debug_parsing) + if 'debug-tree' in self._debugflags: + pyshyacc.print_commands(cmds, self._logfile) + self._logfile.flush() + else: + cmds, script = ast, '' + + status = 0 + for cmd in cmds: + try: + status = self.execute(cmd) + except ExitSignal, e: + if sourced: + raise + status = int(e.args[0]) + return status + except ShellError: + self._env['?'] = 1 + raise + if 'debug-utility' in self._debugflags or 'debug-cmd' in self._debugflags: + self.log('returncode ' + str(status)+ '\n') + return status + except CommandNotFound, e: + print >>self._redirs.stderr, str(e) + self._redirs.stderr.flush() + # Command not found by non-interactive shell + # return 127 + raise + except RedirectionError, e: + # TODO: should be handled depending on the utility status + print >>self._redirs.stderr, str(e) + self._redirs.stderr.flush() + # Command not found by non-interactive shell + # 
return 127 + raise + + def dotcommand(self, env, args): + if len(args) < 1: + raise ShellError('. expects at least one argument') + path = args[0] + if '/' not in path: + found = env.find_in_path(args[0], True) + if found: + path = found[0] + script = file(path).read() + return self.execute_script(script=script, sourced=True) + + def execute(self, token, redirs=None): + """Execute and AST subtree with supplied redirections overriding default + interpreter ones. + Return the exit status. + """ + if not token: + return 0 + + if redirs is None: + redirs = self._redirs + + if isinstance(token, list): + # Commands sequence + res = 0 + for t in token: + res = self.execute(t, redirs) + return res + + type, value = token + status = 0 + if type=='simple_command': + redirs_copy = redirs.clone() + try: + # TODO: define and handle command return values + # TODO: implement set -e + status = self._execute_simple_command(value, redirs_copy) + finally: + redirs_copy.close() + elif type=='pipeline': + status = self._execute_pipeline(value, redirs) + elif type=='and_or': + status = self._execute_and_or(value, redirs) + elif type=='for_clause': + status = self._execute_for_clause(value, redirs) + elif type=='while_clause': + status = self._execute_while_clause(value, redirs) + elif type=='function_definition': + status = self._execute_function_definition(value, redirs) + elif type=='brace_group': + status = self._execute_brace_group(value, redirs) + elif type=='if_clause': + status = self._execute_if_clause(value, redirs) + elif type=='subshell': + status = self.subshell(ast=value.cmds, redirs=redirs) + elif type=='async': + status = self._asynclist(value) + elif type=='redirect_list': + redirs_copy = self.redirect(redirs.clone(), value.redirs) + try: + status = self.execute(value.cmd, redirs_copy) + finally: + redirs_copy.close() + else: + raise NotImplementedError('Unsupported token type ' + type) + + if status < 0: + status = 255 + return status + + def _execute_if_clause(self, 
if_clause, redirs): + cond_status = self.execute(if_clause.cond, redirs) + if cond_status==0: + return self.execute(if_clause.if_cmds, redirs) + else: + return self.execute(if_clause.else_cmds, redirs) + + def _execute_brace_group(self, group, redirs): + status = 0 + for cmd in group.cmds: + status = self.execute(cmd, redirs) + return status + + def _execute_function_definition(self, fundef, redirs): + self._env.define_function(fundef.name, fundef.body) + return 0 + + def _execute_while_clause(self, while_clause, redirs): + status = 0 + while 1: + cond_status = 0 + for cond in while_clause.condition: + cond_status = self.execute(cond, redirs) + + if cond_status: + break + + for cmd in while_clause.cmds: + status = self.execute(cmd, redirs) + + return status + + def _execute_for_clause(self, for_clause, redirs): + if not is_name(for_clause.name): + raise ShellSyntaxError('%s is not a valid name' % repr(for_clause.name)) + items = mappend(self.expand_token, for_clause.items) + + status = 0 + for item in items: + self._env[for_clause.name] = item + for cmd in for_clause.cmds: + status = self.execute(cmd, redirs) + return status + + def _execute_and_or(self, or_and, redirs): + res = self.execute(or_and.left, redirs) + if (or_and.op=='&&' and res==0) or (or_and.op!='&&' and res!=0): + res = self.execute(or_and.right, redirs) + return res + + def _execute_pipeline(self, pipeline, redirs): + if len(pipeline.commands)==1: + status = self.execute(pipeline.commands[0], redirs) + else: + # Execute all commands one after the other + status = 0 + inpath, outpath = None, None + try: + # Commands inputs and outputs cannot really be plugged as done + # by a real shell. Run commands sequentially and chain their + # input/output throught temporary files. 
+ tmpfd, inpath = tempfile.mkstemp() + os.close(tmpfd) + tmpfd, outpath = tempfile.mkstemp() + os.close(tmpfd) + + inpath = win32_to_unix_path(inpath) + outpath = win32_to_unix_path(outpath) + + for i, cmd in enumerate(pipeline.commands): + call_redirs = redirs.clone() + try: + if i!=0: + call_redirs.add(self, '<', inpath) + if i!=len(pipeline.commands)-1: + call_redirs.add(self, '>', outpath) + + status = self.execute(cmd, call_redirs) + + # Chain inputs/outputs + inpath, outpath = outpath, inpath + finally: + call_redirs.close() + finally: + if inpath: os.remove(inpath) + if outpath: os.remove(outpath) + + if pipeline.reverse_status: + status = int(not status) + self._env['?'] = status + return status + + def _execute_function(self, name, args, interp, env, stdin, stdout, stderr, *others): + assert interp is self + + func = env.get_function(name) + #Set positional parameters + prevargs = None + try: + prevargs = env.set_positional_args(args) + try: + redirs = Redirections(stdin.dup(), stdout.dup(), stderr.dup()) + try: + status = self.execute(func, redirs) + finally: + redirs.close() + except ReturnSignal, e: + status = int(e.args[0]) + env['?'] = status + return status + finally: + #Reset positional parameters + if prevargs is not None: + env.set_positional_args(prevargs) + + def _execute_simple_command(self, token, redirs): + """Can raise ReturnSignal when return builtin is called, ExitSignal when + exit is called, and other shell exceptions upon builtin failures. 
+ """ + debug_command = 'debug-cmd' in self._debugflags + if debug_command: + self.log('word' + repr(token.words) + '\n') + self.log('assigns' + repr(token.assigns) + '\n') + self.log('redirs' + repr(token.redirs) + '\n') + + is_special = None + env = self._env + + try: + # Word expansion + args = [] + for word in token.words: + args += self.expand_token(word) + if is_special is None and args: + is_special = env.is_function(args[0]) or \ + (args[0] in self.COMMANDS and self.COMMANDS[args[0]].is_special) + + if debug_command: + self.log('_execute_simple_command' + str(args) + '\n') + + if not args: + # Redirections happen is a subshell + redirs = redirs.clone() + elif not is_special: + env = self._env.clone() + + # Redirections + self.redirect(redirs, token.redirs) + + # Variables assignments + res = 0 + for type,(k,v) in token.assigns: + status, expanded = self.expand_variable((k,v)) + if status is not None: + res = status + if args: + env.export(k, expanded) + else: + env[k] = expanded + + if args and args[0] in ('.', 'source'): + res = self.dotcommand(env, args[1:]) + elif args: + if args[0] in self.COMMANDS: + command = self.COMMANDS[args[0]] + elif env.is_function(args[0]): + command = Utility(self._execute_function, is_special=True) + else: + if not '/' in args[0].replace('\\', '/'): + cmd = env.find_in_path(args[0]) + if not cmd: + # TODO: test error code on unknown command => 127 + raise CommandNotFound('Unknown command: "%s"' % args[0]) + else: + # Handle commands like '/cygdrive/c/foo.bat' + cmd = cygwin_to_windows_path(args[0]) + if not os.path.exists(cmd): + raise CommandNotFound('%s: No such file or directory' % args[0]) + shebang = resolve_shebang(cmd) + if shebang: + cmd = shebang + else: + cmd = [cmd] + args[0:1] = cmd + command = Utility(builtin.run_command) + + # Command execution + if 'debug-cmd' in self._debugflags: + self.log('redirections ' + str(redirs) + '\n') + + res = command.func(args[0], args[1:], self, env, + redirs.stdin(), 
redirs.stdout(), + redirs.stderr(), self._debugflags) + + if self._env.has_opt('-x'): + # Trace command execution in shell environment + # BUG: would be hard to reproduce a real shell behaviour since + # the AST is not annotated with source lines/tokens. + self._redirs.stdout().write(' '.join(args)) + + except ReturnSignal: + raise + except ShellError, e: + if is_special or isinstance(e, (ExitSignal, + ShellSyntaxError, ExpansionError)): + raise e + self._redirs.stderr().write(str(e)+'\n') + return 1 + + return res + + def expand_token(self, word): + """Expand a word as specified in [2.6 Word Expansions]. Return the list + of expanded words. + """ + status, wtrees = self._expand_word(word) + return map(pyshlex.wordtree_as_string, wtrees) + + def expand_variable(self, word): + """Return a status code (or None if no command expansion occurred) + and a single word. + """ + status, wtrees = self._expand_word(word, pathname=False, split=False) + words = map(pyshlex.wordtree_as_string, wtrees) + assert len(words)==1 + return status, words[0] + + def expand_here_document(self, word): + """Return the expanded document as a single word. The here document is + assumed to be unquoted. + """ + status, wtrees = self._expand_word(word, pathname=False, + split=False, here_document=True) + words = map(pyshlex.wordtree_as_string, wtrees) + assert len(words)==1 + return words[0] + + def expand_redirection(self, word): + """Return a single word.""" + return self.expand_variable(word)[1] + + def get_env(self): + return self._env + + def _expand_word(self, token, pathname=True, split=True, here_document=False): + wtree = pyshlex.make_wordtree(token[1], here_document=here_document) + + # TODO: implement tilde expansion + def expand(wtree): + """Return a pseudo wordtree: the tree or its subelements can be empty + lists when no value result from the expansion. 
+ """ + status = None + for part in wtree: + if not isinstance(part, list): + continue + if part[0]in ("'", '\\'): + continue + elif part[0] in ('`', '$('): + status, result = self._expand_command(part) + part[:] = result + elif part[0] in ('$', '${'): + part[:] = self._expand_parameter(part, wtree[0]=='"', split) + elif part[0] in ('', '"'): + status, result = expand(part) + part[:] = result + else: + raise NotImplementedError('%s expansion is not implemented' + % part[0]) + # [] is returned when an expansion result in no-field, + # like an empty $@ + wtree = [p for p in wtree if p != []] + if len(wtree) < 3: + return status, [] + return status, wtree + + status, wtree = expand(wtree) + if len(wtree) == 0: + return status, wtree + wtree = pyshlex.normalize_wordtree(wtree) + + if split: + wtrees = self._split_fields(wtree) + else: + wtrees = [wtree] + + if pathname: + wtrees = mappend(self._expand_pathname, wtrees) + + wtrees = map(self._remove_quotes, wtrees) + return status, wtrees + + def _expand_command(self, wtree): + # BUG: there is something to do with backslashes and quoted + # characters here + command = pyshlex.wordtree_as_string(wtree[1:-1]) + status, output = self.subshell_output(command) + return status, ['', output, ''] + + def _expand_parameter(self, wtree, quoted=False, split=False): + """Return a valid wtree or an empty list when no parameter results.""" + # Get the parameter name + # TODO: implement weird expansion rules with ':' + name = pyshlex.wordtree_as_string(wtree[1:-1]) + if not is_name(name) and not is_special_param(name): + raise ExpansionError('Bad substitution "%s"' % name) + # TODO: implement special parameters + if name in ('@', '*'): + args = self._env.get_positional_args() + if len(args) == 0: + return [] + if len(args)<2: + return ['', ''.join(args), ''] + + sep = self._env.get('IFS', '')[:1] + if split and quoted and name=='@': + # Introduce a new token to tell the caller that these parameters + # cause a split as specified in 
2.5.2 + return ['@'] + args + [''] + else: + return ['', sep.join(args), ''] + + return ['', self._env.get(name, ''), ''] + + def _split_fields(self, wtree): + def is_empty(split): + return split==['', '', ''] + + def split_positional(quoted): + # Return a list of wtree split according positional parameters rules. + # All remaining '@' groups are removed. + assert quoted[0]=='"' + + splits = [[]] + for part in quoted: + if not isinstance(part, list) or part[0]!='@': + splits[-1].append(part) + else: + # Empty or single argument list were dealt with already + assert len(part)>3 + # First argument must join with the beginning part of the original word + splits[-1].append(part[1]) + # Create double-quotes expressions for every argument after the first + for arg in part[2:-1]: + splits[-1].append('"') + splits.append(['"', arg]) + return splits + + # At this point, all expansions but pathnames have occured. Only quoted + # and positional sequences remain. Thus, all candidates for field splitting + # are in the tree root, or are positional splits ('@') and lie in root + # children. 
+ if not wtree or wtree[0] not in ('', '"'): + # The whole token is quoted or empty, nothing to split + return [wtree] + + if wtree[0]=='"': + wtree = ['', wtree, ''] + + result = [['', '']] + for part in wtree[1:-1]: + if isinstance(part, list): + if part[0]=='"': + splits = split_positional(part) + if len(splits)<=1: + result[-1] += [part, ''] + else: + # Terminate the current split + result[-1] += [splits[0], ''] + result += splits[1:-1] + # Create a new split + result += [['', splits[-1], '']] + else: + result[-1] += [part, ''] + else: + splits = self._env.split_fields(part) + if len(splits)<=1: + # No split + result[-1][-1] += part + else: + # Terminate the current resulting part and create a new one + result[-1][-1] += splits[0] + result[-1].append('') + result += [['', r, ''] for r in splits[1:-1]] + result += [['', splits[-1]]] + result[-1].append('') + + # Leading and trailing empty groups come from leading/trailing blanks + if result and is_empty(result[-1]): + result[-1:] = [] + if result and is_empty(result[0]): + result[:1] = [] + return result + + def _expand_pathname(self, wtree): + """See [2.6.6 Pathname Expansion].""" + if self._env.has_opt('-f'): + return [wtree] + + # All expansions have been performed, only quoted sequences should remain + # in the tree. 
Generate the pattern by folding the tree, escaping special + # characters when appear quoted + special_chars = '*?[]' + + def make_pattern(wtree): + subpattern = [] + for part in wtree[1:-1]: + if isinstance(part, list): + part = make_pattern(part) + elif wtree[0]!='': + for c in part: + # Meta-characters cannot be quoted + if c in special_chars: + raise GlobError() + subpattern.append(part) + return ''.join(subpattern) + + def pwd_glob(pattern): + cwd = os.getcwd() + os.chdir(self._env['PWD']) + try: + return glob.glob(pattern) + finally: + os.chdir(cwd) + + #TODO: check working directory issues here wrt relative patterns + try: + pattern = make_pattern(wtree) + paths = pwd_glob(pattern) + except GlobError: + # BUG: Meta-characters were found in quoted sequences. The should + # have been used literally but this is unsupported in current glob module. + # Instead we consider the whole tree must be used literally and + # therefore there is no point in globbing. This is wrong when meta + # characters are mixed with quoted meta in the same pattern like: + # < foo*"py*" > + paths = [] + + if not paths: + return [wtree] + return [['', path, ''] for path in paths] + + def _remove_quotes(self, wtree): + """See [2.6.7 Quote Removal].""" + + def unquote(wtree): + unquoted = [] + for part in wtree[1:-1]: + if isinstance(part, list): + part = unquote(part) + unquoted.append(part) + return ''.join(unquoted) + + return ['', unquote(wtree), ''] + + def subshell(self, script=None, ast=None, redirs=None): + """Execute the script or AST in a subshell, with inherited redirections + if redirs is not None. 
+ """ + if redirs: + sub_redirs = redirs + else: + sub_redirs = redirs.clone() + + subshell = None + try: + subshell = Interpreter(None, self._debugflags, self._env.clone(True), + sub_redirs, opts=self._options) + return subshell.execute_script(script, ast) + finally: + if not redirs: sub_redirs.close() + if subshell: subshell.close() + + def subshell_output(self, script): + """Execute the script in a subshell and return the captured output.""" + # Create temporary file to capture subshell output + tmpfd, tmppath = tempfile.mkstemp() + try: + tmpfile = os.fdopen(tmpfd, 'wb') + stdout = FileWrapper('w', tmpfile) + + redirs = Redirections(self._redirs.stdin().dup(), + stdout, + self._redirs.stderr().dup()) + try: + status = self.subshell(script=script, redirs=redirs) + finally: + redirs.close() + redirs = None + + # Extract subshell standard output + tmpfile = open(tmppath, 'rb') + try: + output = tmpfile.read() + return status, output.rstrip('\n') + finally: + tmpfile.close() + finally: + os.remove(tmppath) + + def _asynclist(self, cmd): + args = (self._env.get_variables(), cmd) + arg = encodeargs(args) + assert len(args) < 30*1024 + cmd = ['pysh.bat', '--ast', '-c', arg] + p = subprocess.Popen(cmd, cwd=self._env['PWD']) + self._children[p.pid] = p + self._env['!'] = p.pid + return 0 + + def wait(self, pids=None): + if not pids: + pids = self._children.keys() + + status = 127 + for pid in pids: + if pid not in self._children: + continue + p = self._children.pop(pid) + status = p.wait() + + return status + diff --git a/bitbake/lib/pysh/lsprof.py b/bitbake/lib/pysh/lsprof.py new file mode 100644 index 0000000000..b1831c22a7 --- /dev/null +++ b/bitbake/lib/pysh/lsprof.py @@ -0,0 +1,116 @@ +#! 
/usr/bin/env python + +import sys +from _lsprof import Profiler, profiler_entry + +__all__ = ['profile', 'Stats'] + +def profile(f, *args, **kwds): + """XXX docstring""" + p = Profiler() + p.enable(subcalls=True, builtins=True) + try: + f(*args, **kwds) + finally: + p.disable() + return Stats(p.getstats()) + + +class Stats(object): + """XXX docstring""" + + def __init__(self, data): + self.data = data + + def sort(self, crit="inlinetime"): + """XXX docstring""" + if crit not in profiler_entry.__dict__: + raise ValueError("Can't sort by %s" % crit) + self.data.sort(lambda b, a: cmp(getattr(a, crit), + getattr(b, crit))) + for e in self.data: + if e.calls: + e.calls.sort(lambda b, a: cmp(getattr(a, crit), + getattr(b, crit))) + + def pprint(self, top=None, file=None, limit=None, climit=None): + """XXX docstring""" + if file is None: + file = sys.stdout + d = self.data + if top is not None: + d = d[:top] + cols = "% 12s %12s %11.4f %11.4f %s\n" + hcols = "% 12s %12s %12s %12s %s\n" + cols2 = "+%12s %12s %11.4f %11.4f + %s\n" + file.write(hcols % ("CallCount", "Recursive", "Total(ms)", + "Inline(ms)", "module:lineno(function)")) + count = 0 + for e in d: + file.write(cols % (e.callcount, e.reccallcount, e.totaltime, + e.inlinetime, label(e.code))) + count += 1 + if limit is not None and count == limit: + return + ccount = 0 + if e.calls: + for se in e.calls: + file.write(cols % ("+%s" % se.callcount, se.reccallcount, + se.totaltime, se.inlinetime, + "+%s" % label(se.code))) + count += 1 + ccount += 1 + if limit is not None and count == limit: + return + if climit is not None and ccount == climit: + break + + def freeze(self): + """Replace all references to code objects with string + descriptions; this makes it possible to pickle the instance.""" + + # this code is probably rather ickier than it needs to be! 
+ for i in range(len(self.data)): + e = self.data[i] + if not isinstance(e.code, str): + self.data[i] = type(e)((label(e.code),) + e[1:]) + if e.calls: + for j in range(len(e.calls)): + se = e.calls[j] + if not isinstance(se.code, str): + e.calls[j] = type(se)((label(se.code),) + se[1:]) + +_fn2mod = {} + +def label(code): + if isinstance(code, str): + return code + try: + mname = _fn2mod[code.co_filename] + except KeyError: + for k, v in sys.modules.items(): + if v is None: + continue + if not hasattr(v, '__file__'): + continue + if not isinstance(v.__file__, str): + continue + if v.__file__.startswith(code.co_filename): + mname = _fn2mod[code.co_filename] = k + break + else: + mname = _fn2mod[code.co_filename] = '<%s>'%code.co_filename + + return '%s:%d(%s)' % (mname, code.co_firstlineno, code.co_name) + + +if __name__ == '__main__': + import os + sys.argv = sys.argv[1:] + if not sys.argv: + print >> sys.stderr, "usage: lsprof.py <script> <arguments...>" + sys.exit(2) + sys.path.insert(0, os.path.abspath(os.path.dirname(sys.argv[0]))) + stats = profile(execfile, sys.argv[0], globals(), locals()) + stats.sort() + stats.pprint() diff --git a/bitbake/lib/pysh/pysh.py b/bitbake/lib/pysh/pysh.py new file mode 100644 index 0000000000..b4e6145b51 --- /dev/null +++ b/bitbake/lib/pysh/pysh.py @@ -0,0 +1,167 @@ +# pysh.py - command processing for pysh. +# +# Copyright 2007 Patrick Mezard +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. 
+ +import optparse +import os +import sys + +import interp + +SH_OPT = optparse.OptionParser(prog='pysh', usage="%prog [OPTIONS]", version='0.1') +SH_OPT.add_option('-c', action='store_true', dest='command_string', default=None, + help='A string that shall be interpreted by the shell as one or more commands') +SH_OPT.add_option('--redirect-to', dest='redirect_to', default=None, + help='Redirect script commands stdout and stderr to the specified file') +# See utility_command in builtin.py about the reason for this flag. +SH_OPT.add_option('--redirected', dest='redirected', action='store_true', default=False, + help='Tell the interpreter that stdout and stderr are actually the same objects, which is really stdout') +SH_OPT.add_option('--debug-parsing', action='store_true', dest='debug_parsing', default=False, + help='Trace PLY execution') +SH_OPT.add_option('--debug-tree', action='store_true', dest='debug_tree', default=False, + help='Display the generated syntax tree.') +SH_OPT.add_option('--debug-cmd', action='store_true', dest='debug_cmd', default=False, + help='Trace command execution before parameters expansion and exit status.') +SH_OPT.add_option('--debug-utility', action='store_true', dest='debug_utility', default=False, + help='Trace utility calls, after parameters expansions') +SH_OPT.add_option('--ast', action='store_true', dest='ast', default=False, + help='Encoded commands to execute in a subprocess') +SH_OPT.add_option('--profile', action='store_true', default=False, + help='Profile pysh run') + + +def split_args(args): + # Separate shell arguments from command ones + # Just stop at the first argument not starting with a dash. I know, this is completely broken, + # it ignores files starting with a dash or may take option values for command file. 
This is not + # supposed to happen for now + command_index = len(args) + for i,arg in enumerate(args): + if not arg.startswith('-'): + command_index = i + break + + return args[:command_index], args[command_index:] + + +def fixenv(env): + path = env.get('PATH') + if path is not None: + parts = path.split(os.pathsep) + # Remove Windows utilities from PATH, they are useless at best and + # some of them (find) may be confused with other utilities. + parts = [p for p in parts if 'system32' not in p.lower()] + env['PATH'] = os.pathsep.join(parts) + if env.get('HOME') is None: + # Several utilities, including cvsps, cannot work without + # a defined HOME directory. + env['HOME'] = os.path.expanduser('~') + return env + +def _sh(cwd, shargs, cmdargs, options, debugflags=None, env=None): + if os.environ.get('PYSH_TEXT') != '1': + import msvcrt + for fp in (sys.stdin, sys.stdout, sys.stderr): + msvcrt.setmode(fp.fileno(), os.O_BINARY) + + hgbin = os.environ.get('PYSH_HGTEXT') != '1' + + if debugflags is None: + debugflags = [] + if options.debug_parsing: debugflags.append('debug-parsing') + if options.debug_utility: debugflags.append('debug-utility') + if options.debug_cmd: debugflags.append('debug-cmd') + if options.debug_tree: debugflags.append('debug-tree') + + if env is None: + env = fixenv(dict(os.environ)) + if cwd is None: + cwd = os.getcwd() + + if not cmdargs: + # Nothing to do + return 0 + + ast = None + command_file = None + if options.command_string: + input = cmdargs[0] + if not options.ast: + input += '\n' + else: + args, input = interp.decodeargs(input), None + env, ast = args + cwd = env.get('PWD', cwd) + else: + command_file = cmdargs[0] + arguments = cmdargs[1:] + + prefix = interp.resolve_shebang(command_file, ignoreshell=True) + if prefix: + input = ' '.join(prefix + [command_file] + arguments) + else: + # Read commands from file + f = file(command_file) + try: + # Trailing newline to help the parser + input = f.read() + '\n' + finally: + f.close() + + 
redirect = None + try: + if options.redirected: + stdout = sys.stdout + stderr = stdout + elif options.redirect_to: + redirect = open(options.redirect_to, 'wb') + stdout = redirect + stderr = redirect + else: + stdout = sys.stdout + stderr = sys.stderr + + # TODO: set arguments to environment variables + opts = interp.Options() + opts.hgbinary = hgbin + ip = interp.Interpreter(cwd, debugflags, stdout=stdout, stderr=stderr, + opts=opts) + try: + # Export given environment in shell object + for k,v in env.iteritems(): + ip.get_env().export(k,v) + return ip.execute_script(input, ast, scriptpath=command_file) + finally: + ip.close() + finally: + if redirect is not None: + redirect.close() + +def sh(cwd=None, args=None, debugflags=None, env=None): + if args is None: + args = sys.argv[1:] + shargs, cmdargs = split_args(args) + options, shargs = SH_OPT.parse_args(shargs) + + if options.profile: + import lsprof + p = lsprof.Profiler() + p.enable(subcalls=True) + try: + return _sh(cwd, shargs, cmdargs, options, debugflags, env) + finally: + p.disable() + stats = lsprof.Stats(p.getstats()) + stats.sort() + stats.pprint(top=10, file=sys.stderr, climit=5) + else: + return _sh(cwd, shargs, cmdargs, options, debugflags, env) + +def main(): + sys.exit(sh()) + +if __name__=='__main__': + main() diff --git a/bitbake/lib/pysh/pyshlex.py b/bitbake/lib/pysh/pyshlex.py new file mode 100644 index 0000000000..b977b5e869 --- /dev/null +++ b/bitbake/lib/pysh/pyshlex.py @@ -0,0 +1,888 @@ +# pyshlex.py - PLY compatible lexer for pysh. +# +# Copyright 2007 Patrick Mezard +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. + +# TODO: +# - review all "char in 'abc'" snippets: the empty string can be matched +# - test line continuations within quoted/expansion strings +# - eof is buggy wrt sublexers +# - the lexer cannot really work in pull mode as it would be required to run +# PLY in pull mode. 
It was designed to work incrementally and it would not be +# that hard to enable pull mode. +import re +try: + s = set() + del s +except NameError: + from Set import Set as set + +from ply import lex +from sherrors import * + +class NeedMore(Exception): + pass + +def is_blank(c): + return c in (' ', '\t') + +_RE_DIGITS = re.compile(r'^\d+$') + +def are_digits(s): + return _RE_DIGITS.search(s) is not None + +_OPERATORS = dict([ + ('&&', 'AND_IF'), + ('||', 'OR_IF'), + (';;', 'DSEMI'), + ('<<', 'DLESS'), + ('>>', 'DGREAT'), + ('<&', 'LESSAND'), + ('>&', 'GREATAND'), + ('<>', 'LESSGREAT'), + ('<<-', 'DLESSDASH'), + ('>|', 'CLOBBER'), + ('&', 'AMP'), + (';', 'COMMA'), + ('<', 'LESS'), + ('>', 'GREATER'), + ('(', 'LPARENS'), + (')', 'RPARENS'), +]) + +#Make a function to silence pychecker "Local variable shadows global" +def make_partial_ops(): + partials = {} + for k in _OPERATORS: + for i in range(1, len(k)+1): + partials[k[:i]] = None + return partials + +_PARTIAL_OPERATORS = make_partial_ops() + +def is_partial_op(s): + """Return True if s matches a non-empty subpart of an operator starting + at its first character. + """ + return s in _PARTIAL_OPERATORS + +def is_op(s): + """If s matches an operator, returns the operator identifier. Return None + otherwise. 
+ """ + return _OPERATORS.get(s) + +_RESERVEDS = dict([ + ('if', 'If'), + ('then', 'Then'), + ('else', 'Else'), + ('elif', 'Elif'), + ('fi', 'Fi'), + ('do', 'Do'), + ('done', 'Done'), + ('case', 'Case'), + ('esac', 'Esac'), + ('while', 'While'), + ('until', 'Until'), + ('for', 'For'), + ('{', 'Lbrace'), + ('}', 'Rbrace'), + ('!', 'Bang'), + ('in', 'In'), + ('|', 'PIPE'), +]) + +def get_reserved(s): + return _RESERVEDS.get(s) + +_RE_NAME = re.compile(r'^[0-9a-zA-Z_]+$') + +def is_name(s): + return _RE_NAME.search(s) is not None + +def find_chars(seq, chars): + for i,v in enumerate(seq): + if v in chars: + return i,v + return -1, None + +class WordLexer: + """WordLexer parse quoted or expansion expressions and return an expression + tree. The input string can be any well formed sequence beginning with quoting + or expansion character. Embedded expressions are handled recursively. The + resulting tree is made of lists and strings. Lists represent quoted or + expansion expressions. Each list first element is the opening separator, + the last one the closing separator. In-between can be any number of strings + or lists for sub-expressions. Non quoted/expansion expression can written as + strings or as lists with empty strings as starting and ending delimiters. + """ + + NAME_CHARSET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_' + NAME_CHARSET = dict(zip(NAME_CHARSET, NAME_CHARSET)) + + SPECIAL_CHARSET = '@*#?-$!0' + + #Characters which can be escaped depends on the current delimiters + ESCAPABLE = { + '`': set(['$', '\\', '`']), + '"': set(['$', '\\', '`', '"']), + "'": set(), + } + + def __init__(self, heredoc = False): + # _buffer is the unprocessed input characters buffer + self._buffer = [] + # _stack is empty or contains a quoted list being processed + # (this is the DFS path to the quoted expression being evaluated). 
+ self._stack = [] + self._escapable = None + # True when parsing unquoted here documents + self._heredoc = heredoc + + def add(self, data, eof=False): + """Feed the lexer with more data. If the quoted expression can be + delimited, return a tuple (expr, remaining) containing the expression + tree and the unconsumed data. + Otherwise, raise NeedMore. + """ + self._buffer += list(data) + self._parse(eof) + + result = self._stack[0] + remaining = ''.join(self._buffer) + self._stack = [] + self._buffer = [] + return result, remaining + + def _is_escapable(self, c, delim=None): + if delim is None: + if self._heredoc: + # Backslashes works as if they were double quoted in unquoted + # here-documents + delim = '"' + else: + if len(self._stack)<=1: + return True + delim = self._stack[-2][0] + + escapables = self.ESCAPABLE.get(delim, None) + return escapables is None or c in escapables + + def _parse_squote(self, buf, result, eof): + if not buf: + raise NeedMore() + try: + pos = buf.index("'") + except ValueError: + raise NeedMore() + result[-1] += ''.join(buf[:pos]) + result += ["'"] + return pos+1, True + + def _parse_bquote(self, buf, result, eof): + if not buf: + raise NeedMore() + + if buf[0]=='\n': + #Remove line continuations + result[:] = ['', '', ''] + elif self._is_escapable(buf[0]): + result[-1] += buf[0] + result += [''] + else: + #Keep as such + result[:] = ['', '\\'+buf[0], ''] + + return 1, True + + def _parse_dquote(self, buf, result, eof): + if not buf: + raise NeedMore() + pos, sep = find_chars(buf, '$\\`"') + if pos==-1: + raise NeedMore() + + result[-1] += ''.join(buf[:pos]) + if sep=='"': + result += ['"'] + return pos+1, True + else: + #Keep everything until the separator and defer processing + return pos, False + + def _parse_command(self, buf, result, eof): + if not buf: + raise NeedMore() + + chars = '$\\`"\'' + if result[0] == '$(': + chars += ')' + pos, sep = find_chars(buf, chars) + if pos == -1: + raise NeedMore() + + result[-1] += 
''.join(buf[:pos]) + if (result[0]=='$(' and sep==')') or (result[0]=='`' and sep=='`'): + result += [sep] + return pos+1, True + else: + return pos, False + + def _parse_parameter(self, buf, result, eof): + if not buf: + raise NeedMore() + + pos, sep = find_chars(buf, '$\\`"\'}') + if pos==-1: + raise NeedMore() + + result[-1] += ''.join(buf[:pos]) + if sep=='}': + result += [sep] + return pos+1, True + else: + return pos, False + + def _parse_dollar(self, buf, result, eof): + sep = result[0] + if sep=='$': + if not buf: + #TODO: handle empty $ + raise NeedMore() + if buf[0]=='(': + if len(buf)==1: + raise NeedMore() + + if buf[1]=='(': + result[0] = '$((' + buf[:2] = [] + else: + result[0] = '$(' + buf[:1] = [] + + elif buf[0]=='{': + result[0] = '${' + buf[:1] = [] + else: + if buf[0] in self.SPECIAL_CHARSET: + result[-1] = buf[0] + read = 1 + else: + for read,c in enumerate(buf): + if c not in self.NAME_CHARSET: + break + else: + if not eof: + raise NeedMore() + read += 1 + + result[-1] += ''.join(buf[0:read]) + + if not result[-1]: + result[:] = ['', result[0], ''] + else: + result += [''] + return read,True + + sep = result[0] + if sep=='$(': + parsefunc = self._parse_command + elif sep=='${': + parsefunc = self._parse_parameter + else: + raise NotImplementedError() + + pos, closed = parsefunc(buf, result, eof) + return pos, closed + + def _parse(self, eof): + buf = self._buffer + stack = self._stack + recurse = False + + while 1: + if not stack or recurse: + if not buf: + raise NeedMore() + if buf[0] not in ('"\\`$\''): + raise ShellSyntaxError('Invalid quoted string sequence') + stack.append([buf[0], '']) + buf[:1] = [] + recurse = False + + result = stack[-1] + if result[0]=="'": + parsefunc = self._parse_squote + elif result[0]=='\\': + parsefunc = self._parse_bquote + elif result[0]=='"': + parsefunc = self._parse_dquote + elif result[0]=='`': + parsefunc = self._parse_command + elif result[0][0]=='$': + parsefunc = self._parse_dollar + else: + raise 
NotImplementedError() + + read, closed = parsefunc(buf, result, eof) + + buf[:read] = [] + if closed: + if len(stack)>1: + #Merge in parent expression + parsed = stack.pop() + stack[-1] += [parsed] + stack[-1] += [''] + else: + break + else: + recurse = True + +def normalize_wordtree(wtree): + """Fold back every literal sequence (delimited with empty strings) into + parent sequence. + """ + def normalize(wtree): + result = [] + for part in wtree[1:-1]: + if isinstance(part, list): + part = normalize(part) + if part[0]=='': + #Move the part content back at current level + result += part[1:-1] + continue + elif not part: + #Remove empty strings + continue + result.append(part) + if not result: + result = [''] + return [wtree[0]] + result + [wtree[-1]] + + return normalize(wtree) + + +def make_wordtree(token, here_document=False): + """Parse a delimited token and return a tree similar to the ones returned by + WordLexer. token may contain any combinations of expansion/quoted fields and + non-ones. + """ + tree = [''] + remaining = token + delimiters = '\\$`' + if not here_document: + delimiters += '\'"' + + while 1: + pos, sep = find_chars(remaining, delimiters) + if pos==-1: + tree += [remaining, ''] + return normalize_wordtree(tree) + tree.append(remaining[:pos]) + remaining = remaining[pos:] + + try: + result, remaining = WordLexer(heredoc = here_document).add(remaining, True) + except NeedMore: + raise ShellSyntaxError('Invalid token "%s"') + tree.append(result) + + +def wordtree_as_string(wtree): + """Rewrite an expression tree generated by make_wordtree as string.""" + def visit(node, output): + for child in node: + if isinstance(child, list): + visit(child, output) + else: + output.append(child) + + output = [] + visit(wtree, output) + return ''.join(output) + + +def unquote_wordtree(wtree): + """Fold the word tree while removing quotes everywhere. Other expansion + sequences are joined as such. 
+ """ + def unquote(wtree): + unquoted = [] + if wtree[0] in ('', "'", '"', '\\'): + wtree = wtree[1:-1] + + for part in wtree: + if isinstance(part, list): + part = unquote(part) + unquoted.append(part) + return ''.join(unquoted) + + return unquote(wtree) + + +class HereDocLexer: + """HereDocLexer delimits whatever comes from the here-document starting newline + not included to the closing delimiter line included. + """ + def __init__(self, op, delim): + assert op in ('<<', '<<-') + if not delim: + raise ShellSyntaxError('invalid here document delimiter %s' % str(delim)) + + self._op = op + self._delim = delim + self._buffer = [] + self._token = [] + + def add(self, data, eof): + """If the here-document was delimited, return a tuple (content, remaining). + Raise NeedMore() otherwise. + """ + self._buffer += list(data) + self._parse(eof) + token = ''.join(self._token) + remaining = ''.join(self._buffer) + self._token, self._remaining = [], [] + return token, remaining + + def _parse(self, eof): + while 1: + #Look for first unescaped newline. 
Quotes may be ignored + escaped = False + for i,c in enumerate(self._buffer): + if escaped: + escaped = False + elif c=='\\': + escaped = True + elif c=='\n': + break + else: + i = -1 + + if i==-1 or self._buffer[i]!='\n': + if not eof: + raise NeedMore() + #No more data, maybe the last line is closing delimiter + line = ''.join(self._buffer) + eol = '' + self._buffer[:] = [] + else: + line = ''.join(self._buffer[:i]) + eol = self._buffer[i] + self._buffer[:i+1] = [] + + if self._op=='<<-': + line = line.lstrip('\t') + + if line==self._delim: + break + + self._token += [line, eol] + if i==-1: + break + +class Token: + #TODO: check this is still in use + OPERATOR = 'OPERATOR' + WORD = 'WORD' + + def __init__(self): + self.value = '' + self.type = None + + def __getitem__(self, key): + #Behave like a two elements tuple + if key==0: + return self.type + if key==1: + return self.value + raise IndexError(key) + + +class HereDoc: + def __init__(self, op, name=None): + self.op = op + self.name = name + self.pendings = [] + +TK_COMMA = 'COMMA' +TK_AMPERSAND = 'AMP' +TK_OP = 'OP' +TK_TOKEN = 'TOKEN' +TK_COMMENT = 'COMMENT' +TK_NEWLINE = 'NEWLINE' +TK_IONUMBER = 'IO_NUMBER' +TK_ASSIGNMENT = 'ASSIGNMENT_WORD' +TK_HERENAME = 'HERENAME' + +class Lexer: + """Main lexer. + + Call add() until the script AST is returned. + """ + # Here-document handling makes the whole thing more complex because they basically + # force tokens to be reordered: here-content must come right after the operator + # and the here-document name, while some other tokens might be following the + # here-document expression on the same line. 
+ # + # So, here-doc states are basically: + # *self._state==ST_NORMAL + # - self._heredoc.op is None: no here-document + # - self._heredoc.op is not None but name is: here-document operator matched, + # waiting for the document name/delimiter + # - self._heredoc.op and name are not None: here-document is ready, following + # tokens are being stored and will be pushed again when the document is + # completely parsed. + # *self._state==ST_HEREDOC + # - The here-document is being delimited by self._herelexer. Once it is done + # the content is pushed in front of the pending token list then all these + # tokens are pushed once again. + ST_NORMAL = 'ST_NORMAL' + ST_OP = 'ST_OP' + ST_BACKSLASH = 'ST_BACKSLASH' + ST_QUOTED = 'ST_QUOTED' + ST_COMMENT = 'ST_COMMENT' + ST_HEREDOC = 'ST_HEREDOC' + + #Match end of backquote strings + RE_BACKQUOTE_END = re.compile(r'(?<!\\)(`)') + + def __init__(self, parent_state = None): + self._input = [] + self._pos = 0 + + self._token = '' + self._type = TK_TOKEN + + self._state = self.ST_NORMAL + self._parent_state = parent_state + self._wordlexer = None + + self._heredoc = HereDoc(None) + self._herelexer = None + + ### Following attributes are not used for delimiting token and can safely + ### be changed after here-document detection (see _push_toke) + + # Count the number of tokens following a 'For' reserved word. Needed to + # return an 'In' reserved word if it comes in third place. + self._for_count = None + + def add(self, data, eof=False): + """Feed the lexer with data. + + When eof is set to True, returns unconsumed data or raise if the lexer + is in the middle of a delimiting operation. + Raise NeedMore otherwise. 
def _parse_comment(self):
    """Accumulate comment characters into self._token (method of Lexer).

    Consumes input up to, but not including, the next newline. On reaching
    the newline the lexer goes back to ST_NORMAL and the newline is left in
    the input queue.

    If the input runs out first, this simply returns with the state still
    ST_COMMENT. The _parse() loop then decides what to do: it raises
    NeedMore when more data may come, or delimits the pending token when
    eof is set. Raising NeedMore here unconditionally made a final comment
    without a trailing newline fail even at end of file.
    """
    while self._pos < len(self._input):
        c = self._input[self._pos]
        if c == '\n':
            # End of comment, do not consume the end of line
            self._state = self.ST_NORMAL
            return
        self._token += c
        self._pos += 1
tokens until the newline starting the here-document + if self._type==TK_NEWLINE: + assert self._state==self.ST_NORMAL + self._state = self.ST_HEREDOC + + self._heredoc.pendings.append((self._token, self._type, delim)) + self._token = '' + self._type = TK_TOKEN + return 1 + + # BEWARE: do not change parser state from here to the end of the function: + # when parsing between an here-document operator to the end of the line + # tokens are stored in self._heredoc.pendings. Therefore, they will not + # reach the section below. + + #Check operators + if self._type==TK_OP: + #False positive because of partial op matching + op = is_op(self._token) + if not op: + self._type = TK_TOKEN + else: + #Map to the specific operator + self._type = op + if self._token in ('<<', '<<-'): + #Done here rather than in _parse_op because there is no need + #to change the parser state since we are still waiting for + #the here-document name + if self._heredoc.op is not None: + raise ShellSyntaxError("syntax error near token '%s'" % self._token) + assert self._heredoc.op is None + self._heredoc.op = self._token + + if self._type==TK_TOKEN: + if '=' in self._token and not delim: + if self._token.startswith('='): + #Token is a WORD... a TOKEN that is. 
+ pass + else: + prev = self._token[:self._token.find('=')] + if is_name(prev): + self._type = TK_ASSIGNMENT + else: + #Just a token (unspecified) + pass + else: + reserved = get_reserved(self._token) + if reserved is not None: + if reserved=='In' and self._for_count!=2: + #Sorry, not a reserved word after all + pass + else: + self._type = reserved + if reserved in ('For', 'Case'): + self._for_count = 0 + elif are_digits(self._token) and delim in ('<', '>'): + #Detect IO_NUMBER + self._type = TK_IONUMBER + elif self._token==';': + self._type = TK_COMMA + elif self._token=='&': + self._type = TK_AMPERSAND + elif self._type==TK_COMMENT: + #Comments are not part of sh grammar, ignore them + self._token = '' + self._type = TK_TOKEN + return 0 + + if self._for_count is not None: + #Track token count in 'For' expression to detect 'In' reserved words. + #Can only be in third position, no need to go beyond + self._for_count += 1 + if self._for_count==3: + self._for_count = None + + self.on_token((self._token, self._type)) + self._token = '' + self._type = TK_TOKEN + return 1 + + def on_token(self, token): + raise NotImplementedError + + +tokens = [ + TK_TOKEN, +# To silence yacc unused token warnings +# TK_COMMENT, + TK_NEWLINE, + TK_IONUMBER, + TK_ASSIGNMENT, + TK_HERENAME, +] + +#Add specific operators +tokens += _OPERATORS.values() +#Add reserved words +tokens += _RESERVEDS.values() + +class PLYLexer(Lexer): + """Bridge Lexer and PLY lexer interface.""" + def __init__(self): + Lexer.__init__(self) + self._tokens = [] + self._current = 0 + self.lineno = 0 + + def on_token(self, token): + value, type = token + + self.lineno = 0 + t = lex.LexToken() + t.value = value + t.type = type + t.lexer = self + t.lexpos = 0 + t.lineno = 0 + + self._tokens.append(t) + + def is_empty(self): + return not bool(self._tokens) + + #PLY compliant interface + def token(self): + if self._current>=len(self._tokens): + return None + t = self._tokens[self._current] + self._current += 1 + return 
class IORedirect:
    """File redirection: operator, target filename and optional IO number."""
    def __init__(self, op, filename, io_number=None):
        self.op = op
        self.filename = filename
        self.io_number = io_number


class HereDocument:
    """Here-document: operator, delimiter name, content and optional IO number."""
    def __init__(self, op, name, content, io_number=None):
        self.op = op
        self.name = name
        self.content = content
        self.io_number = io_number


def make_io_redirect(p):
    """Make an IORedirect or HereDocument from an 'io_redirect' production.

    p is the 3-tuple ('io_redirect', io_number, io_target), where io_target
    is either ('io_file', op, filename) or
    ('io_here', op, name, content).

    Raises AssertionError when p is not an 'io_redirect' production or when
    the target kind is unknown.
    """
    name, io_number, io_target = p
    assert name=='io_redirect'

    io_type = io_target[0]
    if io_type=='io_file':
        io_type, io_op, io_file = io_target
        return IORedirect(io_op, io_file, io_number)
    elif io_type=='io_here':
        io_type, io_op, io_name, io_content = io_target
        return HereDocument(io_op, io_name, io_content, io_number)

    # io_type is bound before the branches so that this message can be
    # built; the previous code referenced it while it was still undefined.
    raise AssertionError("Invalid IO redirection token %s" % repr(io_type))
def get_production(productions, ptype):
    """Return the first entry of productions whose first item equals ptype.

    productions is a sequence of production values such as (name, obj)
    tuples or [name, ...] lists; None entries are skipped. Raise
    KeyError(ptype) if no entry matches.
    """
    for candidate in productions:
        if candidate is None:
            continue
        if candidate[0] == ptype:
            return candidate
    raise KeyError(ptype)
def p_term(p):
    """term : term separator and_or
            | and_or"""
    # The docstring above is the PLY grammar rule; keep it verbatim.
    #
    # Builds ['term', item, ...]. When the separator between the existing
    # term and the new and_or is '&', everything collected so far is wrapped
    # in a single ('async', [...]) entry before the new item is added.
    if len(p) == 2:
        p[0] = ['term', p[1]]
        return

    separator = p[2]
    run_async = separator is not None and separator[1] == '&'
    if run_async:
        p[0] = ['term', ('async', p[1][1:]), p[3]]
    else:
        p[0] = p[1] + [p[3]]
def p_case_item(p):
    """case_item : pattern RPARENS linebreak DSEMI linebreak
                 | pattern RPARENS compound_list DSEMI linebreak
                 | LPARENS pattern RPARENS linebreak DSEMI linebreak
                 | LPARENS pattern RPARENS compound_list DSEMI linebreak"""
    # The docstring above is the PLY grammar rule; keep it verbatim.
    #
    # Produces ('case_item', (patterns, commands)).
    #
    # p_case_item_ns delegates to this function with shorter productions
    # (no trailing DSEMI), so the position of the pattern cannot be derived
    # from len(p): with a leading LPARENS the old length test picked the '('
    # token and produced an empty pattern. Look the pattern up by name.
    productions = p[1:]
    name = get_production(productions, 'pattern')[1:]

    try:
        cmds = get_production(productions, "compound_list")[1:]
    except KeyError:
        # Item without a command list
        cmds = []

    p[0] = ('case_item', (name, cmds))
def p_simple_command(p):
    """simple_command : cmd_prefix cmd_word cmd_suffix
                      | cmd_prefix cmd_word
                      | cmd_prefix
                      | cmd_name cmd_suffix
                      | cmd_name"""
    # The docstring above is the PLY grammar rule; keep it verbatim.
    #
    # Sorts the pieces of the command into three lists:
    #   words   - cmd_word / cmd_name productions, plus any prefix/suffix
    #             entry that is neither a redirection nor an assignment
    #   redirs  - objects built by make_io_redirect() from 'io_redirect' entries
    #   assigns - 'ASSIGNMENT_WORD' entries
    words = []
    redirs = []
    assigns = []

    for element in p[1:]:
        kind = element[0]
        if kind in ('cmd_word', 'cmd_name'):
            words.append(element)
            continue
        if kind not in ('cmd_prefix', 'cmd_suffix'):
            continue

        # Prefix and suffix are lists: ['cmd_prefix', entry, entry, ...]
        for item in element[1:]:
            tag = item[0]
            if tag == 'io_redirect':
                redirs.append(make_io_redirect(item))
            elif tag == 'ASSIGNMENT_WORD':
                assigns.append(item)
            else:
                words.append(item)

    p[0] = ('simple_command', SimpleCommand(words, redirs, assigns))
def p_io_redirect(p):
    """io_redirect : io_file
                   | IO_NUMBER io_file
                   | io_here
                   | IO_NUMBER io_here"""
    # The docstring above is the PLY grammar rule; keep it verbatim.
    #
    # Produces ('io_redirect', io_number, target); io_number is None when
    # the redirection has no explicit IO_NUMBER.
    io_number = None
    target = p[1]
    if len(p) == 3:
        io_number, target = p[1], p[2]
    p[0] = ('io_redirect', io_number, target)
def p_separator(p):
    """separator : separator_op linebreak
                 | newline_list"""
    # The docstring above is the PLY grammar rule; keep it verbatim.
    if len(p) != 2:
        # An explicit operator was given: keep it
        p[0] = ('separator', p[1])
        return
    # Only newlines: nothing worth keeping
    p[0] = None
def _format_each(items):
    """Return a real list with format_commands() applied to every item."""
    return [format_commands(item) for item in items]


def format_commands(v):
    """Return a tree made of strings and lists. Make command trees easier to
    display.

    Lists are used instead of map() results so that the output is made of
    real lists on Python 3 as well; print_commands() and
    stringify_commands() rely on isinstance(..., list) to walk the tree.
    """
    if isinstance(v, list):
        return _format_each(v)
    if isinstance(v, tuple):
        if len(v)==2 and isinstance(v[0], str) and not isinstance(v[1], str):
            if v[0] == 'async':
                return ['AsyncList', _format_each(v[1])]
            #Avoid decomposing tuples like ('pipeline', Pipeline(...))
            return format_commands(v[1])
        return format_commands(list(v))
    elif isinstance(v, IfCond):
        return ['IfCond',
                'if', _format_each(v.cond),
                'then', _format_each(v.if_cmds),
                'else', _format_each(v.else_cmds)]
    elif isinstance(v, ForLoop):
        return ['ForLoop',
                repr(v.name)+' in ', [str(item) for item in v.items],
                'commands', _format_each(v.cmds)]
    elif isinstance(v, AndOr):
        return [v.op, format_commands(v.left), format_commands(v.right)]
    elif isinstance(v, Pipeline):
        name = 'Pipeline'
        if v.reverse_status:
            name = '!' + name
        return [name, format_commands(v.commands)]
    elif isinstance(v, SimpleCommand):
        name = ['SimpleCommand']
        if v.words:
            name += ['words', [str(word) for word in v.words]]
        if v.assigns:
            assigns = [tuple(a[1]) for a in v.assigns]
            name += ['assigns', [str(assign) for assign in assigns]]
        if v.redirs:
            name += ['redirs', _format_each(v.redirs)]
        return name
    elif isinstance(v, RedirectList):
        name = ['RedirectList']
        if v.redirs:
            name += ['redirs', _format_each(v.redirs)]
        name += ['command', format_commands(v.cmd)]
        return name
    elif isinstance(v, IORedirect):
        return ' '.join([str(part) for part in (v.io_number, v.op, v.filename)])
    elif isinstance(v, HereDocument):
        return ' '.join([str(part) for part in
                         (v.io_number, v.op, repr(v.name), repr(v.content))])
    elif isinstance(v, SubShell):
        return ['SubShell', _format_each(v.cmds)]
    else:
        return repr(v)


def print_commands(cmds, output=sys.stdout):
    """Pretty print a command tree.

    Every string of the formatted tree is written on its own line to
    output, indented by three spaces per nesting level.
    """
    if output is None:
        # "print >>None" used to fall back to the standard output
        output = sys.stdout

    def print_tree(cmd, spaces, output):
        if isinstance(cmd, list):
            for c in cmd:
                print_tree(c, spaces + 3, output)
        else:
            # write() instead of the Python 2 only "print >>output" statement
            output.write(' '*spaces + str(cmd) + '\n')

    formatted = format_commands(cmds)
    print_tree(formatted, 0, output)


def stringify_commands(cmds):
    """Serialize a command tree as a string.

    Returned string is not pretty and is currently used for unit tests only.
    Each list of the formatted tree is rendered as '<item item ...>'.
    """
    def stringify(value):
        if isinstance(value, list):
            inner = ' '.join([stringify(v) for v in value])
            return ''.join(['<', inner, '>'])
        return ' '.join([value])

    return stringify(format_commands(cmds))


def visit_commands(cmds, callable):
    """Visit the command tree and execute callable on every Pipeline and
    SimpleCommand instances found directly in (nested) tuples and lists.

    An explicit loop is used: map() is lazy on Python 3, so calling it only
    for its side effects would never invoke callable.
    """
    if isinstance(cmds, (tuple, list)):
        for c in cmds:
            visit_commands(c, callable)
    elif isinstance(cmds, (Pipeline, SimpleCommand)):
        callable(cmds)
def list2cmdline(seq):
    """
    Join a sequence of arguments into one command line string that the
    MS C runtime will split back into the same arguments.

    Quoting rules applied:

    1) Arguments are separated by a single space.

    2) An argument is wrapped in double quotation marks when it is
       empty or contains a space, a tab or a pipe character.

    3) A double quotation mark inside an argument is written as a
       backslash followed by the quotation mark.

    4) Backslashes are copied literally, unless they come right
       before a double quotation mark.

    5) Backslashes that come right before a double quotation mark
       (one found in the argument, or the closing quotation mark
       added by rule 2) are doubled, so that the quotation mark keeps
       the meaning given by rules 2 and 3.
    """

    # See
    # http://msdn.microsoft.com/library/en-us/vccelng/htm/progs_12.asp
    pieces = []
    for arg in seq:
        # Separate this argument from the previous ones
        if pieces:
            pieces.append(' ')

        quoted = (" " in arg) or ("\t" in arg) or ("|" in arg) or arg == ""
        if quoted:
            pieces.append('"')

        # Number of backslashes seen but not yet written out: whether they
        # must be doubled depends on the character that follows them.
        pending = 0
        for ch in arg:
            if ch == '\\':
                pending += 1
                continue
            if ch == '"':
                # Double the pending backslashes, then escape the quote
                pieces.append('\\' * (pending * 2))
                pieces.append('\\"')
            else:
                # Normal char: pending backslashes are literal
                pieces.append('\\' * pending)
                pieces.append(ch)
            pending = 0

        # Trailing backslashes, doubled when a closing quote follows
        trailing = '\\' * pending
        pieces.append(trailing)
        if quoted:
            pieces.append(trailing)
            pieces.append('"')

    return ''.join(pieces)