#!/usr/bin/env python3

import sys, os, subprocess, re, shutil

whitelist = (
    # type is supported by dash
    'if type systemctl >/dev/null 2>/dev/null; then',
    'if type systemd-tmpfiles >/dev/null 2>/dev/null; then',
    'type update-rc.d >/dev/null 2>/dev/null; then',
    'command -v',
    # HOSTNAME is set locally
    'buildhistory_single_commit "$CMDLINE" "$HOSTNAME"',
    # False-positive, match is a grep not shell expression
    'grep "^$groupname:[^:]*:[^:]*:\\([^,]*,\\)*$username\\(,[^,]*\\)*"',
    # TODO verify dash's '. script args' behaviour
    '. $target_sdk_dir/${oe_init_build_env_path} $target_sdk_dir >> $LOGFILE'
    )

def is_whitelisted(s):
    for w in whitelist:
        if w in s:
            return True
    return False

SCRIPT_LINENO_RE = re.compile(r' line (\d+) ')
BASHISM_WARNING = re.compile(r'^(possible bashism in.*)$', re.MULTILINE)

def process(filename, function, lineno, script):
    import tempfile

    if not script.startswith("#!"):
        script = "#! /bin/sh\n" + script

    fn = tempfile.NamedTemporaryFile(mode="w+t")
    fn.write(script)
    fn.flush()

    try:
        subprocess.check_output(("checkbashisms.pl", fn.name), universal_newlines=True, stderr=subprocess.STDOUT)
        # No bashisms, so just return
        return
    except subprocess.CalledProcessError as e:
        # TODO check exit code is 1

        # Replace the temporary filename with the function and split it
        output = e.output.replace(fn.name, function)
        if not output or not output.startswith('possible bashism'):
            # Probably starts with or contains only warnings. Dump verbatim
            # with one space indention. Can't do the splitting and whitelist
            # checking below.
            return '\n'.join([filename,
                              ' Unexpected output from checkbashisms.pl'] +
                             [' ' + x for x in output.splitlines()])

        # We know that the first line matches and that therefore the first
        # list entry will be empty - skip it.
        output = BASHISM_WARNING.split(output)[1:]
        # Turn the output into a single string like this:
        # /.../foobar.bb
        #  possible bashism in updatercd_postrm line 2 (type):
        #   if ${@use_updatercd(d)} && type update-rc.d >/dev/null 2>/dev/null; then
        #  ...
        #   ...
        result = []
        # Check the results against the whitelist
        for message, source in zip(output[0::2], output[1::2]):
            if not is_whitelisted(source):
                if lineno is not None:
                    message = SCRIPT_LINENO_RE.sub(lambda m: ' line %d ' % (int(m.group(1)) + int(lineno) - 1),
                                                   message)
                result.append(' ' + message.strip())
                result.extend(['  %s' % x for x in source.splitlines()])
        if result:
            result.insert(0, filename)
            return '\n'.join(result)
        else:
            return None

def get_tinfoil():
    scripts_path = os.path.dirname(os.path.realpath(__file__))
    lib_path = scripts_path + '/lib'
    sys.path = sys.path + [lib_path]
    import scriptpath
    scriptpath.add_bitbake_lib_path()
    import bb.tinfoil
    tinfoil = bb.tinfoil.Tinfoil()
    tinfoil.prepare()
    # tinfoil.logger.setLevel(logging.WARNING)
    return tinfoil

if __name__=='__main__':
    import shutil
    if shutil.which("checkbashisms.pl") is None:
        print("Cannot find checkbashisms.pl on $PATH, get it from https://anonscm.debian.org/cgit/collab-maint/devscripts.git/plain/scripts/checkbashisms.pl")
        sys.exit(1)

    # The order of defining the worker function,
    # initializing the pool and connecting to the
    # bitbake server is crucial, don't change it.
    def func(item):
        (filename, key, lineno), script = item
        return process(filename, key, lineno, script)

    import multiprocessing
    pool = multiprocessing.Pool()

    tinfoil = get_tinfoil()

    # This is only the default configuration and should iterate over
    # recipecaches to handle multiconfig environments
    pkg_pn = tinfoil.cooker.recipecaches[""].pkg_pn

    # TODO: use argparse and have --help
    if len(sys.argv) > 1:
        initial_pns = sys.argv[1:]
    else:
        initial_pns = sorted(pkg_pn)

    pns = set()
    scripts = {}
    print("Generating scripts...")
    for pn in initial_pns:
        for fn in pkg_pn[pn]:
            # There's no point checking multiple BBCLASSEXTENDed variants of the same recipe
            # (at least in general - there is some risk that the variants contain different scripts)
            realfn, _, _ = bb.cache.virtualfn2realfn(fn)
            if realfn not in pns:
                pns.add(realfn)
                data = tinfoil.parse_recipe_file(realfn)
                for key in data.keys():
                    if data.getVarFlag(key, "func") and not data.getVarFlag(key, "python"):
                        script = data.getVar(key, False)
                        if script:
                            filename = data.getVarFlag(key, "filename")
                            lineno = data.getVarFlag(key, "lineno")
                            # There's no point in checking a function multiple
                            # times just because different recipes include it.
                            # We identify unique scripts by file, name, and (just in case)
                            # line number.
                            attributes = (filename or realfn, key, lineno)
                            scripts.setdefault(attributes, script)


    print("Scanning scripts...\n")
    for result in pool.imap(func, scripts.items()):
        if result:
            print(result)
    tinfoil.shutdown()