#!/usr/bin/env python
#
#  File: afl-cov
#
#  Version: 0.3
#
#  Purpose: Perform lcov coverage diffs against each AFL queue file to see
#           how function and line coverage evolve over an AFL fuzzing cycle.
#
#  Copyright (C) 2015 Michael Rash (mbr@cipherdyne.org)
#
#  License (GNU General Public License):
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
#  USA
#

from shutil import rmtree
from sys import argv
import errno
import re
import subprocess
import glob
import string
import argparse
import time
import sys, os

### afl-cov release version; printed by --version and written to the
### afl-cov-status file
__version__ = '0.3'

### values for the 'collect' argument of run_cmd(): whether the output of
### the executed command should be captured and returned to the caller
WANT_OUTPUT = 1
NO_OUTPUT   = 0

def main():
    """Parse the command line and run the coverage analysis.

    Returns the process exit status: 0 on success (including --version)
    and 1 when argument validation fails or when the test case
    processing reports failure (a requested function/line was not found).
    """
    exit_success = 0
    exit_failure = 1

    cargs = handle_cmdline()

    if cargs.version:
        ### single-argument print(...) behaves the same under Python 2
        ### and also parses under Python 3
        print("afl-cov-" + __version__)
        return exit_success

    (gpaths, rv) = get_paths(cargs)
    if not rv:
        return exit_failure

    if not validate_args(gpaths, cargs):
        return exit_failure

    if cargs.background:
        run_in_background()

    ### the cov/ directory must exist before the status file is written
    init_dir(gpaths, cargs)
    write_status(cargs)

    ### process_afl_test_cases() returns a truthy value on success
    if process_afl_test_cases(gpaths, cargs):
        return exit_success
    return exit_failure

def run_in_background():
    """Continue execution in a background child process.

    Forks once: the parent exits immediately with status 0 and the child
    starts a new session via setsid(). If the fork fails, an error is
    printed and the process exits with status 1.
    """
    ### could use the python 'daemon' module, but it isn't always
    ### installed, and we just need a basic backgrounding
    ### capability anyway
    try:
        pid = os.fork()
    except OSError:
        ### os.fork() reports failure by raising OSError; it never
        ### returns a negative pid
        print("[*] fork() error, exiting.")
        ### os._exit() skips the normal interpreter cleanup, so flush
        ### explicitly or the message above can be lost
        sys.stdout.flush()
        os._exit(1)

    if pid > 0:
        ### parent process
        os._exit(0)

    ### child process
    os.setsid()
    return

def process_afl_test_cases(gpaths, cargs):
    """Walk the AFL queue/ directory and collect coverage per test case.

    For every new id:NNNNNN* file the coverage command is executed (when
    --coverage-cmd is set), lcov results are collected, new coverage is
    appended to the id-delta-cov file and diffed against the previous
    test case. In --live mode the queue is re-scanned until afl-fuzz
    appears to be stopped or the queue id limit is reached; otherwise a
    single pass is made.

    gpaths -- global output paths as built by get_paths()
    cargs  -- parsed command line arguments

    Returns 1 on success, or 0 when --func-search / --line-search was
    requested and no matching coverage was found.
    """
    found         = 0
    afl_files     = []
    seen_files    = set()
    prev_file     = ''
    cov_paths     = {}
    gcov          = {}
    zero_cov      = {}
    file_num      = 0
    rv            = 1
    limit_reached = False
    queue_dir     = cargs.afl_fuzzing_dir + '/queue'

    while True:

        ### afl_files keeps the processing order, seen_files gives
        ### constant time membership tests for large queues
        new_files = []
        for f in import_dir(queue_dir):
            if f not in seen_files:
                seen_files.add(f)
                afl_files.append(f)
                new_files.append(f)

        if cargs.live:
            if is_afl_fuzz_running(cargs):
                if not new_files:
                    logr("[-] No new AFL test cases, sleeping for %d seconds" \
                            % cargs.sleep, cargs)
                    time.sleep(cargs.sleep)
                    continue
            else:
                ### NOTE(review): files that showed up in this last scan
                ### are not processed before leaving the loop
                logr("[+] afl-fuzz appears to be stopped...", cargs)
                break

        logr("\n*** Imported %d new test cases from: %s\n" \
                % (len(new_files), queue_dir), cargs)

        for f in new_files:

            curr_cycle = get_cycle_num(file_num, cargs)

            logr("[+] AFL test case: %s (%d / %d), cycle: %d" \
                    % (os.path.basename(f), file_num, len(afl_files),
                            curr_cycle), cargs)

            cov_paths = gen_paths(prev_file, f, gpaths, cargs)

            if cargs.coverage_cmd:
                ### execute the command to generate code coverage stats
                ### for the current AFL queue file
                run_cmd(cargs.coverage_cmd.replace('AFL_FILE', f), \
                        cargs, NO_OUTPUT)

                ### collect the code coverage stats
                gen_coverage(gpaths, cov_paths, f, cargs)

                ### extract and write out the coverage result for this
                ### id:NNNNNN* test case
                append_id_delta_cov(gcov, curr_cycle, cov_paths, gpaths, cargs)

                ### diff to the previous code coverage and look for
                ### new lines/functions
                if file_num > 0:
                    zero_cov = coverage_diff(cov_paths, gpaths, prev_file, f, cargs)

                if not cargs.disable_lcov_web and cargs.lcov_web_all:
                    gen_web_cov_report(gpaths, cov_paths, cargs)

            prev_file = f
            file_num += 1

            if cargs.func_search or cargs.line_search:
                if not found:
                    found = search_cov(gpaths, cargs)
                if found and not cargs.live:
                    break

            if cargs.afl_queue_id_limit and file_num > cargs.afl_queue_id_limit:
                logr("[+] queue/ id limit of %d reached..." \
                        % cargs.afl_queue_id_limit, cargs)
                limit_reached = True
                break

        ### the id limit has to end the outer loop as well, otherwise
        ### --live mode keeps picking up files past the limit
        if limit_reached or not cargs.live:
            break

    logr("[+] Processed id:%d / %d test cases.\n" \
            % (file_num-1, len(afl_files)), cargs)

    ### the final reports only make sense when coverage was collected in
    ### this run; a search-only run against an existing cov/ directory
    ### must not overwrite earlier reports with empty ones
    if cargs.coverage_cmd:

        ### write the final zero coverage and positive coverage reports
        write_zero_cov(gpaths['zero_cov'], zero_cov, cargs)
        write_pos_cov(gpaths['pos_cov'], gcov, cargs)

        ### cov_paths is empty when no test case was processed at all
        if not cargs.disable_lcov_web and cov_paths:
            ### with --lcov-web-all the report for the last test case
            ### has already been generated inside the loop
            if not os.path.isdir(cov_paths['lcov_web_dir']):
                gen_web_cov_report(gpaths, cov_paths, cargs)

            final_link = gpaths['web_dir'] + '/lcov-web-final.html'
            logr("[+] Final lcov web report: %s" % final_link, cargs)

            ### replace a link left behind by a previous run
            if os.path.lexists(final_link):
                os.remove(final_link)
            os.symlink(cov_paths['lcov_web_dir'] + '/index.html', final_link)

    if not found:
        if cargs.func_search:
            logr("[-] Function '%s' not found..." % cargs.func_search, cargs)
            rv = 0
        elif cargs.line_search:
            logr("[-] Line %s not found..." % cargs.line_search, cargs)
            rv = 0

    return rv

def coverage_diff(cov_paths, gpaths, a, b, cargs):
    """Report functions/lines that went from zero to positive coverage.

    Compares the zero coverage entries of the previous test case (a)
    against those of the current one (b). Every entry that is no longer
    in the zero coverage set is logged and appended to the per test case
    diff file. The lcov files of the previous test case are removed after
    they are parsed unless --preserve-all-lcov-files is set.

    Returns the zero coverage dictionary of the current test case.
    """
    before = extract_coverage(cov_paths['prev_lcov_info_final'], cargs)[0]

    ### the previous lcov results are parsed at this point, so the
    ### associated files can go unless instructed to keep them
    if not cargs.preserve_all_lcov_files:
        rm_prev_cov_files(cov_paths)

    after = extract_coverage(cov_paths['lcov_info_final'], cargs)[0]

    name_a = os.path.basename(a)
    name_b = os.path.basename(b)
    header_done = False

    for src in before:
        if src not in after:
            continue

        src_done = False
        for ctype in before[src]:
            for val in sorted(before[src][ctype]):
                if val in after[src][ctype]:
                    ### still never executed
                    continue

                ### the diff header is emitted once per diff
                if not header_done:
                    logr("\n    Coverage diff %s %s" % (name_a, name_b), cargs)
                    header = "diff %s -> %s" % (name_a, name_b)
                    logr(header, cargs)
                    append_file(header, cov_paths['diff'])
                    header_done = True

                ### the source file name is emitted once per file
                if not src_done:
                    src_line = "Src file: " + src
                    logr(src_line, cargs)
                    append_file(src_line, cov_paths['diff'])
                    src_done = True

                entry = "  New '" + ctype + "' coverage: " + val
                logr(entry, cargs)
                append_file(entry, cov_paths['diff'])

    if header_done:
        logr("", cargs)

    return after

def write_zero_cov(cpath, zero_cov, cargs):
    """Write the final zero coverage report to cpath.

    The file is (re)created with a short comment header, then the
    entries of zero_cov are appended through write_cov().
    """
    logr("[+] Final zero coverage report: %s" % cpath, cargs)

    header = "# All functions / lines in this file were never executed by any\n" \
            + "# AFL test case.\n"
    with open(cpath, 'w') as report:
        report.write(header)

    write_cov(cpath, zero_cov, cargs)

def write_pos_cov(cpath, gcov, cargs):
    """Write the final positive coverage report to cpath.

    The file is (re)created with a short comment header, then the
    entries of gcov are appended through write_cov().
    """
    logr("[+] Final positive coverage report: %s" % cpath, cargs)

    header = "# All functions / lines in this file were executed by at\n" \
            + "# least one AFL test case. See the cov/id-delta-cov file\n" \
            + "# for more information.\n"
    with open(cpath, 'w') as report:
        report.write(header)

    write_cov(cpath, gcov, cargs)

def write_cov(cpath, cov, cargs):
    """Append the coverage dictionary cov to the report file cpath.

    cov maps a source file name to a dictionary keyed by coverage type
    ('function' / 'line'). Functions are written in sorted order; line
    numbers are written in numeric order, and only when
    --coverage-include-lines is set.
    """
    with open(cpath, 'a') as report:
        for src in cov:
            report.write("File: %s\n" % src)
            for ctype in sorted(cov[src]):
                if ctype == 'function':
                    entries = sorted(cov[src][ctype])
                elif ctype == 'line' and cargs.coverage_include_lines:
                    ### line numbers are strings, order them numerically
                    entries = sorted(cov[src][ctype], key=int)
                else:
                    continue
                for val in entries:
                    report.write("    %s: %s\n" % (ctype, val))

def rm_prev_cov_files(cov_paths):
    """Delete the three lcov files recorded for the previous test case."""
    for key in ('prev_lcov_base', 'prev_lcov_info', 'prev_lcov_info_final'):
        os.remove(cov_paths[key])

def write_status(cargs):
    """Record pid, version and command line in cov/afl-cov-status."""
    status_path = cargs.afl_fuzzing_dir + '/cov/afl-cov-status'
    with open(status_path, 'w') as status:
        status.write("afl_cov_pid     : %d\n" % os.getpid())
        status.write("afl_cov_version : %s\n" % __version__)
        status.write("command_line    : %s\n" % ' '.join(argv))

def append_file(pstr, path):
    """Append pstr followed by a newline to the file at path."""
    with open(path, 'a') as out:
        out.write("%s\n" % pstr)

def append_id_delta_cov(gcov, curr_cycle, cov_paths, gpaths, cargs):
    """Record coverage that the current test case is the first to reach.

    The positive coverage of the current lcov results is compared with
    the accumulated coverage in gcov. Every function (and every line,
    when --coverage-include-lines is set) that is not yet in gcov is
    added to it and appended to the id-delta-cov file as:

        id_file, cycle, src_file, coverage_type, fcn/line
    """
    pos_cov = extract_coverage(cov_paths['lcov_info_final'], cargs)[1]

    with open(gpaths['id_delta_cov'], 'a') as delta:
        for src in pos_cov:
            if src not in gcov:
                cov_init(src, gcov)

            for ctype in sorted(pos_cov[src]):
                if ctype == 'function':
                    ordered = sorted(pos_cov[src][ctype])
                elif ctype == 'line' and cargs.coverage_include_lines:
                    ### line numbers are strings, order them numerically
                    ordered = sorted(pos_cov[src][ctype], key=int)
                else:
                    continue

                known = gcov[src][ctype]
                for val in ordered:
                    if val in known:
                        continue
                    known[val] = ''
                    delta.write("%s, %s, %s, %s, %s\n" \
                            % (cov_paths['id_file'],
                                curr_cycle, src, ctype, val))

def cov_init(cfile, cov):
    """(Re)set the entry for cfile in cov to empty function/line maps."""
    cov[cfile] = {'function': {}, 'line': {}}

def extract_coverage(lcov_file, cargs):
    """Parse an lcov tracefile into zero and positive coverage maps.

    lcov_file -- path of the lcov tracefile to read
    cargs     -- parsed command line arguments (not used here)

    Returns a (zero_cov, pos_cov) tuple. Both map each source file named
    by an 'SF:' record to {'function': {...}, 'line': {...}}, the same
    per-file layout that cov_init() builds. Function names carry a
    trailing '()' and line numbers are kept as strings; the dictionary
    values are always ''. An 'FNDA:'/'DA:' record with a count of '0'
    goes to zero_cov, every other count goes to pos_cov.
    """
    zero_cov     = {}
    pos_cov      = {}
    current_file = ''

    with open(lcov_file, 'r') as f:
        for line in f:
            line = line.strip()

            ### the record name is anchored so that a function name that
            ### merely contains 'SF:' (e.g. a demangled ns::SF::fn) is
            ### not taken for a source file, and the whole remainder of
            ### the line is used so paths with spaces are not truncated
            m = re.search(r'^SF:(.+)', line)
            if m:
                current_file = m.group(1)
                zero_cov[current_file] = {'function': {}, 'line': {}}
                pos_cov[current_file]  = {'function': {}, 'line': {}}
                continue

            if not current_file:
                continue

            ### FNDA:<execution count>,<function name>
            m = re.search(r'^FNDA:(\d+),(\S+)', line)
            if m:
                fcn = m.group(2) + '()'
                if m.group(1) == '0':
                    ### the function was never called
                    zero_cov[current_file]['function'][fcn] = ''
                else:
                    pos_cov[current_file]['function'][fcn] = ''
                continue

            ### DA:<line number>,<execution count>
            m = re.search(r'^DA:(\d+),(\d+)', line)
            if m:
                lnum = m.group(1)
                if m.group(2) == '0':
                    ### the line was never executed
                    zero_cov[current_file]['line'][lnum] = ''
                else:
                    pos_cov[current_file]['line'][lnum] = ''

    return zero_cov, pos_cov

def search_cov(gpaths, cargs):
    """Search the id-delta-cov file for a function or line.

    Looks for records matching --func-search (optionally restricted to
    --src-file) and/or --line-search within --src-file, and logs every
    match together with the test case and cycle that produced it.

    Returns 1 if at least one match was found, 0 otherwise.
    """
    search_rv = 0

    with open(gpaths['id_delta_cov'], 'r') as f:
        for line in f:
            line = line.strip()

            ### skip blank lines and the '#' comment header
            if not line or line.startswith('#'):
                continue

            ### id:NNNNNN*_file, cycle, src_file, cov_type, fcn/line\n")
            fields = line.split(', ')
            if len(fields) != 5:
                ### not a complete record, nothing to compare against
                continue
            (id_file, cycle_num, src_file, cov_type, val) = fields

            if cargs.func_search and cov_type == 'function' \
                    and val == cargs.func_search:
                if cargs.src_file:
                    if cargs.src_file == src_file:
                        logr("[+] Function '%s' in file: '%s' executed by: '%s', cycle: %s" \
                                % (val, src_file, id_file, cycle_num), cargs)
                        search_rv = 1
                else:
                    logr("[+] Function '%s' executed by: '%s', cycle: %s" \
                            % (val, id_file, cycle_num), cargs)
                    search_rv = 1

            if cargs.line_search and cov_type == 'line' \
                    and cargs.src_file == src_file \
                    and val == cargs.line_search:
                logr("[+] Line '%s' in file: '%s' executed by: '%s', cycle: %s" \
                        % (val, src_file, id_file, cycle_num), cargs)
                search_rv = 1

    return search_rv

def get_cycle_num(id_num, cargs):
    """Look up the AFL cycle number for a test case id in plot_data.

    The first plot_data line whose third field equals id_num decides the
    result; its second field is returned as an integer. Returns 0 when
    the plot_data file does not exist or no line matches.
    """
    ### default cycle
    cycle_num = 0

    plot_data = cargs.afl_fuzzing_dir + '/plot_data'
    if not os.path.exists(plot_data):
        return cycle_num

    wanted = str(id_num)

    with open(plot_data) as f:
        for line in f:
            ### unix_time, cycles_done, cur_path, paths_total, pending_total,...
            ### 1427742641, 11, 54, 419, 45, 0, 2.70%, 0, 0, 9, 1645.47
            vals = line.strip().split(', ')

            ### blank or short lines cannot be tested against the id
            if len(vals) < 3:
                continue

            ### test the id number against the current path
            if vals[2] == wanted:
                cycle_num = int(vals[1])
                break

    return cycle_num

def gen_coverage(gpaths, cov_paths, afl_file, cargs):
    """Run lcov for the current test case and log the coverage summary.

    Four lcov invocations are made:
      1. capture the initial (baseline) data into lcov_base
      2. capture the current counters into lcov_info
      3. merge lcov_base and lcov_info into a temporary tracefile
      4. remove /usr/include/* from the merged data and write the result
         to lcov_info_final

    The 'lines', 'functions' and 'branches' summary lines printed by the
    last invocation are logged.
    """
    lcov = "lcov --rc lcov_branch_coverage=1 --no-checksum"

    ### the merged tracefile is only an intermediate result; the filtered
    ### version of it becomes lcov_info_final
    merged = cov_paths['lcov_info_final'] + '.merged'

    run_cmd(lcov + " --capture --initial" \
            + " --directory " + cargs.code_dir \
            + " --output-file " + cov_paths['lcov_base'], \
            cargs, NO_OUTPUT)

    run_cmd(lcov + " --capture --directory " + cargs.code_dir \
            + " --output-file " + cov_paths['lcov_info'], \
            cargs, NO_OUTPUT)

    run_cmd(lcov + " -a " + cov_paths['lcov_base'] \
            + " -a " + cov_paths['lcov_info'] \
            + " --output-file " + merged, \
            cargs, NO_OUTPUT)

    try:
        ### filter the merged data, not lcov_info: reading lcov_info
        ### here would overwrite lcov_info_final without the baseline
        out = run_cmd(lcov + " -r " + merged \
                + " /usr/include/*  --output-file " \
                + cov_paths['lcov_info_final'], cargs, WANT_OUTPUT)
    finally:
        if os.path.exists(merged):
            os.remove(merged)

    ### the three summary prefixes are mutually exclusive, so one pattern
    ### covers them
    summary_re = re.compile(r'^\s+((?:lines|functions|branches)\.\..*:\s.*)')
    for line in out.splitlines():
        m = summary_re.search(line)
        if m:
            logr("    " + m.group(1), cargs)
    return

def gen_web_cov_report(gpaths, cov_paths, cargs):
    """Generate the genhtml web report for the current test case.

    The report is written to cov_paths['lcov_web_dir'] from the
    lcov_info_final tracefile. An already existing output directory is
    reused, so calling this twice for the same test case does not fail.
    """
    web_dir = cov_paths['lcov_web_dir']

    ### os.mkdir() raises OSError when the directory is already there
    if not os.path.isdir(web_dir):
        os.mkdir(web_dir)

    run_cmd("genhtml --branch-coverage --output-directory " \
            + web_dir + " " \
            + cov_paths['lcov_info_final'], \
            cargs, NO_OUTPUT)
    return

def is_afl_fuzz_running(cargs):
    """Check whether the afl-fuzz process named in fuzzer_stats is alive.

    Reads the first 'fuzzer_pid' line of the fuzzer_stats file and probes
    that pid with signal 0. Returns 1 when the process exists (including
    when it exists but may not be signalled, EPERM), and 0 otherwise,
    also when fuzzer_stats is missing or has no fuzzer_pid line.
    """
    found = 0

    stats_file = cargs.afl_fuzzing_dir + '/fuzzer_stats'

    ### no stats file means there is no pid to probe; callers poll this
    ### function while waiting for afl-fuzz to start
    if not os.path.exists(stats_file):
        return found

    with open(stats_file, 'r') as f:
        for line in f:
            line = line.strip()
            ### fuzzer_pid     : 13238
            m = re.search(r'fuzzer_pid\s+:\s+(\d+)', line)
            if not m:
                continue

            try:
                ### signal 0 performs the existence/permission check
                ### without delivering a signal
                os.kill(int(m.group(1)), 0)
            except OSError as e:
                if e.errno == errno.EPERM:
                    found = 1
            else:
                found = 1
            break

    return found

def gen_paths(prev_afl_file, afl_file, gpaths, cargs):
    """Build the per test case paths for afl_file.

    Returns a dictionary with the diff file, the id file name, the web
    report directory and the three raw lcov files of the current test
    case. When prev_afl_file is non-empty the matching 'prev_*' lcov
    paths of the previous test case are included as well.
    """
    lcov_kinds = ('lcov_base', 'lcov_info', 'lcov_info_final')
    lcov_prefix = gpaths['lcov_dir'] + '/'

    curr_name = os.path.basename(afl_file)

    cov_paths = {
        ### coverage diffs from one ID file to the next
        'diff': gpaths['diff_dir'] + '/' + curr_name,
        ### current id:NNNNNN* test case file
        'id_file': curr_name,
        ### web files
        'lcov_web_dir': gpaths['web_dir'] + '/' + curr_name,
    }

    ### raw lcov files
    for kind in lcov_kinds:
        cov_paths[kind] = lcov_prefix + curr_name + '.' + kind

    if prev_afl_file:
        prev_name = os.path.basename(prev_afl_file)
        for kind in lcov_kinds:
            cov_paths['prev_' + kind] = lcov_prefix + prev_name + '.' + kind

    return cov_paths

def run_cmd(cmd, cargs, collect):
    """Execute an external command.

    cmd     -- command line as a single string
    cargs   -- parsed command line arguments
    collect -- WANT_OUTPUT to capture and return the command's stdout,
               NO_OUTPUT to just run it

    With WANT_OUTPUT the command is split on whitespace and run without
    a shell; subprocess.CalledProcessError is raised on a non-zero exit.
    With NO_OUTPUT the command runs through the shell, its exit status is
    ignored, and stdout/stderr go to /dev/null unless
    --disable-cmd-redirection is set. Returns the captured output, or ''
    when nothing was collected.
    """
    out = ''

    if cargs.verbose:
        logr("    CMD: %s" % cmd, cargs)

    if collect == WANT_OUTPUT:
        ### captured output does not use the /dev/null redirection, so no
        ### handle needs to be opened on this path
        return subprocess.check_output(cmd.split())

    fh = None
    if not cargs.disable_cmd_redirection:
        fh = open(os.devnull, 'w')

    try:
        subprocess.call(cmd, stdin=None,
                stdout=fh, stderr=subprocess.STDOUT, shell=True)
    finally:
        ### release the handle even if the command could not be started
        if fh is not None:
            fh.close()

    return out

def import_dir(qdir):
    """Return the sorted list of id:* files found directly in qdir."""
    queue_files = glob.glob(qdir + "/id:*")
    queue_files.sort()
    return queue_files

def get_paths(cargs):
    """Build the global output paths below <afl-fuzzing-dir>/cov.

    Returns a (gpaths, rv) tuple. rv is 1 on success; when no AFL fuzzing
    directory was given an error is printed and ({}, 0) is returned.
    """
    gpaths = {}

    if not cargs.afl_fuzzing_dir:
        ### single-argument print(...) behaves the same under Python 2
        ### and also parses under Python 3
        print("[*] Must specify AFL fuzzing dir with --afl-fuzzing-dir or -d")
        return gpaths, 0

    top_dir = cargs.afl_fuzzing_dir + '/cov'

    gpaths['top_dir']  = top_dir
    gpaths['web_dir']  = top_dir + '/web'
    gpaths['lcov_dir'] = top_dir + '/lcov'
    gpaths['diff_dir'] = top_dir + '/diff'

    ### summary coverage results
    gpaths['id_delta_cov'] = top_dir + '/id-delta-cov'
    gpaths['zero_cov']     = top_dir + '/zero-cov'
    gpaths['pos_cov']      = top_dir + '/pos-cov'

    return gpaths, 1

def validate_args(gpaths, cargs):
    """Check the command line arguments for consistency.

    Prints an error and returns 0 on the first problem found, returns 1
    when the arguments are usable. As side effects, '()' is appended to
    --func-search when missing, and quiet mode is switched on for a
    search-only run (no --coverage-cmd) unless --verbose is set.
    """
    search_mode = cargs.func_search or cargs.line_search

    if cargs.coverage_cmd:
        if 'AFL_FILE' not in cargs.coverage_cmd:
            print("[*] --coverage-cmd must contain AFL_FILE")
            return 0
    elif not search_mode:
        print("[*] Must set --coverage-cmd or --func-search/--line-search")
        return 0

    if cargs.code_dir:
        if not os.path.exists(cargs.code_dir):
            print("[*] --code-dir path does not exist")
            return 0
    elif cargs.coverage_cmd:
        ### only a search-only run can do without --code-dir; as soon as
        ### coverage is collected the lcov commands need the directory
        print("[*] Must set --code-dir unless using --func-search " \
                "against existing afl-cov directory")
        return 0

    if search_mode:
        if cargs.func_search and '()' not in cargs.func_search:
            cargs.func_search += '()'
        if not cargs.verbose and not cargs.coverage_cmd:
            cargs.quiet = 1
        if cargs.line_search and not cargs.src_file:
            print("[*] Must set --src-file in --line-search mode")
            return 0

    ### in --live mode the fuzzing directory may not exist yet
    if not cargs.live and not fuzzing_dir_exists(cargs):
        print("[*] It doesn't look like directory '%s' exists" \
            % (cargs.afl_fuzzing_dir + '/queue'))
        return 0

    if os.path.exists(gpaths['top_dir']) \
            and not cargs.overwrite and not search_mode:
        print("[*] Existing coverage dir found, use --overwrite to " \
                "re-calculate coverage")
        return 0

    return 1

def init_dir(gpaths, cargs):
    """Prepare the cov/ output directory tree.

    In --live mode this first waits (sleeping --sleep seconds per round)
    until the AFL fuzzing directory exists and afl-fuzz is running. The
    output directories and the id-delta-cov header are created when the
    top level cov/ directory does not exist, or after removing it when
    --overwrite is set. When the directories were (re)created and
    --coverage-cmd is set, the lcov counters are zeroed unless
    --disable-coverage-init is given.

    Always returns 1.
    """
    if cargs.live:
        while not fuzzing_dir_exists(cargs):
            if not cargs.background:
                print("[-] Sleep for %d seconds for AFL fuzzing directory to be created..." \
                        % cargs.sleep)
            time.sleep(cargs.sleep)

        ### if we make it here then afl-fuzz is presumably running
        while not is_afl_fuzz_running(cargs):
            if not cargs.background:
                print("[-] Sleep for %d seconds waiting for afl-fuzz to be started...." \
                    % cargs.sleep)
            time.sleep(cargs.sleep)

    create_cov_dirs = 0
    if not os.path.exists(gpaths['top_dir']):
        create_cov_dirs = 1
    elif cargs.overwrite:
        rmtree(gpaths['top_dir'])
        create_cov_dirs = 1

    if create_cov_dirs:
        ### top_dir comes first since the others live below it
        for k in ['top_dir', 'web_dir', 'lcov_dir', 'diff_dir']:
            os.mkdir(gpaths[k])

        ### write coverage results in the following format
        with open(gpaths['id_delta_cov'], 'w') as cfile:
            cfile.write("# id:NNNNNN*_file, cycle, src_file, coverage_type, fcn/line\n")

    if not cargs.disable_coverage_init \
            and create_cov_dirs and cargs.coverage_cmd:
        ### reset code coverage counters
        run_cmd("lcov --rc lcov_branch_coverage=1 " \
                + "--no-checksum --zerocounters --directory " \
                + cargs.code_dir, cargs, NO_OUTPUT)

    return 1

def fuzzing_dir_exists(cargs):
    """Return 1 if the AFL fuzzing dir and its queue/ subdir exist, else 0."""
    top = cargs.afl_fuzzing_dir
    if os.path.exists(top) and os.path.exists(top + '/queue'):
        return 1
    return 0

def logr(pstr, cargs):
    """Log a message to the console and to cov/afl-cov.log.

    The message is printed (indented by four spaces) unless running in
    background or quiet mode; it is always appended to the log file.
    """
    if not (cargs.background or cargs.quiet):
        ### single-argument print(...) behaves the same under Python 2
        ### and also parses under Python 3
        print("    " + pstr)
    append_file(pstr, cargs.afl_fuzzing_dir + '/cov/afl-cov.log')
    return

def handle_cmdline():
    """Define the afl-cov command line options and parse sys.argv.

    Returns the argparse namespace holding one attribute per option.
    """
    parser = argparse.ArgumentParser()

    def option(*names, **kwargs):
        ### option that takes a value
        parser.add_argument(*names, **kwargs)

    def flag(*names, **kwargs):
        ### boolean switch, off unless given on the command line
        parser.add_argument(*names, action='store_true', default=False,
                **kwargs)

    option("-e", "--coverage-cmd", type=str,
            help="set command to exec (including args, and assumes code coverage support)")
    option("-d", "--afl-fuzzing-dir", type=str,
            help="top level AFL fuzzing directory")
    option("-c", "--code-dir", type=str,
            help="directory where the code lives (compiled with code coverage support)")
    flag("-O", "--overwrite",
            help="overwrite existing coverage results")
    flag("--disable-cmd-redirection",
            help="disable redirection of command results to /dev/null")
    flag("--disable-lcov-web",
            help="disable generation of all lcov web code coverage reports")
    flag("--disable-coverage-init",
            help="disable initialization of code coverage counters at afl-cov startup")
    flag("--coverage-include-lines",
            help="include lines in zero-coverage status files")
    flag("--live",
            help="process a live AFL directory, and afl-cov will exit when it appears afl-fuzz has been stopped")
    option("--sleep", type=int, default=60,
            help="In --live mode, # of seconds to sleep between checking for new queue files")
    flag("--background",
            help="background mode - if also in --live mode, will exit when the alf-fuzz process is finished")
    flag("--lcov-web-all",
            help="generate lcov web reports for all id:NNNNNN* files instead of just the last one")
    flag("--preserve-all-lcov-files",
            help="Keep all lcov files (not usually necessary)")
    option("--func-search", type=str,
            help="search for coverage of a specific function")
    option("--line-search", type=str,
            help="search for coverage of a specific line number (requires --src-file)")
    option("--src-file", type=str,
            help="restrict function or line search to a specfic source file")
    option("--afl-queue-id-limit", type=int, default=0,
            help="limit the number of id:NNNNNN* files processed in the AFL queue/ directory")
    flag("-v", "--verbose", help="verbose mode")
    flag("-V", "--version", help="print version and exit")
    flag("-q", "--quiet", help="quiet mode")

    return parser.parse_args()

### run main() only when executed as a script, and hand its return value
### to sys.exit() as the process exit status
if __name__ == "__main__":
    sys.exit(main())
