| author | Chris Ball <chris@printf.net> | 2023-09-03 06:14:16 -0700 |
|---|---|---|
| committer | Chris Ball <chris@printf.net> | 2023-09-05 01:37:13 -0700 |
| commit | 91938d2dfc70b782d6cc40c031b3a18f63d4a6e5 (patch) | |
| tree | 8df44099ec6af8dfd9a6637f6d46ccdbed40b7ec | |
| parent | 8e8acd0a04b1bd15cee6d934e026cc414a719881 (diff) | |
| download | afl++-91938d2dfc70b782d6cc40c031b3a18f63d4a6e5.tar.gz | |
Allow config of all experiment params, average across runs
-rw-r--r-- | benchmark/benchmark.py | 270 |
1 file changed, 153 insertions, 117 deletions
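The headline change: each target/mode combination is now fuzzed `--runs` times (default 5), and the reported score is the average of the per-run `execs_per_sec` sums, with a warning when the spread between the fastest and slowest run exceeds 15%. A minimal standalone sketch of that averaging step follows; the per-run scores here are made-up placeholders, while the real values come from each fuzzer's `fuzzer_stats` file.

```python
from decimal import Decimal

# Hypothetical per-run scores: each entry is the sum of execs_per_sec across all
# afl-fuzz workers in one run (the patch reads these from fuzzer_stats files).
execs_per_sec = [Decimal("11500.0"), Decimal("11804.2"), Decimal("11231.5")]

avg_score = round(Decimal(sum(execs_per_sec) / len(execs_per_sec)), 2)
spread_pct = ((max(execs_per_sec) - min(execs_per_sec)) / avg_score) * 100

print(f" [*] Average score for this test across all runs was: {avg_score}")
if spread_pct > 15:
    print(" [*] The difference between your slowest and fastest runs was >15%, maybe try again?")
```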
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index c3f4ecee..e6082855 100644
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -1,161 +1,197 @@
 #!/usr/bin/env python3
-# Requires Python 3.6+.
-# Author: Chris Ball <chris@printf.net>
-# Ported from Marc "van Hauser" Heuse's "benchmark.sh".
+# Part of the aflplusplus project, requires Python 3.7+.
+# Author: Chris Ball <chris@printf.net>, ported from Marc "van Hauser" Heuse's "benchmark.sh".
 import argparse
 import asyncio
-import glob
 import json
 import multiprocessing
 import os
+import platform
 import shutil
 import sys
-from collections import defaultdict
+import time
+from dataclasses import dataclass
 from decimal import Decimal
-
-reset = "\033[0m"
-blue = lambda text: f"\033[1;94m{text}{reset}"
-gray = lambda text: f"\033[1;90m{text}{reset}"
-green = lambda text: f"\033[0;32m{text}{reset}"
-red = lambda text: f"\033[0;31m{text}{reset}"
-
-targets = [
-    {"source": "../test-instr.c", "binary": "test-instr"},
-    {"source": "../utils/persistent_mode/test-instr.c", "binary": "test-instr-persistent-shmem"},
+from enum import Enum, auto
+from pathlib import Path
+
+blue = lambda text: f"\033[1;94m{text}\033[0m"; gray = lambda text: f"\033[1;90m{text}\033[0m"
+green = lambda text: f"\033[0;32m{text}\033[0m"; red = lambda text: f"\033[0;31m{text}\033[0m"
+yellow = lambda text: f"\033[0;33m{text}\033[0m"
+
+class Mode(Enum):
+    multicore = auto()
+    singlecore = auto()
+
+@dataclass
+class Target:
+    source: Path
+    binary: str
+
+all_modes = [Mode.singlecore, Mode.multicore]
+all_targets = [
+    Target(source=Path("../utils/persistent_mode/test-instr.c").resolve(), binary="test-instr-persist-shmem"),
+    Target(source=Path("../test-instr.c").resolve(), binary="test-instr")
 ]
-modes = ["single-core", "multi-core"]
-tree = lambda: defaultdict(tree)  # recursive (arbitrary-depth) defaultdict!
-results = tree()
-between_tests = False
-parser = argparse.ArgumentParser()
-parser.add_argument("-d", "--debug", action="store_true")
+mode_names = [mode.name for mode in all_modes]
+target_names = [target.binary for target in all_targets]
+cpu_count = multiprocessing.cpu_count()
+
+parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("-b", "--basedir", help="directory to use for temp files", type=str, default="/tmp/aflpp-benchmark")
+parser.add_argument("-d", "--debug", help="show verbose debugging output", action="store_true")
+parser.add_argument("-r", "--runs", help="how many runs to average results over", type=int, default=5)
+parser.add_argument("-f", "--fuzzers", help="how many afl-fuzz workers to use", type=int, default=cpu_count)
+parser.add_argument("-m", "--mode", help="pick modes", action="append", default=["multicore"], choices=mode_names)
+parser.add_argument(
+    "-t", "--target", help="pick targets", action="append", default=["test-instr-persist-shmem"], choices=target_names
+)
 args = parser.parse_args()
 
-async def clean_up() -> None:
-    """Remove temporary files."""
-    shutil.rmtree("in")
+# Really unsatisfying argparse behavior: we want a default and to allow multiple choices, but if there's a manual choice
+# it should override the default. Seems like we have to remove the default to get that and have correct help text?
+if len(args.target) > 1: args.target = args.target[1:]
+if len(args.mode) > 1: args.mode = args.mode[1:]
+
+targets = [target for target in all_targets if target.binary in args.target]
+modes = [mode for mode in all_modes if mode.name in args.mode]
+results = {"config": {}, "hardware": {}, "targets": {t.binary: {m.name: {} for m in modes} for t in targets}}
+debug = lambda text: args.debug and print(blue(text))
+if Mode.multicore in modes:
+    print(blue(f" [*] Using {args.fuzzers} fuzzers for multicore fuzzing "), end="")
+    print(blue("(use --fuzzers to override)" if args.fuzzers == cpu_count else f"(the default is {cpu_count})"))
+
+async def clean_up_tempfiles() -> None:
+    shutil.rmtree(f"{args.basedir}/in")
     for target in targets:
-        os.remove(target["binary"])
+        Path(target.binary).unlink()
         for mode in modes:
-            for outdir in glob.glob(f"/tmp/out-{mode}-{target['binary']}*"):
-                shutil.rmtree(outdir)
+            shutil.rmtree(f"{args.basedir}/out-{mode.name}-{target.binary}")
+
+async def check_afl_persistent() -> bool:
+    with open("/proc/cmdline", "r") as cpuinfo:
+        return "mitigations=off" in cpuinfo.read().split(" ")
+
+async def check_afl_system() -> bool:
+    sysctl = next((s for s in ["sysctl", "/sbin/sysctl"] if shutil.which(s)), None)
+    if sysctl:
+        (returncode, stdout, _) = await run_command([sysctl, "kernel.randomize_va_space"], None)
+        return returncode == 0 and stdout.decode().rstrip().split(" = ")[1] == "0"
+    return False
 
 async def check_deps() -> None:
-    """Check if the necessary files exist and are executable."""
-    if not (os.access("../afl-fuzz", os.X_OK) and os.access("../afl-cc", os.X_OK) and os.path.exists("../SanitizerCoveragePCGUARD.so")):
-        sys.exit(f'{red(" [*] Error: you need to compile AFL++ first, we need afl-fuzz, afl-clang-fast and SanitizerCoveragePCGUARD.so built.")}')
+    """Checks for dependencies, platform, performance."""
+    plat = platform.system()
+    if not plat == "Linux": sys.exit(red(f" [*] Error: Your platform '{plat}' is not supported by this script yet."))
+    if not (os.access(Path("../afl-fuzz").resolve(), os.X_OK) and os.access(Path("../afl-cc").resolve(), os.X_OK) and (
        os.path.exists(Path("../SanitizerCoveragePCGUARD.so").resolve())
+    )):
+        sys.exit(red(" [*] Compile AFL++: we need afl-fuzz, afl-clang-fast and SanitizerCoveragePCGUARD.so built."))
+
+    # Pick some sample settings from afl-{persistent,system}-config to try to see whether they were run.
+    cmd_checks = {"afl-persistent-config": check_afl_persistent, "afl-system-config": check_afl_system}
+    for cmd, checker in cmd_checks.items():
+        results["config"][cmd] = await checker()
+        if not results["config"][cmd]:
+            print(yellow(f" [*] {cmd} was not run. You can run it to improve performance (and decrease security)."))
 
 async def prep_env() -> dict:
-    # Unset AFL_* environment variables
-    for e in list(os.environ.keys()):
-        if e.startswith("AFL_"):
-            os.environ.pop(e)
-    # Create input directory and file
-    os.makedirs("in", exist_ok=True)
-    with open("in/in.txt", "wb") as f:
-        f.write(b"\x00" * 10240)
-    # Rest of env
-    AFL_PATH = os.path.abspath("../")
-    os.environ["PATH"] = AFL_PATH + ":" + os.environ["PATH"]
+    """Unset AFL_* environment variables, create corpus dir and file, provide env vars for fuzzing."""
+    Path(args.basedir).mkdir(exist_ok=True)
+    Path(f"{args.basedir}/in").mkdir(exist_ok=True)
+    with open(f"{args.basedir}/in/in.txt", "wb") as seed: seed.write(b"\x00" * 10240)
     return {
-        "AFL_BENCH_JUST_ONE": "1",
-        "AFL_DISABLE_TRIM": "1",
-        "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES": "1",
-        "AFL_NO_UI": "1",
-        "AFL_TRY_AFFINITY": "1",
-        "PATH": f"{AFL_PATH}:{os.environ['PATH']}",
+        "AFL_BENCH_JUST_ONE": "1", "AFL_DISABLE_TRIM": "1", "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES": "1",
+        "AFL_NO_UI": "1", "AFL_TRY_AFFINITY": "1", "PATH": str(Path("../").resolve()),
     }
 
 async def compile_target(source: str, binary: str) -> None:
     (returncode, stdout, stderr) = await run_command(
-        ["afl-cc", "-o", binary, source],
-        env={"AFL_INSTRUMENT": "PCGUARD", "PATH": os.environ["PATH"]},
+        [Path("../afl-cc").resolve(), "-o", binary, source], env={"AFL_INSTRUMENT": "PCGUARD"}
+    )
+    if returncode != 0: sys.exit(red(f" [*] Error: afl-cc is unable to compile: {stderr} {stdout}"))
+
+async def run_command(cmd: str, env: dict) -> (int | None, bytes, bytes):
+    debug(f"Launching command: {cmd} with env {env}")
+    p = await asyncio.create_subprocess_exec(
+        *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=env
     )
-    if returncode != 0:
-        sys.exit(f'{red(f" [*] Error: afl-cc is unable to compile: {stderr} {stdout}")}')
-
-async def cool_down() -> None:
-    """Avoid the next test run's results being contaminated by e.g. thermal limits hit on this one."""
-    global between_tests
-    if between_tests:
-        print(f'{blue("Taking a five second break to stay cool between tests.")}')
-        await asyncio.sleep(10)
-    else:
-        between_tests = True
-
-async def run_command(cmd, env) -> (int | None, bytes, bytes):
-    if args.debug:
-        print(blue(f"Launching command: {cmd} with env {env}"))
-    p = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=env)
     stdout, stderr = await p.communicate()
+    debug(f"Output: {stdout.decode()} {stderr.decode()}")
     return (p.returncode, stdout, stderr)
 
 async def colon_value_or_none(filename: str, searchKey: str) -> str | None:
-    """Read a value (e.g. 'cpu MHz : 4976.109') given its filename and key."""
+    """Return a colon-separated value given a key in a file, e.g. 'cpu MHz : 4976.109'."""
     with open(filename, "r") as fh:
-        for line in fh:
-            kv = line.split(": ", 1)
-            if kv and len(kv) == 2:
-                (key, value) = kv
-                key = key.strip()
-                value = value.strip()
-                if key == searchKey:
-                    return value
-    return None
+        kv_pairs = (line.split(": ", 1) for line in fh if ": " in line)
+        return next((v.rstrip() for k, v in kv_pairs if k.rstrip() == searchKey), None)
 
 async def save_benchmark_results() -> None:
-    """We want a consistent JSON file, so read in the existing one, append, and replace."""
-    with open("benchmark-results.json", "r+") as jsonfile:
-        current_benchmarks = json.load(jsonfile)
-        current_benchmarks.append(results)
-        jsonfile.seek(0)
-        jsonfile.write(json.dumps(current_benchmarks, indent=2))
-        jsonfile.truncate()
-        print(json.dumps(results, indent=2))
+    """Append a single row to the benchmark results in JSON Lines format (simple to write and to diff)."""
+    with open("benchmark-results.jsonl", "a") as jsonfile:
+        json.dump(results, jsonfile, sort_keys=True)
+        jsonfile.write("\n")
+        print(blue(f" [*] Results have been written to {jsonfile.name}"))
 
 async def main() -> None:
-    print(f'{gray(" [*] Preparing environment")}')
-    # Remove stale files, if necessary.
+    print(" [*] Preparing environment")
     try:
-        await clean_up()
+        await clean_up_tempfiles()
     except FileNotFoundError:
         pass
     await check_deps()
+    # Only record the first core's speed for now, even though it can vary between cores.
+    results["hardware"]["cpu_mhz"] = float(await colon_value_or_none("/proc/cpuinfo", "cpu MHz"))
+    results["hardware"]["cpu_model"] = await colon_value_or_none("/proc/cpuinfo", "model name")
+    results["hardware"]["cpu_threads"] = cpu_count
     env_vars = await prep_env()
-    cpu_count = multiprocessing.cpu_count()
-    results["cpu_model"] = await colon_value_or_none("/proc/cpuinfo", "model name")
-    results["cpu_mhz"] = await colon_value_or_none("/proc/cpuinfo", "cpu MHz")
-
-    print(f'{gray(" [*] Ready, starting benchmark - this will take approx 1-2 minutes...")}')
+    print(f" [*] Ready, starting benchmark...")
     for target in targets:
-        await compile_target(target["source"], target["binary"])
+        (source, binary) = [target.source, target.binary]
+        await compile_target(source, binary)
         for mode in modes:
-            await cool_down()
-            print(f" [*] {mode} {target['binary']} benchmark starting, execs/s: ", end="", flush=True)
-            if mode == "single-core":
-                cpus = [0]
-            elif mode == "multi-core":
-                cpus = range(0, cpu_count)
-            basedir = f"/tmp/out-{mode}-{target['binary']}-"
-            cmd = [["afl-fuzz", "-i", "in", "-o", f"{basedir}{cpu}", "-M", f"{cpu}", "-s", "123", "-D", f"./{target['binary']}"] for cpu in cpus]
-
-            # Here's where we schedule the tasks, and then block waiting for them to finish.
-            tasks = [run_command(cmd[cpu], env_vars) for cpu in cpus]
-            output = await asyncio.gather(*tasks)
-
-            if args.debug:
-                for (_, stdout, stderr) in output:
-                    print(blue(f"Output: {stdout.decode()} {stderr.decode()}"))
-            execs = sum([Decimal(await colon_value_or_none(f"{basedir}{cpu}/{cpu}/fuzzer_stats", "execs_per_sec")) for cpu in cpus])
-            print(green(execs))
-            results["targets"][target["binary"]][mode]["execs_per_second"] = str(execs)
-            results["targets"][target["binary"]][mode]["cores_used"] = len(cpus)
-
-    print("\nComparison: (note that values can change by 10-20% per run)")
-    with open("COMPARISON", "r") as f:
-        print(f.read())
-    await clean_up()
+            execs_per_sec, execs_total, run_time_total = ([] for _ in range(3))
+            for run in range(0, args.runs):
+                print(gray(f" [*] {mode.name} {binary} run {run+1} of {args.runs}, execs/s: "), end="", flush=True)
+                fuzzers = range(0, args.fuzzers if mode == Mode.multicore else 1)
+                outdir = f"{args.basedir}/out-{mode.name}-{binary}"
+                cmds = []
+                for (idx, afl) in enumerate(fuzzers):
+                    name = ["-o", outdir, "-M" if idx == 0 else "-S", str(afl)]
+                    cmds.append(["afl-fuzz", "-i", f"{args.basedir}/in"] + name + ["-s", "123", "-D", f"./{binary}"])
+
+                # Prepare the afl-fuzz tasks, and then block here while waiting for them to finish.
+                tasks = [run_command(cmds[cpu], env_vars) for cpu in fuzzers]
+                start = time.time()
+                await asyncio.gather(*tasks)
+                end = time.time()
+
+                # Our score is the sum of all execs_per_sec entries in fuzzer_stats files for the run.
+                tasks = [colon_value_or_none(f"{outdir}/{afl}/fuzzer_stats", "execs_per_sec") for afl in fuzzers]
+                all_execs_per_sec = await asyncio.gather(*tasks)
+                execs = sum([Decimal(count) for count in all_execs_per_sec if count is not None])
+                print(green(execs))
+                execs_per_sec.append(execs)
+
+                # Also gather execs_total and total_run_time for this run.
+                tasks = [colon_value_or_none(f"{outdir}/{afl}/fuzzer_stats", "execs_done") for afl in fuzzers]
+                all_execs_total = await asyncio.gather(*tasks)
+                execs_total.append(sum([Decimal(count) for count in all_execs_total if count is not None]))
+                run_time_total.append(Decimal(end - start))
+
+            total_run_time = round(Decimal(sum(run_time_total)), 2)
+            avg_score = round(Decimal(sum(execs_per_sec) / len(execs_per_sec)), 2)
+            results["targets"][binary][mode.name] = {
+                "execs_per_second": float(avg_score),
+                "execs_total": int(sum([Decimal(execs) for execs in execs_total])),
+                "fuzzers_used": len(fuzzers),
+                "total_run_time": float(total_run_time),
+            }
+            print(f" [*] Average score for this test across all runs was: {green(avg_score)}")
+            if (((max(execs_per_sec) - min(execs_per_sec)) / avg_score) * 100) > 15:
+                print(yellow(" [*] The difference between your slowest and fastest runs was >15%, maybe try again?"))
+    await clean_up_tempfiles()
     await save_benchmark_results()
 
 if __name__ == "__main__":
     asyncio.run(main())
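The "really unsatisfying argparse behavior" comment in the patch refers to combining `action="append"` with a list default: an explicitly passed choice is appended after the default instead of replacing it, which is why the script drops the leading default element whenever more than one value is present. A small self-contained sketch of that quirk and the workaround, using the same `--mode` option as the patch (the parsed argv is illustrative):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-m", "--mode", action="append", default=["multicore"],
                    choices=["multicore", "singlecore"])

# With action="append" plus a list default, an explicit choice is appended
# after the default rather than replacing it.
args = parser.parse_args(["-m", "singlecore"])
print(args.mode)  # ['multicore', 'singlecore']

# The workaround used in benchmark.py: if the user supplied at least one
# explicit choice, drop the leading default element.
if len(args.mode) > 1:
    args.mode = args.mode[1:]
print(args.mode)  # ['singlecore']
```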