author     Alexander Shvedov <60114847+a-shvedov@users.noreply.github.com>  2024-05-30 10:43:01 +0300
committer  GitHub <noreply@github.com>  2024-05-30 10:43:01 +0300
commit     f8a5f1cd9ea907654f42fa06ce6b6bfd4b8c1b13 (patch)
tree       7aec2a095a30ed609ce96f85ec3c4e0a8b8eb74c /benchmark/benchmark.py
parent     629edb1e78d791894ce9ee6d53259f95fe1a29af (diff)
parent     e7d871c8bf64962a658e447b90a1a3b43aaddc28 (diff)
Merge branch 'AFLplusplus:stable' into stable
Diffstat (limited to 'benchmark/benchmark.py')
-rwxr-xr-x  benchmark/benchmark.py  281
1 file changed, 281 insertions, 0 deletions
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
new file mode 100755
index 00000000..fffb4a3a
--- /dev/null
+++ b/benchmark/benchmark.py
@@ -0,0 +1,281 @@
+#!/usr/bin/env python3
+# Part of the aflplusplus project, requires Python 3.8+.
+# Author: Chris Ball <chris@printf.net>, ported from Marc "van Hauser" Heuse's "benchmark.sh".
+import argparse, asyncio, json, multiprocessing, os, platform, re, shutil, sys
+from dataclasses import asdict, dataclass
+from decimal import Decimal
+from enum import Enum, auto
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+blue   = lambda text: f"\033[1;94m{text}\033[0m"; gray = lambda text: f"\033[1;90m{text}\033[0m"
+green  = lambda text: f"\033[0;32m{text}\033[0m"; red  = lambda text: f"\033[0;31m{text}\033[0m"
+yellow = lambda text: f"\033[0;33m{text}\033[0m"
+
+class Mode(Enum):
+    multicore  = auto()
+    singlecore = auto()
+
+@dataclass
+class Target:
+    source: Path
+    binary: Path
+
+@dataclass
+class Run:
+    execs_per_sec: float
+    execs_total: float
+    fuzzers_used: int
+
+@dataclass
+class Config:
+    afl_persistent_config: bool
+    afl_system_config: bool
+    afl_version: Optional[str]
+    comment: str
+    compiler: str
+    target_arch: str
+
+@dataclass
+class Hardware:
+    cpu_fastest_core_mhz: float
+    cpu_model: str
+    cpu_threads: int
+
+@dataclass
+class Results:
+    config: Optional[Config]
+    hardware: Optional[Hardware]
+    targets: Dict[str, Dict[str, Optional[Run]]]
+
+all_modes = [Mode.singlecore, Mode.multicore]
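+# The two benchmark harnesses: the persistent-mode/shared-memory variant of test-instr, and the plain test-instr target.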
+all_targets = [
+    Target(source=Path("../utils/persistent_mode/test-instr.c").resolve(), binary=Path("test-instr-persist-shmem")),
+    Target(source=Path("../test-instr.c").resolve(), binary=Path("test-instr"))
+]
+modes = [mode.name for mode in all_modes]
+targets = [str(target.binary) for target in all_targets]
+cpu_count = multiprocessing.cpu_count()
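+# Environment for every subprocess: the AFL_* settings trade thoroughness for speed (no trimming, fast calibration,
+# no UI) and relax some startup checks; PATH is prefixed with the repo root so afl-fuzz/afl-cc resolve from this build.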
+env_vars = {
+    "AFL_DISABLE_TRIM": "1", "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES": "1", "AFL_FAST_CAL": "1",
+    "AFL_NO_UI": "1", "AFL_TRY_AFFINITY": "1", "PATH": f'{str(Path("../").resolve())}:{os.environ["PATH"]}',
+}
+
+parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("-b", "--basedir", help="directory to use for temp files", type=str, default="/tmp/aflpp-benchmark")
+parser.add_argument("-d", "--debug", help="show verbose debugging output", action="store_true")
+parser.add_argument("-r", "--runs", help="how many runs to average results over", type=int, default=3)
+parser.add_argument("-f", "--fuzzers", help="how many afl-fuzz workers to use", type=int, default=cpu_count)
+parser.add_argument("-m", "--mode", help="pick modes", action="append", default=modes, choices=modes)
+parser.add_argument("-c", "--comment", help="add a comment about your setup", type=str, default="")
+parser.add_argument("--cpu", help="override the detected CPU model name", type=str, default="")
+parser.add_argument("--mhz", help="override the detected CPU MHz", type=str, default="")
+parser.add_argument(
+    "-t", "--target", help="pick targets", action="append", default=["test-instr-persist-shmem"], choices=targets
+)
+args = parser.parse_args()
+# Awkward argparse behavior: we want a default and to allow multiple choices, but with action="append" a user-supplied
+# choice is appended to the default list instead of replacing it, so strip the defaults when extra values were given.
+if len(args.target) > 1:
+    args.target = args.target[1:]
+if len(args.mode) > 2:
+    args.mode = args.mode[2:]
+
+chosen_modes = [mode for mode in all_modes if mode.name in args.mode]
+chosen_targets = [target for target in all_targets if str(target.binary) in args.target]
+results = Results(config=None, hardware=None, targets={
+    str(t.binary): {m.name: None for m in chosen_modes} for t in chosen_targets}
+)
+debug = lambda text: args.debug and print(blue(text))
+
+async def clean_up_tempfiles() -> None:
+    shutil.rmtree(f"{args.basedir}/in")
+    for target in chosen_targets:
+        target.binary.unlink()
+        for mode in chosen_modes:
+            shutil.rmtree(f"{args.basedir}/out-{mode.name}-{str(target.binary)}")
+
+async def check_afl_persistent() -> bool:
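+    """Guess whether afl-persistent-config was run by looking for 'mitigations=off' among the kernel boot parameters."""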
+    with open("/proc/cmdline", "r") as cmdline:
+        return "mitigations=off" in cmdline.read().strip().split(" ")
+
+async def check_afl_system() -> bool:
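+    """Guess whether afl-system-config was run by checking that ASLR (kernel.randomize_va_space) is disabled."""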
+    sysctl = next((s for s in ["sysctl", "/sbin/sysctl"] if shutil.which(s)), None)
+    if sysctl:
+        (returncode, stdout, _) = await run_command([sysctl, "kernel.randomize_va_space"])
+        return returncode == 0 and stdout.decode().rstrip().split(" = ")[1] == "0"
+    return False
+
+async def prep_env() -> None:
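+    """Create the input corpus directory containing a single 10 KB all-zero seed file."""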
+    Path(f"{args.basedir}/in").mkdir(exist_ok=True, parents=True)
+    with open(f"{args.basedir}/in/in.txt", "wb") as seed:
+        seed.write(b"\x00" * 10240)
+
+async def compile_target(source: Path, binary: Path) -> None:
+    print(f" [*] Compiling the {binary} fuzzing harness for the benchmark to use.")
+    (returncode, stdout, stderr) = await run_command(
+        [str(Path("../afl-clang-lto").resolve()), "-o", str(Path(binary.resolve())), str(Path(source).resolve())]
+    )
+    if returncode == 0:
+        return
+    print(yellow(f" [*] afl-clang-lto was unable to compile; falling back to afl-cc."))
+    (returncode, stdout, stderr) = await run_command(
+        [str(Path("../afl-cc").resolve()), "-o", str(Path(binary.resolve())), str(Path(source).resolve())]
+    )
+    if returncode != 0:
+        sys.exit(red(f" [*] Error: afl-cc is unable to compile: {stderr.decode()} {stdout.decode()}"))
+
+async def run_command(cmd: List[str]) -> Tuple[Optional[int], bytes, bytes]:
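+    """Run a command asynchronously with the benchmark environment, returning (returncode, stdout, stderr)."""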
+    debug(f"Launching command: {cmd} with env {env_vars}")
+    p = await asyncio.create_subprocess_exec(
+        *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=env_vars
+    )
+    stdout, stderr = await p.communicate()
+    debug(f"Output: {stdout.decode()} {stderr.decode()}")
+    return (p.returncode, stdout, stderr)
+
+async def check_deps() -> None:
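+    """Check platform and AFL++ build prerequisites, and record compiler/configuration details in the results."""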
+    if (plat := platform.system()) != "Linux":
+        sys.exit(red(f" [*] {plat} is not supported by this script yet."))
+    if not (os.access(Path("../afl-fuzz").resolve(), os.X_OK) and os.access(Path("../afl-cc").resolve(), os.X_OK) and
+            os.path.exists(Path("../SanitizerCoveragePCGUARD.so").resolve())):
+        sys.exit(red(" [*] Compile AFL++: we need afl-fuzz, afl-clang-fast and SanitizerCoveragePCGUARD.so built."))
+
+    (returncode, stdout, stderr) = await run_command([str(Path("../afl-cc").resolve()), "-v"])
+    if returncode != 0:
+        sys.exit(red(f" [*] Error: afl-cc -v returned: {stderr.decode()} {stdout.decode()}"))
+    compiler = ""
+    target_arch = ""
+    for line in stderr.decode().split("\n"):
+        if "clang version" in line:
+            compiler = line
+        elif m := re.match(r"^Target: (.*)", line):
+            target_arch = m.group(1)
+
+    # Pick some sample settings from afl-{persistent,system}-config to try to see whether they were run.
+    afl_pc = await check_afl_persistent()
+    afl_sc = await check_afl_system()
+    if not afl_pc:
+        print(yellow(f" [*] afl-persistent-config did not run; run it to improve performance (and decrease security)."))
+    if not afl_sc:
+        print(yellow(f" [*] afl-system-config did not run; run it to improve performance (and decrease security)."))
+    results.config = Config(afl_persistent_config=afl_pc, afl_system_config=afl_sc, afl_version="",
+                            comment=args.comment, compiler=compiler, target_arch=target_arch)
+
+async def colon_values(filename: str, searchKey: str) -> List[str]:
+    """Return a colon-separated value given a key in a file, e.g. 'cpu MHz         : 4976.109')"""
+    with open(filename, "r") as fh:
+        kv_pairs = (line.split(": ", 1) for line in fh if ": " in line)
+        v_list = [v.rstrip() for k, v in kv_pairs if k.rstrip() == searchKey]
+        return v_list
+
+async def describe_afl_config() -> str:
+    if results.config is None:
+        return "unknown"
+    elif results.config.afl_persistent_config and results.config.afl_system_config:
+        return "both"
+    elif results.config.afl_persistent_config:
+        return "persistent"
+    elif results.config.afl_system_config:
+        return "system"
+    else:
+        return "none"
+
+async def save_benchmark_results() -> None:
+    """Append a single row to the benchmark results in JSON Lines format (which is simple to write and diff)."""
+    with open("benchmark-results.jsonl", "a") as jsonfile:
+        json.dump(asdict(results), jsonfile, sort_keys=True)
+        jsonfile.write("\n")
+        print(blue(f" [*] Results have been written to the {jsonfile.name} file."))
+    with open("COMPARISON.md", "r+") as comparisonfile:
+        described_config = await describe_afl_config()
+        aflconfig = described_config.ljust(12)
+        if results.hardware is None:
+            return
+        cpu_model = results.hardware.cpu_model.ljust(51)
+        if cpu_model in comparisonfile.read():
+            print(blue(f" [*] Results have not been written to the COMPARISON.md file; this CPU is already present."))
+            return
+        cpu_mhz = str(round(results.hardware.cpu_fastest_core_mhz)).ljust(5)
+        if not "test-instr-persist-shmem" in results.targets or \
+           not "multicore" in results.targets["test-instr-persist-shmem"] or \
+           not "singlecore" in results.targets["test-instr-persist-shmem"] or \
+           results.targets["test-instr-persist-shmem"]["singlecore"] is None or \
+           results.targets["test-instr-persist-shmem"]["multicore"] is None:
+            return
+        single = str(round(results.targets["test-instr-persist-shmem"]["singlecore"].execs_per_sec)).ljust(10)
+        multi = str(round(results.targets["test-instr-persist-shmem"]["multicore"].execs_per_sec)).ljust(9)
+        cores = str(args.fuzzers).ljust(7)
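+        # Append a markdown table row, with each field padded (via ljust above) to the existing column widths in COMPARISON.md.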
+        comparisonfile.write(f"|{cpu_model} | {cpu_mhz} | {cores} | {single} | {multi} | {aflconfig} |\n")
+        print(blue(f" [*] Results have been written to the COMPARISON.md file."))
+    with open("COMPARISON.md", "r") as comparisonfile:
+        print(comparisonfile.read())
+
+
+async def main() -> None:
+    try:
+        await clean_up_tempfiles()
+    except FileNotFoundError:
+        pass
+    await check_deps()
+    if args.mhz:
+        cpu_mhz = float(args.mhz)
+    else:
+        cpu_mhz_str = await colon_values("/proc/cpuinfo", "cpu MHz")
+        if len(cpu_mhz_str) == 0:
+            cpu_mhz_str.append("0")
+        cpu_mhz = max([float(c) for c in cpu_mhz_str]) # use the fastest CPU MHz for now
+    if args.cpu:
+        cpu_model = [args.cpu]
+    else:
+        cpu_model = await colon_values("/proc/cpuinfo", "model name") or [""]
+    results.hardware = Hardware(cpu_fastest_core_mhz=cpu_mhz, cpu_model=cpu_model[0], cpu_threads=cpu_count)
+    await prep_env()
+    print(f" [*] Ready, starting benchmark...")
+    for target in chosen_targets:
+        await compile_target(target.source, target.binary)
+        binary = str(target.binary)
+        for mode in chosen_modes:
+            if mode == Mode.multicore:
+                print(blue(f" [*] Using {args.fuzzers} fuzzers for multicore fuzzing "), end="")
+                print(blue("(use --fuzzers to override)." if args.fuzzers == cpu_count else f"(the default is {cpu_count})"))
+            execs_per_sec, execs_total = ([] for _ in range(2))
+            for run_idx in range(0, args.runs):
+                print(gray(f" [*] {mode.name} {binary} run {run_idx+1} of {args.runs}, execs/s: "), end="", flush=True)
+                fuzzers = range(0, args.fuzzers if mode == Mode.multicore else 1)
+                outdir = f"{args.basedir}/out-{mode.name}-{binary}"
+                cmds = []
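+                # One afl-fuzz command per worker: the first is the main node (-M), the rest are secondaries (-S),
+                # all sharing one output directory, a fixed RNG seed (-s 123), and a 10-second run (-V10).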
+                for fuzzer_idx, afl in enumerate(fuzzers):
+                    name = ["-o", outdir, "-M" if fuzzer_idx == 0 else "-S", str(afl)]
+                    cmds.append(["afl-fuzz", "-i", f"{args.basedir}/in"] + name + ["-s", "123", "-V10", "-D", f"./{binary}"])
+                # Prepare the afl-fuzz tasks, and then block while waiting for them to finish.
+                fuzztasks = [run_command(cmds[cpu]) for cpu in fuzzers]
+                await asyncio.gather(*fuzztasks)
+                afl_versions = await colon_values(f"{outdir}/0/fuzzer_stats", "afl_version")
+                if results.config:
+                    results.config.afl_version = afl_versions[0]
+                # Our score is the sum of all execs_per_sec entries in fuzzer_stats files for the run.
+                sectasks = [colon_values(f"{outdir}/{afl}/fuzzer_stats", "execs_per_sec") for afl in fuzzers]
+                all_execs_per_sec = await asyncio.gather(*sectasks)
+                execs = sum([Decimal(count[0]) for count in all_execs_per_sec])
+                print(green(execs))
+                execs_per_sec.append(execs)
+                # Also gather execs_total and total_run_time for this run.
+                exectasks = [colon_values(f"{outdir}/{afl}/fuzzer_stats", "execs_done") for afl in fuzzers]
+                all_execs_total = await asyncio.gather(*exectasks)
+                execs_total.append(sum([Decimal(count[0]) for count in all_execs_total]))
+
+            # (Using float() because Decimal() is not JSON-serializable.)
+            avg_afl_execs_per_sec = round(Decimal(sum(execs_per_sec) / len(execs_per_sec)), 2)
+            afl_execs_total = int(sum([Decimal(execs) for execs in execs_total]))
+            run = Run(execs_per_sec=float(avg_afl_execs_per_sec), execs_total=afl_execs_total, fuzzers_used=len(fuzzers))
+            results.targets[binary][mode.name] = run
+            print(f" [*] Average execs/sec for this test across all runs was: {green(avg_afl_execs_per_sec)}")
+            if (((max(execs_per_sec) - min(execs_per_sec)) / avg_afl_execs_per_sec) * 100) > 15:
+                print(yellow(" [*] The difference between your slowest and fastest runs was >15%, maybe try again?"))
+
+    await clean_up_tempfiles()
+    await save_benchmark_results()
+
+if __name__ == "__main__":
+    asyncio.run(main())
+