// Binary instrumentation tool // // SPDX-FileCopyrightText: 2016 Aleksandar Nikolic // SPDX-FileCopyrightText: 2018-2021 Marc "van Hauser" Heuse // SPDX-License-Identifier: Apache-2.0 // // SPDX-FileCopyrightText: 2024 Nguyễn Gia Phong // SPDX-License-Identifier: AGPL-3.0-or-later // DyninstAPI includes #include "BPatch.h" #include "BPatch_flowGraph.h" #include "BPatch_point.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // cmd line options char *entryPointName = NULL; int verbose = 0; Dyninst::Address entryPoint; std::set todo; std::set runtimeLibraries; std::set skipAddresses; std::set onlyAddresses; std::set exitAddresses; unsigned int bbMinSize = 10; int bbSkip = 0, performance = 1; bool skipMainModule = false, do_bb = true, dynfix = false; unsigned long int insertions = 0; uintptr_t mapaddr = 0; BPatch_function *save_rdi; BPatch_function *restore_rdi; const char *const functions[] = { "main", "_main", "_initproc", "_init", "start", "_start" }; BPatch_function *findFuncByName(BPatch_image *appImage, char *funcName) { BPatch_Vector funcs; if (NULL == appImage->findFunction(funcName, funcs) || !funcs.size() || NULL == funcs[0]) { std::cerr << "Failed to find function " << funcName << ".\n"; return NULL; } return funcs[0]; } // insert callback to initialization function in the instrumentation library // either at _init or at manualy specified entry point. bool insertCallToInit(BPatch_addressSpace *appBin, BPatch_function *instIncFunc, BPatch_module *module, BPatch_function *funcInit, bool install_hack) { /* Find the instrumentation points */ std::vector points; std::vector* funcEntry = funcInit->findPoint(BPatch_entry); BPatch_image *appImage = appBin->getImage(); BPatchSnippetHandle *handle; if (NULL == funcEntry) { std::cerr << "Failed to find entry for function.\n"; return false; } // THIS BLOCK IS DISABLED - dyninst is too volatile for this to work reliably // disabled because performance can not be greater than 2 if (performance >= 3 && install_hack == true) { std::cerr << "Inserting global variables\n"; // we set up a fake map so we do not have crashes if the the forkserver // is not installed in _init but later for speed reasons. // we could also check in the bb() code if map == 0 but that would // cost precious instructions. BPatch_variableExpr *fakemap = appBin->malloc(65536); BPatch_constExpr fakemap_ptr(fakemap->getBaseAddr()); BPatch_variableExpr *map = appBin->malloc(*(appImage->findType("size_t")), "map"); BPatch_arithExpr initmap(BPatch_assign, *map, fakemap_ptr); appBin->insertSnippet(initmap, *funcEntry, BPatch_firstSnippet); BPatch_constExpr map_ptr(map->getBaseAddr()); BPatch_variableExpr *prev_id = appBin->malloc(*(appImage->findType("size_t")), "prev_id"); BPatch_arithExpr initprevid(BPatch_assign, *prev_id, BPatch_constExpr(0)); appBin->insertSnippet(initprevid, *funcEntry); BPatch_Vector instArgs; instArgs.push_back(&map_ptr); BPatch_funcCallExpr instIncExpr(*instIncFunc, instArgs); handle = appBin->insertSnippet(instIncExpr, *funcEntry, BPatch_callBefore, BPatch_lastSnippet); } else { BPatch_Vector instArgs; BPatch_funcCallExpr instIncExpr(*instIncFunc, instArgs); handle = appBin->insertSnippet(instIncExpr, *funcEntry, BPatch_callBefore, BPatch_lastSnippet); } if (!handle) { std::cerr << "Failed to insert init callback.\n"; return false; } return true; } // inserts a callback for each basic block assigning it an instrumentation // time 16bit random ID just as afl bool insertBBCallback(BPatch_addressSpace *appBin, BPatch_function *curFunc, char *funcName, BPatch_function *instBBIncFunc, int *bbIndex) { BPatch_image *appImage = appBin->getImage(); BPatch_flowGraph *appCFG = curFunc->getCFG(); unsigned short randID; if (!appCFG) { std::cerr << "Failed to find CFG for function " << funcName << std::endl; return false; } BPatch_Set allBlocks; if (!appCFG->getAllBasicBlocks(allBlocks)) { std::cerr << "Failed to find basic blocks for function " << funcName << std::endl; return false; } else if (allBlocks.size() == 0) { std::cerr << "No basic blocks for function " << funcName << std::endl; return false; } for (auto* blk : allBlocks) { if (*bbIndex < bbSkip || blk->size() < bbMinSize) { // skip over first bbSkip bbs or below minimum size (*bbIndex)++; continue; } BPatch_point *bbEntry = blk->findEntryPoint(); if (performance >= 1) { if (blk->isEntryBlock() == false) { bool good = false; BPatch_Vector sources; blk->getSources(sources); for (unsigned int i = 0; i < sources.size() && good == false; i++) { BPatch_Vector targets; sources[i]->getTargets(targets); if (targets.size() > 1) good = true; } if (!good) continue; } } unsigned long address = blk->getStartAddress(); randID = rand() % USHRT_MAX; if (verbose >= 1) std::cerr << "Instrumenting Basic Block 0x" << std::hex << address << " of " << funcName << " with size " << std::dec << blk->size() << " with random id " << randID << "/0x" << std::hex << randID << std::endl; if (NULL == bbEntry) { // warn the user, but continue std::cerr << "Failed to find entry for basic block at 0x" << std::hex << address << std::endl; (*bbIndex)++; continue; } BPatchSnippetHandle *handle; // level 3 is disabled if (performance >= 3) { // these are dummy instructions we overwrite later BPatch_variableExpr *pid = appImage->findVariable("prev_id"); BPatch_arithExpr new_prev_id(BPatch_assign, *pid, BPatch_arithExpr(BPatch_divide, BPatch_constExpr(8), BPatch_constExpr(2))); handle = appBin->insertSnippet(new_prev_id, *bbEntry, BPatch_lastSnippet); BPatch_variableExpr *map = appImage->findVariable("map"); BPatch_variableExpr *pid2 = appImage->findVariable("prev_id"); BPatch_arithExpr map_idx(BPatch_arithExpr(BPatch_plus, *map, BPatch_arithExpr(BPatch_divide, *pid2, BPatch_constExpr(2)))); if (mapaddr == 0) { printf("Map for AFL is installed at: %p\n", (void *)map->getBaseAddr()); mapaddr = (uintptr_t)map->getBaseAddr(); } handle = appBin->insertSnippet(map_idx, *bbEntry, BPatch_firstSnippet); } else { BPatch_Vector instArgs1; BPatch_Vector instArgs; BPatch_constExpr bbId(randID); instArgs.push_back(&bbId); BPatch_funcCallExpr instIncExpr(*instBBIncFunc, instArgs); handle = appBin->insertSnippet(instIncExpr, *bbEntry, BPatch_callBefore); } if (!handle) { // warn the user, but continue to next bb std::cerr << "Failed to insert instrumention in basic block at 0x" << std::hex << address << std::endl; (*bbIndex)++; continue; } else insertions++; (*bbIndex)++; } return true; } int main(int argc, char **argv) { static const char *const USAGE = "Usage: afl-dyninst" " [OPTIONS]... INFILE OUTFILE\n\n" "Instrument binary to be fuzzed by AFL.\n\n" "Options:\n" " -h, --help show this help message and exit\n" " --version show program's version number and exit\n" " -e ADDR, --entry=ADDR " "entry point address to patch (required for stripped binaries)\n" " -E ADDR, --exit=ADDR force exit(0) at this address (multiple use)\n" " -D " "instrument only a simple fork server and also forced exit functions\n" " -r PATH, --library=PATH runtime library to instrument (multiple use)\n" " -I NAME, --include=NAME " "instrument only this function and nothing else (multiple use)\n" " -S NAME, --exclude=NAME " "do not instrument this function (multiple use)\n" " -m N, --min-size=N " "minimum size of a basic bock to instrument (default to 10)\n" " -s N, --skip=N " "number of initial basic blocks to skip in binary\n" " -v, --verbose enable verbose output (up to 3 levels)\n" " -x " "experimental performance mode (multiple use, ~25-50% speed improvement)\n"; int c; int option_index = 0; const static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"entry", required_argument, NULL, 'e'}, {"exit", required_argument, NULL, 'E'}, {"library", required_argument, NULL, 'r'}, {"include", required_argument, NULL, 'I'}, {"exclude", required_argument, NULL, 'S'}, {"min-size", required_argument, NULL, 'm'}, {"skip", required_argument, NULL, 's'}, {"verbose", no_argument, NULL, 'v'}, {"version", no_argument, NULL, 0}, {NULL, 0, NULL, 0}, }; while ((c = getopt_long(argc, argv, "he:E:Dr:I:S:m:s:vx", long_options, &option_index)) != -1) { switch (c) { case 0: if (std::strcmp(long_options[option_index].name, "version") == 0) { std::cout << "afl-dyninst " VERSION "\n"; return EXIT_SUCCESS; } __builtin_unreachable(); case 'h': std::cout << USAGE; return EXIT_SUCCESS; case 'e': if ((entryPoint = strtoul(optarg, NULL, 16)) < 0x1000) entryPointName = optarg; break; case 'E': exitAddresses.insert(strtoul(optarg, NULL, 16)); break; case 'D': do_bb = false; break; case 'r': runtimeLibraries.insert(optarg); break; case 'I': onlyAddresses.insert(optarg); break; case 'S': skipAddresses.insert(optarg); break; case 'm': bbMinSize = atoi(optarg); break; case 's': bbSkip = atoi(optarg); break; case 'v': verbose++; break; case 'x': performance++; if (performance > 2) { //#if ( __amd64__ || __x86_64__ ) // std::cerr << "Warning: performance level 3 is currently totally experimental\n"; //#else // std::cerr << "Warning: maximum performance level for non-intelx64 x86 is 2\n"; performance = 2; //#endif } break; default: return EXIT_FAILURE; } } if (optind >= argc) { std::cerr << USAGE; return EXIT_FAILURE; } const char *const originalBinary = argv[optind]; if (++optind >= argc) { std::cerr << USAGE; return EXIT_FAILURE; } const char *const instrumentedBinary = argv[optind]; BPatch bpatch; if (performance >= 2) { bpatch.setSaveFPR(false); bpatch.setTrampRecursive(true); } // TODO: RAII as context manager? const char *dyninstapi_rt_lib = getenv("DYNINSTAPI_RT_LIB"); if (setenv("DYNINSTAPI_RT_LIB", DYNINSTAPI_RT_LIB, true) != 0) { std::cerr << "Failed to set DYNINSTAPI_RT_LIB=" << DYNINSTAPI_RT_LIB << std::endl; // TODO: explain return EXIT_FAILURE; } BPatch_addressSpace *appBin = bpatch.openBinary(originalBinary, false); if (dyninstapi_rt_lib == NULL) unsetenv("DYNINSTAPI_RT_LIB"); else setenv("DYNINSTAPI_RT_LIB", dyninstapi_rt_lib, true); if (appBin == NULL) { std::cerr << "Failed to open binary: " << originalBinary << std::endl; return EXIT_FAILURE; } BPatch_image* appImage = appBin->getImage(); // get and iterate over all modules, instrumenting only the default and manually specified ones std::vector* modules = appImage->getModules(); BPatch_module* defaultModule = NULL; BPatch_module* firstModule = NULL; std::string defaultModuleName; const char* func2patch = NULL; // look for _init if (defaultModuleName.empty()) { for (auto* mod : *modules) { if (firstModule == NULL) firstModule = mod; char moduleName[1024]; mod->getName(moduleName, 1024); for (auto* fn : *mod->getProcedures()) { char funcName[1024]; fn->getName(funcName, 1024); if (verbose >= 3) std::cerr << "Looking at function " << funcName << " in module " << moduleName << std::endl; for (const auto* entry_fn : functions) if (std::strcmp(entry_fn, funcName) == 0) { func2patch = entry_fn; defaultModuleName = moduleName; defaultModule = mod; if (verbose >= 1) std::cerr << "Found init function " << func2patch << " in " << moduleName << std::endl; break; } if (!defaultModuleName.empty()) break; } if (!defaultModuleName.empty()) break; } } // last resort, by name of the binary if (defaultModuleName.empty()) defaultModuleName = std::string(originalBinary).substr(std::string(originalBinary).find_last_of("\\/") + 1); if (defaultModule == NULL) defaultModule = firstModule; if (!appBin->loadLibrary(AFL_DYNINST_LIB)) { std::cerr << "Failed to open instrumentation library: " << AFL_DYNINST_LIB << std::endl; std::cerr << "It needs to be located in the current working directory.\n"; return EXIT_FAILURE; } /* Find code coverage functions in the instrumentation library */ BPatch_function *initAflForkServer; BPatch_function *bbCallback = findFuncByName(appImage, (char *)"bbCallback"); BPatch_function *forceCleanExit = findFuncByName(appImage, (char *)"forceCleanExit"); if (do_bb == true) { if (performance >= 3) initAflForkServer = findFuncByName(appImage, (char *)"initAflForkServerVar"); else initAflForkServer = findFuncByName(appImage, (char *)"initAflForkServer"); } else initAflForkServer = findFuncByName(appImage, (char *)"initOnlyAflForkServer"); if (!initAflForkServer || !bbCallback || !forceCleanExit) { std::cerr << "Instrumentation library lacks callbacks: " << AFL_DYNINST_LIB << std::endl; return EXIT_FAILURE; } int bbIndex = 0; // if an entrypoint was set then find function, else find _init BPatch_function *funcToPatch = NULL; if (entryPoint == 0 && entryPointName == NULL) { if (func2patch == NULL) { std::cerr << "Couldn't locate entry point," " specify entry point manually with --entry=ADDR\n"; return EXIT_FAILURE; } BPatch_Vector funcs; defaultModule->findFunction(func2patch, funcs); if (!funcs.size()) { std::cerr << "Couldn't locate entry point," " specify entry point manually with --entry=ADDR\n"; return EXIT_FAILURE; } // there should really be only one funcToPatch = funcs[0]; } else { if (entryPointName != NULL) { for (auto* mod : *modules) { BPatch_Vector funcs; mod->findFunction(entryPointName, funcs); if (funcs.size() > 0) { char moduleName[1024]; funcToPatch = funcs[0]; defaultModule = mod; defaultModule->getName(moduleName, 1024); defaultModuleName = moduleName; std::cerr << "Found entry point " << entryPointName << " in module " << moduleName << std::endl; break; } } } if (funcToPatch == NULL) { if (verbose > 1) printf("Looking for entrypoint %p\n", (char *)entryPoint); funcToPatch = defaultModule->findFunctionByEntry(entryPoint); if (!funcToPatch && defaultModule != firstModule) { funcToPatch = firstModule->findFunctionByEntry(entryPoint); if (funcToPatch) defaultModule = firstModule; } if (!funcToPatch) { // ok lets go hardcore ... if (verbose > 1) std::cerr << "OK we did not find the entry point so far," " lets dig deeper ...\n"; for (auto* mod : *modules) { funcToPatch = mod->findFunctionByEntry(entryPoint); if (funcToPatch) { defaultModule = mod; break; } } } if (funcToPatch && verbose >= 1) { char moduleName[1024]; defaultModule->getName(moduleName, 1024); defaultModuleName = moduleName; std::cerr << "Found entry point " << std::hex << entryPoint << " in module " << moduleName << std::endl; } } } if (funcToPatch == NULL) { std::cerr << "Couldn't locate function at given entry point.\n" << "Try: readelf -ls " << originalBinary << " | egrep 'Entry|FUNC.*GLOBAL.*DEFAULT' | egrep -v '@|UND'\n"; return EXIT_FAILURE; } if (!insertCallToInit(appBin, initAflForkServer, defaultModule, funcToPatch, true)) { std::cerr << "Could not insert init callback at given entry point.\n"; return EXIT_FAILURE; } for (auto* mod : *modules) { if (mod->isSharedLib()) continue; char moduleName[1024]; mod->getName(moduleName, 1024); if (std::string(moduleName).find(defaultModuleName) != std::string::npos && skipMainModule) continue; if (do_bb == true) { std::cerr << "Instrumenting module: " << moduleName << std::endl; // iterate over all functions in the module for (auto* curFunc : *mod->getProcedures()) { char funcName[1024]; curFunc->getName(funcName, 1024); if (std::strcmp(funcName, "_init") == 0 || std::strcmp(funcName, "__libc_csu_init") == 0 || std::strcmp(funcName, "_start") == 0) { if (verbose) std::cerr << "Skipping instrumenting function " << funcName << std::endl; continue; // here's a bug on hlt // XXX: check what happens if removed } if (!skipAddresses.empty()) { bool do_patch = true; for (const auto& sa : skipAddresses) if (sa == funcName) { do_patch = false; break; } if (!do_patch) { std::cerr << "Skipping instrumenting function " << funcName << std::endl; continue; } } if (!onlyAddresses.empty()) { bool do_patch = false; for (const auto& oa : skipAddresses) if (oa == funcName) { do_patch = true; break; } if (!do_patch) { std::cerr << "Skipping instrumenting function " << funcName << std::endl; continue; } } insertBBCallback(appBin, curFunc, funcName, bbCallback, &bbIndex); } } } if (!exitAddresses.empty()) { std::cerr << "Instrumenting forced exit addresses.\n"; for (const unsigned long& addr : exitAddresses) if (addr > 0 && addr != std::numeric_limits::max()) { funcToPatch = defaultModule->findFunctionByEntry(addr); if (!funcToPatch) std::cerr << "Could not find entry point 0x" << std::hex << addr << " (continuing)\n"; else if (!insertCallToInit(appBin, forceCleanExit, defaultModule, funcToPatch, false)) std::cerr << "Could not insert force clean exit callback at 0x" << std::hex << addr << " (continuing)\n"; } } // Output the instrumented binary BPatch_binaryEdit *appBinr = dynamic_cast(appBin); if (!appBinr->writeFile(instrumentedBinary)) { std::cerr << "Failed to write output file: " << instrumentedBinary << std::endl; return EXIT_FAILURE; } todo.insert(instrumentedBinary); if (!runtimeLibraries.empty()) { std::cerr << "Instrumenting runtime libraries.\n"; for (const auto& lib : runtimeLibraries) { BPatch_addressSpace *libBin = bpatch.openBinary(lib.c_str(), false); if (libBin == NULL) { std::cerr << "Failed to open binary " << lib << std::endl; return EXIT_FAILURE; } BPatch_image *libImg = libBin->getImage(); std::vector* modules = libImg->getModules(); for (auto* mod : *modules) { char moduleName[1024]; mod->getName(moduleName, 1024); std::cerr << "Instrumenting module: " << moduleName << std::endl; // iterate over all functions in the module for (auto* curFunc : *mod->getProcedures()) { char funcName[1024]; curFunc->getName(funcName, 1024); if (std::strcmp(funcName, "_init") == 0 || std::strcmp(funcName, "__libc_csu_init") == 0 || std::strcmp(funcName, "_start") == 0) continue; // TODO: DRY if (!skipAddresses.empty()) { bool do_patch = true; for (const auto& sa : skipAddresses) if (sa == funcName) do_patch = false; if (!do_patch) { std::cerr << "Skipping instrumenting function " << funcName << std::endl; continue; } } insertBBCallback(libBin, curFunc, funcName, bbCallback, &bbIndex); } } appBinr = dynamic_cast(libBin); const char* const ins = (lib + ".ins").c_str(); if (!appBinr->writeFile(ins)) { std::cerr << "Failed to write output file: " << ins << std::endl; return EXIT_FAILURE; } else { std::cerr << "Saved the instrumented library to " << ins << ".\n"; todo.insert(lib + ".ins"); } } } std::cerr << "Did a total of " << insertions << " basic block insertions\n"; if (performance >= 3) { int fd; struct stat st; uint64_t i, found = 0; unsigned char *ptr; unsigned char snip1[] = {0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00}; unsigned char snip2[] = {0x08, 0x00, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00}; unsigned char fullsnip[] = {0x53, 0x50, 0x41, 0x52, 0x48, 0xBB, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0x03, 0x48, 0x85, 0xc0, 0x74, 0x28, 0x49, 0xBA, 0x08, 0x00, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x41, 0x8b, 0x1a, 0x66, 0x81, 0xf3, 0x99, 0x99, 0x48, 0x0f, 0xb7, 0xdb, 0x80, 0x04, 0x18, 0x01, 0x66, 0x41, 0x8b, 0x1a, 0x66, 0xd1, 0xfb, 0x66, 0x41, 0x89, 0x1a, 0x41, 0x5a, 0x58, 0x5b, 0x90, 0x90, 0x90, 0x90}; memcpy(snip1, (char *)&mapaddr, sizeof(mapaddr)); memcpy(fullsnip + 6, (char *)&mapaddr, sizeof(mapaddr)); mapaddr += sizeof(mapaddr); memcpy(snip2, (char *)&mapaddr, sizeof(mapaddr)); memcpy(fullsnip + 24, (char *)&mapaddr, sizeof(mapaddr)); for (const auto& fn : todo) { std::cerr << "Reinstrumenting " << fn << " ..." << std::endl; if ((fd = open(fn.c_str(), O_RDWR)) == -1 || fstat(fd, &st) != 0) { std::cerr << "Error: file is gone: " << fn << std::endl; exit(-1); } if ((size_t) st.st_size < sizeof(fullsnip)) { std::cerr << "Error: somethings horrible wrong here with " << fn << " ..." << std::endl; continue; } ptr = (unsigned char *)mmap(NULL, st.st_size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); for (i = 2; i < (size_t)st.st_size - (size_t)sizeof(fullsnip); i++) { if (memcmp(ptr + i, snip1, sizeof(snip1)) == 0 && memcmp(ptr + i + sizeof(snip1) + 4, snip2, sizeof(snip2)) == 0) { found++; fullsnip[0x27] = rand() % 256; fullsnip[0x28] = rand() % 256; memcpy(ptr + i - 2, fullsnip, sizeof(fullsnip)); } } // printf("found %lu entries, snipsize %u\n", found, (unsigned int)sizeof(fullsnip)); munmap((void *)ptr, st.st_size); close(fd); } if (found == insertions) { printf("SUCCESS! Performance level 3 succeeded :)\n"); } else { fprintf(stderr, "Error: can not complete performance level 3, could not find all insertions (%lu of %lu).\n", found, insertions); exit(-1); } } return EXIT_SUCCESS; }