diff options
author | Sonic <50692172+SonicStark@users.noreply.github.com> | 2024-02-01 15:17:48 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-01 07:17:48 +0000 |
commit | 643df2b538b06561d5a6d6ae441322167d33f834 (patch) | |
tree | 4bb8ba8f283c6a6c7ca8c6ed0e3fe6beee4705c7 | |
parent | 970e0b14ce16c04a25ae90be81a933d943a04410 (diff) | |
download | afl++-643df2b538b06561d5a6d6ae441322167d33f834.tar.gz |
Improve afl-cc (#1975)
* update response file support
  - full support of rsp file
  - fix some segv issues
* Improve afl-cc
  - remove dead code about allow/denylist options of sancov
  - missing `if (!aflcc->have_msan)`
  - add docs for each function
  - typo
-rw-r--r-- | src/afl-cc.c | 488 |
1 files changed, 368 insertions, 120 deletions
diff --git a/src/afl-cc.c b/src/afl-cc.c index d11419b0..c300ddfc 100644 --- a/src/afl-cc.c +++ b/src/afl-cc.c @@ -51,7 +51,7 @@ #define MAX_PARAMS_NUM 2048 #endif -/* Global declarations */ +/** Global declarations -----BEGIN----- **/ typedef enum { @@ -187,12 +187,11 @@ typedef struct aflcc_state { void aflcc_state_init(aflcc_state_t *, u8 *argv0); -/* Try to find a specific runtime we need, the path to obj would be - allocated and returned. Otherwise it returns NULL on fail. */ u8 *find_object(aflcc_state_t *, u8 *obj); void find_built_deps(aflcc_state_t *); +/* Insert param into the new argv, raise error if MAX_PARAMS_NUM exceeded. */ static inline void insert_param(aflcc_state_t *aflcc, u8 *param) { if (unlikely(aflcc->cc_par_cnt + 1 >= MAX_PARAMS_NUM)) @@ -202,6 +201,13 @@ static inline void insert_param(aflcc_state_t *aflcc, u8 *param) { } +/* + Insert a param which contains path to the object file. It uses find_object to + get the path based on the name `obj`, and then uses a sprintf like method to + format it with `fmt`. If `fmt` is NULL, the inserted arg is same as the path. + If `msg` provided, it should be an error msg raised if the path can't be + found. `obj` must not be NULL. +*/ static inline void insert_object(aflcc_state_t *aflcc, u8 *obj, u8 *fmt, u8 *msg) { @@ -231,6 +237,7 @@ static inline void insert_object(aflcc_state_t *aflcc, u8 *obj, u8 *fmt, } +/* Insert params into the new argv, make clang load the pass. */ static inline void load_llvm_pass(aflcc_state_t *aflcc, u8 *pass) { #if LLVM_MAJOR >= 11 /* use new pass manager */ @@ -291,8 +298,12 @@ void add_lto_linker(aflcc_state_t *); void add_lto_passes(aflcc_state_t *); void add_runtime(aflcc_state_t *); -/* Working state */ +/** Global declarations -----END----- **/ +/* + Init global state struct. We also extract the callname, + check debug options and if in C++ mode here. 
+*/ void aflcc_state_init(aflcc_state_t *aflcc, u8 *argv0) { // Default NULL/0 is a good start @@ -352,7 +363,7 @@ void aflcc_state_init(aflcc_state_t *aflcc, u8 *argv0) { } /* - in find_object() we look here: + Try to find a specific runtime we need, in here: 1. firstly we check the $AFL_PATH environment variable location if set 2. next we check argv[0] if it has path information and use it @@ -366,7 +377,6 @@ void aflcc_state_init(aflcc_state_t *aflcc, u8 *argv0) { if all these attempts fail - we return NULL and the caller has to decide what to do. Otherwise the path to obj would be allocated and returned. */ - u8 *find_object(aflcc_state_t *aflcc, u8 *obj) { u8 *argv0 = aflcc->argv0; @@ -499,6 +509,10 @@ u8 *find_object(aflcc_state_t *aflcc, u8 *obj) { } +/* + Deduce some info about compiler toolchains in current system, + from the building results of AFL++ +*/ void find_built_deps(aflcc_state_t *aflcc) { char *ptr = NULL; @@ -571,8 +585,9 @@ void find_built_deps(aflcc_state_t *aflcc) { } -/* compiler_mode & instrument_mode selecting */ +/** compiler_mode & instrument_mode selecting -----BEGIN----- **/ +/* Select compiler_mode by callname, such as "afl-clang-fast", etc. */ void compiler_mode_by_callname(aflcc_state_t *aflcc) { if (strncmp(aflcc->callname, "afl-clang-fast", 14) == 0) { @@ -626,6 +641,10 @@ void compiler_mode_by_callname(aflcc_state_t *aflcc) { } +/* + Select compiler_mode by env AFL_CC_COMPILER. And passthrough mode can be + regarded as a special compiler_mode, so we check for it here, too. +*/ void compiler_mode_by_environ(aflcc_state_t *aflcc) { if (getenv("AFL_PASSTHROUGH") || getenv("AFL_NOOPT")) { @@ -682,7 +701,13 @@ void compiler_mode_by_environ(aflcc_state_t *aflcc) { } -// If it can be inferred, instrument_mode would also be set +/* + Select compiler_mode by command line options --afl-... + If it can be inferred, instrument_mode would also be set. + This can supersedes previous result based on callname + or AFL_CC_COMPILER. 
And "--afl_noopt"/"--afl-noopt" will + be overwritten by "-g". +*/ void compiler_mode_by_cmdline(aflcc_state_t *aflcc, int argc, char **argv) { char *ptr = NULL; @@ -775,6 +800,12 @@ void compiler_mode_by_cmdline(aflcc_state_t *aflcc, int argc, char **argv) { } +/* + Select instrument_mode by those envs in old style: + - USE_TRACE_PC, AFL_USE_TRACE_PC, AFL_LLVM_USE_TRACE_PC, AFL_TRACE_PC + - AFL_LLVM_CALLER, AFL_LLVM_CTX, AFL_LLVM_CTX_K + - AFL_LLVM_NGRAM_SIZE +*/ static void instrument_mode_old_environ(aflcc_state_t *aflcc) { if (getenv("AFL_LLVM_INSTRIM") || getenv("INSTRIM") || @@ -834,7 +865,11 @@ static void instrument_mode_old_environ(aflcc_state_t *aflcc) { } -// compiler_mode would also be set if depended by the instrument_mode +/* + Select instrument_mode by env 'AFL_LLVM_INSTRUMENT'. + Previous compiler_mode will be superseded, if required by some + values of instrument_mode. +*/ static void instrument_mode_new_environ(aflcc_state_t *aflcc) { if (!getenv("AFL_LLVM_INSTRUMENT")) { return; } @@ -1058,6 +1093,11 @@ static void instrument_mode_new_environ(aflcc_state_t *aflcc) { } +/* + Select instrument_mode by envs, the top wrapper. We check + have_instr_env firstly, then call instrument_mode_old_environ + and instrument_mode_new_environ sequentially. +*/ void instrument_mode_by_environ(aflcc_state_t *aflcc) { if (getenv("AFL_LLVM_INSTRUMENT_FILE") || getenv("AFL_LLVM_WHITELIST") || @@ -1081,6 +1121,10 @@ void instrument_mode_by_environ(aflcc_state_t *aflcc) { } +/* + Workaround to ensure CALLER, CTX, K-CTX and NGRAM + instrumentation were used correctly. +*/ static void instrument_opt_mode_exclude(aflcc_state_t *aflcc) { if ((aflcc->instrument_opt_mode & INSTRUMENT_OPT_CTX) && @@ -1116,6 +1160,11 @@ static void instrument_opt_mode_exclude(aflcc_state_t *aflcc) { } +/* + Last step of compiler_mode & instrument_mode selecting. + We have a few of workarounds here, to check any corner cases, + prepare for a series of fallbacks, and raise warnings or errors. 
+*/ void mode_final_checkout(aflcc_state_t *aflcc, int argc, char **argv) { if (aflcc->instrument_opt_mode && @@ -1320,6 +1369,10 @@ void mode_final_checkout(aflcc_state_t *aflcc, int argc, char **argv) { } +/* + Print welcome message on screen, giving brief notes about + compiler_mode and instrument_mode. +*/ void mode_notification(aflcc_state_t *aflcc) { char *ptr2 = alloc_printf(" + NGRAM-%u", aflcc->ngram_size); @@ -1358,6 +1411,17 @@ void mode_notification(aflcc_state_t *aflcc) { } +/* + Set argv[0] required by execvp. It can be + - specified by env AFL_CXX + - g++ or clang++ + - CLANGPP_BIN or LLVM_BINDIR/clang++ + when in C++ mode, or + - specified by env AFL_CC + - gcc or clang + - CLANG_BIN or LLVM_BINDIR/clang + otherwise. +*/ void add_real_argv0(aflcc_state_t *aflcc) { static u8 llvm_fullpath[PATH_MAX]; @@ -1424,7 +1488,9 @@ void add_real_argv0(aflcc_state_t *aflcc) { } -/* Macro defs for the preprocessor */ +/** compiler_mode & instrument_mode selecting -----END----- **/ + +/** Macro defs for the preprocessor -----BEGIN----- **/ void add_defs_common(aflcc_state_t *aflcc) { @@ -1433,8 +1499,11 @@ void add_defs_common(aflcc_state_t *aflcc) { } -/* See instrumentation/README.instrument_list.md# - 2-selective-instrumentation-with-_afl_coverage-directives */ +/* + __afl_coverage macro defs. See + instrumentation/README.instrument_list.md# + 2-selective-instrumentation-with-_afl_coverage-directives +*/ void add_defs_selective_instr(aflcc_state_t *aflcc) { if (aflcc->plusplus_mode) { @@ -1468,9 +1537,11 @@ void add_defs_selective_instr(aflcc_state_t *aflcc) { } -/* As documented in instrumentation/README.persistent_mode.md, deferred - forkserver initialization and persistent mode are not available in afl-gcc - and afl-clang. */ +/* + Macro defs for persistent mode. As documented in + instrumentation/README.persistent_mode.md, deferred forkserver initialization + and persistent mode are not available in afl-gcc and afl-clang. 
+*/ void add_defs_persistent_mode(aflcc_state_t *aflcc) { if (aflcc->compiler_mode == GCC || aflcc->compiler_mode == CLANG) return; @@ -1549,7 +1620,11 @@ void add_defs_persistent_mode(aflcc_state_t *aflcc) { } -/* Control _FORTIFY_SOURCE */ +/* + Control macro def of _FORTIFY_SOURCE. It will do nothing + if we detect this routine has been called previously, or + the macro already here in these existing args. +*/ void add_defs_fortify(aflcc_state_t *aflcc, u8 action) { if (aflcc->have_fortify) { return; } @@ -1574,6 +1649,7 @@ void add_defs_fortify(aflcc_state_t *aflcc, u8 action) { } +/* Macro defs of __AFL_LEAK_CHECK, __AFL_LSAN_ON and __AFL_LSAN_OFF */ void add_defs_lsan_ctrl(aflcc_state_t *aflcc) { insert_param(aflcc, "-includesanitizer/lsan_interface.h"); @@ -1586,7 +1662,9 @@ void add_defs_lsan_ctrl(aflcc_state_t *aflcc) { } -/* About fsanitize (including PCGUARD features) */ +/** Macro defs for the preprocessor -----END----- **/ + +/** About -fsanitize -----BEGIN----- **/ /* For input "-fsanitize=...", it: @@ -1665,6 +1743,16 @@ static u8 fsanitize_fuzzer_comma(char *string) { } +/* + Parse and process possible -fsanitize related args, return PARAM_MISS + if nothing matched. We have 3 main tasks here for these args: + - Check which one of those sanitizers present here. + - Check if libfuzzer present. We need to block the request of enable + libfuzzer, and link harness with our libAFLDriver.a later. + - Check if SanCov allow/denylist options present. We need to try switching + to LLVMNATIVE instead of using our optimized PCGUARD anyway. If we + can't make it finally for various reasons, just drop these options. 
+*/ param_st parse_fsanitize(aflcc_state_t *aflcc, u8 *cur_argv, u8 scan) { param_st final_ = PARAM_MISS; @@ -1706,22 +1794,8 @@ param_st parse_fsanitize(aflcc_state_t *aflcc, u8 *cur_argv, u8 scan) { #undef HAVE_SANITIZER_SCAN_KEEP - if (!strncmp(cur_argv, "-fsanitize-coverage-", 20) && - strstr(cur_argv, "list=")) { - - if (scan) { - - aflcc->have_instr_list = 1; - final_ = PARAM_SCAN; - - } else { - - final_ = PARAM_KEEP; - - } - - } - + // We can't use a "else if" there, because some of the following + // matching rules overlap with those in the if-statement above. if (!strcmp(cur_argv, "-fsanitize=fuzzer")) { if (scan) { @@ -1761,25 +1835,27 @@ param_st parse_fsanitize(aflcc_state_t *aflcc, u8 *cur_argv, u8 scan) { } - } else if ((!strncmp(cur_argv, "-fsanitize=fuzzer-", + } else if (!strncmp(cur_argv, "-fsanitize-coverage-", 20) && - strlen("-fsanitize=fuzzer-")) || - !strncmp(cur_argv, "-fsanitize-coverage", - strlen("-fsanitize-coverage"))) && - (strncmp(cur_argv, "sanitize-coverage-allow", - strlen("sanitize-coverage-allow")) && - strncmp(cur_argv, "sanitize-coverage-deny", - strlen("sanitize-coverage-deny")) && - aflcc->instrument_mode != INSTRUMENT_LLVMNATIVE)) { + strstr(cur_argv, "list=")) { if (scan) { + aflcc->have_instr_list = 1; final_ = PARAM_SCAN; } else { - if (!be_quiet) { WARNF("Found '%s' - stripping!", cur_argv); } - final_ = PARAM_DROP; + if (aflcc->instrument_mode != INSTRUMENT_LLVMNATIVE) { + + if (!be_quiet) { WARNF("Found '%s' - stripping!", cur_argv); } + final_ = PARAM_DROP; + + } else { + + final_ = PARAM_KEEP; + + } } @@ -1791,6 +1867,16 @@ param_st parse_fsanitize(aflcc_state_t *aflcc, u8 *cur_argv, u8 scan) { } +/* + Add params for sanitizers. Here we need to consider: + - Use static runtime for asan, as much as possible. + - ASAN, MSAN, AFL_HARDEN are mutually exclusive. + - Add options if not found there, on request of AFL_USE_ASAN, AFL_USE_MSAN, + etc. 
+ - Update have_* so that functions called after this can have correct context. + However this also means any functions called before should NOT depend on + these have_*, otherwise they may not work as expected. +*/ void add_sanitizers(aflcc_state_t *aflcc, char **envp) { if (getenv("AFL_USE_ASAN") || aflcc->have_asan) { @@ -1820,7 +1906,7 @@ void add_sanitizers(aflcc_state_t *aflcc, char **envp) { FATAL("MSAN and AFL_HARDEN are mutually exclusive"); add_defs_fortify(aflcc, 0); - insert_param(aflcc, "-fsanitize=memory"); + if (!aflcc->have_msan) { insert_param(aflcc, "-fsanitize=memory"); } aflcc->have_msan = 1; } @@ -1907,6 +1993,7 @@ void add_sanitizers(aflcc_state_t *aflcc, char **envp) { } +/* Add params to enable LLVM SanCov, the native PCGUARD */ void add_native_pcguard(aflcc_state_t *aflcc) { /* If there is a rust ASan runtime on the command line, it is likely we're @@ -1943,6 +2030,11 @@ void add_native_pcguard(aflcc_state_t *aflcc) { } +/* + Add params to launch our optimized PCGUARD on request. + It will fallback to use the native PCGUARD in some cases. If so, plz + bear in mind that instrument_mode will be set to INSTRUMENT_LLVMNATIVE. +*/ void add_optimized_pcguard(aflcc_state_t *aflcc) { #if LLVM_MAJOR >= 13 @@ -1959,7 +2051,7 @@ void add_optimized_pcguard(aflcc_state_t *aflcc) { SAYF( "Using unoptimized trace-pc-guard, due usage of " "-fsanitize-coverage-allow/denylist, you can use " - "AFL_LLVM_ALLOWLIST/AFL_LLMV_DENYLIST instead.\n"); + "AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST instead.\n"); insert_param(aflcc, "-fsanitize-coverage=trace-pc-guard"); aflcc->instrument_mode = INSTRUMENT_LLVMNATIVE; @@ -1994,8 +2086,14 @@ void add_optimized_pcguard(aflcc_state_t *aflcc) { } -/* Linking behaviors */ +/** About -fsanitize -----END----- **/ +/** Linking behaviors -----BEGIN----- **/ + +/* + Parse and process possible linking stage related args, + return PARAM_MISS if nothing matched. 
+*/ param_st parse_linking_params(aflcc_state_t *aflcc, u8 *cur_argv, u8 scan, u8 *skip_next, char **argv) { @@ -2158,6 +2256,7 @@ param_st parse_linking_params(aflcc_state_t *aflcc, u8 *cur_argv, u8 scan, } +/* Add params to specify the linker used in LTO */ void add_lto_linker(aflcc_state_t *aflcc) { unsetenv("AFL_LD"); @@ -2197,6 +2296,7 @@ void add_lto_linker(aflcc_state_t *aflcc) { } +/* Add params to launch SanitizerCoverageLTO.so when linking */ void add_lto_passes(aflcc_state_t *aflcc) { #if defined(AFL_CLANG_LDPATH) && LLVM_MAJOR >= 15 @@ -2215,6 +2315,7 @@ void add_lto_passes(aflcc_state_t *aflcc) { } +/* Add params to link with libAFLDriver.a on request */ static void add_aflpplib(aflcc_state_t *aflcc) { if (!aflcc->need_aflpplib) return; @@ -2250,6 +2351,7 @@ static void add_aflpplib(aflcc_state_t *aflcc) { } +/* Add params to link with runtimes depended by our instrumentation */ void add_runtime(aflcc_state_t *aflcc) { if (aflcc->preprocessor_only || aflcc->have_c || !aflcc->non_dash) { @@ -2345,8 +2447,14 @@ void add_runtime(aflcc_state_t *aflcc) { } -/* Misc */ +/** Linking behaviors -----END----- **/ + +/** Miscellaneous routines -----BEGIN----- **/ +/* + Add params to make compiler driver use our afl-as + as assembler, required by the vanilla instrumentation. +*/ void add_assembler(aflcc_state_t *aflcc) { u8 *afl_as = find_object(aflcc, "as"); @@ -2363,6 +2471,7 @@ void add_assembler(aflcc_state_t *aflcc) { } +/* Add params to launch the gcc plugins for instrumentation. */ void add_gcc_plugin(aflcc_state_t *aflcc) { if (aflcc->cmplog_mode) { @@ -2379,6 +2488,7 @@ void add_gcc_plugin(aflcc_state_t *aflcc) { } +/* Add some miscellaneous params required by our instrumentation. 
*/ void add_misc_params(aflcc_state_t *aflcc) { if (getenv("AFL_NO_BUILTIN") || getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES") || @@ -2425,6 +2535,10 @@ void add_misc_params(aflcc_state_t *aflcc) { } +/* + Parse and process a variety of args under our matching rules, + return PARAM_MISS if nothing matched. +*/ param_st parse_misc_params(aflcc_state_t *aflcc, u8 *cur_argv, u8 scan) { param_st final_ = PARAM_MISS; @@ -2575,6 +2689,9 @@ param_st parse_misc_params(aflcc_state_t *aflcc, u8 *cur_argv, u8 scan) { } +/** Miscellaneous routines -----END----- **/ + +/* Print help message on request */ static void maybe_usage(aflcc_state_t *aflcc, int argc, char **argv) { if (argc < 2 || strncmp(argv[1], "-h", 2) == 0) { @@ -2870,6 +2987,24 @@ static void maybe_usage(aflcc_state_t *aflcc, int argc, char **argv) { } +/* + Process params passed to afl-cc. + + We have two working modes, *scan* and *non-scan*. In scan mode, + the main task is to set some variables in aflcc according to current argv[i], + while in non-scan mode, is to choose keep or drop current argv[i]. + + We have several matching routines being called sequentially in the while-loop, + and each of them try to parse and match current argv[i] according to their own + rules. If one miss match, the next will then take over. In non-scan mode, each + argv[i] mis-matched by all the routines will be kept. + + These routines are: + 1. parse_misc_params + 2. parse_fsanitize + 3. parse_linking_params + 4. `if (*cur == '@') {...}`, i.e., parse response files +*/ static void process_params(aflcc_state_t *aflcc, u8 scan, u32 argc, char **argv) { @@ -2896,134 +3031,249 @@ static void process_params(aflcc_state_t *aflcc, u8 scan, u32 argc, if (PARAM_MISS != parse_linking_params(aflcc, cur, scan, &skip_next, argv)) continue; + /* Response file support -----BEGIN----- + We have two choices - move everything to the command line or + rewrite the response files to temporary files and delete them + afterwards. 
We choose the first for easiness. + For clang, llvm::cl::ExpandResponseFiles does this, however it + only has C++ interface. And for gcc there is expandargv in libiberty, + written in C, but we can't simply copy-paste since its LGPL licensed. + So here we use an equivalent FSM as alternative, and try to be compatible + with the two above. See: + - https://gcc.gnu.org/onlinedocs/gcc/Overall-Options.html + - driver::expand_at_files in gcc.git/gcc/gcc.c + - expandargv in gcc.git/libiberty/argv.c + - llvm-project.git/clang/tools/driver/driver.cpp + - ExpandResponseFiles in + llvm-project.git/llvm/lib/Support/CommandLine.cpp + */ if (*cur == '@') { - // response file support. - // we have two choices - move everything to the command line or - // rewrite the response files to temporary files and delete them - // afterwards. We choose the first for easiness. - // We do *not* support quotes in the rsp files to cope with spaces in - // filenames etc! If you need that then send a patch! u8 *filename = cur + 1; if (aflcc->debug) { DEBUGF("response file=%s\n", filename); } - FILE *f = fopen(filename, "r"); - struct stat st; // Check not found or empty? let the compiler complain if so. - if (!f || fstat(fileno(f), &st) < 0 || st.st_size < 1) { + FILE *f = fopen(filename, "r"); + if (!f) { if (!scan) insert_param(aflcc, cur); continue; } - u8 *tmpbuf = malloc(st.st_size + 2), *ptr; - char **args = malloc(sizeof(char *) * (st.st_size >> 1)); - int count = 1, cont = 0, cont_act = 0; + struct stat st; + if (fstat(fileno(f), &st) || !S_ISREG(st.st_mode) || st.st_size < 1) { - while (fgets(tmpbuf, st.st_size + 1, f)) { + fclose(f); + if (!scan) insert_param(aflcc, cur); + continue; - ptr = tmpbuf; - // fprintf(stderr, "1: %s\n", ptr); - // no leading whitespace - while (isspace(*ptr)) { + } - ++ptr; - cont_act = 0; + // Limit the number of response files, the max value + // just keep consistent with expandargv. 
Only do this in + // scan mode, and not touch rsp_count anymore in the next. + static u32 rsp_count = 2000; + if (scan) { - } + if (rsp_count == 0) FATAL("Too many response files provided!"); - // no comments, no empty lines - if (*ptr == '#' || *ptr == '\n' || !*ptr) { continue; } - // remove LF - if (ptr[strlen(ptr) - 1] == '\n') { ptr[strlen(ptr) - 1] = 0; } - // remove CR - if (*ptr && ptr[strlen(ptr) - 1] == '\r') { ptr[strlen(ptr) - 1] = 0; } - // handle \ at end of line - if (*ptr && ptr[strlen(ptr) - 1] == '\\') { + --rsp_count; - cont = 1; - ptr[strlen(ptr) - 1] = 0; + } - } + // argc, argv acquired from this rsp file. Note that + // process_params ignores argv[0], we need to put a const "" here. + u32 argc_read = 1; + char **argv_read = ck_alloc(sizeof(char *)); + argv_read[0] = ""; + + char *arg_buf = NULL; + u64 arg_len = 0; + + enum fsm_state { + + fsm_whitespace, // whitespace seen so far + fsm_double_quote, // have unpaired double quote + fsm_single_quote, // have unpaired single quote + fsm_backslash, // a backslash is seen with no unpaired quote + fsm_normal // a normal char is seen + + }; + + // Workaround to append c to arg buffer, and append the buffer to argv +#define ARG_ALLOC(c) \ + do { \ + \ + ++arg_len; \ + arg_buf = ck_realloc(arg_buf, (arg_len + 1) * sizeof(char)); \ + arg_buf[arg_len] = '\0'; \ + arg_buf[arg_len - 1] = (char)c; \ + \ + } while (0) + +#define ARG_STORE() \ + do { \ + \ + ++argc_read; \ + argv_read = ck_realloc(argv_read, argc_read * sizeof(char *)); \ + argv_read[argc_read - 1] = arg_buf; \ + arg_buf = NULL; \ + arg_len = 0; \ + \ + } while (0) - // fprintf(stderr, "2: %s\n", ptr); + int cur_chr = (int)' '; // init as whitespace, as a good start :) + enum fsm_state state_ = fsm_whitespace; - // remove whitespace at end - while (*ptr && isspace(ptr[strlen(ptr) - 1])) { + while (cur_chr != EOF) { - ptr[strlen(ptr) - 1] = 0; - cont = 0; + switch (state_) { - } + case fsm_whitespace: + + if (arg_buf) { + + ARG_STORE(); + 
break; + + } - // fprintf(stderr, "3: %s\n", ptr); - if (*ptr) { + if (isspace(cur_chr)) { - do { + cur_chr = fgetc(f); - u8 *value = ptr; - while (*ptr && !isspace(*ptr)) { + } else if (cur_chr == (int)'\'') { - ++ptr; + state_ = fsm_single_quote; + cur_chr = fgetc(f); + + } else if (cur_chr == (int)'"') { + + state_ = fsm_double_quote; + cur_chr = fgetc(f); + + } else if (cur_chr == (int)'\\') { + + state_ = fsm_backslash; + cur_chr = fgetc(f); + + } else { + + state_ = fsm_normal; } - while (*ptr && isspace(*ptr)) { + break; + + case fsm_normal: - *ptr++ = 0; + if (isspace(cur_chr)) { + + state_ = fsm_whitespace; + + } else if (cur_chr == (int)'\'') { + + state_ = fsm_single_quote; + cur_chr = fgetc(f); + + } else if (cur_chr == (int)'\"') { + + state_ = fsm_double_quote; + cur_chr = fgetc(f); + + } else if (cur_chr == (int)'\\') { + + state_ = fsm_backslash; + cur_chr = fgetc(f); + + } else { + + ARG_ALLOC(cur_chr); + cur_chr = fgetc(f); } - if (cont_act) { + break; + + case fsm_backslash: + + ARG_ALLOC(cur_chr); + cur_chr = fgetc(f); + state_ = fsm_normal; + + break; + + case fsm_single_quote: + + if (cur_chr == (int)'\\') { + + cur_chr = fgetc(f); + if (cur_chr == EOF) break; + ARG_ALLOC(cur_chr); + + } else if (cur_chr == (int)'\'') { - u32 len = strlen(args[count - 1]) + strlen(value) + 1; - u8 *tmp = malloc(len); - snprintf(tmp, len, "%s%s", args[count - 1], value); - free(args[count - 1]); - args[count - 1] = tmp; - cont_act = 0; + state_ = fsm_normal; } else { - args[count++] = strdup(value); + ARG_ALLOC(cur_chr); } - } while (*ptr); + cur_chr = fgetc(f); + break; - } + case fsm_double_quote: + + if (cur_chr == (int)'\\') { + + cur_chr = fgetc(f); + if (cur_chr == EOF) break; + ARG_ALLOC(cur_chr); + + } else if (cur_chr == (int)'"') { + + state_ = fsm_normal; + + } else { + + ARG_ALLOC(cur_chr); - if (cont) { + } + + cur_chr = fgetc(f); + break; - cont_act = 1; - cont = 0; + default: + break; } } - if (count) { process_params(aflcc, scan, count, args); } 
+ if (arg_buf) { ARG_STORE(); } // save the pending arg after EOF - // we cannot free args[] unless we don't need - // to keep any reference in cc_params - if (scan) { +#undef ARG_ALLOC +#undef ARG_STORE - if (count) do { + if (argc_read > 1) { process_params(aflcc, scan, argc_read, argv_read); } - free(args[--count]); + // We cannot free argv_read[] unless we don't need to keep any + // reference in cc_params. Never free argv[0], the const "". + if (scan) { - } while (count); + while (argc_read > 1) + ck_free(argv_read[--argc_read]); - free(args); + ck_free(argv_read); } - free(tmpbuf); - continue; - } + } /* Response file support -----END----- */ if (!scan) insert_param(aflcc, cur); @@ -3031,8 +3281,7 @@ static void process_params(aflcc_state_t *aflcc, u8 scan, u32 argc, } -/* Copy argv to cc_params, making the necessary edits. */ - +/* Process each of the existing argv, also add a few new args. */ static void edit_params(aflcc_state_t *aflcc, u32 argc, char **argv, char **envp) { @@ -3173,7 +3422,6 @@ static void edit_params(aflcc_state_t *aflcc, u32 argc, char **argv, } /* Main entry point */ - int main(int argc, char **argv, char **envp) { aflcc_state_t *aflcc = malloc(sizeof(aflcc_state_t)); |