diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | Makefile | 14 | ||||
| -rw-r--r-- | fix-lib.m4 | 54 | ||||
| -rw-r--r-- | fix.m4 | 73 | ||||
| -rw-r--r-- | helpers.cc | 89 | ||||
| -rw-r--r-- | helpers.hh | 41 | ||||
| -rw-r--r-- | jump.c | 50 | ||||
| -rw-r--r-- | scout.cc | 263 | ||||
| -rw-r--r-- | trace-call.cc | 75 |
9 files changed, 363 insertions, 297 deletions
diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 9910621..0000000 --- a/.gitignore +++ /dev/null @@ -1 +0,0 @@ -taosc diff --git a/Makefile b/Makefile index df00cc6..c6de20c 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,8 @@ PREFIX ?= /usr/local BIN_PREFIX ::= $(DESTDIR)$(PREFIX)/bin/taosc- DATA_DIR ::= $(DESTDIR)$(PREFIX)/share/taosc -BIN ::= fix fix-lib scout synth -DATA ::= collect collection patch +BIN ::= fix scout synth trace-call +DATA ::= collect collection jump patch all: $(BIN) $(DATA) @@ -19,16 +19,16 @@ clean: fix: fix.m4 m4 -D DATA_DIR=$(DATA_DIR) $< > $@ -fix-lib: fix-lib.m4 - m4 -D DATA_DIR=$(DATA_DIR) $< > $@ +scout: scout.o helpers.o + $(CXX) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ synth: synth.py link $< $@ -collect: collect.c - e9compile $< +trace-call: trace-call.o helpers.o + $(CXX) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ -patch: patch.c +%: %.c e9compile $< install: $(BIN:%=$(BIN_PREFIX)%) $(DATA:%=$(DATA_DIR)/%) diff --git a/fix-lib.m4 b/fix-lib.m4 deleted file mode 100644 index 3d9216a..0000000 --- a/fix-lib.m4 +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/sh -# Patcher for dynamically linked library -# Copyright (C) 2025 Nguyễn Gia Phong -# -# This file is part of taosc. -# -# Taosc is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Taosc is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with taosc. If not, see <https://www.gnu.org/licenses/>. - -set -ex -if test $# -lt 4 -then - echo Usage: taosc-fix-lib executable library address workdir option... - exit 1 -fi -binary="$(realpath $1)" -library="$(realpath $2)" -lib="$(basename $library)" -address="$3" -wd="$(realpath $4)" -bin="$wd/$(basename $binary)" -opts="${@:5}" - -afl-dyninst --library="$library" -x "$binary" "$bin.fuzzee" -pushd DATA_DIR > /dev/null -trap 'popd > /dev/null' EXIT -mkdir -p "$wd/collect" -e9tool -M false -P 'log(state)@collect' -o "$bin.collect" "$binary" -e9tool -M addr=$address -P 'log(state)@collect'\ - -o "$wd/collect/$lib" --shared "$library" -mkdir -p "$wd/patched" -e9tool -M addr=$address -P 'if dest(state)@patch goto'\ - -o "$wd/patched/$lib" --shared "$library" - -# TODO: augment number of executions -afl-dyninst-env afl-fuzz -i "$wd/fuzz/exploits" -o "$wd/fuzz/crashes"\ - -CE 10000 -- "$bin.fuzzee" $opts @@ -# TODO: use patchelf -find "$wd/fuzz/crashes/default/crashes" -name id:* | parallel\ - LD_LIBRARY_PATH="$wd/collect" TAOSC_OUTPUT="$wd/vars/neg/"'$(basename {})'\ - "$bin.collect" $opts {} || true -taosc-synth "$wd/vars" > "$wd/predicates" -taosc-scout "$library" "$address" > "$wd/destinations" -# vim: filetype=sh.m4 diff --git a/fix.m4 b/fix.m4 index b6fd666..d3ee7fa 100644 --- a/fix.m4 +++ b/fix.m4 @@ -17,17 +17,76 @@ # You should have received a copy of the GNU Affero General Public License # along with taosc. If not, see <https://www.gnu.org/licenses/>. -set -ex +set -ex -o pipefail +save_exit_code() { + set +e + # TODO: make timeout configurable + timeout -k 1 5 $@ 2>&1 1>/dev/null + exit_code=$? + set -e +} + if test $# -lt 3 then - echo Usage: taosc-fix binary address workdir option... + echo Usage: taosc-fix workdir binary option... exit 1 fi -binary="$(realpath $1)" -address="$2" -wd="$(realpath $3)" -bin="$wd/$(basename $binary)" -opts="${@:4}" +wd="$(realpath $1)" +test -d "$wd" +bin="$wd/$(basename $2)" +binary="$(realpath $2)" +test -x "$binary" +opts="${@:3}" # TODO: interpolation + +test -d "$wd/exploits" +test ! -z "$(ls -A "$wd/exploits")" +mkdir -p "$wd/exit-codes" +for exploit in "$wd/exploits"/* +do + save_exit_code "$binary" "$opts" "$exploit" + echo $exit_code > "$wd/exit-codes/$(basename "$exploit")" +done + +> "$wd/stack-trace" +for exploit in "$wd/exploits"/* +do + gdb --batch --ex run --ex backtrace --args \ + "$binary" "$opts" "$exploit" 2>/dev/null | + grep '^#[0-9]\+ \+0x[0-9a-f]\+' | + awk '!$7 || $7 == bin {print $1, $2}' "bin=$binary" >> "$wd/stack-trace" +done + +grep '^#0 0x[0-9a-f]\+$' "$wd/stack-trace" | + sed 's/^#0 0x0*//' > "$wd/return-blocks" +# Stack trace contains return addresses, not call addresses: +# https://devblogs.microsoft.com/oldnewthing?p=96116 +grep -v '^#0 0x[0-9a-f]\+$' "$wd/stack-trace" | + sort | + sed 's/^#[0-9]\+ 0x0*//' | + taosc-trace-call "$binary" >> "$wd/return-blocks" + +> "$wd/jumps" +pushd DATA_DIR > /dev/null +taosc-scout "$binary" < "$wd/return-blocks" | + while read loc destinations + do + e9tool -100 -M addr=0x$loc -P 'if dest()@jump goto' \ + -o "$bin.$loc" "$binary" + for dest in $destinations + do + for exploit in "$wd/exploits"/* + do + save_exit_code env TAOSC_DEST=0x$dest "$bin.$loc" "$opts" "$exploit" + if test $exit_code -ge 124 && test $exit_code -le 127 || + test $exit_code -eq $(< "$wd/exit-codes/$(basename "$exploit")") + then + continue 2 # next destination + fi + done + echo $loc $dest >> "$wd/jumps" + done + done 2>&1 1>/dev/null +exit afl-dyninst -x "$binary" "$bin.fuzzee" pushd DATA_DIR > /dev/null diff --git a/helpers.cc b/helpers.cc new file mode 100644 index 0000000..c02957e --- /dev/null +++ b/helpers.cc @@ -0,0 +1,89 @@ +// Helper functions +// Copyright (C) 2024-2025 Nguyễn Gia Phong +// +// This file is part of taosc. +// +// Taosc is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Taosc is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with taosc. If not, see <https://www.gnu.org/licenses/>. + +// Dyninst headers +#include <CFG.h> +#include <CodeObject.h> + +using Address = Dyninst::Address; +using Block = Dyninst::ParseAPI::Block; +using CodeObject = Dyninst::ParseAPI::CodeObject; +using CodeRegion = Dyninst::ParseAPI::CodeRegion; +using CodeSource = Dyninst::ParseAPI::SymtabCodeSource; + +#include <cassert> +#include <cstdlib> +#include <filesystem> +#include <iostream> +#include <set> +#include <string> +#include <utility> + +char const* +parse_args (int argc, char const* const* argv) +{ + if (argc == 2) + return argv[1]; + std::filesystem::path prog {argv[0]}; + std::cerr << "Usage: " << prog.filename ().string () << " EXECUTABLE\n"; + std::exit (1); +} + +void +die_for (Address address, std::string const& message) +{ + std::cerr << message << ' ' << std::hex << address << '\n'; + std::exit (1); +} + +/// Find next basic block's entry after given address, reparsing if necessary +static Block* +next_block (CodeObject& co, CodeRegion* region, Address address) +{ + auto* blk = co.findBlockByEntry (region, address); + if (blk != nullptr) + return blk; + co.parse (address, true); + blk = co.findBlockByEntry (region, address); + return (blk != nullptr) ? blk : co.findNextBlock (region, address); +} + +Block* +find_block (CodeSource& cs, CodeObject& co, Address address) +{ + if (!cs.isCode (address)) + die_for (address, "no instruction at"); + std::set <CodeRegion*> regions; + if (cs.findRegions (address, regions) != 1) + die_for (address, "not exactly 1 region found for instruction at"); + for (auto* region : regions) + { + std::set <Block*> blocks; + if (co.findBlocks (region, address, blocks) > 0) + for (auto* blk : blocks) // TODO: choose the best block + return blk; + auto* blk = next_block (co, region, region->low ()); + while (blk != nullptr && address > blk->last ()) + blk = next_block (co, region, blk->end ()); + if (blk == nullptr) + die_for (address, "no block found for instruction at"); + assert (address >= blk->start () && address < blk->end ()); + return blk; + } + std::unreachable (); +} diff --git a/helpers.hh b/helpers.hh new file mode 100644 index 0000000..0c015b1 --- /dev/null +++ b/helpers.hh @@ -0,0 +1,41 @@ +// Declarations of helpers +// Copyright (C) 2025 Nguyễn Gia Phong +// +// This file is part of taosc. +// +// Taosc is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Taosc is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with taosc. If not, see <https://www.gnu.org/licenses/>. + +#ifndef TAOSC_HELPERS_HH +#define TAOSC_HELPERS_HH + +// Dyninst headers +#include <CFG.h> +#include <CodeObject.h> + +#include <string> + +/// Return path to executable if the arguments are valid, +/// otherwise print usage and exit +char const* parse_args (int, char const* const*); + +/// Print the message about the given address +/// then terminate the program with exit code 1 +void die_for (Dyninst::Address, std::string const&); + +/// Find block containing given address +Dyninst::ParseAPI::Block* find_block (Dyninst::ParseAPI::SymtabCodeSource&, + Dyninst::ParseAPI::CodeObject&, + Dyninst::Address); + +#endif // TAOSC_HELPERS_HH diff --git a/jump.c b/jump.c new file mode 100644 index 0000000..794d79d --- /dev/null +++ b/jump.c @@ -0,0 +1,50 @@ +/* + * TODO + * Copyright (C) 2024-2025 Nguyễn Gia Phong + * + * This file is part of taosc. + * + * Taosc is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Taosc is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with taosc. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "stdlib.c" + +static const void *destination; + +/* + * Get an environment variable and parse as a number. + * Return 0 on error. + */ +uint64_t getenvull(const char *name) +{ + const char *const s = getenv(name); + if (s == NULL) + return 0ULL; + errno = 0; + const uint64_t u = strtoull(s, NULL, 0); + if (errno) + return 0ULL; + return u; +} + +void init(int argc, const char *const *argv, char **envp) +{ + environ = envp; + destination = (void *) getenvull("TAOSC_DEST"); +} + +const void *dest(void) +{ + return destination; +} diff --git a/scout.cc b/scout.cc index 3ef6002..ae2f7c6 100644 --- a/scout.cc +++ b/scout.cc @@ -1,5 +1,5 @@ // Patch's jump destinations searcher -// Copyright (C) 2024-2025 Nguyễn Gia Phong +// Copyright (C) 2025 Nguyễn Gia Phong // // This file is part of taosc. // @@ -16,241 +16,48 @@ // You should have received a copy of the GNU Affero General Public License // along with taosc. If not, see <https://www.gnu.org/licenses/>. +#include "helpers.hh" + // Dyninst headers #include <CFG.h> #include <CodeObject.h> -#include <Graph.h> -#include <Instruction.h> -#include <InstructionDecoder.h> -#include <slicing.h> - -using AbstractRegion = Dyninst::AbsRegion; -using Address = Dyninst::Address; -using AssignmentConverter = Dyninst::AssignmentConverter; -using AssignmentPtr = Dyninst::Assignment::Ptr; -using Block = Dyninst::ParseAPI::Block; -using CodeObject = Dyninst::ParseAPI::CodeObject; -using CodeRegion = Dyninst::ParseAPI::CodeRegion; -using CodeSource = Dyninst::ParseAPI::SymtabCodeSource; -using Edge = Dyninst::Edge; -using EdgeIterator = Dyninst::EdgeIterator; -using Function = Dyninst::ParseAPI::Function; -using Graph = Dyninst::Graph; -using Instruction = Dyninst::InstructionAPI::Instruction; -using InstructionCategory = Dyninst::InstructionAPI::InsnCategory; -using InstructionDecoder = Dyninst::InstructionAPI::InstructionDecoder; -using NodeIterator = Dyninst::NodeIterator; -using SliceNode = Dyninst::SliceNode; -using Slicer = Dyninst::Slicer; -#include <cassert> -#include <filesystem> -#include <functional> #include <iostream> -#include <map> -#include <queue> - -/// Collect elements from given iterator into a vector -template <class Element, class Iterator> - std::vector <Element*> - range (auto iter) - { - Iterator begin, end; - std::vector <Element*> result; - for (iter (begin, end); begin != end; ++begin) - result.push_back (static_cast <Element*> ((*begin).get ())); - return result; - } - -class SlicerHelper - { - CodeSource& cs; - InstructionDecoder decoder; - Slicer::Predicates predicates; - AssignmentConverter ac {true, true}; - std::set <Address> seen; - std::vector <std::vector <Address>> bfs_slices; - - public: - SlicerHelper (CodeSource& cs) - : cs {cs}, - decoder {(const void*) nullptr, 1, cs.getArch ()}, - ac {true, true} // enable caching and stack analysis - { - this->predicates.setSearchForControlFlowDep(true); - } - - /// Decode instruction at given address - Instruction - decode (Address addr) - { - auto const& insn = this->decoder.decode ((const unsigned char*) - this->cs.getPtrToInstruction (addr)); - assert (insn.size () > 0); - return insn; - } - - /// Collect the interprocedure backward slice at addr in BFS order - void - slice (Instruction const& insn, Address addr, Function* fun, Block* blk) - { - std::vector <AssignmentPtr> assignments; - this->ac.convert (insn, addr, fun, blk, assignments); - if (assignments.empty ()) - return; - for (auto const& asgn : assignments) - { - this->bfs_slices.emplace_back (); - Slicer s {asgn, blk, fun}; - auto const& slice = s.backwardSlice (this->predicates); -#define ITER(i, E, I, x, f) \ - for (auto const& i : range <E, I> ([x] (auto& b, auto& e) { x->f (b, e); })) - ITER (node, SliceNode, NodeIterator, slice, exitNodes) - { - this->seen.insert (node->addr ()); - std::queue <SliceNode*> q; // breadth-first traversal - q.push (node); - while (!q.empty ()) - { - auto const& parent = q.front (); - q.pop (); - this->bfs_slices.back ().push_back (parent->addr ()); - ITER (edge, Edge, EdgeIterator, parent, ins) -#undef ITER - { - auto const& child = edge->source (); - if (this->seen.count (child->addr ()) > 0) - continue; - this->seen.insert (child->addr ()); - q.push (static_cast <SliceNode*> (child.get ())); - } - } - } - } - } - - /// Flatten stored BFS slices in round-robin order - std::vector <Address> const - slice_zip () - { - std::vector <Address> result; - size_t n = 0; - for (auto const& v : this->bfs_slices) - { - if (result.empty () || v[0] != result.back ()) - result.push_back (v[0]); - n = std::max (n, v.size ()); - } - for (size_t i = 1; i < n; ++i) - for (auto const& v : this->bfs_slices) - if (i < v.size ()) - result.push_back (v[i]); - return result; - } - }; - -/// Find next basic block's entry after given address, reparsing if necessary -Block* -next_block (CodeObject& co, CodeRegion* region, Address address) -{ - auto blk = co.findBlockByEntry (region, address); - if (blk != nullptr) - return blk; - co.parse (address, true); - blk = co.findBlockByEntry (region, address); - return (blk != nullptr) ? blk : co.findNextBlock (region, address); -} - -/// Find block containing given address -Block* -find_block (CodeSource& cs, CodeObject& co, Address target_addr) -{ - if (!cs.isCode (target_addr)) - { - std::cerr << std::hex << target_addr - << " does not point to an instruction\n"; - return nullptr; - } - std::set <CodeRegion*> regions; - if (cs.findRegions (target_addr, regions) != 1) - { - std::cerr << "expected 1 region containing instruction, found " - << regions.size () << '\n'; - return nullptr; - } - for (auto const& region : regions) - { - std::set <Block*> blocks; - if (co.findBlocks (region, target_addr, blocks) > 0) - for (auto const& blk : blocks) // TODO: choose the best block - return blk; - - auto* blk = next_block (co, region, region->low ()); - while (blk != nullptr && target_addr > blk->last ()) - blk = next_block (co, region, blk->end ()); - if (blk == nullptr) - return nullptr; - assert (target_addr >= blk->start () && target_addr < blk->end ()); - return blk; - } -#if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L) - std::unreachable (); -#else - __builtin_unreachable(); // GCC or Clang -#endif -} - -/// Slice backward from return instructions -std::vector <Address> const -returns_slice (CodeSource& cs, Function* fun, Address target_addr) -{ - SlicerHelper helper {cs}; - for (auto const& blk : fun->blocks ()) - for (auto [addr, step] = std::tuple {blk->start (), 0ul}; - addr < blk->end (); - addr += step) - { - auto const& insn = helper.decode (addr); - step = insn.size (); - if (insn.getCategory () == InstructionCategory::c_ReturnInsn) - helper.slice (insn, addr, fun, blk); - } - return helper.slice_zip (); -} +#include <set> +#include <vector> int main (int argc, char** argv) { - if (argc != 3) - { - std::cerr << "Usage: " << std::filesystem::path (argv[0]).filename () - << " binary instruction-address\n"; - return -1; - } - CodeSource cs {argv[1]}; - Address target_addr; - { - std::stringstream ss; - ss << std::hex << argv[2]; - ss >> target_addr; - } - CodeObject co {&cs}; + Dyninst::ParseAPI::SymtabCodeSource cs {parse_args (argc, argv)}; + Dyninst::ParseAPI::CodeObject co {&cs}; co.parse (); // parsed functions have same lifetime as co - auto const& block = find_block (cs, co, target_addr); - if (block == nullptr) - { - std::cerr << "block containing instruction not found\n"; - return -1; - } - std::vector<Function*> functions; - block->getFuncs (functions); - if (functions.size () < 1) - { - std::cerr << "found no function containing instruction\n"; - return -1; - } - for (auto* fun : functions) - for (auto const& addr : returns_slice (cs, fun, target_addr)) - std::cout << std::hex << addr << '\n'; - return 0; + while (!std::cin.eof ()) + { + Dyninst::Address address; + std::cin >> std::hex >> address; + if (std::cin.fail ()) + break; + std::cout << std::hex << address; + auto* block = find_block (cs, co, address); + if (block->containingFuncs () < 1) + die_for (address, "no function found containing instruction at"); + std::vector <Dyninst::ParseAPI::Function*> functions; + block->getFuncs (functions); + std::set <Dyninst::Address> seen; + for (auto* fun : functions) + for (auto const& return_block : fun->returnBlocks ()) + { + std::set <Dyninst::ParseAPI::Block*> post_dominates; + fun->getImmediatePostDominates (return_block, post_dominates); + for (auto* pd : post_dominates) + if (seen.insert (pd->start ()).second) + std::cout << ' ' << std::hex << pd->start (); + } + std::cout << '\n'; + } + if (std::cin.eof ()) + return 0; + std::cerr << "invalid input\n"; + return -1; } diff --git a/trace-call.cc b/trace-call.cc new file mode 100644 index 0000000..d154b4f --- /dev/null +++ b/trace-call.cc @@ -0,0 +1,75 @@ +// Utility for finding call instruction coresponding to return address +// Copyright (C) 2025 Nguyễn Gia Phong +// +// This file is part of taosc. +// +// Taosc is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Taosc is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with taosc. If not, see <https://www.gnu.org/licenses/>. + +#include "helpers.hh" + +// Dyninst headers +#include <CFG.h> +#include <CodeObject.h> + +#include <cassert> +#include <iostream> +#include <vector> + +int +main (int argc, char** argv) +{ + Dyninst::ParseAPI::SymtabCodeSource cs {parse_args (argc, argv)}; + Dyninst::ParseAPI::CodeObject co {&cs}; + co.parse (); // parsed functions have same lifetime as co + while (!std::cin.eof ()) + { + Dyninst::Address return_address; + std::cin >> std::hex >> return_address; + if (std::cin.fail ()) + break; + auto* block = find_block (cs, co, return_address); + // Each function call creates an interprocedure edge, + // hence its basic block ends with the call site. + // The control flow then naturally goes to the next basic block + // starting with the return address. + if (block->start () != return_address) + die_for (return_address, "no block found with start address"); + if (block->containingFuncs () < 1) + die_for (return_address, "no function containing return address"); + std::vector <Dyninst::ParseAPI::Function*> functions; + block->getFuncs (functions); + Dyninst::Address call_address = 0; + for (auto* fun : functions) + for (auto* call_edge : fun->callEdges ()) + { + auto* call_block = call_edge->src (); + auto* return_block = fun->getImmediatePostDominator (call_block); + if (return_block == nullptr || *return_block != *block) + continue; + if (call_address != 0) + { + assert (call_block->last () == call_address); + continue; // break if not for the assertion + } + call_address = call_block->last (); + std::cout << std::hex << call_address << '\n'; + } + if (call_address == 0) + die_for (return_address, "no call found for return address"); + } + if (std::cin.eof ()) + return 0; + std::cerr << "invalid input\n"; + return -1; +} |
