diff options
author | Nguyễn Gia Phong <cnx@loang.net> | 2024-09-02 15:42:21 +0900 |
---|---|---|
committer | Nguyễn Gia Phong <cnx@loang.net> | 2024-09-03 15:25:17 +0900 |
commit | 11ab3cc687bfeb64e1bb223e4e690fe423e6a15c (patch) | |
tree | cddd236eb21d1430af3dd130cc167102462b29b6 /taosc.cc | |
download | taosc-11ab3cc687bfeb64e1bb223e4e690fe423e6a15c.tar.gz |
Diffstat (limited to 'taosc.cc')
-rw-r--r-- | taosc.cc | 279 |
1 files changed, 279 insertions, 0 deletions
diff --git a/taosc.cc b/taosc.cc new file mode 100644 index 0000000..857fcf5 --- /dev/null +++ b/taosc.cc @@ -0,0 +1,279 @@ +// Slicing-based binary fault localizer and patch searcher +// Copyright (C) 2024 Nguyễn Gia Phong +// +// This file is part of taosc. +// +// Taosc is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Taosc is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with taosc. If not, see <https://www.gnu.org/licenses/>. + +// Dyninst headers +#include <CFG.h> +#include <CodeObject.h> +#include <Graph.h> +#include <Instruction.h> +#include <InstructionDecoder.h> +#include <slicing.h> + +using AbstractRegion = Dyninst::AbsRegion; +using Address = Dyninst::Address; +using AssignmentConverter = Dyninst::AssignmentConverter; +using AssignmentPtr = Dyninst::Assignment::Ptr; +using Block = Dyninst::ParseAPI::Block; +using CodeObject = Dyninst::ParseAPI::CodeObject; +using CodeRegion = Dyninst::ParseAPI::CodeRegion; +using CodeSource = Dyninst::ParseAPI::SymtabCodeSource; +using Edge = Dyninst::Edge; +using EdgeIterator = Dyninst::EdgeIterator; +using Function = Dyninst::ParseAPI::Function; +using Graph = Dyninst::Graph; +using Instruction = Dyninst::InstructionAPI::Instruction; +using InstructionDecoder = Dyninst::InstructionAPI::InstructionDecoder; +using InstructionCategory = Dyninst::InstructionAPI::InsnCategory; +using NodeIterator = Dyninst::NodeIterator; +using SliceNode = Dyninst::SliceNode; +using Slicer = Dyninst::Slicer; + +#include <cassert> +#include <functional> +#include <iostream> +#include <map> +#include <queue> + +class SearchingAll : public Slicer::Predicates + { + public: + std::vector <Function*> + followCallBackward (Block* caller, + CallStack_t& call_stack, + AbstractRegion argument) override + { + std::vector <Function*> callers; + caller->getFuncs (callers); + return callers; + } + }; + +template <class Element, class Iterator> + std::vector <Element*> + sorted (auto iter) + { + Iterator begin, end; + std::vector <Element*> result; + for (iter (begin, end); begin != end; ++begin) + result.push_back (static_cast <Element*> ((*begin).get ())); + return result; + } + +template <class SlicerPredicates> + class SlicerHelper + { + CodeSource& cs; + InstructionDecoder decoder; + SlicerPredicates predicates; + AssignmentConverter ac {true, true}; + std::set <Address> seen; + std::vector <std::vector <Address>> bfs_slices; + + public: + SlicerHelper (CodeSource& cs) + : cs {cs}, + decoder {(const void*) nullptr, 1, cs.getArch ()}, + ac {true, true} // enable caching and stack analysis + { + this->predicates.setSearchForControlFlowDep(true); + } + + Instruction + decode (Address addr) + { + auto const& insn = this->decoder.decode ((const unsigned char *) + this->cs.getPtrToInstruction (addr)); + assert (insn.size () > 0); + return insn; + } + +#define ITER(i, E, I, x, f) \ + for (auto const& i : sorted <E, I> ([x] (auto& b, auto& e) { x->f (b, e); })) + + void + slice (Instruction const& insn, Address addr, Function* fun, Block* blk) + { + std::vector <AssignmentPtr> assignments; + this->ac.convert (insn, addr, fun, blk, assignments); + if (assignments.empty ()) + return; + for (auto const& asgn : assignments) + { + this->bfs_slices.emplace_back (); + Slicer s {asgn, blk, fun}; + auto const& slice = s.backwardSlice (this->predicates); + ITER (node, SliceNode, NodeIterator, slice, exitNodes) + { + this->seen.insert (node->addr ()); + std::queue <SliceNode*> q; // breadth-first traversal + q.push (node); + while (!q.empty ()) + { + auto const& parent = q.front (); + q.pop (); + this->bfs_slices.back ().push_back (parent->addr ()); + ITER (edge, Edge, EdgeIterator, parent, ins) + { + auto const& child = edge->source (); + if (this->seen.count (child->addr ()) > 0) + continue; + this->seen.insert (child->addr ()); + q.push (static_cast <SliceNode*> (child.get ())); + } + } + } + } + } + +#undef ITER + + std::vector <Address> + slice_zip () + { + std::vector <Address> result; + size_t n = 0; + for (auto const& v : this->bfs_slices) + { + if (result.empty () || v[0] != result.back ()) + result.push_back (v[0]); + n = std::max (n, v.size ()); + } + for (size_t i = 1; i < n; ++i) + for (auto const& v : this->bfs_slices) + if (i < v.size ()) + result.push_back (v[i]); + return result; + } + }; + +Block* +next_block (CodeObject& co, CodeRegion* region, Address address) +{ + auto blk = co.findBlockByEntry (region, address); + if (blk != nullptr) + return blk; + co.parse (address, true); + blk = co.findBlockByEntry (region, address); + return (blk != nullptr) ? blk : co.findNextBlock (region, address); +} + +Block* +find_block (CodeSource& cs, CodeObject& co, Address target_addr) +{ + if (!cs.isCode (target_addr)) + { + std::cerr << std::hex << target_addr + << " does not point to an instruction\n"; + return nullptr; + } + std::set <CodeRegion*> regions; + cs.findRegions (target_addr, regions); + if (regions.size () != 1) + { + std::cerr << "expected 1 region containing instruction, found " + << regions.size () << '\n'; + return nullptr; + } + for (auto const& region : regions) + { + std::set <Block*> blocks; + co.findBlocks (region, target_addr, blocks); + if (blocks.size () == 1) + for (auto const& blk : blocks) + return blk; + + auto blk = next_block (co, region, region->low ()); + while (blk != nullptr && target_addr > blk->last ()) + blk = next_block (co, region, blk->end ()); + if (blk == nullptr) + return nullptr; + assert (target_addr >= blk->start () && target_addr < blk->end ()); + return blk; + } + // std::unreachable(); + return nullptr; +} + +std::vector <Address> const +patch_locations (CodeSource& cs, + std::vector<Function*> const& functions, + Block* block, + Address target_addr) +{ + SlicerHelper <SearchingAll> helper {cs}; + std::map <Address, std::string> locations; + for (auto const& fun : functions) + helper.slice (helper.decode (target_addr), target_addr, fun, block); + return helper.slice_zip (); +} + +std::vector <Address> const +return_values (CodeSource& cs, + std::vector<Function*> const& functions, + Block* block, + Address target_addr) +{ + SlicerHelper <Slicer::Predicates> helper {cs}; + for (auto const& fun : functions) + for (auto const& blk : fun->blocks ()) + for (auto [addr, step] = std::tuple {blk->start (), 0ul}; + addr < blk->end (); + addr += step) + { + auto const& insn = helper.decode (addr); + step = insn.size (); + if (insn.getCategory () == InstructionCategory::c_ReturnInsn) + helper.slice (insn, addr, fun, blk); + } + return helper.slice_zip (); +} + +int +main (int argc, char** argv) +{ + if (argc != 3) + { + std::cerr << "Usage: " << argv[0] + << " <binary path> <instruction address>\n"; + return -1; + } + CodeSource cs {argv[1]}; + Address target_addr; + { + std::stringstream ss; + ss << std::hex << argv[2]; + ss >> target_addr; + } + CodeObject co {&cs}; + co.parse (); // parsed functions have same lifetime as co + auto const& block = find_block (cs, co, target_addr); + if (block == nullptr) + { + std::cerr << "block containing instruction not found\n"; + return -1; + } + std::vector<Function*> functions; + block->getFuncs (functions); + + for (auto const& addr : patch_locations (cs, functions, block, target_addr)) + std::cout << std::hex << addr << '\n'; + std::cout << "------\n"; + for (auto const& addr : return_values (cs, functions, block, target_addr)) + std::cout << std::hex << addr << '\n'; + return 0; +} |