about summary refs log tree commit diff
path: root/scout.cc
diff options
context:
space:
mode:
Diffstat (limited to 'scout.cc')
-rw-r--r--scout.cc263
1 files changed, 35 insertions, 228 deletions
diff --git a/scout.cc b/scout.cc
index 3ef6002..ae2f7c6 100644
--- a/scout.cc
+++ b/scout.cc
@@ -1,5 +1,5 @@
 // Patch's jump destinations searcher
-// Copyright (C) 2024-2025  Nguyễn Gia Phong
+// Copyright (C) 2025  Nguyễn Gia Phong
 //
 // This file is part of taosc.
 //
@@ -16,241 +16,48 @@
 // You should have received a copy of the GNU Affero General Public License
 // along with taosc.  If not, see <https://www.gnu.org/licenses/>.
 
+#include "helpers.hh"
+
 // Dyninst headers
 #include <CFG.h>
 #include <CodeObject.h>
-#include <Graph.h>
-#include <Instruction.h>
-#include <InstructionDecoder.h>
-#include <slicing.h>
-
-using AbstractRegion = Dyninst::AbsRegion;
-using Address = Dyninst::Address;
-using AssignmentConverter = Dyninst::AssignmentConverter;
-using AssignmentPtr = Dyninst::Assignment::Ptr;
-using Block = Dyninst::ParseAPI::Block;
-using CodeObject = Dyninst::ParseAPI::CodeObject;
-using CodeRegion = Dyninst::ParseAPI::CodeRegion;
-using CodeSource = Dyninst::ParseAPI::SymtabCodeSource;
-using Edge = Dyninst::Edge;
-using EdgeIterator = Dyninst::EdgeIterator;
-using Function = Dyninst::ParseAPI::Function;
-using Graph = Dyninst::Graph;
-using Instruction = Dyninst::InstructionAPI::Instruction;
-using InstructionCategory = Dyninst::InstructionAPI::InsnCategory;
-using InstructionDecoder = Dyninst::InstructionAPI::InstructionDecoder;
-using NodeIterator = Dyninst::NodeIterator;
-using SliceNode = Dyninst::SliceNode;
-using Slicer = Dyninst::Slicer;
 
-#include <cassert>
-#include <filesystem>
-#include <functional>
 #include <iostream>
-#include <map>
-#include <queue>
-
-/// Collect elements from given iterator into a vector
-template <class Element, class Iterator>
-  std::vector <Element*>
-  range (auto iter)
-  {
-    Iterator begin, end;
-    std::vector <Element*> result;
-    for (iter (begin, end); begin != end; ++begin)
-      result.push_back (static_cast <Element*> ((*begin).get ()));
-    return result;
-  }
-
-class SlicerHelper
-  {
-    CodeSource& cs;
-    InstructionDecoder decoder;
-    Slicer::Predicates predicates;
-    AssignmentConverter ac {true, true};
-    std::set <Address> seen;
-    std::vector <std::vector <Address>> bfs_slices;
-
-  public:
-    SlicerHelper (CodeSource& cs)
-    : cs {cs},
-      decoder {(const void*) nullptr, 1, cs.getArch ()},
-      ac {true, true} // enable caching and stack analysis
-    {
-      this->predicates.setSearchForControlFlowDep(true);
-    }
-
-    /// Decode instruction at given address
-    Instruction
-    decode (Address addr)
-    {
-      auto const& insn = this->decoder.decode ((const unsigned char*)
-        this->cs.getPtrToInstruction (addr));
-      assert (insn.size () > 0);
-      return insn;
-    }
-
-    /// Collect the interprocedure backward slice at addr in BFS order
-    void
-    slice (Instruction const& insn, Address addr, Function* fun, Block* blk)
-    {
-      std::vector <AssignmentPtr> assignments;
-      this->ac.convert (insn, addr, fun, blk, assignments);
-      if (assignments.empty ())
-        return;
-      for (auto const& asgn : assignments)
-        {
-          this->bfs_slices.emplace_back ();
-          Slicer s {asgn, blk, fun};
-          auto const& slice = s.backwardSlice (this->predicates);
-#define ITER(i, E, I, x, f) \
-  for (auto const& i : range <E, I> ([x] (auto& b, auto& e) { x->f (b, e); }))
-          ITER (node, SliceNode, NodeIterator, slice, exitNodes)
-            {
-              this->seen.insert (node->addr ());
-              std::queue <SliceNode*> q; // breadth-first traversal
-              q.push (node);
-              while (!q.empty ())
-                {
-                  auto const& parent = q.front ();
-                  q.pop ();
-                  this->bfs_slices.back ().push_back (parent->addr ());
-                  ITER (edge, Edge, EdgeIterator, parent, ins)
-#undef ITER
-                    {
-                      auto const& child = edge->source ();
-                      if (this->seen.count (child->addr ()) > 0)
-                        continue;
-                      this->seen.insert (child->addr ());
-                      q.push (static_cast <SliceNode*> (child.get ()));
-                    }
-                }
-            }
-        }
-    }
-
-    /// Flatten stored BFS slices in round-robin order
-    std::vector <Address> const
-    slice_zip ()
-    {
-      std::vector <Address> result;
-      size_t n = 0;
-      for (auto const& v : this->bfs_slices)
-        {
-          if (result.empty () || v[0] != result.back ())
-            result.push_back (v[0]);
-          n = std::max (n, v.size ());
-        }
-      for (size_t i = 1; i < n; ++i)
-        for (auto const& v : this->bfs_slices)
-          if (i < v.size ())
-            result.push_back (v[i]);
-      return result;
-    }
-  };
-
-/// Find next basic block's entry after given address, reparsing if necessary
-Block*
-next_block (CodeObject& co, CodeRegion* region, Address address)
-{
-  auto blk = co.findBlockByEntry (region, address);
-  if (blk != nullptr)
-    return blk;
-  co.parse (address, true);
-  blk = co.findBlockByEntry (region, address);
-  return (blk != nullptr) ? blk : co.findNextBlock (region, address);
-}
-
-/// Find block containing given address
-Block*
-find_block (CodeSource& cs, CodeObject& co, Address target_addr)
-{
-  if (!cs.isCode (target_addr))
-    {
-      std::cerr << std::hex << target_addr
-        << " does not point to an instruction\n";
-      return nullptr;
-    }
-  std::set <CodeRegion*> regions;
-  if (cs.findRegions (target_addr, regions) != 1)
-    {
-      std::cerr << "expected 1 region containing instruction, found "
-        << regions.size () << '\n';
-      return nullptr;
-    }
-  for (auto const& region : regions)
-    {
-      std::set <Block*> blocks;
-      if (co.findBlocks (region, target_addr, blocks) > 0)
-        for (auto const& blk : blocks) // TODO: choose the best block
-          return blk;
-
-      auto* blk = next_block (co, region, region->low ());
-      while (blk != nullptr && target_addr > blk->last ())
-        blk = next_block (co, region, blk->end ());
-      if (blk == nullptr)
-        return nullptr;
-      assert (target_addr >= blk->start () && target_addr < blk->end ());
-      return blk;
-    }
-#if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
-  std::unreachable ();
-#else
-  __builtin_unreachable(); // GCC or Clang
-#endif
-}
-
-/// Slice backward from return instructions
-std::vector <Address> const
-returns_slice (CodeSource& cs, Function* fun, Address target_addr)
-{
-  SlicerHelper helper {cs};
-  for (auto const& blk : fun->blocks ())
-    for (auto [addr, step] = std::tuple {blk->start (), 0ul};
-         addr < blk->end ();
-         addr += step)
-      {
-        auto const& insn = helper.decode (addr);
-        step = insn.size ();
-        if (insn.getCategory () == InstructionCategory::c_ReturnInsn)
-          helper.slice (insn, addr, fun, blk);
-      }
-  return helper.slice_zip ();
-}
+#include <set>
+#include <vector>
 
 int
 main (int argc, char** argv)
 {
-  if (argc != 3)
-    {
-      std::cerr << "Usage: " << std::filesystem::path (argv[0]).filename ()
-        << " binary instruction-address\n";
-      return -1;
-    }
-  CodeSource cs {argv[1]};
-  Address target_addr;
-  {
-    std::stringstream ss;
-    ss << std::hex << argv[2];
-    ss >> target_addr;
-  }
-  CodeObject co {&cs};
+  Dyninst::ParseAPI::SymtabCodeSource cs {parse_args (argc, argv)};
+  Dyninst::ParseAPI::CodeObject co {&cs};
   co.parse (); // parsed functions have same lifetime as co
-  auto const& block = find_block (cs, co, target_addr);
-  if (block == nullptr)
-    {
-      std::cerr << "block containing instruction not found\n";
-      return -1;
-    }
-  std::vector<Function*> functions;
-  block->getFuncs (functions);
-  if (functions.size () < 1)
-    {
-      std::cerr << "found no function containing instruction\n";
-      return -1;
-    }
-  for (auto* fun : functions)
-    for (auto const& addr : returns_slice (cs, fun, target_addr))
-      std::cout << std::hex << addr << '\n';
-  return 0;
+  while (!std::cin.eof ())
+    {
+      Dyninst::Address address;
+      std::cin >> std::hex >> address;
+      if (std::cin.fail ())
+        break;
+      std::cout << std::hex << address;
+      auto* block = find_block (cs, co, address);
+      if (block->containingFuncs () < 1)
+        die_for (address, "no function found containing instruction at");
+      std::vector <Dyninst::ParseAPI::Function*> functions;
+      block->getFuncs (functions);
+      std::set <Dyninst::Address> seen;
+      for (auto* fun : functions)
+        for (auto const& return_block : fun->returnBlocks ())
+          {
+            std::set <Dyninst::ParseAPI::Block*> post_dominates;
+            fun->getImmediatePostDominates (return_block, post_dominates);
+            for (auto* pd : post_dominates)
+              if (seen.insert (pd->start ()).second)
+                std::cout << ' ' << std::hex << pd->start ();
+          }
+      std::cout << '\n';
+    }
+  if (std::cin.eof ())
+    return 0;
+  std::cerr << "invalid input\n";
+  return -1;
 }