summary refs log tree commit diff
path: root/taosc.cc
diff options
context:
space:
mode:
authorNguyễn Gia Phong <cnx@loang.net>2024-09-02 15:42:21 +0900
committerNguyễn Gia Phong <cnx@loang.net>2024-09-03 15:25:17 +0900
commit11ab3cc687bfeb64e1bb223e4e690fe423e6a15c (patch)
treecddd236eb21d1430af3dd130cc167102462b29b6 /taosc.cc
downloadtaosc-11ab3cc687bfeb64e1bb223e4e690fe423e6a15c.tar.gz
Draft slicer HEAD main
Diffstat (limited to 'taosc.cc')
-rw-r--r--taosc.cc279
1 files changed, 279 insertions, 0 deletions
diff --git a/taosc.cc b/taosc.cc
new file mode 100644
index 0000000..857fcf5
--- /dev/null
+++ b/taosc.cc
@@ -0,0 +1,279 @@
+// Slicing-based binary fault localizer and patch searcher
+// Copyright (C) 2024  Nguyễn Gia Phong
+//
+// This file is part of taosc.
+//
+// Taosc is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Taosc is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with taosc.  If not, see <https://www.gnu.org/licenses/>.
+
+// Dyninst headers
+#include <CFG.h>
+#include <CodeObject.h>
+#include <Graph.h>
+#include <Instruction.h>
+#include <InstructionDecoder.h>
+#include <slicing.h>
+
+using AbstractRegion = Dyninst::AbsRegion;
+using Address = Dyninst::Address;
+using AssignmentConverter = Dyninst::AssignmentConverter;
+using AssignmentPtr = Dyninst::Assignment::Ptr;
+using Block = Dyninst::ParseAPI::Block;
+using CodeObject = Dyninst::ParseAPI::CodeObject;
+using CodeRegion = Dyninst::ParseAPI::CodeRegion;
+using CodeSource = Dyninst::ParseAPI::SymtabCodeSource;
+using Edge = Dyninst::Edge;
+using EdgeIterator = Dyninst::EdgeIterator;
+using Function = Dyninst::ParseAPI::Function;
+using Graph = Dyninst::Graph;
+using Instruction = Dyninst::InstructionAPI::Instruction;
+using InstructionDecoder = Dyninst::InstructionAPI::InstructionDecoder;
+using InstructionCategory = Dyninst::InstructionAPI::InsnCategory;
+using NodeIterator = Dyninst::NodeIterator;
+using SliceNode = Dyninst::SliceNode;
+using Slicer = Dyninst::Slicer;
+
+#include <cassert>
+#include <functional>
+#include <iostream>
+#include <map>
+#include <queue>
+
+class SearchingAll : public Slicer::Predicates
+  {
+  public:
+    std::vector <Function*>
+    followCallBackward (Block* caller,
+                        CallStack_t& call_stack,
+                        AbstractRegion argument) override
+    {
+      std::vector <Function*> callers;
+      caller->getFuncs (callers);
+      return callers;
+    }
+  };
+
+template <class Element, class Iterator>
+  std::vector <Element*>
+  sorted (auto iter)
+  {
+    Iterator begin, end;
+    std::vector <Element*> result;
+    for (iter (begin, end); begin != end; ++begin)
+      result.push_back (static_cast <Element*> ((*begin).get ()));
+    return result;
+  }
+
+template <class SlicerPredicates>
+  class SlicerHelper
+    {
+      CodeSource& cs;
+      InstructionDecoder decoder;
+      SlicerPredicates predicates;
+      AssignmentConverter ac {true, true};
+      std::set <Address> seen;
+      std::vector <std::vector <Address>> bfs_slices;
+
+    public:
+      SlicerHelper (CodeSource& cs)
+      : cs {cs},
+        decoder {(const void*) nullptr, 1, cs.getArch ()},
+        ac {true, true} // enable caching and stack analysis
+      {
+        this->predicates.setSearchForControlFlowDep(true);
+      }
+
+      Instruction
+      decode (Address addr)
+      {
+        auto const& insn = this->decoder.decode ((const unsigned char *)
+          this->cs.getPtrToInstruction (addr));
+        assert (insn.size () > 0);
+        return insn;
+      }
+
+#define ITER(i, E, I, x, f) \
+  for (auto const& i : sorted <E, I> ([x] (auto& b, auto& e) { x->f (b, e); }))
+
+      void
+      slice (Instruction const& insn, Address addr, Function* fun, Block* blk)
+      {
+        std::vector <AssignmentPtr> assignments;
+        this->ac.convert (insn, addr, fun, blk, assignments);
+        if (assignments.empty ())
+          return;
+        for (auto const& asgn : assignments)
+          {
+            this->bfs_slices.emplace_back ();
+            Slicer s {asgn, blk, fun};
+            auto const& slice = s.backwardSlice (this->predicates);
+            ITER (node, SliceNode, NodeIterator, slice, exitNodes)
+              {
+                this->seen.insert (node->addr ());
+                std::queue <SliceNode*> q; // breadth-first traversal
+                q.push (node);
+                while (!q.empty ())
+                  {
+                    auto const& parent = q.front ();
+                    q.pop ();
+                    this->bfs_slices.back ().push_back (parent->addr ());
+                    ITER (edge, Edge, EdgeIterator, parent, ins)
+                      {
+                        auto const& child = edge->source ();
+                        if (this->seen.count (child->addr ()) > 0)
+                          continue;
+                        this->seen.insert (child->addr ());
+                        q.push (static_cast <SliceNode*> (child.get ()));
+                      }
+                  }
+              }
+          }
+      }
+
+#undef ITER
+
+      std::vector <Address>
+      slice_zip ()
+      {
+        std::vector <Address> result;
+        size_t n = 0;
+        for (auto const& v : this->bfs_slices)
+          {
+            if (result.empty () || v[0] != result.back ())
+              result.push_back (v[0]);
+            n = std::max (n, v.size ());
+          }
+        for (size_t i = 1; i < n; ++i)
+          for (auto const& v : this->bfs_slices)
+            if (i < v.size ())
+              result.push_back (v[i]);
+        return result;
+      }
+    };
+
+Block*
+next_block (CodeObject& co, CodeRegion* region, Address address)
+{
+  auto blk = co.findBlockByEntry (region, address);
+  if (blk != nullptr)
+    return blk;
+  co.parse (address, true);
+  blk = co.findBlockByEntry (region, address);
+  return (blk != nullptr) ? blk : co.findNextBlock (region, address);
+}
+
+Block*
+find_block (CodeSource& cs, CodeObject& co, Address target_addr)
+{
+  if (!cs.isCode (target_addr))
+    {
+      std::cerr << std::hex << target_addr
+        << " does not point to an instruction\n";
+      return nullptr;
+    }
+  std::set <CodeRegion*> regions;
+  cs.findRegions (target_addr, regions);
+  if (regions.size () != 1)
+    {
+      std::cerr << "expected 1 region containing instruction, found "
+        << regions.size () << '\n';
+      return nullptr;
+    }
+  for (auto const& region : regions)
+    {
+      std::set <Block*> blocks;
+      co.findBlocks (region, target_addr, blocks);
+      if (blocks.size () == 1)
+        for (auto const& blk : blocks)
+          return blk;
+
+      auto blk = next_block (co, region, region->low ());
+      while (blk != nullptr && target_addr > blk->last ())
+        blk = next_block (co, region, blk->end ());
+      if (blk == nullptr)
+        return nullptr;
+      assert (target_addr >= blk->start () && target_addr < blk->end ());
+      return blk;
+    }
+  // std::unreachable();
+  return nullptr;
+}
+
+std::vector <Address> const
+patch_locations (CodeSource& cs,
+                 std::vector<Function*> const& functions,
+                 Block* block,
+                 Address target_addr)
+{
+  SlicerHelper <SearchingAll> helper {cs};
+  std::map <Address, std::string> locations;
+  for (auto const& fun : functions)
+    helper.slice (helper.decode (target_addr), target_addr, fun, block);
+  return helper.slice_zip ();
+}
+
+std::vector <Address> const
+return_values (CodeSource& cs,
+               std::vector<Function*> const& functions,
+               Block* block,
+               Address target_addr)
+{
+  SlicerHelper <Slicer::Predicates> helper {cs};
+  for (auto const& fun : functions)
+    for (auto const& blk : fun->blocks ())
+      for (auto [addr, step] = std::tuple {blk->start (), 0ul};
+           addr < blk->end ();
+           addr += step)
+        {
+          auto const& insn = helper.decode (addr);
+          step = insn.size ();
+          if (insn.getCategory () == InstructionCategory::c_ReturnInsn)
+            helper.slice (insn, addr, fun, blk);
+        }
+  return helper.slice_zip ();
+}
+
+int
+main (int argc, char** argv)
+{
+  if (argc != 3)
+    {
+      std::cerr << "Usage: " << argv[0]
+        << " <binary path> <instruction address>\n";
+      return -1;
+    }
+  CodeSource cs {argv[1]};
+  Address target_addr;
+  {
+    std::stringstream ss;
+    ss << std::hex << argv[2];
+    ss >> target_addr;
+  }
+  CodeObject co {&cs};
+  co.parse (); // parsed functions have same lifetime as co
+  auto const& block = find_block (cs, co, target_addr);
+  if (block == nullptr)
+    {
+      std::cerr << "block containing instruction not found\n";
+      return -1;
+    }
+  std::vector<Function*> functions;
+  block->getFuncs (functions);
+
+  for (auto const& addr : patch_locations (cs, functions, block, target_addr))
+    std::cout << std::hex << addr << '\n';
+  std::cout << "------\n";
+  for (auto const& addr : return_values (cs, functions, block, target_addr))
+    std::cout << std::hex << addr << '\n';
+  return 0;
+}