author     Cristian Cadar <c.cadar@imperial.ac.uk>    2017-07-20 14:16:39 +0100
committer  GitHub <noreply@github.com>                2017-07-20 14:16:39 +0100
commit     051e44e59cc5260324667d3fd8a8d46d54c3e72e (patch)
tree       34fdea2badf338c2f6d78185cc5e03cea19cab9b /lib
parent     99a1e3a25cd1405a15112a85de1ff5fc714e7be1 (diff)
parent     ffe695c29915cf8605b2fb807cd083cdcc769d47 (diff)
Merge pull request #657 from delcypher/vectorized_instructions
Implement basic support for vectorized instructions.
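
As a rough, hypothetical illustration (not one of the commit's test cases), a program like the following now becomes executable under KLEE: the vector subscripts compile to insertelement/extractelement instructions, which the Executor changes below interpret directly, while whole-vector arithmetic and shufflevector are first rewritten by the back-ported Scalarizer pass. This assumes the GCC/Clang vector_size extension and its usual lowering.

    typedef int v4si __attribute__((vector_size(16)));

    int main() {
      v4si v = {1, 2, 3, 4}; // vector value
      v[2] = 42;             // lowers to insertelement at constant index 2
      return v[2] - v[0];    // lowers to extractelement; returns 41
    }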
Diffstat (limited to 'lib')
-rw-r--r--  lib/Core/Executor.cpp                           | 109
-rw-r--r--  lib/Core/Executor.h                             |   1
-rw-r--r--  lib/Module/CMakeLists.txt                       |   2
-rw-r--r--  lib/Module/InstructionOperandTypeCheckPass.cpp  | 184
-rw-r--r--  lib/Module/KModule.cpp                          |  19
-rw-r--r--  lib/Module/Passes.h                             |  26
-rw-r--r--  lib/Module/Scalarizer.cpp                       | 651
7 files changed, 980 insertions(+), 12 deletions(-)
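
The Executor.cpp hunks below represent a vector as one wide bitvector expression with element 0 in the most significant bits, hence the EltBits * (elementCount - i - 1) offset arithmetic in the InsertElement/ExtractElement cases. A standalone sketch of that layout, with plain integers standing in for KLEE expressions and a hypothetical helper name:

    #include <cassert>
    #include <cstdint>

    // Bit offset of element i in an elementCount x eltBits vector laid out
    // with element 0 leftmost (most significant), mirroring the hunk below.
    static uint64_t elementBitOffset(uint64_t eltBits, uint64_t elementCount,
                                     uint64_t i) {
      assert(i < elementCount && "out of bounds, cf. BadVectorAccess");
      return eltBits * (elementCount - i - 1);
    }

    int main() {
      // A <4 x i8> packed into 32 bits: elements {0xAA, 0xBB, 0xCC, 0xDD}.
      uint32_t vec = 0xAABBCCDD;
      // ExtractElement at constant index 1 reads bits [16, 24): 0xBB.
      assert(((vec >> elementBitOffset(8, 4, 1)) & 0xFF) == 0xBB);
      return 0;
    }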
diff --git a/lib/Core/Executor.cpp b/lib/Core/Executor.cpp
index 60ed1bd9..ff842fd1 100644
--- a/lib/Core/Executor.cpp
+++ b/lib/Core/Executor.cpp
@@ -279,6 +279,7 @@ namespace {
   cl::values(
       clEnumValN(Executor::Abort, "Abort", "The program crashed"),
       clEnumValN(Executor::Assert, "Assert", "An assertion was hit"),
+      clEnumValN(Executor::BadVectorAccess, "BadVectorAccess", "Vector accessed out of bounds"),
      clEnumValN(Executor::Exec, "Exec", "Trying to execute an unexpected instruction"),
      clEnumValN(Executor::External, "External", "External objects referenced"),
      clEnumValN(Executor::Free, "Free", "Freeing invalid memory"),
@@ -333,6 +334,7 @@ namespace klee {
 const char *Executor::TerminateReasonNames[] = {
   [ Abort ] = "abort",
   [ Assert ] = "assert",
+  [ BadVectorAccess ] = "bad_vector_access",
   [ Exec ] = "exec",
   [ External ] = "external",
   [ Free ] = "free",
@@ -1101,8 +1103,18 @@ ref<klee::ConstantExpr> Executor::evalConstant(const Constant *c) {
     }
     ref<Expr> res = ConcatExpr::createN(kids.size(), kids.data());
     return cast<ConstantExpr>(res);
+  } else if (const ConstantVector *cv = dyn_cast<ConstantVector>(c)) {
+    llvm::SmallVector<ref<Expr>, 8> kids;
+    const size_t numOperands = cv->getNumOperands();
+    kids.reserve(numOperands);
+    for (unsigned i = 0; i < numOperands; ++i) {
+      kids.push_back(evalConstant(cv->getOperand(i)));
+    }
+    ref<Expr> res = ConcatExpr::createN(numOperands, kids.data());
+    assert(isa<ConstantExpr>(res) &&
+           "result of constant vector built is not a constant");
+    return cast<ConstantExpr>(res);
   } else {
-    // Constant{Vector}
     llvm::report_fatal_error("invalid argument to evalConstant()");
   }
 }
@@ -1909,6 +1921,7 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) {
     // Special instructions
   case Instruction::Select: {
+    // NOTE: It is not required that operands 1 and 2 be of scalar type.
     ref<Expr> cond = eval(ki, 0, state).value;
     ref<Expr> tExpr = eval(ki, 1, state).value;
     ref<Expr> fExpr = eval(ki, 2, state).value;
@@ -1967,7 +1980,7 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) {
     bindLocal(ki, state, result);
     break;
   }
- 
+
   case Instruction::SRem: {
     ref<Expr> left = eval(ki, 0, state).value;
     ref<Expr> right = eval(ki, 1, state).value;
@@ -2029,7 +2042,7 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) {
   case Instruction::ICmp: {
     CmpInst *ci = cast<CmpInst>(i);
     ICmpInst *ii = cast<ICmpInst>(ci);
- 
+
     switch(ii->getPredicate()) {
     case ICmpInst::ICMP_EQ: {
       ref<Expr> left = eval(ki, 0, state).value;
@@ -2194,7 +2207,7 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) {
     ref<Expr> arg = eval(ki, 0, state).value;
     bindLocal(ki, state, ZExtExpr::create(arg, pType));
     break;
-  } 
+  }
   case Instruction::PtrToInt: {
     CastInst *ci = cast<CastInst>(i);
     Expr::Width iType = getWidthForLLVMType(ci->getType());
@@ -2249,7 +2262,7 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) {
     bindLocal(ki, state, ConstantExpr::alloc(Res.bitcastToAPInt()));
     break;
   }
- 
+
   case Instruction::FMul: {
     ref<ConstantExpr> left = toConstant(state, eval(ki, 0, state).value,
                                         "floating point");
@@ -2565,16 +2578,88 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) {
     break;
   }
 #endif
+  case Instruction::InsertElement: {
+    InsertElementInst *iei = cast<InsertElementInst>(i);
+    ref<Expr> vec = eval(ki, 0, state).value;
+    ref<Expr> newElt = eval(ki, 1, state).value;
+    ref<Expr> idx = eval(ki, 2, state).value;
+
+    ConstantExpr *cIdx = dyn_cast<ConstantExpr>(idx);
+    if (cIdx == NULL) {
+      terminateStateOnError(
+          state, "InsertElement, support for symbolic index not implemented",
+          Unhandled);
+      return;
+    }
+    uint64_t iIdx = cIdx->getZExtValue();
+    const llvm::VectorType *vt = iei->getType();
+    unsigned EltBits = getWidthForLLVMType(vt->getElementType());
-    // Other instructions...
-    // Unhandled
-  case Instruction::ExtractElement:
-  case Instruction::InsertElement:
+    if (iIdx >= vt->getNumElements()) {
+      // Out of bounds write
+      terminateStateOnError(state, "Out of bounds write when inserting element",
+                            BadVectorAccess);
+      return;
+    }
+
+    const unsigned elementCount = vt->getNumElements();
+    llvm::SmallVector<ref<Expr>, 8> elems;
+    elems.reserve(elementCount);
+    for (unsigned i = 0; i < elementCount; ++i) {
+      // evalConstant() will use ConcatExpr to build vectors with the
+      // zero-th element leftmost (most significant bits), followed
+      // by the next element (second leftmost) and so on. This means
+      // that we have to adjust the index so we read left to right
+      // rather than right to left.
+      unsigned bitOffset = EltBits * (elementCount - i - 1);
+      elems.push_back(i == iIdx
+                          ? newElt
+                          : ExtractExpr::create(vec, bitOffset, EltBits));
+    }
+
+    ref<Expr> Result = ConcatExpr::createN(elementCount, elems.data());
+    bindLocal(ki, state, Result);
+    break;
+  }
+  case Instruction::ExtractElement: {
+    ExtractElementInst *eei = cast<ExtractElementInst>(i);
+    ref<Expr> vec = eval(ki, 0, state).value;
+    ref<Expr> idx = eval(ki, 1, state).value;
+
+    ConstantExpr *cIdx = dyn_cast<ConstantExpr>(idx);
+    if (cIdx == NULL) {
+      terminateStateOnError(
+          state, "ExtractElement, support for symbolic index not implemented",
+          Unhandled);
+      return;
+    }
+    uint64_t iIdx = cIdx->getZExtValue();
+    const llvm::VectorType *vt = eei->getVectorOperandType();
+    unsigned EltBits = getWidthForLLVMType(vt->getElementType());
+
+    if (iIdx >= vt->getNumElements()) {
+      // Out of bounds read
+      terminateStateOnError(state, "Out of bounds read when extracting element",
+                            BadVectorAccess);
+      return;
+    }
+
+    // evalConstant() will use ConcatExpr to build vectors with the
+    // zero-th element leftmost (most significant bits), followed
+    // by the next element (second leftmost) and so on. This means
+    // that we have to adjust the index so we read left to right
+    // rather than right to left.
+    unsigned bitOffset = EltBits * (vt->getNumElements() - iIdx - 1);
+    ref<Expr> Result = ExtractExpr::create(vec, bitOffset, EltBits);
+    bindLocal(ki, state, Result);
+    break;
+  }
   case Instruction::ShuffleVector:
-    terminateStateOnError(state, "XXX vector instructions unhandled",
-                          Unhandled);
+    // Should never happen due to Scalarizer pass removing ShuffleVector
+    // instructions.
+    terminateStateOnExecError(state, "Unexpected ShuffleVector instruction");
     break;
- 
+    // Other instructions...
+    // Unhandled
   default:
     terminateStateOnExecError(state, "illegal instruction");
     break;
diff --git a/lib/Core/Executor.h b/lib/Core/Executor.h
index c3f6e705..b3bb6864 100644
--- a/lib/Core/Executor.h
+++ b/lib/Core/Executor.h
@@ -105,6 +105,7 @@ public:
   enum TerminateReason {
     Abort,
     Assert,
+    BadVectorAccess,
     Exec,
     External,
     Free,
diff --git a/lib/Module/CMakeLists.txt b/lib/Module/CMakeLists.txt
index 006443a9..c1a5d809 100644
--- a/lib/Module/CMakeLists.txt
+++ b/lib/Module/CMakeLists.txt
@@ -9,6 +9,7 @@ klee_add_component(kleeModule
   Checks.cpp
   InstructionInfoTable.cpp
+  InstructionOperandTypeCheckPass.cpp
   IntrinsicCleaner.cpp
   KInstruction.cpp
   KModule.cpp
@@ -17,6 +18,7 @@ klee_add_component(kleeModule
   Optimize.cpp
   PhiCleaner.cpp
   RaiseAsm.cpp
+  Scalarizer.cpp
 )
 
 set(LLVM_COMPONENTS
diff --git a/lib/Module/InstructionOperandTypeCheckPass.cpp b/lib/Module/InstructionOperandTypeCheckPass.cpp
new file mode 100644
index 00000000..449eea48
--- /dev/null
+++ b/lib/Module/InstructionOperandTypeCheckPass.cpp
@@ -0,0 +1,184 @@
+//===-- InstructionOperandTypeCheckPass.cpp ---------------------*- C++ -*-===//
+//
+// The KLEE Symbolic Virtual Machine
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "Passes.h"
+#include "klee/Config/Version.h"
+#include "klee/Internal/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+void printOperandWarning(const char *expected, const Instruction *i,
+                         LLVM_TYPE_Q Type *ty, unsigned opNum) {
+  std::string msg;
+  llvm::raw_string_ostream ss(msg);
+  ss << "Found unexpected type (" << *ty << ") at operand " << opNum
Expected " << expected << " in " << *i; + i->print(ss); + ss.flush(); + klee::klee_warning("%s", msg.c_str()); +} + +bool checkOperandTypeIsScalarInt(const Instruction *i, unsigned opNum) { + assert(opNum < i->getNumOperands()); + LLVM_TYPE_Q llvm::Type *ty = i->getOperand(opNum)->getType(); + if (!(ty->isIntegerTy())) { + printOperandWarning("scalar integer", i, ty, opNum); + return false; + } + return true; +} + +bool checkOperandTypeIsScalarIntOrPointer(const Instruction *i, + unsigned opNum) { + assert(opNum < i->getNumOperands()); + LLVM_TYPE_Q llvm::Type *ty = i->getOperand(opNum)->getType(); + if (!(ty->isIntegerTy() || ty->isPointerTy())) { + printOperandWarning("scalar integer or pointer", i, ty, opNum); + return false; + } + return true; +} + +bool checkOperandTypeIsScalarPointer(const Instruction *i, unsigned opNum) { + assert(opNum < i->getNumOperands()); + LLVM_TYPE_Q llvm::Type *ty = i->getOperand(opNum)->getType(); + if (!(ty->isPointerTy())) { + printOperandWarning("scalar pointer", i, ty, opNum); + return false; + } + return true; +} + +bool checkOperandTypeIsScalarFloat(const Instruction *i, unsigned opNum) { + assert(opNum < i->getNumOperands()); + LLVM_TYPE_Q llvm::Type *ty = i->getOperand(opNum)->getType(); + if (!(ty->isFloatingPointTy())) { + printOperandWarning("scalar float", i, ty, opNum); + return false; + } + return true; +} + +bool checkOperandsHaveSameType(const Instruction *i, unsigned opNum0, + unsigned opNum1) { + assert(opNum0 < i->getNumOperands()); + assert(opNum1 < i->getNumOperands()); + LLVM_TYPE_Q llvm::Type *ty0 = i->getOperand(opNum0)->getType(); + LLVM_TYPE_Q llvm::Type *ty1 = i->getOperand(opNum1)->getType(); + if (!(ty0 == ty1)) { + std::string msg; + llvm::raw_string_ostream ss(msg); + ss << "Found mismatched type (" << *ty0 << " != " << *ty1 + << ") for operands" << opNum0 << " and " << opNum1 + << ". Expected operand types to match in " << *i; + i->print(ss); + ss.flush(); + klee::klee_warning("%s", msg.c_str()); + return false; + } + return true; +} + +bool checkInstruction(const Instruction *i) { + switch (i->getOpcode()) { + case Instruction::Select: { + // Note we do not enforce that operand 1 and 2 are scalar because the + // scalarizer pass might not remove these. This could be selecting which + // vector operand to feed to another instruction. 
+    // this case, so it is not a problem.
+    return checkOperandTypeIsScalarInt(i, 0) &
+           checkOperandsHaveSameType(i, 1, 2);
+  }
+  // Integer arithmetic, logical and shifting
+  // TODO: When we upgrade to newer LLVM use LLVM_FALLTHROUGH
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr: {
+    return checkOperandTypeIsScalarInt(i, 0) &
+           checkOperandTypeIsScalarInt(i, 1);
+  }
+  // Integer comparison
+  case Instruction::ICmp: {
+    return checkOperandTypeIsScalarIntOrPointer(i, 0) &
+           checkOperandTypeIsScalarIntOrPointer(i, 1);
+  }
+  // Integer Conversion
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::IntToPtr: {
+    return checkOperandTypeIsScalarInt(i, 0);
+  }
+  case Instruction::PtrToInt: {
+    return checkOperandTypeIsScalarPointer(i, 0);
+  }
+  // TODO: Figure out if Instruction::BitCast needs checking
+  // Floating point arithmetic
+  case Instruction::FAdd:
+  case Instruction::FSub:
+  case Instruction::FMul:
+  case Instruction::FDiv:
+  case Instruction::FRem: {
+    return checkOperandTypeIsScalarFloat(i, 0) &
+           checkOperandTypeIsScalarFloat(i, 1);
+  }
+  // Floating point conversion
+  case Instruction::FPTrunc:
+  case Instruction::FPExt:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI: {
+    return checkOperandTypeIsScalarFloat(i, 0);
+  }
+  case Instruction::UIToFP:
+  case Instruction::SIToFP: {
+    return checkOperandTypeIsScalarInt(i, 0);
+  }
+  // Floating point comparison
+  case Instruction::FCmp: {
+    return checkOperandTypeIsScalarFloat(i, 0) &
+           checkOperandTypeIsScalarFloat(i, 1);
+  }
+  default:
+    // Treat all other instructions as conforming
+    return true;
+  }
+}
+}
+
+namespace klee {
+
+char InstructionOperandTypeCheckPass::ID = 0;
+
+bool InstructionOperandTypeCheckPass::runOnModule(Module &M) {
+  instructionOperandsConform = true;
+  for (Module::iterator fi = M.begin(), fe = M.end(); fi != fe; ++fi) {
+    for (Function::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi) {
+      for (BasicBlock::iterator ii = bi->begin(), ie = bi->end(); ii != ie;
+           ++ii) {
+        Instruction *i = static_cast<Instruction *>(ii);
+        instructionOperandsConform &= checkInstruction(i);
+      }
+    }
+  }
+
+  return false;
+}
+}
diff --git a/lib/Module/KModule.cpp b/lib/Module/KModule.cpp
index aafabacc..ec9972eb 100644
--- a/lib/Module/KModule.cpp
+++ b/lib/Module/KModule.cpp
@@ -307,6 +307,15 @@ void KModule::prepare(const Interpreter::ModuleOptions &opts,
   // module.
   LegacyLLVMPassManagerTy pm;
   pm.add(new RaiseAsmPass());
+#if LLVM_VERSION_CODE >= LLVM_VERSION(3,4)
+  // This pass will scalarize as much code as possible so that the Executor
+  // does not need to handle operands of vector type for most instructions
+  // other than InsertElementInst and ExtractElementInst.
+  //
+  // NOTE: Must come before division/overshift checks because those passes
+  // don't know how to handle vector instructions.
+  pm.add(createScalarizerPass());
+#endif
   if (opts.CheckDivZero) pm.add(new DivCheckPass());
   if (opts.CheckOvershift) pm.add(new OvershiftCheckPass());
   // FIXME: This false here is to work around a bug in
@@ -381,9 +390,19 @@ void KModule::prepare(const Interpreter::ModuleOptions &opts,
   case eSwitchTypeLLVM:  pm3.add(createLowerSwitchPass()); break;
   default: klee_error("invalid --switch-type");
   }
+  InstructionOperandTypeCheckPass *operandTypeCheckPass =
+      new InstructionOperandTypeCheckPass();
   pm3.add(new IntrinsicCleanerPass(*targetData));
   pm3.add(new PhiCleanerPass());
+  pm3.add(operandTypeCheckPass);
   pm3.run(*module);
+
+  // Enforce the operand type invariants that the Executor expects. This
+  // implicitly depends on the "Scalarizer" pass being run in order to succeed
+  // in the presence of vector instructions.
+  if (!operandTypeCheckPass->checkPassed()) {
+    klee_error("Unexpected instruction operand types detected");
+  }
 #if LLVM_VERSION_CODE < LLVM_VERSION(3, 3)
   // For cleanliness see if we can discard any of the functions we
   // forced to import.
diff --git a/lib/Module/Passes.h b/lib/Module/Passes.h
index 4f1a1453..2ac57b9b 100644
--- a/lib/Module/Passes.h
+++ b/lib/Module/Passes.h
@@ -180,6 +180,32 @@ private:
   llvm::BasicBlock *defaultBlock);
 };
 
+// This is the interface to a back-ported LLVM pass.
+// Newer versions of LLVM already have this in-tree
+// and we are not supporting vector instructions for
+// LLVM 2.9. Therefore this interface is only needed for
+// LLVM 3.4.
+#if LLVM_VERSION_CODE == LLVM_VERSION(3,4)
+llvm::FunctionPass *createScalarizerPass();
+#endif
+
+/// InstructionOperandTypeCheckPass - Checks that the types of instruction
+/// operands conform to the invariants expected by the Executor.
+///
+/// This is a ModulePass because other pass types are not meant to maintain
+/// state between calls.
+class InstructionOperandTypeCheckPass : public llvm::ModulePass {
+private:
+  bool instructionOperandsConform;
+
+public:
+  static char ID;
+  InstructionOperandTypeCheckPass()
+      : llvm::ModulePass(ID), instructionOperandsConform(true) {}
+  // TODO: Add `override` when we switch to C++11
+  bool runOnModule(llvm::Module &M);
+  bool checkPassed() const { return instructionOperandsConform; }
+};
 }
 
 #endif
diff --git a/lib/Module/Scalarizer.cpp b/lib/Module/Scalarizer.cpp
new file mode 100644
index 00000000..0d8e1f48
--- /dev/null
+++ b/lib/Module/Scalarizer.cpp
@@ -0,0 +1,651 @@
+//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass converts vector operations into scalar operations, in order
+// to expose optimization opportunities on the individual scalar operations.
+// It is mainly intended for targets that do not have vector units, but it
+// may also be useful for revectorizing code to different vector widths.
+//
+//===----------------------------------------------------------------------===//
+#include "klee/Config/Version.h"
+
+// This is taken from r195471 in LLVM. This unfortunately was introduced just
+// after LLVM branched for 3.4, so it has been copied into KLEE's source tree.
+// We only use this for LLVM 3.4 because newer LLVMs have this pass in-tree.
+#if LLVM_VERSION_CODE == LLVM_VERSION(3,4)
+
+#define DEBUG_TYPE "scalarizer"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+// Used to store the scattered form of a vector.
+typedef SmallVector<Value *, 8> ValueVector;
+
+// Used to map a vector Value to its scattered form. We use std::map
+// because we want iterators to persist across insertion and because the
+// values are relatively large.
+typedef std::map<Value *, ValueVector> ScatterMap;
+
+// Lists Instructions that have been replaced with scalar implementations,
+// along with a pointer to their scattered forms.
+typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
+
+// Provides a very limited vector-like interface for lazily accessing one
+// component of a scattered vector or vector pointer.
+class Scatterer {
+public:
+  // Scatter V into Size components. If new instructions are needed,
+  // insert them before BBI in BB. If Cache is nonnull, use it to cache
+  // the results.
+  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+            ValueVector *cachePtr = 0);
+
+  // Return component I, creating a new Value for it if necessary.
+  Value *operator[](unsigned I);
+
+  // Return the number of components.
+  unsigned size() const { return Size; }
+
+private:
+  BasicBlock *BB;
+  BasicBlock::iterator BBI;
+  Value *V;
+  ValueVector *CachePtr;
+  PointerType *PtrTy;
+  ValueVector Tmp;
+  unsigned Size;
+};
+
+// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
+// called Name that compares X and Y in the same way as FCI.
+struct FCmpSplitter {
+  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
+  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+                    const Twine &Name) const {
+    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
+  }
+  FCmpInst &FCI;
+};
+
+// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
+// called Name that compares X and Y in the same way as ICI.
+struct ICmpSplitter {
+  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
+  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+                    const Twine &Name) const {
+    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
+  }
+  ICmpInst &ICI;
+};
+
+// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
+// a binary operator like BO called Name with operands X and Y.
+struct BinarySplitter {
+  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
+  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+                    const Twine &Name) const {
+    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
+  }
+  BinaryOperator &BO;
+};
+
+// GEPSplitter()(Builder, X, Y, Name) uses Builder to create
+// a single GEP called Name with operands X and Y.
+struct GEPSplitter {
+  GEPSplitter() {}
+  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+                    const Twine &Name) const {
+    return Builder.CreateGEP(Op0, Op1, Name);
+  }
+};
+
+// Information about a load or store that we're scalarizing.
+struct VectorLayout {
+  VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {}
+
+  // Return the alignment of element I.
+  uint64_t getElemAlign(unsigned I) {
+    return MinAlign(VecAlign, I * ElemSize);
+  }
+
+  // The type of the vector.
+  VectorType *VecTy;
+
+  // The type of each element.
+  Type *ElemTy;
+
+  // The alignment of the vector.
+  uint64_t VecAlign;
+
+  // The size of each element.
+  uint64_t ElemSize;
+};
+
+class Scalarizer : public FunctionPass,
+                   public InstVisitor<Scalarizer, bool> {
+public:
+  static char ID;
+
+  Scalarizer() :
+    FunctionPass(ID) {
+    // HACK:
+    //initializeScalarizerPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual bool doInitialization(Module &M);
+  virtual bool runOnFunction(Function &F);
+
+  // InstVisitor methods. They return true if the instruction was scalarized,
+  // false if nothing changed.
+  bool visitInstruction(Instruction &) { return false; }
+  bool visitSelectInst(SelectInst &SI);
+  bool visitICmpInst(ICmpInst &);
+  bool visitFCmpInst(FCmpInst &);
+  bool visitBinaryOperator(BinaryOperator &);
+  bool visitGetElementPtrInst(GetElementPtrInst &);
+  bool visitCastInst(CastInst &);
+  bool visitBitCastInst(BitCastInst &);
+  bool visitShuffleVectorInst(ShuffleVectorInst &);
+  bool visitPHINode(PHINode &);
+  bool visitLoadInst(LoadInst &);
+  bool visitStoreInst(StoreInst &);
+
+private:
+  Scatterer scatter(Instruction *, Value *);
+  void gather(Instruction *, const ValueVector &);
+  bool canTransferMetadata(unsigned Kind);
+  void transferMetadata(Instruction *, const ValueVector &);
+  bool getVectorLayout(Type *, unsigned, VectorLayout &);
+  bool finish();
+
+  template<typename T> bool splitBinary(Instruction &, const T &);
+
+  ScatterMap Scattered;
+  GatherList Gathered;
+  unsigned ParallelLoopAccessMDKind;
+  const DataLayout *TDL;
+};
+
+char Scalarizer::ID = 0;
+} // end anonymous namespace
+
+bool ScalarizeLoadStore = true; // HACK
+/*
+// This is disabled by default because having separate loads and stores makes
+// it more likely that the -combiner-alias-analysis limits will be reached.
+static cl::opt<bool> ScalarizeLoadStore
+  ("scalarize-load-store", cl::Hidden, cl::init(false),
+   cl::desc("Allow the scalarizer pass to scalarize loads and stores"));
+
+INITIALIZE_PASS(Scalarizer, "scalarizer", "Scalarize vector operations",
+                false, false)
+*/
+
+Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+                     ValueVector *cachePtr)
+  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
+  Type *Ty = V->getType();
+  PtrTy = dyn_cast<PointerType>(Ty);
+  if (PtrTy)
+    Ty = PtrTy->getElementType();
+  Size = Ty->getVectorNumElements();
+  if (!CachePtr)
+    Tmp.resize(Size, 0);
+  else if (CachePtr->empty())
+    CachePtr->resize(Size, 0);
+  else
+    assert(Size == CachePtr->size() && "Inconsistent vector sizes");
+}
+
+// Return component I, creating a new Value for it if necessary.
+Value *Scatterer::operator[](unsigned I) {
+  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
+  // Try to reuse a previous value.
+  if (CV[I])
+    return CV[I];
+  IRBuilder<> Builder(BB, BBI);
+  if (PtrTy) {
+    if (!CV[0]) {
+      Type *Ty =
+        PointerType::get(PtrTy->getElementType()->getVectorElementType(),
+                         PtrTy->getAddressSpace());
+      CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
+    }
+    if (I != 0)
+      CV[I] = Builder.CreateConstGEP1_32(CV[0], I,
+                                         V->getName() + ".i" + Twine(I));
+  } else {
+    // Search through a chain of InsertElementInsts looking for element I.
+    // Record other elements in the cache. The new V is still suitable
+    // for all uncached indices.
+    for (;;) {
+      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
+      if (!Insert)
+        break;
+      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
+      if (!Idx)
+        break;
+      unsigned J = Idx->getZExtValue();
+      CV[J] = Insert->getOperand(1);
+      V = Insert->getOperand(0);
+      if (I == J)
+        return CV[J];
+    }
+    CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
+                                         V->getName() + ".i" + Twine(I));
+  }
+  return CV[I];
+}
+
+bool Scalarizer::doInitialization(Module &M) {
+  ParallelLoopAccessMDKind =
+    M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
+  return false;
+}
+
+bool Scalarizer::runOnFunction(Function &F) {
+  TDL = getAnalysisIfAvailable<DataLayout>();
+  for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
+    BasicBlock *BB = BBI;
+    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
+      Instruction *I = II;
+      bool Done = visit(I);
+      ++II;
+      if (Done && I->getType()->isVoidTy())
+        I->eraseFromParent();
+    }
+  }
+  return finish();
+}
+
+// Return a scattered form of V that can be accessed by Point. V must be a
+// vector or a pointer to a vector.
+Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
+  if (Argument *VArg = dyn_cast<Argument>(V)) {
+    // Put the scattered form of arguments in the entry block,
+    // so that it can be used everywhere.
+    Function *F = VArg->getParent();
+    BasicBlock *BB = &F->getEntryBlock();
+    return Scatterer(BB, BB->begin(), V, &Scattered[V]);
+  }
+  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
+    // Put the scattered form of an instruction directly after the
+    // instruction.
+    BasicBlock *BB = VOp->getParent();
+    return Scatterer(BB, llvm::next(BasicBlock::iterator(VOp)),
+                     V, &Scattered[V]);
+  }
+  // In the fallback case, just put the scattered form before Point and
+  // keep the result local to Point.
+  return Scatterer(Point->getParent(), Point, V);
+}
+
+// Replace Op with the gathered form of the components in CV. Defer the
+// deletion of Op and creation of the gathered form to the end of the pass,
+// so that we can avoid creating the gathered form if all uses of Op are
+// replaced with uses of CV.
+void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
+  // Since we're not deleting Op yet, stub out its operands, so that it
+  // doesn't make anything live unnecessarily.
+  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
+    Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
+
+  transferMetadata(Op, CV);
+
+  // If we already have a scattered form of Op (created from ExtractElements
+  // of Op itself), replace them with the new form.
+  ValueVector &SV = Scattered[Op];
+  if (!SV.empty()) {
+    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
+      Instruction *Old = cast<Instruction>(SV[I]);
+      CV[I]->takeName(Old);
+      Old->replaceAllUsesWith(CV[I]);
+      Old->eraseFromParent();
+    }
+  }
+  SV = CV;
+  Gathered.push_back(GatherList::value_type(Op, &SV));
+}
+
+// Return true if it is safe to transfer the given metadata tag from
+// vector to scalar instructions.
+bool Scalarizer::canTransferMetadata(unsigned Tag) {
+  return (Tag == LLVMContext::MD_tbaa
+          || Tag == LLVMContext::MD_fpmath
+          || Tag == LLVMContext::MD_tbaa_struct
+          || Tag == LLVMContext::MD_invariant_load
+          || Tag == ParallelLoopAccessMDKind);
+}
+
+// Transfer metadata from Op to the instructions in CV if it is known
+// to be safe to do so.
+void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
+  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+  Op->getAllMetadataOtherThanDebugLoc(MDs);
+  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
+    if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
+      for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+             MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
+        if (canTransferMetadata(MI->first))
+          New->setMetadata(MI->first, MI->second);
+      New->setDebugLoc(Op->getDebugLoc());
+    }
+  }
+}
+
+// Try to fill in Layout from Ty, returning true on success. Alignment is
+// the alignment of the vector, or 0 if the ABI default should be used.
+bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
+                                 VectorLayout &Layout) {
+  if (!TDL)
+    return false;
+
+  // Make sure we're dealing with a vector.
+  Layout.VecTy = dyn_cast<VectorType>(Ty);
+  if (!Layout.VecTy)
+    return false;
+
+  // Check that we're dealing with full-byte elements.
+  Layout.ElemTy = Layout.VecTy->getElementType();
+  if (TDL->getTypeSizeInBits(Layout.ElemTy) !=
+      TDL->getTypeStoreSizeInBits(Layout.ElemTy))
+    return false;
+
+  if (Alignment)
+    Layout.VecAlign = Alignment;
+  else
+    Layout.VecAlign = TDL->getABITypeAlignment(Layout.VecTy);
+  Layout.ElemSize = TDL->getTypeStoreSize(Layout.ElemTy);
+  return true;
+}
+
+// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
+// to create an instruction like I with operands X and Y and name Name.
+template<typename Splitter>
+bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
+  VectorType *VT = dyn_cast<VectorType>(I.getType());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  IRBuilder<> Builder(I.getParent(), &I);
+  Scatterer Op0 = scatter(&I, I.getOperand(0));
+  Scatterer Op1 = scatter(&I, I.getOperand(1));
+  assert(Op0.size() == NumElems && "Mismatched binary operation");
+  assert(Op1.size() == NumElems && "Mismatched binary operation");
+  ValueVector Res;
+  Res.resize(NumElems);
+  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
+    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
+                      I.getName() + ".i" + Twine(Elem));
+  gather(&I, Res);
+  return true;
+}
+
+bool Scalarizer::visitSelectInst(SelectInst &SI) {
+  VectorType *VT = dyn_cast<VectorType>(SI.getType());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  IRBuilder<> Builder(SI.getParent(), &SI);
+  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
+  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
+  assert(Op1.size() == NumElems && "Mismatched select");
+  assert(Op2.size() == NumElems && "Mismatched select");
+  ValueVector Res;
+  Res.resize(NumElems);
+
+  if (SI.getOperand(0)->getType()->isVectorTy()) {
+    Scatterer Op0 = scatter(&SI, SI.getOperand(0));
+    assert(Op0.size() == NumElems && "Mismatched select");
+    for (unsigned I = 0; I < NumElems; ++I)
+      Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
+                                    SI.getName() + ".i" + Twine(I));
+  } else {
+    Value *Op0 = SI.getOperand(0);
+    for (unsigned I = 0; I < NumElems; ++I)
+      Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
+                                    SI.getName() + ".i" + Twine(I));
+  }
+  gather(&SI, Res);
+  return true;
+}
+
+bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
+  return splitBinary(ICI, ICmpSplitter(ICI));
+}
+
+bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
+  return splitBinary(FCI, FCmpSplitter(FCI));
+}
+
+bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
+  return splitBinary(BO, BinarySplitter(BO));
+}
+
+bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+  return splitBinary(GEPI, GEPSplitter());
+}
+
+bool Scalarizer::visitCastInst(CastInst &CI) {
+  VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  IRBuilder<> Builder(CI.getParent(), &CI);
+  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
+  assert(Op0.size() == NumElems && "Mismatched cast");
+  ValueVector Res;
+  Res.resize(NumElems);
+  for (unsigned I = 0; I < NumElems; ++I)
+    Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
+                                CI.getName() + ".i" + Twine(I));
+  gather(&CI, Res);
+  return true;
+}
+
+bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
+  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
+  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
+  if (!DstVT || !SrcVT)
+    return false;
+
+  unsigned DstNumElems = DstVT->getNumElements();
+  unsigned SrcNumElems = SrcVT->getNumElements();
+  IRBuilder<> Builder(BCI.getParent(), &BCI);
+  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
+  ValueVector Res;
+  Res.resize(DstNumElems);
+
+  if (DstNumElems == SrcNumElems) {
+    for (unsigned I = 0; I < DstNumElems; ++I)
+      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
+                                     BCI.getName() + ".i" + Twine(I));
+  } else if (DstNumElems > SrcNumElems) {
+    // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
+    // individual elements to the destination.
+    unsigned FanOut = DstNumElems / SrcNumElems;
+    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
+    unsigned ResI = 0;
+    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
+      Value *V = Op0[Op0I];
+      Instruction *VI;
+      // Look through any existing bitcasts before converting to <N x t2>.
+      // In the best case, the resulting conversion might be a no-op.
+      while ((VI = dyn_cast<Instruction>(V)) &&
+             VI->getOpcode() == Instruction::BitCast)
+        V = VI->getOperand(0);
+      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
+      Scatterer Mid = scatter(&BCI, V);
+      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
+        Res[ResI++] = Mid[MidI];
+    }
+  } else {
+    // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
+    unsigned FanIn = SrcNumElems / DstNumElems;
+    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
+    unsigned Op0I = 0;
+    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
+      Value *V = UndefValue::get(MidTy);
+      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
+        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
+                                        BCI.getName() + ".i" + Twine(ResI)
+                                        + ".upto" + Twine(MidI));
+      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
+                                        BCI.getName() + ".i" + Twine(ResI));
+    }
+  }
+  gather(&BCI, Res);
+  return true;
+}
+
+bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
+  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
+  ValueVector Res;
+  Res.resize(NumElems);
+
+  for (unsigned I = 0; I < NumElems; ++I) {
+    int Selector = SVI.getMaskValue(I);
+    if (Selector < 0)
+      Res[I] = UndefValue::get(VT->getElementType());
+    else if (unsigned(Selector) < Op0.size())
+      Res[I] = Op0[Selector];
+    else
+      Res[I] = Op1[Selector - Op0.size()];
+  }
+  gather(&SVI, Res);
+  return true;
+}
+
+bool Scalarizer::visitPHINode(PHINode &PHI) {
+  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  IRBuilder<> Builder(PHI.getParent(), &PHI);
+  ValueVector Res;
+  Res.resize(NumElems);
+
+  unsigned NumOps = PHI.getNumOperands();
+  for (unsigned I = 0; I < NumElems; ++I)
+    Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
+                               PHI.getName() + ".i" + Twine(I));
+
+  for (unsigned I = 0; I < NumOps; ++I) {
+    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
+    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
+    for (unsigned J = 0; J < NumElems; ++J)
+      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
+  }
+  gather(&PHI, Res);
+  return true;
+}
+
+bool Scalarizer::visitLoadInst(LoadInst &LI) {
+  if (!ScalarizeLoadStore)
+    return false;
+  if (!LI.isSimple())
+    return false;
+
+  VectorLayout Layout;
+  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
+    return false;
+
+  unsigned NumElems = Layout.VecTy->getNumElements();
+  IRBuilder<> Builder(LI.getParent(), &LI);
+  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
+  ValueVector Res;
+  Res.resize(NumElems);
+
+  for (unsigned I = 0; I < NumElems; ++I)
+    Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
+                                       LI.getName() + ".i" + Twine(I));
+  gather(&LI, Res);
+  return true;
+}
+
+bool Scalarizer::visitStoreInst(StoreInst &SI) {
+  if (!ScalarizeLoadStore)
+    return false;
+  if (!SI.isSimple())
+    return false;
+
+  VectorLayout Layout;
+  Value *FullValue = SI.getValueOperand();
+  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
+    return false;
+
+  unsigned NumElems = Layout.VecTy->getNumElements();
+  IRBuilder<> Builder(SI.getParent(), &SI);
+  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
+  Scatterer Val = scatter(&SI, FullValue);
+
+  ValueVector Stores;
+  Stores.resize(NumElems);
+  for (unsigned I = 0; I < NumElems; ++I) {
+    unsigned Align = Layout.getElemAlign(I);
+    Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
+  }
+  transferMetadata(&SI, Stores);
+  return true;
+}
+
+// Delete the instructions that we scalarized. If a full vector result
+// is still needed, recreate it using InsertElements.
+bool Scalarizer::finish() {
+  if (Gathered.empty())
+    return false;
+  for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
+       GMI != GME; ++GMI) {
+    Instruction *Op = GMI->first;
+    ValueVector &CV = *GMI->second;
+    if (!Op->use_empty()) {
+      // The value is still needed, so recreate it using a series of
+      // InsertElements.
+      Type *Ty = Op->getType();
+      Value *Res = UndefValue::get(Ty);
+      unsigned Count = Ty->getVectorNumElements();
+      IRBuilder<> Builder(Op->getParent(), Op);
+      for (unsigned I = 0; I < Count; ++I)
+        Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
+                                          Op->getName() + ".upto" + Twine(I));
+      Res->takeName(Op);
+      Op->replaceAllUsesWith(Res);
+    }
+    Op->eraseFromParent();
+  }
+  Gathered.clear();
+  Scattered.clear();
+  return true;
+}
+
+namespace klee {
+llvm::FunctionPass *createScalarizerPass() {
+  return new Scalarizer();
+}
+}
+
+#endif
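
A hypothetical program (not from this commit) sketching when the new bad_vector_access termination reason fires, assuming clang lowers the vector subscript to an extractelement whose index is concrete at run time:

    typedef int v4si __attribute__((vector_size(16)));

    int main() {
      v4si v = {1, 2, 3, 4};
      volatile int i = 4; // concrete at run time, one past the end
      // Under KLEE this should terminate the state with
      // "Out of bounds read when extracting element" (bad_vector_access);
      // a symbolic i would instead hit the Unhandled "symbolic index" path.
      return v[i];
    }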