diff options
Diffstat (limited to 'bg.tex')
-rw-r--r-- | bg.tex | 106 |
1 files changed, 101 insertions, 5 deletions
diff --git a/bg.tex b/bg.tex index 6bc4638..ba2f131 100644 --- a/bg.tex +++ b/bg.tex @@ -1,7 +1,103 @@ \section{Background} -Our \gls{difftest} generation is based on the symbolic executor -\textsc{klee}'s architecture, which at a high level is an interpreter loop. -At each iteration, it selects a symbolic process, also known as a \gls{state}, -in whose context an instruction is then executed~\cite{klee}. +As mentioned earlier, our technique is based on the symbolic execution, +an engine named \klee{} in particular. Their general principles +and some specifics are laid out here to make it less ambiguous to describe +our approach later on. -% TODO: base symb exec algo +\subsection{Symbolic execution} +At a high level, symbolic execution is an interpreter loop. +Every iteration, it selects a symbolic process, +also known as a \emph{\gls{state}}, in whose context an instruction +is then executed~\cite{klee}. As shown in algorithm~\ref{symb-exec}, +each path with possible \gls{sat} of the given program is explore. +The number of paths grows at an exponential rate, so the interpreter loop +also breaks at the exhaustion of the time budget~\cite{klee}, +which is omitted from the algorithm for brevity. + +\begin{algorithm} + \caption{Symbolic execution}\label{symb-exec} + \begin{algorithmic}[1] + \Require Program ($P$) + \Ensure Set of test cases ($T$) + \State $T \gets \varnothing$ + \State $S \gets \textsc{InitialState}(P)$ \Comment{set of states} + \While{$S \neq \varnothing$} + \State $s \gets \textsc{Select}(S)$ + \State $S \gets S \setminus \{s\}$ + \State $i, e, \varPhi \gets s$ + \Comment{instruction, environment, and path constraints} + \State $i', e' \gets \textsc{Execute}(i, e)$ + \If{$\textsc{IsHalt}(i)$} + \State $t \gets \textsc{GenerateTest}(\varPhi)$ + \If{$t \ne \bot$} + $T \gets T \cup \{t\}$ + \EndIf + \ElsIf{$\textsc{IsBranch}(i)$} + \State $\varphi, i_c', i_a' \gets i'$ + \Comment{condition, consequent, and alternative} + \If{$\textsc{sat}(\varPhi \land \varphi)$} + \State $s_c' \gets i_c', e', \varPhi \land \varphi$ + \State $S \gets S \cup \{s_c'\}$ + \EndIf + \If{$\textsc{sat}(\varPhi \land \lnot\varphi)$} + \State $s_a' \gets i_a', e', \varPhi \land \lnot\varphi$ + \State $S \gets S \cup \{s_a'\}$ + \EndIf + \Else + \State $s' \gets i', e', \varPhi$ + \State $S \gets S \cup \{s'\}$ + \EndIf + \EndWhile + \end{algorithmic} +\end{algorithm} +\pagebreak + +A \gls{sat} problem involving background theories like arithmetic +in computer programs is known as a~\gls{smt} problem, a generalization +of the Boolean \gls{sat} problem~\cite{smt}, which is already NP-complete. +Under the exponential time hypothesis, the cost of solving +an arbitrary \gls{sat} problem grows exponentially with the number +of variables~\cite{k-sat}. + +\subsection{Usage of \klee{}}\label{klee-use} +The symbolic execution engine \klee{} analyses the LLVM \gls{ir} of a program, +also known as its bitcode~\cite{bc}, starting from values marked as symbolic. +Marking can be communicated either through command-line options +for files (including standard streams) and command-line arguments +or via in-source annotations~\cite{klee-docs}. + +As shown in algorithm~\ref{symb-exec}, the path constraints is only expanded +at branching instructions, so only values affecting the control flow +are tracked, since \klee{} targets the exploration of execution paths. +In~order to track other values of interest, i.e.~non-inputs, a condition +involving such value must be artificially injected. One common idiom for doing +this condition injection~\cite{angelix,cpr,trident} is shown +in figure~\ref{klee-output}. There, a~temporary symbolic avatar \texttt{var} +is created for the sole purpose of being fixed to the value \texttt{val} +in the path constraints. + +\begin{figure}[h]\centering\hfill + \begin{subfigure}{0.6\textwidth} +\begin{verbatim} +#define KLEE_OUTPUT_GEN(T) \ +T __klee_output_##T(T val, ...) { T var; \ + klee_make_symbolic(&var, sizeof(T), ...); \ + klee_assume(var == val); return val; } +KLEE_OUTPUT_GEN(int) ... +#define KLEE_OUTPUT(T, val, ...) \ + __klee_output_##T((val), ...) +\end{verbatim} + \subcaption{}\label{klee-output-def} + \end{subfigure}\hfill + \begin{subfigure}{0.3\textwidth} +\begin{alltt} + int foo; + ... +\textcolor{purple}{-bar(foo);} +\textcolor{teal}{+bar(KLEE_OUTPUT(foo));}\end{alltt} + \subcaption{}\label{klee-output-use} + \end{subfigure}\hfill + \captionsetup{subrefformat=parens} + \caption{Condition injection \subref{klee-output-def} implementation in C + and \subref{klee-output-use} example annotation.}\label{klee-output} +\end{figure} |