summary refs log tree commit diff homepage
path: root/bg.tex
diff options
context:
space:
mode:
Diffstat (limited to 'bg.tex')
-rw-r--r--bg.tex106
1 files changed, 101 insertions, 5 deletions
diff --git a/bg.tex b/bg.tex
index 6bc4638..ba2f131 100644
--- a/bg.tex
+++ b/bg.tex
@@ -1,7 +1,103 @@
 \section{Background}
-Our \gls{difftest} generation is based on the symbolic executor
-\textsc{klee}'s architecture, which at a high level is an interpreter loop.
-At each iteration, it selects a symbolic process, also known as a \gls{state},
-in whose context an instruction is then executed~\cite{klee}.
+As mentioned earlier, our technique is based on the symbolic execution,
+an engine named \klee{} in particular.  Their general principles
+and some specifics are laid out here to make it less ambiguous to describe
+our approach later on.
 
-% TODO: base symb exec algo
+\subsection{Symbolic execution}
+At a high level, symbolic execution is an interpreter loop.
+Every iteration, it selects a symbolic process,
+also known as a \emph{\gls{state}}, in whose context an instruction
+is then executed~\cite{klee}.  As shown in algorithm~\ref{symb-exec},
+each path with possible \gls{sat} of the given program is explore.
+The number of paths grows at an exponential rate, so the interpreter loop
+also breaks at the exhaustion of the time budget~\cite{klee},
+which is omitted from the algorithm for brevity.
+
+\begin{algorithm}
+  \caption{Symbolic execution}\label{symb-exec}
+  \begin{algorithmic}[1]
+    \Require Program ($P$)
+    \Ensure Set of test cases ($T$)
+    \State $T \gets \varnothing$
+    \State $S \gets \textsc{InitialState}(P)$ \Comment{set of states}
+    \While{$S \neq \varnothing$}
+      \State $s \gets \textsc{Select}(S)$
+      \State $S \gets S \setminus \{s\}$
+      \State $i, e, \varPhi \gets s$
+        \Comment{instruction, environment, and path constraints}
+      \State $i', e' \gets \textsc{Execute}(i, e)$
+      \If{$\textsc{IsHalt}(i)$}
+        \State $t \gets \textsc{GenerateTest}(\varPhi)$
+        \If{$t \ne \bot$}
+          $T \gets T \cup \{t\}$
+        \EndIf
+      \ElsIf{$\textsc{IsBranch}(i)$}
+        \State $\varphi, i_c', i_a' \gets i'$
+          \Comment{condition, consequent, and alternative}
+        \If{$\textsc{sat}(\varPhi \land \varphi)$}
+          \State $s_c' \gets i_c', e', \varPhi \land \varphi$
+          \State $S \gets S \cup \{s_c'\}$
+        \EndIf
+        \If{$\textsc{sat}(\varPhi \land \lnot\varphi)$}
+          \State $s_a' \gets i_a', e', \varPhi \land \lnot\varphi$
+          \State $S \gets S \cup \{s_a'\}$
+        \EndIf
+      \Else
+        \State $s' \gets i', e', \varPhi$
+        \State $S \gets S \cup \{s'\}$
+      \EndIf
+    \EndWhile
+  \end{algorithmic}
+\end{algorithm}
+\pagebreak
+
+A \gls{sat} problem involving background theories like arithmetic
+in computer programs is known as a~\gls{smt} problem, a generalization
+of the Boolean \gls{sat} problem~\cite{smt}, which is already NP-complete.
+Under the exponential time hypothesis, the cost of solving
+an arbitrary \gls{sat} problem grows exponentially with the number
+of variables~\cite{k-sat}.
+
+\subsection{Usage of \klee{}}\label{klee-use}
+The symbolic execution engine \klee{} analyses the LLVM \gls{ir} of a program,
+also known as its bitcode~\cite{bc}, starting from values marked as symbolic.
+Marking can be communicated either through command-line options
+for files (including standard streams) and command-line arguments
+or via in-source annotations~\cite{klee-docs}.
+
+As shown in algorithm~\ref{symb-exec}, the path constraints is only expanded
+at branching instructions, so only values affecting the control flow
+are tracked, since \klee{} targets the exploration of execution paths.
+In~order to track other values of interest, i.e.~non-inputs, a condition
+involving such value must be artificially injected.  One common idiom for doing
+this condition injection~\cite{angelix,cpr,trident} is shown
+in figure~\ref{klee-output}.  There, a~temporary symbolic avatar \texttt{var}
+is created for the sole purpose of being fixed to the value \texttt{val}
+in the path constraints.
+
+\begin{figure}[h]\centering\hfill
+  \begin{subfigure}{0.6\textwidth}
+\begin{verbatim}
+#define KLEE_OUTPUT_GEN(T)                  \
+T __klee_output_##T(T val, ...)    { T var; \
+  klee_make_symbolic(&var, sizeof(T), ...); \
+  klee_assume(var == val);      return val; }
+KLEE_OUTPUT_GEN(int) ...
+#define KLEE_OUTPUT(T, val, ...) \
+  __klee_output_##T((val), ...)
+\end{verbatim}
+    \subcaption{}\label{klee-output-def}
+  \end{subfigure}\hfill
+  \begin{subfigure}{0.3\textwidth}
+\begin{alltt}
+ int foo;
+ ...
+\textcolor{purple}{-bar(foo);}
+\textcolor{teal}{+bar(KLEE_OUTPUT(foo));}\end{alltt}
+    \subcaption{}\label{klee-output-use}
+  \end{subfigure}\hfill
+  \captionsetup{subrefformat=parens}
+  \caption{Condition injection \subref{klee-output-def} implementation in C
+           and \subref{klee-output-use} example annotation.}\label{klee-output}
+\end{figure}