diff options
author | Nguyễn Gia Phong <cnx@loang.net> | 2023-11-23 13:43:58 +0900 |
---|---|---|
committer | Nguyễn Gia Phong <cnx@loang.net> | 2023-11-23 13:43:58 +0900 |
commit | cf32a3f1cac556281aafa02355fbec106f00edd5 (patch) | |
tree | cc718244fee0f111fbb8fe5588a671ab85245551 | |
parent | 0801e732369519545d0a9d862b71a6c2df8c5adf (diff) | |
download | thesis-cf32a3f1cac556281aafa02355fbec106f00edd5.tar.gz |
Draft the algorithms
-rw-r--r-- | bg.tex | 106 | ||||
-rw-r--r-- | discuss.tex | 1 | ||||
-rw-r--r-- | intro.tex | 6 | ||||
-rw-r--r-- | technique.tex | 450 |
4 files changed, 450 insertions, 113 deletions
diff --git a/bg.tex b/bg.tex index 6bc4638..ba2f131 100644 --- a/bg.tex +++ b/bg.tex @@ -1,7 +1,103 @@ \section{Background} -Our \gls{difftest} generation is based on the symbolic executor -\textsc{klee}'s architecture, which at a high level is an interpreter loop. -At each iteration, it selects a symbolic process, also known as a \gls{state}, -in whose context an instruction is then executed~\cite{klee}. +As mentioned earlier, our technique is based on the symbolic execution, +an engine named \klee{} in particular. Their general principles +and some specifics are laid out here to make it less ambiguous to describe +our approach later on. -% TODO: base symb exec algo +\subsection{Symbolic execution} +At a high level, symbolic execution is an interpreter loop. +Every iteration, it selects a symbolic process, +also known as a \emph{\gls{state}}, in whose context an instruction +is then executed~\cite{klee}. As shown in algorithm~\ref{symb-exec}, +each path with possible \gls{sat} of the given program is explore. +The number of paths grows at an exponential rate, so the interpreter loop +also breaks at the exhaustion of the time budget~\cite{klee}, +which is omitted from the algorithm for brevity. + +\begin{algorithm} + \caption{Symbolic execution}\label{symb-exec} + \begin{algorithmic}[1] + \Require Program ($P$) + \Ensure Set of test cases ($T$) + \State $T \gets \varnothing$ + \State $S \gets \textsc{InitialState}(P)$ \Comment{set of states} + \While{$S \neq \varnothing$} + \State $s \gets \textsc{Select}(S)$ + \State $S \gets S \setminus \{s\}$ + \State $i, e, \varPhi \gets s$ + \Comment{instruction, environment, and path constraints} + \State $i', e' \gets \textsc{Execute}(i, e)$ + \If{$\textsc{IsHalt}(i)$} + \State $t \gets \textsc{GenerateTest}(\varPhi)$ + \If{$t \ne \bot$} + $T \gets T \cup \{t\}$ + \EndIf + \ElsIf{$\textsc{IsBranch}(i)$} + \State $\varphi, i_c', i_a' \gets i'$ + \Comment{condition, consequent, and alternative} + \If{$\textsc{sat}(\varPhi \land \varphi)$} + \State $s_c' \gets i_c', e', \varPhi \land \varphi$ + \State $S \gets S \cup \{s_c'\}$ + \EndIf + \If{$\textsc{sat}(\varPhi \land \lnot\varphi)$} + \State $s_a' \gets i_a', e', \varPhi \land \lnot\varphi$ + \State $S \gets S \cup \{s_a'\}$ + \EndIf + \Else + \State $s' \gets i', e', \varPhi$ + \State $S \gets S \cup \{s'\}$ + \EndIf + \EndWhile + \end{algorithmic} +\end{algorithm} +\pagebreak + +A \gls{sat} problem involving background theories like arithmetic +in computer programs is known as a~\gls{smt} problem, a generalization +of the Boolean \gls{sat} problem~\cite{smt}, which is already NP-complete. +Under the exponential time hypothesis, the cost of solving +an arbitrary \gls{sat} problem grows exponentially with the number +of variables~\cite{k-sat}. + +\subsection{Usage of \klee{}}\label{klee-use} +The symbolic execution engine \klee{} analyses the LLVM \gls{ir} of a program, +also known as its bitcode~\cite{bc}, starting from values marked as symbolic. +Marking can be communicated either through command-line options +for files (including standard streams) and command-line arguments +or via in-source annotations~\cite{klee-docs}. + +As shown in algorithm~\ref{symb-exec}, the path constraints is only expanded +at branching instructions, so only values affecting the control flow +are tracked, since \klee{} targets the exploration of execution paths. +In~order to track other values of interest, i.e.~non-inputs, a condition +involving such value must be artificially injected. One common idiom for doing +this condition injection~\cite{angelix,cpr,trident} is shown +in figure~\ref{klee-output}. There, a~temporary symbolic avatar \texttt{var} +is created for the sole purpose of being fixed to the value \texttt{val} +in the path constraints. + +\begin{figure}[h]\centering\hfill + \begin{subfigure}{0.6\textwidth} +\begin{verbatim} +#define KLEE_OUTPUT_GEN(T) \ +T __klee_output_##T(T val, ...) { T var; \ + klee_make_symbolic(&var, sizeof(T), ...); \ + klee_assume(var == val); return val; } +KLEE_OUTPUT_GEN(int) ... +#define KLEE_OUTPUT(T, val, ...) \ + __klee_output_##T((val), ...) +\end{verbatim} + \subcaption{}\label{klee-output-def} + \end{subfigure}\hfill + \begin{subfigure}{0.3\textwidth} +\begin{alltt} + int foo; + ... +\textcolor{purple}{-bar(foo);} +\textcolor{teal}{+bar(KLEE_OUTPUT(foo));}\end{alltt} + \subcaption{}\label{klee-output-use} + \end{subfigure}\hfill + \captionsetup{subrefformat=parens} + \caption{Condition injection \subref{klee-output-def} implementation in C + and \subref{klee-output-use} example annotation.}\label{klee-output} +\end{figure} diff --git a/discuss.tex b/discuss.tex index b276e76..682bd5e 100644 --- a/discuss.tex +++ b/discuss.tex @@ -1,4 +1,3 @@ \section{Discussion} \subsection{Threats to Validity} \subsection{Related Work} -\textsc{Shadow}~\cite{shadow} diff --git a/intro.tex b/intro.tex index 20697ad..0a41d8f 100644 --- a/intro.tex +++ b/intro.tex @@ -1,2 +1,8 @@ \setcounter{page}{1} \section{Introduction} +TODO: mention symbolic execution\\ +TODO: mention \gls{apr} + +This work's main contributions are:\begin{enumerate} + \item A tool named \textsc{Psychic} +\end{enumerate} diff --git a/technique.tex b/technique.tex index d4a576c..fa1e6cc 100644 --- a/technique.tex +++ b/technique.tex @@ -1,118 +1,354 @@ \section{Technique} -Unlike the original \textsc{klee} which execute one single program at a time, -our tool performs execution on a pool of the program's revisions -at the same time. - -An overview of our approach is illustrated in figure~\ref{fig:arch}. - -\begin{figure}[h]\centering - \begin{tikzpicture}[mymatrix/.style={matrix of nodes, - nodes=typetag,row sep=1ex}, - typetag/.style={draw,anchor=west}, - title/.style={draw=none,inner sep=0pt}, - line width=0.2ex] - \linespread{0.8} - \node[draw,label={[name=sched]Scheduler}] (states) {States}; - \node[draw,below=0.5ex of states] (clstr) {Clusters}; - \node[draw,fit={(sched) (states) (clstr)}] {}; - - \node[draw,right=11em of states] (symb) {Interpreter}; - \draw[-angle 45] (states.north east) to[in=90,out=15] - node[sloped,above,pos=0.4]{selected state} (symb.north west); - \draw[-angle 45] (symb.south west) to[in=0,out=-90] - node[sloped,above,pos=0.6]{child state} (states.east); - - \node[draw,below=6em of symb] (smt) {Theorem prover}; - \draw[angle 45-angle 45] (symb) to (smt); - - \node[draw,below=7em of sched,label={[name=diff]Differentiator}] - (outs) {Fruitful states}; - \draw[-angle 45] (symb.south west) to[in=45,out=-90] - node[sloped,above,pos=0.6]{state with output} (outs.15); - \draw[-angle 90] (outs) to[in=90,out=0,pos=0.4] - node[sloped,below]{path conditions} (smt.north west); - \draw (outs.south east) to[in=90,out=10] (smt.north west); - - \node[draw,below=0.5ex of outs] (prog) {Metaprogram}; - \node[draw,fit={(diff) (outs) (prog)}] {}; - \draw[-angle 45] (smt.south west) to[out=-90,in=0] - node[sloped,below]{concrete input} (prog); - \draw[-angle 45] (prog) to[out=180,in=-150] - node[left,align=center]{diff.\\test} (clstr.south west); - - \node[left=4em of states.west] (bc) {bitcode}; - \draw[-angle 90] (bc) to[in=180,out=60] (states.west); - \node[below=1em of bc,align=center] (tree) {decision\\tree}; - \draw[-angle 45] (clstr.west) to[out=180,in=60] (tree); - \node[above=1em of bc,align=center] (seed) {seed\\input}; - \draw[dotted] (seed) to[out=0,in=180] (states); - \draw[-angle 45] (bc.south west) - to[out=-135,in=-135] (prog.south west); - \end{tikzpicture} - \caption{\textsc{Psychic} overview.}\label{fig:arch} +In order to generate test cases to distinguish between multiple patches, +our tool extends symbolic execution in a number of ways. First, instead +of taking in a single program, all of its revisions are passed to the tool. +Since each patch only modifies a small portion of the program, most of the code +is shared among these revisions, hence they are combined into +a \emph{meta program} to avoid repeating execution. + +Second, we combine the path constraints in execution \glspl{state} +of different revisions in a certain way before feeding to a \gls{smt} solver +to generate \glspl{difftest}. Third, the new conditions the combined +\gls{smt} formula includes (aside from the conjunction of its parents) +are assertions on the distinction of corresponding values across revisions, +so we offer multiple options to capture such \emph{\glspl{symout}}. + +Next, we introduces concrete execution to reduce the theorem proving load, +and produce \glspl{difftest} for clustering the patches and finally produce +a decision tree. One may observe that the clustering information +can help with selecting more ``promising'' states, although such scheduling +enhancement is not covered by this work. + +We implemented these extensions on top of \klee{} in a tool +named \psychic{}, whose high-level architectures are compared +in figure~\ref{arch}. The overall process of \psychic{} +is denoted in algorithm~\ref{overall}. + +\begin{figure}[h] + \begin{subcaptiongroup} + \subcaptionlistentry{KLEE}\label{klee-arch} + \begin{tikzpicture}[mymatrix/.style={matrix of nodes, + nodes=typetag,row sep=1ex}, + typetag/.style={draw,anchor=west}, + title/.style={draw=none,inner sep=0pt}, + line width=0.2ex] + \linespread{0.8} + \node[draw,label={[name=sched]Scheduler}] (states) {States}; + \node[draw,fit={(sched) (states)}] {}; + + \node[draw,right=11em of states] (symb) {Interpreter}; + \draw[-angle 45] (states.north east) to[in=90,out=15] + node[sloped,above,pos=0.4]{Selected state} (symb.north west); + \draw[-angle 45] (symb.south west) to[in=0,out=-90] + node[sloped,above,pos=0.6]{Child state} (states.east); + + \node[draw,below=3em of symb] (smt) {Theorem prover}; + \draw[angle 45-angle 45] (symb) to (smt); + + \node[left=4em of states.west] (bc) {Bitcode}; + \draw[-angle 90] (bc) to[in=180,out=60] (states.west); + \node[above=1em of bc,align=center] (seed) {Seed\\input}; + \draw[dotted] (seed) to[out=0,in=180] (states); + \node[below=1em of bc,align=center] (tests) {Test\\cases}; + \draw[-angle 45] (smt.west) to[in=0,out=180] (tests); + \node[below=5em of sched.south east] (symb) {\captiontext{}}; + \end{tikzpicture} + \vspace{-2em} + \subcaptionlistentry{Psychic}\label{psychic-arch} + \begin{tikzpicture}[mymatrix/.style={matrix of nodes, + nodes=typetag,row sep=1ex}, + typetag/.style={draw,anchor=west}, + title/.style={draw=none,inner sep=0pt}, + line width=0.2ex] + \linespread{0.8} + \node[draw,label={[name=sched]Scheduler}] (states) {States}; + \node[draw,below=0.5ex of states] (clstr) {Clusters}; + \node[draw,fit={(sched) (states) (clstr)}] {}; + + \node[draw,right=11em of states] (symb) {Interpreter}; + \draw[-angle 45] (states.north east) to[in=90,out=15] + node[sloped,above,pos=0.4]{Selected state} (symb.north west); + \draw[-angle 45] (symb.south west) to[in=0,out=-90] + node[sloped,above,pos=0.6]{Child state} (states.east); + + \node[draw,below=6em of symb] (smt) {Theorem prover}; + \draw[angle 45-angle 45] (symb) to (smt); + + \node[draw,below=7em of sched,label={[name=diff]Differentiator}] + (outs) {Fruitful states}; + \draw[-angle 45] (symb.south west) to[in=45,out=-90] + node[sloped,above,pos=0.6]{State with output} (outs.15); + \draw[-angle 90] (outs) to[in=90,out=0,pos=0.4] + node[sloped,below]{Path conditions} (smt.north west); + \draw (outs.south east) to[in=90,out=10] (smt.north west); + + \node[draw,below=0.5ex of outs] (prog) {Binary}; + \node[draw,fit={(diff) (outs) (prog)}] {}; + \draw[-angle 45] (smt.south west) to[out=-90,in=0] + node[sloped,below]{Concrete input} (prog); + \draw[-angle 45] (prog) to[out=180,in=-150] + node[left,align=center]{Diff.\\test} (clstr.south west); + + \node[left=4em of states.west] (bc) {Bitcode}; + \draw[-angle 90] (bc) to[in=180,out=60] (states.west); + \node[below=1em of bc,align=center] (tree) {Decision\\tree}; + \draw[-angle 45] (clstr.west) to[out=180,in=60] (tree); + \node[above=1em of bc,align=center] (seed) {Seed\\input}; + \draw[dotted] (seed) to[out=0,in=180] (states); + \draw[-angle 45] (bc.south west) + to[out=-135,in=-135] (prog.south west); + \node[below=12em of sched.south east] (symb) {\captiontext{}}; + \end{tikzpicture} + \end{subcaptiongroup} + \captionsetup{subrefformat=parens} + \caption{Architecture overviews of \subref{klee-arch} \klee{} + and \subref{psychic-arch} \psychic{} extension.}\label{arch} \end{figure} \begin{algorithm} -\caption{Overall execution} -\begin{algorithmic}[1] - \Require set of program revisions $D$ - \Ensure set of differential tests $T$ - \State $T \gets \varnothing$ - \State $E \gets \varnothing$ \Comment{Exited states} - \State $q \gets \text{new\_searcher}(D)$ - \For{$d \in D$} - \State $s \gets \text{initial\_state}(d)$ - \State $\text{add\_states}(q, \{s\})$ - \EndFor - \While{has\_state($q$)} - \State $s \gets \text{select\_state}(q)$ - \State $i \gets \text{next\_instruction}(s)$ - \Comment{$i$ is a state} - \If{$i$ is exit} - \State $E' \gets \text{head}(\text{sort}(\text{distance\_to}(i), - \text{different\_revision}(E, i)))$ - \For{$e \in E'$} - \State $t \gets \text{generate\_test}(i, e)$ - \If{$t$ is not null} - \State $T \gets T \cup \{t\}$ + \caption{Overall execution of \psychic{}}\label{overall} + \begin{algorithmic}[1] + \Require Meta program $P$ + \Ensure Set of differential tests $T$ + \State $T \gets \varnothing$ + \State $X \gets \varnothing$ \Comment{Halt states} + \State $S \gets \textsc{InitialState}(P)$ + \While{$S \neq \varnothing$} + \State $i, e, \varPhi, n \gets \textsc{Select}(S)$ + \Comment{instruction, environment, + path constraints, and revision number} + \State $S \gets S \setminus \{(i, e, \varPhi, n)\}$ + \State $i', e' \gets \textsc{Execute}(i, e)$ + \If{$\textsc{IsHalt}(i)$} + \For{$(\varPhi', n') \in X$} + \If{$n \ne n'$} + \State $t \gets \textsc{DiffTest}(\varPhi, n, \varPhi', n')$ + \If{$t \ne \bot$} + $T \gets T \cup \{t\}$ + \EndIf + \EndIf + \EndFor + \State TODO: describe clustering + \ElsIf{$\textsc{IsBranch}(i)$} + \State $\varphi, i_c', i_a' \gets i'$ + \Comment{condition, consequent, and alternative} + \If{$\textsc{IsMeta}(\varphi)$} + \State $n' \gets \textsc{RevisionNumber}(\varphi)$ + \If{$n = 0 \lor n' = 0$} + $S \gets S \cup \{(i_c', e', \varPhi, n')\}$ + \Comment{at most one simultaneous patch} + \EndIf + \State $S \gets S \cup \{(i_a', e', \varPhi, n)\}$ + \Else + \If{$\textsc{sat}(\varPhi \land \varphi)$} + $S \gets S \cup \{(i_c', e', \varPhi \land \varphi, n)\}$ + \EndIf + \If{$\textsc{sat}(\varPhi \land \lnot\varphi)$} + $S \gets S \cup \{(i_a', e', \varPhi \land \lnot\varphi, n)\}$ + \EndIf + \EndIf + \Else + \If{$\textsc{IsReturn}(i)$} + \State $s' \gets i', e', \varPhi \land \textsc{FunctionOutputs}(i)$ + \Else + \State $s' \gets i', e', \varPhi$ + \EndIf + \State $S \gets S \cup \{s'\}$ + \EndIf + \EndWhile + \end{algorithmic} +\end{algorithm} + +\subsection{Meta program} +The meta program format takes inspiration from program unification +used for shadow symbolic execution. Unlike \textsc{Shadow} +which tests only one patch~\cite{shadow} to reveal its regressions, though, +the goal of \psychic{} is to find the difference in behavior +between multiple patches. Therefore, the former's annotation +at level of expressions does not meet our expectation on expressivity. +Instead, we implement higher-order branching. Annotating patches +at block level simplifies the generation process +and improves the readability of the meta program. + +\begin{figure}\centering + \begin{subfigure}{0.35\textwidth} +\begin{alltt} +\textcolor{purple}{---} dfasearch.c \textcolor{purple}{c1cb19fe} +\textcolor{blue}{+++} dfasearch.c \textcolor{blue}{8f08d8e2} +@@ \textcolor{purple}{-360,2} \textcolor{blue}{+360,1} @@ +\textcolor{purple}{-beg = match;} +\textcolor{purple}{-goto success_in_len;} +\textcolor{blue}{+goto success;}\end{alltt} + \subcaption{}\label{8f08d8e2} + \end{subfigure} + \begin{subfigure}{0.35\textwidth} +\begin{alltt} +\textcolor{purple}{---} dfasearch.c \textcolor{purple}{c1cb19fe} +\textcolor{violet}{+++} dfasearch.c \textcolor{violet}{YmI4MGM3ND} +@@ \textcolor{purple}{-360,2} \textcolor{violet}{+360,2} @@ + beg = match; +\textcolor{purple}{-goto success_in_len;} +\textcolor{violet}{+goto success;}\end{alltt} + \subcaption{}\label{YmI4MGM3ND} + \end{subfigure} + \captionsetup{subrefformat=parens} + \caption{Unified format of \subref{8f08d8e2} a fix + in \texttt{grep}'s commit \texttt{8f08d8e2}, + and \subref{YmI4MGM3ND} an alternative patch.}\label{grep-8f08d8e2} +\end{figure} + +For instance, given the two unified diffs~\cite{unidiff} of a bug fix +from upstream \texttt{grep}, and a patch in \textsc{DBGBench}~\cite{dbgbench} +in figure~\ref{grep-8f08d8e2},\footnote{The snippets have been dedented +to fit within the width of this page.} one can construct the relevant part +of the meta program in figure~\ref{grep-8f08d8e2-meta}, where \verb|__klee_meta| +parses the environment for the choice of revision when executed +concretely~\cite{msv} and returns a new symbolic value in symbolic evaluation. +This way, the meta program can be used for both executions to share metadata +such as the patch number (82 and 43 in this case). + +\begin{figure}\centering +\begin{varwidth}{\linewidth}\begin{alltt} +int __klee_meta2 = __klee_meta(2); +if (__klee_meta(2) == \textcolor{purple}{0}) \verb|{| // original buggy version + \textcolor{purple}{beg = match;} +\verb|}| else if (__klee_meta2 == \textcolor{blue}{82}) \verb|{| // upstream patch +\verb|}| else if (__klee_meta2 == \textcolor{violet}{43}) \verb|{| // alternative patch + \textcolor{violet}{beg = match;} +\verb|}| +int __klee_meta3 = __klee_meta(3); +if (__klee_meta(3) == \textcolor{purple}{0}) \verb|{| + \textcolor{purple}{goto success_in_len;} +\verb|}| else if (__klee_meta3 == \textcolor{blue}{82}) \verb|{| + \textcolor{blue}{goto success;} +\verb|}| else if (__klee_meta3 == \textcolor{violet}{43}) \verb|{| + \textcolor{violet}{goto success;} +\verb|}|\end{alltt}\end{varwidth} +\caption{Meta branches with the fixes in \texttt{grep}'s upstream + and alternative patches in figure~\ref{grep-8f08d8e2}.} +\label{grep-8f08d8e2-meta} +\end{figure} + +One may notice that the meta program may have any level of granularity. +\psychic{} utilizes the meta program to maximize instructions +and path constraints sharing, which effectively means to defer +the divergence point deep into the control flow. For its intended use +on automatically generated patches whose hunks are commonly short, +optimizing the meta program construction is not a major issue. + +In algorithm~\ref{overall}, \textsc{IsMeta} cheaply detects +each higher-order branch via basic pattern matching and skip +adding its condition to the children states. Instead, +those states carry the revision number outside of their path constraints +for constant-time access when pruning paths going though multiple patches. + +\subsection{Symbolic output selection} +\psychic{} is based on \klee{} and thus inherits its support +for the diverse set of programs written in languages with C \gls{ffi} +for calling \klee{} intrinsics and a compiler using LLVM as a backend. +One kind of LLVM instruction may be used for difference purposes, +so for better applicability we try to accommodate for a range of usages. +For capturing output values, \psychic{} offers the following methods. +\pagebreak +\begin{enumerate}[noitemsep] + \item Source code annotation\label{annote} + \item Output files (including standard output)\label{stdout} + \item Function return values\label{ret} + \item Pointers to mutable data as function arguments\label{parg} +\end{enumerate} + +Method \ref{annote} is handled similar to previous works +described in section~\ref{klee-use}. With \klee{} support for symbolic files, +the same avatar idiom is trivially applied for~\ref{stdout}. However, +since files are byte buffers, their application in symbolic execution +is limited due to the scalability of \gls{smt} solvers, which treat +each byte as a variable. For human-readable files, this is particularly +inefficient due to their low information entropy~\cite{entropy}. + +To balance between manual annotation and computational scalability, +methods \ref{ret} and \ref{parg} are introduced to automatically capture output +more precisely at the end of every interested function. Debugging information +forwarded from compiler frontend are kept in bitcodes, and they are used +to limit the subroutines to be monitored, for example in algorithm~\ref{fnout}, +the file that is patched by an \gls{apr} tool. + +\begin{algorithm} + \caption{Extraction of symbolic outputs of a function}\label{fnout} + \begin{algorithmic}[1] + \Procedure{FunctionOutputs}{$i$} + \State $f \gets \textsc{Function}(i)$ + \If{$\textsc{SourceFile}(i) \neq \textsc{PatchedFile}$} + \Return $\top$ + \EndIf + \State $\varPhi \gets \textsc{SymbolicOutput}(\textsc{ReturnValue}(i), f)$ + \For{$a \in \textsc{FunctionArguments}(i)$} + \If{$\textsc{IsPointer}(a) \land \lnot\textsc{IsConstPointer}(a) + \land \lnot\textsc{IsFunctionPointer}(a)$} + \State $\varPhi \gets \varPhi + \land \textsc{SymbolicOutput}(\textsc{Dereference}(a), a)$ \EndIf \EndFor - \State $E \gets E \cup \{i\}$ - \Else - \State $S_+, S_- \gets \text{execute\_instruction}(i)$ - \State $\text{remove\_states}\left(q, S_-\right)$ - \State $\text{add\_states}\left(q, S_+\right)$ - \EndIf - \EndWhile -\end{algorithmic} + \State \Return $\varPhi$ + \EndProcedure + \end{algorithmic} \end{algorithm} +Each symbolic output then is given an unique name for later comparison. +For the sake of simplicity, we name them after the capturing method +and associated identifier, i.e.~variable or subroutine. + +\subsection{Input generation} +When two states monitor some commonly named symbolic outputs +and are at \emph{parallel} positions in their path, their path constraints +can be used to generate a \gls{difftest}. Two states are said to be parallel +if and only if their program counter points to the same instruction +and the conjunction of their path conditions is satisfiable. + +Currently \psychic{} only compare halt states to minimize memory usage +and \gls{smt} solving cost. Such \gls{smt} formula is constructed +in algorithm~\ref{diffgen} by first append a suffix to each output name, +then for the common ones assert for possible distinction between two paths. +If the path constraints of both states are \gls{sat} with at least +one difference in output, a \gls{difftest} can be generated +from the \gls{smt} model. + \begin{algorithm} -\caption{State selection} -\begin{algorithmic}[1] - \Require searcher $q$ - \Ensure state $s$ - \State $S \gets \varnothing$ - \State $P \gets \text{base\_priority}(q)$ - \Comment{Scheduling priority from base search algorithm} - \For{$z \in \text{states}(q)$} - \If{$z \notin S$} - \State $S' \gets \text{parallel\_states}(q, z)$ - \If{$S' \ne \varnothing$} - \State $S \gets S \cup S' \cup \{z\}$ + \caption{\Gls{difftest} generation}\label{diffgen} + \begin{algorithmic}[1] + \Procedure{DiffTest}{$\varPhi, n, \varPhi', n'$} + \State $\varPsi, N \gets \textsc{RenameOutputs}(\varPhi, n)$ + \State $\varPsi', N' \gets \textsc{RenameOutputs}(\varPhi', n')$ + \For{$a \in N \cap N'$} + \State $\varPsi_\text{diff} + \gets \varPsi_\text{diff} \lor \textsc{Distinct}(a, n, n')$ + \EndFor + \If{$\lnot\textsc{sat}\left(\varPsi \land \varPsi' + \land \varPsi_\text{diff}\right)$} + \State \Return $\bot$ + \EndIf + \State TODO: describe concrete execution + \State \Return $\textsc{Model}\left(\varPsi \land \varPsi' + \land \varPsi_\text{diff}\right)$ + \EndProcedure + \Procedure{RenameOutputs}{$e, s$} + \State $E, k \gets e$ \Comment{subexpressions and kind} + \If{$\textsc{IsRead}(t)$} + \State $a, i \gets E$ \Comment{array name and index} + \If{$\textsc{IsOutput}(a)$} + \State $a' \gets \textsc{AppendName}(a, s)$ + \State $e' \gets ((a', i), k)$ + \State \Return $e', \{a\}$ + \Else + \State \Return $e, \varnothing$ + \EndIf \EndIf - \EndIf - - \State $l \gets \text{local\_frontier}(q, z)$ - \State $g \gets \text{global\_frontier}(q, z)$ - \If{$l$ is null $\land$ $g$ is not null} - \State $P_z \gets P_z + \text{distance\_bonus}(g, z)$ - \Comment{Should further or nearer be prioritised?} - \EndIf - \EndFor - \For{$z \in S$} - \State $P_z \gets P_z + \text{PARALLEL\_BONUS}$ - \EndFor - \State $s \gets \text{max\_priority}(P)$ -\end{algorithmic} + \State $E' \gets E$ + \State $N \gets \varnothing$ \Comment{seen output names} + \For{$i \in 1{.}\,{.}\textsc{Length}(E)$} + \State $E'_i, N_+ \gets \textsc{RenameOutputs}(E_i, s)$ + \State $N \gets N \cup N_+$ + \EndFor + \State \Return $(E', k), N$ + \EndProcedure + \end{algorithmic} \end{algorithm} |