From 679ca723e849b731a9d94a276763bb59756aa3a2 Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Fri, 24 Nov 2023 18:08:17 +0900 Subject: Proof read approach --- bg.tex | 24 +++++----- ref.bib | 24 ++++++++++ technique.tex | 144 +++++++++++++++++++++++++++++++++------------------------- 3 files changed, 119 insertions(+), 73 deletions(-) diff --git a/bg.tex b/bg.tex index ba2f131..4c12048 100644 --- a/bg.tex +++ b/bg.tex @@ -20,31 +20,31 @@ which is omitted from the algorithm for brevity. \Require Program ($P$) \Ensure Set of test cases ($T$) \State $T \gets \varnothing$ - \State $S \gets \textsc{InitialState}(P)$ \Comment{set of states} + \State $S \gets \Call{InitialState}{P}$ \Comment{set of states} \While{$S \neq \varnothing$} - \State $s \gets \textsc{Select}(S)$ + \State $s \gets \Call{Select}{S}$ \State $S \gets S \setminus \{s\}$ - \State $i, e, \varPhi \gets s$ + \State $(i, e, \varPhi) \gets s$ \Comment{instruction, environment, and path constraints} - \State $i', e' \gets \textsc{Execute}(i, e)$ - \If{$\textsc{IsHalt}(i)$} - \State $t \gets \textsc{GenerateTest}(\varPhi)$ + \State $(i', e') \gets \Call{Execute}{i, e}$ + \If{$\Call{IsHalt}{i}$} + \State $t \gets \Call{GenerateTest}{\varPhi}$ \If{$t \ne \bot$} $T \gets T \cup \{t\}$ \EndIf - \ElsIf{$\textsc{IsBranch}(i)$} - \State $\varphi, i_c', i_a' \gets i'$ + \ElsIf{$\Call{IsBranch}{i}$} + \State $(\varphi, i_c', i_a') \gets i'$ \Comment{condition, consequent, and alternative} - \If{$\textsc{sat}(\varPhi \land \varphi)$} - \State $s_c' \gets i_c', e', \varPhi \land \varphi$ + \If{$\Call{sat}{\varPhi \land \varphi}$} + \State $s_c' \gets (i_c', e', \varPhi \land \varphi)$ \State $S \gets S \cup \{s_c'\}$ \EndIf - \If{$\textsc{sat}(\varPhi \land \lnot\varphi)$} + \If{$\Call{sat}{\varPhi \land \lnot\varphi}$} \State $s_a' \gets i_a', e', \varPhi \land \lnot\varphi$ \State $S \gets S \cup \{s_a'\}$ \EndIf \Else - \State $s' \gets i', e', \varPhi$ + \State $s' \gets (i', e', \varPhi)$ \State $S \gets S \cup \{s'\}$ \EndIf \EndWhile diff --git a/ref.bib b/ref.bib index 5703043..4c2eac1 100644 --- a/ref.bib +++ b/ref.bib @@ -8,6 +8,20 @@ pages = {379-423}, note = {\doi{10.1002/j.1538-7305.1948.tb01338.x}}} +@article{tree-pattern, + author = {Christoph M. Hoffmann and Michael J. O'Donnell}, + title = {Pattern Matching in Trees}, + year = 1982, + publisher = {Association for Computing Machinery}, + volume = 29, + number = 1, + issn = {0004-5411}, + note = {\doi{10.1145/322290.322295}}, + journal = {J. ACM}, + month = jan, + pages = {68--95}, + numpages = 28} + @inproceedings{k-sat, author = {Russell Impagliazzo and Ramamohan Paturi}, booktitle = {Proc. IEEE Conf. Comput. Complex. (CCC)}, @@ -171,6 +185,16 @@ year = 2017, note = {\doi{10.1145/3106237.3106255}}} +@inproceedings{fl-eval, + author = {Spencer Pearson and Jos\'e Campos and Ren\'e Just + and Gordon Fraser and Rui Abreu and Michael D. Ernst + and Deric Pang and Benjamin Keller}, + booktitle = {Proc. ACM/IEEE Int. Conf. Softw. Eng. (ICSE)}, + title = {Evaluating and Improving Fault Localization}, + year = 2017, + pages = {609--620}, + note = {\doi{10.1109/ICSE.2017.62}}} + @article{shadow, author = {Tomasz Kuchta and Hristina Palikareva and Cristian Cadar}, title = {Shadow Symbolic Execution for Testing Software Patches}, diff --git a/technique.tex b/technique.tex index fa1e6cc..58e9348 100644 --- a/technique.tex +++ b/technique.tex @@ -111,49 +111,46 @@ is denoted in algorithm~\ref{overall}. \Require Meta program $P$ \Ensure Set of differential tests $T$ \State $T \gets \varnothing$ - \State $X \gets \varnothing$ \Comment{Halt states} - \State $S \gets \textsc{InitialState}(P)$ + \State $H \gets \varnothing$ \Comment{Halt states} + \State $S \gets \Call{InitialState}{P}$ \While{$S \neq \varnothing$} - \State $i, e, \varPhi, n \gets \textsc{Select}(S)$ + \State $(i, e, \varPhi, n) \gets \Call{Select}{S}$ \Comment{instruction, environment, path constraints, and revision number} \State $S \gets S \setminus \{(i, e, \varPhi, n)\}$ - \State $i', e' \gets \textsc{Execute}(i, e)$ - \If{$\textsc{IsHalt}(i)$} - \For{$(\varPhi', n') \in X$} + \State $(i', e') \gets \Call{Execute}{i, e}$ + \If{$\Call{IsHalt}{i}$} + \For{$(\varPhi', n') \in H$} \If{$n \ne n'$} - \State $t \gets \textsc{DiffTest}(\varPhi, n, \varPhi', n')$ + \State $t \gets \Call{DiffTest}{\varPhi, n, \varPhi', n'}$ \If{$t \ne \bot$} $T \gets T \cup \{t\}$ \EndIf \EndIf \EndFor \State TODO: describe clustering - \ElsIf{$\textsc{IsBranch}(i)$} - \State $\varphi, i_c', i_a' \gets i'$ + \State $H \gets H \cup \{(\varPhi, n)\}$ + \ElsIf{$\Call{IsBranch}{i}$} + \State $(\varphi, i_c', i_a') \gets i'$ \Comment{condition, consequent, and alternative} - \If{$\textsc{IsMeta}(\varphi)$} - \State $n' \gets \textsc{RevisionNumber}(\varphi)$ - \If{$n = 0 \lor n' = 0$} - $S \gets S \cup \{(i_c', e', \varPhi, n')\}$ - \Comment{at most one simultaneous patch} - \EndIf - \State $S \gets S \cup \{(i_a', e', \varPhi, n)\}$ + \If{$\Call{IsMeta}{\varphi}$} + \State $S \gets S + \cup \Call{ApplyMetaBranch}{\varphi, i_c', i_a', e', \varPhi, n}$ \Else - \If{$\textsc{sat}(\varPhi \land \varphi)$} + \If{$\Call{sat}{\varPhi \land \varphi}$} $S \gets S \cup \{(i_c', e', \varPhi \land \varphi, n)\}$ \EndIf - \If{$\textsc{sat}(\varPhi \land \lnot\varphi)$} + \If{$\Call{sat}{\varPhi \land \lnot\varphi}$} $S \gets S \cup \{(i_a', e', \varPhi \land \lnot\varphi, n)\}$ \EndIf \EndIf \Else - \If{$\textsc{IsReturn}(i)$} - \State $s' \gets i', e', \varPhi \land \textsc{FunctionOutputs}(i)$ + \If{$\Call{IsReturn}{i}$} + \State $\varPhi' \gets \varPhi \land \Call{FunctionOutputs}{i}$ \Else - \State $s' \gets i', e', \varPhi$ + \State $\varPhi' \gets \varPhi$ \EndIf - \State $S \gets S \cup \{s'\}$ + \State $S \gets S \cup \{(i', e', \varPhi')\}$ \EndIf \EndWhile \end{algorithmic} @@ -199,13 +196,13 @@ and improves the readability of the meta program. For instance, given the two unified diffs~\cite{unidiff} of a bug fix from upstream \texttt{grep}, and a patch in \textsc{DBGBench}~\cite{dbgbench} -in figure~\ref{grep-8f08d8e2},\footnote{The snippets have been dedented +in figure~\ref{grep-8f08d8e2},\footnote{The snippets have been outdented to fit within the width of this page.} one can construct the relevant part of the meta program in figure~\ref{grep-8f08d8e2-meta}, where \verb|__klee_meta| parses the environment for the choice of revision when executed concretely~\cite{msv} and returns a new symbolic value in symbolic evaluation. This way, the meta program can be used for both executions to share metadata -such as the patch number (82 and 43 in this case). +such as the patch identifier (82 and 43 in this case). \begin{figure}\centering \begin{varwidth}{\linewidth}\begin{alltt} @@ -237,10 +234,33 @@ on automatically generated patches whose hunks are commonly short, optimizing the meta program construction is not a major issue. In algorithm~\ref{overall}, \textsc{IsMeta} cheaply detects -each higher-order branch via basic pattern matching and skip -adding its condition to the children states. Instead, -those states carry the revision number outside of their path constraints -for constant-time access when pruning paths going though multiple patches. +higher-order branching patch identifier, via basic tree pattern matching% +~\cite{tree-pattern} and skip adding its condition to the children states. +Instead, those states carry the revision number outside +of their path constraints for constant-time access when pruning paths +going though multiple patches in algorithm~\ref{apply-meta}. + +\begin{algorithm} + \caption{Construction of possible states after a patch location} + \label{apply-meta} + \begin{algorithmic}[1] + \Procedure{ApplyMetaBranch}{$\varphi, i_c, i_a, e, \varPhi, n$} + \State $n' \gets \Call{RevisionNumber}{\varphi}$ + \State $s_a = (i_a, e, \varPhi, n)$ + \Comment{assume the lack of meta branch nesting + and always follow alternative path} + \If{$n = 0$} + \Return $\{(i_c, e, \varPhi, n'), s_a\}$ + \Comment{$i_c$ enters patch $n'$ } + \EndIf + \If{$n' \ne 0 \land n \ne n'$} + \Return $\{s_a\}$ + \Comment{at most one simultaneous patch} + \EndIf + \State \Return $\{(i_c, e, \varPhi, n), s_a\}$ + \EndProcedure + \end{algorithmic} +\end{algorithm} \subsection{Symbolic output selection} \psychic{} is based on \klee{} and thus inherits its support @@ -261,9 +281,11 @@ Method \ref{annote} is handled similar to previous works described in section~\ref{klee-use}. With \klee{} support for symbolic files, the same avatar idiom is trivially applied for~\ref{stdout}. However, since files are byte buffers, their application in symbolic execution -is limited due to the scalability of \gls{smt} solvers, which treat -each byte as a variable. For human-readable files, this is particularly -inefficient due to their low information entropy~\cite{entropy}. +is limited due to the scalability of \gls{smt} solvers, which treat each byte +as a variable. For human-readable files, this is particularly inefficient +due to their low information entropy~\cite{entropy}. On the other hand, +because of imperfect fault localization either by human~\cite{dbgbench} +or automatically~\cite{fl-eval}, output annotation has limited exhaustiveness. To balance between manual annotation and computational scalability, methods \ref{ret} and \ref{parg} are introduced to automatically capture output @@ -273,19 +295,20 @@ to limit the subroutines to be monitored, for example in algorithm~\ref{fnout}, the file that is patched by an \gls{apr} tool. \begin{algorithm} - \caption{Extraction of symbolic outputs of a function}\label{fnout} + \caption{Extraction of \glspl{symout} of a function}\label{fnout} \begin{algorithmic}[1] \Procedure{FunctionOutputs}{$i$} - \State $f \gets \textsc{Function}(i)$ - \If{$\textsc{SourceFile}(i) \neq \textsc{PatchedFile}$} + \State $f \gets \Call{Function}{i}$ + \If{$f$ is not in the patched file} \Return $\top$ \EndIf - \State $\varPhi \gets \textsc{SymbolicOutput}(\textsc{ReturnValue}(i), f)$ - \For{$a \in \textsc{FunctionArguments}(i)$} - \If{$\textsc{IsPointer}(a) \land \lnot\textsc{IsConstPointer}(a) - \land \lnot\textsc{IsFunctionPointer}(a)$} + % Can't do nested calls LOL + \State $\varPhi \gets \Call{SymbolicOutput}{\textsc{ReturnValue}(i), f}$ + \For{$a \in \Call{FunctionArguments}{i}$} + \If{$\Call{IsPointer}{a} \land \lnot\Call{IsConstPointer}{a} + \land \lnot\Call{IsFunctionPointer}{a}$} \State $\varPhi \gets \varPhi - \land \textsc{SymbolicOutput}(\textsc{Dereference}(a), a)$ + \land \Call{SymbolicOutput}{\textsc{Dereference}(a), a}$ \EndIf \EndFor \State \Return $\varPhi$ @@ -293,12 +316,12 @@ the file that is patched by an \gls{apr} tool. \end{algorithmic} \end{algorithm} -Each symbolic output then is given an unique name for later comparison. +Each \gls{symout} then is given an unique name for later comparison. For the sake of simplicity, we name them after the capturing method and associated identifier, i.e.~variable or subroutine. \subsection{Input generation} -When two states monitor some commonly named symbolic outputs +When two states monitor some commonly named \glspl{symout} and are at \emph{parallel} positions in their path, their path constraints can be used to generate a \gls{difftest}. Two states are said to be parallel if and only if their program counter points to the same instruction @@ -316,39 +339,38 @@ from the \gls{smt} model. \caption{\Gls{difftest} generation}\label{diffgen} \begin{algorithmic}[1] \Procedure{DiffTest}{$\varPhi, n, \varPhi', n'$} - \State $\varPsi, N \gets \textsc{RenameOutputs}(\varPhi, n)$ - \State $\varPsi', N' \gets \textsc{RenameOutputs}(\varPhi', n')$ + \State $\varPsi, N \gets \Call{RenameOutputs}{\varPhi, n}$ + \State $\varPsi', N' \gets \Call{RenameOutputs}{\varPhi', n'}$ \For{$a \in N \cap N'$} \State $\varPsi_\text{diff} - \gets \varPsi_\text{diff} \lor \textsc{Distinct}(a, n, n')$ + \gets \varPsi_\text{diff} \lor \Call{Distinct}{a, n, n'}$ \EndFor - \If{$\lnot\textsc{sat}\left(\varPsi \land \varPsi' - \land \varPsi_\text{diff}\right)$} + \If{$\lnot\Call{sat}{\varPsi \land \varPsi' \land \varPsi_\text{diff}}$} \State \Return $\bot$ \EndIf \State TODO: describe concrete execution - \State \Return $\textsc{Model}\left(\varPsi \land \varPsi' - \land \varPsi_\text{diff}\right)$ + \State \Return $\Call{Model}{\varPsi \land \varPsi' + \land \varPsi_\text{diff}}$ \EndProcedure - \Procedure{RenameOutputs}{$e, s$} - \State $E, k \gets e$ \Comment{subexpressions and kind} - \If{$\textsc{IsRead}(t)$} - \State $a, i \gets E$ \Comment{array name and index} - \If{$\textsc{IsOutput}(a)$} - \State $a' \gets \textsc{AppendName}(a, s)$ - \State $e' \gets ((a', i), k)$ - \State \Return $e', \{a\}$ + \Procedure{RenameOutputs}{$x, n$} + \State $X, k \gets x$ \Comment{subexpressions and kind} + \If{$\Call{IsRead}{t}$} + \State $a, i \gets X$ \Comment{array name and index} + \If{$\Call{IsOutput}{a}$} + \State $a' \gets \Call{AppendName}{a, n}$ + \State $x' \gets ((a', i), k)$ + \State \Return $x', \{a\}$ \Else - \State \Return $e, \varnothing$ + \State \Return $x, \varnothing$ \EndIf \EndIf - \State $E' \gets E$ + \State $X' \gets X$ \State $N \gets \varnothing$ \Comment{seen output names} - \For{$i \in 1{.}\,{.}\textsc{Length}(E)$} - \State $E'_i, N_+ \gets \textsc{RenameOutputs}(E_i, s)$ + \For{$i \in 1{.}\,{.}\Call{Length}{X}$} + \State $X'_i, N_+ \gets \Call{RenameOutputs}{E_i, n}$ \State $N \gets N \cup N_+$ \EndFor - \State \Return $(E', k), N$ + \State \Return $(X', k), N$ \EndProcedure \end{algorithmic} \end{algorithm} -- cgit 1.4.1