summary refs log tree commit diff homepage
diff options
context:
space:
mode:
authorNguyễn Gia Phong <cnx@loang.net>2023-11-24 18:08:17 +0900
committerNguyễn Gia Phong <cnx@loang.net>2023-11-24 18:08:17 +0900
commit679ca723e849b731a9d94a276763bb59756aa3a2 (patch)
tree95d45dc5657288a881b32c92051d795ea315cc9e
parentcf32a3f1cac556281aafa02355fbec106f00edd5 (diff)
downloadthesis-679ca723e849b731a9d94a276763bb59756aa3a2.tar.gz
Proof read approach
-rw-r--r--bg.tex24
-rw-r--r--ref.bib24
-rw-r--r--technique.tex144
3 files changed, 119 insertions, 73 deletions
diff --git a/bg.tex b/bg.tex
index ba2f131..4c12048 100644
--- a/bg.tex
+++ b/bg.tex
@@ -20,31 +20,31 @@ which is omitted from the algorithm for brevity.
     \Require Program ($P$)
     \Ensure Set of test cases ($T$)
     \State $T \gets \varnothing$
-    \State $S \gets \textsc{InitialState}(P)$ \Comment{set of states}
+    \State $S \gets \Call{InitialState}{P}$ \Comment{set of states}
     \While{$S \neq \varnothing$}
-      \State $s \gets \textsc{Select}(S)$
+      \State $s \gets \Call{Select}{S}$
       \State $S \gets S \setminus \{s\}$
-      \State $i, e, \varPhi \gets s$
+      \State $(i, e, \varPhi) \gets s$
         \Comment{instruction, environment, and path constraints}
-      \State $i', e' \gets \textsc{Execute}(i, e)$
-      \If{$\textsc{IsHalt}(i)$}
-        \State $t \gets \textsc{GenerateTest}(\varPhi)$
+      \State $(i', e') \gets \Call{Execute}{i, e}$
+      \If{$\Call{IsHalt}{i}$}
+        \State $t \gets \Call{GenerateTest}{\varPhi}$
         \If{$t \ne \bot$}
           $T \gets T \cup \{t\}$
         \EndIf
-      \ElsIf{$\textsc{IsBranch}(i)$}
-        \State $\varphi, i_c', i_a' \gets i'$
+      \ElsIf{$\Call{IsBranch}{i}$}
+        \State $(\varphi, i_c', i_a') \gets i'$
           \Comment{condition, consequent, and alternative}
-        \If{$\textsc{sat}(\varPhi \land \varphi)$}
-          \State $s_c' \gets i_c', e', \varPhi \land \varphi$
+        \If{$\Call{sat}{\varPhi \land \varphi}$}
+          \State $s_c' \gets (i_c', e', \varPhi \land \varphi)$
           \State $S \gets S \cup \{s_c'\}$
         \EndIf
-        \If{$\textsc{sat}(\varPhi \land \lnot\varphi)$}
+        \If{$\Call{sat}{\varPhi \land \lnot\varphi}$}
           \State $s_a' \gets i_a', e', \varPhi \land \lnot\varphi$
           \State $S \gets S \cup \{s_a'\}$
         \EndIf
       \Else
-        \State $s' \gets i', e', \varPhi$
+        \State $s' \gets (i', e', \varPhi)$
         \State $S \gets S \cup \{s'\}$
       \EndIf
     \EndWhile
diff --git a/ref.bib b/ref.bib
index 5703043..4c2eac1 100644
--- a/ref.bib
+++ b/ref.bib
@@ -8,6 +8,20 @@
   pages = {379-423},
   note = {\doi{10.1002/j.1538-7305.1948.tb01338.x}}}
 
+@article{tree-pattern,
+  author = {Christoph M. Hoffmann and Michael J. O'Donnell},
+  title = {Pattern Matching in Trees},
+  year = 1982,
+  publisher = {Association for Computing Machinery},
+  volume = 29,
+  number = 1,
+  issn = {0004-5411},
+  note = {\doi{10.1145/322290.322295}},
+  journal = {J. ACM},
+  month = jan,
+  pages = {68--95},
+  numpages = 28}
+
 @inproceedings{k-sat,
   author = {Russell Impagliazzo and Ramamohan Paturi},
   booktitle = {Proc. IEEE Conf. Comput. Complex. (CCC)},
@@ -171,6 +185,16 @@
   year = 2017,
   note = {\doi{10.1145/3106237.3106255}}}
 
+@inproceedings{fl-eval,
+  author = {Spencer Pearson and Jos\'e Campos and Ren\'e Just
+            and Gordon Fraser and Rui Abreu and Michael D. Ernst
+            and Deric Pang and Benjamin Keller},
+  booktitle = {Proc. ACM/IEEE Int. Conf. Softw. Eng. (ICSE)},
+  title = {Evaluating and Improving Fault Localization},
+  year = 2017,
+  pages = {609--620},
+  note = {\doi{10.1109/ICSE.2017.62}}}
+
 @article{shadow,
   author = {Tomasz Kuchta and Hristina Palikareva and Cristian Cadar},
   title = {Shadow Symbolic Execution for Testing Software Patches},
diff --git a/technique.tex b/technique.tex
index fa1e6cc..58e9348 100644
--- a/technique.tex
+++ b/technique.tex
@@ -111,49 +111,46 @@ is denoted in algorithm~\ref{overall}.
     \Require Meta program $P$
     \Ensure Set of differential tests $T$
     \State $T \gets \varnothing$
-    \State $X \gets \varnothing$ \Comment{Halt states}
-    \State $S \gets \textsc{InitialState}(P)$
+    \State $H \gets \varnothing$ \Comment{Halt states}
+    \State $S \gets \Call{InitialState}{P}$
     \While{$S \neq \varnothing$}
-      \State $i, e, \varPhi, n \gets \textsc{Select}(S)$
+      \State $(i, e, \varPhi, n) \gets \Call{Select}{S}$
         \Comment{instruction, environment,
                  path constraints, and revision number}
       \State $S \gets S \setminus \{(i, e, \varPhi, n)\}$
-      \State $i', e' \gets \textsc{Execute}(i, e)$
-      \If{$\textsc{IsHalt}(i)$}
-        \For{$(\varPhi', n') \in X$}
+      \State $(i', e') \gets \Call{Execute}{i, e}$
+      \If{$\Call{IsHalt}{i}$}
+        \For{$(\varPhi', n') \in H$}
           \If{$n \ne n'$}
-            \State $t \gets \textsc{DiffTest}(\varPhi, n, \varPhi', n')$
+            \State $t \gets \Call{DiffTest}{\varPhi, n, \varPhi', n'}$
             \If{$t \ne \bot$}
               $T \gets T \cup \{t\}$
             \EndIf
           \EndIf
         \EndFor
         \State TODO: describe clustering
-      \ElsIf{$\textsc{IsBranch}(i)$}
-        \State $\varphi, i_c', i_a' \gets i'$
+        \State $H \gets H \cup \{(\varPhi, n)\}$
+      \ElsIf{$\Call{IsBranch}{i}$}
+        \State $(\varphi, i_c', i_a') \gets i'$
           \Comment{condition, consequent, and alternative}
-        \If{$\textsc{IsMeta}(\varphi)$}
-          \State $n' \gets \textsc{RevisionNumber}(\varphi)$
-          \If{$n = 0 \lor n' = 0$}
-            $S \gets S \cup \{(i_c', e', \varPhi, n')\}$
-            \Comment{at most one simultaneous patch}
-          \EndIf
-          \State $S \gets S \cup \{(i_a', e', \varPhi, n)\}$
+        \If{$\Call{IsMeta}{\varphi}$}
+          \State $S \gets S
+            \cup \Call{ApplyMetaBranch}{\varphi, i_c', i_a', e', \varPhi, n}$
         \Else
-          \If{$\textsc{sat}(\varPhi \land \varphi)$}
+          \If{$\Call{sat}{\varPhi \land \varphi}$}
             $S \gets S \cup \{(i_c', e', \varPhi \land \varphi, n)\}$
           \EndIf
-          \If{$\textsc{sat}(\varPhi \land \lnot\varphi)$}
+          \If{$\Call{sat}{\varPhi \land \lnot\varphi}$}
             $S \gets S \cup \{(i_a', e', \varPhi \land \lnot\varphi, n)\}$
           \EndIf
         \EndIf
       \Else
-        \If{$\textsc{IsReturn}(i)$}
-          \State $s' \gets i', e', \varPhi \land \textsc{FunctionOutputs}(i)$
+        \If{$\Call{IsReturn}{i}$}
+          \State $\varPhi' \gets \varPhi \land \Call{FunctionOutputs}{i}$
         \Else
-          \State $s' \gets i', e', \varPhi$
+          \State $\varPhi' \gets \varPhi$
         \EndIf
-        \State $S \gets S \cup \{s'\}$
+        \State $S \gets S \cup \{(i', e', \varPhi')\}$
       \EndIf
     \EndWhile
   \end{algorithmic}
@@ -199,13 +196,13 @@ and improves the readability of the meta program.
 
 For instance, given the two unified diffs~\cite{unidiff} of a bug fix
 from upstream \texttt{grep}, and a patch in \textsc{DBGBench}~\cite{dbgbench}
-in figure~\ref{grep-8f08d8e2},\footnote{The snippets have been dedented
+in figure~\ref{grep-8f08d8e2},\footnote{The snippets have been outdented
 to fit within the width of this page.} one can construct the relevant part
 of the meta program in figure~\ref{grep-8f08d8e2-meta}, where \verb|__klee_meta|
 parses the environment for the choice of revision when executed
 concretely~\cite{msv} and returns a new symbolic value in symbolic evaluation.
 This way, the meta program can be used for both executions to share metadata
-such as the patch number (82 and 43 in this case).
+such as the patch identifier (82 and 43 in this case).
 
 \begin{figure}\centering
 \begin{varwidth}{\linewidth}\begin{alltt}
@@ -237,10 +234,33 @@ on automatically generated patches whose hunks are commonly short,
 optimizing the meta program construction is not a major issue.
 
 In algorithm~\ref{overall}, \textsc{IsMeta} cheaply detects
-each higher-order branch via basic pattern matching and skip
-adding its condition to the children states.  Instead,
-those states carry the revision number outside of their path constraints
-for constant-time access when pruning paths going though multiple patches.
+higher-order branching patch identifier, via basic tree pattern matching%
+~\cite{tree-pattern} and skip adding its condition to the children states.
+Instead, those states carry the revision number outside
+of their path constraints for constant-time access when pruning paths
+going though multiple patches in algorithm~\ref{apply-meta}.
+
+\begin{algorithm}
+  \caption{Construction of possible states after a patch location}
+  \label{apply-meta}
+  \begin{algorithmic}[1]
+    \Procedure{ApplyMetaBranch}{$\varphi, i_c, i_a, e, \varPhi, n$}
+      \State $n' \gets \Call{RevisionNumber}{\varphi}$
+      \State $s_a = (i_a, e, \varPhi, n)$
+        \Comment{assume the lack of meta branch nesting
+                 and always follow alternative path}
+      \If{$n = 0$}
+        \Return $\{(i_c, e, \varPhi, n'), s_a\}$
+        \Comment{$i_c$ enters patch $n'$ }
+      \EndIf
+      \If{$n' \ne 0 \land n \ne n'$}
+        \Return $\{s_a\}$
+        \Comment{at most one simultaneous patch}
+      \EndIf
+      \State \Return $\{(i_c, e, \varPhi, n), s_a\}$
+    \EndProcedure
+  \end{algorithmic}
+\end{algorithm}
 
 \subsection{Symbolic output selection}
 \psychic{} is based on \klee{} and thus inherits its support
@@ -261,9 +281,11 @@ Method \ref{annote} is handled similar to previous works
 described in section~\ref{klee-use}.  With \klee{} support for symbolic files,
 the same avatar idiom is trivially applied for~\ref{stdout}.  However,
 since files are byte buffers, their application in symbolic execution
-is limited due to the scalability of \gls{smt} solvers, which treat
-each byte as a variable.  For human-readable files, this is particularly
-inefficient due to their low information entropy~\cite{entropy}.
+is limited due to the scalability of \gls{smt} solvers, which treat each byte
+as a variable.  For human-readable files, this is particularly inefficient
+due to their low information entropy~\cite{entropy}.  On the other hand,
+because of imperfect fault localization either by human~\cite{dbgbench}
+or automatically~\cite{fl-eval}, output annotation has limited exhaustiveness.
 
 To balance between manual annotation and computational scalability,
 methods \ref{ret} and \ref{parg} are introduced to automatically capture output
@@ -273,19 +295,20 @@ to limit the subroutines to be monitored, for example in algorithm~\ref{fnout},
 the file that is patched by an \gls{apr} tool.
 
 \begin{algorithm}
-  \caption{Extraction of symbolic outputs of a function}\label{fnout}
+  \caption{Extraction of \glspl{symout} of a function}\label{fnout}
   \begin{algorithmic}[1]
     \Procedure{FunctionOutputs}{$i$}
-      \State $f \gets \textsc{Function}(i)$
-      \If{$\textsc{SourceFile}(i) \neq \textsc{PatchedFile}$}
+      \State $f \gets \Call{Function}{i}$
+      \If{$f$ is not in the patched file}
         \Return $\top$
       \EndIf
-      \State $\varPhi \gets \textsc{SymbolicOutput}(\textsc{ReturnValue}(i), f)$
-      \For{$a \in \textsc{FunctionArguments}(i)$}
-        \If{$\textsc{IsPointer}(a) \land \lnot\textsc{IsConstPointer}(a)
-                                   \land \lnot\textsc{IsFunctionPointer}(a)$}
+      % Can't do nested calls LOL
+      \State $\varPhi \gets \Call{SymbolicOutput}{\textsc{ReturnValue}(i), f}$
+      \For{$a \in \Call{FunctionArguments}{i}$}
+        \If{$\Call{IsPointer}{a} \land \lnot\Call{IsConstPointer}{a}
+                                 \land \lnot\Call{IsFunctionPointer}{a}$}
           \State $\varPhi \gets \varPhi
-            \land \textsc{SymbolicOutput}(\textsc{Dereference}(a), a)$
+            \land \Call{SymbolicOutput}{\textsc{Dereference}(a), a}$
         \EndIf
       \EndFor
       \State \Return $\varPhi$
@@ -293,12 +316,12 @@ the file that is patched by an \gls{apr} tool.
   \end{algorithmic}
 \end{algorithm}
 
-Each symbolic output then is given an unique name for later comparison.
+Each \gls{symout} then is given an unique name for later comparison.
 For the sake of simplicity, we name them after the capturing method
 and associated identifier, i.e.~variable or subroutine.
 
 \subsection{Input generation}
-When two states monitor some commonly named symbolic outputs
+When two states monitor some commonly named \glspl{symout}
 and are at \emph{parallel} positions in their path, their path constraints
 can be used to generate a \gls{difftest}.  Two states are said to be parallel
 if and only if their program counter points to the same instruction
@@ -316,39 +339,38 @@ from the \gls{smt} model.
   \caption{\Gls{difftest} generation}\label{diffgen}
   \begin{algorithmic}[1]
     \Procedure{DiffTest}{$\varPhi, n, \varPhi', n'$}
-      \State $\varPsi, N \gets \textsc{RenameOutputs}(\varPhi, n)$
-      \State $\varPsi', N' \gets \textsc{RenameOutputs}(\varPhi', n')$
+      \State $\varPsi, N \gets \Call{RenameOutputs}{\varPhi, n}$
+      \State $\varPsi', N' \gets \Call{RenameOutputs}{\varPhi', n'}$
       \For{$a \in N \cap N'$}
         \State $\varPsi_\text{diff}
-          \gets \varPsi_\text{diff} \lor \textsc{Distinct}(a, n, n')$
+          \gets \varPsi_\text{diff} \lor \Call{Distinct}{a, n, n'}$
       \EndFor
-      \If{$\lnot\textsc{sat}\left(\varPsi \land \varPsi'
-                                  \land \varPsi_\text{diff}\right)$}
+      \If{$\lnot\Call{sat}{\varPsi \land \varPsi' \land \varPsi_\text{diff}}$}
         \State \Return $\bot$
       \EndIf
       \State TODO: describe concrete execution
-      \State \Return $\textsc{Model}\left(\varPsi \land \varPsi'
-                                          \land \varPsi_\text{diff}\right)$
+      \State \Return $\Call{Model}{\varPsi \land \varPsi'
+                                   \land \varPsi_\text{diff}}$
     \EndProcedure
-    \Procedure{RenameOutputs}{$e, s$}
-      \State $E, k \gets e$ \Comment{subexpressions and kind}
-      \If{$\textsc{IsRead}(t)$}
-        \State $a, i \gets E$ \Comment{array name and index}
-        \If{$\textsc{IsOutput}(a)$}
-          \State $a' \gets \textsc{AppendName}(a, s)$
-          \State $e' \gets ((a', i), k)$
-          \State \Return $e', \{a\}$
+    \Procedure{RenameOutputs}{$x, n$}
+      \State $X, k \gets x$ \Comment{subexpressions and kind}
+      \If{$\Call{IsRead}{t}$}
+        \State $a, i \gets X$ \Comment{array name and index}
+        \If{$\Call{IsOutput}{a}$}
+          \State $a' \gets \Call{AppendName}{a, n}$
+          \State $x' \gets ((a', i), k)$
+          \State \Return $x', \{a\}$
         \Else
-          \State \Return $e, \varnothing$
+          \State \Return $x, \varnothing$
         \EndIf
       \EndIf
-      \State $E' \gets E$
+      \State $X' \gets X$
       \State $N \gets \varnothing$ \Comment{seen output names}
-      \For{$i \in 1{.}\,{.}\textsc{Length}(E)$}
-        \State $E'_i, N_+ \gets \textsc{RenameOutputs}(E_i, s)$
+      \For{$i \in 1{.}\,{.}\Call{Length}{X}$}
+        \State $X'_i, N_+ \gets \Call{RenameOutputs}{E_i, n}$
         \State $N \gets N \cup N_+$
       \EndFor
-      \State \Return $(E', k), N$
+      \State \Return $(X', k), N$
     \EndProcedure
   \end{algorithmic}
 \end{algorithm}