summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r--all.tex1
-rw-r--r--bg.tex4
-rw-r--r--eval.tex30
-rw-r--r--result.tex19
-rw-r--r--technique.tex42
5 files changed, 58 insertions, 38 deletions
diff --git a/all.tex b/all.tex
index bc39715..fe2d8fa 100644
--- a/all.tex
+++ b/all.tex
@@ -108,7 +108,6 @@
 \include{bg.tex}
 \include{technique.tex}
 \include{eval.tex}
-\include{result.tex}
 \include{discuss.tex}
 \include{related.tex}
 \include{outro.tex}
diff --git a/bg.tex b/bg.tex
index 4c12048..b1845d4 100644
--- a/bg.tex
+++ b/bg.tex
@@ -66,6 +66,7 @@ Marking can be communicated either through command-line options
 for files (including standard streams) and command-line arguments
 or via in-source annotations~\cite{klee-docs}.
 
+
 As shown in algorithm~\ref{symb-exec}, the path constraints is only expanded
 at branching instructions, so only values affecting the control flow
 are tracked, since \klee{} targets the exploration of execution paths.
@@ -74,7 +75,8 @@ involving such value must be artificially injected.  One common idiom for doing
 this condition injection~\cite{angelix,cpr,trident} is shown
 in figure~\ref{klee-output}.  There, a~temporary symbolic avatar \texttt{var}
 is created for the sole purpose of being fixed to the value \texttt{val}
-in the path constraints.
+in the path constraints.  From here, this mechanism shall be referred to
+in abstract descriptions as \textsc{SymbolicOutput(value, context)}.
 
 \begin{figure}[h]\centering\hfill
   \begin{subfigure}{0.6\textwidth}
diff --git a/eval.tex b/eval.tex
index cd3fc7f..1431cd5 100644
--- a/eval.tex
+++ b/eval.tex
@@ -1 +1,29 @@
-\section{Evaluation}
+\section{Experiments}
+TODO: prose
+
+\subsection{Experimental setup}
+We evaluate \psychic{} on patches generated by MSV~\cite{msv}, a fork
+of the pattern-based \gls{apr} tool \textsc{Prophet}~\cite{genprog}
+that adds extra repair templates and generate meta programs.
+MSV is used on \textsc{IntroClass}, a set of attempts for solving
+homework problems~\cite{introclass}.
+
+\subsection{Results}
+\begin{table}[h]
+  \begin{center}
+    \begin{tabular}{l c c c c c r}
+      \toprule
+      Task & Attempt & Bug & \texttt{\#} revisions & Tree height & Largest cluster & Time\\
+      \midrule
+      \texttt{checksum} & \texttt{3b@3} & & 2 & 1 & 1 & 1.2\\
+      \texttt{checksum} & \texttt{cb@3} & & 3 & 1 & 2 & 1.7\\
+      \texttt{grade} & \texttt{1b@3} & UB & 2 & 1 & 1 & 0.8\\
+      \texttt{grade} & \texttt{31@3} & Logic & 7 & 2 & 5 & 1.0\\
+      \texttt{grade} & \texttt{b1@3} & UB & 2 & 1 & 1 & 0.5\\
+      \texttt{smallest} & \texttt{07@2} & UB & 9 & 1 & 3 & 0.7\\
+      \texttt{smallest} & \texttt{d2@1} & UB & 5 & 0 & 5 & 1.5\\
+      \bottomrule
+    \end{tabular}
+  \end{center}
+  \caption{Decision tree quality on patch pool generated by MSV for IntroClass programs}
+\end{table}
diff --git a/result.tex b/result.tex
deleted file mode 100644
index 7c61b55..0000000
--- a/result.tex
+++ /dev/null
@@ -1,19 +0,0 @@
-\section{Results}
-\begin{table}[h]
-  \begin{center}
-    \begin{tabular}{l c r r r}
-      \toprule
-      Task & Attempt & \texttt{\#} revisions & Tree depth & Largest cluster\\
-      \midrule
-      \texttt{checksum} & \texttt{3b@3} & 2 & 1 & 1\\
-      \texttt{checksum} & \texttt{cb@3} & 3 & 1 & 2\\
-      \texttt{grade} & \texttt{1b@3} & 2 & 1 & 1\\
-      \texttt{grade} & \texttt{31@3} & 8 & 2 & 1\\
-      \texttt{grade} & \texttt{b1@3} & 2 & 1 & 1\\
-      \texttt{smallest} & \texttt{d2@2} & 5 & 1 & 3\\
-      \texttt{smallest} & \texttt{07@2} & 9 & 1 & 3\\
-      \bottomrule
-    \end{tabular}
-  \end{center}
-  \caption{Decision tree quality on patch pool generated by MSV for IntroClass programs}
-\end{table}
diff --git a/technique.tex b/technique.tex
index 58e9348..470449f 100644
--- a/technique.tex
+++ b/technique.tex
@@ -111,6 +111,7 @@ is denoted in algorithm~\ref{overall}.
     \Require Meta program $P$
     \Ensure Set of differential tests $T$
     \State $T \gets \varnothing$
+    \State $D \gets \varnothing$ \Comment{Distinguished revision pairs}
     \State $H \gets \varnothing$ \Comment{Halt states}
     \State $S \gets \Call{InitialState}{P}$
     \While{$S \neq \varnothing$}
@@ -121,14 +122,12 @@ is denoted in algorithm~\ref{overall}.
       \State $(i', e') \gets \Call{Execute}{i, e}$
       \If{$\Call{IsHalt}{i}$}
         \For{$(\varPhi', n') \in H$}
-          \If{$n \ne n'$}
-            \State $t \gets \Call{DiffTest}{\varPhi, n, \varPhi', n'}$
-            \If{$t \ne \bot$}
-              $T \gets T \cup \{t\}$
-            \EndIf
+          \State $t \gets \Call{DiffTest}{D, \varPhi, n, \varPhi', n'}$
+          \If{$t \ne \bot$}
+            \State $T \gets T \cup \{t\}$
+            \State $D \gets D \cup \Call{DistinguishPairs}{t}$
           \EndIf
         \EndFor
-        \State TODO: describe clustering
         \State $H \gets H \cup \{(\varPhi, n)\}$
       \ElsIf{$\Call{IsBranch}{i}$}
         \State $(\varphi, i_c', i_a') \gets i'$
@@ -269,7 +268,6 @@ for calling \klee{} intrinsics and a compiler using LLVM as a backend.
 One kind of LLVM instruction may be used for difference purposes,
 so for better applicability we try to accommodate for a range of usages.
 For capturing output values, \psychic{} offers the following methods.
-\pagebreak
 \begin{enumerate}[noitemsep]
   \item Source code annotation\label{annote}
   \item Output files (including standard output)\label{stdout}
@@ -332,25 +330,28 @@ and \gls{smt} solving cost.  Such \gls{smt} formula is constructed
 in algorithm~\ref{diffgen} by first append a suffix to each output name,
 then for the common ones assert for possible distinction between two paths.
 If the path constraints of both states are \gls{sat} with at least
-one difference in output, a \gls{difftest} can be generated
+one difference in outputs, a \gls{difftest} can be generated
 from the \gls{smt} model.
 
 \begin{algorithm}
   \caption{\Gls{difftest} generation}\label{diffgen}
   \begin{algorithmic}[1]
-    \Procedure{DiffTest}{$\varPhi, n, \varPhi', n'$}
+    \Procedure{DiffTest}{$D, \varPhi, n, \varPhi', n'$}
+      \If{$n \ne n' \lor \{n, n'\} \in D$}
+        \Return $\bot$
+      \EndIf
       \State $\varPsi, N \gets \Call{RenameOutputs}{\varPhi, n}$
       \State $\varPsi', N' \gets \Call{RenameOutputs}{\varPhi', n'}$
-      \For{$a \in N \cap N'$}
-        \State $\varPsi_\text{diff}
-          \gets \varPsi_\text{diff} \lor \Call{Distinct}{a, n, n'}$
-      \EndFor
+      \State $\varPsi_\text{diff}
+        \gets \bigvee_{a \in N \cap N'}\Call{Distinct}{a, n, n'}$
       \If{$\lnot\Call{sat}{\varPsi \land \varPsi' \land \varPsi_\text{diff}}$}
-        \State \Return $\bot$
+        \Return $\bot$
       \EndIf
-      \State TODO: describe concrete execution
-      \State \Return $\Call{Model}{\varPsi \land \varPsi'
+      \State $m \gets \Call{Model}{\varPsi \land \varPsi'
                                    \land \varPsi_\text{diff}}$
+      \State $I \gets \Call{InputValues}{m}$
+      \State \Return $(m,
+        \{\Call{Execute}{I, r} \mid r \in \textsc{revisions}\})$
     \EndProcedure
     \Procedure{RenameOutputs}{$x, n$}
       \State $X, k \gets x$ \Comment{subexpressions and kind}
@@ -374,3 +375,12 @@ from the \gls{smt} model.
     \EndProcedure
   \end{algorithmic}
 \end{algorithm}
+
+\subsection{Decision tree construction}
+From each \gls{difftest}, a set of clusters of revisions with the same output
+is constructed.  We then superimpose these clustering sets to create
+hierarchical clusterings or a decision tree.  A minimal tree, i.e~a tree
+with the shortest height, is found through exhaustively search though
+all subsets of set of clustering sets.
+
+TODO: analyse complexity