From 3285b6df66a9c3e0a6502ae75c93085f2d6a0f44 Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Fri, 1 Dec 2023 16:25:25 +0900 Subject: Snapshot current content --- all.tex | 1 - bg.tex | 4 +++- eval.tex | 30 +++++++++++++++++++++++++++++- result.tex | 19 ------------------- technique.tex | 42 ++++++++++++++++++++++++++---------------- 5 files changed, 58 insertions(+), 38 deletions(-) delete mode 100644 result.tex diff --git a/all.tex b/all.tex index bc39715..fe2d8fa 100644 --- a/all.tex +++ b/all.tex @@ -108,7 +108,6 @@ \include{bg.tex} \include{technique.tex} \include{eval.tex} -\include{result.tex} \include{discuss.tex} \include{related.tex} \include{outro.tex} diff --git a/bg.tex b/bg.tex index 4c12048..b1845d4 100644 --- a/bg.tex +++ b/bg.tex @@ -66,6 +66,7 @@ Marking can be communicated either through command-line options for files (including standard streams) and command-line arguments or via in-source annotations~\cite{klee-docs}. + As shown in algorithm~\ref{symb-exec}, the path constraints is only expanded at branching instructions, so only values affecting the control flow are tracked, since \klee{} targets the exploration of execution paths. @@ -74,7 +75,8 @@ involving such value must be artificially injected. One common idiom for doing this condition injection~\cite{angelix,cpr,trident} is shown in figure~\ref{klee-output}. There, a~temporary symbolic avatar \texttt{var} is created for the sole purpose of being fixed to the value \texttt{val} -in the path constraints. +in the path constraints. From here, this mechanism shall be referred to +in abstract descriptions as \textsc{SymbolicOutput(value, context)}. \begin{figure}[h]\centering\hfill \begin{subfigure}{0.6\textwidth} diff --git a/eval.tex b/eval.tex index cd3fc7f..1431cd5 100644 --- a/eval.tex +++ b/eval.tex @@ -1 +1,29 @@ -\section{Evaluation} +\section{Experiments} +TODO: prose + +\subsection{Experimental setup} +We evaluate \psychic{} on patches generated by MSV~\cite{msv}, a fork +of the pattern-based \gls{apr} tool \textsc{Prophet}~\cite{genprog} +that adds extra repair templates and generate meta programs. +MSV is used on \textsc{IntroClass}, a set of attempts for solving +homework problems~\cite{introclass}. + +\subsection{Results} +\begin{table}[h] + \begin{center} + \begin{tabular}{l c c c c c r} + \toprule + Task & Attempt & Bug & \texttt{\#} revisions & Tree height & Largest cluster & Time\\ + \midrule + \texttt{checksum} & \texttt{3b@3} & & 2 & 1 & 1 & 1.2\\ + \texttt{checksum} & \texttt{cb@3} & & 3 & 1 & 2 & 1.7\\ + \texttt{grade} & \texttt{1b@3} & UB & 2 & 1 & 1 & 0.8\\ + \texttt{grade} & \texttt{31@3} & Logic & 7 & 2 & 5 & 1.0\\ + \texttt{grade} & \texttt{b1@3} & UB & 2 & 1 & 1 & 0.5\\ + \texttt{smallest} & \texttt{07@2} & UB & 9 & 1 & 3 & 0.7\\ + \texttt{smallest} & \texttt{d2@1} & UB & 5 & 0 & 5 & 1.5\\ + \bottomrule + \end{tabular} + \end{center} + \caption{Decision tree quality on patch pool generated by MSV for IntroClass programs} +\end{table} diff --git a/result.tex b/result.tex deleted file mode 100644 index 7c61b55..0000000 --- a/result.tex +++ /dev/null @@ -1,19 +0,0 @@ -\section{Results} -\begin{table}[h] - \begin{center} - \begin{tabular}{l c r r r} - \toprule - Task & Attempt & \texttt{\#} revisions & Tree depth & Largest cluster\\ - \midrule - \texttt{checksum} & \texttt{3b@3} & 2 & 1 & 1\\ - \texttt{checksum} & \texttt{cb@3} & 3 & 1 & 2\\ - \texttt{grade} & \texttt{1b@3} & 2 & 1 & 1\\ - \texttt{grade} & \texttt{31@3} & 8 & 2 & 1\\ - \texttt{grade} & \texttt{b1@3} & 2 & 1 & 1\\ - \texttt{smallest} & \texttt{d2@2} & 5 & 1 & 3\\ - \texttt{smallest} & \texttt{07@2} & 9 & 1 & 3\\ - \bottomrule - \end{tabular} - \end{center} - \caption{Decision tree quality on patch pool generated by MSV for IntroClass programs} -\end{table} diff --git a/technique.tex b/technique.tex index 58e9348..470449f 100644 --- a/technique.tex +++ b/technique.tex @@ -111,6 +111,7 @@ is denoted in algorithm~\ref{overall}. \Require Meta program $P$ \Ensure Set of differential tests $T$ \State $T \gets \varnothing$ + \State $D \gets \varnothing$ \Comment{Distinguished revision pairs} \State $H \gets \varnothing$ \Comment{Halt states} \State $S \gets \Call{InitialState}{P}$ \While{$S \neq \varnothing$} @@ -121,14 +122,12 @@ is denoted in algorithm~\ref{overall}. \State $(i', e') \gets \Call{Execute}{i, e}$ \If{$\Call{IsHalt}{i}$} \For{$(\varPhi', n') \in H$} - \If{$n \ne n'$} - \State $t \gets \Call{DiffTest}{\varPhi, n, \varPhi', n'}$ - \If{$t \ne \bot$} - $T \gets T \cup \{t\}$ - \EndIf + \State $t \gets \Call{DiffTest}{D, \varPhi, n, \varPhi', n'}$ + \If{$t \ne \bot$} + \State $T \gets T \cup \{t\}$ + \State $D \gets D \cup \Call{DistinguishPairs}{t}$ \EndIf \EndFor - \State TODO: describe clustering \State $H \gets H \cup \{(\varPhi, n)\}$ \ElsIf{$\Call{IsBranch}{i}$} \State $(\varphi, i_c', i_a') \gets i'$ @@ -269,7 +268,6 @@ for calling \klee{} intrinsics and a compiler using LLVM as a backend. One kind of LLVM instruction may be used for difference purposes, so for better applicability we try to accommodate for a range of usages. For capturing output values, \psychic{} offers the following methods. -\pagebreak \begin{enumerate}[noitemsep] \item Source code annotation\label{annote} \item Output files (including standard output)\label{stdout} @@ -332,25 +330,28 @@ and \gls{smt} solving cost. Such \gls{smt} formula is constructed in algorithm~\ref{diffgen} by first append a suffix to each output name, then for the common ones assert for possible distinction between two paths. If the path constraints of both states are \gls{sat} with at least -one difference in output, a \gls{difftest} can be generated +one difference in outputs, a \gls{difftest} can be generated from the \gls{smt} model. \begin{algorithm} \caption{\Gls{difftest} generation}\label{diffgen} \begin{algorithmic}[1] - \Procedure{DiffTest}{$\varPhi, n, \varPhi', n'$} + \Procedure{DiffTest}{$D, \varPhi, n, \varPhi', n'$} + \If{$n \ne n' \lor \{n, n'\} \in D$} + \Return $\bot$ + \EndIf \State $\varPsi, N \gets \Call{RenameOutputs}{\varPhi, n}$ \State $\varPsi', N' \gets \Call{RenameOutputs}{\varPhi', n'}$ - \For{$a \in N \cap N'$} - \State $\varPsi_\text{diff} - \gets \varPsi_\text{diff} \lor \Call{Distinct}{a, n, n'}$ - \EndFor + \State $\varPsi_\text{diff} + \gets \bigvee_{a \in N \cap N'}\Call{Distinct}{a, n, n'}$ \If{$\lnot\Call{sat}{\varPsi \land \varPsi' \land \varPsi_\text{diff}}$} - \State \Return $\bot$ + \Return $\bot$ \EndIf - \State TODO: describe concrete execution - \State \Return $\Call{Model}{\varPsi \land \varPsi' + \State $m \gets \Call{Model}{\varPsi \land \varPsi' \land \varPsi_\text{diff}}$ + \State $I \gets \Call{InputValues}{m}$ + \State \Return $(m, + \{\Call{Execute}{I, r} \mid r \in \textsc{revisions}\})$ \EndProcedure \Procedure{RenameOutputs}{$x, n$} \State $X, k \gets x$ \Comment{subexpressions and kind} @@ -374,3 +375,12 @@ from the \gls{smt} model. \EndProcedure \end{algorithmic} \end{algorithm} + +\subsection{Decision tree construction} +From each \gls{difftest}, a set of clusters of revisions with the same output +is constructed. We then superimpose these clustering sets to create +hierarchical clusterings or a decision tree. A minimal tree, i.e~a tree +with the shortest height, is found through exhaustively search though +all subsets of set of clustering sets. + +TODO: analyse complexity -- cgit 1.4.1