From 0f6b9207d5380eefb3a99d21a12751a32de63bfb Mon Sep 17 00:00:00 2001
From: Nguyễn Gia Phong <cnx@loang.net>
Date: Mon, 11 Dec 2023 08:30:17 +0900
Subject: Finish introduction

---
 all.tex      |  1 +
 eval.tex     | 60 ++++++++++++++++++++++++++++++---------
 glossary.tex |  2 ++
 intro.tex    | 44 ++++++++++++++++++++++++++---
 ref.bib      | 92 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
 shell.nix    |  3 +-
 6 files changed, 167 insertions(+), 35 deletions(-)

diff --git a/all.tex b/all.tex
index fe2d8fa..519096f 100644
--- a/all.tex
+++ b/all.tex
@@ -19,6 +19,7 @@
 \usepackage{graphicx}
 \usepackage{glossaries}
 \usepackage{pdfpages}
+\usepackage{siunitx}
 \usepackage[skip=-1ex]{subcaption}
 \usepackage{tikz}
 \usepackage{tocloft}
diff --git a/eval.tex b/eval.tex
index 1431cd5..cfd76ab 100644
--- a/eval.tex
+++ b/eval.tex
@@ -1,29 +1,61 @@
 \section{Experiments}
-TODO: prose
+To evaluate our approach, we perform experiments with our implementation
+\psychic{} on C meta programs to generate decision trees of patches.
+
+\subsection{Experiment setup}
+Experiments are run on a machine with AMD Ryzen 3700X at \SI{3.6}{\giga\hertz}
+and \SI{16}{\giga\byte} of memory.  Software dependencies and tooling
+are provided by a declarative Nix environment~\cite{nix} for reproducibility.
 
-\subsection{Experimental setup}
 We evaluate \psychic{} on patches generated by MSV~\cite{msv}, a fork
-of the pattern-based \gls{apr} tool \textsc{Prophet}~\cite{genprog}
+of the pattern-based \gls{apr} tool \textsc{Prophet}~\cite{prophet}
 that adds extra repair templates and generate meta programs.
-MSV is used on \textsc{IntroClass}, a set of attempts for solving
+Patches are generated for \textsc{IntroClass}, a set of attempts for solving
 homework problems~\cite{introclass}.
 
+In addition to the test cases provided by \textsc{IntroClass},
+which are non-exhaustive of the input domains, we use
+property tests~\cite{hypothesis} with the respective
+ground truth program\footnote{\url{https://github.com/McSinyx/IntroClass}}
+to select buggy programs.  MSV then generate fixes in form of meta programs,
+which are then post-processed into \psychic{}-compatible format.
+
+A selection of 10 meta programs with at least two semantically different
+revisions (including the original buggy version) are then fed to \psychic{}
+to generate differential tests and decision trees with a timeout of 10 minutes.
+
 \subsection{Results}
+Experiment results on \textsc{IntroClass} is shown in table~\ref{result}.
+In case of \texttt{digits-1b31@2} and \texttt{smallest-} \texttt{d25c@1},
+\psychic{} failed to generate differential tests.  In other cases,
+except for the timeout in \texttt{digits-} \texttt{0cdf@3},
+the tool successfully differentiate all semantically different patches.
+
 \begin{table}[h]
   \begin{center}
-    \begin{tabular}{l c c c c c r}
+    \begin{tabular}{l c l r r r r}
       \toprule
-      Task & Attempt & Bug & \texttt{\#} revisions & Tree height & Largest cluster & Time\\
+      Task & Attempt & Bug type & $n$ & Tree height & Largest cluster & Time\\
       \midrule
-      \texttt{checksum} & \texttt{3b@3} & & 2 & 1 & 1 & 1.2\\
-      \texttt{checksum} & \texttt{cb@3} & & 3 & 1 & 2 & 1.7\\
-      \texttt{grade} & \texttt{1b@3} & UB & 2 & 1 & 1 & 0.8\\
-      \texttt{grade} & \texttt{31@3} & Logic & 7 & 2 & 5 & 1.0\\
-      \texttt{grade} & \texttt{b1@3} & UB & 2 & 1 & 1 & 0.5\\
-      \texttt{smallest} & \texttt{07@2} & UB & 9 & 1 & 3 & 0.7\\
-      \texttt{smallest} & \texttt{d2@1} & UB & 5 & 0 & 5 & 1.5\\
+      \texttt{checksum} & \texttt{3b23@3} & Arithmetic & 2 & 1 & 1 & 1.2\\
+      \texttt{checksum} & \texttt{cb24@3} & Logic & 3 & 1 & 2 & 1.7\\
+      \texttt{digits} & \texttt{0cdf@3} & Logic & 16 & 1 & 5 & 600.0\\
+      \texttt{digits} & \texttt{1b31@2} & Logic & 5 & 0 & 5 & 0.9\\
+      \texttt{grade} & \texttt{1b31@3} & UB & 2 & 1 & 1 & 0.8\\
+      \texttt{grade} & \texttt{317a@3} & Logic & 8 & 2 & 5 & 1.0\\
+      \texttt{grade} & \texttt{b192@3} & UB & 2 & 1 & 1 & 0.5\\
+      \texttt{median} & \texttt{97f6@3} & UB & 7 & 1 & 6 & 1.6\\
+      \texttt{smallest} & \texttt{0704@2} & UB & 5 & 1 & 3 & 87.0\\
+      \texttt{smallest} & \texttt{d25c@1} & UB & 5 & 0 & 5 & 1.3\\
       \bottomrule
     \end{tabular}
   \end{center}
-  \caption{Decision tree quality on patch pool generated by MSV for IntroClass programs}
+  \caption{Decision tree quality on patch pool generated by MSV
+    for \textsc{IntroClass} programs.  Respective to each meta program
+    with $n$ revisions are the height of the patch decision tree
+    and the maximum number of undistinguished revisions.}\label{result}
 \end{table}
+
+The stark difference in execution time reveals an inherent weakness
+of our approach: heavily-nested conditions or loops increase
+the search space dramatically.
diff --git a/glossary.tex b/glossary.tex
index 1134321..d8953ab 100644
--- a/glossary.tex
+++ b/glossary.tex
@@ -1,4 +1,6 @@
 \makenoidxglossaries
+\newglossaryentry{bug}{name={bug},
+  description={is a software defect that results in undesired behaviors}}
 \newglossaryentry{difftest}{name={differential test},
   description={is an input and a collection of outputs of two or more programs
     of the same purpose, such that each output is pairwise different}}
diff --git a/intro.tex b/intro.tex
index 0a41d8f..4cb593f 100644
--- a/intro.tex
+++ b/intro.tex
@@ -1,8 +1,44 @@
 \setcounter{page}{1}
 \section{Introduction}
-TODO: mention symbolic execution\\
-TODO: mention \gls{apr}
+Modern software development is largely an iterative and incremental
+process~\cite{incremental}.  Due to practically commonly weak specifications
+and thus incomplete verification, each refinement may involve discovering,
+eliminating, and creating new software defects, or \glspl{bug}.
+Like for other software engineering tasks, efforts have been put into
+automating this debugging process, including detecting and locating
+software faults~\cite{fl-survey}, and generating fixes~\cite{apr}.
 
-This work's main contributions are:\begin{enumerate}
-  \item A tool named \textsc{Psychic}
+Progress on \gls{apr} is greatly hindered by the weak specification issue
+as well~\cite{apr}.  Due to the criteria for correctness being incomplete,
+patches generated may over-fit such test suites, resulting in an incomplete fix
+and/or new regression \glspl{bug}~\cite{overfit}.  For the same reason,
+more than one patch fitting the specification can be found.
+Contemporary \gls{apr} tools do not give insight on the difference
+between these patches, but only a ranking on potential correctness~\cite{apr}.
+
+While this ranking is beneficial in evaluating the tools themselves,
+together with the lack of certainty of correctness, it is an insufficient guide
+for choosing \emph{the} desired patch, if any.  \Gls{apr} users must then
+verify each patch to decide which to apply.  Recognizing the tedious nature
+of this process, we work on methods to highlight the semantic difference
+between patches, in form of \glspl{difftest}.  Existing automation techniques
+for differentiation such as black- or gray-box fuzzing~\cite{nezha}
+and symbolic execution~\cite{shadow} has shown promise on pairs
+of program revisions.  However, there is a lack in precedents in doing so
+at scale like for multiple \gls{apr}-generated patches.
+
+Symbolic execution is observed as one promising direction as it works directly
+with path constraints, which can be combined and manipulated to reveal
+semantic differences.  In this research, we explore symbolic execution
+for mass differential testing with the ultimate goal of making deciding on
+automatically generated patches easier for developers.  The main contributions
+of this work are:
+\begin{enumerate}
+  \item Introduction of semantic difference mining from multiple
+    program revisions as a semi-automated pipeline, for communicating
+    the reasoning behind each patch in form of a decision tree
+  \item A tool named \psychic{} implementing this process for C programs
+    and handling platform specificities for the generalization of application
+  \item Preliminary results of its performance
+    on patches automatically generated for small programs
 \end{enumerate}
diff --git a/ref.bib b/ref.bib
index 4c2eac1..9519fca 100644
--- a/ref.bib
+++ b/ref.bib
@@ -30,6 +30,25 @@
   pages = {237--240},
   note = {\doi{10.1109/CCC.1999.766282}}}
 
+@article{incremental,
+  author = {Craig Larman and Victor R. Basili},
+  journal = {Computer},
+  publisher = {IEEE Computer Society},
+  title = {Iterative and incremental developments: A brief history},
+  year = 2003,
+  volume = 36,
+  number = 6,
+  pages = {47--56},
+  note = {\doi{10.1109/MC.2003.1204375}}}
+
+@inproceedings{nix,
+  author = {Eelco Dolstra and Armijn Hemel},
+  title = {Purely Functional System Configuration Management},
+  booktitle = {USENIX workshop on Hot top. oper. syst. (HotOS)},
+  year = 2007,
+  month = may,
+  url = {https://www.usenix.org/conference/hotos-xi/purely-functional-system-configuration-management}}
+
 @inproceedings{klee,
   author = {Cristian Cadar and Daniel Dunbar and Dawson Engler},
   title = {{KLEE}: Unassisted and Automatic Generation of High-Coverage Tests
@@ -151,6 +170,17 @@
   pages = {17--26},
   doi = {10.1109/Trustcom.2015.608}}
 
+@article{fl-survey,
+  author = {Wong, W. Eric and Gao, Ruizhi and Li, Yihao
+            and Rui Abreu and Franz Wotawa},
+  journal = {IEEE Trans. Softw. Eng. (TSE)},
+  title = {A Survey on Software Fault Localization},
+  year = 2016,
+  volume = 42,
+  number = 8,
+  pages = {707--740},
+  note = {\doi{10.1109/TSE.2016.2521368}}}
+
 @inproceedings{angelix,
   author={Sergey Mechtaev and Yi, Jooyong and Abhik Roychoudhury},
   booktitle = {Proc. ACM/IEEE Int. Conf. Softw. Eng. (ICSE)},
@@ -160,6 +190,29 @@
   pages = {691--701},
   note = {\doi{10.1145/2884781.2884807}}}
 
+@inproceedings{prophet,
+  author = {Long, Fan and Martin Rinard},
+  title = {Automatic Patch Generation by Learning Correct Code},
+  year = 2016,
+  note = {\doi{10.1145/2837614.2837617}},
+  booktitle = {Proc. ACM SIGPLAN-SIGACT Symp. Princ. Program. Lang. (POPL)},
+  pages = {298--312},
+  numpages = {15},
+  keywords = {Program repair, Learning correct code, Code correctness model}}
+
+@article{apr,
+  author = {Claire {Le Goues} and Michael {Pradel} and Abhik Roychoudhury},
+  title = {Automated Program Repair},
+  year = 2019,
+  volume = 62,
+  number = 12,
+  issn = {0001-0782},
+  note = {\doi{10.1145/3318162}},
+  journal = {Commun. ACM},
+  month = nov,
+  pages = {56--65},
+  numpages = 10}
+
 @article{repairnator,
   author = {Martin Monperrus and Simon Urli and Thomas Durieux
             and Matias Martinez and Benoit Baudry and Lionel Seinturier},
@@ -174,6 +227,27 @@
   articleno = 2,
   numpages = 12}
 
+@article{hypothesis,
+  note = {\doi{10.21105/joss.01891}},
+  year = 2019,
+  publisher = {The Open Journal},
+  volume = 4,
+  number = 43,
+  pages = {1891},
+  author = {David R. MacIver and Zac Hatfield-Dodds
+            and {many other contributors}},
+  title = {Hypothesis: A new approach to property-based testing},
+  journal = {Journal of Open Source Software}}
+
+@inproceedings{nezha,
+  author = {Theofilos Petsios and Adrian Tang and Salvatore Stolfo
+            and Angelos D. Keromytis and Suman Jana},
+  booktitle = {IEEE Symp. Secur. and Priv. (SP)},
+  title = {Nezha: Efficient Domain-Independent Differential Testing},
+  year = 2017,
+  pages = {615--632},
+  note = {\doi{10.1109/SP.2017.27}}}
+
 @inproceedings{dbgbench,
   author = {Marcel B\"{o}hme and Ezekiel Olamide Soremekun
             and Sudipta Chattopadhyay and Emamurho Ugherughe
@@ -258,7 +332,8 @@
             and Tegawend\'{e} F. Bissyand\'{e} and Kim, Dongsun and Wu, Peng
             and Jacques Klein and Mao, Xiaoguang and Le Traon, Yves},
   title = {On the Efficiency of Test Suite Based Program Repair:
-           A Systematic Assessment of 16 Automated Repair Systems for Java Programs},
+           A Systematic Assessment of 16 Automated Repair Systems
+           for Java Programs},
   year = 2020,
   isbn = {9781450371216},
   publisher = {Association for Computing Machinery},
@@ -269,20 +344,6 @@
   keywords = {patch generation, program repair,
               empirical assessment, efficiency}}
 
-@article{apr,
-  author = {Claire {Le Goues} and M. Pradel
-            and Abhik Roychoudhury and S. Chandra},
-  journal = {IEEE Software},
-  title = {Automatic Program Repair},
-  year = 2021,
-  volume = 38,
-  number = {04},
-  issn = {1937-4194},
-  pages = {22--27},
-  doi = {10.1109/MS.2021.3072577},
-  publisher = {IEEE Computer Society},
-  month = jul}
-
 @inproceedings{cpr,
   author = {Ridwan Salihin Shariffdeen and Yannic Noller
             and Lars Grunske and Abhik Roychoudhury},
@@ -349,4 +410,3 @@
   title = {{MSV}},
   publisher = {Suresoft},
   url = {https://github.com/Suresoft-GLaDOS/MSV}}
-  url =
diff --git a/shell.nix b/shell.nix
index 935766e..d773b8f 100644
--- a/shell.nix
+++ b/shell.nix
@@ -4,6 +4,7 @@ mkShell {
     inherit (texlive) latexmk scheme-basic
       algorithms algorithmicx amsmath babel-vietnamese booktabs biblatex
       caption cases cite enumitem fancyhdr float glossaries hyperref ieeetran
-      pdflscape pdfpages pgf psnfss rsfs times tocloft varwidth vntex xcolor;
+      pdflscape pdfpages pgf psnfss rsfs siunitx times tocloft
+      varwidth vntex xcolor;
   }) ];
 }
-- 
cgit 1.4.1