diff options
-rw-r--r-- | all.tex | 1 | ||||
-rw-r--r-- | eval.tex | 60 | ||||
-rw-r--r-- | glossary.tex | 2 | ||||
-rw-r--r-- | intro.tex | 44 | ||||
-rw-r--r-- | ref.bib | 92 | ||||
-rw-r--r-- | shell.nix | 3 |
6 files changed, 167 insertions, 35 deletions
diff --git a/all.tex b/all.tex index fe2d8fa..519096f 100644 --- a/all.tex +++ b/all.tex @@ -19,6 +19,7 @@ \usepackage{graphicx} \usepackage{glossaries} \usepackage{pdfpages} +\usepackage{siunitx} \usepackage[skip=-1ex]{subcaption} \usepackage{tikz} \usepackage{tocloft} diff --git a/eval.tex b/eval.tex index 1431cd5..cfd76ab 100644 --- a/eval.tex +++ b/eval.tex @@ -1,29 +1,61 @@ \section{Experiments} -TODO: prose +To evaluate our approach, we perform experiments with our implementation +\psychic{} on C meta programs to generate decision trees of patches. + +\subsection{Experiment setup} +Experiments are run on a machine with AMD Ryzen 3700X at \SI{3.6}{\giga\hertz} +and \SI{16}{\giga\byte} of memory. Software dependencies and tooling +are provided by a declarative Nix environment~\cite{nix} for reproducibility. -\subsection{Experimental setup} We evaluate \psychic{} on patches generated by MSV~\cite{msv}, a fork -of the pattern-based \gls{apr} tool \textsc{Prophet}~\cite{genprog} +of the pattern-based \gls{apr} tool \textsc{Prophet}~\cite{prophet} that adds extra repair templates and generate meta programs. -MSV is used on \textsc{IntroClass}, a set of attempts for solving +Patches are generated for \textsc{IntroClass}, a set of attempts for solving homework problems~\cite{introclass}. +In addition to the test cases provided by \textsc{IntroClass}, +which are non-exhaustive of the input domains, we use +property tests~\cite{hypothesis} with the respective +ground truth program\footnote{\url{https://github.com/McSinyx/IntroClass}} +to select buggy programs. MSV then generate fixes in form of meta programs, +which are then post-processed into \psychic{}-compatible format. + +A selection of 10 meta programs with at least two semantically different +revisions (including the original buggy version) are then fed to \psychic{} +to generate differential tests and decision trees with a timeout of 10 minutes. + \subsection{Results} +Experiment results on \textsc{IntroClass} is shown in table~\ref{result}. +In case of \texttt{digits-1b31@2} and \texttt{smallest-} \texttt{d25c@1}, +\psychic{} failed to generate differential tests. In other cases, +except for the timeout in \texttt{digits-} \texttt{0cdf@3}, +the tool successfully differentiate all semantically different patches. + \begin{table}[h] \begin{center} - \begin{tabular}{l c c c c c r} + \begin{tabular}{l c l r r r r} \toprule - Task & Attempt & Bug & \texttt{\#} revisions & Tree height & Largest cluster & Time\\ + Task & Attempt & Bug type & $n$ & Tree height & Largest cluster & Time\\ \midrule - \texttt{checksum} & \texttt{3b@3} & & 2 & 1 & 1 & 1.2\\ - \texttt{checksum} & \texttt{cb@3} & & 3 & 1 & 2 & 1.7\\ - \texttt{grade} & \texttt{1b@3} & UB & 2 & 1 & 1 & 0.8\\ - \texttt{grade} & \texttt{31@3} & Logic & 7 & 2 & 5 & 1.0\\ - \texttt{grade} & \texttt{b1@3} & UB & 2 & 1 & 1 & 0.5\\ - \texttt{smallest} & \texttt{07@2} & UB & 9 & 1 & 3 & 0.7\\ - \texttt{smallest} & \texttt{d2@1} & UB & 5 & 0 & 5 & 1.5\\ + \texttt{checksum} & \texttt{3b23@3} & Arithmetic & 2 & 1 & 1 & 1.2\\ + \texttt{checksum} & \texttt{cb24@3} & Logic & 3 & 1 & 2 & 1.7\\ + \texttt{digits} & \texttt{0cdf@3} & Logic & 16 & 1 & 5 & 600.0\\ + \texttt{digits} & \texttt{1b31@2} & Logic & 5 & 0 & 5 & 0.9\\ + \texttt{grade} & \texttt{1b31@3} & UB & 2 & 1 & 1 & 0.8\\ + \texttt{grade} & \texttt{317a@3} & Logic & 8 & 2 & 5 & 1.0\\ + \texttt{grade} & \texttt{b192@3} & UB & 2 & 1 & 1 & 0.5\\ + \texttt{median} & \texttt{97f6@3} & UB & 7 & 1 & 6 & 1.6\\ + \texttt{smallest} & \texttt{0704@2} & UB & 5 & 1 & 3 & 87.0\\ + \texttt{smallest} & \texttt{d25c@1} & UB & 5 & 0 & 5 & 1.3\\ \bottomrule \end{tabular} \end{center} - \caption{Decision tree quality on patch pool generated by MSV for IntroClass programs} + \caption{Decision tree quality on patch pool generated by MSV + for \textsc{IntroClass} programs. Respective to each meta program + with $n$ revisions are the height of the patch decision tree + and the maximum number of undistinguished revisions.}\label{result} \end{table} + +The stark difference in execution time reveals an inherent weakness +of our approach: heavily-nested conditions or loops increase +the search space dramatically. diff --git a/glossary.tex b/glossary.tex index 1134321..d8953ab 100644 --- a/glossary.tex +++ b/glossary.tex @@ -1,4 +1,6 @@ \makenoidxglossaries +\newglossaryentry{bug}{name={bug}, + description={is a software defect that results in undesired behaviors}} \newglossaryentry{difftest}{name={differential test}, description={is an input and a collection of outputs of two or more programs of the same purpose, such that each output is pairwise different}} diff --git a/intro.tex b/intro.tex index 0a41d8f..4cb593f 100644 --- a/intro.tex +++ b/intro.tex @@ -1,8 +1,44 @@ \setcounter{page}{1} \section{Introduction} -TODO: mention symbolic execution\\ -TODO: mention \gls{apr} +Modern software development is largely an iterative and incremental +process~\cite{incremental}. Due to practically commonly weak specifications +and thus incomplete verification, each refinement may involve discovering, +eliminating, and creating new software defects, or \glspl{bug}. +Like for other software engineering tasks, efforts have been put into +automating this debugging process, including detecting and locating +software faults~\cite{fl-survey}, and generating fixes~\cite{apr}. -This work's main contributions are:\begin{enumerate} - \item A tool named \textsc{Psychic} +Progress on \gls{apr} is greatly hindered by the weak specification issue +as well~\cite{apr}. Due to the criteria for correctness being incomplete, +patches generated may over-fit such test suites, resulting in an incomplete fix +and/or new regression \glspl{bug}~\cite{overfit}. For the same reason, +more than one patch fitting the specification can be found. +Contemporary \gls{apr} tools do not give insight on the difference +between these patches, but only a ranking on potential correctness~\cite{apr}. + +While this ranking is beneficial in evaluating the tools themselves, +together with the lack of certainty of correctness, it is an insufficient guide +for choosing \emph{the} desired patch, if any. \Gls{apr} users must then +verify each patch to decide which to apply. Recognizing the tedious nature +of this process, we work on methods to highlight the semantic difference +between patches, in form of \glspl{difftest}. Existing automation techniques +for differentiation such as black- or gray-box fuzzing~\cite{nezha} +and symbolic execution~\cite{shadow} has shown promise on pairs +of program revisions. However, there is a lack in precedents in doing so +at scale like for multiple \gls{apr}-generated patches. + +Symbolic execution is observed as one promising direction as it works directly +with path constraints, which can be combined and manipulated to reveal +semantic differences. In this research, we explore symbolic execution +for mass differential testing with the ultimate goal of making deciding on +automatically generated patches easier for developers. The main contributions +of this work are: +\begin{enumerate} + \item Introduction of semantic difference mining from multiple + program revisions as a semi-automated pipeline, for communicating + the reasoning behind each patch in form of a decision tree + \item A tool named \psychic{} implementing this process for C programs + and handling platform specificities for the generalization of application + \item Preliminary results of its performance + on patches automatically generated for small programs \end{enumerate} diff --git a/ref.bib b/ref.bib index 4c2eac1..9519fca 100644 --- a/ref.bib +++ b/ref.bib @@ -30,6 +30,25 @@ pages = {237--240}, note = {\doi{10.1109/CCC.1999.766282}}} +@article{incremental, + author = {Craig Larman and Victor R. Basili}, + journal = {Computer}, + publisher = {IEEE Computer Society}, + title = {Iterative and incremental developments: A brief history}, + year = 2003, + volume = 36, + number = 6, + pages = {47--56}, + note = {\doi{10.1109/MC.2003.1204375}}} + +@inproceedings{nix, + author = {Eelco Dolstra and Armijn Hemel}, + title = {Purely Functional System Configuration Management}, + booktitle = {USENIX workshop on Hot top. oper. syst. (HotOS)}, + year = 2007, + month = may, + url = {https://www.usenix.org/conference/hotos-xi/purely-functional-system-configuration-management}} + @inproceedings{klee, author = {Cristian Cadar and Daniel Dunbar and Dawson Engler}, title = {{KLEE}: Unassisted and Automatic Generation of High-Coverage Tests @@ -151,6 +170,17 @@ pages = {17--26}, doi = {10.1109/Trustcom.2015.608}} +@article{fl-survey, + author = {Wong, W. Eric and Gao, Ruizhi and Li, Yihao + and Rui Abreu and Franz Wotawa}, + journal = {IEEE Trans. Softw. Eng. (TSE)}, + title = {A Survey on Software Fault Localization}, + year = 2016, + volume = 42, + number = 8, + pages = {707--740}, + note = {\doi{10.1109/TSE.2016.2521368}}} + @inproceedings{angelix, author={Sergey Mechtaev and Yi, Jooyong and Abhik Roychoudhury}, booktitle = {Proc. ACM/IEEE Int. Conf. Softw. Eng. (ICSE)}, @@ -160,6 +190,29 @@ pages = {691--701}, note = {\doi{10.1145/2884781.2884807}}} +@inproceedings{prophet, + author = {Long, Fan and Martin Rinard}, + title = {Automatic Patch Generation by Learning Correct Code}, + year = 2016, + note = {\doi{10.1145/2837614.2837617}}, + booktitle = {Proc. ACM SIGPLAN-SIGACT Symp. Princ. Program. Lang. (POPL)}, + pages = {298--312}, + numpages = {15}, + keywords = {Program repair, Learning correct code, Code correctness model}} + +@article{apr, + author = {Claire {Le Goues} and Michael {Pradel} and Abhik Roychoudhury}, + title = {Automated Program Repair}, + year = 2019, + volume = 62, + number = 12, + issn = {0001-0782}, + note = {\doi{10.1145/3318162}}, + journal = {Commun. ACM}, + month = nov, + pages = {56--65}, + numpages = 10} + @article{repairnator, author = {Martin Monperrus and Simon Urli and Thomas Durieux and Matias Martinez and Benoit Baudry and Lionel Seinturier}, @@ -174,6 +227,27 @@ articleno = 2, numpages = 12} +@article{hypothesis, + note = {\doi{10.21105/joss.01891}}, + year = 2019, + publisher = {The Open Journal}, + volume = 4, + number = 43, + pages = {1891}, + author = {David R. MacIver and Zac Hatfield-Dodds + and {many other contributors}}, + title = {Hypothesis: A new approach to property-based testing}, + journal = {Journal of Open Source Software}} + +@inproceedings{nezha, + author = {Theofilos Petsios and Adrian Tang and Salvatore Stolfo + and Angelos D. Keromytis and Suman Jana}, + booktitle = {IEEE Symp. Secur. and Priv. (SP)}, + title = {Nezha: Efficient Domain-Independent Differential Testing}, + year = 2017, + pages = {615--632}, + note = {\doi{10.1109/SP.2017.27}}} + @inproceedings{dbgbench, author = {Marcel B\"{o}hme and Ezekiel Olamide Soremekun and Sudipta Chattopadhyay and Emamurho Ugherughe @@ -258,7 +332,8 @@ and Tegawend\'{e} F. Bissyand\'{e} and Kim, Dongsun and Wu, Peng and Jacques Klein and Mao, Xiaoguang and Le Traon, Yves}, title = {On the Efficiency of Test Suite Based Program Repair: - A Systematic Assessment of 16 Automated Repair Systems for Java Programs}, + A Systematic Assessment of 16 Automated Repair Systems + for Java Programs}, year = 2020, isbn = {9781450371216}, publisher = {Association for Computing Machinery}, @@ -269,20 +344,6 @@ keywords = {patch generation, program repair, empirical assessment, efficiency}} -@article{apr, - author = {Claire {Le Goues} and M. Pradel - and Abhik Roychoudhury and S. Chandra}, - journal = {IEEE Software}, - title = {Automatic Program Repair}, - year = 2021, - volume = 38, - number = {04}, - issn = {1937-4194}, - pages = {22--27}, - doi = {10.1109/MS.2021.3072577}, - publisher = {IEEE Computer Society}, - month = jul} - @inproceedings{cpr, author = {Ridwan Salihin Shariffdeen and Yannic Noller and Lars Grunske and Abhik Roychoudhury}, @@ -349,4 +410,3 @@ title = {{MSV}}, publisher = {Suresoft}, url = {https://github.com/Suresoft-GLaDOS/MSV}} - url = diff --git a/shell.nix b/shell.nix index 935766e..d773b8f 100644 --- a/shell.nix +++ b/shell.nix @@ -4,6 +4,7 @@ mkShell { inherit (texlive) latexmk scheme-basic algorithms algorithmicx amsmath babel-vietnamese booktabs biblatex caption cases cite enumitem fancyhdr float glossaries hyperref ieeetran - pdflscape pdfpages pgf psnfss rsfs times tocloft varwidth vntex xcolor; + pdflscape pdfpages pgf psnfss rsfs siunitx times tocloft + varwidth vntex xcolor; }) ]; } |