From 29ec67340c03fb15dce465381737b96e12ef764b Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Sat, 28 Oct 2023 15:57:52 +0900 Subject: Draft skeleton --- .gitignore | 13 ++++++ abstract.tex | 5 +++ ack.tex | 3 ++ all.tex | 113 ++++++++++++++++++++++++++++++++++++++++++++++++ discuss.tex | 4 ++ eval.tex | 1 + front.tex | 20 +++++++++ glossary.tex | 32 ++++++++++++++ intro.tex | 2 + outro.tex | 1 + praelimen.tex | 7 +++ ref.bib | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ result.tex | 1 + technique.tex | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++ title.tex | 20 +++++++++ 15 files changed, 476 insertions(+) create mode 100644 .gitignore create mode 100644 abstract.tex create mode 100644 ack.tex create mode 100644 all.tex create mode 100644 discuss.tex create mode 100644 eval.tex create mode 100644 front.tex create mode 100644 glossary.tex create mode 100644 intro.tex create mode 100644 outro.tex create mode 100644 praelimen.tex create mode 100644 ref.bib create mode 100644 result.tex create mode 100644 technique.tex create mode 100644 title.tex diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1e18ce9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +*.aux +*.bbl +*.blg +*.fdb_latexmk +*.fls +*.glo +*.ist +*.lof +*.log +*.out +*.synctex.gz +*.toc +all.pdf diff --git a/abstract.tex b/abstract.tex new file mode 100644 index 0000000..4701ff7 --- /dev/null +++ b/abstract.tex @@ -0,0 +1,5 @@ +\renewcommand{\abstractname}{\Large Abstract} +\begin{abstract} +Your abstract should be here. +\end{abstract} +\thispagestyle{empty} diff --git a/ack.tex b/ack.tex new file mode 100644 index 0000000..3cb4b9a --- /dev/null +++ b/ack.tex @@ -0,0 +1,3 @@ +\addcontentsline{toc}{section}{Acknowledgements} +\section*{\hfill\Large Acknowledgements\hfill\hfill} +% TODO: grants diff --git a/all.tex b/all.tex new file mode 100644 index 0000000..4073a61 --- /dev/null +++ b/all.tex @@ -0,0 +1,113 @@ +\documentclass[11pt, a4paper, onecolumn, oneside]{report} +\usepackage[vietnamese,english]{babel} +\usepackage{mathptmx} +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} +\RequirePackage[top=3cm, bottom=1in, left=1in, right=1in]{geometry} +\linespread{1.3} + +\usepackage{algorithm} +\usepackage{algpseudocode} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{caption} +\usepackage{cases} +\usepackage{cite} +\usepackage{enumerate} +\usepackage{fancyhdr} +\usepackage{glossaries} +\usepackage{graphicx} +\usepackage[hidelinks]{hyperref} +\usepackage{pdfpages} +\usepackage{subcaption} +\usepackage{tikz} +\usepackage{tocloft} + +\usetikzlibrary{arrows} +\usetikzlibrary{decorations.pathmorphing} +\usetikzlibrary{fit} +\usetikzlibrary{matrix} +\usetikzlibrary{positioning} +\usetikzlibrary{shapes} + +\renewcommand\cftsecafterpnum{\vskip15pt} +\renewcommand\cftsubsecafterpnum{\vskip15pt} +\renewcommand\cftfigafterpnum{\vskip15pt} +%\renewcommand{\baselinestretch}{1.5} +\renewcommand{\thesection}{\Roman{section}} +\renewcommand{\thesubsection}{\arabic{section}.\arabic{subsection}} +\renewcommand{\thefigure}{\arabic{figure}} +\newcommand{\qed}{\hfill\blacksquare} + +\newcounter{lemma} +\newcounter{proposition} +\newcounter{theorem} +\newtheorem{definition}{Definition} +\newtheorem{lemma}{\bf Lemma} +\newtheorem{proposition}{\bf Proposition} +\newtheorem{theorem}{\bf Theorem} +\newtheorem{proof}{\bf Proof} +\DeclareMathOperator*{\argmax}{arg\,max} + +\newcommand{\name}{Flexible Symbolic Executor for Mass Patch Differentiation} +\newcommand{\cnx}{\foreignlanguage{vietnamese}{Nguyễn Gia Phong}} +\newcommand{\cse}{Department of Computer Science and Engineering} +\newcommand{\unist}{Ulsan National Institute of Science and Technology} +\newcommand{\blankpage}{\hbox{}\thispagestyle{empty}\clearpage} + +\fancyhf{} +\renewcommand{\headrulewidth}{0pt} +\cfoot{\thepage} +\pagestyle{fancy} + +\title{\name} +\author{\cnx} +\input{glossary.tex} + +\begin{document} +\include{front.tex} +\include{title.tex} + +% [Thesis approval] +% Add the approval doc signed by your advisor in a PDF file +% Put your pdf with the filename below, and uncomment it. +%\includepdf[fitpaper=true, pages=-]{sample_approval.pdf} + +% [Confirmation of thesis approval] +% add the certificate signed by your committee in a PDF file +% Put your pdf with the filename below, and uncomment it. +%\includepdf[fitpaper=true, pages=-]{sample_confirmation.pdf} + +\include{abstract.tex} +\blankpage +\renewcommand{\contentsname}{\hfill\Large Contents\hfill} +\tableofcontents{} +\thispagestyle{empty} +\clearpage +\renewcommand{\listfigurename}{\hfill\Large List of Figures\hfill} +\listoffigures{} +\thispagestyle{empty} +\clearpage +\renewcommand{\glossaryname}{\hfill\Large Glossary\hfill\hfill} +\printnoidxglossaries +\thispagestyle{empty} +\clearpage + +\include{intro.tex} +\include{praelimen.tex} +\include{technique.tex} +\include{eval.tex} +\include{result.tex} +\include{discuss.tex} +\include{related.tex} +\include{outro.tex} + +\newpage +\renewcommand{\bibname}{\hfill\Large References\hfill\hfill} +\addcontentsline{toc}{section}{References} +\bibliographystyle{IEEEtran} +\bibliography{ref.bib} + +\include{ack.tex} +\blankpage +\end{document} diff --git a/discuss.tex b/discuss.tex new file mode 100644 index 0000000..b276e76 --- /dev/null +++ b/discuss.tex @@ -0,0 +1,4 @@ +\section{Discussion} +\subsection{Threats to Validity} +\subsection{Related Work} +\textsc{Shadow}~\cite{shadow} diff --git a/eval.tex b/eval.tex new file mode 100644 index 0000000..cd3fc7f --- /dev/null +++ b/eval.tex @@ -0,0 +1 @@ +\section{Evaluation} diff --git a/front.tex b/front.tex new file mode 100644 index 0000000..3ccb5d4 --- /dev/null +++ b/front.tex @@ -0,0 +1,20 @@ +\thispagestyle{empty} +\begin{center} +\LARGE Master's Thesis + +\vspace{3cm} +\huge\name +\vfill +\LARGE\cnx + +\vspace{2cm} + +\LARGE\cse + +\vspace{2cm} + +\LARGE\unist +\vspace{2cm} + +\LARGE 2023 +\end{center} diff --git a/glossary.tex b/glossary.tex new file mode 100644 index 0000000..8f43f5f --- /dev/null +++ b/glossary.tex @@ -0,0 +1,32 @@ +\makenoidxglossaries +\newglossaryentry{difftest}{name={differential test}, + description={Is an input and a collection of outputs of two or more programs + of the same purpose, such that each output is pairwise different}} +\newglossaryentry{state}{name=state, + description={Or \emph{symbolic process}, includes a register file, + a stack frame, an address space for heap and global variables, + a program counter and path conditions. Stack, heap and global variables + here are all symbolic expressions. For our algorithm, the state + additionally contain the identifier of its program revision}} +\newglossaryentry{pstates}{name={parallel states}, + description={Two states are parallel if and only if their program counter + points to the same instruction\footnote{They do not necessarily + share the same program counter however.} and the conjunction + of their path conditions is satisfiable}} +\newglossaryentry{searcher}{name=searcher, + description={Is what keeps track of states to be executed, + along with the information for scheduling them to optimize resources + on the progress of reaching the goal. In this work, the searcher + includes a set of states and execution frontier tracking}} +\newglossaryentry{execfront}{name={execution frontier}, + description={An instruction $i$ is said to be an execution frontier + of the other $j$ if and only if there exists an execution path + in which the first occurrence of $i$ is before that of $j$. + In more relaxed terminologies, a state $s$ is said to be a frontier + of the other state $z$ if $s$'s program counter is a frontier of $z$'s}} +\newglossaryentry{lexecfront}{name={local execution frontier}, + description={Between a program's states, those not having a frontier + are called the local execution frontiers}} +\newglossaryentry{gexecfront}{name={global execution frontier}, + description={Between the states of multiple program's revisions, + those not having an frontier are called the global execution frontiers}} diff --git a/intro.tex b/intro.tex new file mode 100644 index 0000000..20697ad --- /dev/null +++ b/intro.tex @@ -0,0 +1,2 @@ +\setcounter{page}{1} +\section{Introduction} diff --git a/outro.tex b/outro.tex new file mode 100644 index 0000000..5e87498 --- /dev/null +++ b/outro.tex @@ -0,0 +1 @@ +\section{Conclusion} diff --git a/praelimen.tex b/praelimen.tex new file mode 100644 index 0000000..e2dfea3 --- /dev/null +++ b/praelimen.tex @@ -0,0 +1,7 @@ +\section{Preliminaries} +Our \gls{difftest} generation is based on the symbolic executor +\textsc{klee}'s architecture, which at a high level is an interpreter loop. +At each iteration, it selects a symbolic process, also known as a \gls{state}, +in whose context an instruction is then executed. + +% TODO: base symb exec algo diff --git a/ref.bib b/ref.bib new file mode 100644 index 0000000..52e1f75 --- /dev/null +++ b/ref.bib @@ -0,0 +1,136 @@ +@inproceedings{join-split, + author = {{Hansen}, Trevor and {Schachte}, Peter and {S{\o}ndergaard}, Harald}, + editor= {Bensalem, Saddek and Peled, Doron A.}, + title = {State Joining and Splitting for the Symbolic Execution of Binaries}, + booktitle = {Runtime Verification}, + year = 2009, + publisher = {Springer Berlin Heidelberg}, + address = {Berlin, Heidelberg}, + pages = {76--92}, + doi = {10.1007/978-3-642-04694-0_6}} + +@inproceedings{rsc, + author = {Qi, Dawei and Nguyen, Hoang D.T. and Roychoudhury, Abhik}, + title = {Path Exploration Based on Symbolic Output}, + year = 2011, + isbn = {9781450304436}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/2025113.2025152}, + doi = {10.1145/2025113.2025152}, + booktitle = {Proceedings of the 19th ACM SIGSOFT Symposium and the 13th European Conference on Foundations of Software Engineering}, + pages = {278--288}, + numpages = 11, + keywords = {relevant slice condition, symbolic execution, path exploration}, + location = {Szeged, Hungary}, + series = {ESEC/FSE '11}} + +@article{merge, + author = {Kuznetsov, Volodymyr and Kinder, Johannes and Bucur, Stefan and Candea, George}, + title = {Efficient State Merging in Symbolic Execution}, + year = 2012, + issue_date = {June 2012}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + volume = 47, + number = 6, + issn = {0362-1340}, + url = {https://doi.org/10.1145/2345156.2254088}, + doi = {10.1145/2345156.2254088}, + journal = {SIGPLAN Not.}, + month = jun, + pages = {193--204}, + numpages = 12, + keywords = {verification, symbolic execution, bounded software model checking, state merging, testing}} + +@inproceedings{cgs, + author = {Seo, Hyunmin and Kim, Sunghun}, + title = {How We Get There: A Context-Guided Search Strategy in Concolic Testing}, + year = 2014, + isbn = {9781450330565}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/2635868.2635872}, + doi = {10.1145/2635868.2635872}, + booktitle = {Proceedings of the 22nd ACM SIGSOFT International Symposium on Foundations of Software Engineering}, + pages = {413-424}, + numpages = 12, + keywords = {symbolic execution, search strategies, Concolic testing}, + location = {Hong Kong, China}, + series = {FSE 2014}} + +@article{shadow, + author = {Kuchta, Tomasz and Palikareva, Hristina and Cadar, Cristian}, + title = {Shadow Symbolic Execution for Testing Software Patches}, + year = 2018, + issue_date = {July 2018}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + volume = 27, + number = 3, + issn = {1049-331X}, + url = {https://doi.org/10.1145/3208952}, + doi = {10.1145/3208952}, + journal = {ACM Trans. Softw. Eng. Methodol.}, + month = sep, + articleno = 10, + numpages = 32, + keywords = {regression bugs, cross-version checks, Symbolic patch testing}} + +@inproceedings{homi, + author = {Cha, Sooyoung and Oh, Hakjoo}, + title = {Making Symbolic Execution Promising by Learning Aggressive State-Pruning Strategy}, + year = 2020, + isbn = {9781450370431}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3368089.3409755}, + doi = {10.1145/3368089.3409755}, + booktitle = {Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering}, + pages = {147--158}, + numpages = 12, + keywords = {Online Learning, Dynamic Symbolic Execution}, + location = {Virtual Event, USA}, + series = {ESEC/FSE 2020}} + +@inproceedings{hydiff, + author = {Noller, Yannic and P\u{a}s\u{a}reanu, Corina S. and B\"{o}hme, Marcel and Sun, Youcheng and Nguyen, Hoang Lam and Grunske, Lars}, + title = {HyDiff: Hybrid Differential Software Analysis}, + year = 2020, + isbn = {9781450371216}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3377811.3380363}, + doi = {10.1145/3377811.3380363}, + booktitle = {Proceedings of the ACM/IEEE 42nd International Conference on Software Engineering}, + pages = {1273--1285}, + numpages = 13, + keywords = {fuzzing, symbolic execution, differential program analysis}, + location = {Seoul, South Korea}, + series = {ICSE '20}} + +@inproceedings{ddset, + author = {Gopinath, Rahul and Kampmann, Alexander and Havrikov, Nikolas and Soremekun, Ezekiel O. and Zeller, Andreas}, + title = {Abstracting Failure-Inducing Inputs}, + year = 2020, + isbn = {9781450380089}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3395363.3397349}, + doi = {10.1145/3395363.3397349}, + booktitle = {Proceedings of the 29th ACM SIGSOFT International Symposium on Software Testing and Analysis}, + pages = {237--248}, + numpages = 12, + keywords = {failure-inducing inputs, debugging, error diagnosis, grammars}, + location = {Virtual Event, USA}, + series = {ISSTA 2020}} + +@article{paradyse, + author = {Cha, Sooyoung and Hong, Seongjoon and Bak, Jiseong and Kim, Jingyoung and Lee, Junhee and Oh, Hakjoo}, + journal = {IEEE Transactions on Software Engineering}, + title = {Enhancing Dynamic Symbolic Execution by Automatically Learning Search Heuristics}, + year = 2022, + volume = 48, + number = 9, + pages = {3640--3663}, + doi = {10.1109/TSE.2021.3101870}} diff --git a/result.tex b/result.tex new file mode 100644 index 0000000..61bf57b --- /dev/null +++ b/result.tex @@ -0,0 +1 @@ +\section{Results} diff --git a/technique.tex b/technique.tex new file mode 100644 index 0000000..d4a576c --- /dev/null +++ b/technique.tex @@ -0,0 +1,118 @@ +\section{Technique} +Unlike the original \textsc{klee} which execute one single program at a time, +our tool performs execution on a pool of the program's revisions +at the same time. + +An overview of our approach is illustrated in figure~\ref{fig:arch}. + +\begin{figure}[h]\centering + \begin{tikzpicture}[mymatrix/.style={matrix of nodes, + nodes=typetag,row sep=1ex}, + typetag/.style={draw,anchor=west}, + title/.style={draw=none,inner sep=0pt}, + line width=0.2ex] + \linespread{0.8} + \node[draw,label={[name=sched]Scheduler}] (states) {States}; + \node[draw,below=0.5ex of states] (clstr) {Clusters}; + \node[draw,fit={(sched) (states) (clstr)}] {}; + + \node[draw,right=11em of states] (symb) {Interpreter}; + \draw[-angle 45] (states.north east) to[in=90,out=15] + node[sloped,above,pos=0.4]{selected state} (symb.north west); + \draw[-angle 45] (symb.south west) to[in=0,out=-90] + node[sloped,above,pos=0.6]{child state} (states.east); + + \node[draw,below=6em of symb] (smt) {Theorem prover}; + \draw[angle 45-angle 45] (symb) to (smt); + + \node[draw,below=7em of sched,label={[name=diff]Differentiator}] + (outs) {Fruitful states}; + \draw[-angle 45] (symb.south west) to[in=45,out=-90] + node[sloped,above,pos=0.6]{state with output} (outs.15); + \draw[-angle 90] (outs) to[in=90,out=0,pos=0.4] + node[sloped,below]{path conditions} (smt.north west); + \draw (outs.south east) to[in=90,out=10] (smt.north west); + + \node[draw,below=0.5ex of outs] (prog) {Metaprogram}; + \node[draw,fit={(diff) (outs) (prog)}] {}; + \draw[-angle 45] (smt.south west) to[out=-90,in=0] + node[sloped,below]{concrete input} (prog); + \draw[-angle 45] (prog) to[out=180,in=-150] + node[left,align=center]{diff.\\test} (clstr.south west); + + \node[left=4em of states.west] (bc) {bitcode}; + \draw[-angle 90] (bc) to[in=180,out=60] (states.west); + \node[below=1em of bc,align=center] (tree) {decision\\tree}; + \draw[-angle 45] (clstr.west) to[out=180,in=60] (tree); + \node[above=1em of bc,align=center] (seed) {seed\\input}; + \draw[dotted] (seed) to[out=0,in=180] (states); + \draw[-angle 45] (bc.south west) + to[out=-135,in=-135] (prog.south west); + \end{tikzpicture} + \caption{\textsc{Psychic} overview.}\label{fig:arch} +\end{figure} + +\begin{algorithm} +\caption{Overall execution} +\begin{algorithmic}[1] + \Require set of program revisions $D$ + \Ensure set of differential tests $T$ + \State $T \gets \varnothing$ + \State $E \gets \varnothing$ \Comment{Exited states} + \State $q \gets \text{new\_searcher}(D)$ + \For{$d \in D$} + \State $s \gets \text{initial\_state}(d)$ + \State $\text{add\_states}(q, \{s\})$ + \EndFor + \While{has\_state($q$)} + \State $s \gets \text{select\_state}(q)$ + \State $i \gets \text{next\_instruction}(s)$ + \Comment{$i$ is a state} + \If{$i$ is exit} + \State $E' \gets \text{head}(\text{sort}(\text{distance\_to}(i), + \text{different\_revision}(E, i)))$ + \For{$e \in E'$} + \State $t \gets \text{generate\_test}(i, e)$ + \If{$t$ is not null} + \State $T \gets T \cup \{t\}$ + \EndIf + \EndFor + \State $E \gets E \cup \{i\}$ + \Else + \State $S_+, S_- \gets \text{execute\_instruction}(i)$ + \State $\text{remove\_states}\left(q, S_-\right)$ + \State $\text{add\_states}\left(q, S_+\right)$ + \EndIf + \EndWhile +\end{algorithmic} +\end{algorithm} + +\begin{algorithm} +\caption{State selection} +\begin{algorithmic}[1] + \Require searcher $q$ + \Ensure state $s$ + \State $S \gets \varnothing$ + \State $P \gets \text{base\_priority}(q)$ + \Comment{Scheduling priority from base search algorithm} + \For{$z \in \text{states}(q)$} + \If{$z \notin S$} + \State $S' \gets \text{parallel\_states}(q, z)$ + \If{$S' \ne \varnothing$} + \State $S \gets S \cup S' \cup \{z\}$ + \EndIf + \EndIf + + \State $l \gets \text{local\_frontier}(q, z)$ + \State $g \gets \text{global\_frontier}(q, z)$ + \If{$l$ is null $\land$ $g$ is not null} + \State $P_z \gets P_z + \text{distance\_bonus}(g, z)$ + \Comment{Should further or nearer be prioritised?} + \EndIf + \EndFor + \For{$z \in S$} + \State $P_z \gets P_z + \text{PARALLEL\_BONUS}$ + \EndFor + \State $s \gets \text{max\_priority}(P)$ +\end{algorithmic} +\end{algorithm} diff --git a/title.tex b/title.tex new file mode 100644 index 0000000..e6821b9 --- /dev/null +++ b/title.tex @@ -0,0 +1,20 @@ +\thispagestyle{empty} +\begin{center} +\hbox{} + +\hbox{} + +\huge\name + +\vspace{5cm} + +\LARGE\cnx + +\vspace{6cm} + +\LARGE\cse + +\vspace{2cm} + +\LARGE\unist +\end{center} -- cgit 1.4.1