summary refs log tree commit diff homepage
diff options
context:
space:
mode:
authorNguyễn Gia Phong <cnx@loang.net>2023-10-28 15:57:52 +0900
committerNguyễn Gia Phong <cnx@loang.net>2023-10-28 20:02:06 +0900
commit29ec67340c03fb15dce465381737b96e12ef764b (patch)
tree31a07da1af8c4fa05f64e2fb52a3bfcdbdab3c5c
downloadthesis-29ec67340c03fb15dce465381737b96e12ef764b.tar.gz
Draft skeleton
-rw-r--r--.gitignore13
-rw-r--r--abstract.tex5
-rw-r--r--ack.tex3
-rw-r--r--all.tex113
-rw-r--r--discuss.tex4
-rw-r--r--eval.tex1
-rw-r--r--front.tex20
-rw-r--r--glossary.tex32
-rw-r--r--intro.tex2
-rw-r--r--outro.tex1
-rw-r--r--praelimen.tex7
-rw-r--r--ref.bib136
-rw-r--r--result.tex1
-rw-r--r--technique.tex118
-rw-r--r--title.tex20
15 files changed, 476 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1e18ce9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+*.aux
+*.bbl
+*.blg
+*.fdb_latexmk
+*.fls
+*.glo
+*.ist
+*.lof
+*.log
+*.out
+*.synctex.gz
+*.toc
+all.pdf
diff --git a/abstract.tex b/abstract.tex
new file mode 100644
index 0000000..4701ff7
--- /dev/null
+++ b/abstract.tex
@@ -0,0 +1,5 @@
+\renewcommand{\abstractname}{\Large Abstract}
+\begin{abstract}
+Your abstract should be here.
+\end{abstract}
+\thispagestyle{empty}
diff --git a/ack.tex b/ack.tex
new file mode 100644
index 0000000..3cb4b9a
--- /dev/null
+++ b/ack.tex
@@ -0,0 +1,3 @@
+\addcontentsline{toc}{section}{Acknowledgements}
+\section*{\hfill\Large Acknowledgements\hfill\hfill}
+% TODO: grants
diff --git a/all.tex b/all.tex
new file mode 100644
index 0000000..4073a61
--- /dev/null
+++ b/all.tex
@@ -0,0 +1,113 @@
+\documentclass[11pt, a4paper, onecolumn, oneside]{report}
+\usepackage[vietnamese,english]{babel}
+\usepackage{mathptmx}
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\RequirePackage[top=3cm, bottom=1in, left=1in, right=1in]{geometry}
+\linespread{1.3}
+
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{caption}
+\usepackage{cases}
+\usepackage{cite}
+\usepackage{enumerate}
+\usepackage{fancyhdr}
+\usepackage{glossaries}
+\usepackage{graphicx}
+\usepackage[hidelinks]{hyperref}
+\usepackage{pdfpages}
+\usepackage{subcaption}
+\usepackage{tikz}
+\usepackage{tocloft}
+
+\usetikzlibrary{arrows}
+\usetikzlibrary{decorations.pathmorphing}
+\usetikzlibrary{fit}
+\usetikzlibrary{matrix}
+\usetikzlibrary{positioning}
+\usetikzlibrary{shapes}
+
+\renewcommand\cftsecafterpnum{\vskip15pt}
+\renewcommand\cftsubsecafterpnum{\vskip15pt}
+\renewcommand\cftfigafterpnum{\vskip15pt}
+%\renewcommand{\baselinestretch}{1.5}
+\renewcommand{\thesection}{\Roman{section}}
+\renewcommand{\thesubsection}{\arabic{section}.\arabic{subsection}}
+\renewcommand{\thefigure}{\arabic{figure}}
+\newcommand{\qed}{\hfill\blacksquare}
+
+\newcounter{lemma}
+\newcounter{proposition}
+\newcounter{theorem}
+\newtheorem{definition}{Definition}
+\newtheorem{lemma}{\bf Lemma}
+\newtheorem{proposition}{\bf Proposition}
+\newtheorem{theorem}{\bf Theorem}
+\newtheorem{proof}{\bf Proof}
+\DeclareMathOperator*{\argmax}{arg\,max}
+
+\newcommand{\name}{Flexible Symbolic Executor for Mass Patch Differentiation}
+\newcommand{\cnx}{\foreignlanguage{vietnamese}{Nguyễn Gia Phong}}
+\newcommand{\cse}{Department of Computer Science and Engineering}
+\newcommand{\unist}{Ulsan National Institute of Science and Technology}
+\newcommand{\blankpage}{\hbox{}\thispagestyle{empty}\clearpage}
+
+\fancyhf{}
+\renewcommand{\headrulewidth}{0pt}
+\cfoot{\thepage}
+\pagestyle{fancy}
+
+\title{\name}
+\author{\cnx}
+\input{glossary.tex}
+
+\begin{document}
+\include{front.tex}
+\include{title.tex}
+
+% [Thesis approval]
+% Add the approval doc signed by your advisor in a PDF file
+% Put your pdf with the filename below, and uncomment it.
+%\includepdf[fitpaper=true, pages=-]{sample_approval.pdf}
+
+% [Confirmation of thesis approval]
+% add the certificate signed by your committee in a PDF file
+% Put your pdf with the filename below, and uncomment it.
+%\includepdf[fitpaper=true, pages=-]{sample_confirmation.pdf}
+
+\include{abstract.tex}
+\blankpage
+\renewcommand{\contentsname}{\hfill\Large Contents\hfill}
+\tableofcontents{}
+\thispagestyle{empty}
+\clearpage
+\renewcommand{\listfigurename}{\hfill\Large List of Figures\hfill}
+\listoffigures{}
+\thispagestyle{empty}
+\clearpage
+\renewcommand{\glossaryname}{\hfill\Large Glossary\hfill\hfill}
+\printnoidxglossaries
+\thispagestyle{empty}
+\clearpage
+
+\include{intro.tex}
+\include{praelimen.tex}
+\include{technique.tex}
+\include{eval.tex}
+\include{result.tex}
+\include{discuss.tex}
+\include{related.tex}
+\include{outro.tex}
+
+\newpage
+\renewcommand{\bibname}{\hfill\Large References\hfill\hfill}
+\addcontentsline{toc}{section}{References}
+\bibliographystyle{IEEEtran}
+\bibliography{ref.bib}
+
+\include{ack.tex}
+\blankpage
+\end{document}
diff --git a/discuss.tex b/discuss.tex
new file mode 100644
index 0000000..b276e76
--- /dev/null
+++ b/discuss.tex
@@ -0,0 +1,4 @@
+\section{Discussion}
+\subsection{Threats to Validity}
+\subsection{Related Work}
+\textsc{Shadow}~\cite{shadow}
diff --git a/eval.tex b/eval.tex
new file mode 100644
index 0000000..cd3fc7f
--- /dev/null
+++ b/eval.tex
@@ -0,0 +1 @@
+\section{Evaluation}
diff --git a/front.tex b/front.tex
new file mode 100644
index 0000000..3ccb5d4
--- /dev/null
+++ b/front.tex
@@ -0,0 +1,20 @@
+\thispagestyle{empty}
+\begin{center}
+\LARGE Master's Thesis
+
+\vspace{3cm}
+\huge\name
+\vfill
+\LARGE\cnx
+
+\vspace{2cm}
+
+\LARGE\cse
+
+\vspace{2cm}
+
+\LARGE\unist
+\vspace{2cm}
+
+\LARGE 2023
+\end{center}
diff --git a/glossary.tex b/glossary.tex
new file mode 100644
index 0000000..8f43f5f
--- /dev/null
+++ b/glossary.tex
@@ -0,0 +1,32 @@
+\makenoidxglossaries
+\newglossaryentry{difftest}{name={differential test},
+  description={Is an input and a collection of outputs of two or more programs
+    of the same purpose, such that each output is pairwise different}}
+\newglossaryentry{state}{name=state,
+  description={Or \emph{symbolic process}, includes a register file,
+    a stack frame, an address space for heap and global variables,
+    a program counter and path conditions.  Stack, heap and global variables
+    here are all symbolic expressions.  For our algorithm, the state
+    additionally contain the identifier of its program revision}}
+\newglossaryentry{pstates}{name={parallel states},
+  description={Two states are parallel if and only if their program counter
+    points to the same instruction\footnote{They do not necessarily
+    share the same program counter however.} and the conjunction
+    of their path conditions is satisfiable}}
+\newglossaryentry{searcher}{name=searcher,
+  description={Is what keeps track of states to be executed,
+    along with the information for scheduling them to optimize resources
+    on the progress of reaching the goal.  In this work, the searcher
+    includes a set of states and execution frontier tracking}}
+\newglossaryentry{execfront}{name={execution frontier},
+  description={An instruction $i$ is said to be an execution frontier
+    of the other $j$ if and only if there exists an execution path
+    in which the first occurrence of $i$ is before that of $j$.
+    In more relaxed terminologies, a state $s$ is said to be a frontier
+    of the other state $z$ if $s$'s program counter is a frontier of $z$'s}}
+\newglossaryentry{lexecfront}{name={local execution frontier},
+  description={Between a program's states, those not having a frontier
+    are called the local execution frontiers}}
+\newglossaryentry{gexecfront}{name={global execution frontier},
+  description={Between the states of multiple program's revisions,
+    those not having an frontier are called the global execution frontiers}}
diff --git a/intro.tex b/intro.tex
new file mode 100644
index 0000000..20697ad
--- /dev/null
+++ b/intro.tex
@@ -0,0 +1,2 @@
+\setcounter{page}{1}
+\section{Introduction}
diff --git a/outro.tex b/outro.tex
new file mode 100644
index 0000000..5e87498
--- /dev/null
+++ b/outro.tex
@@ -0,0 +1 @@
+\section{Conclusion}
diff --git a/praelimen.tex b/praelimen.tex
new file mode 100644
index 0000000..e2dfea3
--- /dev/null
+++ b/praelimen.tex
@@ -0,0 +1,7 @@
+\section{Preliminaries}
+Our \gls{difftest} generation is based on the symbolic executor
+\textsc{klee}'s architecture, which at a high level is an interpreter loop.
+At each iteration, it selects a symbolic process, also known as a \gls{state},
+in whose context an instruction is then executed.
+
+% TODO: base symb exec algo
diff --git a/ref.bib b/ref.bib
new file mode 100644
index 0000000..52e1f75
--- /dev/null
+++ b/ref.bib
@@ -0,0 +1,136 @@
+@inproceedings{join-split,
+  author = {{Hansen}, Trevor and {Schachte}, Peter and {S{\o}ndergaard}, Harald},
+  editor= {Bensalem, Saddek and Peled, Doron A.},
+  title = {State Joining and Splitting for the Symbolic Execution of Binaries},
+  booktitle = {Runtime Verification},
+  year = 2009,
+  publisher = {Springer Berlin Heidelberg},
+  address = {Berlin, Heidelberg},
+  pages = {76--92},
+  doi = {10.1007/978-3-642-04694-0_6}}
+
+@inproceedings{rsc,
+  author = {Qi, Dawei and Nguyen, Hoang D.T. and Roychoudhury, Abhik},
+  title = {Path Exploration Based on Symbolic Output},
+  year = 2011,
+  isbn = {9781450304436},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  url = {https://doi.org/10.1145/2025113.2025152},
+  doi = {10.1145/2025113.2025152},
+  booktitle = {Proceedings of the 19th ACM SIGSOFT Symposium and the 13th European Conference on Foundations of Software Engineering},
+  pages = {278--288},
+  numpages = 11,
+  keywords = {relevant slice condition, symbolic execution, path exploration},
+  location = {Szeged, Hungary},
+  series = {ESEC/FSE '11}}
+
+@article{merge,
+  author = {Kuznetsov, Volodymyr and Kinder, Johannes and Bucur, Stefan and Candea, George},
+  title = {Efficient State Merging in Symbolic Execution},
+  year = 2012,
+  issue_date = {June 2012},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  volume = 47,
+  number = 6,
+  issn = {0362-1340},
+  url = {https://doi.org/10.1145/2345156.2254088},
+  doi = {10.1145/2345156.2254088},
+  journal = {SIGPLAN Not.},
+  month = jun,
+  pages = {193--204},
+  numpages = 12,
+  keywords = {verification, symbolic execution, bounded software model checking, state merging, testing}}
+
+@inproceedings{cgs,
+  author = {Seo, Hyunmin and Kim, Sunghun},
+  title = {How We Get There: A Context-Guided Search Strategy in Concolic Testing},
+  year = 2014,
+  isbn = {9781450330565},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  url = {https://doi.org/10.1145/2635868.2635872},
+  doi = {10.1145/2635868.2635872},
+  booktitle = {Proceedings of the 22nd ACM SIGSOFT International Symposium on Foundations of Software Engineering},
+  pages = {413-424},
+  numpages = 12,
+  keywords = {symbolic execution, search strategies, Concolic testing},
+  location = {Hong Kong, China},
+  series = {FSE 2014}}
+
+@article{shadow,
+  author = {Kuchta, Tomasz and Palikareva, Hristina and Cadar, Cristian},
+  title = {Shadow Symbolic Execution for Testing Software Patches},
+  year = 2018,
+  issue_date = {July 2018},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  volume = 27,
+  number = 3,
+  issn = {1049-331X},
+  url = {https://doi.org/10.1145/3208952},
+  doi = {10.1145/3208952},
+  journal = {ACM Trans. Softw. Eng. Methodol.},
+  month = sep,
+  articleno = 10,
+  numpages = 32,
+  keywords = {regression bugs, cross-version checks, Symbolic patch testing}}
+
+@inproceedings{homi,
+  author = {Cha, Sooyoung and Oh, Hakjoo},
+  title = {Making Symbolic Execution Promising by Learning Aggressive State-Pruning Strategy},
+  year = 2020,
+  isbn = {9781450370431},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  url = {https://doi.org/10.1145/3368089.3409755},
+  doi = {10.1145/3368089.3409755},
+  booktitle = {Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
+  pages = {147--158},
+  numpages = 12,
+  keywords = {Online Learning, Dynamic Symbolic Execution},
+  location = {Virtual Event, USA},
+  series = {ESEC/FSE 2020}}
+
+@inproceedings{hydiff,
+  author = {Noller, Yannic and P\u{a}s\u{a}reanu, Corina S. and B\"{o}hme, Marcel and Sun, Youcheng and Nguyen, Hoang Lam and Grunske, Lars},
+  title = {HyDiff: Hybrid Differential Software Analysis},
+  year = 2020,
+  isbn = {9781450371216},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  url = {https://doi.org/10.1145/3377811.3380363},
+  doi = {10.1145/3377811.3380363},
+  booktitle = {Proceedings of the ACM/IEEE 42nd International Conference on Software Engineering},
+  pages = {1273--1285},
+  numpages = 13,
+  keywords = {fuzzing, symbolic execution, differential program analysis},
+  location = {Seoul, South Korea},
+  series = {ICSE '20}}
+
+@inproceedings{ddset,
+  author = {Gopinath, Rahul and Kampmann, Alexander and Havrikov, Nikolas and Soremekun, Ezekiel O. and Zeller, Andreas},
+  title = {Abstracting Failure-Inducing Inputs},
+  year = 2020,
+  isbn = {9781450380089},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  url = {https://doi.org/10.1145/3395363.3397349},
+  doi = {10.1145/3395363.3397349},
+  booktitle = {Proceedings of the 29th ACM SIGSOFT International Symposium on Software Testing and Analysis},
+  pages = {237--248},
+  numpages = 12,
+  keywords = {failure-inducing inputs, debugging, error diagnosis, grammars},
+  location = {Virtual Event, USA},
+  series = {ISSTA 2020}}
+
+@article{paradyse,
+  author = {Cha, Sooyoung and Hong, Seongjoon and Bak, Jiseong and Kim, Jingyoung and Lee, Junhee and Oh, Hakjoo},
+  journal = {IEEE Transactions on Software Engineering}, 
+  title = {Enhancing Dynamic Symbolic Execution by Automatically Learning Search Heuristics}, 
+  year = 2022,
+  volume = 48,
+  number = 9,
+  pages = {3640--3663},
+  doi = {10.1109/TSE.2021.3101870}}
diff --git a/result.tex b/result.tex
new file mode 100644
index 0000000..61bf57b
--- /dev/null
+++ b/result.tex
@@ -0,0 +1 @@
+\section{Results}
diff --git a/technique.tex b/technique.tex
new file mode 100644
index 0000000..d4a576c
--- /dev/null
+++ b/technique.tex
@@ -0,0 +1,118 @@
+\section{Technique}
+Unlike the original \textsc{klee} which execute one single program at a time,
+our tool performs execution on a pool of the program's revisions
+at the same time.
+
+An overview of our approach is illustrated in figure~\ref{fig:arch}.
+
+\begin{figure}[h]\centering
+  \begin{tikzpicture}[mymatrix/.style={matrix of nodes,
+                                       nodes=typetag,row sep=1ex},
+                      typetag/.style={draw,anchor=west},
+                      title/.style={draw=none,inner sep=0pt},
+                      line width=0.2ex]
+    \linespread{0.8}
+    \node[draw,label={[name=sched]Scheduler}] (states) {States};
+    \node[draw,below=0.5ex of states] (clstr) {Clusters};
+    \node[draw,fit={(sched) (states) (clstr)}] {};
+
+    \node[draw,right=11em of states] (symb) {Interpreter};
+    \draw[-angle 45] (states.north east) to[in=90,out=15]
+      node[sloped,above,pos=0.4]{selected state} (symb.north west);
+    \draw[-angle 45] (symb.south west) to[in=0,out=-90]
+      node[sloped,above,pos=0.6]{child state} (states.east);
+
+    \node[draw,below=6em of symb] (smt) {Theorem prover};
+    \draw[angle 45-angle 45] (symb) to (smt);
+
+    \node[draw,below=7em of sched,label={[name=diff]Differentiator}]
+      (outs) {Fruitful states};
+    \draw[-angle 45] (symb.south west) to[in=45,out=-90]
+      node[sloped,above,pos=0.6]{state with output} (outs.15);
+    \draw[-angle 90] (outs) to[in=90,out=0,pos=0.4]
+      node[sloped,below]{path conditions} (smt.north west);
+    \draw (outs.south east) to[in=90,out=10] (smt.north west);
+
+    \node[draw,below=0.5ex of outs] (prog) {Metaprogram};
+    \node[draw,fit={(diff) (outs) (prog)}] {};
+    \draw[-angle 45] (smt.south west) to[out=-90,in=0]
+      node[sloped,below]{concrete input} (prog);
+    \draw[-angle 45] (prog) to[out=180,in=-150]
+      node[left,align=center]{diff.\\test} (clstr.south west);
+
+    \node[left=4em of states.west] (bc) {bitcode};
+    \draw[-angle 90] (bc) to[in=180,out=60] (states.west);
+    \node[below=1em of bc,align=center] (tree) {decision\\tree};
+    \draw[-angle 45] (clstr.west) to[out=180,in=60] (tree);
+    \node[above=1em of bc,align=center] (seed) {seed\\input};
+    \draw[dotted] (seed) to[out=0,in=180] (states);
+    \draw[-angle 45] (bc.south west)
+      to[out=-135,in=-135] (prog.south west);
+  \end{tikzpicture}
+  \caption{\textsc{Psychic} overview.}\label{fig:arch}
+\end{figure}
+
+\begin{algorithm}
+\caption{Overall execution}
+\begin{algorithmic}[1]
+  \Require set of program revisions $D$
+  \Ensure set of differential tests $T$
+  \State $T \gets \varnothing$
+  \State $E \gets \varnothing$  \Comment{Exited states}
+  \State $q \gets \text{new\_searcher}(D)$
+  \For{$d \in D$}
+    \State $s \gets \text{initial\_state}(d)$
+    \State $\text{add\_states}(q, \{s\})$
+  \EndFor
+  \While{has\_state($q$)}
+    \State $s \gets \text{select\_state}(q)$
+    \State $i \gets \text{next\_instruction}(s)$
+      \Comment{$i$ is a state}
+    \If{$i$ is exit}
+      \State $E' \gets \text{head}(\text{sort}(\text{distance\_to}(i),
+        \text{different\_revision}(E, i)))$
+      \For{$e \in E'$}
+        \State $t \gets \text{generate\_test}(i, e)$
+        \If{$t$ is not null}
+          \State $T \gets T \cup \{t\}$
+        \EndIf
+      \EndFor
+      \State $E \gets E \cup \{i\}$
+    \Else
+      \State $S_+, S_- \gets \text{execute\_instruction}(i)$
+      \State $\text{remove\_states}\left(q, S_-\right)$
+      \State $\text{add\_states}\left(q, S_+\right)$
+    \EndIf
+  \EndWhile
+\end{algorithmic}
+\end{algorithm}
+
+\begin{algorithm}
+\caption{State selection}
+\begin{algorithmic}[1]
+  \Require searcher $q$
+  \Ensure state $s$
+  \State $S \gets \varnothing$
+  \State $P \gets \text{base\_priority}(q)$
+    \Comment{Scheduling priority from base search algorithm}
+  \For{$z \in \text{states}(q)$}
+    \If{$z \notin S$}
+      \State $S' \gets \text{parallel\_states}(q, z)$
+      \If{$S' \ne \varnothing$}
+        \State $S \gets S \cup S' \cup \{z\}$
+      \EndIf
+    \EndIf
+
+    \State $l \gets \text{local\_frontier}(q, z)$
+    \State $g \gets \text{global\_frontier}(q, z)$
+    \If{$l$ is null $\land$ $g$ is not null}
+      \State $P_z \gets P_z + \text{distance\_bonus}(g, z)$
+      \Comment{Should further or nearer be prioritised?}
+    \EndIf
+  \EndFor
+  \For{$z \in S$}
+    \State $P_z \gets P_z + \text{PARALLEL\_BONUS}$
+  \EndFor
+  \State $s \gets \text{max\_priority}(P)$
+\end{algorithmic}
+\end{algorithm}
diff --git a/title.tex b/title.tex
new file mode 100644
index 0000000..e6821b9
--- /dev/null
+++ b/title.tex
@@ -0,0 +1,20 @@
+\thispagestyle{empty}
+\begin{center}
+\hbox{}
+
+\hbox{}
+
+\huge\name
+
+\vspace{5cm}
+
+\LARGE\cnx
+
+\vspace{6cm}
+
+\LARGE\cse
+
+\vspace{2cm}
+
+\LARGE\unist
+\end{center}