-rw-r--r--  algorithm.tex      70
-rw-r--r--  introduction.tex   47
-rw-r--r--  main.tex          238
-rw-r--r--  proof.tex           3
-rw-r--r--  related.tex        15
-rw-r--r--  verilog.tex        88
6 files changed, 229 insertions, 232 deletions
diff --git a/algorithm.tex b/algorithm.tex
new file mode 100644
index 0000000..8488edf
--- /dev/null
+++ b/algorithm.tex
@@ -0,0 +1,70 @@
+\section{Turning CompCert into an HLS tool}
+
+%% Should maybe go in the introduction instead.
+
+\begin{figure}
+ \centering
+ \begin{tikzpicture}
+ [language/.style={fill=white,rounded corners=2pt}]
+ \fill[compcert,rounded corners=3pt] (-1,-1) rectangle (9,1.5);
+ \fill[formalhls,rounded corners=3pt] (-1,-1.5) rectangle (9,-2.5);
+ \node[language] at (0,0) (clight) {Clight};
+ \node[language] at (2,0) (cminor) {C\#minor};
+ \node[language] at (4,0) (rtl) {RTL};
+ \node[language] at (6,0) (ltl) {LTL};
+ \node[language] at (8,0) (ppc) {PPC};
+ \node[language] at (4,-2) (dfgstmd) {HTL};
+ \node[language] at (7,-2) (verilog) {Verilog};
+ \node at (0,1) {CompCert};
+ \node at (0,-2) {CoqUp};
+ \draw[->] (clight) -- (cminor);
+ \draw[->,dashed] (cminor) -- (rtl);
+ \draw[->] (rtl) -- (ltl);
+ \draw[->,dashed] (ltl) -- (ppc);
+ \draw[->] (rtl) -- (dfgstmd);
+ \draw[->] (dfgstmd) -- (verilog);
+ \end{tikzpicture}
+ \caption{Verilog backend branching off at the RTL stage.}\label{fig:rtlbranch}
+\end{figure}
+
+This section covers the main architecture of the HLS tool and how the backend was added to CompCert. CompCert is made up of 11 intermediate languages between the Clight input and the assembly output. Each of these intermediate languages serves a different purpose, and various optimisations are performed on them. When designing a new backend for CompCert, it is therefore crucial to know where to branch off and start the hardware generation. Many of the optimisations that the CompCert backend performs, such as register allocation or even scheduling, are not necessary when generating custom hardware, as there is no longer a CPU to target. It is therefore important to find the right intermediate language, so that the HLS tool still benefits from many of the generic optimisations that CompCert performs, but does not receive the code transformations that are specific to CPU architectures.
+
+Existing HLS compilers usually use LLVM IR as the intermediate representation when performing HLS-specific optimisations, as each of its instructions maps quite well to hardware which performs the same behaviour. CompCert's RTL is the intermediate language that resembles LLVM IR the most, as it also has an infinite number of pseudo-registers and each instruction maps well to hardware. In addition to that, many optimisations that are also useful for HLS are performed at the RTL level, which means that by taking RTL as the input language, the HLS algorithm benefits from the same optimisations. It is therefore a good candidate to be chosen as the input language to the HLS backend. The complete flow that CoqUp takes is shown in figure~\ref{fig:rtlbranch}.
+
+%%TODO: Maybe add why LTL and the other smaller languages are not that well suited
+
+\subsection{CompCert RTL}
+
+All CompCert intermediate languages follow the same general structure shown below:
+
+\begin{align*}
+ \mathit{program} \quad ::= \{ &\mathbf{variables} : (\mathit{id} * \mathit{data}), \\
+ &\mathbf{functions} : (\mathit{id} * \mathit{function\_def}),\\
+ &\mathbf{main} : \mathit{id} \}
+\end{align*}
+
+\noindent where function definitions can either be internal or external. External functions are functions that are not defined in the current translation unit and are therefore not part of the current translation. The difference between the CompCert intermediate languages therefore lies in how internal functions are defined, as that determines the structure of the language itself.
+
+%% Describe RTL
+RTL function definitions consist of instructions encoded in a control-flow graph, with each instruction linking to the next instruction(s) that should be executed.
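+
+As a sketch of this structure, the following is a representative subset of the RTL instructions (the full language also contains calls, jump tables, and more); each instruction carries the label(s) $n$ of its successor(s) in the control-flow graph:
+
+\begin{align*}
+ i\ ::=&\; \mathtt{Inop}\ n\\[-2pt]
+ |&\; \mathtt{Iop}\ \mathit{op}\ \vec{r}\ r\ n\\[-2pt]
+ |&\; \mathtt{Iload}\ \mathit{chunk}\ \mathit{addr}\ \vec{r}\ r\ n\\[-2pt]
+ |&\; \mathtt{Istore}\ \mathit{chunk}\ \mathit{addr}\ \vec{r}\ r\ n\\[-2pt]
+ |&\; \mathtt{Icond}\ \mathit{cond}\ \vec{r}\ n\ n\\[-2pt]
+ |&\; \mathtt{Ireturn}\ r?
+\end{align*}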
+
+%%TODO: Finish this section and describe the syntax and semantics of RTL.
+
+\subsection{HTL}
+
+RTL is first translated to an intermediate language called hardware transfer language (HTL), which is a finite state machine with datapath (FSMD) representation of the RTL code. Like all CompCert intermediate languages, HTL has the same program structure as RTL, but internal functions now contain control logic that determines the order of execution, and a datapath that transforms the data in the registers. This is represented by two maps that link each state to its control logic and to its position in the datapath, both of which are expressed using Verilog statements. The syntax of HTL functions is the following:
+
+\begin{align*}
+ \mathit{g} \quad &::= \quad n \mapsto s\\
+ \mathit{d_{r}} \quad &::= \quad r \mapsto (io? * n)\\
+ \mathit{d_{a}} \quad &::= \quad r \mapsto (io? * n * n)\\
+ \mathit{F} \quad &::= \quad \big\{\ \mathtt{params} : \vec{r}\\
+ &\mathtt{datapath} : g\\
+ &\mathtt{controllogic} : g\\
+ &\mathtt{entrypoint} : n\\
+ &\mathtt{st, stk, finish, return, start, reset, clk} : r\\
+ &\mathtt{scldecls} : d_{r}\\
+ &\mathtt{arrdecls} : d_{a}\ \big\}
+\end{align*}
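+
+As an illustrative example (hypothetical, not drawn verbatim from the implementation), an RTL addition instruction at state $n$ with successor state $n'$ could be translated into the following two map entries, where the datapath performs the addition with a nonblocking assignment and the control logic advances the state register $\mathtt{st}$:
+
+\begin{align*}
+ \mathtt{datapath}(n) &= \quad r_{3}\ \texttt{<=}\ r_{1}\ \texttt{+}\ r_{2}\\
+ \mathtt{controllogic}(n) &= \quad \mathtt{st}\ \texttt{<=}\ n'
+\end{align*}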
+
+\subsection{HLS Algorithm}
diff --git a/introduction.tex b/introduction.tex
new file mode 100644
index 0000000..5c8bcac
--- /dev/null
+++ b/introduction.tex
@@ -0,0 +1,47 @@
+\section{Introduction}
+
+%% Motivation for why HLS might be needed
+
+The current approach to writing energy-efficient and high-throughput applications is to use custom hardware which has been specialised for that application~\NR{You mean application-specific?}, instead of relying on and optimising for a CPU.\@ This comes at the cost of having to design the custom hardware, which, if hardware description languages (HDLs) such as Verilog are used, can be tedious and quite error-prone. Especially with the size~\NR{size of programs or HDL designs?} growing over the years, it can become difficult to verify that the hardware design behaves in the expected way, as simulation of hardware description languages can be quite inefficient. Furthermore, the algorithms that are being accelerated in hardware often already have a software implementation, meaning they have to be reimplemented efficiently in a hardware description language, which can be time-consuming.
+
+%% Definition and benefits of HLS
+\NR{What does "this" refer to in the first, second and fourth sentence? You had "this" in the previous paragraph too.}
+One possible solution to these problems is high-level synthesis (HLS), which is the process of generating custom hardware, represented in a hardware description language, from a behavioural description, often a subset of C. This raises the level of abstraction, because the description of the algorithm in C is inherently untimed, meaning actions do not have to be scheduled into clock cycles. The higher level of abstraction makes it easier to reason about the algorithms and therefore also makes them easier to maintain. This reduces the time taken to design the hardware, especially if a software description of the algorithm already exists, because the algorithm does not have to be redesigned at a lower level directly in hardware. Another benefit of using HLS to design the hardware is that functional verification of the design is much simpler and more efficient than at the HDL stage, as the whole software ecosystem can be used for it. Instead of having to run simulations of the hardware, the C code can simply be compiled and executed natively, as the hardware design produced by HLS should have the same behaviour.
+\NR{The abstraction of HLS helps in two ways: improving productivity of hardware designers and reducing the entry barrier of hardware design for software programmers. Both these audiences benefit from better stand to benefit from the guarantees provided by verification and correct-by-construction C-to-Verilog generation.}
+
+%% Unreliability of HLS
+
+However, most existing tools cannot guarantee that the behaviour is preserved by HLS,\YH{Mentor's catapult C can in some cases} meaning behavioural simulation of the hardware design still has to be performed. HLS tools are also known to be quite unreliable; for example, Intel's (formerly Altera's) OpenCL SDK compiler contained too many bugs to even be considered for random testing, as more than 25\% of the test cases failed~\cite{lidbury15_many_core_compil_fuzzin}. In addition, designers often find HLS tools unreliable and fragile with respect to which language features are supported.\YH{Need citation} As HLS tools are extremely complex and can therefore incorrectly change the behaviour of the design, it is not possible to guarantee that all the properties of the code that were proven in software will also hold for the generated hardware.
+
+%% Current work in formal verification of HLS
+\NR{This is a good paragraph, but we need to relate it more to this work and why this work is different.}
+\NR{Focus on more high-level of "why this work is interesting"? Two key points we want to get across to the reader is that in existing works: validation is neccessary every time a new program is compiled and the verifying algorithm is not verified.}
+\NR{Also define words like validation, verifying algorithm (can you use the word ``verifier'',mechanisation}
+\NR{Having said that, keep the text for related work section.}
+Therefore, work is being done to prove the equivalence between the generated hardware and the original behavioural description in C. An example of a tool that implements this is Mentor's Catapult~\cite{mentor20_catap_high_level_synth}, which tries to match the states in the register transfer level (RTL) description to states in the original C code after an unverified translation. This technique is called translation validation~\cite{pnueli98_trans}, whereby the translation that the HLS tool performed is proven to have been correct for that particular input, by showing that the input and output behave in the same way for all possible executions. Translation validation is quite effective for proving complex optimisations such as scheduling~\cite{kim04_autom_fsmd,karfa06_formal_verif_method_sched_high_synth,chouksey20_verif_sched_condit_behav_high_level_synth} or code motion~\cite{banerjee14_verif_code_motion_techn_using_value_propag,chouksey19_trans_valid_code_motion_trans_invol_loops}; however, the validation has to be run every time the high-level synthesis is performed. In addition, the proofs are often not mechanised or directly related to the actual implementation, meaning the verifying algorithm itself might be wrong and could give false positives or false negatives.
+
+\NR{If we convey the right message in the above paragraph, then it follows up nicely with what CompCert provides us that these other tools do not. Then, we can get into how we use CompCert.}
+CompCert~\cite{leroy06_formal_certif_compil_back_end} is a C compiler that has been written and formally verified in the Coq theorem prover~\cite{bertot04_inter_theor_provin_progr_devel}. Most of the passes in CompCert have been proven correct directly, meaning that once the compiler is built, the proofs can be erased, as the algorithm has been shown to be correct independently of the input. However, some optimisations in CompCert have also been proven using translation validation~\cite{tristan08_formal_verif_trans_valid}, in which case the proof still happens at runtime. The difference is that here the verifier itself is formally verified in Coq, and it is therefore proven that it will only ever report the input and output as equivalent if that is actually the case.
+
+%% Contributions of paper
+
+In this paper we describe a fully verified high-level synthesis tool called CoqUp, which adds a Verilog backend to CompCert and proves that the behaviour of the C code is preserved with respect to an existing Verilog semantics. The main contributions of this paper are the following:
+
+\begin{itemize}
+ \item The first mechanised and formally verified HLS flow.
+ \item A mechanised proof in Coq, by simulation, relating CompCert's intermediate language to Verilog.
+ \item A description of the Verilog semantics used to integrate it into CompCert's model, together with some improvements to them.
+ \item The ability to run HLS benchmarks which are now known to be correct.
+\item \NR{We implement our Verilog semantics in CompCert and we are able to generate correct-by-construction Verilog for all programs in the CHStone benchmark suite, which is a well-known HLS benchmark.}
+\end{itemize}
+
+The first section describes the Verilog semantics that were used and extended to fit into CompCert's model. The second section then describes the HLS algorithm, together with its proof.
+
+CoqUp is open source and is hosted on GitHub\footnote{https://github.com/ymherklotz/coqup}.
+
+
+
+\NR{Other comments:}
+\NR{Is both the translator and verifier written in Coq?}
+\NR{A tool-flow diagram here will be useful. I thought you had one previously?}
+\NR{Do you think we have a very simple example of a program where wrong Verilog is generated in VHLS or LegUp, but not in CoqUp?}
diff --git a/main.tex b/main.tex
index dfb86ea..50a01aa 100644
--- a/main.tex
+++ b/main.tex
@@ -176,239 +176,13 @@
\maketitle
-\section{Verilog}
-
-Verilog is a hardware description language commonly used to design hardware. A Verilog design can be synthesised into more basic logic called a netlist, which describes how the individual gates connect to each other. This representation can then be placed onto a field-programmable gate array (FPGA) or turned into an application-specific integrated circuit (ASIC) to implement the design that was described in Verilog. The Verilog standard is quite large, however, and not all Verilog features are needed to describe hardware. Many Verilog features are only useful for simulation and do not affect the actual hardware itself, which means that these features do not have to be modelled in the semantics. In addition, as the HLS algorithm dictates which Verilog constructs are generated, the Verilog subset that has to be modelled by the semantics can be reduced even further to only the constructs that are needed. Supporting a smaller subset in the semantics also means that there is less chance that the standard is misunderstood, and that the semantics actually model how the Verilog is simulated.
-
-The Verilog semantics are based on the semantics proposed by \citet{loow19_verif_compil_verif_proces}, which were used to create a formal translation from HOL logic into a Verilog circuit. These semantics are quite practical as they restrict themselves to a small subset of Verilog, which can nonetheless be used to model all hardware constructs one would want to design. The main syntax for the Verilog subset is the following:
-
-\begin{align*}
- v ::=&\; \mathit{sz} * n\\
- e ::=&\; v\\[-2pt]
- |&\; x\\[-2pt]
- |&\; e [e]\\[-2pt]
- |&\; e\ \mathit{op}\ e\\[-2pt]
- |&\; \texttt{!} e\ |\ \texttt{~} e\\[-2pt]
- |&\; e \texttt{ ? } e \texttt{ : } e\\
- s ::=&\; s\ \texttt{;}\ s\ |\ \texttt{;}\\[-2pt]
- |&\; \texttt{if } e \texttt{ then } s \texttt{ else } s\\[-2pt]
- |&\; \texttt{case } e\ [e : s] \texttt{ endcase}\\[-2pt]
- |&\; e \texttt{ = } e\\[-2pt]
- |&\; e \texttt{ <= } e\\
- d ::=&\; \texttt{[n-1:0] } r\ |\ \texttt{[n-1:0] } r \texttt{ [m-1:0]}\\
- m ::=&\ \texttt{reg } d \texttt{;}\ |\ \texttt{input wire } d \texttt{;}\ |\ \texttt{output reg } d \texttt{;}\\
-|&\; \text{\tt always @(posedge clk)}\ s
-\end{align*}
-
-The main addition to the Verilog syntax is the explicit declaration of inputs and outputs, as well as variables and arrays. This means that the declarations have to be handled explicitly in the semantics as well, ensuring that all registers are declared with the correct size, as this affects how the Verilog module is synthesised and simulated. In addition, literal values are not represented by a list of nested boolean values, but by a size and a value. Finally, the last difference is that the syntax supports memories in Verilog explicitly, and we have separate semantics to handle nonblocking and blocking assignments to memory correctly.
-
-\subsection{Semantics}
-
-However, adjustments to these semantics had to be made to make them compatible with CompCert's small-step semantics. In addition, their semantics do not directly model memory, nor the handling of inputs and declarations in modules.
-
-\begin{equation}
- \label{eq:10}
- s = (\Gamma, \Delta)
-\end{equation}
-
-Definition of stmntrun. \JW{stmtntrun has four parameters, and if I read the rules correctly, it looks like the fourth parameter is uniquely determined by the first three. So, you could consider presenting stmntrun simply as a recursive definition, e.g. `stmntrun f s Vskip = s', `stmntrun f s0 (Vseq st1 st2) = stmntrun f (stmntrun f s0 st1) st2', and so on. That might (\emph{might}) be easier to read than the inference rule format.}
-
-\YH{It works well for Seq and Skip, but I think that CaseMatch and CaseNoMatch, or even if statements will be a bit clunky? We would have to use case statements in the function to do different things
-based on what they evaluate to. For case I think that would end up being a three way choice. I'll try it out though and see how nice it is.}
-
-\JP{I suppose this would essentially be an ``interpreter'' style semantics but we can prove equivalence pretty easily.}
-
-\YH{To add to that, I used to have both in the Coq code, but commented the recursive definition out, and now only have the inductive definition, which is basically what I copy pasted here.} \JW{Fair enough. Whatever you think ends up being the easiest to read and understand, really. There's something to be said for staying close to the Coq definitions anyway.} \YH{I have added more rules, we can always switch from one to the other now. One more thing I noticed though is that recursive definitions will need an \texttt{option} type.} \JW{Oh, then my suggestion of `stmntrun f s0 (Vseq st1 st2) = stmntrun f (stmntrun f s0 st1) st2' is a bit ill-typed then, unless the second parameter becomes option-typed too. Maybe the inference rules are better overall then.} \YH{Ah yes, I actually didn't even notice that, it would need the do notation, just like the implementation in Coq, so it may be easier to just use the inference rules.}
-
-\begin{equation}
- \label{eq:1}
- \inferrule[Skip]{ }{\text{srun}\ f\ s\ \mathtt{Vskip} = s}
-\end{equation}
-
-\begin{equation}
- \label{eq:4}
- \inferrule[Seq]{\text{srun}\ f\ s_{0}\ \mathit{st}_{1}\ s_{1} \\ \text{srun}\ f\ s_{1}\ \mathit{st}_{2}\ s_{2}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vseq}\ \mathit{st}_{1}\ \mathit{st}_{2})\ s_{2}}
-\end{equation}
-
-\begin{equation}
- \label{eq:2}
- \inferrule[CondTrue]{\text{erun}\ f\ \Gamma_{0}\ c\ v_{c} \\ \text{valToB}\ v_{c} = \mathtt{true} \\ \text{srun}\ f\ s_{0}\ \mathit{stt}\ s_{1}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcond}\ c\ \mathit{stt}\ \mathit{stf})\ s_{1}}
-\end{equation}
-
-\begin{equation}
- \label{eq:3}
- \inferrule[CondFalse]{\text{erun}\ f\ \Gamma_{0}\ c\ v_{c} \\ \text{valToB}\ v_{c} = \mathtt{false} \\ \text{srun}\ f\ s_{0}\ \mathit{stf}\ s_{1}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcond}\ c\ \mathit{stt}\ \mathit{stf})\ s_{1}}
-\end{equation}
-
-\begin{equation}
- \label{eq:5}
- \inferrule[CaseNoMatch]{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ cs\ \mathit{def})\ s_{1} \\ \text{erun}\ f\ \Gamma_{0}\ me\ mve \\ \text{erun}\ f\ \Gamma_{0}\ e\ ve \\ mve \neq ve}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ ((me,\ sc) :: cs)\ \mathit{def})\ s_{1}}
-\end{equation}
-
-\begin{equation}
- \label{eq:6}
- \inferrule[CaseMatch]{\text{srun}\ f\ s_{0}\ sc\ s_{1} \\ \text{erun}\ f\ \Gamma_{0}\ e\ ve \\ \text{erun}\ f\ \Gamma_{0}\ me\ mve \\ mve = ve}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ ((me,\ sc) :: cs)\ \mathit{def})\ s_{1}}
-\end{equation}
-
-\begin{equation}
- \label{eq:7}
- \inferrule[CaseDefault]{\text{srun}\ f\ s_{0}\ st\ s_{1}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ []\ (\mathtt{Some}\ st))\ s_{1}}
-\end{equation}
-
-\begin{equation}
- \label{eq:8}
- \inferrule[Blocking]{\text{name}\ \mathit{lhs} = \mathtt{OK}\ n \\ \text{erun}\ f\ \Gamma\ \mathit{rhs}\ v_{\mathit{rhs}}}{\text{srun}\ f\ (\Gamma, \Delta)\ (\mathtt{Vblock}\ \mathit{lhs}\ \mathit{rhs})\ (\Gamma ! n \rightarrow v_{\mathit{rhs}}, \Delta)}
-\end{equation}
-
-\begin{equation}
- \label{eq:9}
- \inferrule[Nonblocking]{\text{name}\ \mathit{lhs} = \mathtt{OK}\ n \\ \text{erun}\ f\ \Gamma\ \mathit{rhs}\ v_{\mathit{rhs}}}{\text{srun}\ f\ (\Gamma, \Delta)\ (\mathtt{Vnonblock}\ \mathit{lhs}\ \mathit{rhs}) (\Gamma, \Delta ! n \rightarrow v_{\mathit{rhs}})}
-\end{equation}
-
-\section{Turning CompCert into an HLS tool}
-
-%% Should maybe go in the introduction instead.
-
-\begin{figure}
- \centering
- \begin{tikzpicture}
- [language/.style={fill=white,rounded corners=2pt}]
- \fill[compcert,rounded corners=3pt] (-1,-1) rectangle (9,1.5);
- \fill[formalhls,rounded corners=3pt] (-1,-1.5) rectangle (9,-2.5);
- \node[language] at (0,0) (clight) {Clight};
- \node[language] at (2,0) (cminor) {C\#minor};
- \node[language] at (4,0) (rtl) {RTL};
- \node[language] at (6,0) (ltl) {LTL};
- \node[language] at (8,0) (ppc) {PPC};
- \node[language] at (4,-2) (dfgstmd) {HTL};
- \node[language] at (7,-2) (verilog) {Verilog};
- \node at (0,1) {CompCert};
- \node at (0,-2) {CoqUp};
- \draw[->] (clight) -- (cminor);
- \draw[->,dashed] (cminor) -- (rtl);
- \draw[->] (rtl) -- (ltl);
- \draw[->,dashed] (ltl) -- (ppc);
- \draw[->] (rtl) -- (dfgstmd);
- \draw[->] (dfgstmd) -- (verilog);
- \end{tikzpicture}
- \caption{Verilog backend branching off at the RTL stage.}\label{fig:rtlbranch}
-\end{figure}
-
-This section covers the main architecture of the HLS tool, and how the backend was added to CompCert. CompCert is made up of 11 intermediate languages in between the Clight input and the assembly output. These intermediate languages each serve a different purpose and contain various different optimisations. When designing a new backend for CompCert, it is therefore crucial to know where to branch off and start the hardware generation. Many of the optimisations that the CompCert backend performs are not necessary when generating custom hardware and not relying on a CPU anymore, such as register allocation or even scheduling. It is therefore important to find the right intermediate language so that the HLS tool still benefits from many of the generic optimisations that CompCert performs, but does not receive the code transformations that are specific to CPU architectures.
-
-Existing HLS compilers usually use LLVM IR as an intermediate representation when performing HLS specific optimisations, as each instruction can be mapped quite well to hardware which performs the same behaviour. CompCert's RTL is the intermediate language that resembles LLVM IR the most, as it also has an infinite number of pseudo-registers and each instruction maps well to hardware. In addition to that, many optimisations that are also useful for HLS are performed in RTL, which means that if it is supported as the input language, the HLS algorithm benefits from the same optimisations. It is therefore a good candidate to be chosen as the input language to the HLS backend. The complete flow that CoqUp takes is show in figure~\ref{fig:rtlbranch}.
-
-%%TODO: Maybe add why LTL and the other smaller languages are not that well suited
-
-\subsection{CompCert RTL}
-
-All CompCert intermediate language follow the similar structure below:
-
-\begin{align*}
- \mathit{program} \quad ::= \{ &\mathbf{variables} : (\mathit{id} * \mathit{data}), \\
- &\mathbf{functions} : (\mathit{id} * \mathit{function\_def}),\\
- &\mathbf{main} : \mathit{id} \}
-\end{align*}
-
-\noindent where function definitions can either be internal or external. External functions are functions that are not defined in the current translation unit, and are therefore not part of the current translation. The difference in between the CompCert intermediate languages is therefore how the internal function is defined, as that defines the structure of the language itself.
-
-%% Describe RTL
-An RTL function definition is a control-flow graph of instructions, where each instruction explicitly links to the next instruction that should be executed.
-
-%%TODO: Finish this section and describe the syntax and semantics of RTL.
-
-\subsection{HTL}
-
-RTL is first translated to an intermediate language called hardware transfer language (HTL), which is a finite state machine with datapath (FSMD) representation of the RTL code. HTL, like all CompCert intermediate languages, has the same program structure as RTL, but internal functions now contain logic to control the order of execution, and a datapath that transforms the data in the registers. This is represented by two maps from states to Verilog statements: one for the control logic and one for the current position in the datapath. The syntax of HTL functions is the following:
-
-\begin{align*}
- \mathit{g} \quad &::= \quad n \mapsto s\\
- \mathit{d_{r}} \quad &::= \quad r \mapsto (io? * n)\\
- \mathit{d_{a}} \quad &::= \quad r \mapsto (io? * n * n)\\
- \mathit{F} \quad &::= \quad \big\{\ \mathtt{params} : \vec{r}\\
- &\mathtt{datapath} : g\\
- &\mathtt{controllogic} : g\\
- &\mathtt{entrypoint} : n\\
- &\mathtt{st, stk, finish, return, start, reset, clk} : r\\
- &\mathtt{scldecls} : d_{r}\\
- &\mathtt{arrdecls} : d_{a}\ \big\}
-\end{align*}
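The \texttt{datapath} and \texttt{controllogic} maps translate almost directly into two case statements in the generated Verilog, both indexed by the state register \texttt{st}. The following is only a sketch of the shape this takes; the register names and state numbering are illustrative, not taken from CoqUp's actual output:

```verilog
// Hypothetical Verilog generated from an HTL function: one always-block
// for the control logic and one for the datapath, both dispatching on
// the current state held in the register st.
always @(posedge clk)            // controllogic : state -> next state
  case (st)
    32'd1: st <= 32'd2;
    32'd2: st <= 32'd3;
    32'd3: st <= 32'd3;          // final state loops on itself
  endcase

always @(posedge clk)            // datapath : state -> register updates
  case (st)
    32'd1: x <= 32'd0;
    32'd2: x <= x + 32'd1;
    32'd3: finish <= 1'd1;
  endcase
```

Keeping control logic and datapath in separate maps in HTL means each can be inspected and proved about independently, while the two generated always-blocks still execute in lockstep on the same clock edge.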
-
-\subsection{HLS Algorithm}
-
-\section{Proof}
-
-\subsection{Coq Mechanisation}
-
-
+\input{introduction}
+\input{verilog}
+\input{algorithm}
+\input{proof}
\input{evaluation}
-
-\section{Related Work}
-
-\JW{Some papers to cite somewhere:
-\begin{itemize}
- \item \citet{kundu08_valid_high_level_synth} have done translation validation on an HLS tool called SPARK. They don't have \emph{very} strong evidence that HLS tools are buggy; they just say ``HLS tools are large and complex software systems, often with hundreds of
-thousands of lines of code, and as with any software of this scale, they are prone
-to logical and implementation errors''. They did, however, find two bugs in SPARK as a result of their efforts, so that provides a small bit of evidence that HLS tools are buggy. \@
- \item \citet{chapman92_verif_bedroc} have proved (manually, as far as JW can tell) that the front-end and scheduler of the BEDROC synthesis system is correct. (NB:\@ Chapman also wrote a PhD thesis about this (1994) but it doesn't appear to be online.)
- \item \citet{ellis08} wrote a PhD thesis that was supervised by Cliff Jones whom you met at the VeTSS workshop thing last year. At a high level, it's about verifying a high-level synthesis tool using Isabelle, so very relevant to this project, though scanning through the pdf, it's quite hard to ascertain exactly what the contributions are. Strange that they never published a paper about the work -- it makes it very hard to find!
-\end{itemize}
-}
-
-\YH{Amazing, thank you, yes it seems like Kundu \textit{et al.} have quite a few papers on formal verification of HLS algorithms using translation validation, as well as~\citet{karfa06_formal_verif_method_sched_high_synth}, all using the SPARK~\cite{gupta03_spark} synthesis tool. And yes thank you, will definitely cite that paper. There seem to be similar early proofs of high-level synthesis like \citet{hwang91_formal_approac_to_sched_probl} or \citet{grass94_high}. There are also the Occam papers that I need to mention too, like \citet{page91_compil_occam}, \citet{jifeng93_towar}, \citet{perna11_correc_hardw_synth} and \citet{perna12_mechan_wire_wise_verif_handel_c_synth}.}
-\JW{Well it's a good job there's no page limit on bibliographies these days then! I'll keep an eye out for more papers we could cite when I get a free moment. (No need to cite \emph{everything} of course.)}
-\YH{Yes that's true, there are too many to cite! And great thank you, that would help a lot, I have quite a few papers I still want to cite, but have to think about where they will fit in.}
-
-\section{Conclusion}
+\input{related}
+\input{conclusion}
%% Acknowledgments
\begin{acks} %% acks environment is optional
diff --git a/proof.tex b/proof.tex
new file mode 100644
index 0000000..2b571d9
--- /dev/null
+++ b/proof.tex
@@ -0,0 +1,3 @@
+\section{Proof}
+
+\subsection{Coq Mechanisation}
diff --git a/related.tex b/related.tex
new file mode 100644
index 0000000..335aa8d
--- /dev/null
+++ b/related.tex
@@ -0,0 +1,15 @@
+\section{Related Work}
+
+\JW{Some papers to cite somewhere:
+\begin{itemize}
+ \item \citet{kundu08_valid_high_level_synth} have done translation validation on an HLS tool called SPARK. They don't have \emph{very} strong evidence that HLS tools are buggy; they just say ``HLS tools are large and complex software systems, often with hundreds of
+thousands of lines of code, and as with any software of this scale, they are prone
+to logical and implementation errors''. They did, however, find two bugs in SPARK as a result of their efforts, so that provides a small bit of evidence that HLS tools are buggy. \@
+ \item \citet{chapman92_verif_bedroc} have proved (manually, as far as JW can tell) that the front-end and scheduler of the BEDROC synthesis system is correct. (NB:\@ Chapman also wrote a PhD thesis about this (1994) but it doesn't appear to be online.)
+ \item \citet{ellis08} wrote a PhD thesis that was supervised by Cliff Jones whom you met at the VeTSS workshop thing last year. At a high level, it's about verifying a high-level synthesis tool using Isabelle, so very relevant to this project, though scanning through the pdf, it's quite hard to ascertain exactly what the contributions are. Strange that they never published a paper about the work -- it makes it very hard to find!
+\end{itemize}
+}
+
+\YH{Amazing, thank you, yes it seems like Kundu \textit{et al.} have quite a few papers on formal verification of HLS algorithms using translation validation, as well as~\citet{karfa06_formal_verif_method_sched_high_synth}, all using the SPARK~\cite{gupta03_spark} synthesis tool. And yes thank you, will definitely cite that paper. There seem to be similar early proofs of high-level synthesis like \citet{hwang91_formal_approac_to_sched_probl} or \citet{grass94_high}. There are also the Occam papers that I need to mention too, like \citet{page91_compil_occam}, \citet{jifeng93_towar}, \citet{perna11_correc_hardw_synth} and \citet{perna12_mechan_wire_wise_verif_handel_c_synth}.}
+\JW{Well it's a good job there's no page limit on bibliographies these days then! I'll keep an eye out for more papers we could cite when I get a free moment. (No need to cite \emph{everything} of course.)}
+\YH{Yes that's true, there are too many to cite! And great thank you, that would help a lot, I have quite a few papers I still want to cite, but have to think about where they will fit in.}
diff --git a/verilog.tex b/verilog.tex
new file mode 100644
index 0000000..dca4d65
--- /dev/null
+++ b/verilog.tex
@@ -0,0 +1,88 @@
+\section{Verilog}
+
+Verilog is a hardware description language commonly used to design hardware. A Verilog design can be synthesised into more basic logic called a netlist, which describes how the individual gates connect to each other. This representation can then be placed onto a field-programmable gate array (FPGA) or turned into an application-specific integrated circuit (ASIC) to implement the design that was described in Verilog. The Verilog standard is quite large, however, and not all of its features are needed to describe hardware. Many Verilog features are only useful for simulation and do not affect the actual hardware itself, which means that these features do not have to be modelled in the semantics. In addition, as the HLS algorithm dictates which Verilog constructs are generated, the subset that has to be modelled by the semantics can be reduced even further, to only the constructs that are needed. Supporting a smaller subset in the semantics also reduces the chance that the standard is misunderstood, and makes it more likely that the semantics match how the Verilog is actually simulated.
+
+The Verilog semantics are based on those proposed by \citet{loow19_verif_compil_verif_proces}, which were used to create a formal translation from HOL functions into Verilog circuits. These semantics are quite practical, as they restrict themselves to a small subset of Verilog that can nonetheless model all the hardware constructs one would want to design. The main syntax for the Verilog subset is the following:
+
+\begin{align*}
+ v ::=&\; \mathit{sz} * n\\
+ e ::=&\; v\\[-2pt]
+ |&\; x\\[-2pt]
+ |&\; e [e]\\[-2pt]
+ |&\; e\ \mathit{op}\ e\\[-2pt]
+ |&\; \texttt{!} e\ |\ \texttt{~} e\\[-2pt]
+ |&\; e \texttt{ ? } e \texttt{ : } e\\
+ s ::=&\; s\ \texttt{;}\ s\ |\ \texttt{;}\\[-2pt]
+ |&\; \texttt{if } e \texttt{ then } s \texttt{ else } s\\[-2pt]
+ |&\; \texttt{case } e\ [e : s] \texttt{ endcase}\\[-2pt]
+ |&\; e \texttt{ = } e\\[-2pt]
+ |&\; e \texttt{ <= } e\\
+ d ::=&\; \texttt{[n-1:0] } r\ |\ \texttt{[n-1:0] } r \texttt{ [m-1:0]}\\
+ m ::=&\ \texttt{reg } d \texttt{;}\ |\ \texttt{input wire } d \texttt{;}\ |\ \texttt{output reg } d \texttt{;}\\
+|&\; \text{\tt always @(posedge clk)}\ s
+\end{align*}
+
+The main addition to the Verilog syntax is the explicit declaration of inputs and outputs, as well as variables and arrays. This means that declarations also have to be handled explicitly in the semantics, which adds the guarantee that all registers are declared with the correct size, as this affects how the Verilog module is synthesised and simulated. In addition, literal values are not represented by a list of nested boolean values; instead, they are represented by a size together with a value. Finally, the syntax supports memories in Verilog explicitly, and separate semantics handle nonblocking and blocking assignments to memory correctly.
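To make the subset concrete, the following is an illustrative fragment of module items that stays within the grammar above; the register names and sizes are hypothetical, not drawn from CoqUp's output:

```verilog
// Explicitly sized declarations, and a single always-block triggered on
// the positive clock edge whose body is a case statement over a state
// register. Sequencing inside an arm is expressed with ';' as in the
// grammar's  s ::= s ; s  production.
input wire [0:0] clk;
reg [31:0] state;
reg [31:0] x;

always @(posedge clk)
  case (state)
    32'd0: x = 32'd1;        // blocking assignment, visible immediately
    32'd1: x <= x + 32'd1;   // nonblocking, committed at the end of the cycle
  endcase
```

Every declaration carries its width, matching the grammar's \texttt{[n-1:0]} declarations, and every literal carries its size, matching the $\mathit{sz} * n$ representation of values.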
+
+\subsection{Semantics}
+
+However, adjustments to these semantics had to be made to make them compatible with CompCert's small-step semantics. In addition, their semantics do not directly model memory, nor the handling of inputs and declarations in modules.
+
+\begin{equation}
+  \label{eq:10}
+  s = (\Gamma, \Delta)
+\end{equation}
+
+\noindent The state $s$ of a module is a pair of association maps: $\Gamma$ holds the current values of all variables, while $\Delta$ collects the pending values of nonblocking assignments, which only take effect at the end of the clock cycle.
+
+Definition of stmntrun. \JW{stmntrun has four parameters, and if I read the rules correctly, it looks like the fourth parameter is uniquely determined by the first three. So, you could consider presenting stmntrun simply as a recursive definition, e.g. `stmntrun f s Vskip = s', `stmntrun f s0 (Vseq st1 st2) = stmntrun f (stmntrun f s0 st1) st2', and so on. That might (\emph{might}) be easier to read than the inference rule format.}
+
+\YH{It works well for Seq and Skip, but I think that CaseMatch and CaseNoMatch, or even if statements will be a bit clunky? We would have to use case statements in the function to do different things
+based on what they evaluate to. For case I think that would end up being a three way choice. I'll try it out though and see how nice it is.}
+
+\JP{I suppose this would essentially be an ``interpreter'' style semantics but we can prove equivalence pretty easily.}
+
+\YH{To add to that, I used to have both in the Coq code, but commented the recursive definition out, and now only have the inductive definition, which is basically what I copy pasted here.} \JW{Fair enough. Whatever you think ends up being the easiest to read and understand, really. There's something to be said for staying close to the Coq definitions anyway.} \YH{I have added more rules, we can always switch from one to the other now. One more thing I noticed though is that recursive definitions will need an \texttt{option} type.} \JW{Oh, then my suggestion of `stmntrun f s0 (Vseq st1 st2) = stmntrun f (stmntrun f s0 st1) st2' is a bit ill-typed then, unless the second parameter becomes option-typed too. Maybe the inference rules are better overall then.} \YH{Ah yes, I actually didn't even notice that, it would need the do notation, just like the implementation in Coq, so it may be easier to just use the inference rules.}
+
+\begin{equation}
+ \label{eq:1}
+  \inferrule[Skip]{ }{\text{srun}\ f\ s\ \mathtt{Vskip}\ s}
+\end{equation}
+
+\begin{equation}
+ \label{eq:4}
+ \inferrule[Seq]{\text{srun}\ f\ s_{0}\ \mathit{st}_{1}\ s_{1} \\ \text{srun}\ f\ s_{1}\ \mathit{st}_{2}\ s_{2}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vseq}\ \mathit{st}_{1}\ \mathit{st}_{2})\ s_{2}}
+\end{equation}
+
+\begin{equation}
+ \label{eq:2}
+ \inferrule[CondTrue]{\text{erun}\ f\ \Gamma_{0}\ c\ v_{c} \\ \text{valToB}\ v_{c} = \mathtt{true} \\ \text{srun}\ f\ s_{0}\ \mathit{stt}\ s_{1}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcond}\ c\ \mathit{stt}\ \mathit{stf})\ s_{1}}
+\end{equation}
+
+\begin{equation}
+ \label{eq:3}
+ \inferrule[CondFalse]{\text{erun}\ f\ \Gamma_{0}\ c\ v_{c} \\ \text{valToB}\ v_{c} = \mathtt{false} \\ \text{srun}\ f\ s_{0}\ \mathit{stf}\ s_{1}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcond}\ c\ \mathit{stt}\ \mathit{stf})\ s_{1}}
+\end{equation}
+
+\begin{equation}
+ \label{eq:5}
+ \inferrule[CaseNoMatch]{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ cs\ \mathit{def})\ s_{1} \\ \text{erun}\ f\ \Gamma_{0}\ me\ mve \\ \text{erun}\ f\ \Gamma_{0}\ e\ ve \\ mve \neq ve}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ ((me,\ sc) :: cs)\ \mathit{def})\ s_{1}}
+\end{equation}
+
+\begin{equation}
+ \label{eq:6}
+ \inferrule[CaseMatch]{\text{srun}\ f\ s_{0}\ sc\ s_{1} \\ \text{erun}\ f\ \Gamma_{0}\ e\ ve \\ \text{erun}\ f\ \Gamma_{0}\ me\ mve \\ mve = ve}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ ((me,\ sc) :: cs)\ \mathit{def})\ s_{1}}
+\end{equation}
+
+\begin{equation}
+ \label{eq:7}
+ \inferrule[CaseDefault]{\text{srun}\ f\ s_{0}\ st\ s_{1}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ []\ (\mathtt{Some}\ st))\ s_{1}}
+\end{equation}
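To see how the case rules chain, here is a small example derivation (eliding the erun premises for the scrutinee and guards) for a case statement whose scrutinee fails to match the first guard and matches the second:

```latex
% Hypothetical run: the scrutinee e evaluates to ve, the first guard to
% mve_1 with mve_1 <> ve, and the second guard to mve_2 = ve, so
% CaseNoMatch peels off the first arm and CaseMatch runs the second.
\begin{equation*}
  \inferrule*[right=CaseNoMatch]
    {\inferrule*[right=CaseMatch]
       {\text{srun}\ f\ s_{0}\ sc_{2}\ s_{1} \\ mve_{2} = ve}
       {\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ ((me_{2},\ sc_{2}) :: [])\ \mathit{def})\ s_{1}}
     \\ mve_{1} \neq ve}
    {\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ ((me_{1},\ sc_{1}) :: (me_{2},\ sc_{2}) :: [])\ \mathit{def})\ s_{1}}
\end{equation*}
```

The recursion always peels arms off the front of the list, so at most one arm's statement is ever run, and CaseDefault only applies once the list is exhausted.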
+
+\begin{equation}
+ \label{eq:8}
+ \inferrule[Blocking]{\text{name}\ \mathit{lhs} = \mathtt{OK}\ n \\ \text{erun}\ f\ \Gamma\ \mathit{rhs}\ v_{\mathit{rhs}}}{\text{srun}\ f\ (\Gamma, \Delta)\ (\mathtt{Vblock}\ \mathit{lhs}\ \mathit{rhs})\ (\Gamma ! n \rightarrow v_{\mathit{rhs}}, \Delta)}
+\end{equation}
+
+\begin{equation}
+ \label{eq:9}
+ \inferrule[Nonblocking]{\text{name}\ \mathit{lhs} = \mathtt{OK}\ n \\ \text{erun}\ f\ \Gamma\ \mathit{rhs}\ v_{\mathit{rhs}}}{\text{srun}\ f\ (\Gamma, \Delta)\ (\mathtt{Vnonblock}\ \mathit{lhs}\ \mathit{rhs}) (\Gamma, \Delta ! n \rightarrow v_{\mathit{rhs}})}
+\end{equation}
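The $\Gamma$/$\Delta$ split is what gives blocking and nonblocking assignments their different behaviour. As an illustrative (non-CoqUp) example, a nonblocking swap works because both right-hand sides are read from $\Gamma$, while the writes only land in $\Delta$ until the end of the cycle:

```verilog
// Both reads see the values from the start of the cycle (Gamma); the
// writes are collected in Delta and merged in at the clock edge, so the
// registers a and b are swapped.
always @(posedge clk) begin
  a <= b;   // reads the old b
  b <= a;   // still reads the old a
end
```

With blocking assignments (`a = b; b = a;`) the first write updates $\Gamma$ immediately, so the second read observes the new value and both registers end up holding the old \texttt{b}; the Blocking and Nonblocking rules above capture exactly this difference.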