summaryrefslogtreecommitdiffstats
path: root/main.tex
blob: 6d3b4e8aeaad572843653730b5740617246c0286 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
%% For double-blind review submission, w/o CCS and ACM Reference (max submission space)
\documentclass[pagebackref=true,acmsmall,review]{acmart}\settopmatter{printfolios=true,printccs=false,printacmref=false}
%% For double-blind review submission, w/ CCS and ACM Reference
%\documentclass[acmsmall,review,anonymous]{acmart}\settopmatter{printfolios=true}
%% For single-blind review submission, w/o CCS and ACM Reference (max submission space)
%\documentclass[acmsmall,review]{acmart}\settopmatter{printfolios=true,printccs=false,printacmref=false}
%% For single-blind review submission, w/ CCS and ACM Reference
%\documentclass[acmsmall,review]{acmart}\settopmatter{printfolios=true}
%% For final camera-ready submission, w/ required CCS and ACM Reference
%\documentclass[acmsmall]{acmart}\settopmatter{}

%% Journal information
%% Supplied to authors by publisher for camera-ready submission;
%% use defaults for review submission.
\acmJournal{PACMPL}
\acmVolume{1}
\acmNumber{CONF} % CONF = POPL or ICFP or OOPSLA
\acmArticle{1}
\acmYear{2018}
\acmMonth{1}\acmDOI{}
\startPage{1}

%% Copyright information
%% Supplied to authors (based on authors' rights management selection;
%% see authors.acm.org) by publisher for camera-ready submission;
%% use 'none' for review submission.
\setcopyright{none}
%\setcopyright{acmcopyright}
%\setcopyright{acmlicensed}
%\setcopyright{rightsretained}
%\copyrightyear{2018}

%% Bibliography style
\bibliographystyle{ACM-Reference-Format}
%% Citation style
\citestyle{acmauthoryear}

\renewcommand*{\backrefalt}[4]{%
    \ifcase #1%
          \or [Page~#2.]%
          \else [Pages~#2.]%
    \fi%
    }

\usepackage{amsmath}
\usepackage{booktabs}
\usepackage{mathpartir}
\usepackage{subcaption}

\newif\ifCOMMENTS
\COMMENTStrue
\newcommand{\Comment}[3]{\ifCOMMENTS\textcolor{#1}{{\bf [\![#2:} #3{\bf ]\!]}}\fi}
\newcommand\JW[1]{\Comment{red!75!black}{JW}{#1}}
\newcommand\YH[1]{\Comment{green!50!blue}{YH}{#1}}
\newcommand\JP[1]{\Comment{blue!50!black}{JP}{#1}}

\begin{document}

%% Title information
\title[Formally Verified HLS]{Formally Verified High-Level Synthesis}         %% [Short Title] is optional;
                                        %% when present, will be used in
                                        %% header instead of Full Title.
%\titlenote{with title note}             %% \titlenote is optional;
                                        %% can be repeated if necessary;
                                        %% contents suppressed with 'anonymous'
%\subtitle{Subtitle}                     %% \subtitle is optional
%\subtitlenote{with subtitle note}       %% \subtitlenote is optional;
                                        %% can be repeated if necessary;
                                        %% contents suppressed with 'anonymous'


%% Author information
%% Contents and number of authors suppressed with 'anonymous'.
%% Each author should be introduced by \author, followed by
%% \authornote (optional), \orcid (optional), \affiliation, and
%% \email.
%% An author may have multiple affiliations and/or emails; repeat the
%% appropriate command.
%% Many elements are not rendered, but should be provided for metadata
%% extraction tools.

%% Author with single affiliation.
\author{Yann Herklotz}
\orcid{0000-0002-2329-1029}
\affiliation{
%  \position{PhD Student}
%  \department{EEE}
  \institution{Imperial College London}
%  \streetaddress{South Kensington Campus}
%  \city{London}
%  \state{}
%  \postcode{SW7 2AZ}
  \country{UK}
}
\email{yann.herklotz15@imperial.ac.uk}

\author{James Pollard}
\orcid{0000-0003-1404-1527}
\affiliation{
%  \position{}
%  \department{EEE}
  \institution{Imperial College London}
%  \streetaddress{South Kensington Campus}
%  \city{London}
%  \state{}
%  \postcode{SW7 2AZ}
  \country{UK}
}
\email{james.pollard16@imperial.ac.uk}

\author{Nadesh Ramanathan}
\orcid{0000-0000-0000-0000}
\affiliation{
%  \position{}
%  \department{EEE}
  \institution{Imperial College London}
%  \streetaddress{South Kensington Campus}
%  \city{London}
%  \state{}
%  \postcode{SW7 2AZ}
  \country{UK}
}
\email{n.ramanathan14@imperial.ac.uk}

\author{John Wickerson}
\orcid{0000-0000-0000-0000}
\affiliation{
%  \position{}
%  \department{EEE}
  \institution{Imperial College London}
%  \streetaddress{South Kensington Campus}
%  \city{London}
%  \state{}
%  \postcode{SW7 2AZ}
  \country{UK}
}
\email{j.wickerson@imperial.ac.uk}


%% Abstract
\begin{abstract}
  High-level synthesis (HLS) is the process of converting an algorithmic description, often written in C, to specialised hardware with the same behaviour, expressed in a hardware description language.  HLS is becoming increasingly popular, as it allows for faster prototyping of hardware designs, as well as simpler behavioural verification using all the tools available in the software ecosystem.  However, for software verification to be useful, the HLS process has to be trusted to be sure that the properties proven in software also apply to the hardware.  Current HLS tools do not provide this guarantee, meaning verification is often performed again at the hardware level, which is inefficient.  Our work focuses on formally verifying the high-level synthesis process from C to Verilog by proving that the behaviour remains the same according to the C and Verilog semantics.  We use the CompCert frontend to process Clight and add a Verilog backend to get a fully verified HLS tool.
\end{abstract}


%% 2012 ACM Computing Classification System (CSS) concepts
%% Generate at 'http://dl.acm.org/ccs/ccs.cfm'.
\begin{CCSXML}
<ccs2012>
<concept>
<concept_id>10011007.10011006.10011008</concept_id>
<concept_desc>Software and its engineering~General programming languages</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10003456.10003457.10003521.10003525</concept_id>
<concept_desc>Social and professional topics~History of programming languages</concept_desc>
<concept_significance>300</concept_significance>
</concept>
</ccs2012>
\end{CCSXML}

\ccsdesc[500]{Software and its engineering~General programming languages}
\ccsdesc[300]{Social and professional topics~History of programming languages}
%% End of generated code


%% Keywords
%% comma separated list
\keywords{CompCert, Coq, high-level synthesis}

\maketitle

\section{Introduction}

%% Motivation for why HLS might be needed

The current approach to writing energy-efficient and high-throughput applications is to use custom hardware which has been specialised for that application, instead of relying on and optimising for a CPU.\@  This comes at the cost of having to design the customised hardware, which, if using hardware description languages (HDL) such as Verilog can be tedious and quite error prone.  Especially with the size growing over the years, it can become difficult to verify that the hardware design behaves in the expected way, as simulation of hardware description languages can be quite inefficient.  Furthermore, the algorithms that are being accelerated in hardware often already have a software implementation, meaning they have to be reimplemented efficiently in a hardware description language which can be time consuming.

%% Definition and benefits of HLS

One possible solution to this is to use high-level synthesis (HLS), which is the process of generating custom hardware, represented in a hardware description language, based on a behavioural description, often a subset of C.  This elevates the level of abstraction, because the description of the algorithm in C is inherently untimed, meaning actions don't have to be scheduled into clock cycles.  The higher level of abstraction makes it easier to reason about the algorithms and therefore also makes them easier to maintain.  This already reduces the time taken to design the hardware, especially if a software description of the algorithm already exists, because it won't have to be designed again at a lower level and directly in hardware.  However, another benefit of using HLS to design the hardware, is that function verification of the design is much simpler and more efficient than if it was done at the HDL stage, as the whole software ecosystem can be used to do that.  Instead of having to run simulations of the hardware, the C code can just be compiled and executed natively, as the hardware design after HLS should have the same behaviour.

%% Unreliability of HLS

However, the fact that the behaviour is preserved after HLS cannot be guaranteed most existing tools,\YH{Mentor's catapult C can in some cases} meaning behavioural simulation of the hardware design still has to be performed.  HLS tools are also known to be quite unreliable, for example, Intel's (formerly Altera's) OpenCL SDK compiler contained too many bugs to even be considered for random testing, as more than 25\% of the testcases failed~\cite{lidbury15_many_core_compil_fuzzin}.  In addition to that, designers often feel like HLS tools are quite unreliable and fragile with respect to which language features that are supported.\YH{Need citation}  As HLS tools are extremely complex and can therefore incorrectly change the behaviour of the design, it is not possible to guarantee that all the properties of the code that were proven in software will also hold for the generated hardware.

%% Current work in formal verification of HLS

Therefore, work is being done to prove the equivalence between the generated hardware and the original behavioural description in C.  An example of a tool that implements this is Mentor's Catapult~\cite{mentor20_catap_high_level_synth}, which tries to match the states in the register transfer level (RTL) description to states in the original C code after an unverified translation.  This technique is called translation validation~\cite{pnueli98_trans}, whereby the translation that the HLS tool performed is proven to have been correct for that input, by showing that they behave in the same way for all possible inputs.  Using translation validation is quite effective for proving complex optimisations such as scheduling~\cite{kim04_autom_fsmd,karfa06_formal_verif_method_sched_high_synth,chouksey20_verif_sched_condit_behav_high_level_synth} or code motion~\cite{banerjee14_verif_code_motion_techn_using_value_propag,chouksey19_trans_valid_code_motion_trans_invol_loops}, however, the validation has to be run every time the high-level synthesis is performed.  In addition to that, the proofs are often not mechanised or directly related to the actual implementation, meaning the verifying algorithm might be wrong and could give false positives or false negatives.

CompCert~\cite{leroy06_formal_certif_compil_back_end} is a C compiler that has been written and formally verified in the Coq theorem prover~\cite{bertot04_inter_theor_provin_progr_devel}.  First of all, most of the proofs in CompCert have been formally verified directly, meaning that once the compiler is built, the proofs can be erased as the algorithm has been shown to be correct independent of the input.  However, some optimisations in CompCert have also been proven using translation validation~\cite{tristan08_formal_verif_trans_valid}, in which case the proof still happens at runtime.  The difference is that in this case, the verifier itself is actually formally verified in Coq and is therefore proven that it will only ever say that the input and output are equivalent if that is actually the case.

%% Contributions of paper

In this paper we describe a fully verified high-level synthesis tool called CoqUp, which adds a Verilog backend to CompCert and proves that the behaviour of the C code does not change according to existing Verilog semantics.  The main contributions of the paper are the following:

\begin{itemize}
  \item first mechanised and formally verified HLS flow
  \item Proof by simulation mechanised in Coq between CompCert's intermediate language and Verilog.
  \item Description of the Verilog semantics used to integrate it into CompCert's model.
  \item able to run HLS benchmarks which are now known to be correct.
\end{itemize}

CoqUp is open source and is hosted on Github\footnote{https://github.com/ymherklotz/coqup}.

\section{Background}

\subsection{Verilog}

\subsection{CompCert / Coq / Proof by simulation}

\subsection{HLS}

\JW{Some papers to cite somewhere:
\begin{itemize}
    \item \citet{kundu08_valid_high_level_synth} have done translation validation on an HLS tool called SPARK. They don't have \emph{very} strong evidence that HLS tools are buggy; they just say ``HLS tools are large and complex software systems, often with hundreds of
thousands of lines of code, and as with any software of this scale, they are prone
to logical and implementation errors''. They did, however, find two bugs in SPARK as a result of their efforts, so that provides a small bit of evidence that HLS tools are buggy. \@
    \item \citet{chapman92_verif_bedroc} have proved (manually, as far as JW can tell) that the front-end and scheduler of the BEDROC synthesis system is correct. (NB:\@ Chapman also wrote a PhD thesis about this (1994) but it doesn't appear to be online.)
    \item \citet{ellis08} wrote a PhD thesis that was supervised by Cliff Jones whom you met at the VeTSS workshop thing last year. At a high level, it's about verifying a high-level synthesis tool using Isabelle, so very relevant to this project, though scanning through the pdf, it's quite hard to ascertain exactly what the contributions are. Strange that they never published a paper about the work -- it makes it very hard to find!
\end{itemize}
}

\YH{Amazing, thank you, yes it seems like Kundu \textit{et al.} have quite a few papers on formal verification of HLS algorithms using translation validation, as well as~\citet{karfa06_formal_verif_method_sched_high_synth}, all using the SPARK~\cite{gupta03_spark} synthesis tool.  And yes thank you, will definitely cite that paper.  There seem to be similar early proofs of high-level synthesis like \citet{hwang91_formal_approac_to_sched_probl} or \citet{grass94_high}.  There are also the Occam papers that I need to mention too, like \citet{page91_compil_occam}, \citet{jifeng93_towar}, \citet{perna11_correc_hardw_synth} and \citet{perna12_mechan_wire_wise_verif_handel_c_synth}.} 
\JW{Well it's a good job there's no page limit on bibliographies these days then! I'll keep an eye out for more papers we could cite when I get a free moment. (No need to cite \emph{everything} of course.)}
\YH{Yes that's true, there are too many to cite! And great thank you, that would help a lot, I have quite a few papers I still want to cite, but have to think about where they will fit in.}

\section{Verilog Semantics}

Definition of state.

\begin{equation}
  \label{eq:10}
  s = (\Gamma, \Delta)
\end{equation}

Definition of stmntrun. \JW{stmtntrun has four parameters, and if I read the rules correctly, it looks like the fourth parameter is uniquely determined by the first three. So, you could consider presenting stmntrun simply as a recursive definition, e.g. `stmntrun f s Vskip = s', `stmntrun f s0 (Vseq st1 st2) = stmntrun f (stmntrun f s0 st1) st2', and so on. That might (\emph{might}) be easier to read than the inference rule format.}

\YH{It works well for Seq and Skip, but I think that CaseMatch and CaseNoMatch, or even if statements will be a bit clunky? We would have to use case statements in the function to do different things
based on what they evaluate to. For case I think that would end up being a three way choice. I'll try it out though and see how nice it is.}

\JP{I suppose this would essentially be an ``interpreter'' style semantics but we can prove equivalence pretty easily.}

\YH{To add to that, I used to have both in the Coq code, but commented the recursive definition out, and now only have the inductive definition, which is basically what I copy pasted here.} \JW{Fair enough. Whatever you think ends up being the easiest to read and understand, really. There's something to be said for staying close to the Coq definitions anyway.} \YH{I have added more rules, we can always switch from one to the other now.  One more thing I noticed though is that recursive definitions will need an \texttt{option} type.} \JW{Oh, then my suggestion of `stmntrun f s0 (Vseq st1 st2) = stmntrun f (stmntrun f s0 st1) st2' is a bit ill-typed then, unless the second parameter becomes option-typed too. Maybe the inference rules are better overall then.} \YH{Ah yes, I actually didn't even notice that, it would need the do notation, just like the implementation in Coq, so it may be easier to just use the inference rules.}

\begin{align}
    \text{srun}\ f\ s\ \mathtt{Vskip} &= \mathtt{Some}\ s\\
    \text{srun}\ f\ s_0\ (\mathtt{Vseq}\ \mathit{st}_1\ \mathit{st}_2) &= \text{srun}\ f\
    (\text{srun}\ f\ s_0\ \mathit{st}_1)\ \mathit{st}_2\\
  \text{srun}\ f\ s\ (\mathtt{Vcond}\ c\ \mathit{stt}\ \mathit{stf}) &= \begin{cases}
    \text{srun}\ f\ s_{0}\ \mathit{stf},& \text{erun}\ f\ \Gamma_{0}\ c = 0\\
    \text{srun}\ f\ s_{0}\ \mathit{stt},& \text{otherwise}
  \end{cases}\\
  \text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ ((me,\ sc) :: cs)\ \mathit{def}) &= \begin{cases}
    \text{srun}\ f\ s_{0}\ sc,& \text{erun}\ f\ \Gamma_{0}\ me = \text{erun}\ f\ \Gamma_{0}\ e\\
    \text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ cs\ \mathit{def}),& \text{otherwise}
  \end{cases}\\
  \text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ []\ (\mathtt{Some}\ st)) &= \text{srun}\ f\ s_{0}\ st\\
  \text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ []\ \mathtt{None}) &= \mathtt{None}
\end{align}

\begin{equation}
  \label{eq:1}
  \inferrule[Skip]{ }{\text{srun}\ f\ s\ \mathtt{Vskip} = s}
\end{equation}

\begin{equation}
  \label{eq:4}
  \inferrule[Seq]{\text{srun}\ f\ s_{0}\ \mathit{st}_{1}\ s_{1} \\ \text{srun}\ f\ s_{1}\ \mathit{st}_{2}\ s_{2}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vseq}\ \mathit{st}_{1}\ \mathit{st}_{2})\ s_{2}}
\end{equation}

\begin{equation}
  \label{eq:2}
  \inferrule[CondTrue]{\text{erun}\ f\ \Gamma_{0}\ c\ v_{c} \\ \text{valToB}\ v_{c} = \mathtt{true} \\ \text{srun}\ f\ s_{0}\ \mathit{stt}\ s_{1}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcond}\ c\ \mathit{stt}\ \mathit{stf})\ s_{1}}
\end{equation}

\begin{equation}
  \label{eq:3}
  \inferrule[CondFalse]{\text{erun}\ f\ \Gamma_{0}\ c\ v_{c} \\ \text{valToB}\ v_{c} = \mathtt{false} \\ \text{srun}\ f\ s_{0}\ \mathit{stf}\ s_{1}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcond}\ c\ \mathit{stt}\ \mathit{stf})\ s_{1}}
\end{equation}

\begin{equation}
  \label{eq:5}
  \inferrule[CaseNoMatch]{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ cs\ \mathit{def})\ s_{1} \\ \text{erun}\ f\ \Gamma_{0}\ me\ mve \\ \text{erun}\ f\ \Gamma_{0}\ e\ ve \\ mve \neq ve}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ ((me,\ sc) :: cs)\ \mathit{def})\ s_{1}}
\end{equation}

\begin{equation}
  \label{eq:6}
  \inferrule[CaseMatch]{\text{srun}\ f\ s_{0}\ sc\ s_{1} \\ \text{erun}\ f\ \Gamma_{0}\ e\ ve \\ \text{erun}\ f\ \Gamma_{0}\ me\ mve \\ mve = ve}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ ((me,\ sc) :: cs)\ \mathit{def})\ s_{1}}
\end{equation}

\begin{equation}
  \label{eq:7}
  \inferrule[CaseDefault]{\text{srun}\ f\ s_{0}\ st\ s_{1}}{\text{srun}\ f\ s_{0}\ (\mathtt{Vcase}\ e\ []\ (\mathtt{Some}\ st))\ s_{1}}
\end{equation}

\begin{equation}
  \label{eq:8}
  \inferrule[Blocking]{\text{name}\ \mathit{lhs} = \mathtt{OK}\ n \\ \text{erun}\ f\ \Gamma\ \mathit{rhs}\ v_{\mathit{rhs}}}{\text{srun}\ f\ (\Gamma, \Delta)\ (\mathtt{Vblock}\ \mathit{lhs}\ \mathit{rhs})\ (\Gamma ! n \rightarrow v_{\mathit{rhs}}, \Delta)}
\end{equation}

\begin{equation}
  \label{eq:9}
  \inferrule[Nonblocking]{\text{name}\ \mathit{lhs} = \mathtt{OK}\ n \\ \text{erun}\ f\ \Gamma\ \mathit{rhs}\ v_{\mathit{rhs}}}{\text{srun}\ f\ (\Gamma, \Delta)\ (\mathtt{Vnonblock}\ \mathit{lhs}\ \mathit{rhs}) (\Gamma, \Delta ! n \rightarrow v_{\mathit{rhs}})}
\end{equation}

\section{Proof}

\section{Coq mechanisation}

\section{Conclusion}

%% Acknowledgments
\begin{acks}                            %% acks environment is optional
                                        %% contents suppressed with 'anonymous'
  %% Commands \grantsponsor{<sponsorID>}{<name>}{<url>} and
  %% \grantnum[<url>]{<sponsorID>}{<number>} should be used to
  %% acknowledge financial support and will be used by metadata
  %% extraction tools.
  This material is based upon work supported by the
  \grantsponsor{GS100000001}{National Science
    Foundation}{http://dx.doi.org/10.13039/100000001} under Grant
  No.~\grantnum{GS100000001}{nnnnnnn} and Grant
  No.~\grantnum{GS100000001}{mmmmmmm}.  Any opinions, findings, and
  conclusions or recommendations expressed in this material are those
  of the author and do not necessarily reflect the views of the
  National Science Foundation.
\end{acks}


%% Bibliography
\bibliography{references.bib}


%% Appendix
\appendix
\section{Appendix}

Text of appendix \ldots

\end{document}