summaryrefslogtreecommitdiffstats
path: root/main.tex
blob: 31ff380a2e17c70db3cd70b6981ed9ec2e498456 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
%% For double-blind review submission, w/o CCS and ACM Reference (max submission space)
\documentclass[sigplan,10pt,pagebackref=true,review]{acmart}\settopmatter{printfolios=true,printccs=false,printacmref=false}
%\documentclass[pagebackref=true,acmsmall,review]{acmart}\settopmatter{printfolios=true,printccs=false,printacmref=false}
%% For double-blind review submission, w/ CCS and ACM Reference
%\documentclass[acmsmall,review,anonymous]{acmart}\settopmatter{printfolios=true}
%% For single-blind review submission, w/o CCS and ACM Reference (max submission space)
%\documentclass[acmsmall,review]{acmart}\settopmatter{printfolios=true,printccs=false,printacmref=false}
%% For single-blind review submission, w/ CCS and ACM Reference
%\documentclass[acmsmall,review]{acmart}\settopmatter{printfolios=true}
%% For final camera-ready submission, w/ required CCS and ACM Reference
%\documentclass[acmsmall]{acmart}\settopmatter{}

%% Journal information
%% Supplied to authors by publisher for camera-ready submission;
%% use defaults for review submission.
\acmJournal{PACMPL}
\acmVolume{1}
\acmNumber{CONF} % CONF = POPL or ICFP or OOPSLA
\acmArticle{1}
\acmYear{2018}
\acmMonth{1}\acmDOI{}
\startPage{1}

%% Copyright information
%% Supplied to authors (based on authors' rights management selection;
%% see authors.acm.org) by publisher for camera-ready submission;
%% use 'none' for review submission.
\setcopyright{none}
%\setcopyright{acmcopyright}
%\setcopyright{acmlicensed}
%\setcopyright{rightsretained}
%\copyrightyear{2018}

%% Bibliography style
\bibliographystyle{ACM-Reference-Format}
%% Citation style
%\citestyle{acmauthoryear}

\usepackage{amsmath}
\usepackage{booktabs}
\usepackage{mathpartir}
\usepackage{pgfplots}
\usepackage{soul}
\usepackage{subcaption}
\usepackage{tikz}
\usepackage{minted}
\usepackage{microtype}
\usepackage{epigraph}

\usetikzlibrary{shapes,calc,arrows.meta}
\pgfplotsset{compat=1.16}

\setminted{fontsize=\small}
\usemintedstyle{manni}

\newif\ifANONYMOUS
\ANONYMOUSfalse

\newif\ifCOMMENTS
\COMMENTStrue
\newcommand{\Comment}[3]{\ifCOMMENTS\textcolor{#1}{{\bf [\![#2:} #3{\bf ]\!]}}\fi}
\newcommand\JW[1]{\Comment{red!75!black}{JW}{#1}}
\newcommand\YH[1]{\Comment{green!50!blue}{YH}{#1}}

\newcommand\JWcouldcut[1]{{\st{#1}}}
\newcommand\JWreplace[2]{{\st{#1} \JW{#2}}}

\definecolor{compcert}{HTML}{66c2a5}
\definecolor{formalhls}{HTML}{fc8d62}
\definecolor{keywordcolour}{HTML}{8f0075}
\definecolor{functioncolour}{HTML}{721045}
\definecolor{constantcolour}{HTML}{0000bb}

\newcommand\yhkeywordsp[1]{\;\;\texttt{\textcolor{keywordcolour}{#1}}}
\newcommand\yhkeyword[1]{\texttt{\textcolor{keywordcolour}{#1}}}
\newcommand\yhfunctionsp[1]{\;\;\texttt{\textcolor{functioncolour}{#1}}}
\newcommand\yhfunction[1]{\texttt{\textcolor{functioncolour}{#1}}}
\newcommand\yhconstant[1]{\texttt{\textcolor{constantcolour}{#1}}}

\ifANONYMOUS
\newcommand{\vericert}{HLS\-Cert}
\else
\newcommand{\vericert}{Veri\-cert}
\fi
\newcommand{\compcert}{Comp\-Cert}
\newcommand{\legup}{Leg\-Up}

\newcommand{\slowdownOrig}{56}
\newcommand{\slowdownDiv}{10}
\newcommand{\areaIncr}{21}


\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem*{remark}{Remark}

\begin{document}

%% Title information
\title[HLS Tools Should be Proven Correct]{High-Level Synthesis Tools Should be Proven Correct}
                                        %% when present, will be used in
                                        %% header instead of Full Title.
%\titlenote{with title note}             %% \titlenote is optional;
                                        %% can be repeated if necessary;
                                        %% contents suppressed with 'anonymous'
%\subtitle{Subtitle}                     %% \subtitle is optional
%\subtitlenote{with subtitle note}       %% \subtitlenote is optional;
                                        %% can be repeated if necessary;
                                        %% contents suppressed with 'anonymous'


%% Author information
%% Contents and number of authors suppressed with 'anonymous'.
%% Each author should be introduced by \author, followed by
%% \authornote (optional), \orcid (optional), \affiliation, and
%% \email.
%% An author may have multiple affiliations and/or emails; repeat the
%% appropriate command.
%% Many elements are not rendered, but should be provided for metadata
%% extraction tools.

%% Author with single affiliation.
\author{Yann Herklotz}
\orcid{0000-0002-2329-1029}
\affiliation{
  \institution{Imperial College London, UK}
  %\country{UK}
}
\email{yann.herklotz15@imperial.ac.uk}

\author{John Wickerson}
\orcid{0000-0000-0000-0000}
\affiliation{
  \institution{Imperial College London, UK}
  %\country{UK}
}
\email{j.wickerson@imperial.ac.uk}

% JW notes:
% - High-level synthesis is increasingly being relied upon.
% - But it's really flaky. (Cite bugs from FCCM submission etc.)
% - There exist some workarounds. (Testing the output, formally verifying the output, etc.)
% - The time has come to prove the tool itself correct. (Mention success of Compcert and other fully verified tools?)
% - We've made some encouraging progress on this front in a prototype tool called Vericert. (Summarise current state of Vericert, and how it compares performance-wise to LegUp.)
% - But there's still a long way to go. (List the main things left to do, and how you expect Vericert to compare to LegUp after those things are done.)

\begin{abstract}
  High-level synthesis (HLS) is increasingly being relied upon as the size of designs and needs for hardware accelerators increases.  However, HLS is known to be quite flaky with each tool supporting different input languages and sometimes generating incorrect designs.

  A formally verified HLS tool would solve these issues by dramatically reducing the amount of trusted code, and providing a formal description of the input language that is supported.  In this respect, we have developed Vericert, a formally verified HLS tool, based on CompCert, a formally verified C compiler.
\end{abstract}

%% 2012 ACM Computing Classification System (CSS) concepts
%% Generate at 'http://dl.acm.org/ccs/ccs.cfm'.
\begin{CCSXML}
<ccs2012>
<concept>
<concept_id>10011007.10011006.10011008</concept_id>
<concept_desc>Software and its engineering~General programming languages</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10003456.10003457.10003521.10003525</concept_id>
<concept_desc>Social and professional topics~History of programming languages</concept_desc>
<concept_significance>300</concept_significance>
</concept>
</ccs2012>
\end{CCSXML}

\ccsdesc[500]{Software and its engineering~General programming languages}
\ccsdesc[300]{Social and professional topics~History of programming languages}
%% End of generated code


%% Keywords
%% comma separated list
\keywords{\compcert{}, Coq, high-level synthesis, C, Verilog}

\maketitle

\section{Introduction}

%\JW{I removed the `All' from the title, as I think that's a bit strong; it also means that the title fit on one line now :).}

\renewcommand{\epigraphsize}{\normalsize}
\renewcommand{\epigraphflush}{center}
\renewcommand{\epigraphrule}{0pt}
\epigraph{\textit{High-level synthesis research and development is inherently prone to introducing bugs or regressions in the final circuit functionality.}}{--- Andrew Canis~\cite{canis15_legup}}

%\JW{Nice quote; I'd be tempted to tinker with whether it can be formatted a bit more elegantly, like at https://style.mla.org/styling-epigraphs/} 
Research in high-level synthesis (HLS) often concentrates on performance, trying to achieve the lowest area with the shortest run-time.  What is often overlooked is ensuring that the high-level synthesis tool is indeed correct, which means that it outputs a correct hardware design.  Instead, the design is often meticulously tested, often using the higher level design as a model.  As these tests are performed on the hardware design directly, they have to be run on a simulator, which takes much longer than if the original C was tested.  Any formal properties obtained from the C code would also have to be checked again in the resulting design, to ensure that these hold there as well, as the synthesis tool may have translated the input incorrectly.

It is often assumed that because current HLS tools should transform the input specification into a semantically equivalent design, such as mentioned in \citet{lahti19_are_we_there_yet}.  However, this is not the case, and as with all complex pieces of software there are bugs in HLS tools as well.  For example, Vivado HLS was found to incorrectly apply pipelining optimisations\footnote{\url{https://bit.ly/vivado-hls-pipeline-bug}} or generate wrong designs with valid C code as input, but which the HLS tool interprets differently compared to a C compiler, leading to undefined behaviour.  These types of bugs are difficult to identify, and exist because firstly it is not quite clear what input these tools support, and secondly if the output design actually behaves the same as the input.

The solution to both of these points is to have a formally verified high-level synthesis tool.  It not only guarantees that the output is correct, but also brings a formal specification of the input and output language semantics.  These are the only parts of the compiler that need to be trusted, and if these are well-specified, then the behaviour of the resulting design can be fully trusted.  In addition to that, if the semantics of the input semantics are taken from a tool that is widely trusted already, then there should not be any strange behaviour; the resultant design will either behave exactly like the input specification, or the translation will fail early at compile time.

\section{How to prove an HLS tool correct}

The standard methods for checking the outputs of the HLS tools are the following.  First, the output could be checked by using a test bench and checking the outputs against the model.  However, this does not provide many guarantees, as many edge cases may never be tested.  Second, if testing is not rigorous enough, there has been research on checking that the generated hardware design has the same functionality as the input design, where the focus is on creating translation validators~\cite{pnueli98_trans} to prove the equivalence between the design and input code, while supporting various optimisations such as scheduling~\cite{kim04_autom_fsmd,karfa06_formal_verif_method_sched_high_synth,chouksey20_verif_sched_condit_behav_high_level_synth} or code motion~\cite{banerjee14_verif_code_motion_techn_using_value_propag,chouksey19_trans_valid_code_motion_trans_invol_loops}.  However, these aren't perfect solutions either, as there is no guarantee that these proofs really compose with each other.  This means that an equivalence checker normally needs to work for the all of the translations that the HLS tool might perform, increasing the chances that it cannot check for equivalence anymore.

\subsection{Construction of a realistic verified HLS tool}

The radical solution to this problem is to formally verify the complete tool, writing the complete tool in Coq~\cite{coquand86}, an interactive theorem prover.  This has been proven to be successful, for example, CompCert~\cite{leroy09_formal_verif_realis_compil} is a formally verified C compiler written in Coq.  The reliability of formal verification in Coq was demonstrated by CSmith~\cite{yang11_findin_under_bugs_c_compil}, a random C, valid C generator, which found more than 300 bugs in GCC and Clang, whereas only 5 bugs were found in the unverified parts of CompCert, which prompted the verification of those parts as well.  In addition to that, recently Lutsig~\cite{loow21_lutsig}, a synthesis tool going from behavioural Verilog to a technology mapped netlist in Verilog, was also proven correct.

In this respect, we have developed our own verified HLS tool called Vericert~\cite{herklotz21_formal_verif_high_level_synth} based on CompCert, adding a Verilog back end to the compiler.  We currently have a completely verified translation from a large subset of C into Verilog, which fullfills the minimum requirements for an HLS tool.

\section{Guarantees of trusted code}

There are a couple of theorems one can prove about the HLS tool once it is written in Coq.  The first, which is the most important one, is that transforming the C code into Verilog is semantics preserving, meaning the translated code will behave the same as the C code.  The following backward simulation, as initially presented in CompCert, shows the main property that we should want to show:

\begin{equation*}
  \forall C, V, B \in \texttt{Safe},\, \yhfunction{hls} (C) = \yhconstant{OK} (V) \land V \Downarrow B \implies C \Downarrow B.
\end{equation*}

\noindent Given a \textit{safe} behaviour $B$, meaning it does not contain undefined behaviour, we want to show that if the translation from C to Verilog succeeded, that executing the Verilog program with that behaviour implies that the C code will execute with the same behaviour.

However, there are also assumptions that have to be made about the tool.  In the case of Vericert, the only assumption that needs to be made is if the C semantics and Verilog semantics can be trusted.  This is much simpler than checking that the translation algorithm is implemented properly.  Using that assumption, the proofs ensure that the translation preserves the behaviour and doesn't need to be trusted.

\section{Performance of such a tool}

Finally, it is interesting to compare the performance of Vericert compared to existing HLS tools, to see how correctness guarantees affect the size and speed of the final hardware.  This was done on a common compiler benchmark called PolyBench/C~\cite{polybench}.

\begin{figure}
\definecolor{polycol}{HTML}{e6ab02}
\definecolor{polywocol}{HTML}{7570b3}
\begin{tikzpicture}
\begin{axis}[
    xmode=log,
    ymode=log,
    height=80mm,
    width=80mm,
    xlabel={\legup{} execution time (ms)},
    ylabel={\vericert{} execution time (ms)},
    xmin=10,
    xmax=1000000,
    ymax=1000000,
    ymin=10,
    legend pos=south east,
    %log ticks with fixed point,
  ]


\addplot[draw=none, mark=*, draw opacity=0, fill opacity=0.8, polycol]
  table [x expr={\thisrow{legupcycles}/\thisrow{legupfreqMHz}}, y expr={\thisrow{vericertcycles}/\thisrow{vericertoldfreqMHz}}, col sep=comma]
  {results/poly.csv};

\addlegendentry{PolyBench}

\addplot[draw=none, mark=o, fill opacity=0, polywocol]
  table [x expr={\thisrow{legupcycles}/\thisrow{legupfreqMHz}}, y expr={\thisrow{vericertcycles}/\thisrow{vericertfreqMHz}}, col sep=comma]
  {results/poly.csv};

\addlegendentry{PolyBench w/o division}

\addplot[dotted, domain=10:1000000]{x};
%\addplot[dashed, domain=10:10000]{9.02*x + 442};

\end{axis}
\end{tikzpicture}
\caption{A comparison of the execution time of hardware designs generated by \vericert{} and by \legup{}. \YH{Need to update performance figures.}}\label{fig:comparison_time}
\end{figure}

\section{Improvements to \vericert{}}

There are many optimisations that need to be added to Vericert to make turn it into a viable and competitive HLS tool.  First of all, the most important addition is a good scheduling implementation, which supports operation chaining and properly pipelining operators.

%% Acknowledgments
%\begin{acks}%% acks environment is optional
%% contents suppressed with 'anonymous'
  %% Commands \grantsponsor{<sponsorID>}{<name>}{<url>} and
  %% \grantnum[<url>]{<sponsorID>}{<number>} should be used to
  %% acknowledge financial support and will be used by metadata
  %% extraction tools.
%\end{acks}


%% Bibliography
\bibliography{references.bib}


%% Appendix
%\input{appendix}

\end{document}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% TeX-command-extra-options: "-shell-escape"
%%% End: