summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Wickerson <j.wickerson@imperial.ac.uk>2021-12-04 21:33:23 +0000
committernode <node@git-bridge-prod-0>2021-12-07 07:32:38 +0000
commita15d00f03ec0ca58e05088059527937b4e2ec717 (patch)
treedd543e6e092221ee7eb2e35e7065d0429a391326
parent37ed1a0e3d2714bf75aa29436ccf05af3ddafff2 (diff)
downloadfccm22_rsvhls-a15d00f03ec0ca58e05088059527937b4e2ec717.tar.gz
fccm22_rsvhls-a15d00f03ec0ca58e05088059527937b4e2ec717.zip
Update on Overleaf.
-rw-r--r--references.bib155
-rw-r--r--verified_resource_sharing.tex97
2 files changed, 246 insertions, 6 deletions
diff --git a/references.bib b/references.bib
index ed9e2bb..770cc59 100644
--- a/references.bib
+++ b/references.bib
@@ -149,3 +149,158 @@
location = {Monterey, CA, USA},
series = {FPGA '11},
}
+
+@INPROCEEDINGS{faissole+19,
+ author={Faissole, Florian and Constantinides, George A. and Thomas, David},
+ booktitle={2019 IEEE 27th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)},
+ title={Formalizing Loop-Carried Dependencies in Coq for High-Level Synthesis},
+ year={2019},
+ volume={},
+ number={},
+ pages={315-315},
+ abstract={High-level synthesis (HLS) tools such as VivadoHLS interpret C/C++ code supplemented by proprietary optimization directives called pragmas. In order to perform loop pipelining, HLS compilers have to deal with non-trivial loop-carried data dependencies. In VivadoHLS, the dependence pragma could be used to enforce or to eliminate such dependencies, but, the behavior of this directive is only informally specified through examples. Most of the time programmers and the compiler seem to agree on what the directive means, but the accidental misuse of this pragma can lead to the silent generation of an erroneous register-transfer level (RTL) design, meaning code that previously worked may break with newer more aggressively optimised releases of the compiler. We use the Coq proof assistant to formally specify and verify the behavior of the VivadoHLS dependence pragma. We first embed the syntax and the semantics of a tiny imperative language Imp in Coq and specify a conformance relation between an Imp program and a dependence pragma based on data-flow transformations. We then implement semi-automated methods to formally verify such conformance relations for non-nested loop bodies.},
+ keywords={},
+ doi={10.1109/FCCM.2019.00056},
+ ISSN={2576-2621},
+ month={April},}
+
+@inproceedings{malecha+10,
+ author = {J. Gregory Malecha and
+ Greg Morrisett and
+ Avraham Shinnar and
+ Ryan Wisnesky},
+ editor = {Manuel V. Hermenegildo and
+ Jens Palsberg},
+ title = {Toward a verified relational database management system},
+ booktitle = {Proceedings of the 37th {ACM} {SIGPLAN-SIGACT} Symposium on Principles
+ of Programming Languages, {POPL} 2010, Madrid, Spain, January 17-23,
+ 2010},
+ pages = {237--248},
+ publisher = {{ACM}},
+ year = {2010},
+ url = {https://doi.org/10.1145/1706299.1706329},
+ doi = {10.1145/1706299.1706329},
+ timestamp = {Tue, 22 Jun 2021 17:10:57 +0200},
+ biburl = {https://dblp.org/rec/conf/popl/MalechaMSW10.bib},
+ bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+@inproceedings{chlipala15,
+ author = {Adam Chlipala},
+ editor = {Sriram K. Rajamani and
+ David Walker},
+ title = {From Network Interface to Multithreaded Web Applications: {A} Case
+ Study in Modular Program Verification},
+ booktitle = {Proceedings of the 42nd Annual {ACM} {SIGPLAN-SIGACT} Symposium on
+ Principles of Programming Languages, {POPL} 2015, Mumbai, India, January
+ 15-17, 2015},
+ pages = {609--622},
+ publisher = {{ACM}},
+ year = {2015},
+ url = {https://doi.org/10.1145/2676726.2677003},
+ doi = {10.1145/2676726.2677003},
+ timestamp = {Wed, 23 Jun 2021 17:06:05 +0200},
+ biburl = {https://dblp.org/rec/conf/popl/Chlipala15a.bib},
+ bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+@inproceedings{gu+16,
+ author = {Ronghui Gu and
+ Zhong Shao and
+ Hao Chen and
+ Xiongnan (Newman) Wu and
+ Jieung Kim and
+ Vilhelm Sj{\"{o}}berg and
+ David Costanzo},
+ editor = {Kimberly Keeton and
+ Timothy Roscoe},
+ title = {CertiKOS: An Extensible Architecture for Building Certified Concurrent
+ {OS} Kernels},
+ booktitle = {12th {USENIX} Symposium on Operating Systems Design and Implementation,
+ {OSDI} 2016, Savannah, GA, USA, November 2-4, 2016},
+ pages = {653--669},
+ publisher = {{USENIX} Association},
+ year = {2016},
+ url = {https://www.usenix.org/conference/osdi16/technical-sessions/presentation/gu},
+ timestamp = {Tue, 02 Feb 2021 08:06:02 +0100},
+ biburl = {https://dblp.org/rec/conf/osdi/GuSCWKSC16.bib},
+ bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+@inproceedings{braibant+13,
+ author = {Thomas Braibant and
+ Adam Chlipala},
+ editor = {Natasha Sharygina and
+ Helmut Veith},
+ title = {Formal Verification of Hardware Synthesis},
+ booktitle = {Computer Aided Verification - 25th International Conference, {CAV}
+ 2013, Saint Petersburg, Russia, July 13-19, 2013. Proceedings},
+ series = {Lecture Notes in Computer Science},
+ volume = {8044},
+ pages = {213--228},
+ publisher = {Springer},
+ year = {2013},
+ url = {https://doi.org/10.1007/978-3-642-39799-8\_14},
+ doi = {10.1007/978-3-642-39799-8\_14},
+ timestamp = {Tue, 14 May 2019 10:00:43 +0200},
+ biburl = {https://dblp.org/rec/conf/cav/BraibantC13.bib},
+ bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+@inproceedings{bourgeat+20,
+ author = {Bourgeat, Thomas and Pit-Claudel, Cl\'{e}ment and Chlipala, Adam and Arvind},
+ title = {The Essence of {Bluespec}: A Core Language for Rule-Based Hardware Design},
+ booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design
+ and Implementation},
+ year = 2020,
+ pages = {243-257},
+ doi = {10.1145/3385412.3385965},
+ url = {https://doi.org/10.1145/3385412.3385965},
+ address = {New York, NY, USA},
+ isbn = 9781450376136,
+ keywords = {Hardware Description Language, Compiler Correctness, Semantics},
+ location = {London, UK},
+ numpages = 15,
+ publisher = {ACM},
+ series = {PLDI 2020},
+}
+
+@misc{silveroak,
+ author = {Google},
+ title = {Project Silver Oak: Formal specification and verification of hardware, especially for security and privacy},
+ year = {2019},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ howpublished = {\url{https://github.com/project-oak/silveroak}}
+}
+
+@article{compcert,
+ author = {Xavier Leroy},
+ title = {Formal verification of a realistic compiler},
+ journal = {Commun. {ACM}},
+ volume = {52},
+ number = {7},
+ pages = {107--115},
+ year = {2009},
+ url = {https://doi.org/10.1145/1538788.1538814},
+ doi = {10.1145/1538788.1538814},
+ timestamp = {Tue, 06 Nov 2018 12:51:38 +0100},
+ biburl = {https://dblp.org/rec/journals/cacm/Leroy09.bib},
+ bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+@book{coq,
+ author = {Yves Bertot and
+ Pierre Cast{\'{e}}ran},
+ title = {Interactive Theorem Proving and Program Development - Coq'Art: The
+ Calculus of Inductive Constructions},
+ series = {Texts in Theoretical Computer Science. An {EATCS} Series},
+ publisher = {Springer},
+ year = {2004},
+ url = {https://doi.org/10.1007/978-3-662-07964-5},
+ doi = {10.1007/978-3-662-07964-5},
+ isbn = {978-3-642-05880-6},
+ timestamp = {Tue, 16 May 2017 14:24:38 +0200},
+ biburl = {https://dblp.org/rec/series/txtcs/BertotC04.bib},
+ bibsource = {dblp computer science bibliography, https://dblp.org}
+} \ No newline at end of file
diff --git a/verified_resource_sharing.tex b/verified_resource_sharing.tex
index bd7cea2..5f6684f 100644
--- a/verified_resource_sharing.tex
+++ b/verified_resource_sharing.tex
@@ -2,6 +2,7 @@
\usepackage[textsize=small,shadow]{todonotes}%
\usepackage{soul}
+\usepackage{subcaption}
% Leave review comments using
% \jwcomment{...} (for John) or \yhcomment{...} (for Yann)
@@ -39,7 +40,7 @@
\begin{abstract}
High-level synthesis (HLS) is playing an ever-increasing role in hardware design, but concerns have been raised about its reliability. Seeking to address these concerns, Herklotz et al. have recently developed an HLS compiler, called Vericert, that has been mechanically proven (using the Coq proof assistant) to output Verilog designs that are behaviourally equivalent to the input C program. Unfortunately, Vericert's output cannot compete performance-wise with that of established HLS tools such as LegUp. A major reason for this is Vericert's complete lack of support for resource sharing.
- In this paper, we present Vericert-Fun: Vericert extended with function-level resource sharing. Where original Vericert creates one block of hardware per \emph{function call}, Vericert-Fun creates one block of hardware per \emph{function definition}. The enabling innovation is to extend Vericert's intermediate language HTL with the ability to represent \emph{multiple} state machines, and then to add support for function calls that transfer control between these state machines. We have extended Vericert's formal correctness proof to cover the translation from C source into this extended HTL language.
+ In this paper, we present Vericert-Fun: Vericert extended with function-level resource sharing. Where original Vericert creates one block of hardware per function \emph{call}, Vericert-Fun creates one block of hardware per function \emph{definition}. The enabling innovation is to extend Vericert's intermediate language HTL with the ability to represent \emph{multiple} state machines, and then to add support for function calls that transfer control between these state machines. We have extended Vericert's formal correctness proof to cover the translation from C source into this extended HTL language.
In order to benchmark Vericert-Fun's performance, we have added an (unverified) Verilog-producing backend. Our results on the PolyBench/C benchmark suite show the generated hardware having a resource usage of 61\% of Vericert's on average and 46\% in the best case, for only a 3\% average decrease in max frequency and 1\% average increase in cycle count.
\end{abstract}
@@ -49,15 +50,99 @@
\section{Introduction}
\label{sec:introduction}
-The need for faster, more energy-efficient computation has, in recent years, caused a surge in the need for custom hardware accelerators. Such devices are commonly designed using a hardware description language such as Verilog or VHDL.\@ The complexities of designing hardware in such a language, as well as the abundance of engineers trained in software rather than hardware development has meant that high-level synthesis tools have become an enticing option. These tools, while incredibly useful, are also known to be unreliable.
+The drive for faster, more energy-efficient computation has led to a surge in demand for custom hardware accelerators. These devices are traditionally designed using hardware description languages such as Verilog or VHDL, but the complexities of designing in such a language, as well as the abundance of engineers trained in software rather than hardware development, has meant that \emph{high-level synthesis} (HLS) tools have become an enticing option.
-Previous work by \citeintext{Herklotz2021_shouldbeproven} has found extensive \emph{miscompilation} bugs in commercial HLS tools including Vivado HLS (now Vitis HLS)~\cite{xilinx_vitis}, Intel i++~\cite{intel_hls} and LegUp~\cite{legup_CanisAndrew2011}\todo{Maybe more evidence of instability}. This instability can be a significant hindrance in the development process. This is compounded by the longer iteration times of hardware design compared to software, making the setback when a bug is encountered much more significant. It is therefore essential to ensure that all software used in this process, including the high-level synthesis tool, is as reliable as possible.
+These tools, while incredibly useful, are also known to be unreliable. Previous work by \citet{Herklotz2021_empiricalstudy} has found numerous \emph{miscompilation} bugs in commercial HLS tools including Xilinx Vivado HLS~\cite{xilinx_vitis}, Intel i++~\cite{intel_hls}, and LegUp~\cite{legup_CanisAndrew2011}. This instability can be a significant hindrance in the development process, especially given the longer iteration times of hardware design compared to software. It also undermines the usefulness of HLS in safety- or security-critical settings. It is therefore essential to ensure that high-level synthesis tools are as reliable as possible.
-Vericert~\cite{Herklotz2020} is a High-Level Synthesis compiler which aims to address this issue. It has been verified correct by the highest possible standard: machine-checked formal proof. It achieves that by providing a proof, checked using the Coq proof assistant, that every step of its translation from C to Verilog preserves the semantics of (i.e.\ behaves the same way as) its input program. This proof means that we can always trust any Verilog produced by Vericert to behave the same way as the C program given to it as input. It is based on the CompCert~\cite{compcert_Leroy2009} verified C compiler.
+Vericert~\cite{Herklotz2020} is an HLS tool that aims to address this issue. Its correctness has been checked to the highest possible standard: machine-checked proof. It achieves that by providing a proof, checked using the Coq proof assistant, that every step of its translation from C to Verilog preserves the semantics of (i.e.\ behaves the same way as) its input program. This proof means that we can always trust any Verilog design produced by Vericert to behave the same way as the C program given to it as input. %It is based on the CompCert~\cite{compcert_Leroy2009} verified C compiler.
-Clearly, however, it is not enough for a high-level synthesis tool to simply be \emph{correct}. The generated hardware needs to also satisfy a number of other qualities, including high throughput, low latency, and \emph{area efficiency}. A common optimisation in HLS tools for improving area usage is \emph{resource sharing}. That is, avoiding generating the same hardware more than once, and, instead, re-using that hardware for more than one purpose.\todo{This is not the clearest explanation. It might be OK to completely remove it and explain resource sharing in the background}
+Clearly, however, it is not enough for an HLS tool simply to be \emph{correct}. The generated hardware must also meet several other desiderata, including high throughput, low latency, and \emph{area efficiency}. A common optimisation used by HLS tools to improve area efficiency is \emph{resource sharing}; that is, re-using hardware for more than one purpose. Accordingly, our work adds resource sharing to Vericert. Keeping with the aims of the Vericert project, we also extend the correctness proof.
-Our work implements a resource sharing optimisation in Vericert. Keeping with the aims of the project, we also extend the correctness proof
+\section{Background}
+
+\paragraph{The Coq proof assistant} Vericert is implemented using the Coq proof assistant~\cite{coq}. This means that it consists of a collection of functions that define the compilation process, together with the proof of a theorem stating that those definitions constitute a correct HLS tool. Coq mechanically checks this proof using a formal mathematical calculus, and then automatically translates the function definitions into OCaml code that can be compiled and executed.
+
+Engineering a software system within a proof assistant like Coq is widely held to be the gold standard for correctness. Recent years have shown that it is feasible to design substantial pieces of software in this way, such as database management system~\cite{malecha+10}, web servers~\cite{chlipala15}, and operating system kernels~\cite{gu+16}. Coq has also been successfully deployed in the hardware design process, both in academia~\cite{braibant+13, bourgeat+20} and in industry~\cite{silveroak}. It has even been applied specifically to the HLS process: Faissole et al.~\cite{faissole+19} have used it to verify that HLS optimisations respect dependencies present in the source code.
+
+\paragraph{The CompCert verified C compiler} Among the most celebrated applications of Coq is the CompCert project~\cite{compcert}. CompCert is a lightly optimising C compiler, with backend support for the Arm, x86, PowerPC, and Kalray VLIW architectures, that has been implemented and proven correct using Coq. CompCert handles most of the C99 language, and generally generates code of comparable performance to that generated by GCC at optimisation level \texttt{-O1}.
+
+CompCert transforms its input through a series of ten intermediate languages before generating the final output. This design ensures that each individual pass remains simple and well-scoped, and hence feasible to prove correct. The correctness proof of the entire compiler is formed by composing the correctness proofs of each of its internal passes.
+
+\paragraph{The Vericert verified HLS tool}
+
+Introduced by \citet{Herklotz2020}, Vericert is a verified C-to-Verilog HLS tool. It is an extension of CompCert, essentially augmenting the existing verified C compiler with a new hardware-oriented intermediate language (called HTL) and a Verilog backend. In its current form, Vericert performs no significant optimisations, beyond those it inherits from CompCert's frontend. This results in performance generally about one order of magnitude slower than the designs generated by comparable, unverified HLS tools such as LegUp~\cite{legup_CanisAndrew2011}.
+
+Vericert branches off from CompCert at the intermediate language called register-transfer language (RTL). Since `RTL' is better known in the hardware community as `register-transfer level', and the two concepts are completely distinct, we shall henceforth refer to the CompCert intermediate language as `3AC' (for `three-address code').
+
+In the 3AC language, each function in the program is represented as a numbered list of instructions with gotos -- essentially, a control-flow graph (CFG). The essence of Vericert's compilation strategy is to treat this CFG as a finite-state machine (FSM), with each instruction in the CFG becoming an FSM state, and each edge in the CFG becoming an FSM transition. Moreover, program variables that do not have their address taken are mapped to hardware registers; other variables (including arrays and structs) are allocated in a block of RAM that represents the stack. More specifically, Vericert builds a finite-state machine with datapath (FSMD)~\cite{HwangVahid1999}. An FSMD comprises two maps, both of which take the current FSM state as their input: a \emph{control map} for determining the next FSM state, and a \emph{datapath} for updating the RAM and registers. FSMDs are captured in Vericert's new intermediate language, HTL. When Vericert compiles from HTL to the final Verilog output, these maps are converted from proper `mathematical' functions into syntactic Verilog case-statements, and each is placed inside an always-block.
+
+\JW{Worked example around here.}
+
+\section{Rough notes}
+
+\begin{figure}
+\subcaptionbox{\label{fig:flow:compcert}}{
+\begin{tikzpicture}
+\node[anchor=west](C) at (0.4,0) {C};
+\node(TAC) at (4,0) {3AC};
+\node[anchor=east](Asm) at (8,0) {Assembly};
+\draw[->] (C) to node [auto] {CompCert frontend} (TAC);
+\draw[->] (TAC) to [loop above] node [auto] {selective inlining} (TAC);
+\draw[->] (TAC) to (Asm);
+\end{tikzpicture}
+}
+\subcaptionbox{\label{fig:flow:vericert}}{
+\begin{tikzpicture}
+\node[anchor=west](C2) at (0.4,0) {C};
+\node(TAC2) at (4,0) {3AC};
+\node(HTL) at (5.5,0) {HTL};
+\node[anchor=east](Verilog) at (8,0) {Verilog};
+\draw[->] (C2) to node [auto] {CompCert frontend} (TAC2);
+\draw[->] (TAC2) to [loop above] node [auto] {full inlining} (TAC2);
+\draw[->] (TAC2) to (HTL);
+\draw[->] (HTL) to (Verilog);
+\end{tikzpicture}
+}
+
+\subcaptionbox{\label{fig:flow:vericertfun}}{
+\begin{tikzpicture}
+\node[anchor=west](C3) at (0.4,0) {C};
+\node(TAC3) at (4,0) {3AC};
+\node(HTL3) at (5.5,0) {HTL};
+\node[anchor=east](Verilog3) at (8,0) {Verilog};
+\draw[->] (C3) to node [auto] {CompCert frontend} (TAC3);
+\draw[->] (TAC3) to [loop above] node [auto] {selective inlining} (TAC3);
+\draw[->] (TAC3) to (HTL3);
+\draw[->, dashed] (HTL3) to (Verilog3);
+\end{tikzpicture}
+}
+
+\caption{Key compilation passes and intermediate languages in (a) CompCert~\cite{compcert}, (b) Vericert~\cite{vericert}, and (c) Vericert-Fun (this paper). Dashed arrows indicate passes that have been implemented but not verified.}
+\label{fig:flow}
+\end{figure}
+
+
+
+A few points that we might want to answer at some point:
+
+\begin{itemize}
+
+\item What is so good about having a machine-checked correctness proof for an HLS tool? Explain that it is the gold-standard for high-integrity computer systems. Give example of Csmith finding 0 bugs in the verified parts of CompCert, compared to 100s in GCC and LLVM. Explain that machine-checked correctness proofs are becoming more common these days thanks to advances in automated theorem proving technology. Explain that we can't guarantee that hardware produced via Vericert will never go wrong, because:
+\begin{itemize}
+\item the translation from the Coq source code of Vericert into executable OCaml is unverified,
+\item the compilation of that OCaml program is unverified,
+\item the machine that runs the compiled OCaml program could go wrong,
+\item the Coq proof assistant could contain a bug, as could the machine running it,
+\item the pretty-printing of the final Verilog design is unverified,
+\item the synthesis of that Verilog design into a netlist is unverified, as are the tools that perform place-and-route and FPGA bitstream generation,
+\item the execution on the FPGA could go wrong, e.g. as a result of cosmic rays.
+\end{itemize}
+
+\item What does Vericert currently do?
+
+\item What is needed for the correctness proof? (Semantics for C, semantics for Verilog, ...)
+
+\end{itemize}
\bibliographystyle{ACM-Reference-Format}