From dfb1a8a90bfd55b632b98ad7686002f599427949 Mon Sep 17 00:00:00 2001 From: Yann Herklotz Date: Tue, 17 May 2022 16:58:52 -0400 Subject: Add new additions --- assets/flashlight-logo.pdf | Bin 0 -> 17402 bytes beamerthemeauriga.sty | 2 +- flashlight22.org | 161 +++++++++++++++++++++++++++++++++++++++------ setup.org | 3 +- 4 files changed, 144 insertions(+), 22 deletions(-) create mode 100644 assets/flashlight-logo.pdf diff --git a/assets/flashlight-logo.pdf b/assets/flashlight-logo.pdf new file mode 100644 index 0000000..51ce32e Binary files /dev/null and b/assets/flashlight-logo.pdf differ diff --git a/beamerthemeauriga.sty b/beamerthemeauriga.sty index e4b18e4..eeaa671 100644 --- a/beamerthemeauriga.sty +++ b/beamerthemeauriga.sty @@ -103,7 +103,7 @@ {\usebeamerfont{title page institute}\usebeamercolor[fg]{title page}\insertinstitute\\[1ex]} \vskip0pt plus 1filll \end{centering} - {\includegraphics[height=1cm]{./assets/imperial_logo.pdf}\hfill\includegraphics[height=1cm]{./assets/oopsla21.png}} + {\includegraphics[height=1cm]{./assets/imperial_logo.pdf}\hfill\includegraphics[height=2cm]{./assets/flashlight-logo.pdf}} } % Footer diff --git a/flashlight22.org b/flashlight22.org index ec4921a..4ebbbc7 100644 --- a/flashlight22.org +++ b/flashlight22.org @@ -4,11 +4,84 @@ #+columns: %45ITEM %10BEAMER_ENV(Env) %10BEAMER_ACT(Act) %4BEAMER_COL(Col) #+setupfile: setup.org -** Why HLS is unreliable -** Small Introduction to Coq -** Solution: Formally Verify HLS -*** Vericert +** The Need For Formally Verified HLS + +#+attr_beamer: :overlay +- +- HLS cannot be used for *critical applications*. + + Even simple programs can produce bugs in HLS tools. +- *Functional testing* of hardware has to be *redone*. +- *Goal*: Create a practical, formally verified HLS tool. + ** Current Status of Vericert + +#+begin_export latex +\definecolor{compcert}{HTML}{bebada} +\definecolor{formalhls}{HTML}{8dd3c7} +\begin{center} + \begin{tikzpicture}% + [language/.style={fill=white,rounded corners=3pt,minimum height=7mm}, + continuation/.style={}, + linecount/.style={rounded corners=3pt,dashed},scale=0.8,shorten >=1pt] + \fill[compcert,rounded corners=3pt] (-1.2,-0.5) rectangle (14,2); + \fill[formalhls,rounded corners=3pt] (-1.2,-1) rectangle (14,-2.4); + %\draw[linecount] (-0.95,-0.45) rectangle (3.6,1); + %\draw[linecount] (4,-0.45) rectangle (7.5,1); + \node[language] at (-0.3,0) (clight) {Clight}; + \node[continuation] at (1.3,0) (conta) {$\cdots$}; + \node[language] at (3.5,0) (cminor) {CminorSel}; + \node[language] at (6,0) (rtl) {RTL}; + \node[language] at (8,0) (ltl) {LTL}; + \node[language,anchor=west] at (11.5,0) (aarch) {aarch64}; + \node[language,anchor=west] at (11.5,0.8) (x86) {x86}; + \node[continuation,anchor=west] at (11.5,1.4) (backs) {$\cdots$}; + \node[continuation] at (10,0) (contb) {$\cdots$}; + \node[language] at (11,-1.5) (htl) {HTL}; + \node[language] at (2.5,-1.5) (rtlblock) {RTLBlock}; + \node[language] at (5.5,-1.5) (rtlpar) {RTLPar}; + \node[language] at (8.5,-1.5) (rtlparfu) {RTLParFU}; + \node[language] at (13,-1.5) (verilog) {Verilog}; + \node[anchor=west] at (-0.9,1.6) {\bf CompCert}; + \node[anchor=west] at (-0.9,-1.4) {\bf Vericert}; + %%\node[anchor=west] at (-0.9,0.7) {\small $\sim$ 27 kloc}; + %%\node[anchor=west] at (4.1,0.7) {\small $\sim$ 46 kloc}; + %%\node[anchor=west] at (2,-1.5) {\small $\sim$ 17 kloc}; + \draw[->,thick] (clight) -- (conta); + \draw[->,thick] (conta) -- (cminor); + \draw[->,thick] (cminor) -- (rtl); + \draw[->,thick] (rtl) -- (ltl); + \draw[->,thick] (ltl) -- (contb); + \draw[->,thick] (contb) -- (aarch); + \draw[->,thick] (contb) to [out=0,in=200] (x86.west); + \draw[->,thick] (contb) to [out=0,in=190] (backs.west); + \draw[->,thick] (rtl) -- (rtlblock); + \draw[->,thick] (rtlblock) -- node[below,yshift=-0.3cm] {\footnotesize scheduling} (rtlpar); + \draw[->,thick] (rtlpar) -- node[below,yshift=-0.3cm] {\footnotesize pipelining} (rtlparfu); + \draw[->,thick] (rtlparfu) -- (htl); + \draw[->,thick] (htl) -- (verilog); +%% \draw[->,thick] (htl) to [out=230,in=310,loop,looseness=5] node[align=center,below] {\footnotesize RAM\\[-0.5em]\footnotesize insertion} (htl); + \draw[->,thick] (rtlblock) to [out=190,in=240,loop,looseness=5] node[align=center,below] {\footnotesize if-conversion} (rtlblock); + \only<2->{\draw[->,thick,red] (rtlblock) to [out=350,in=300,loop,looseness=5] node[align=center,below] {Loop Scheduling} (rtlblock);} + \end{tikzpicture}%} +\end{center} +#+end_export + +*** Below +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 1 +:END: + +Current work adds *hyperblock scheduling* to *Vericert*. + +*** Below 2 +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 2 +:END: + +We argue we can add *hardawre loop pipelining* as a *source-to-source transformation* doing +*software loop pipelining*, which is easier to verify in *Coq*. + * Loop Pipelining ** The Need for Loop Pipelining @@ -241,12 +314,12 @@ In software pipelining we represent a vertical slice of the pipeline. - Use *predicated execution* to avoid adding explicit prologue and epilogue. -**** Column left +***** Column left :PROPERTIES: :BEAMER_COL: 0.45 :END: -***** Initial loop +****** Initial loop :PROPERTIES: :BEAMER_ENV: onlyenvNH :BEAMER_ACT: 5 @@ -290,12 +363,16 @@ for (int i = 3; i < N; i++) { } #+end_src -***** Execution control +****** Execution control :PROPERTIES: :BEAMER_ENV: onlyenvNH :BEAMER_ACT: 6 :END: +#+begin_export latex +\tikz{node[](box1){};} +#+end_export + #+begin_src c if (i < N): p0 = true || if p0: p1 = true @@ -307,12 +384,12 @@ if (i >= N): p0 = false || if !p2: p3 = false #+end_src -**** Column right +***** Column right :PROPERTIES: :BEAMER_COL: 0.45 :END: -***** Initial converted +****** Initial converted :PROPERTIES: :BEAMER_ENV: onlyenvNH :BEAMER_ACT: 5 @@ -334,7 +411,7 @@ for (int i = 3; i < N; i++) { } #+end_src -***** With predicates +****** With predicates :PROPERTIES: :BEAMER_ENV: onlyenvNH :BEAMER_ACT: 6 @@ -343,6 +420,7 @@ for (int i = 3; i < N; i++) { #+attr_latex: :options escapeinside=|| #+begin_src c for (int i = 3; i < N+4; i++) { + |\tikz{node[](box){};}|... if p0: |\sA{1}|[i] if p0: |\sR{2}|[i] if p0: |\sR{3}|[i] @@ -356,6 +434,19 @@ for (int i = 3; i < N+4; i++) { } #+end_src + +**** Overlay +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 6 +:END: + +#+begin_export latex +\begin{tikzpicture}[overlay] + \draw[->,very thick] (6,2) -- (9,4.5); +\end{tikzpicture} +#+end_export + ** Use Symbolic Execution to Verify the Transformation *** Symbolic Execution @@ -405,33 +496,65 @@ For a loop $\mathcal{L}_1$ and a pipelined loop $\mathcal{L}_2$, we want to prov It is enough to prove various static properties: +#+begin_export latex +\begin{center} + \begin{tikzpicture}[lnode/.style={circle,draw=black,minimum size=4mm,scale=0.4},node + distance=1cm,shorten >=1pt] + \node[scale=3,left of=t2E2,xshift=-1.1cm] {$\alpha($}; + \node[scale=3,right of=t2E2,xshift=-0.8cm] (rb) {$)$}; + \node[scale=1.5,right of=rb] {$\quad=\quad\alpha(\mathcal{L}^3)$}; + \node[lnode,fill=s1col] (A1) {1}; \node[lnode,fill=s2col,below of=A1] (R2) {2}; + \node[lnode,fill=s2col,below of=R2] (R3) {3}; \node[lnode,fill=s2col,right of=A1] (R1) {1}; + \node[lnode,fill=s3col,right of=R1] (M2) {2}; \node[lnode,fill=s3col,right of=M2] (M1) {1}; + \node[lnode,fill=s1col,right of=M1] (A2) {2}; \node[lnode,right of=A2] (E1) {}; + \node[lnode,fill=s4col,right of=E1,xshift=2cm] (W1) {1}; \node[lnode,right of=W1] (E2) {}; + \begin{scope}[xshift=0.8cm,yshift=-0.5cm] + \node[lnode,fill=s1col] (t1A1) {1}; \node[lnode,fill=s2col,below of=t1A1] (t1R2) {2}; + \node[lnode,fill=s2col,below of=t1R2] (t1R3) {3}; \node[lnode,fill=s2col,right of=t1A1] (t1R1) + {1}; \node[lnode,fill=s3col,right of=t1R1] (t1M2) {2}; \node[lnode,fill=s3col,right of=t1M2] + (t1M1) {1}; \node[lnode,fill=s1col,right of=t1M1,xshift=2cm] (t1A2) {2}; \node[lnode,right of=t1A2] + (t1E1) {}; \node[lnode,fill=s4col,right of=t1E1] (t1W1) {1}; \node[lnode,right of=t1W1] (t1E2) + {}; + \end{scope} + \begin{scope}[xshift=1.6cm,yshift=-1cm] + \node[lnode,fill=s1col] (t2A1) {1}; \node[lnode,fill=s2col,below of=t2A1] (t2R2) {2}; + \node[lnode,fill=s2col,below of=t2R2] (t2R3) {3}; \node[lnode,fill=s2col,right of=t2A1] (t2R1) + {1}; \node[lnode,fill=s3col,right of=t2R1,xshift=2cm] (t2M2) {2}; \node[lnode,fill=s3col,right of=t2M2] + (t2M1) {1}; \node[lnode,fill=s1col,right of=t2M1] (t2A2) {2}; \node[lnode,right of=t2A2] + (t2E1) {}; \node[lnode,fill=s4col,right of=t2E1] (t2W1) {1}; \node[lnode,right of=t2W1] (t2E2) + {}; + \end{scope} + \end{tikzpicture} +\end{center} +#+end_export + * Comparing Software and Hardware Loop Pipelining ** Comparing Pipelines in Hardware and Software *** Representation -- Hardware pipelining :: each instruction is put into a state and it is filled with data at the +- *Hardware pipelining* :: each instruction is put into a state and it is filled with data at the correct II. -- Software pipelining :: the code represents the kernel of the pipeline, expressing each repeating +- *Software pipelining* :: the code represents the kernel of the pipeline, expressing each repeating instruction. *** Pipelines themselvs are identical -- In terms of expressivity, both hardware and software pipelining can express the same loop - pipelines. +- In terms of *expressivity*, both hardware and software pipelining can express the *same loop + pipelines*. ** Resource Usage of Pipelines -- With some *hardware support* software pipeline resource usage can get close to hardware pipeline - resource usage. -- Hardware pipelining is still simpler as filling a pipeline at an II is straightforward. +- With some *hardware support*, *software pipeline* resource usage can get close to *hardware + pipeline* resource usage. +- *Hardware pipelining* is still simpler as filling a pipeline at an II is straightforward. * Wrapping up ** Conclusion -- Verifying hardware pipelining together with scheduling is difficult. +- Verifying *hardware pipelining* together with scheduling is difficult. + Too many instructions move around and their positions need to be recovered. - By doing *software pipelining* followed by *instruction scheduling* and *hardware generation*, hardware pipelines can be approximated. - + Same pipeline but with higher resource usage. + + Same pipeline but with *higher resource usage*. diff --git a/setup.org b/setup.org index 43dfc58..7f8032e 100644 --- a/setup.org +++ b/setup.org @@ -6,10 +6,10 @@ #+latex_header: \usepackage{booktabs} #+latex_header: \usepackage{microtype} #+latex_header: \usepackage{mathtools} -#+latex_header: \usepackage{natbib} #+latex_header: \usepackage{graphicx} #+latex_header: \usepackage{pgfplots} #+latex_header: \usepackage{pgfplotstable} +#+latex_header: \usepackage{xcolor} #+latex_header_extra: \DisableLigatures{encoding=T1, family=tt*} #+latex_header_extra: \usemintedstyle{manni} @@ -25,7 +25,6 @@ #+latex_header_extra: \newcommand\yhfunctionsp[1]{\;\;\texttt{\textcolor{functioncolour}{#1}}} #+latex_header_extra: \newcommand\yhfunction[1]{\texttt{\textcolor{functioncolour}{#1}}} #+latex_header_extra: \newcommand\yhconstant[1]{\texttt{\textcolor{constantcolour}{#1}}} -#+latex_header_extra: \bibliographystyle{plainnat} #+latex_header_extra: \pgfplotsset{compat=1.16} #+latex_header_extra: \usetikzlibrary{pgfplots.groupplots} #+latex_header_extra: \tikzexternalize -- cgit