From e9c3671e0408e0dfb09481280deac27e6153b514 Mon Sep 17 00:00:00 2001 From: Yann Herklotz Date: Tue, 17 May 2022 11:56:47 -0400 Subject: Add changes to presentation --- assets/imperial_logo.pdf | Bin 0 -> 3623 bytes flashlight22.org | 390 ++++++++++++++++++++++++++++++++++++++--------- setup.org | 2 +- 3 files changed, 322 insertions(+), 70 deletions(-) create mode 100644 assets/imperial_logo.pdf diff --git a/assets/imperial_logo.pdf b/assets/imperial_logo.pdf new file mode 100644 index 0000000..858d2ee Binary files /dev/null and b/assets/imperial_logo.pdf differ diff --git a/flashlight22.org b/flashlight22.org index ae68e33..ec4921a 100644 --- a/flashlight22.org +++ b/flashlight22.org @@ -1,5 +1,5 @@ -#+title: Formal Verification of High-Level Synthesis -#+author: \underline{Yann Herklotz}, John Wickerson +#+title: Verifying Software Pipelining to Approximate Hardware Pipelining in Verified High-Level Synthesis +#+author: Yann Herklotz #+options: H:2 toc:nil #+columns: %45ITEM %10BEAMER_ENV(Env) %10BEAMER_ACT(Act) %4BEAMER_COL(Col) #+setupfile: setup.org @@ -16,54 +16,88 @@ - Main difficulty with having hardware as a target is the need to pipeline loops. -*** Column left +*** Minipage :PROPERTIES: -:BEAMER_COL: 0.45 +:BEAMER_ENV: minipage +:BEAMER_OPT: \textwidth +:END: + +**** Original Code +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 1 +:END: + +#+begin_src c +for (int i = 3; i < N; i++) + acc[i] = acc[i-3]*c+x[i]*y[i]; +#+end_src + +**** Transformed code +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 2 :END: #+attr_latex: :options fontsize=\small,escapeinside=|| #+begin_src c -for (int i = 0; i < N; i++) { -|\sA{1}| x18 = i - 1 -|\sR{1}| x16 = load[1, x18 * 4] +for (int i = 3; i < N; i++) { +|\sA{1}| x18 = i - 3 +|\sR{1}| x16 = load[1, x18] |\sM{1}| x8 = x16 * x1 -|\sR{2}| x12 = load[3, i * 4] -|\sR{3}| x13 = load[2, i * 4] +|\sR{2}| x12 = load[3, i] +|\sR{3}| x13 = load[2, i] |\sM{2}| x7 = x12 * x13 |\sA{2}| x11 = x8 + x7 -|\sW{1}| store[1, i * 4] = x11 +|\sW{1}| store[1, i] = x11 i = i + 1 } #+end_src -*** Column right +**** Final DFG :PROPERTIES: -:BEAMER_COL: 0.45 +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 3 :END: +#+attr_latex: :options xleftmargin=0.3\textwidth +#+begin_src c +for (int i = 3; i < N; i++) { +#+end_src + #+begin_export latex -\begin{tikzpicture}[lnode/.style={circle,draw=black,minimum size=4mm,scale=0.8},nlabel/.style={midway,right -,font=\tiny},node distance=1.3cm,shorten >=1pt] - \node[lnode,fill=s1col] (A1) {1}; - \node[lnode,fill=s2col,below of=A1] (R1) {1}; - \node[lnode,fill=s3col,below of=R1] (M1) {1}; - \node[lnode,fill=s2col,right of=A1] (R2) {2}; - \node[lnode,fill=s2col,right of=R2] (R3) {3}; - \node[lnode,fill=s3col,below of=R2,xshift=0.7cm] (M2) {2}; - \node[lnode,fill=s1col,below right of=M1,xshift=0.4cm] (A2) {2}; - \node[lnode,fill=s4col,below of=A2] (W1) {1}; - - \draw[->,thick] (A1) -- (R1); - \draw[->,thick] (R1) -- node[nlabel] {2} (M1); - \draw[->,thick] (R2) -- node[nlabel] {2} (M2); - \draw[->,thick] (R3) -- node[nlabel] {2} (M2); - \draw[->,thick] (M1) -- (A2); - \draw[->,thick] (M2) -- (A2); - \draw[->,thick] (A2) -- (W1); - \draw[->,thick] (R1) to [out=220,in=180,loop,looseness=2] (W1); -\end{tikzpicture} +\begin{center} + \begin{tikzpicture}[lnode/.style={circle,draw=black,minimum + size=4mm,scale=0.7},nlabel/.style={midway,right ,font=\tiny},node distance=1.3cm,shorten + >=1pt] + \node[lnode,fill=s1col] (A1) {1}; + \node[lnode,fill=s2col,below of=A1] (R1) {1}; + \node[lnode,fill=s3col,below of=R1] (M1) {1}; + \node[lnode,fill=s2col,right of=A1] (R2) {2}; + \node[lnode,fill=s2col,right of=R2] (R3) {3}; + \node[lnode,fill=s3col,below of=R2,xshift=0.7cm] (M2) {2}; + \node[lnode,fill=s1col,below right of=M1,xshift=0.4cm] (A2) {2}; + \node[lnode,fill=s4col,below of=A2] (W1) {1}; + + \draw[->,thick] (A1) -- (R1); + \draw[->,thick,dashed] (R1) -- (M1); + \draw[->,thick,dashed] (R2) -- (M2); + \draw[->,thick,dashed] (R3) -- (M2); + \draw[->,thick] (M1) -- (A2); + \draw[->,thick] (M2) -- (A2); + \draw[->,thick] (A2) -- (W1); + \draw[->,thick] (W1) to [out=220,in=180,loop,looseness=2] node [midway,right] {3} (R1); + \end{tikzpicture} +\end{center} #+end_export +#+beamer: \vspace{-4em} + +#+attr_latex: :options xleftmargin=0.3\textwidth +#+begin_src c +} +#+end_src + ** Ideas Behind Static Hardware Loop Pipelining *** One Possible Workflow @@ -99,52 +133,230 @@ for (int i = 0; i < N; i++) { * Verifying Loop Pipelining ** Ideas Behind Software Loop Pipelining -*** Main Idea +*** Schedule +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 1-4 +:END: -Can perform a source-to-source transformation which generates a pipeline purely in software. +In software pipelining we represent a vertical slice of the pipeline. -*** Column left +#+begin_export latex +\begin{center} + \begin{tikzpicture}[lnode/.style={circle,draw=black,minimum size=4mm,scale=0.4},node + distance=1cm,shorten >=1pt] + \node[lnode,fill=s1col] (A1) {1}; + \node[lnode,fill=s2col,below of=A1] (R2) {2}; + \node[lnode,fill=s2col,below of=R2] (R3) {3}; + \node[lnode,fill=s2col,right of=A1] (R1) {1}; + \node[lnode,fill=s3col,right of=R1] (M2) {2}; + \node[lnode,fill=s3col,right of=M2] (M1) {1}; + \node[lnode,fill=s1col,right of=M1] (A2) {2}; + \node[lnode,right of=A2] (E1) {}; + \node[lnode,fill=s4col,right of=E1] (W1) {1}; + \node[lnode,right of=W1] (E2) {}; + \begin{scope}[xshift=0.8cm,yshift=-0.5cm] + \node[lnode,fill=s1col] (t1A1) {1}; + \node[lnode,fill=s2col,below of=t1A1] (t1R2) {2}; + \node[lnode,fill=s2col,below of=t1R2] (t1R3) {3}; + \node[lnode,fill=s2col,right of=t1A1] (t1R1) {1}; + \node[lnode,fill=s3col,right of=t1R1] (t1M2) {2}; + \node[lnode,fill=s3col,right of=t1M2] (t1M1) {1}; + \node[lnode,fill=s1col,right of=t1M1] (t1A2) {2}; + \node[lnode,right of=t1A2] (t1E1) {}; + \node[lnode,fill=s4col,right of=t1E1] (t1W1) {1}; + \node[lnode,right of=t1W1] (t1E2) {}; + \end{scope} + \begin{scope}[xshift=1.6cm,yshift=-1cm] + \node[lnode,fill=s1col] (t2A1) {1}; + \node[lnode,fill=s2col,below of=t2A1] (t2R2) {2}; + \node[lnode,fill=s2col,below of=t2R2] (t2R3) {3}; + \node[lnode,fill=s2col,right of=t2A1] (t2R1) {1}; + \node[lnode,fill=s3col,right of=t2R1] (t2M2) {2}; + \node[lnode,fill=s3col,right of=t2M2] (t2M1) {1}; + \node[lnode,fill=s1col,right of=t2M1] (t2A2) {2}; + \node[lnode,right of=t2A2] (t2E1) {}; + \node[lnode,fill=s4col,right of=t2E1] (t2W1) {1}; + \node[lnode,right of=t2W1] (t2E2) {}; + \end{scope} + \begin{scope}[xshift=2.4cm,yshift=-1.5cm] + \node[lnode,fill=s1col] (t3A1) {1}; + \node[lnode,fill=s2col,below of=t3A1] (t3R2) {2}; + \node[lnode,fill=s2col,below of=t3R2] (t3R3) {3}; + \node[lnode,fill=s2col,right of=t3A1] (t3R1) {1}; + \node[lnode,fill=s3col,right of=t3R1] (t3M2) {2}; + \node[lnode,fill=s3col,right of=t3M2] (t3M1) {1}; + \node[lnode,fill=s1col,right of=t3M1] (t3A2) {2}; + \node[lnode,right of=t3A2] (t3E1) {}; + \node[lnode,fill=s4col,right of=t3E1] (t3W1) {1}; + \node[lnode,right of=t3W1] (t3E2) {}; + \end{scope} + \node[above right of=R1,xshift=-0.5cm] (S1T) {}; + \node[below of=S1T,yshift=-2.8cm] (S1B) {}; + \draw[thick] (S1T) -- (S1B); + \node[above right of=M1,xshift=-0.5cm] (S2T) {}; + \node[below of=S2T,yshift=-2.8cm] (S2B) {}; + \draw[thick] (S2T) -- (S2B); + \node[above right of=E1,xshift=-0.5cm] (S3T) {}; + \node[below of=S3T,yshift=-2.8cm] (S3B) {}; + \draw[thick] (S3T) -- (S3B); + \node[above right of=E2,xshift=-0.5cm] (S4T) {}; + \node[below of=S4T,yshift=-2.8cm] (S4B) {}; + \draw[thick] (S4T) -- (S4B); + \node[right of=S4T,xshift=-0.2cm] (S5T) {}; + \node[below of=S5T,yshift=-2.8cm] (S5B) {}; + \draw[thick] (S5T) -- (S5B); + \node[right of=S5T,xshift=-0.2cm] (S6T) {}; + \node[below of=S6T,yshift=-2.8cm] (S6B) {}; + \draw[thick] (S6T) -- (S6B); + + \only<2>{\draw[very thick,red] ($(W1.north west)+(-0.1,0.1)$) rectangle ($(t3R3.south east)+(0.1,-0.1)$);} + \only<3>{\draw[very thick,red] ($(E2.north west)+(-0.1,0.1)$) rectangle ($(t3R1.south east)+(0.1,-0.1)$);} + \only<4>{\draw[very thick,red] ($(A1.north west)+(-0.1,0.1)$) rectangle (t3R2.west);} + \only<4>{\draw[very thick,red] ($(t1W1.north west)+(-0.1,0.1)$) rectangle ($(t3E2.south east)+(0.1,-0.1)$);} + \end{tikzpicture} +\end{center} +#+end_export + +*** Columns +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 5- +:END: + +**** Main idea +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 5 +:END: + +- Source-to-source transformation to generate a *pipeline* in *software*. +- Use *rotating register file* to avoid unrolling due to *modulo variable expansion*. + +**** Need to add the epilogue and prologue +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 6 +:END: + +- Use *predicated execution* to avoid adding explicit prologue and epilogue. + +**** Column left :PROPERTIES: :BEAMER_COL: 0.45 :END: -#+attr_latex: :options fontsize=\small,escapeinside=|| +***** Initial loop +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 5 +:END: + +#+attr_latex: :options xleftmargin=0.3\textwidth +#+begin_src c +for (int i = 3; i < N; i++) { +#+end_src + +#+begin_export latex +\begin{center} + \begin{tikzpicture}[lnode/.style={circle,draw=black,minimum + size=4mm,scale=0.7},nlabel/.style={midway,right ,font=\tiny},node distance=1.3cm,shorten + >=1pt] + \node[lnode,fill=s1col] (A1) {1}; + \node[lnode,fill=s2col,below of=A1] (R1) {1}; + \node[lnode,fill=s3col,below of=R1] (M1) {1}; + \node[lnode,fill=s2col,right of=A1] (R2) {2}; + \node[lnode,fill=s2col,right of=R2] (R3) {3}; + \node[lnode,fill=s3col,below of=R2,xshift=0.7cm] (M2) {2}; + \node[lnode,fill=s1col,below right of=M1,xshift=0.4cm] (A2) {2}; + \node[lnode,fill=s4col,below of=A2] (W1) {1}; + + \draw[->,thick] (A1) -- (R1); + \draw[->,thick,dashed] (R1) -- (M1); + \draw[->,thick,dashed] (R2) -- (M2); + \draw[->,thick,dashed] (R3) -- (M2); + \draw[->,thick] (M1) -- (A2); + \draw[->,thick] (M2) -- (A2); + \draw[->,thick] (A2) -- (W1); + \draw[->,thick] (W1) to [out=220,in=180,loop,looseness=2] node [midway,right] {3} (R1); + \end{tikzpicture} +\end{center} +#+end_export + +#+beamer: \vspace{-4em} + +#+attr_latex: :options xleftmargin=0.3\textwidth #+begin_src c -for (int i = 0; i < N; i++) { -|\sA{1}| x18 = i - 1 -|\sR{1}| x16 = load[1, x18 * 4] -|\sM{1}| x8 = x16 * x1 -|\sR{2}| x12 = load[3, i * 4] -|\sR{3}| x13 = load[2, i * 4] -|\sM{2}| x7 = x12 * x13 -|\sA{2}| x11 = x8 + x7 -|\sW{1}| store[1, i * 4] = x11 - i = i + 1 } #+end_src -*** Column right +***** Execution control +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 6 +:END: + +#+begin_src c +if (i < N): p0 = true +|| if p0: p1 = true +|| if p1: p2 = true +|| if p2: p3 = true +if (i >= N): p0 = false +|| if !p0: p1 = false +|| if !p1: p2 = false +|| if !p2: p3 = false +#+end_src + +**** Column right :PROPERTIES: :BEAMER_COL: 0.45 :END: -#+attr_latex: :options fontsize=\footnotesize,escapeinside=|| +***** Initial converted +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 5 +:END: + +#+attr_latex: :options escapeinside=|| #+begin_src c -for (int i = 0; i < N; i++) { -|\sA{1}| x18[i] = i - 1 -|\sR{2}| x12[i] = load[3, i * 4] -|\sR{3}| x13[i] = load[2, i * 4] -|\sM{2}| x7[i-1] = x12[i-1] * x13[i-1] -|\sA{2}| x11[i-2] = x8[i-2] + x7[i-2] -|\sW{1}| store[1, (i-3) * 4] = x11[i-3] - -|\sR{1}| x16[i] = load[1, x18[i] * 4] -|\sM{1}| x8[i-1] = x16[i-1] * x1 - i = i + 1 +for (int i = 3; i < N; i++) { + |\sA{1}|[i] + |\sR{2}|[i] + |\sR{3}|[i] + |\sM{2}|[i-1] + |\sA{2}|[i-2] + |\sW{1}|[i-3] + + |\sR{1}|[i] + |\sM{1}|[i-1] + i = i + 1 } #+end_src -** Verifying Software Loop Pipelining +***** With predicates +:PROPERTIES: +:BEAMER_ENV: onlyenvNH +:BEAMER_ACT: 6 +:END: + +#+attr_latex: :options escapeinside=|| +#+begin_src c +for (int i = 3; i < N+4; i++) { + if p0: |\sA{1}|[i] + if p0: |\sR{2}|[i] + if p0: |\sR{3}|[i] + if p1: |\sM{2}|[i-1] + if p2: |\sA{2}|[i-2] + if p3: |\sW{1}|[i-3] + + if p0: |\sR{1}|[i] + if p1: |\sM{1}|[i-1] + i = i + 1 +} +#+end_src + +** Use Symbolic Execution to Verify the Transformation *** Symbolic Execution @@ -152,34 +364,74 @@ Define an $\alpha$, such that $\alpha(\mathcal{C})$ evaluates some code $\mathca *symbolic states* for all registers. *** Example +:PROPERTIES: +:BEAMER_ACT: 2- +:END: Executing the following code will evaluate to the following symbolic code: -*** Column left -:PROPERTIES: -:BEAMER_COL: 0.45 -:END: +#+begin_export latex +\begin{equation*} +\alpha +\left(\begin{aligned} + & \texttt{x = 2}\\ + & \texttt{y = x + z} +\end{aligned}\right)\qquad + =\qquad +\begin{aligned} + &\texttt{x}\mapsto 2\\ + &\texttt{y}\mapsto 2 + \texttt{z}^0 +\end{aligned} +\end{equation*} +#+end_export + +** Verifying Software Loop Pipelining +For a loop $\mathcal{L}_1$ and a pipelined loop $\mathcal{L}_2$, we want to prove: + +#+begin_export latex +\begin{equation*} + \forall N, \alpha(\mathcal{L}_{1}^{N}) = \alpha(\mathcal{L}_{2}^{N}) +\end{equation*} +#+end_export +- This is not feasible as $N$ is often not known statically -*** Column right +*** Actual proof :PROPERTIES: -:BEAMER_COL: 0.45 +:BEAMER_ENV: blockNH +:BEAMER_ACT: 2 :END: +It is enough to prove various static properties: + * Comparing Software and Hardware Loop Pipelining + ** Comparing Pipelines in Hardware and Software -*** Pipelines can be identical +*** Representation + +- Hardware pipelining :: each instruction is put into a state and it is filled with data at the + correct II. +- Software pipelining :: the code represents the kernel of the pipeline, expressing each repeating + instruction. + +*** Pipelines themselvs are identical - In terms of expressivity, both hardware and software pipelining can express the same loop pipelines. ** Resource Usage of Pipelines -Naively, They will both contain +- With some *hardware support* software pipeline resource usage can get close to hardware pipeline + resource usage. +- Hardware pipelining is still simpler as filling a pipeline at an II is straightforward. * Wrapping up ** Conclusion -- To verify loop pipelines, it seems easier +- Verifying hardware pipelining together with scheduling is difficult. + + Too many instructions move around and their positions need to be recovered. +- By doing *software pipelining* followed by *instruction scheduling* and *hardware generation*, + hardware pipelines can be approximated. + + Same pipeline but with higher resource usage. diff --git a/setup.org b/setup.org index 67e3696..43dfc58 100644 --- a/setup.org +++ b/setup.org @@ -13,7 +13,7 @@ #+latex_header_extra: \DisableLigatures{encoding=T1, family=tt*} #+latex_header_extra: \usemintedstyle{manni} -#+latex_header_extra: \setminted{fontsize=\small} +#+latex_header_extra: \setminted{fontsize=\footnotesize,xleftmargin=.2\textwidth,xrightmargin=.2\textwidth} # #+latex_header_extra: \institute[shortinst]{Imperial College London} #+latex_header_extra: \AtBeginSection[]{\begin{frame}\frametitle{Outline}\tableofcontents[currentsection]\end{frame}} #+latex_header_extra: \usetikzlibrary{shapes,calc,arrows.meta} -- cgit