summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYann Herklotz <git@yannherklotz.com>2022-05-17 11:56:47 -0400
committerYann Herklotz <git@yannherklotz.com>2022-05-17 11:56:47 -0400
commite9c3671e0408e0dfb09481280deac27e6153b514 (patch)
tree9d89f630bcb3cf0f6b0d45f8fc4230c03ea4df6c
parent8ba729708446b87baf9ec19cc25d9726ea247db7 (diff)
downloadflashlight22-e9c3671e0408e0dfb09481280deac27e6153b514.tar.gz
flashlight22-e9c3671e0408e0dfb09481280deac27e6153b514.zip
Add changes to presentation
-rw-r--r--assets/imperial_logo.pdfbin0 -> 3623 bytes
-rw-r--r--flashlight22.org390
-rw-r--r--setup.org2
3 files changed, 322 insertions, 70 deletions
diff --git a/assets/imperial_logo.pdf b/assets/imperial_logo.pdf
new file mode 100644
index 0000000..858d2ee
--- /dev/null
+++ b/assets/imperial_logo.pdf
Binary files differ
diff --git a/flashlight22.org b/flashlight22.org
index ae68e33..ec4921a 100644
--- a/flashlight22.org
+++ b/flashlight22.org
@@ -1,5 +1,5 @@
-#+title: Formal Verification of High-Level Synthesis
-#+author: \underline{Yann Herklotz}, John Wickerson
+#+title: Verifying Software Pipelining to Approximate Hardware Pipelining in Verified High-Level Synthesis
+#+author: Yann Herklotz
#+options: H:2 toc:nil
#+columns: %45ITEM %10BEAMER_ENV(Env) %10BEAMER_ACT(Act) %4BEAMER_COL(Col)
#+setupfile: setup.org
@@ -16,54 +16,88 @@
- Main difficulty with having hardware as a target is the need to pipeline loops.
-*** Column left
+*** Minipage
:PROPERTIES:
-:BEAMER_COL: 0.45
+:BEAMER_ENV: minipage
+:BEAMER_OPT: \textwidth
+:END:
+
+**** Original Code
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 1
+:END:
+
+#+begin_src c
+for (int i = 3; i < N; i++)
+ acc[i] = acc[i-3]*c+x[i]*y[i];
+#+end_src
+
+**** Transformed code
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 2
:END:
#+attr_latex: :options fontsize=\small,escapeinside=||
#+begin_src c
-for (int i = 0; i < N; i++) {
-|\sA{1}| x18 = i - 1
-|\sR{1}| x16 = load[1, x18 * 4]
+for (int i = 3; i < N; i++) {
+|\sA{1}| x18 = i - 3
+|\sR{1}| x16 = load[1, x18]
|\sM{1}| x8 = x16 * x1
-|\sR{2}| x12 = load[3, i * 4]
-|\sR{3}| x13 = load[2, i * 4]
+|\sR{2}| x12 = load[3, i]
+|\sR{3}| x13 = load[2, i]
|\sM{2}| x7 = x12 * x13
|\sA{2}| x11 = x8 + x7
-|\sW{1}| store[1, i * 4] = x11
+|\sW{1}| store[1, i] = x11
i = i + 1
}
#+end_src
-*** Column right
+**** Final DFG
:PROPERTIES:
-:BEAMER_COL: 0.45
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 3
:END:
+#+attr_latex: :options xleftmargin=0.3\textwidth
+#+begin_src c
+for (int i = 3; i < N; i++) {
+#+end_src
+
#+begin_export latex
-\begin{tikzpicture}[lnode/.style={circle,draw=black,minimum size=4mm,scale=0.8},nlabel/.style={midway,right
-,font=\tiny},node distance=1.3cm,shorten >=1pt]
- \node[lnode,fill=s1col] (A1) {1};
- \node[lnode,fill=s2col,below of=A1] (R1) {1};
- \node[lnode,fill=s3col,below of=R1] (M1) {1};
- \node[lnode,fill=s2col,right of=A1] (R2) {2};
- \node[lnode,fill=s2col,right of=R2] (R3) {3};
- \node[lnode,fill=s3col,below of=R2,xshift=0.7cm] (M2) {2};
- \node[lnode,fill=s1col,below right of=M1,xshift=0.4cm] (A2) {2};
- \node[lnode,fill=s4col,below of=A2] (W1) {1};
-
- \draw[->,thick] (A1) -- (R1);
- \draw[->,thick] (R1) -- node[nlabel] {2} (M1);
- \draw[->,thick] (R2) -- node[nlabel] {2} (M2);
- \draw[->,thick] (R3) -- node[nlabel] {2} (M2);
- \draw[->,thick] (M1) -- (A2);
- \draw[->,thick] (M2) -- (A2);
- \draw[->,thick] (A2) -- (W1);
- \draw[->,thick] (R1) to [out=220,in=180,loop,looseness=2] (W1);
-\end{tikzpicture}
+\begin{center}
+ \begin{tikzpicture}[lnode/.style={circle,draw=black,minimum
+ size=4mm,scale=0.7},nlabel/.style={midway,right ,font=\tiny},node distance=1.3cm,shorten
+ >=1pt]
+ \node[lnode,fill=s1col] (A1) {1};
+ \node[lnode,fill=s2col,below of=A1] (R1) {1};
+ \node[lnode,fill=s3col,below of=R1] (M1) {1};
+ \node[lnode,fill=s2col,right of=A1] (R2) {2};
+ \node[lnode,fill=s2col,right of=R2] (R3) {3};
+ \node[lnode,fill=s3col,below of=R2,xshift=0.7cm] (M2) {2};
+ \node[lnode,fill=s1col,below right of=M1,xshift=0.4cm] (A2) {2};
+ \node[lnode,fill=s4col,below of=A2] (W1) {1};
+
+ \draw[->,thick] (A1) -- (R1);
+ \draw[->,thick,dashed] (R1) -- (M1);
+ \draw[->,thick,dashed] (R2) -- (M2);
+ \draw[->,thick,dashed] (R3) -- (M2);
+ \draw[->,thick] (M1) -- (A2);
+ \draw[->,thick] (M2) -- (A2);
+ \draw[->,thick] (A2) -- (W1);
+ \draw[->,thick] (W1) to [out=220,in=180,loop,looseness=2] node [midway,right] {3} (R1);
+ \end{tikzpicture}
+\end{center}
#+end_export
+#+beamer: \vspace{-4em}
+
+#+attr_latex: :options xleftmargin=0.3\textwidth
+#+begin_src c
+}
+#+end_src
+
** Ideas Behind Static Hardware Loop Pipelining
*** One Possible Workflow
@@ -99,52 +133,230 @@ for (int i = 0; i < N; i++) {
* Verifying Loop Pipelining
** Ideas Behind Software Loop Pipelining
-*** Main Idea
+*** Schedule
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 1-4
+:END:
-Can perform a source-to-source transformation which generates a pipeline purely in software.
+In software pipelining we represent a vertical slice of the pipeline.
-*** Column left
+#+begin_export latex
+\begin{center}
+ \begin{tikzpicture}[lnode/.style={circle,draw=black,minimum size=4mm,scale=0.4},node
+ distance=1cm,shorten >=1pt]
+ \node[lnode,fill=s1col] (A1) {1};
+ \node[lnode,fill=s2col,below of=A1] (R2) {2};
+ \node[lnode,fill=s2col,below of=R2] (R3) {3};
+ \node[lnode,fill=s2col,right of=A1] (R1) {1};
+ \node[lnode,fill=s3col,right of=R1] (M2) {2};
+ \node[lnode,fill=s3col,right of=M2] (M1) {1};
+ \node[lnode,fill=s1col,right of=M1] (A2) {2};
+ \node[lnode,right of=A2] (E1) {};
+ \node[lnode,fill=s4col,right of=E1] (W1) {1};
+ \node[lnode,right of=W1] (E2) {};
+ \begin{scope}[xshift=0.8cm,yshift=-0.5cm]
+ \node[lnode,fill=s1col] (t1A1) {1};
+ \node[lnode,fill=s2col,below of=t1A1] (t1R2) {2};
+ \node[lnode,fill=s2col,below of=t1R2] (t1R3) {3};
+ \node[lnode,fill=s2col,right of=t1A1] (t1R1) {1};
+ \node[lnode,fill=s3col,right of=t1R1] (t1M2) {2};
+ \node[lnode,fill=s3col,right of=t1M2] (t1M1) {1};
+ \node[lnode,fill=s1col,right of=t1M1] (t1A2) {2};
+ \node[lnode,right of=t1A2] (t1E1) {};
+ \node[lnode,fill=s4col,right of=t1E1] (t1W1) {1};
+ \node[lnode,right of=t1W1] (t1E2) {};
+ \end{scope}
+ \begin{scope}[xshift=1.6cm,yshift=-1cm]
+ \node[lnode,fill=s1col] (t2A1) {1};
+ \node[lnode,fill=s2col,below of=t2A1] (t2R2) {2};
+ \node[lnode,fill=s2col,below of=t2R2] (t2R3) {3};
+ \node[lnode,fill=s2col,right of=t2A1] (t2R1) {1};
+ \node[lnode,fill=s3col,right of=t2R1] (t2M2) {2};
+ \node[lnode,fill=s3col,right of=t2M2] (t2M1) {1};
+ \node[lnode,fill=s1col,right of=t2M1] (t2A2) {2};
+ \node[lnode,right of=t2A2] (t2E1) {};
+ \node[lnode,fill=s4col,right of=t2E1] (t2W1) {1};
+ \node[lnode,right of=t2W1] (t2E2) {};
+ \end{scope}
+ \begin{scope}[xshift=2.4cm,yshift=-1.5cm]
+ \node[lnode,fill=s1col] (t3A1) {1};
+ \node[lnode,fill=s2col,below of=t3A1] (t3R2) {2};
+ \node[lnode,fill=s2col,below of=t3R2] (t3R3) {3};
+ \node[lnode,fill=s2col,right of=t3A1] (t3R1) {1};
+ \node[lnode,fill=s3col,right of=t3R1] (t3M2) {2};
+ \node[lnode,fill=s3col,right of=t3M2] (t3M1) {1};
+ \node[lnode,fill=s1col,right of=t3M1] (t3A2) {2};
+ \node[lnode,right of=t3A2] (t3E1) {};
+ \node[lnode,fill=s4col,right of=t3E1] (t3W1) {1};
+ \node[lnode,right of=t3W1] (t3E2) {};
+ \end{scope}
+ \node[above right of=R1,xshift=-0.5cm] (S1T) {};
+ \node[below of=S1T,yshift=-2.8cm] (S1B) {};
+ \draw[thick] (S1T) -- (S1B);
+ \node[above right of=M1,xshift=-0.5cm] (S2T) {};
+ \node[below of=S2T,yshift=-2.8cm] (S2B) {};
+ \draw[thick] (S2T) -- (S2B);
+ \node[above right of=E1,xshift=-0.5cm] (S3T) {};
+ \node[below of=S3T,yshift=-2.8cm] (S3B) {};
+ \draw[thick] (S3T) -- (S3B);
+ \node[above right of=E2,xshift=-0.5cm] (S4T) {};
+ \node[below of=S4T,yshift=-2.8cm] (S4B) {};
+ \draw[thick] (S4T) -- (S4B);
+ \node[right of=S4T,xshift=-0.2cm] (S5T) {};
+ \node[below of=S5T,yshift=-2.8cm] (S5B) {};
+ \draw[thick] (S5T) -- (S5B);
+ \node[right of=S5T,xshift=-0.2cm] (S6T) {};
+ \node[below of=S6T,yshift=-2.8cm] (S6B) {};
+ \draw[thick] (S6T) -- (S6B);
+
+ \only<2>{\draw[very thick,red] ($(W1.north west)+(-0.1,0.1)$) rectangle ($(t3R3.south east)+(0.1,-0.1)$);}
+ \only<3>{\draw[very thick,red] ($(E2.north west)+(-0.1,0.1)$) rectangle ($(t3R1.south east)+(0.1,-0.1)$);}
+ \only<4>{\draw[very thick,red] ($(A1.north west)+(-0.1,0.1)$) rectangle (t3R2.west);}
+ \only<4>{\draw[very thick,red] ($(t1W1.north west)+(-0.1,0.1)$) rectangle ($(t3E2.south east)+(0.1,-0.1)$);}
+ \end{tikzpicture}
+\end{center}
+#+end_export
+
+*** Columns
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 5-
+:END:
+
+**** Main idea
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 5
+:END:
+
+- Source-to-source transformation to generate a *pipeline* in *software*.
+- Use *rotating register file* to avoid unrolling due to *modulo variable expansion*.
+
+**** Need to add the epilogue and prologue
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 6
+:END:
+
+- Use *predicated execution* to avoid adding explicit prologue and epilogue.
+
+**** Column left
:PROPERTIES:
:BEAMER_COL: 0.45
:END:
-#+attr_latex: :options fontsize=\small,escapeinside=||
+***** Initial loop
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 5
+:END:
+
+#+attr_latex: :options xleftmargin=0.3\textwidth
+#+begin_src c
+for (int i = 3; i < N; i++) {
+#+end_src
+
+#+begin_export latex
+\begin{center}
+ \begin{tikzpicture}[lnode/.style={circle,draw=black,minimum
+ size=4mm,scale=0.7},nlabel/.style={midway,right ,font=\tiny},node distance=1.3cm,shorten
+ >=1pt]
+ \node[lnode,fill=s1col] (A1) {1};
+ \node[lnode,fill=s2col,below of=A1] (R1) {1};
+ \node[lnode,fill=s3col,below of=R1] (M1) {1};
+ \node[lnode,fill=s2col,right of=A1] (R2) {2};
+ \node[lnode,fill=s2col,right of=R2] (R3) {3};
+ \node[lnode,fill=s3col,below of=R2,xshift=0.7cm] (M2) {2};
+ \node[lnode,fill=s1col,below right of=M1,xshift=0.4cm] (A2) {2};
+ \node[lnode,fill=s4col,below of=A2] (W1) {1};
+
+ \draw[->,thick] (A1) -- (R1);
+ \draw[->,thick,dashed] (R1) -- (M1);
+ \draw[->,thick,dashed] (R2) -- (M2);
+ \draw[->,thick,dashed] (R3) -- (M2);
+ \draw[->,thick] (M1) -- (A2);
+ \draw[->,thick] (M2) -- (A2);
+ \draw[->,thick] (A2) -- (W1);
+ \draw[->,thick] (W1) to [out=220,in=180,loop,looseness=2] node [midway,right] {3} (R1);
+ \end{tikzpicture}
+\end{center}
+#+end_export
+
+#+beamer: \vspace{-4em}
+
+#+attr_latex: :options xleftmargin=0.3\textwidth
#+begin_src c
-for (int i = 0; i < N; i++) {
-|\sA{1}| x18 = i - 1
-|\sR{1}| x16 = load[1, x18 * 4]
-|\sM{1}| x8 = x16 * x1
-|\sR{2}| x12 = load[3, i * 4]
-|\sR{3}| x13 = load[2, i * 4]
-|\sM{2}| x7 = x12 * x13
-|\sA{2}| x11 = x8 + x7
-|\sW{1}| store[1, i * 4] = x11
- i = i + 1
}
#+end_src
-*** Column right
+***** Execution control
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 6
+:END:
+
+#+begin_src c
+if (i < N): p0 = true
+|| if p0: p1 = true
+|| if p1: p2 = true
+|| if p2: p3 = true
+if (i >= N): p0 = false
+|| if !p0: p1 = false
+|| if !p1: p2 = false
+|| if !p2: p3 = false
+#+end_src
+
+**** Column right
:PROPERTIES:
:BEAMER_COL: 0.45
:END:
-#+attr_latex: :options fontsize=\footnotesize,escapeinside=||
+***** Initial converted
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 5
+:END:
+
+#+attr_latex: :options escapeinside=||
#+begin_src c
-for (int i = 0; i < N; i++) {
-|\sA{1}| x18[i] = i - 1
-|\sR{2}| x12[i] = load[3, i * 4]
-|\sR{3}| x13[i] = load[2, i * 4]
-|\sM{2}| x7[i-1] = x12[i-1] * x13[i-1]
-|\sA{2}| x11[i-2] = x8[i-2] + x7[i-2]
-|\sW{1}| store[1, (i-3) * 4] = x11[i-3]
-
-|\sR{1}| x16[i] = load[1, x18[i] * 4]
-|\sM{1}| x8[i-1] = x16[i-1] * x1
- i = i + 1
+for (int i = 3; i < N; i++) {
+ |\sA{1}|[i]
+ |\sR{2}|[i]
+ |\sR{3}|[i]
+ |\sM{2}|[i-1]
+ |\sA{2}|[i-2]
+ |\sW{1}|[i-3]
+
+ |\sR{1}|[i]
+ |\sM{1}|[i-1]
+ i = i + 1
}
#+end_src
-** Verifying Software Loop Pipelining
+***** With predicates
+:PROPERTIES:
+:BEAMER_ENV: onlyenvNH
+:BEAMER_ACT: 6
+:END:
+
+#+attr_latex: :options escapeinside=||
+#+begin_src c
+for (int i = 3; i < N+4; i++) {
+ if p0: |\sA{1}|[i]
+ if p0: |\sR{2}|[i]
+ if p0: |\sR{3}|[i]
+ if p1: |\sM{2}|[i-1]
+ if p2: |\sA{2}|[i-2]
+ if p3: |\sW{1}|[i-3]
+
+ if p0: |\sR{1}|[i]
+ if p1: |\sM{1}|[i-1]
+ i = i + 1
+}
+#+end_src
+
+** Use Symbolic Execution to Verify the Transformation
*** Symbolic Execution
@@ -152,34 +364,74 @@ Define an $\alpha$, such that $\alpha(\mathcal{C})$ evaluates some code $\mathca
*symbolic states* for all registers.
*** Example
+:PROPERTIES:
+:BEAMER_ACT: 2-
+:END:
Executing the following code will evaluate to the following symbolic code:
-*** Column left
-:PROPERTIES:
-:BEAMER_COL: 0.45
-:END:
+#+begin_export latex
+\begin{equation*}
+\alpha
+\left(\begin{aligned}
+ & \texttt{x = 2}\\
+ & \texttt{y = x + z}
+\end{aligned}\right)\qquad
+ =\qquad
+\begin{aligned}
+ &\texttt{x}\mapsto 2\\
+ &\texttt{y}\mapsto 2 + \texttt{z}^0
+\end{aligned}
+\end{equation*}
+#+end_export
+
+** Verifying Software Loop Pipelining
+For a loop $\mathcal{L}_1$ and a pipelined loop $\mathcal{L}_2$, we want to prove:
+
+#+begin_export latex
+\begin{equation*}
+ \forall N, \alpha(\mathcal{L}_{1}^{N}) = \alpha(\mathcal{L}_{2}^{N})
+\end{equation*}
+#+end_export
+- This is not feasible as $N$ is often not known statically
-*** Column right
+*** Actual proof
:PROPERTIES:
-:BEAMER_COL: 0.45
+:BEAMER_ENV: blockNH
+:BEAMER_ACT: 2
:END:
+It is enough to prove various static properties:
+
* Comparing Software and Hardware Loop Pipelining
+
** Comparing Pipelines in Hardware and Software
-*** Pipelines can be identical
+*** Representation
+
+- Hardware pipelining :: each instruction is put into a state and it is filled with data at the
+ correct II.
+- Software pipelining :: the code represents the kernel of the pipeline, expressing each repeating
+ instruction.
+
+*** Pipelines themselvs are identical
- In terms of expressivity, both hardware and software pipelining can express the same loop
pipelines.
** Resource Usage of Pipelines
-Naively, They will both contain
+- With some *hardware support* software pipeline resource usage can get close to hardware pipeline
+ resource usage.
+- Hardware pipelining is still simpler as filling a pipeline at an II is straightforward.
* Wrapping up
** Conclusion
-- To verify loop pipelines, it seems easier
+- Verifying hardware pipelining together with scheduling is difficult.
+ + Too many instructions move around and their positions need to be recovered.
+- By doing *software pipelining* followed by *instruction scheduling* and *hardware generation*,
+ hardware pipelines can be approximated.
+ + Same pipeline but with higher resource usage.
diff --git a/setup.org b/setup.org
index 67e3696..43dfc58 100644
--- a/setup.org
+++ b/setup.org
@@ -13,7 +13,7 @@
#+latex_header_extra: \DisableLigatures{encoding=T1, family=tt*}
#+latex_header_extra: \usemintedstyle{manni}
-#+latex_header_extra: \setminted{fontsize=\small}
+#+latex_header_extra: \setminted{fontsize=\footnotesize,xleftmargin=.2\textwidth,xrightmargin=.2\textwidth}
# #+latex_header_extra: \institute[shortinst]{Imperial College London}
#+latex_header_extra: \AtBeginSection[]{\begin{frame}\frametitle{Outline}\tableofcontents[currentsection]\end{frame}}
#+latex_header_extra: \usetikzlibrary{shapes,calc,arrows.meta}