From 8ba729708446b87baf9ec19cc25d9726ea247db7 Mon Sep 17 00:00:00 2001 From: Yann Herklotz Date: Sun, 15 May 2022 15:30:49 -0400 Subject: Add initial files --- flashlight22.org | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 flashlight22.org (limited to 'flashlight22.org') diff --git a/flashlight22.org b/flashlight22.org new file mode 100644 index 0000000..ae68e33 --- /dev/null +++ b/flashlight22.org @@ -0,0 +1,185 @@ +#+title: Formal Verification of High-Level Synthesis +#+author: \underline{Yann Herklotz}, John Wickerson +#+options: H:2 toc:nil +#+columns: %45ITEM %10BEAMER_ENV(Env) %10BEAMER_ACT(Act) %4BEAMER_COL(Col) +#+setupfile: setup.org + +** Why HLS is unreliable +** Small Introduction to Coq +** Solution: Formally Verify HLS +*** Vericert +** Current Status of Vericert +* Loop Pipelining +** The Need for Loop Pipelining + +# - really useful, or you might say necessary optimisation that HLS performs. + +- Main difficulty with having hardware as a target is the need to pipeline loops. + +*** Column left +:PROPERTIES: +:BEAMER_COL: 0.45 +:END: + +#+attr_latex: :options fontsize=\small,escapeinside=|| +#+begin_src c +for (int i = 0; i < N; i++) { +|\sA{1}| x18 = i - 1 +|\sR{1}| x16 = load[1, x18 * 4] +|\sM{1}| x8 = x16 * x1 +|\sR{2}| x12 = load[3, i * 4] +|\sR{3}| x13 = load[2, i * 4] +|\sM{2}| x7 = x12 * x13 +|\sA{2}| x11 = x8 + x7 +|\sW{1}| store[1, i * 4] = x11 + i = i + 1 +} +#+end_src + +*** Column right +:PROPERTIES: +:BEAMER_COL: 0.45 +:END: + +#+begin_export latex +\begin{tikzpicture}[lnode/.style={circle,draw=black,minimum size=4mm,scale=0.8},nlabel/.style={midway,right +,font=\tiny},node distance=1.3cm,shorten >=1pt] + \node[lnode,fill=s1col] (A1) {1}; + \node[lnode,fill=s2col,below of=A1] (R1) {1}; + \node[lnode,fill=s3col,below of=R1] (M1) {1}; + \node[lnode,fill=s2col,right of=A1] (R2) {2}; + \node[lnode,fill=s2col,right of=R2] (R3) {3}; + \node[lnode,fill=s3col,below of=R2,xshift=0.7cm] (M2) {2}; + \node[lnode,fill=s1col,below right of=M1,xshift=0.4cm] (A2) {2}; + \node[lnode,fill=s4col,below of=A2] (W1) {1}; + + \draw[->,thick] (A1) -- (R1); + \draw[->,thick] (R1) -- node[nlabel] {2} (M1); + \draw[->,thick] (R2) -- node[nlabel] {2} (M2); + \draw[->,thick] (R3) -- node[nlabel] {2} (M2); + \draw[->,thick] (M1) -- (A2); + \draw[->,thick] (M2) -- (A2); + \draw[->,thick] (A2) -- (W1); + \draw[->,thick] (R1) to [out=220,in=180,loop,looseness=2] (W1); +\end{tikzpicture} +#+end_export + +** Ideas Behind Static Hardware Loop Pipelining + +*** One Possible Workflow + +- Generate scheduling constraints for linear code as well as loops. +- Solve for a scheduling using an ILP solver. +- Place the instructions into the cycle that it was assigned to. + +#+begin_export latex +\begin{center} + \begin{tikzpicture}[lnode/.style={circle,draw=black,minimum size=4mm,scale=0.8},node + distance=1.3cm,shorten >=1pt] + \node[lnode,fill=s1col] (A1) {1}; \node[lnode,fill=s2col,below of=A1] (R2) {2}; + \node[lnode,fill=s2col,below of=R2] (R3) {3}; \node[lnode,fill=s2col,right of=A1] (R1) {1}; + \node[lnode,fill=s3col,right of=R1] (M2) {2}; \node[lnode,fill=s3col,right of=M2] (M1) {1}; + \node[lnode,fill=s1col,right of=M1] (A2) {2}; \node[lnode,right of=A2] (E1) {}; + \node[lnode,fill=s4col,right of=E1] (W1) {1}; \node[lnode,right of=W1] (E2) {}; \node[above + right of=R1,xshift=-0.35cm] (S1T) {}; \node[below of=S1T,yshift=-2.5cm] (S1B) {}; \draw[very + thick] (S1T) -- (S1B); \node[above right of=M1,xshift=-0.35cm] (S2T) {}; \node[below + of=S2T,yshift=-2.5cm] (S2B) {}; \draw[very thick] (S2T) -- (S2B); \node[above right + of=E1,xshift=-0.35cm] (S3T) {}; \node[below of=S3T,yshift=-2.5cm] (S3B) {}; \draw[very thick] + (S3T) -- (S3B); + \end{tikzpicture} +\end{center} +#+end_export + +** Verifying Hardware Pipelining is Difficult + +- Normally part of the scheduling step. +- Lose control about how the loops are translated, the fundamental structure of the loop could + change and would be difficult to identify again. + +* Verifying Loop Pipelining +** Ideas Behind Software Loop Pipelining + +*** Main Idea + +Can perform a source-to-source transformation which generates a pipeline purely in software. + +*** Column left +:PROPERTIES: +:BEAMER_COL: 0.45 +:END: + +#+attr_latex: :options fontsize=\small,escapeinside=|| +#+begin_src c +for (int i = 0; i < N; i++) { +|\sA{1}| x18 = i - 1 +|\sR{1}| x16 = load[1, x18 * 4] +|\sM{1}| x8 = x16 * x1 +|\sR{2}| x12 = load[3, i * 4] +|\sR{3}| x13 = load[2, i * 4] +|\sM{2}| x7 = x12 * x13 +|\sA{2}| x11 = x8 + x7 +|\sW{1}| store[1, i * 4] = x11 + i = i + 1 +} +#+end_src + +*** Column right +:PROPERTIES: +:BEAMER_COL: 0.45 +:END: + +#+attr_latex: :options fontsize=\footnotesize,escapeinside=|| +#+begin_src c +for (int i = 0; i < N; i++) { +|\sA{1}| x18[i] = i - 1 +|\sR{2}| x12[i] = load[3, i * 4] +|\sR{3}| x13[i] = load[2, i * 4] +|\sM{2}| x7[i-1] = x12[i-1] * x13[i-1] +|\sA{2}| x11[i-2] = x8[i-2] + x7[i-2] +|\sW{1}| store[1, (i-3) * 4] = x11[i-3] + +|\sR{1}| x16[i] = load[1, x18[i] * 4] +|\sM{1}| x8[i-1] = x16[i-1] * x1 + i = i + 1 +} +#+end_src + +** Verifying Software Loop Pipelining + +*** Symbolic Execution + +Define an $\alpha$, such that $\alpha(\mathcal{C})$ evaluates some code $\mathcal{C}$ and returns +*symbolic states* for all registers. + +*** Example + +Executing the following code will evaluate to the following symbolic code: + +*** Column left +:PROPERTIES: +:BEAMER_COL: 0.45 +:END: + + + +*** Column right +:PROPERTIES: +:BEAMER_COL: 0.45 +:END: + +* Comparing Software and Hardware Loop Pipelining +** Comparing Pipelines in Hardware and Software + +*** Pipelines can be identical + +- In terms of expressivity, both hardware and software pipelining can express the same loop + pipelines. + +** Resource Usage of Pipelines + +Naively, They will both contain + +* Wrapping up +** Conclusion + +- To verify loop pipelines, it seems easier -- cgit