Add algorithm

author: Yann Herklotz <git@yannherklotz.com> 2020-06-30 22:54:28 +0100
committer: Yann Herklotz <git@yannherklotz.com> 2020-06-30 22:54:28 +0100
commit: 89135e34402f3cd58892a028db8e75198e3f8ae2 (patch)
tree: c627ff97cc63c61b459954ea0ba1851f75d505db /algorithm.tex
parent: ffba05577aae83b7e3e9c041bc772d74c4bc5a1c (diff)
download: oopsla21_fvhls-89135e34402f3cd58892a028db8e75198e3f8ae2.tar.gz
oopsla21_fvhls-89135e34402f3cd58892a028db8e75198e3f8ae2.zip
1 files changed, 147 insertions, 37 deletions
diff --git a/algorithm.tex b/algorithm.tex
index 31bf6bb..1b84009 100644
--- a/algorithm.tex
+++ b/algorithm.tex
@@ -33,23 +33,163 @@ Existing HLS compilers usually use LLVM IR as an intermediate representation whe
 
 %%TODO: Maybe add why LTL and the other smaller languages are not that well suited
 
+\subsection{Example}
+
 \begin{figure}
   \centering
-  \begin{minipage}{0.5\linewidth}
+  \begin{subfigure}[b]{0.4\linewidth}
 \begin{minted}{c}
 int main() {
-    int x[5] = {1, 2, 3, 4, 5};
-    int sum = 0;
-    for (int i = 0; i < 5; i++)
+    int x[3] = {1, 2, 3};
+    int sum = 0, incr = 1;
+    for (int i = 0; i < 3; i=i+incr)
         sum += x[i];
     return sum;
 }
 \end{minted}
-  \end{minipage}
-  \caption{Accumulator example C code.}\label{fig:accumulator_c}
+    \caption{Accumulator C code.}\label{fig:accumulator_c}
+  \end{subfigure}%
+  \begin{subfigure}[b]{0.6\linewidth}
+\begin{minted}[fontsize=\footnotesize]{c}
+main() {
+   19:	x9 = 1
+   18:	int32[stack(0)] = x9
+   17:	x8 = 2
+   16:	int32[stack(4)] = x8
+   15:	x7 = 3
+   14:	int32[stack(8)] = x7
+   13:	x3 = 0
+   12:	x2 = 1
+   11:	x1 = 0
+   10:	nop
+    9:	if (x1 <s 3) goto 8 else goto 3
+    8:	x6 = stack(0) (int)
+    7:	x5 = int32[x6 + x1 * 4 + 0]
+    6:	x3 = x3 + x5 + 0 (int)
+    5:	x1 = x1 + x2 + 0 (int)
+    4:	goto 9
+    3:	x4 = x3 goto 1
+    2:	x4 = 0
+    1:	return x4
+}
+\end{minted}
+    \caption{Accumulator C code.}\label{fig:accumulator_rtl}
+  \end{subfigure}
+  \caption{Accumulator example using CompCert to translate from C to RTL.}\label{fig:accumulator}
 \end{figure}
 
-To describe the translation, we start with an example of how to translate a simple accumulator example, which is shown in figure~\ref{fig:accumulator_c}.  Using this example, the different stages in the translation can be explained, together with how they were proven.  The example includes constructs such as arrays, for loops and addition, however, CoqUp also supports all other operators like multiplication and division, as well as conditional statements.  Function calls are also supported by inlining all of the functions, meaning recursive function calls are not supported.
+\begin{figure}
+  \centering
+  \begin{subfigure}[b]{0.5\linewidth}
+\begin{minted}[fontsize=\tiny]{systemverilog}
+main() {
+  datapath {
+   16:	reg_9 <= 32'd1;
+   15:	reg_13[32'd0] <= reg_9;
+   14:	reg_8 <= 32'd2;
+   13:	reg_13[32'd1] <= reg_8;
+   12:	reg_7 <= 32'd3;
+   11:	reg_13[32'd2] <= reg_7;
+   10:	reg_3 <= 32'd0;
+    9:	;
+    8:	reg_1 <= 32'd0;
+    7:	reg_6 <= 32'd0;
+    6:	reg_5 <= reg_13[{{{reg_6 + 32'd0}
+                        + {reg_1 * 32'd4}} / 32'd4}];
+    5:	reg_3 <= {reg_3 + {reg_5 + 32'd0}};
+    4:	reg_1 <= {reg_1 + 32'd1};
+    3:	;
+    2:	reg_4 <= reg_3;
+    1:	reg_11 <= 1'd1; reg_12 <= reg_4;
+  }
+
+  controllogic {
+   16:	reg_10 <= 32'd15;
+   15:	reg_10 <= 32'd14;
+   14:	reg_10 <= 32'd13;
+   13:	reg_10 <= 32'd12;
+   12:	reg_10 <= 32'd11;
+   11:	reg_10 <= 32'd10;
+   10:	reg_10 <= 32'd9;
+    9:	reg_10 <= 32'd8;
+    8:	reg_10 <= 32'd7;
+    7:	reg_10 <= 32'd6;
+    6:	reg_10 <= 32'd5;
+    5:	reg_10 <= 32'd4;
+    4:	reg_10 <= 32'd3;
+    3:	reg_10 <= ({$signed(reg_1) < $signed(32'd3)}
+                        ? 32'd7 : 32'd2);
+    2:	reg_10 <= 32'd1;
+    1:	;
+  }
+}
+\end{minted}
+    \caption{Accumulator C code.}\label{fig:accumulator_c}
+  \end{subfigure}%
+  \begin{subfigure}[b]{0.5\linewidth}
+\begin{minted}[fontsize=\tiny]{systemverilog}
+module main(reg_14, reg_15, reg_16, reg_11, reg_12);
+  always @(posedge reg_16)
+    if ({reg_15 == 1'd1})
+      reg_10 <= 32'd16;
+    else
+      case (reg_10)
+        32'd16: reg_10 <= 32'd15;
+        32'd8: reg_10 <= 32'd7;
+        32'd4: reg_10 <= 32'd3;
+        32'd12: reg_10 <= 32'd11;
+        32'd2: reg_10 <= 32'd1;
+        32'd10: reg_10 <= 32'd9;
+        32'd6: reg_10 <= 32'd5;
+        32'd14: reg_10 <= 32'd13;
+        32'd1: ;
+        32'd9: reg_10 <= 32'd8;
+        32'd5: reg_10 <= 32'd4;
+        32'd13: reg_10 <= 32'd12;
+        32'd3: reg_10 <= ({$signed(reg_1) < $signed(32'd3)}
+                               ? 32'd7 : 32'd2);
+        32'd11: reg_10 <= 32'd10;
+        32'd7: reg_10 <= 32'd6;
+        32'd15: reg_10 <= 32'd14;
+        default:;
+      endcase
+  always @(posedge reg_16)
+    case (reg_10)
+      32'd16: reg_9 <= 32'd1;
+      32'd8: reg_1 <= 32'd0;
+      32'd4: reg_1 <= {reg_1 + 32'd1};
+      32'd12: reg_7 <= 32'd3;
+      32'd2: reg_4 <= reg_3;
+      32'd10: reg_3 <= 32'd0;
+      32'd6: reg_5 <= reg_13[{{{reg_6 + 32'd0}
+                             + {reg_1 * 32'd4}} / 32'd4}];
+      32'd14: reg_8 <= 32'd2;
+      32'd1: begin reg_11 = 1'd1; reg_12 = reg_4; end
+      32'd9: ;
+      32'd5: reg_3 <= {reg_3 + {reg_5 + 32'd0}};
+      32'd13: reg_13[32'd1] <= reg_8;
+      32'd3: ;
+      32'd11: reg_13[32'd2] <= reg_7;
+      32'd7: reg_6 <= 32'd0;
+      32'd15: reg_13[32'd0] <= reg_9;
+      default:;
+    endcase
+  reg [31:0] reg_13 [2:0];
+  input [0:0] reg_16, reg_14, reg_15;
+  reg [31:0] reg_8, reg_4, reg_10, reg_6,
+             reg_1, reg_9, reg_5, reg_3, reg_7;
+  output reg [31:0] reg_12;
+  output reg [0:0] reg_11;
+endmodule
+\end{minted}
+    \caption{Accumulator C code.}\label{fig:accumulator_rtl}
+  \end{subfigure}
+  \caption{Accumulator example using CompCert to translate from C to RTL.}\label{fig:accumulator}
+\end{figure}
+
+To describe the translation, we start with an example of how to translate a simple accumulator example, which is shown in figure~\ref{fig:accumulator}.  Using this example, the different stages in the translation can be explained, together with how they were proven.  The example includes constructs such as arrays, for loops and addition, however, CoqUp also supports all other operators like multiplication and division, as well as conditional statements.  Function calls are also supported by inlining all of the functions, meaning recursive function calls are not supported.
+
+The first step is to use the CompCert front end passes to convert the C code into RTL, which is the intermediate language that CompCert uses for most of its optimisations.  The translation is shown in figure~\ref{fig:accumulator}, where the RTL code is depicted in figure~\ref{fig:accumulator_rtl}.  After that, the code is
 
 \subsection{CompCert RTL}
 
@@ -63,36 +203,6 @@ All CompCert intermediate language follow the similar structure below:
 
 \noindent where function definitions can either be internal or external.  External functions are functions that are not defined in the current translation unit, and are therefore not part of the current translation.  The difference in between the CompCert intermediate languages is therefore how the internal function is defined, as that defines the structure of the language itself.
 
-\begin{figure}
-  \centering
-  \begin{minipage}{0.5\linewidth}
-\begin{minted}{c}
-main() {
-   19:	x10 = 1
-   18:	int32[stack(0)] = x10
-   17:	x9 = 2
-   16:	int32[stack(4)] = x9
-   15:	x8 = 3
-   14:	int32[stack(8)] = x8
-   13:	x7 = 4
-   12:	int32[stack(12)] = x7
-   11:	x6 = 5
-   10:	int32[stack(16)] = x6
-    9:	x2 = 0
-    8:	x1 = 0
-    7:	x5 = stack(0) (int)
-    6:	x4 = int32[x5 + x1 * 4 + 0]
-    5:	x2 = x2 + x4 + 0 (int)
-    4:	x1 = x1 + 1 (int)
-    3:	if (x1 <s 5) goto 7 else goto 2
-    2:	x3 = x2
-    1:	return x3
-}
-\end{minted}
-  \end{minipage}
-  \caption{Accumulator example RTL code.}\label{fig:accumulator_rtl}
-\end{figure}
-
 %% Describe RTL
 The accumulator example in RTL function definitions are a sequence of instructions encoded in a control-flow graph, with each instruction linking to the next instruction that should be executed.
author	Yann Herklotz <git@yannherklotz.com>	2020-06-30 22:54:28 +0100
committer	Yann Herklotz <git@yannherklotz.com>	2020-06-30 22:54:28 +0100
commit	89135e34402f3cd58892a028db8e75198e3f8ae2 (patch)
tree	c627ff97cc63c61b459954ea0ba1851f75d505db /algorithm.tex
parent	ffba05577aae83b7e3e9c041bc772d74c4bc5a1c (diff)
download	oopsla21_fvhls-89135e34402f3cd58892a028db8e75198e3f8ae2.tar.gz oopsla21_fvhls-89135e34402f3cd58892a028db8e75198e3f8ae2.zip