summaryrefslogtreecommitdiffstats
path: root/algorithm.tex
diff options
context:
space:
mode:
authorYann Herklotz <git@yannherklotz.com>2021-04-14 00:37:38 +0100
committerYann Herklotz <git@yannherklotz.com>2021-04-14 00:37:38 +0100
commit7c65e3ed3d9c8ef722aae52816d85e3486ca8de2 (patch)
tree0ee53f538cdcf03a0cdf0dd22e4936c53f3d08f6 /algorithm.tex
parent2e70aee3a563ca6c78c75be1922c9f657a3fc40a (diff)
downloadoopsla21_fvhls-7c65e3ed3d9c8ef722aae52816d85e3486ca8de2.tar.gz
oopsla21_fvhls-7c65e3ed3d9c8ef722aae52816d85e3486ca8de2.zip
Update main diagram
Diffstat (limited to 'algorithm.tex')
-rw-r--r--algorithm.tex113
1 files changed, 27 insertions, 86 deletions
diff --git a/algorithm.tex b/algorithm.tex
index deafece..6bc88d8 100644
--- a/algorithm.tex
+++ b/algorithm.tex
@@ -75,107 +75,48 @@ It has an unlimited number of pseudo-registers, and is represented as a control
\begin{figure}
\centering
- \begin{subfigure}[b]{0.24\linewidth}
-\begin{minted}{c}
-int main() {
- int x[3] = {1, 2, 3};
- int sum = 0;
- for (int i = 0;
- i < 3;
- i++)
- sum += x[i];
- return sum;
-}
-\end{minted}
- \caption{Input C code.}\label{fig:accumulator_c}
- \end{subfigure}
- \begin{subfigure}[b]{0.24\linewidth}
+ \begin{subfigure}[b]{0.3\linewidth}
\begin{minted}[fontsize=\footnotesize]{c}
main() {
- 15: x8 = 1
- 14: int32[stack(0)] = x8
- 13: x7 = 2
- 12: int32[stack(4)] = x7
- 11: x6 = 3
- 10: int32[stack(8)] = x6
- 9: x2 = 0
- 8: x1 = 0
- 7: x5 = stack(0) (int)
- 6: x4 = int32[x5 + x1 * 4 + 0]
- 5: x2 = x2 + x4 + 0 (int)
- 4: x1 = x1 + 1 (int)
- 3: if (x1 <s 3) goto 7 else goto 2
- 2: x3 = x2
- 1: return x3
+ x2 = 3
+ int32[stack(0)] = x2
+ x1 = int32[stack(0)]
+ return x1
}
\end{minted}
\caption{3AC produced by \compcert{}.}\label{fig:accumulator_rtl}
- \end{subfigure}
-\begin{subfigure}[b]{0.48\linewidth}
+ \end{subfigure}\hfill%
+\begin{subfigure}[b]{0.65\linewidth}
\vspace{1em}
\begin{minted}[fontsize=\tiny]{verilog}
module main(reset, clk, finish, return_val);
- reg [31:0] stack [2:0];
- input [0:0] clk, reset;
- output reg [31:0] return_val;
- output reg [0:0] finish;
- reg [31:0] reg_8, reg_4, state,
- reg_6, reg_1, reg_7,
- reg_5, reg_3;
+ input [0:0] clk, reset; output reg [31:0] return_val = 0; output reg [0:0] finish = 0;
+ reg [31:0] state = 0, d_out = 0, d_in = 0, reg_1 = 0, addr = 0, reg_2 = 0;
+ reg [0:0] en = 0, wr_en = 0, u_en = 0; reg [31:0] stack [0:0];
+ always @(negedge clk)
+ if ({u_en != en}) begin
+ if (wr_en) stack[addr] <= d_in; else d_out <= stack[addr];
+ en <= u_en;
+ end
always @(posedge clk)
case (state)
- 32'd15: reg_8 <= 32'd1;
- 32'd14: stack[32'd0] <= reg_8;
- 32'd13: reg_7 <= 32'd2;
- 32'd12: stack[32'd1] <= reg_7;
- 32'd11: reg_6 <= 32'd3;
- 32'd10: stack[32'd2] <= reg_7;
- 32'd9: reg_2 <= 32'd0;
- 32'd8: reg_1 <= 32'd0;
- 32'd7: reg_5 <= 32'd0;
- 32'd6: reg_4 <= stack[{{{reg_5 + 32'd0}
- + {reg_1 * 32'd4}} / 32'd4}];
- 32'd5: reg_2 <= {reg_2 + {reg_4 + 32'd0}};
- 32'd4: reg_1 <= {reg_1 + 32'd1};
- 32'd3: ;
- 32'd2: reg_3 <= reg_2;
- 32'd1: begin
- finish = 1'd1;
- return_val = reg_3;
- end
+ 32'd6: reg_1 <= d_out;
+ 32'd4: reg_2 <= 32'd3;
+ 32'd3: begin u_en <= ( ! u_en); wr_en <= 32'd1; d_in <= reg_2; addr <= 32'd0; end
+ 32'd2: begin u_en <= ( ! u_en); wr_en <= 32'd0; addr <= 32'd0; end
+ 32'd1: begin finish <= 32'd1; return_val <= reg_1; end
default:;
endcase
-\end{minted}
- %\caption{Verilog always block describing the datapath of the module.}\label{fig:accumulator_v_1}
- %\end{subfigure}\hfill%
- %\begin{subfigure}[b]{0.49\linewidth}
-\vspace*{-63mm}
-\begin{minted}[xleftmargin=44mm, fontsize=\tiny]{verilog}
always @(posedge clk)
- if ({reset == 1'd1})
- state <= 32'd16;
- else
- case (state)
- 32'd15: state <= 32'd14;
- 32'd14: state <= 32'd13;
- 32'd13: state <= 32'd12;
- 32'd12: state <= 32'd11;
- 32'd11: state <= 32'd10;
- 32'd10: state <= 32'd9;
- 32'd9: state <= 32'd8;
- 32'd8: state <= 32'd7;
- 32'd7: state <= 32'd6;
- 32'd6: state <= 32'd5;
- 32'd5: state <= 32'd4;
+ if ({reset == 32'd1}) state <= 32'd4;
+ else case (state)
+ 32'd6: state <= 32'd1;
32'd4: state <= 32'd3;
- 32'd3: state <=
- ({$signed(reg_1)
- < $signed(32'd3)}
- ? 32'd7 : 32'd2);
- 32'd2: state <= 32'd1;
+ 32'd3: state <= 32'd2;
+ 32'd2: state <= 32'd6;
32'd1: ;
default:;
- endcase
+ endcase
endmodule
\end{minted}
\caption{Verilog produced by \vericert{}. The first \texttt{posedge} \texttt{always} block contains the data-path and the second contains the control logic.}\label{fig:accumulator_v}
@@ -189,7 +130,7 @@ In this section, we describe the stages of the \vericert{} translation, referrin
\subsubsection{Translating C to 3AC}
-The first stage of the translation uses unmodified \compcert{} to transform the C input, shown in Figure~\ref{fig:accumulator_c}, into a 3AC intermediate representation, shown in Figure~\ref{fig:accumulator_rtl}.
+The first stage of the translation uses unmodified \compcert{} to transform the C input into a 3AC intermediate representation, shown in Figure~\ref{fig:accumulator_rtl}.
As part of this translation, function inlining is also performed on all functions, which allows us to support function calls without having to support the \texttt{Icall} 3AC instruction. Although the duplication of the function bodies caused by inlining can increase the area of the hardware, it can have a positive effect on latency. Inlining precludes support for recursive function calls, but this feature is not supported in most other HLS tools either~\cite{davidthomas_asap16}.
%\JW{Is that definitely true? Was discussing this with Nadesh and George recently, and I ended up not being so sure. Inlining could actually lead to \emph{reduced} resource usage because once everything has been inlined, the (big) scheduling problem could then be solved quite optimally. Certainly inlining is known to increase register pressure, but that's not really an issue here. If we're not sure, we could just say that inlining everything leads to bloated Verilog files and the inability to support recursion, and leave it at that.}\YH{I think that is true, just because we don't do scheduling. With scheduling I think that's true, inlining actually becomes quite good.}