summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--algorithm.tex113
-rw-r--r--data/accumulator.c5
-rw-r--r--data/accumulator.htl24
-rw-r--r--data/accumulator.v108
-rw-r--r--data/accumulator2.v156
5 files changed, 287 insertions, 119 deletions
diff --git a/algorithm.tex b/algorithm.tex
index b620447..930e3ca 100644
--- a/algorithm.tex
+++ b/algorithm.tex
@@ -79,8 +79,9 @@ It has an unlimited number of pseudo-registers, and is represented as a control
\begin{subfigure}[t]{1\linewidth}
\begin{minted}[fontsize=\footnotesize,linenos,xleftmargin=20pt]{c}
int main() {
- int x[1] = {3};
- return x[0];
+ int x[2] = {3, 6};
+ int i = 1;
+ return x[i];
}
\end{minted}
\caption{Example C code passed to \vericert{}.}\label{fig:accumulator_c}
@@ -88,51 +89,60 @@ int main() {
\begin{subfigure}[b]{1\linewidth}
\begin{minted}[fontsize=\footnotesize,linenos,xleftmargin=20pt]{c}
main() {
- x2 = 3
- int32[stack(0)] = x2
- x1 = int32[stack(0)]
- return x1
+ x5 = 3
+ int32[stack(0)] = x5
+ x4 = 6
+ int32[stack(4)] = x4
+ x1 = 1
+ x3 = stack(0) (int)
+ x2 = int32[x3 + x1
+ * 4 + 0]
+ return x2
}
\end{minted}
- \caption{3AC produced by \compcert{}.}\label{fig:accumulator_rtl}
+ \caption{3AC produced by the \compcert{} front-end without any optimisations.}\label{fig:accumulator_rtl}
\end{subfigure}
\end{subfigure}\hfill%
\begin{subfigure}[b]{0.65\linewidth}
- \vspace{1em}
\begin{minted}[fontsize=\tiny,linenos,xleftmargin=20pt]{verilog}
module main(reset, clk, finish, return_val);
- input [0:0] clk, reset; output reg [31:0] return_val = 0;
+ input [0:0] reset, clk;
output reg [0:0] finish = 0;
- reg [31:0] state = 0, d_out = 0, d_in = 0, reg_1 = 0, addr = 0, reg_2 = 0;
- reg [0:0] en = 0, wr_en = 0, u_en = 0; reg [31:0] stack [0:0];
- // RAM template
+ output reg [31:0] return_val = 0;
+ reg [31:0] reg_3 = 0, addr = 0, d_in = 0, reg_5 = 0, wr_en = 0;
+ reg [0:0] en = 0, u_en = 0;
+ reg [31:0] state = 0, reg_2 = 0, reg_4 = 0, d_out = 0, reg_1 = 0;
+ reg [31:0] stack [1:0];
always @(negedge clk)
if ({u_en != en}) begin
if (wr_en) stack[addr] <= d_in; else d_out <= stack[addr];
en <= u_en;
end
- // Data-path
always @(posedge clk)
case (state)
- 32'd6: reg_1 <= d_out;
- 32'd4: reg_2 <= 32'd3;
- 32'd3: begin u_en <= ( ! u_en); wr_en <= 32'd1;
- d_in <= reg_2; addr <= 32'd0; end
- 32'd2: begin u_en <= ( ! u_en); wr_en <= 32'd0; addr <= 32'd0; end
- 32'd1: begin finish <= 32'd1; return_val <= reg_1; end
- default:;
+ 32'd11: reg_2 <= d_out;
+ 32'd8: reg_5 <= 32'd3;
+ 32'd7: begin u_en <= ( ! u_en); wr_en <= 32'd1;
+ d_in <= reg_5; addr <= 32'd0; end
+ 32'd6: reg_4 <= 32'd6;
+ 32'd5: begin u_en <= ( ! u_en); wr_en <= 32'd1;
+ d_in <= reg_4; addr <= 32'd1; end
+ 32'd4: reg_1 <= 32'd1;
+ 32'd3: reg_3 <= 32'd0;
+ 32'd2: begin u_en <= ( ! u_en); wr_en <= 32'd0;
+ addr <= {{{reg_3 + 32'd0} + {reg_1 * 32'd4}} / 32'd4}; end
+ 32'd1: begin finish = 32'd1; return_val = reg_2; end
+ default: ;
endcase
- // Control-logic
always @(posedge clk)
- if ({reset == 32'd1}) state <= 32'd4;
+ if ({reset == 32'd1}) state <= 32'd8;
else case (state)
- 32'd6: state <= 32'd1;
- 32'd4: state <= 32'd3;
- 32'd3: state <= 32'd2;
- 32'd2: state <= 32'd6;
- 32'd1: ;
- default:;
- endcase
+ 32'd11: state <= 32'd1; 32'd4: state <= 32'd3;
+ 32'd8: state <= 32'd7; 32'd3: state <= 32'd2;
+ 32'd7: state <= 32'd6; 32'd2: state <= 32'd11;
+ 32'd6: state <= 32'd5; 32'd1: ;
+ 32'd5: state <= 32'd4; default: ;
+ endcase
endmodule
\end{minted}
\caption{Verilog produced by \vericert{}. It demonstrates the instantiation of the RAM, the data-path and finally the control-logic.}\label{fig:accumulator_v}
@@ -147,7 +157,7 @@ In this section, we describe the stages of the \vericert{} translation, referrin
\subsubsection{Translating C to 3AC}
The first stage of the translation uses unmodified \compcert{} to transform the C input into a 3AC intermediate representation, shown in Figure~\ref{fig:accumulator_rtl}.
-As part of this translation, function inlining is also performed on all functions, which allows us to support function calls without having to support the \texttt{Icall} 3AC instruction. Although the duplication of the function bodies caused by inlining can increase the area of the hardware, it can have a positive effect on latency. Inlining precludes support for recursive function calls, but this feature isn't supported in most other HLS tools either~\cite{davidthomas_asap16}.
+As part of this translation, function inlining is also performed on all functions, which allows us to support function calls without having to support the \texttt{Icall} 3AC instruction. Although the duplication of the function bodies caused by inlining can increase the area of the hardware, it can have a positive effect on latency\YH{TODO: Add sentence on why inlining is often the better choice}. Inlining precludes support for recursive function calls, but this feature isn't supported in most other HLS tools either~\cite{davidthomas_asap16}.
%\JW{Is that definitely true? Was discussing this with Nadesh and George recently, and I ended up not being so sure. Inlining could actually lead to \emph{reduced} resource usage because once everything has been inlined, the (big) scheduling problem could then be solved quite optimally. Certainly inlining is known to increase register pressure, but that's not really an issue here. If we're not sure, we could just say that inlining everything leads to bloated Verilog files and the inability to support recursion, and leave it at that.}\YH{I think that is true, just because we don't do scheduling. With scheduling I think that's true, inlining actually becomes quite good.}
@@ -180,18 +190,17 @@ Figure~\ref{fig:accumulator_diagram} shows the resulting FSMD architecture. The
\fill[white,rounded corners=10pt] (7,0.5) rectangle (11.5,2.2);
\node at (8,2) {\tiny Next State FSM};
- \foreach \x in {15,...,9}
- {\pgfmathtruncatemacro{\y}{\x-9}%
- \node[draw,circle,inner sep=0,minimum size=10,scale=0.8] (s\x) at (7.5+\y/2,1.6) {\tiny \x};}
\foreach \x in {8,...,2}
{\pgfmathtruncatemacro{\y}{8-\x}%
- \node[draw,circle,inner sep=0,minimum size=10,scale=0.8] (s\x) at (7.5+\y/2,1.1) {\tiny \x};}
+ \node[draw,circle,inner sep=0,minimum size=10,scale=0.8] (s\x) at (7.5+\y/2,1.35) {\tiny \x};}
\node[draw,circle,inner sep=0,minimum size=10,scale=0.8] (s1c) at (11,1.35) {\tiny 1};
\node[draw,circle,inner sep=0,minimum size=13,scale=0.8] (s1) at (s1c) {};
- \foreach \x in {15,...,2}
+ \foreach \x in {8,...,3}
{\pgfmathtruncatemacro{\y}{\x-1}\draw[-{Latex[length=1mm,width=0.7mm]}] (s\x) -- (s\y);}
- \draw[-{Latex[length=1mm,width=0.7mm]}] (10.8,2) to [out=200,in=70] (s15);
- \draw[-{Latex[length=1mm,width=0.7mm]}] (s3) to [out=210,in=330] (s7);
+ \node[draw,circle,inner sep=0,minimum size=10,scale=0.8] (s11) at (10.5,0.9) {\tiny 11};
+ \draw[-{Latex[length=1mm,width=0.7mm]}] (s2) -- (s11);
+ \draw[-{Latex[length=1mm,width=0.7mm]}] (s11) -- (s1);
+ \draw[-{Latex[length=1mm,width=0.7mm]}] (7.2,1.7) to [out=0,in=100] (s8);
\node[draw,fill=white] (nextstate) at (9.25,3) {\tiny Current State};
\draw[-{Latex[length=1mm,width=0.7mm]}] let \p1 = (nextstate) in
@@ -205,7 +214,7 @@ Figure~\ref{fig:accumulator_diagram} shows the resulting FSMD architecture. The
\fill[white,rounded corners=10pt] (2,0.5) rectangle (5,3);
\filldraw[fill=white] (0.25,0.5) rectangle (1.5,2.75);
\node at (2.6,2.8) {\tiny Update};
- \node[align=center] at (0.875,2.4) {\tiny \texttt{RAM}\\[-1.5ex]\tiny\texttt{(Array)}};
+ \node[align=center] at (0.875,2.4) {\tiny \texttt{RAM}};
\node[scale=0.4] at (4.7,1.5) {\texttt{state}};
\draw[-{Latex[length=1mm,width=0.7mm]}] (6,1.5) -- (5,1.5);
\draw[-{Latex[length=1mm,width=0.7mm]}] (6,1.5) -- (7,1.5);
@@ -214,15 +223,31 @@ Figure~\ref{fig:accumulator_diagram} shows the resulting FSMD architecture. The
\node[scale=0.4,rotate=60] at (2.5,0.75) {\texttt{clk}};
\node[scale=0.4,rotate=60] at (2.7,0.75) {\texttt{rst}};
+ \node[scale=0.4,right,inner sep=5pt] (ram1) at (2,2.1) {\texttt{u\_en}};
+ \node[scale=0.4,right,inner sep=5pt] (ram2) at (2,1.9) {\texttt{wr\_en}};
+ \node[scale=0.4,right,inner sep=5pt] (ram3) at (2,1.7) {\texttt{addr}};
+ \node[scale=0.4,right,inner sep=5pt] (ram4) at (2,1.5) {\texttt{d\_in}};
+ \node[scale=0.4,right,inner sep=5pt] (ram5) at (2,1.3) {\texttt{d\_out}};
+
+ \node[scale=0.4,left,inner sep=5pt] (r1) at (1.5,2.1) {\texttt{u\_en}};
+ \node[scale=0.4,left,inner sep=5pt] (r2) at (1.5,1.9) {\texttt{wr\_en}};
+ \node[scale=0.4,left,inner sep=5pt] (r3) at (1.5,1.7) {\texttt{addr}};
+ \node[scale=0.4,left,inner sep=5pt] (r4) at (1.5,1.5) {\texttt{d\_in}};
+ \node[scale=0.4,left,inner sep=5pt] (r5) at (1.5,1.3) {\texttt{d\_out}};
+
+ \draw[-{Latex[length=1mm,width=0.7mm]}] (ram1) -- (r1);
+ \draw[-{Latex[length=1mm,width=0.7mm]}] (ram2) -- (r2);
+ \draw[-{Latex[length=1mm,width=0.7mm]}] (ram3) -- (r3);
+ \draw[-{Latex[length=1mm,width=0.7mm]}] (ram4) -- (r4);
+ \draw[-{Latex[length=1mm,width=0.7mm]}] (r5) -- (ram5);
+
\draw[-{Latex[length=1mm,width=0.7mm]}] (4,0.5) -- (4,-0.5);
\draw[-{Latex[length=1mm,width=0.7mm]}] (3.75,0.5) -- (3.75,-0.5);
\draw[-{Latex[length=1mm,width=0.7mm]}] (2.45,-0.5) -- (2.45,0.5);
\draw[-{Latex[length=1mm,width=0.7mm]}] (2.65,-0.5) -- (2.65,0.5);
- \foreach \x in {0,...,2}
- {\draw (0.25,1.25-0.25*\x) -- (1.5,1.25-0.25*\x); \node at (0.875,1.13-0.25*\x) {\tiny \x};}
- \draw[-{Latex[length=1mm,width=0.7mm]}] (1.5,1.5) -- (2,1.5);
- \draw[-{Latex[length=1mm,width=0.7mm]}] (2,1.75) -- (1.5,1.75);
+ \foreach \x in {0,...,1}
+ {\draw (0.25,1-0.25*\x) -- (1.5,1-0.25*\x); \node at (0.875,0.88-0.25*\x) {\tiny \x};}
%\node[scale=0.4] at (1.2,2.2) {\texttt{wr\_en}};
%\node[scale=0.4] at (1.2,2) {\texttt{wr\_addr}};
@@ -251,8 +276,8 @@ Figure~\ref{fig:accumulator_diagram} shows the resulting FSMD architecture. The
\node[scale=0.4] at (3.5,4.2) {\texttt{reg\_1}};
\node[scale=0.4] at (3.5,4) {\texttt{reg\_2}};
\node[scale=0.4] at (3.5,3.8) {\texttt{reg\_3}};
- \node[scale=0.4] at (3.5,3.6) {$\cdots$};
- \node[scale=0.4] at (3.5,3.4) {\texttt{reg\_8}};
+ \node[scale=0.4] at (3.5,3.6) {\texttt{reg\_4}};
+ \node[scale=0.4] at (3.5,3.4) {\texttt{reg\_5}};
\end{tikzpicture}}
\caption{The FSMD for the example shown in Figure~\ref{fig:accumulator_c_rtl}, split into a data-path and control logic for the next state calculation. The Update block takes the current state, current values of all registers and at most one value stored in the array, and calculates a new value that can either be stored back in the array or in a register.}\label{fig:accumulator_diagram}
\end{figure*}
diff --git a/data/accumulator.c b/data/accumulator.c
index f8b5e32..217851c 100644
--- a/data/accumulator.c
+++ b/data/accumulator.c
@@ -1,4 +1,5 @@
int main() {
- int x[1] = {3};
- return x[0];
+ int x[2] = {3, 6};
+ int i = 1;
+ return x[i];
}
diff --git a/data/accumulator.htl b/data/accumulator.htl
index ecab36f..6f7ec5f 100644
--- a/data/accumulator.htl
+++ b/data/accumulator.htl
@@ -1,16 +1,24 @@
main() {
datapath {
- 4: reg_2 <= 32'd3;
- 3: reg_6[32'd0] <= reg_2;
- 2: reg_1 <= reg_6[32'd0];
- 1: reg_4 = 32'd1;
-reg_5 = reg_1;
+ 8: reg_5 <= 32'd3;
+ 7: reg_9[32'd0] <= reg_5;
+ 6: reg_4 <= 32'd6;
+ 5: reg_9[32'd1] <= reg_4;
+ 4: reg_1 <= 32'd1;
+ 3: reg_3 <= 32'd0;
+ 2: reg_2 <= reg_9[{{{reg_3 + 32'd0} + {reg_1 * 32'd4}} / 32'd4}];
+ 1: reg_7 = 32'd1;
+reg_8 = reg_2;
}
controllogic {
- 4: reg_3 <= 32'd3;
- 3: reg_3 <= 32'd2;
- 2: reg_3 <= 32'd1;
+ 8: reg_6 <= 32'd7;
+ 7: reg_6 <= 32'd6;
+ 6: reg_6 <= 32'd5;
+ 5: reg_6 <= 32'd4;
+ 4: reg_6 <= 32'd3;
+ 3: reg_6 <= 32'd2;
+ 2: reg_6 <= 32'd1;
1: ;
}
}
diff --git a/data/accumulator.v b/data/accumulator.v
index aaed3da..f0057b5 100644
--- a/data/accumulator.v
+++ b/data/accumulator.v
@@ -1,46 +1,68 @@
+
module main(reset, clk, finish, return_val);
- input [0:0] clk, reset;
- output reg [31:0] return_val = 0;
- output reg [0:0] finish = 0;
- reg [0:0] en = 0, wr_en = 0, u_en = 0;
- reg [31:0] state = 0, d_out = 0, d_in = 0;
- reg [31:0] reg_1 = 0, addr = 0, reg_2 = 0;
- reg [31:0] stack [0:0];
- always @(negedge clk)
- if ({u_en != en}) begin
- if (wr_en) stack[addr] <= d_in;
- else d_out <= stack[addr];
- en <= u_en;
- end
- always @(posedge clk)
- case (state)
- 32'd6: reg_1 <= d_out;
- 32'd4: reg_2 <= 32'd3;
- 32'd3: begin
- u_en <= ( ! u_en);
- wr_en <= 32'd1;
- d_in <= reg_2;
- addr <= 32'd0;
+input [0:0] reset, clk;
+output reg [0:0] finish = 0;
+output reg [31:0] return_val = 0;
+reg [31:0] reg_3 = 0, addr = 0, d_in = 0, reg_5 = 0, wr_en = 0;
+reg [0:0] en = 0, u_en = 0;
+reg [31:0] state = 0, reg_2 = 0, reg_4 = 0, d_out = 0, reg_1 = 0;
+reg [31:0] stack [1:0];
+always @(negedge clk)
+if ({u_en != en}) begin
+if (wr_en) stack[addr] <= d_in; else d_out <= stack[addr];
+en <= u_en;
+end
+always @(posedge clk)
+case (state)
+32'd11: reg_2 <= d_out;
+32'd8: reg_5 <= 32'd3;
+32'd7: begin u_en <= ( ! u_en); wr_en <= 32'd1;
+d_in <= reg_5; addr <= 32'd0; end
+32'd6: reg_4 <= 32'd6;
+32'd5: begin u_en <= ( ! u_en); wr_en <= 32'd1;
+d_in <= reg_4; addr <= 32'd1; end
+32'd4: reg_1 <= 32'd1;
+32'd3: reg_3 <= 32'd0;
+32'd2: begin u_en <= ( ! u_en); wr_en <= 32'd0;
+addr <= {{{reg_3 + 32'd0} + {reg_1 * 32'd4}} / 32'd4}; end
+32'd1: begin finish = 32'd1; return_val = reg_2; end
+default: ;
+endcase
+always @(posedge clk)
+if ({reset == 32'd1}) state <= 32'd8;
+else case (state)
+32'd11: state <= 32'd1;
+32'd4: state <= 32'd3;
+32'd8: state <= 32'd7;
+32'd3: state <= 32'd2;
+32'd7: state <= 32'd6;
+32'd2: state <= 32'd11;
+
+module testbench;
+ reg start, reset, clk;
+ wire finish;
+ wire [31:0] return_val;
+ reg [31:0] cycles;
+
+ main m(reset, clk, finish, return_val);
+
+ initial begin
+ clk = 0;
+ start = 0;
+ reset = 0;
+ @(posedge clk) reset = 1;
+ @(posedge clk) reset = 0;
+ cycles = 0;
+ end
+
+ always #5 clk = ~clk;
+
+ always @(posedge clk) begin
+ if (finish == 1) begin
+ $display("cycles: %0d", cycles);
+ $display("finished: %0d", return_val);
+ $finish;
end
- 32'd2: begin
- u_en <= ( ! u_en);
- wr_en <= 32'd0;
- addr <= 32'd0;
- end
- 32'd1: begin
- finish = 32'd1;
- return_val = reg_1;
- end
- default:;
- endcase
- always @(posedge clk)
- if ({reset == 32'd1}) state <= 32'd4;
- else case (state)
- 32'd6: state <= 32'd1;
- 32'd4: state <= 32'd3;
- 32'd3: state <= 32'd2;
- 32'd2: state <= 32'd6;
- 32'd1: ;
- default:;
- endcase
+ cycles <= cycles + 1;
+ end
endmodule
diff --git a/data/accumulator2.v b/data/accumulator2.v
index c7bcc59..bb4c198 100644
--- a/data/accumulator2.v
+++ b/data/accumulator2.v
@@ -1,26 +1,138 @@
-always @(posedge clk)
- if ({reset == 1'd1})
- state <= 32'd16;
- else
+module main(start, reset, clk, finish, return_val);
+ reg [31:0] reg_15 = 0;
+ output reg [0:0] finish = 0;
+ input [0:0] reset;
+ reg [31:0] reg_3 = 0;
+ reg [31:0] reg_13 = 0;
+ reg [31:0] reg_5 = 0;
+ reg [0:0] reg_17 = 0;
+ reg [31:0] reg_1 = 0;
+ reg [0:0] reg_14 = 0;
+ reg [31:0] state = 0;
+ input [0:0] start;
+ reg [0:0] reg_18 = 0;
+ reg [31:0] reg_2 = 0;
+ input [0:0] clk;
+ reg [31:0] reg_4 = 0;
+ output reg [31:0] return_val = 0;
+ reg [31:0] reg_16 = 0;
+ reg [31:0] stack [1:0];
+ always @(negedge clk) begin
+ if ({reg_18 != reg_14}) begin
+ if (reg_17) begin
+ stack[reg_13] <= reg_15;
+ end else begin
+ reg_16 <= stack[reg_13];
+ end
+ reg_14 <= reg_18;
+ end else begin
+ ;
+ end
+ end
+ always @(posedge clk) begin
+ case (state)
+ 32'd11: begin
+ reg_2 <= reg_16;
+ end
+ 32'd8: begin
+ reg_5 <= 32'd3;
+ end
+ 32'd7: begin
+ reg_18 <= ( ! reg_18);
+ reg_17 <= 32'd1;
+ reg_15 <= reg_5;
+ reg_13 <= 32'd0;
+ end
+ 32'd6: begin
+ reg_4 <= 32'd6;
+ end
+ 32'd5: begin
+ reg_18 <= ( ! reg_18);
+ reg_17 <= 32'd1;
+ reg_15 <= reg_4;
+ reg_13 <= 32'd1;
+ end
+ 32'd4: begin
+ reg_1 <= 32'd1;
+ end
+ 32'd3: begin
+ reg_3 <= 32'd0;
+ end
+ 32'd2: begin
+ reg_18 <= ( ! reg_18);
+ reg_17 <= 32'd0;
+ reg_13 <= {{{reg_3 + 32'd0} + {reg_1 * 32'd4}} / 32'd4};
+ end
+ 32'd1: begin
+ finish = 32'd1;
+ return_val = reg_2;
+ end
+ default:;
+ endcase
+ end
+ always @(posedge clk) begin
+ if ({reset == 32'd1}) begin
+ state <= 32'd8;
+ end else begin
case (state)
- 32'd16: state <= 32'd15;
- 32'd15: state <= 32'd14;
- 32'd14: state <= 32'd13;
- 32'd13: state <= 32'd12;
- 32'd12: state <= 32'd11;
- 32'd11: state <= 32'd10;
- 32'd10: state <= 32'd9;
- 32'd9: state <= 32'd8;
- 32'd8: state <= 32'd7;
- 32'd7: state <= 32'd6;
- 32'd6: state <= 32'd5;
- 32'd5: state <= 32'd4;
- 32'd4: state <= 32'd3;
- 32'd3: state <=
- ({$signed(reg_1) < $signed(32'd3)}
- ? 32'd7 : 32'd2);
- 32'd2: state <= 32'd1;
- 32'd1: ;
+ 32'd11: begin
+ state <= 32'd1;
+ end
+ 32'd8: begin
+ state <= 32'd7;
+ end
+ 32'd7: begin
+ state <= 32'd6;
+ end
+ 32'd6: begin
+ state <= 32'd5;
+ end
+ 32'd5: begin
+ state <= 32'd4;
+ end
+ 32'd4: begin
+ state <= 32'd3;
+ end
+ 32'd3: begin
+ state <= 32'd2;
+ end
+ 32'd2: begin
+ state <= 32'd11;
+ end
+ 32'd1: begin
+ ;
+ end
default:;
endcase
+ end
+ end
+endmodule
+
+module testbench;
+ reg start, reset, clk;
+ wire finish;
+ wire [31:0] return_val;
+ reg [31:0] cycles;
+
+ main m(start, reset, clk, finish, return_val);
+
+ initial begin
+ clk = 0;
+ start = 0;
+ reset = 0;
+ @(posedge clk) reset = 1;
+ @(posedge clk) reset = 0;
+ cycles = 0;
+ end
+
+ always #5 clk = ~clk;
+
+ always @(posedge clk) begin
+ if (finish == 1) begin
+ $display("cycles: %0d", cycles);
+ $display("finished: %0d", return_val);
+ $finish;
+ end
+ cycles <= cycles + 1;
+ end
endmodule