summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--evaluation.tex21
-rw-r--r--results/poly.csv26
-rw-r--r--verilog.tex7
3 files changed, 42 insertions, 12 deletions
diff --git a/evaluation.tex b/evaluation.tex
index 2058581..38dbf46 100644
--- a/evaluation.tex
+++ b/evaluation.tex
@@ -51,18 +51,19 @@ Our evaluation is designed to answer the following three research questions. \JW
width=80mm,
xlabel={LegUp (ms)},
ylabel={\vericert{} (ms)},
- xmin=0.01,
- xmax=10,
- ymax=500,
- ymin=0.5,
+ % xmin=0.01,
+ % xmax=10,
+ % ymax=500,
+ % ymin=0.5,
log ticks with fixed point,
]
\addplot[draw=none, mark=*, draw opacity=0, fill opacity=0.3]
- table [x=legupwallclockMS, y=vericertwallclockMS, col sep=comma]
- {results/comparison.csv};
+ table [x expr={\thisrow{legupcycles}/\thisrow{legupfreqMHz}}, y expr={\thisrow{vericertcycles}/\thisrow{vericertfreqMHz}}, col sep=comma]
+ {results/poly.csv};
-\addplot[dashed, domain=0.01:10]{50*x};
+\addplot[dashed, domain=10:10000]{x};
+\addplot[dashed, domain=10:10000]{10*x};
\end{axis}
\end{tikzpicture}
@@ -87,8 +88,10 @@ The difference in cycle counts shows the degree of parallelism that \legup{}'s
]
\addplot[draw=none, mark=*, draw opacity=0, fill opacity=0.3]
- table [x=leguputilisation, y=vericertutilisation, col sep=comma]
- {results/comparison.csv};
+ table [x expr=(\thisrow{legupluts}/427200*100), y expr=(\thisrow{vericertluts}/427200*100), col sep=comma]
+ {results/poly.csv};
+
+ \addplot[dashed, domain=0:1]{30*x};
\end{axis}
\end{tikzpicture}
diff --git a/results/poly.csv b/results/poly.csv
new file mode 100644
index 0000000..dee66fb
--- /dev/null
+++ b/results/poly.csv
@@ -0,0 +1,26 @@
+benchmark,legupcycles,legupfreqMHz,legupluts,legupregs,leguprams,legupdsps,legupcomptime,vericertcycles,vericertfreqMHz,vericertluts,vericertregs,vericertrams,vericertdsps,vericertcomptime
+durbin,15106,188.61,2509,4008,0,8,4.77,19852,199.2,3027,6168,0,8,0.077
+lu,482766,244.62,3116,4593,0,10,4.72,2634766,92.97,54806,106037,0,6,0.097
+ludcmp,470843,249.69,3605,5397,0,15,4.87,2401354,83.52,57145,111408,0,10,0.132
+trisolv,35382,213.9,2412,3749,0,3,4.73,33550,112.59,28101,55109,0,2,0.086
+2mm,60088,226.5,1114,1936,0,7,4.80,427098,116.9,32600,63212,0,18,0.136
+3mm,204195,188.96,4210,4801,0,43,4.88,539430,97.13,45073,89719,0,18,0.147
+atas,126288,193.24,3026,3719,0,10,4.80,92000,130.41,28603,56646,0,6,0.069
+bicg,11907,303.4,308,537,0,6,4.78,121134,122.74,30091,58799,0,8,0.097
+mvt,16806,384.47,372,597,0,4,4.79,139194,119.93,30753,60450,0,6,0.130
+doitgen,57199,252.14,909,1402,0,2,5.05,350302,126.18,19898,38461,0,4,0.105
+symm,64903,284.41,2155,3170,0,10,4.63,248930,113.92,27693,54514,0,14,0.115
+syrk,57395,278.01,598,976,0,2,4.73,309018,87.15,76889,57816,0,8,0.094
+syr2k,125705,240.85,3149,3679,0,6,4.85,478040,78.6,120116,82032,0,12,0.116
+trmm,41432,281.61,610,990,0,4,4.71,147528,105.61,64031,40502,0,4,0.089
+gemm,83676,192.68,1029,1544,0,35,4.79,360772,121.61,31853,62126,0,16,0.104
+gemver,28087,303.49,1854,2380,0,8,4.68,175326,107.27,32615,64118,0,14,0.099
+gesummv,6634,310.46,298,504,0,4,4.77,111094,79.97,113876,72908,0,10,0.101
+covariance,109992,245.16,2098,3096,0,5,4.77,297450,110.57,28729,56660,0,4,0.083
+fdtd-2d,214153,262.61,2736,3801,0,2,4.73,831912,108.23,31333,61421,0,6,0.116
+heat-3d,41059,115.54,3132,2910,0,60,4.92,555930,110.42,33915,67273,0,9,0.181
+jacobi-1d,6914,386.25,1355,1885,0,0,4.72,16606,277.93,1636,3305,0,0,0.071
+jacobi-2d,84609,240.79,2347,3185,0,2,4.81,357100,113.53,30393,59782,0,4,0.079
+seidel-2d,345294,232.4,2128,3337,0,2,4.68,875758,127.99,26948,53133,0,2,0.091
+floyd-warshall,1238764,235.52,1869,2367,0,2,4.71,4762766,109.4,59859,118101,0,2,0.094
+nussinov,216467,273.07,1078,1431,0,2,4.79,837958,90.73,60663,119303,0,0,0.080 \ No newline at end of file
diff --git a/verilog.tex b/verilog.tex
index 7db80a6..1e75840 100644
--- a/verilog.tex
+++ b/verilog.tex
@@ -37,15 +37,16 @@ Two types of assignments are supported in always blocks: nonblocking and blockin
\inferrule[Module]{(\Gamma, \epsilon, \vec{m})\ \downarrow_{\vec{m}} (\Gamma', \Delta')}{(\Gamma, \yhkeyword{module } \yhconstant{main} \yhkeyword{(...);}\ \vec{m}\ \yhkeyword{endmodule}) \downarrow_{m} (\Gamma' // \Delta')}
\end{equation*}
-\noindent \JW{On the top of that rule you have two occurrences of $\vec{m}$ -- is that right?} This rule dictates how the whole module runs in one clock cycle, from an initial state of all the variables $\Gamma$, to a final state $(\Gamma' // \Delta')$, where $//$ defines a merge between the two maps from variable names to values, where the right hand side values take priority if there is a conflict. \JW{Is $//$ a standard notation for this? If so, that's fine. It's just I haven't come across it before.} This rule correctly implements the behaviours for the blocking and nonblocking assignment by overwriting the assignments made through blocking assignment at the end of the clock cycle with the atomic assignments from the nonblocking assignments.
+\noindent \JW{On the top of that rule you have two occurrences of $\vec{m}$ -- is that right? If the $\vec{m}$ on the arrow is just the name of the transition type, rather than a variable, then I suggest $\downarrow_{\text{module}}$ or something. And perhaps the bottom of the rule could use $\downarrow_{\text{program}}$ or something.} This rule dictates how the whole module runs in one clock cycle, from an initial state of all the variables $\Gamma$, to a final state $(\Gamma' // \Delta')$, where $//$ defines a merge between the two maps from variable names to values, where the right hand side values take priority if there is a conflict. \JW{Is $//$ a standard notation for this? If so, that's fine. It's just I haven't come across it before.} This rule correctly implements the behaviours for the blocking and nonblocking assignment by overwriting the assignments made through blocking assignment at the end of the clock cycle with the atomic assignments from the nonblocking assignments.
+\JW{I think most of the following paragraph could be chopped. We've already mentioned that we're only targeting a small fragment of Verilog, so we could mention at that point that in the fragment we target there is no discrepancy between the simulation behaviour and the synthesis behaviour. I don't know how important it is to talk about the details of evaluation order here -- does the reader really need to be aware of this in order to understand your work?}
When targeting a hardware description language such as Verilog, it is important to be consistent between simulating the hardware and the behaviour of the synthesised result on actual hardware. In the target Verilog semantics, only clocked always blocks are supported as these ensure that there are not mismatches between simulation and synthesis, as combinational always blocks that trigger on any change of an internal signal may behave differently in simulation or synthesis based on the order of operations. This limitation in the semantics therefore helps keep the Verilog correct and consistent. In addition to that, only nonblocking assignment is used in signals that are used in multiple always blocks, which stops any race conditions from occurring as all the signal updates happen deterministically. \NR{So the semantics allows multiple drivers then?} Finally, a specific order of evaluation for the always blocks is chosen, and because of the limitations listed before, choosing an order is guaranteed to have the same behaviour as executing the always blocks in any order.\NR{Order is not guaranteed if we have multiple drivers.}
\subsection{Changes to the Semantics}
-Some small changes were made to the semantics proposed by \citet{loow19_verif_compil_verif_proces} to adapt the semantics to work better as a HLS target, as well as adding support for features that were needed in the target language.
+Some small changes were made to the semantics proposed by \citet{loow19_verif_compil_verif_proces} to make it suitable as a HLS target.
-The main change to the semantics is the support for two-dimensional arrays, which are needed to model RAM in Verilog. RAM is needed to model the stack in C efficiently, without having to declare a variable for each possible stack location. In the original semantics, RAMs, as well as inputs and outputs to the module could be modelled using a function from variable names (strings) to values, which could be modified accordingly to model inputs to the module. This is quite an abstract description of memory, which can also be expressed as an array of bitvectors, which is the path we took instead. This requires the addition of array operators to the semantics and correct reasoning of loads and stores to the array in different always blocks simultaneously. Consider the following Verilog code:
+The main change is the addition of support for two-dimensional arrays, which are needed to model RAM in Verilog. RAM is needed to model the stack in C efficiently, without having to declare a variable for each possible stack location. In the original semantics, RAMs, as well as inputs and outputs to the module could be modelled using a function from variable names (strings) to values, which could be modified accordingly to model inputs to the module. This is quite an abstract description of memory, which can also be expressed as an array of bitvectors, which is the path we took instead. This requires the addition of array operators to the semantics and correct reasoning of loads and stores to the array in different always blocks simultaneously. Consider the following Verilog code:
\begin{minted}{verilog}
always @(posedge clk) begin