summaryrefslogtreecommitdiffstats
path: root/archive/evaluation.tex
blob: 6c21cbff1482f9341e3bcb518508fe2ebec64f73 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
\begin{table*}
  \begin{tabular}{lcccccccccccc}
    \toprule
    \textbf{Benchmark} & \multicolumn{2}{c}{\bf Cycles} & \multicolumn{2}{c}{\bf Frequency / MHz} & \multicolumn{2}{c}{\bf LUTs} & \multicolumn{2}{c}{\bf Registers} & \multicolumn{2}{c}{\bf Block RAMs} & \multicolumn{2}{c}{\bf DSPs}\\
    & L & V & L & V & L & V & L & V & L & V & L & V\\
    \midrule
    adpcm & 30241 & 121386 & 90.05 & 66.3 & 7719 & 51626 & 12034 & 42688 & 7 & 0 & 0 & 48\\
    aes & 8489 & 41958 & 87.83 & 19.6 & 24413 & 104017 & 23796 & 94239 & 19  & 0 & 0 & 6\\
    gsm & 7190 & 21994 & 119.25 & 66.1 & 6638 & 45764 & 9201 & 33675 & 3 & 0 & 0 & 8 \\
    mips & 7754 & 18482 & 98.95 & 78.43 & 5049 & 10617 & 4185 & 7690 & 0 & 0 & 0 & 0\\
    \bottomrule
  \end{tabular}
  \caption{CHStone programs synthesised with \legup{} 5.1 (L) and with \vericert{} (V) \JW{I guess this table is for the chop?}}\label{tab:chstone}
\end{table*}


\pgfplotstableread[col sep=comma]{results/exec-time.csv}{\nodivtimingtable}
\begin{figure}\centering
  \begin{tikzpicture}
    \begin{semilogyaxis}[
      ybar=0pt,
      width=1\textwidth,
      height=0.5\textwidth,
      bar width=3pt,
      ymin=0.1,
      ymax=3,
      log ticks with fixed point,
      legend pos=south east,
      xlabel={Polybench Benchmarks},
      xticklabels from table={\nodivtimingtable}{benchmark},
      ylabel={\vericert{} / \legup{} execution time ratio},
      legend style={nodes={scale=0.7, transform shape}},
      x tick label style={rotate=60,anchor=east,font=\footnotesize},
      xtick=data,
      enlarge x limits={abs=0.5},
      ]

      \addplot+ table [x expr=\coordindex,y=v no nc,col sep=comma] from \nodivtimingtable;
      \addlegendentry{LegUp w/o opt w/o chain};
      \addplot+ table [x expr=\coordindex,y=v no,col sep=comma] from \nodivtimingtable;
      \addlegendentry{LegUp w/o opt};
      \addplot+ table [x expr=\coordindex,y=v op,col sep=comma] from \nodivtimingtable;
      \addlegendentry{LegUp};

    \end{semilogyaxis}
  \end{tikzpicture}
\end{figure}

\pgfplotstableread[col sep=comma]{results/slice-nodiv.csv}{\nodivslicetable}
\begin{figure}\centering
  \begin{tikzpicture}
    \begin{semilogyaxis}[
      ybar=0pt,
      width=1\textwidth,
      height=0.5\textwidth,
      bar width=3pt,
      ymin=0.1,
      ymax=3,
      log ticks with fixed point,
      legend pos=south east,
      xlabel={Polybench Benchmarks},
      xticklabels from table={\nodivslicetable}{benchmark},
      ylabel={\vericert{} / \legup{} execution time ratio},
      legend style={nodes={scale=0.7, transform shape}},
      x tick label style={rotate=60,anchor=east,font=\footnotesize},
      xtick=data,
      enlarge x limits={abs=0.5},
      legend columns=-1,
      ]

      \addplot+ table [x expr=\coordindex,y=legup noopt nochain,col sep=comma] from \nodivslicetable;
      \addlegendentry{LegUp w/o opt w/o chain};
      \addplot+ table [x expr=\coordindex,y=legup noopt,col sep=comma] from \nodivslicetable;
      \addlegendentry{LegUp w/o opt};
      \addplot+ table [x expr=\coordindex,y=legup,col sep=comma] from \nodivslicetable;
      \addlegendentry{LegUp};

    \end{semilogyaxis}
  \end{tikzpicture}
\end{figure}

\begin{figure}\centering
\begin{subfigure}[t]{0.48\textwidth}
\definecolor{cyclecountcol}{HTML}{1b9e77}
\begin{tikzpicture}
\begin{axis}[
    xmode=log,
    ymode=log,
    height=1\textwidth,
    width=1\textwidth,
    xlabel={\legup{} cycle count},
    ylabel={\vericert{} cycle count},
    xmin=1000,
    xmax=10000000,
    ymax=10000000,
    ymin=1000,
    %log ticks with fixed point,
  ]

\addplot[draw=none, mark=*, draw opacity=0, fill opacity=0.6,cyclecountcol]
  table [x=legupcycles, y=vericertcycles, col sep=comma]
  {results/poly.csv};

\addplot[dotted, domain=1000:10000000]{x};
%\addplot[dashed, domain=10:10000]{9.02*x};

\end{axis}
\end{tikzpicture}
\caption{A comparison of the cycle count of hardware designs generated by \vericert{} and by \legup{}.}
\label{fig:comparison_cycles}
\end{subfigure}\hfill%
\begin{subfigure}[t]{0.48\textwidth}
\definecolor{polycol}{HTML}{e6ab02}
\definecolor{polywocol}{HTML}{7570b3}
\begin{tikzpicture}
\begin{axis}[
    xmode=log,
    ymode=log,
    height=1\textwidth,
    width=1\textwidth,
    xlabel={\legup{} execution time (ms)},
    ylabel={\vericert{} execution time (ms)},
    xmin=10,
    xmax=1000000,
    ymax=1000000,
    ymin=10,
    legend pos=south east,
    %log ticks with fixed point,
  ]

\addplot[draw=none, mark=*, draw opacity=0, fill opacity=0.8, polycol]
  table [x expr={\thisrow{legupcycles}/\thisrow{legupfreqMHz}}, y expr={\thisrow{vericertcycles}/\thisrow{vericertoldfreqMHz}}, col sep=comma]
  {results/poly.csv};

\addlegendentry{PolyBench}

\addplot[draw=none, mark=o, fill opacity=0, polywocol]
  table [x expr={\thisrow{legupcycles}/\thisrow{legupfreqMHz}}, y expr={\thisrow{vericertcycles}/\thisrow{vericertfreqMHz}}, col sep=comma]
  {results/poly.csv};

\addlegendentry{PolyBench w/o division}

\addplot[dotted, domain=10:1000000]{x};
%\addplot[dashed, domain=10:10000]{9.02*x + 442};

\end{axis}
\end{tikzpicture}
\caption{A comparison of the execution time of hardware designs generated by \vericert{} and by \legup{}.}
\label{fig:comparison_time}
\end{subfigure}
\end{figure}

Firstly, before comparing any performance metrics, it is worth highlighting that any Verilog produced by \vericert{} is guaranteed to be \emph{correct}, whilst no such guarantee can be provided by \legup{}.
This guarantee in itself provides a significant leap in terms of HLS reliability, compared to any other HLS tools available.

igure~\ref{fig:comparison_cycles} compares the cycle counts of our 27 programs executed by \vericert{} and \legup{} respectively.
n most cases, we see that the data points are above the diagonal, which demonstrates that the \legup{}-generated hardware is faster than \vericert{}-generated Verilog.

n average, \legup{} designs are $4.5\times$ faster than \vericert{} designs.
his performance gap is mostly due to \legup{} optimisations such as scheduling and memory analysis, which are designed to extract parallelism from input programs.
This gap does not represent the performance cost that comes with formally proving a HLS tool.
Instead, it is simply a gap between an unoptimised \vericert{} versus an optimised \legup{}.
t is notable that even without \vericert{} performing many optimisations, a few data points are close to the diagonal and even below it.
We are very encouraged by these data points.
s we improve \vericert{} by incorporating further  optimisations, this gap should reduce whilst preserving our correctness guarantees.

ycle count is one factor in calculating execution times; the other is the clock frequency, which determines the duration of each of these cycles. Figure~\ref{fig:comparison_time} compares the execution times of \vericert{} and \legup{}. Across the original \polybench{} benchmarks, we see that \vericert{} designs are about \slowdownOrig$\times$ slower than \legup{} designs. This dramatic discrepancy in performance can be largely attributed to \vericert's na\"ive implementations of division and modulo operations, as explained in Section~\ref{sec:evaluation:setup}. Indeed, \vericert{} achieved an average clock frequency of just 21MHz, while \legup{} managed about 247MHz. After replacing the division/modulo operations with our own C-based implementations, \vericert{}'s average clock frequency becomes about 112MHz. This is better, but still substantially below \legup{}, which uses various additional optimisations and Intel-specific IP blocks. Across the modified \polybench{} benchmarks, we see that \vericert{} designs are about \slowdownDiv$\times$ slower than \legup{} designs.

subsection{RQ2: How area-efficient is \vericert{}-generated hardware?}

\begin{figure}
\begin{subfigure}[t]{0.48\textwidth}
\definecolor{resourceutilcol}{HTML}{e7298a}
\begin{tikzpicture}
\begin{axis}[
    height=1\textwidth,
    width=1\textwidth,
    xlabel={\legup{} resource utilisation (\%)},
    ylabel={\vericert{} resource utilisation (\%)},
    xmin=0, ymin=0,
    xmax=1, ymax=30,
  ]

\addplot[draw=none, mark=*, draw opacity=0, fill opacity=0.6,resourceutilcol]
  table [x expr=(\thisrow{legupluts}/427200*100), y expr=(\thisrow{vericertluts}/427200*100), col sep=comma]
  {results/poly.csv};

%   \addplot[dashed, domain=0:1]{x};

\end{axis}
\end{tikzpicture}
\caption{A comparison of the resource utilisation of designs generated by \vericert{} and by \legup{}.}
\label{fig:comparison_area}
\end{subfigure}\hfill%
\begin{subfigure}[t]{0.48\textwidth}
\definecolor{compiltimecol}{HTML}{66a61e}
\begin{tikzpicture}
\begin{axis}[
    height=1\textwidth,
    width=1\textwidth,
    xlabel={\legup{} compilation time (s)},
    ylabel={\vericert{} compilation time (s)},
    yticklabel style={
        /pgf/number format/fixed,
        /pgf/number format/precision=2},
    xmin=4.6,
    xmax=5.1,
    ymin=0.06,
    ymax=0.20,
  ]

\addplot[draw=none, mark=*, draw opacity=0, fill opacity=0.6,compiltimecol]
  table [x=legupcomptime, y=vericertcomptime, col sep=comma]
  {results/poly.csv};

  %\addplot[dashed, domain=4.5:5.1]{0.1273*x-0.5048};

\end{axis}
\end{tikzpicture}
\caption{A comparison of compilation time for \vericert{} and for \legup{}}
\label{fig:comparison_comptime}
\end{subfigure}
\end{figure}