From 428e424b75999eda16f5c5ccb4ee48763d99c9d1 Mon Sep 17 00:00:00 2001
From: Yann Herklotz <git@yannherklotz.com>
Date: Tue, 3 May 2022 23:20:21 +0100
Subject: Add section on discussion of hardware pipelining

---
 chapters/background.tex   |  2 +-
 chapters/introduction.tex |  2 +-
 chapters/pipelining.tex   | 62 ++++++++++++++++++++++++++++++++++++++++++++---
 chapters/scheduling.tex   |  2 +-
 fonts_env.tex             |  8 +++---
 lsr_env.tex               |  1 +
 6 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/chapters/background.tex b/chapters/background.tex
index 51a6632..7ce2064 100644
--- a/chapters/background.tex
+++ b/chapters/background.tex
@@ -387,7 +387,7 @@ correct manner.  In addition to that, the assumption is made that the semantics
 same in Handel-C as well as in the netlist format that is generated, which could be proven if the
 constructs are shown to behave in the exact same way as the handel-C constructs.
 
-\startmode[section]
+\startmode[chapter]
   \section{Bibliography}
   \placelistofpublications
 \stopmode
diff --git a/chapters/introduction.tex b/chapters/introduction.tex
index 55ed41b..c7dd2cc 100644
--- a/chapters/introduction.tex
+++ b/chapters/introduction.tex
@@ -109,7 +109,7 @@ a fork of CompCert which implements \SSA\ as an intermediate language.
 
 \desc{\in{Chapter}[sec:schedule]} describes the current implementation timeline.
 
-\startmode[section]
+\startmode[chapter]
   \section{Bibliography}
   \placelistofpublications
 \stopmode
diff --git a/chapters/pipelining.tex b/chapters/pipelining.tex
index 5aa03dc..26ed8c3 100644
--- a/chapters/pipelining.tex
+++ b/chapters/pipelining.tex
@@ -97,17 +97,71 @@ semantics.  Essentially, as the loop pipelining algorithm only modifies loops, i
 that the input loop $X$ is equivalent to the pipelined version of the loop $Y$.  Assuming that the
 validation algorithm is called $\alpha$, we therefore want to show that the following statement
 holds, where $X_N$ means that we are considering $N$ iterations of the loop.
+
 \placeformula\startformula
   \forall N,\quad \alpha(X_N) = \startmathcases
     \NC \alpha(Y_{N/\delta}; X_{N\%\delta}) \NC N \ge \mu \NR
     \NC \alpha(X_N) \NC \text{otherwise} \NR
 \stopmathcases\stopformula
 
-This states that for any number of iteration of the loop X, the formula should hold.  As the number
-of loop iterations $N$ are not always known at compile time, this may become an infinite property
-that needs to be checked.
+This states that for any number of iteration of the loop $X$, the formula should hold.  As the
+number of loop iterations $N$ are not always known at compile time, this may become an infinite
+property that needs to be checked.
+
+This infinite property can be proven by checking smaller finite properties that loop pipelining
+should ensure.  The two key insights that \cite[authors][tristan10_simpl_verif_valid_softw_pipel]
+found where that mainly only the two following properties needed to be checked:
 
-\startmode[section]
+\placeformula\startformula
+\alpha(\mathcal{S}; \mathcal{E}) = \alpha(\mathcal{E}; \mathcal{B}^{\delta})
+\stopformula
+
+\placeformula\startformula
+\alpha(\mathcal{S}; \mathcal{E}) = \alpha(\mathcal{E}; \mathcal{B}^{\delta})
+\stopformula
+
+\noindent In addition to some simpler properties, this means that the infinite property can still be
+checked.
+
+Finally, the verifier itself needs to be proven correct, meaning the correctness of the symbolic
+evaluation needs to be shown to be semantics preserving.  If the comparison between two symbolic
+states succeeds, then this must imply that the initial blocks themselves execute in the same way.
+
+\section{Hardware and Software Pipelining}
+
+Hardware and software pipelining algorithms have a lot of commonalities, even though the way the
+pipeline is implemented in the end differs.
+
+Pipelining in hardware is intuitive.  If one has two separate blocks where one is data-dependent on
+the other, then it is natural to start executing the first block once it has finished and passed the
+result to the second block.  This idea is also the key building block for processor designs, as it
+is a straightforward way to improve the throughput of a series of distinct and dependent operations.
+In \HLS\ it is also an important optimisation to efficiently translate loops.  As these are often
+executed many times, any improvements can reduce the throughput of the entire design.  The \HLS\
+tool therefore often automatically creates pipeline stages for loops and calculates a correct \II\
+for the loop, which will be the rate at which the pipeline needs to be filled.  The benefit though
+of generating hardware directly, is that the pipeline can already be laid out in the optimal and
+natural way.
+
+It is less clear why, and especially how, software can be pipelined in the same way as hardware, and
+what the benefits are if the code is going to be executed.  The main difference between the hardware
+pipeline is that in software one inherently only following one control-flow through the program.
+This means that it is not possible to lay out a pipeline in the same way as it is done in hardware
+to improve the throughput of loops.  However, a pipeline can be simulated in software, so that it
+behaves the same as the hardware pipeline, but it will manually have to unroll the loop so that all
+the stages can be executed by following one control-flow.  However, the inherent order of
+instructions will be quite different to the order of the pipeline stages in hardware.  In hardware,
+the pipeline can be written naturally in sequential order, and then the \II\ contract is fulfilled
+by setting the rate at which the pipeline is filled.  However, in software, one horizontal slice of
+the pipeline needs to be extracted and will be the new loop body, which represents the computation
+that the whole hardware pipeline is performing at one point in time.
+
+The main question that comes up is are the two pipelining methods actually equivalent?  In Vericert
+we are assuming that performing software pipelining, followed by scheduling and finally followed by
+the hardware generation approximates the hardware pipelining that traditional \HLS\ tools perform as
+part of their hardware generation.
+
+\startmode[chapter]
   \section{Bibliography}
   \placelistofpublications
 \stopmode
diff --git a/chapters/scheduling.tex b/chapters/scheduling.tex
index 75ae1fd..3f4acd6 100644
--- a/chapters/scheduling.tex
+++ b/chapters/scheduling.tex
@@ -478,7 +478,7 @@ This material is based upon work supported by the under Grant No.~ and Grant No.
 findings, and conclusions or recommendations expressed in this material are those of the author and
 do not necessarily reflect the views of the National Science Foundation.
 
-\startmode[section]
+\startmode[chapter]
   \section{Bibliography}
   \placelistofpublications
 \stopmode
diff --git a/fonts_env.tex b/fonts_env.tex
index d5cb7cd..157d4f3 100644
--- a/fonts_env.tex
+++ b/fonts_env.tex
@@ -15,11 +15,13 @@
   \definefontsynonym[MonoBoldItalic] [Iosevka-BoldItalic] [features=default]
 \stoptypescript
 
+\definefallbackfamily[lsrfont][math][modern][range={uppercasescript,lowercasescript}]
+
 \starttypescript [lsrfont]
   \definetypeface [lsrfont] [rm] [serif] [palatino]   [default]
-  \definetypeface [lsrfont] [ss] [sans]  [heros]       [default]
-  \definetypeface [lsrfont] [tt] [mono]  [modern]    [default]
-  \definetypeface [lsrfont] [mm] [math]  [palatino]    [default]
+  \definetypeface [lsrfont] [ss] [sans]  [heros]      [default]
+  \definetypeface [lsrfont] [tt] [mono]  [modern]     [default]
+  \definetypeface [lsrfont] [mm] [math]  [eulernova]  [default]
 \stoptypescript
 
 \stopenvironment
diff --git a/lsr_env.tex b/lsr_env.tex
index 43e33d6..53362e5 100644
--- a/lsr_env.tex
+++ b/lsr_env.tex
@@ -27,6 +27,7 @@
 % Layout
 % ==========================================================================
 \setuppapersize[A4][A4]
+
 \definefontfeature[default][default][protrusion=quality,expansion=quality]
 \setupalign[hz,hanging,lesshyphenation,verytolerant]
 \setupbodyfont[lsrfont,11pt]
-- 
cgit