Merge branch 'master' of http://git.stderr.nl/matthijs/master-project/paper

[matthijs/master-project/dsd-paper.git] / cλash.lhs
diff --git a/cλash.lhs b/cλash.lhs

index 487d6964ccccec84494716bd89488a772cb49332..6f158b5edc0b4e04357e51977bfb7a6ca8ab9e3f 100644 (file)
--- a/cλash.lhs
+++ b/cλash.lhs
@@ -375,6 +375,9 @@
  \newcommand{\fref}[1]{\cref{#1}} 
  \newcommand{\Fref}[1]{\Cref{#1}}
  
+\usepackage{epstopdf}
+
+\epstopdfDeclareGraphicsRule{.svg}{pdf}{.pdf}{rsvg-convert --format=pdf < #1 > \noexpand\OutputFile}
  
  %include polycode.fmt
  %include clash.fmt
@@ -442,7 +445,15 @@ c.p.r.baaij@@utwente.nl, matthijs@@stdin.nl, j.kuper@@utwente.nl}}
  
  \begin{abstract}
  %\boldmath
-The abstract goes here.
+\CLaSH\ is a functional hardware description language that borrows both its 
+syntax and semantics from the functional programming language Haskell. Circuit 
+descriptions can be translated to synthesizable VHDL using the prototype 
+\CLaSH\ compiler. As the circuit descriptions are made in plain Haskell, 
+simulations can also be compiled by a Haskell compiler.
+
+The use of polymorphism and higher-order functions allow a circuit designer to 
+describe more abstract and general specifications than are possible in the 
+traditional hardware description languages.
  \end{abstract}
  % IEEEtran.cls defaults to using nonbold math in the Abstract.
  % This preserves the distinction between vectors and scalars. However,
@@ -509,7 +520,7 @@ in Haskell as a domain specific languages. As far as the authors know, such
  extensive support for choice-elements is new in the domain of functional 
  hardware description languages. As the hardware descriptions are plain Haskell 
  functions, these descriptions can be compiled for simulation using an 
-optimizing Haskell compiler such as the Glasgow Haskell Compiler (\GHC).
+optimizing Haskell compiler such as the Glasgow Haskell Compiler (\GHC)~\cite{ghc}.
  
  Where descriptions in a conventional hardware description language have an 
  explicit clock for the purpose state and synchronicity, the clock is implied 
@@ -562,7 +573,7 @@ by an (optimizing) \VHDL\ synthesis tool.
      \end{code}
      
      \begin{figure}
-    \centerline{\includegraphics{mac}}
+    \centerline{\includegraphics{mac.svg}}
      \caption{Combinatorial Multiply-Accumulate}
      \label{img:mac-comb}
      \end{figure}
@@ -576,7 +587,7 @@ by an (optimizing) \VHDL\ synthesis tool.
      \end{code}
      
      \begin{figure}
-    \centerline{\includegraphics{mac-nocurry}}
+    \centerline{\includegraphics{mac-nocurry.svg}}
      \caption{Combinatorial Multiply-Accumulate (complex input)}
      \label{img:mac-comb-nocurry}
      \end{figure}
@@ -595,10 +606,7 @@ by an (optimizing) \VHDL\ synthesis tool.
      % against the constructors in the \hs{case} expressions. 
      We can see two versions of a contrived example below, the first 
      using a \hs{case} construct and the other using a \hs{if-then-else} 
-    constructs, in the code below. The example sums two values when they are 
-    equal or non-equal (depending on the predicate given) and returns 0 
-    otherwise. Both versions of the example roughly correspond to the same 
-    netlist, which is depicted in \Cref{img:choice}.
+    constructs, in the code below. 
      
      \begin{code}
      sumif pred a b = case pred of
@@ -619,10 +627,15 @@ by an (optimizing) \VHDL\ synthesis tool.
      \end{code}
  
      \begin{figure}
-    \centerline{\includegraphics{choice-case}}
+    \centerline{\includegraphics{choice-case.svg}}
      \caption{Choice - sumif}
      \label{img:choice}
      \end{figure}
+    
+    The example sums two values when they are equal or non-equal (depending on 
+    the predicate given) and returns 0 otherwise. Both versions of the example 
+    roughly correspond to the same netlist, which is depicted in 
+    \Cref{img:choice}.
  
      A slightly more complex (but very powerful) form of choice is pattern 
      matching. A function can be defined in multiple clauses, where each clause 
@@ -896,44 +909,6 @@ by an (optimizing) \VHDL\ synthesis tool.
      \begin{code}
      map :: (a -> b) -> [a|n] -> [b|n]
      \end{code}
-    
-    As an example of a common hardware design where the use of higher-order
-    functions leads to a very natural description is a FIR filter, which is 
-    basically the dot-product of two vectors:
-
-    \begin{equation}
-    y_t  = \sum\nolimits_{i = 0}^{n - 1} {x_{t - i}  \cdot h_i } 
-    \end{equation}
-    
-    A FIR filter multiplies fixed constants ($h$) with the current 
-    and a few previous input samples ($x$). Each of these multiplications
-    are summed, to produce the result at time $t$. The equation of the FIR 
-    filter is indeed equivalent to the equation of the dot-product, which is 
-    shown below:
-    
-    \begin{equation}
-    \mathbf{x}\bullet\mathbf{y} = \sum\nolimits_{i = 0}^{n - 1} {x_i \cdot y_i } 
-    \end{equation}
-
-    We can easily and directly implement the equation for the dot-product
-    using higher-order functions:
-
-    \begin{code}
-    xs *+* ys = foldl1 (+) (zipWith (*) xs hs)
-    \end{code}
-
-    The \hs{zipWith} function is very similar to the \hs{map} function: It 
-    takes a function, two vectors, and then applies the function to each of 
-    the elements in the two vectors pairwise (\emph{e.g.}, \hs{zipWith (*) [1, 
-    2] [3, 4]} becomes \hs{[1 * 3, 2 * 4]} $\equiv$ \hs{[3,8]}).
-
-    The \hs{foldl1} function takes a function, a single vector, and applies 
-    the function to the first two elements of the vector. It then applies the
-    function to the result of the first application and the next element from 
-    the vector. This continues until the end of the vector is reached. The 
-    result of the \hs{foldl1} function is the result of the last application.
-    As you can see, the \hs{zipWith (*)} function is just pairwise 
-    multiplication and the \hs{foldl1 (+)} function is just summation.
  
      So far, only functions have been used as higher-order values. In
      Haskell, there are two more ways to obtain a function-typed value:
@@ -1007,7 +982,7 @@ by an (optimizing) \VHDL\ synthesis tool.
      \end{code}
      
      \begin{figure}
-    \centerline{\includegraphics{mac-state}}
+    \centerline{\includegraphics{mac-state.svg}}
      \caption{Stateful Multiply-Accumulate}
      \label{img:mac-state}
      \end{figure}
@@ -1016,16 +991,90 @@ by an (optimizing) \VHDL\ synthesis tool.
      state, and what part of the output is part of the updated state. This 
      aspect will also reflected in the type signature of the function. 
      Abstracting the state of a circuit in this way makes it very explicit: 
-    which variables  are part of the state is completely determined by the 
+    which variables are part of the state is completely determined by the 
      type signature. This approach to state is well suited to be used in 
      combination with the existing code and language features, such as all the 
-    choice constructs, as state values are just normal values.    
+    choice constructs, as state values are just normal values. We can simulate 
+    stateful descriptions using the recursive \hs{run} function:
+    
+    \begin{code}
+    run f s (i:inps) = o : (run f s' inps)
+      where
+        (s', o) = f s i
+    \end{code}
+    
+    The \hs{run} function maps a list of inputs over the function that a 
+    developer wants to simulate, passing the state to each new iteration. Each
+    value in the input list corresponds to exactly one cycle of the (implicit) 
+    clock. The result of the simulation is a list of outputs for every clock
+    cycle. As both the \hs{run} function and the hardware description are 
+    plain hardware, the complete simulation can be compiled by an optimizing
+    Haskell compiler.
+    
  \section{\CLaSH\ prototype}
  
-foo\par bar
+The \CLaSH language as presented above can be translated to \VHDL using
+the prototype \CLaSH compiler. This compiler allows experimentation with
+the \CLaSH language and allows for running \CLaSH designs on actual FPGA
+hardware.
+
+\comment{Add clash pipeline image}
+The prototype heavily uses \GHC, the Glasgow Haskell Compiler. Figure
+TODO shows the \CLaSH compiler pipeline. As you can see, the frontend
+is completely reused from \GHC, which allows the \CLaSH prototype to
+support most of the Haskell Language. The \GHC frontend produces the
+program in the \emph{Core} format, which is a very small, functional,
+typed language which is relatively easy to process.
+
+The second step in the compilation process is \emph{normalization}. This
+step runs a number of \emph{meaning preserving} transformations on the
+Core program, to bring it into a \emph{normal form}. This normal form
+has a number of restrictions that make the program similar to hardware.
+In particular, a program in normal form no longer has any polymorphism
+or higher order functions.
+
+The final step is a simple translation to \VHDL.
  
  \section{Use cases}
-Returning to the example of the FIR filter, we will slightly change the
+As an example of a common hardware design where the use of higher-order
+functions leads to a very natural description is a FIR filter, which is 
+basically the dot-product of two vectors:
+
+\begin{equation}
+y_t  = \sum\nolimits_{i = 0}^{n - 1} {x_{t - i}  \cdot h_i } 
+\end{equation}
+
+A FIR filter multiplies fixed constants ($h$) with the current 
+and a few previous input samples ($x$). Each of these multiplications
+are summed, to produce the result at time $t$. The equation of a FIR 
+filter is indeed equivalent to the equation of the dot-product, which is 
+shown below:
+
+\begin{equation}
+\mathbf{x}\bullet\mathbf{y} = \sum\nolimits_{i = 0}^{n - 1} {x_i \cdot y_i } 
+\end{equation}
+
+We can easily and directly implement the equation for the dot-product
+using higher-order functions:
+
+\begin{code}
+xs *+* ys = foldl1 (+) (zipWith (*) xs hs)
+\end{code}
+
+The \hs{zipWith} function is very similar to the \hs{map} function seen 
+earlier: It takes a function, two vectors, and then applies the function to 
+each of the elements in the two vectors pairwise (\emph{e.g.}, \hs{zipWith (*) 
+[1, 2] [3, 4]} becomes \hs{[1 * 3, 2 * 4]} $\equiv$ \hs{[3,8]}).
+
+The \hs{foldl1} function takes a function, a single vector, and applies 
+the function to the first two elements of the vector. It then applies the
+function to the result of the first application and the next element from 
+the vector. This continues until the end of the vector is reached. The 
+result of the \hs{foldl1} function is the result of the last application.
+As you can see, the \hs{zipWith (*)} function is just pairwise 
+multiplication and the \hs{foldl1 (+)} function is just summation.
+
+Returning to the actual FIR filter, we will slightly change the
  equation belong to it, so as to make the translation to code more obvious.
  What we will do is change the definition of the vector of input samples.
  So, instead of having the input sample received at time
@@ -1033,7 +1082,7 @@ $t$ stored in $x_t$, $x_0$ now always stores the current sample, and $x_i$
  stores the $ith$ previous sample. This changes the equation to the
  following (Note that this is completely equivalent to the original
  equation, just with a different definition of $x$ that will better suit
-the the transformation to code):
+the transformation to code):
  
  \begin{equation}
  y_t  = \sum\nolimits_{i = 0}^{n - 1} {x_i  \cdot h_i } 
@@ -1041,8 +1090,7 @@ y_t  = \sum\nolimits_{i = 0}^{n - 1} {x_i  \cdot h_i }
  
  Consider that the vector \hs{hs} contains the FIR coefficients and the 
  vector \hs{xs} contains the current input sample in front and older 
-samples behind. The function that does this shifting of the input samples 
-is shown below:
+samples behind. The function that shifts the input samples is shown below:
  
  \begin{code}
  x >> xs = x +> tail xs  
@@ -1060,7 +1108,7 @@ The resulting netlist of a 4-taps FIR filter based on the above definition
  is depicted in \Cref{img:4tapfir}.
  
  \begin{figure}
-\centerline{\includegraphics{4tapfir}}
+\centerline{\includegraphics{4tapfir.svg}}
  \caption{4-taps FIR Filter}
  \label{img:4tapfir}
  \end{figure}
@@ -1071,22 +1119,34 @@ years. Early work includes such languages as $\mu$\acro{FP}~\cite{muFP}, an
  extension of Backus' \acro{FP} language to synchronous streams, designed 
  particularly for describing and reasoning about regular circuits. The 
  Ruby~\cite{Ruby} language uses relations, instead of functions, to describe 
-circuits, and has a particular focus on layout. \acro{HML}~\cite{HML2} is a 
-hardware modeling language based on the strict functional language 
-\acro{ML}, and has support for polymorphic types and higher-order functions. 
-Published work suggests that there is no direct simulation support for 
-\acro{HML}, and that the translation to \VHDL\ is only partial.
+circuits, and has a particular focus on layout. 
+
+\acro{HML}~\cite{HML2} is a hardware modeling language based on the strict 
+functional language \acro{ML}, and has support for polymorphic types and 
+higher-order functions. Published work suggests that there is no direct 
+simulation support for \acro{HML}, but that a description in \acro{HML} has to 
+be translated to \VHDL\ and that the translated description can than be 
+simulated in a \VHDL\ simulator. Also not all of the mentioned language 
+features of \acro{HML} could be translated to hardware. The \CLaSH\ compiler 
+on the other hand can correctly translate all of the language constructs 
+mentioned in this paper to a netlist format.
  
  Like this work, many functional hardware description languages have some sort 
  of foundation in the functional programming language Haskell. 
  Hawk~\cite{Hawk1} uses Haskell to describe system-level executable 
  specifications used to model the behavior of superscalar microprocessors. Hawk 
  specifications can be simulated, but there seems to be no support for 
-automated circuit synthesis. The ForSyDe~\cite{ForSyDe2} system uses Haskell 
-to specify abstract system models, which can (manually) be transformed into an 
-implementation model using semantic preserving transformations. ForSyDe has 
-several simulation and synthesis backends, though synthesis is restricted to 
-the synchronous subset of the ForSyDe language.
+automated circuit synthesis. 
+
+The ForSyDe~\cite{ForSyDe2} system uses Haskell to specify abstract system 
+models, which can (manually) be transformed into an implementation model using 
+semantic preserving transformations. A designer can model systems using 
+heterogeneous models of computation, which include continuous time, 
+synchronous and untimed models of computation. Using so-called domain 
+interfaces a designer can simulate electronic systems which have both analog 
+as digital parts. ForSyDe has several simulation and  synthesis backends, 
+though synthesis is restricted to the synchronous subset of the ForSyDe 
+language. Unlike \CLaSH\ there is no support for the automated synthesis of description that contain polymorphism or higher-order functions.
  
  Lava~\cite{Lava} is a hardware description language that focuses on the 
  structural representation of hardware. Besides support for simulation and 
@@ -1095,20 +1155,19 @@ tools for formal verification. Lava descriptions are actually circuit
  generators when viewed from a synthesis viewpoint, in that the language 
  elements of Haskell, such as choice, can be used to guide the circuit 
  generation. If a developer wants to insert a choice element inside an actual 
-circuit he will have to specify this explicitly as a component. In this 
-respect \CLaSH\ differs from Lava, in that all the choice elements, such as 
-case-statements and pattern matching, are synthesized to choice elements in the 
-eventual circuit. As such, richer control structures can both be specified and 
-synthesized in \CLaSH\ compared to any of the languages mentioned in this 
-section.
+circuit he will have to explicitly instantiate a multiplexer-like component. 
+
+In this respect \CLaSH\ differs from Lava, in that all the choice elements, 
+such as case-statements and pattern matching, are synthesized to choice 
+elements in the eventual circuit. As such, richer control structures can both 
+be specified and synthesized in \CLaSH\ compared to any of the languages 
+mentioned in this section.
  
  The merits of polymorphic typing, combined with higher-order functions, are 
  now also recognized in the `main-stream' hardware description languages, 
-exemplified by the new \VHDL-2008 standard~\cite{VHDL2008}. \VHDL-2008 has 
-support to specify types as generics, thus allowing a developer to describe 
+exemplified by the new \VHDL-2008 standard~\cite{VHDL2008}. \VHDL-2008 support for generics has been extended to types, allowing a developer to describe 
  polymorphic components. Note that those types still require an explicit 
-generic map, whereas type-inference and type-specialization are implicit in 
-\CLaSH.
+generic map, whereas types can be automatically inferred in \CLaSH.
  
  % Wired~\cite{Wired},, T-Ruby~\cite{T-Ruby}, Hydra~\cite{Hydra}. 
  %