+
+\note[itemize]
+{
+ \item Master's thesis - hardware description language \& compiler Cλash
+ \item Short introduction, then examples
+ \item VHDL is common, but sucks
+ \item Cλash is not an embedded language: its compiler is external
+}
+
+\frame
+{
+ \frametitle{Compiler}
+ \begin{center}
+ \includegraphics[width=10cm]{figures/pipeline}
+ \end{center}
+}
+
+\note[itemize]
+{
+ \item Working prototype, rough edges
+ \item Reuse GHC
+ \item Custom normalization - Reduction system of transformations
+ \item Simple subset of VHDL - existing tooling
+}
+
+\section{Examples}
+
+\frame
+{
+ \frametitle{Multiply-accumulate}
+ \only<1> {
+ \begin{code}
+mac :: Num a => a -> a -> a -> a
+ \end{code}
+ }
+ \only<2-> {
+ \begin{code}
+type Word = SizedWord D16
+mac :: Word -> Word -> Word -> Word
+ \end{code}
+ }
+ \begin{code}
+mac x y acc = acc + x * y
+ \end{code}
+ \smallskip
+ \includegraphics[width=6cm]{figures/mac}
+}
+
+\note[itemize]
+{
+ \item functions are components (operators are functions too!)
+ \item function application is component instantiation
+ \item Polymorphic description
+ \item But top level must be monomorphic (next frame)
+}
+
+\frame
+{
+ \frametitle{Stateful multiply-accumulate}
+ \begin{code}
+newtype State a = State a
+
+smac :: State Word -> Word -> Word -> (State Word, Word)
+smac (State s) x y = (State s', s')
+ where s' = s + x * y
+ \end{code}
+ \smallskip
+ \includegraphics[width=6cm]{figures/smac}
+}
+
+\note[itemize]
+{
+ \item State is explicit: Argument and result
+ \item Produces register == memory
+}
+
+\frame
+{
+ \frametitle{Simple CPU}
+ \includegraphics[width=11cm]{figures/cpu}
+}
+
+\note[itemize]
+{
+ \item Simple CPU: Instructions are one opcode and four address pairs
+ \item One input line, one output line, no memories
+ \item Small, but basis for real hardware
+ \item Three fixed function units, one multipurpose function unit
+}
+
+\frame
+{
+ \frametitle{Fixed-function function units}
+ \begin{code}
+ fu :: (... u ~ n :-: D1 ...) => (a -> a -> t)
+ -> Vector n a
+ -> (Index u, Index u)
+ -> t
+
+ fu op inputs (a1, a2) = op (inputs!a1) (inputs!a2)
+ \end{code}
+ \vspace{2cm}
+
+ \begin{code}
+ fu1 = fu (+)
+ fu2 = fu (-)
+ fu3 = fu (*)
+ \end{code}
+}
+
+\note[itemize]
+{
+ \item fu abstracts the input selection
+ \item fu takes an arbitrary binary operation
+ \item Some context left out
+ \item Vector is a fixed size vector, Index an index
+}
+
+\frame
+{
+ \frametitle{Multi-purpose function unit}
+ \begin{code}
+ data Opcode = Shift | Xor | Equal
+
+ multiop :: Opcode -> Word -> Word -> Word
+ multiop Shift = shift
+ multiop Xor = xor
+ multiop Equal = \a b -> if a == b then 1 else 0
+ \end{code}
+ \vspace{2cm}
+
+ \begin{code}
+ fu0 c = fu (multiop c)
+ \end{code}
+}
+
+\note[itemize]
+{
+ \item multiop takes an opcode and produces a binary operation
+ \item multiop is partially applied to the opcode
+}
+
+\frame
+{
+ \frametitle{The complete CPU}
+ \begin{code}
+type CpuState = State (Vector D4 Word)
+
+cpu :: CpuState
+ -> (Word, Opcode, Vector D4 (Index D6, Index D6))
+ -> (CpuState, Word)
+cpu (State s) (x, opc, addrs) = (State s', out)
+ where
+ inputs = x +> (0 +> (1 +> s))
+ s' = (fu0 opc inputs (addrs!(0 :: Index D3))) +> (
+ (fu1 inputs (addrs!(1 :: Index D3))) +> (
+ (fu2 inputs (addrs!(2 :: Index D3))) +> (
+ (fu3 inputs (addrs!(3 :: Index D3))) +> (
+ (empty)))))
+ out = last s
+ \end{code}
+}
+
+\note[itemize] {
+ \item Uses partial application for fu0
+ \item Cpu state is one register per fu
+}
+
+\frame
+{
+ \frametitle{Floating point reduction circuit}
+ \includegraphics[width=11cm]{figures/reducer}
+}
+\note[itemize]
+{
+ \item Sums rows of corresponding FP numbers (e.g., sparse matrix
+ multiplication)
+ \item Complexity: Pipelined adder, multiple rows simultaneously
+ \item Big design, implemented in Cλash
+}
+
+\frame
+{
+ \frametitle{Controller function}
+ \begin{code}
+controller (inp1, inp2, pT, from_res_mem) =
+ (arg1, arg2, shift, to_res_mem)
+ where
+ (arg1, arg2, shift, to_res_mem)
+ | valid pT && valid from_res_mem
+ = (pT , from_res_mem , 0, False)
+ | valid pT && valid inp1 && discr pT == discr inp1
+ = (pT , inp1 , 1, False)
+ | valid inp1 && valid inp2 && discr inp1 == discr inp2
+ = (inp1 , inp2 , 2, valid pT)
+ | valid inp1
+ = (inp1 , (True, (0, discr inp1)) , 1, valid pT)
+ | otherwise
+ = (notValid, notValid , 0, valid pT)
+ \end{code}
+}
+
+\note[itemize]
+{
+ \item Elegant implementation of algorithm rules
+}
+
+
+\section{Future}
+
+\frame
+{
+ \frametitle{Future work}
+ \begin{itemize}
+ \item More systematic normalization
+ \item Recursion / normal lists
+ \item Nested state abstraction
+ \item Multiple clock domains / asynchronicity
+ \item Graphical output
+ \end{itemize}
+}
+
+\note[itemize]
+{
+ \item More systematic normalization - proofs
+ \item Recursion / normal lists - Fixed size recursion has problems
+ \item Nested state abstraction - Larger designs can get messy
+ \item Multiple clock domains / asynchronicity - Clock is currently implicit
+ \item Graphical output - For analysis and testing
+ \item Plenty of assignments!
+}
+
+
+\subsection{Thanks}
+\frame
+{
+\vspace{2cm}\centerline{\Huge{Thanks!}}
+\vspace{2cm}
+http://wwwhome.cs.utwente.nl/\textasciitilde{}baaijcpr/ClaSH/Index.html
+\begin{center}or just\end{center}
+http://google.com/search?q={\bf{}C$\lambda$aSH}\&btnI=I'm Feeling Lucky