From 747c2ba3d485fb5c3543e9435fbd3dff59ddb3f8 Mon Sep 17 00:00:00 2001
From: Matthijs Kooijman
Date: Mon, 7 Dec 2009 01:10:28 +0100
Subject: [PATCH] Fix things following from comments from Marco and Sabih.

---
 Chapters/Conclusions.tex         |  12 +-
 Chapters/Context.tex             | 182 ++++++++++------
 Chapters/Future.tex              | 189 ++++++++--------
 Chapters/HardwareDescription.tex | 356 +++++++++++++++++++------------
 Chapters/Introduction.tex        |  13 +-
 Chapters/Normalization.tex       | 302 ++++++++++++++------------
 Chapters/Prototype.tex           | 155 ++++++++------
 Outline                          |   1 +
 Report.bib                       |  58 +++++
 9 files changed, 755 insertions(+), 513 deletions(-)

diff --git a/Chapters/Conclusions.tex b/Chapters/Conclusions.tex
index 0b37899..fff4b3d 100644
--- a/Chapters/Conclusions.tex
+++ b/Chapters/Conclusions.tex
@@ -9,17 +9,17 @@
 component instantiation and the various choice mechanisms (pattern matching,
 case expressions, if expressions) are well suited to describe conditional
 assignment in the hardware.

-Useful features from the functional perspective, like polymorphism and higher
-order functions and expressions also prove suitable to describe hardware
-and our implementation shows that they can be translated to \VHDL as
-well.
+Useful features from the functional perspective, like polymorphism and
+higher-order functions and expressions also prove suitable to describe
+hardware and our implementation shows that they can be translated to
+\VHDL as well.

 A prototype compiler was created in this research. For this prototype the
 Haskell language was chosen as the input language, instead of creating a new
 language from scratch. This has enabled creating the prototype rapidly,
 allowing for experimenting with various functional language features and
 interpretations in Cλash. Even supporting more complex features like
-polymorphism and higher order values has been possible. If a new language and
+polymorphism and higher-order values has been possible. If a new language and
 compiler had been designed from scratch, that new language would not
 have been nearly as advanced as current Cλash.

@@ -84,7 +84,7 @@
 that this scope is too narrow for practical use of Cλash. Most people
 that hear about using a functional language for hardware description
 instantly hope to be able to provide a concise mathematical
 description of their algorithm and have the hardware generated for
 them. Since this
-is obviously a different problem alltogether, we could not have hoped to
+is obviously a different problem altogether, we could not have hoped to
 even start solving it. However, hopefully the current Cλash system
 provides a solid base on top of which further experimentation with
 functional descriptions can be built.

diff --git a/Chapters/Context.tex b/Chapters/Context.tex
index f07f1d9..a12fe73 100644
--- a/Chapters/Context.tex
+++ b/Chapters/Context.tex
@@ -1,24 +1,55 @@
 \chapter[chap:context]{Context}
+
+  \placeintermezzo{}{
+    \defref{EDSL}
+    \startframedtext[width=8.5cm,background=box,frame=no]
+    \startalignment[center]
+      {\tfa Embedded domain-specific languages (\small{EDSL})}
+    \stopalignment
+    \blank[medium]
+
+    \startcitedquotation[deursen00]
+      A domain-specific language (\small{DSL}) is a programming
+      language or executable specification language that offers,
+      through appropriate notations and abstractions, expressive
+      power focused on, and usually restricted to, a particular
+      problem domain.
+    \stopcitedquotation
+
+    An embedded \small{DSL} is a \small{DSL} that is embedded in
+    another language.
Haskell is commonly used to embed \small{DSL}s
+    in, which typically means that a number of Haskell functions and types
+    are made available that can be called to construct a large value
+    of some domain-specific datatype (\eg, a circuit datatype). This
+    generated datatype can then be processed further by the
+    \small{EDSL} \quote{compiler} (which runs in the same environment
+    as the \small{EDSL} itself). The embedded language is then a, mostly
+    applicative, subset of Haskell where the library functions are the
+    primitives. Sometimes advanced Haskell features such as
+    polymorphism, higher-order values or type classes can be used in
+    the embedded language. \cite[hudak96]
+
+    For an \small{EDSL}, the definitions of compile time and run time
+    are typically fuzzy (and thus hard to define here), since the
+    \small{EDSL} \quote{compiler} is usually compiled by the same
+    Haskell compiler as the \small{EDSL} program itself.
+    \stopframedtext
+  }
+
  An obvious question that arises when starting any research is
  \quote{Has this not been done before?}. Using a functional language
  for describing hardware is not a new idea at all. In fact, there has
  been research into functional hardware description even before the
  conventional hardware description
-  languages were created. \todo{Reference about early FHDLs} However,
-  functional languages were not nearly as advanced as they are now, and
-  functional hardware description never really got off.
+  languages were created. Examples of these are µFP \cite[sheeran83] and
+  Ruby \cite[jones90]. Functional languages were not nearly as advanced
+  as they are now, and functional hardware description never really took
+  off.

-\todo{Add references}
-
-  Recently, there have been some renewed efforts, especially using the Haskell
-  functional language. Examples are Lava, ForSyde, ..., which are all a form of an
-  embedded domain specific language. Each of these have a slightly different
-  approach, but all of these do some trickery inside the Haskell language
-  itself, meaning you write a program that generates a hardware circuit,
-  instead of describing the circuit directly (either by running the haskell
-  code after compilation, or using Template Haskell to inspect parts of the
-  code you have written). This allows the full power of Haskell for generating
-  a circuit. However it also creates severe limitations in the use of the
-  language (you can't use case expressions in Lava, since they would be
-  executed only once during circuit generation) and extra notational overhead.
+  Recently, there have been some renewed efforts, especially using the
+  Haskell functional language. Examples are Lava \cite[claessen00] (an
+  \small{EDSL}) and ForSyde \cite[sander04] (an \small{EDSL} using
+  Template Haskell). \cite[baaij09] has a more complete overview of the
+  current field.

  We will now have a look at the existing hardware description languages,
  both conventional and functional to see the fields in which Cλash is
@@ -30,12 +61,14 @@
  the functional style to hardware description, we hope to obtain a
  hardware description language that is:
  \startitemize
-    \item More consise. Functional programs are known for their conciseness
-    and ability to abstract away common patterns. This is largely enabled
-    by features like an advanced type system with polymorphism and higher
-    order functions.
-    \item Type-safer. Functional programs typically have a highly expressive
-    type system, which makes it harder to write incorrect code.
+    \item More concise.
Functional programs are known for their conciseness
+      and ability to provide reusable functions that are abstractions of
+      common patterns. This is largely enabled by features like an
+      advanced type system with polymorphism and higher-order functions.
+      \item More type-safe. Functional programs typically have a highly
+      expressive type system, which allows a programmer to more strictly
+      define limitations on values, making it easier to find violations
+      and thus bugs.
    \item Easy to process. Functional languages have nice properties like
    purity \refdef{purity} and single binding behaviour, which make it easy
    to apply program transformations and optimizations and could potentially
@@ -43,66 +76,75 @@
  \stopitemize

  \placeintermezzo{}{
-    \defref{EDSL}
+    \defref{Template Haskell}
    \startframedtext[width=8.5cm,background=box,frame=no]
    \startalignment[center]
-      {\tfa Embedded domain-specific languages (\small{EDSL})}
+      {\tfa Template Haskell}
    \stopalignment
    \blank[medium]
-
-    \startcitedquotation[deursen00]
-      A domain-specific language (\small{DSL}) is a program-
-      ming language or executable specification language
-      that offers, through appropriate notations and ab-
-      stractions, expressive power focused on, and usu-
-      ally restricted to, a particular problem domain.
-    \stopcitedquotation
+
+    Template Haskell is an extension to the \GHC compiler that allows
+    a program to mark some parts to be evaluated \emph{at compile
+    time}. These \emph{templates} can then access the abstract syntax
+    tree (\small{AST}) of the program that is being compiled and
+    generate parts of this \small{AST}.

-    An embedded \small{DSL} is a \small{DSL} that is embedded in
-    another language. Haskell is commonly used to embed \small{DSL}s
-    in, which typically means a number of Haskell functions and types
-    are made available that can be called to construct a large value
-    of some domain-specific datatype (\eg, a circuit datatype). This
-    generated datatype can then be processed further by the
-    \small{EDSL} \quote{compiler} (which runs in the same environment
-    as the \small{EDSL} itself. The embedded language is then a, mostly
-    applicative, subset of Haskell where the library functions are the
-    primitives. Sometimes advanced haskell features such as
-    polymorphism, higher order values or type classes can be used in
-    the embedded language. \cite[hudak96]
+    Template Haskell is a very powerful, but also complex mechanism.
+    It was intended to simplify the generation of some repetitive pieces
+    of code, but its introspection features have inspired all sorts of
+    applications, such as hardware description compilers.
    \stopframedtext
  }

\section[sec:context:fhdls]{Existing functional hardware description languages}

-  As noted above, we're not the first to walk this path. However, current
-  embedded functional hardware description languages (in particular those
-  using Haskell) are limited by:\todo{Separate TH and EDSL approaches
-  better}
+  As noted above, this path has been explored before. However, current
+  embedded functional hardware description languages (in particular
+  those using Haskell) are limited. Below, a number of downsides of the
+  recent attempts using the Haskell language are sketched.
+  \cite[baaij09] has a more complete overview of these and other
+  languages.
+
+  This list uses Lava and ForSyDe as examples, but tries to generalize
+  the conclusions to the techniques of embedding a \small{DSL} and
+  using Template Haskell.
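+
+  To give a feel for the embedding technique itself, consider the
+  following minimal sketch of a circuit \small{EDSL}. It is not taken
+  from any of the languages mentioned; all names are invented for
+  illustration only:
+
+  \starthaskell
+  -- A datatype describing the structure of a circuit.
+  data Signal
+    = Input String
+    | And Signal Signal
+    | Inv Signal
+
+  -- Ordinary Haskell functions act as the primitives and
+  -- combinators of the embedded language.
+  nand :: Signal -> Signal -> Signal
+  nand a b = Inv (And a b)
+
+  -- Evaluating this does not run the circuit, it builds the Signal
+  -- value describing it, which the EDSL "compiler" can then
+  -- translate to a netlist.
+  example :: Signal
+  example = nand (Input "a") (Input "b")
+  \stophaskell
+
+  Note that evaluating \hs{example} produces a description of the
+  circuit, not its behaviour. This is the root of several of the
+  downsides sketched below: any Haskell construct that is not
+  reflected in the \hs{Signal} datatype (a \hs{case} expression, for
+  example) is evaluated during this construction and leaves no trace
+  in the result.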
+
  \startitemize
    \item Not all of Haskell's constructs can be captured by embedded domain
-      specific languages. For example, an if or case expression is typically
-      executed only once and only its result is reflected in the embedded
-      description, not the if or case expression itself. Also, sharing of
-      variables (\eg, using the same variable twice while only calculating it
-      once) and cycles in circuits are non-trivial to properly and safely
-      translate (though there is some work to fix this, but that has not been
-      possible in a completely reliable way yet \cite[gill09]).
-    \item Some things are verbose to express. Especially ForSyDe suffers
-      from a lot of notational overhead due to the Template Haskell approach
-      used. Since conditional expressions are not supported, a lot of Haskell's
-      syntax sugar (if expressions, pattern matching, guards) cannot be used
-      either, leading to more verbose notation as well.
-    \item Polymorphism and higher order values are not supported within the
-      embedded language. The use of Haskell as a host language allows the use
-      of polymorphism and higher order functions at circuit generation time
-      (even for free, without any additional cost on the \small{EDSL}
-      programmers), but the described circuits do not have any polymorphism
-      or higher order functions, which can be limiting. \todo{How true or
-      appropriate is this point?}
-      \todo{Function structure gets lost (in Lava)}
-    \stopitemize
+      specific languages. For example, an \hs{if} or \hs{case}
+      expression is typically executed once when the Haskell description
+      is processed and only the result is reflected in the generated
+      data structure (and thus in the final program). In Lava, for
+      example, conditional assignment can only be described by using
+      explicit multiplexer components, not using any of Haskell's
+      compact mechanisms (such as \hs{case} expressions or pattern
+      matching).
+
+      Also, sharing of variables (\eg, using the same variable twice
+      while only calculating it once) and cycles in circuits are
+      non-trivial to properly and safely translate (though there is some
+      work to fix this, but that has not been possible in a completely
+      reliable way yet \cite[gill09]).
+    \item Template Haskell makes descriptions verbose. Template
+      Haskell needs extra annotations to move things around between
+      \small{TH} and the normal program. When \small{TH} is combined
+      with an \small{EDSL} approach, it can get confusing when to use
+      \small{TH} and when not to.
+    \item Function hierarchies cannot be observed in an \small{EDSL}.
+      For example, Lava generates a single flat \VHDL architecture,
+      which has no structure whatsoever. Even though this is strictly
+      correct, it gives the synthesis software no guidance about
+      which parts of the system can best be placed together and makes
+      debugging the system very hard.

-  \todo{Complete translation in TH is complex: Works with Haskell AST
-  instead of Core}
+      It is possible to add explicit annotations to overcome this
+      limitation (ForSyDe does this using Template Haskell), but this
+      adds extra verbosity again.
+    \item Processing in Template Haskell can become very complex,
+      since it works on the Haskell \small{AST} directly. This means
+      that every part of the Haskell syntax that is used must be
+      supported explicitly by the Template Haskell code.
+      \in{Chapter}[chap:prototype] shows that working on a smaller
+      \small{AST} is much more efficient.
\stopitemize

% vim: set sw=2 sts=2 expandtab:
diff --git a/Chapters/Future.tex b/Chapters/Future.tex
index 7331e9c..0399db9 100644
--- a/Chapters/Future.tex
+++ b/Chapters/Future.tex
@@ -1,4 +1,44 @@
 \chapter[chap:future]{Future work}
+\section{New language}
+During the development of the prototype, it has become clear that Haskell is
+not the perfect language for this work. There are two main problems:
+\startitemize
+\item Haskell is not expressive enough. Haskell is still quite new in the area
+of dependent typing, something we use extensively. Also, Haskell has some
+special syntax to write monadic composition and arrow composition, which is
+well suited to normal Haskell programs. However, for our hardware
+descriptions, we could use similar, but different, syntactic sugar (as we
+have seen above).
+\item Haskell is too expressive. There are some things that Haskell can
+express, but we can never properly translate. There are certain types (both
+primitive types and certain type constructions like recursive types) we can
+never translate, as well as certain forms of recursion.
+\stopitemize
+
+It might be good to reevaluate the choice of language for Cλash; perhaps there
+are other languages which are better suited to describe hardware, now that
+we have learned a lot about it.
+
+Perhaps Haskell (or some other language) can be extended by using a
+preprocessor. There has been some (proof of concept) work on the Strathclyde
+Haskell Enhancement (\small{SHE}) preprocessor for type-level programming;
+perhaps we can extend that, or create something similar for hardware-specific
+notation.
+
+It is not unlikely that the most flexible way
+forward would be to define a completely new language with exactly the needed
+features. This is of course an enormous effort, which should not be taken
+lightly.
+
+\section{Correctness proofs of the normalization system}
+As stated in \in{section}[sec:normalization:properties], there are a
+number of properties we would like to see verified about the
+normalization system. In particular, the \emph{termination} and
+\emph{completeness} of the system would be a good candidate for future
+research. Specifying formal semantics for the Core language in
+order to verify the \emph{soundness} of the system would be an even more
+challenging task.
+
 \section{Improved notation for hierarchical state}
 The hierarchical state model requires quite some boilerplate code for
 unpacking and distributing the input state and collecting and repacking
 the output
@@ -52,8 +92,8 @@
 results of functions in a datatype (the \emph{monad}) that can contain
 extra information, and hide extra behaviour in the binding operators.

 The \hs{>>=} operator allows extracting the actual result of a function
-and passing it to another function. Let's try to illustrate this from an
-example. The following snippet:
+and passing it to another function. The following snippet should
+illustrate this:

 \starthaskell
 do
@@ -77,7 +117,7 @@ it should serve to understand the following discussion.

 The monadic notation could perhaps be used to compose a number of stateful
 functions into another stateful computation. Perhaps this could move all the
 boilerplate code into the \hs{>>} and \hs{>>=} operators.
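+
+As a first impression of what this could look like, consider the
+following sketch of a state-passing bind operator (simplified for
+illustration; the actual definitions used for Cλash are shown in
+\in{example}[ex:StateMonad] below):
+
+\starthaskell
+-- A stateful computation takes a state and returns the updated
+-- state along with its result.
+type Stateful s a = s -> (s, a)
+
+-- Compose two stateful computations, threading the state through
+-- both and passing the result of the first to the second. This
+-- assumes the Prelude's >>= is hidden, as described below.
+(>>=) :: Stateful s a -> (a -> Stateful s b) -> Stateful s b
+(f >>= g) s = let (s', x) = f s in g x s'
+\stophaskell
+
+All of the state packing and unpacking now happens inside \hs{>>=},
+which is exactly the kind of boilerplate we would like to hide.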
-Because the boilerplate is still there (it's not magically disappeared,
+Because the boilerplate is still there (it has not magically disappeared,
 just moved into these functions), the compiler should still be able to
 compile these descriptions without any special magic (though perhaps it
 should always inline the binding operators to reveal the flow of values).

@@ -116,8 +156,8 @@
 There is a trick we can apply to change the signature of the \hs{>>}
 operator. \small{GHC} does not require the bind operators to be part of the
 \hs{Monad} type class, as long as it can use them to translate the do
 notation. This means we can define our own \hs{>>} and \hs{>>=} operators,
 outside of the
-\hs{Monad} typeclass. This does conflict with the existing methods of the
-\hs{Monad} typeclass, so we should prevent \small{GHC} from loading those (and
+\hs{Monad} type class. This does conflict with the existing methods of the
+\hs{Monad} type class, so we should prevent \small{GHC} from loading those (and
 all of the Prelude) by passing \type{-XNoImplicitPrelude} to \type{ghc}. This
 is slightly inconvenient, but since we hardly use anything from the
 prelude, this is not a big problem. We might even be able to replace some of the
@@ -125,7 +165,7 @@ Prelude with hardware-translateable versions by doing this.

 The binding operators can now be defined exactly as they are needed. For
 completeness, the \hs{return} function is also defined. It is not called
-by \small{GHC} implicitely, but can be called explicitely by a hardware
+by \small{GHC} implicitly, but can be called explicitly by a hardware
 description. \in{Example}[ex:StateMonad] shows these definitions.

 \startbuffer[StateMonad]
@@ -202,7 +242,7 @@ concept further investigation is appropriate.

 \section[sec:future:pipelining]{Improved notation or abstraction for pipelining}
 Since pipelining is a very common optimization for hardware systems, it should
 be easy to specify a pipelined system. Since it introduces quite some registers
-into an otherwise regular combinatoric system, we might look for some way to
+into an otherwise regular combinational system, we might look for some way to
 abstract away some of the boilerplate for pipelining.

 Something similar to the state boilerplate removal above might be appropriate:
@@ -215,11 +255,11 @@ in the amount of registers (which is similar to the extra \hs{()} state
 type in \in{example}[ex:DoState], which is harmless there, but would be a
 problem if it introduced an extra, useless, pipeline stage).

-This problem is slightly more complex than the problem we've seen before. One
+This problem is slightly more complex than the problem we have seen before. One
 significant difference is that each variable that crosses a stage boundary
 needs a register. However, when a variable crosses multiple stage boundaries,
 it must be stored for a longer period and should receive multiple registers.
-Since we can't find out from the combinator code where the result of the
+Since we cannot find out from the combinator code where the result of the
 combined values is used (at least not without using Template Haskell to
 inspect the \small{AST}), there seems to be no easy way to find how many
 registers are needed.

@@ -229,14 +269,14 @@ There seem to be two obvious ways of handling this problem:

 \startitemize
   \item Limit the scoping of each variable produced by a stage to the next
   stage only.
This means that any variables that are to be used in subsequent
-    stages should be passed on explicitely, which should allocate the required
+    stages should be passed on explicitly, which should allocate the required
    number of registers. This produces cumbersome code, where there is still a
    lot of explicitness (though this could be hidden in syntax sugar).

    \todo{The next sentence is unclear}
  \item Scope each variable over every subsequent pipeline stage and allocate
-    the maximum amount of registers that \emph{could} be needed. This means we
+    the maximum number of registers that \emph{could} be needed. This means we
    will allocate registers that are never used, but those could be optimized
    away later. This does mean we need some way to introduce a variable number
    of variables (depending on the total number of stages), assign the output of

    This also means that adding a stage to an existing pipeline will change
    the state type of each of the subsequent pipeline stages, and the state type
-    ofthe added stage depends on the number of subsequent stages.
+    of the added stage depends on the number of subsequent stages.

    Properly describing this will probably also require quite explicit syntax,
    meaning this is not feasible without some special syntax.
@@ -258,7 +298,7 @@ stateful, mixing pipelined with normal computation, etc.

The main problems of recursion have been described in
\in{section}[sec:recursion]. In the current implementation, recursion is
therefore not possible, instead we rely on a number of implicitly list-recursive
-builtin functions.
+built-in functions.

Since recursion is a very important and central concept in functional
programming, it would very much improve the flexibility and elegance of our
@@ -293,7 +333,7 @@ There might be asynchronous events to which a system wants to respond. The
most obvious asynchronous event is of course a reset signal. Though a reset
signal can be synchronous, that is less flexible (and a hassle to describe in
Cλash, currently). Since every function in Cλash describes the behaviour on
-each cycle boundary, we really can't fit in asynchronous behaviour easily.
+each cycle boundary, we really cannot fit in asynchronous behaviour easily.

For the same reason, multiple clock domains cannot be easily supported. There is
currently no way for the compiler to know in which clock domain a function
@@ -326,7 +366,7 @@ main inp event (State acc) = (State acc', acc')

    -- On any other event, leave state and output unchanged
    _ -> acc

--- func is some combinatoric expression
+-- func is some combinational expression
func :: Word -> Event -> Word
func inp _ = inp * 2 + 3
\stopbuffer
@@ -340,11 +380,11 @@ func inp _ = inp * 2 + 3

approach. In this example we see that every function takes an input of
type \hs{Event}. The function \hs{main} takes the output of \hs{func}
and accumulates it on every clock cycle. On a reset signal,
-the accumulator is reset. The function \hs{func} is just a combinatoric
+the accumulator is reset. The function \hs{func} is just a combinational
function, with no synchronous elements. We can see this because there is
no state and the event input is completely ignored.
If the compiler is smart
enough, we could even leave the event input out for functions
-that don't use it, either because they are completely combinatoric (like
+that do not use it, either because they are completely combinational (like
in this example), or because they rely on the caller to select the
clock signal.

@@ -396,7 +436,7 @@ subb = ...

{\typebufferhs{MulticlockDesc}}

Note that in \in{example}[ex:MulticlockDesc] the \hs{suba} and \hs{subb}
-functions are \emph{always} called, to get at their combinatoric
+functions are \emph{always} called, to get at their combinational
outputs. The new state is calculated as well, but only saved when the right
clock has an up transition.

@@ -453,8 +493,8 @@ take multiple cycles. Some examples include:

  \item Some kind of map or fold over a list that could be expanded in time
  instead of space. This would result in a function that describes n cycles
  instead of just one, where n is the length of the list.
-  \item A large combinatoric expressions that would introduce a very long
-  combinatoric path and thus limit clock frequency. Such an expression could
+  \item A large combinational expression that would introduce a very long
+  combinational path and thus limit clock frequency. Such an expression could
  be broken up into multiple stages, which effectively results in a pipelined
  system (see also \in{section}[sec:future:pipelining]) with a known delay.
  There should probably be some way for the developer to specify the cycle
@@ -480,11 +520,11 @@ recursive expression typically has the return type \lam{[a]}, while the
rest of the system would expect just \lam{a} (since the recursive
expression generates just a single element each cycle).

-Naively, this matching could be done using a (builtin) function with a
+Naively, this matching could be done using a (built-in) function with a
signature like \lam{[a] -> a}, which also serves as an indicator to the
compiler that some expanding over time is required. However, this poses a
problem for simulation: How will our Haskell implementation of this magical
-builtin function know which element of the input list to return. This
+built-in function know which element of the input list to return? This
obviously depends on the current cycle number, but there is no way for this
function to know the current cycle without breaking all kinds of safety and
purity properties. Making this function have some state input and output could
@@ -497,8 +537,8 @@ How to interface with the rest of the system, without sacrificing safety
and simulation capabilities?

\section{Higher order values in state}
-Another interesting idea is putting a higher order value inside a function's
-state value. Since we can use higher order values anywhere, why not in the
+Another interesting idea is putting a higher-order value inside a function's
+state value. Since we can use higher-order values anywhere, why not in the
state?

\startbuffer[HigherAccum]

main :: Word -> State Acc -> (State Acc, Word)
main a s = (State s', out)
  where (s', out) = s a
\stopbuffer
-\placeexample[][ex:HigherAccum]{An accumulator using a higher order state.}
+\placeexample[][ex:HigherAccum]{An accumulator using a higher-order state.}
  {\typebufferhs{HigherAccum}}

As a (contrived) example, consider the accumulator in
\in{example}[ex:HigherAccum]. This code uses a function as its state,
-which implicitely contains the value accumulated so far. This is a
+which implicitly contains the value accumulated so far.
This is a
fairly trivial example, which is easier to write with a simple
\hs{Word} state value, but for more complex descriptions this style might
pay off. Note that in a way we are using the
\emph{continuation passing style} of writing functions, where we pass a
sort of \emph{continuation} from one cycle to the next.

-Our normalization process completely removes higher order values inside a
-function by moving applications and higher order values around so that every
-higher order value will eventually be full applied. However, if we would put a
-higher order value inside our state, the path from the higher order value
+Our normalization process completely removes higher-order values inside a
+function by moving applications and higher-order values around so that every
+higher-order value will eventually be fully applied. However, if we would put a
+higher-order value inside our state, the path from the higher-order value
definition to its application runs through the state, which is external to the
-function. A higher order value defined in one cycle is not applied until a
+function. A higher-order value defined in one cycle is not applied until a
later cycle. Our normalization process only works within the function, so it
-cannot remove this use of a higher order value.
+cannot remove this use of a higher-order value.

-However, we cannot leave a higher order value in our state value, since it is
-impossible to generate a register containing a higher order value, we simply
-can't translate a function type to a hardware type. To solve this, we must
-replace the higher order value inside our state with some other value
-representing these higher order values.
+However, we cannot leave a higher-order value in our state value, since it is
+impossible to generate a register containing a higher-order value: we simply
+cannot translate a function type to a hardware type. To solve this, we must
+replace the higher-order value inside our state with some other value
+representing these higher-order values.

One obvious approach here is to use an algebraic datatype where each
-constructor represents one possible higher order value that can end up in the
+constructor represents one possible higher-order value that can end up in the
state and each constructor has an argument for each free variable of the
-higher order value replaced. This allows us to completely reconstruct the
-higher order value at the spot where it is applied, and thus the higher order
+higher-order value replaced. This allows us to completely reconstruct the
+higher-order value at the spot where it is applied, and thus the higher-order
value disappears.

This approach is commonly known as the \quote{Reynolds approach to
-defuntionalization}, first described by J.C. Reynolds and seems to apply well
-to this situation. One note here is that Reynolds' approach puts all the
-higher order values in a single datatype. For a typed language, we will at
-least have to have a single datatype for each function type, since we can't
-mix them. It would be even better to split these datatypes a bit further, so
-that such a datatype will never hold a constructor that is never used for a
-particular state variable. This separation is probably a non-trivial problem,
-though.
-
-\todo{Reference "Definitional interpreters for higher-order programming
-languages":
-http://portal.acm.org/citation.cfm?id=805852\&dl=GUIDE\&coll=GUIDE\&CFID=58835435\&CFTOKEN=81856623}
-
-\section{New language}
-During the development of the prototype, it has become clear that Haskell is
-not the perfect language for this work. There are two main problems:
-\startitemize
-\item Haskell is not expressive enough. Haskell is still quite new in the area
-of dependent typing, something we use extensively. Also, Haskell has some
-special syntax to write monadic composition and arrow composition, which is
-well suited to normal Haskell programs. However, for our hardware
-descriptions, we could use some similar, but other, syntactic sugar (as we
-have seen above).
-\item Haskell is too expressive. There are some things that Haskell can
-express, but we can never properly translate. There are certain types (both
-primitive types and certain type constructions like recursive types) we can
-never translate, as well as certain forms of recursion.
-\stopitemize
-
-It might be good to reevaluate the choice of language for Cλash, perhaps there
-are other languages which are better suited to describe hardware, now that
-we've learned a lot about it.
-
-Perhaps Haskell (or some other language) can be extended by using a
-preprocessor. There has been some (point of proof) work on a the Strathclyde
-Haskell Enhancement (\small{SHE}) preprocessor for type-level programming,
-perhaps we can extend that, or create something similar for hardware-specific
-notation.
-
-It is not unlikely that the most flexible way
-forward would be to define a completely new language with exactly the needed
-features. This is of course an enormous effort, which should not be taken
-lightly.
+defunctionalization}, first described by J.C. Reynolds \cite[reynolds98] and
+seems to apply well to this situation. One note here is that Reynolds'
+approach puts all the higher-order values in a single datatype. For a typed
+language, we will at least have to have a separate datatype for each function
+type, since we cannot mix them. It would be even better to split these
+datatypes a bit further, so that such a datatype will never hold a constructor
+that is never used for a particular state variable. This separation is
+probably a non-trivial problem, though.
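+
+To illustrate the idea, suppose the state can contain one of two
+functions of type \hs{Word -> Word}: an adder with a captured operand,
+or the identity function. The following sketch (with invented names,
+not actual Cλash code) shows how these could be defunctionalized:
+
+\starthaskell
+-- One constructor per possible higher-order value, with an
+-- argument for each free variable of that value.
+data WordFunc
+  = AddConst Word  -- Represents ((+) n) for some n
+  | Identity       -- Represents id
+
+-- Reconstruct the original function at the spot where it is
+-- applied.
+apply :: WordFunc -> Word -> Word
+apply (AddConst n) x = n + x
+apply Identity     x = x
+\stophaskell
+
+The state now contains an ordinary algebraic datatype, which can be
+translated to hardware like any other value, and the higher-order
+value has disappeared.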
\section{Don't care values}

  A powerful value in \VHDL is the \emph{don't care} value, given as
-  \type{'-'}. This value tells the compiler that you don't really care about
+  \type{'-'}. This value tells the compiler that you do not really care about
  which value is assigned to a signal, allowing the compiler to make some
-  optimizations. Since hardware choice in hardware is often implemented using
+  optimizations. Since choice in hardware is often implemented using
  a collection of logic gates instead of multiplexers only, synthesizers can
  often reduce the amount of hardware needed by smartly choosing values for
  these don't care cases.
@@ -614,13 +619,13 @@ lightly.
  type. An exception can be made for vectors and product types, since those
  can simply use the don't care values of the types they contain.

-    This would also require some kind of \quote{Dont careable} type class
+    This would also require some kind of \quote{Don't careable} type class
    that allows each type to specify what its don't care value is. The
    compiler must then recognize this constant and replace it with don't
    care values in the final \VHDL code.
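+
+    A minimal sketch of what such a type class could look like (the
+    names here are invented for illustration and are not part of
+    Cλash):
+
+    \starthaskell
+    -- Types that can provide a don't care value.
+    class DontCare a where
+      dontcare :: a
+
+    instance DontCare Bit where
+      -- A placeholder value; the compiler would recognize any use
+      -- of dontcare and emit '-' instead of a real value.
+      dontcare = Low
+    \stophaskell
+
+    The compiler would then treat every occurrence of \hs{dontcare}
+    specially and translate it to don't care bits in the generated
+    \VHDL.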
    This is of course a very intrusive solution. Every type must become member
-    of this typeclass, and there is now some member in every type that is a
+    of this type class, and there is now some member in every type that is a
    special don't care value. Guaranteeing the obvious don't care semantics
    also becomes harder, since every pattern match or case expression must now
    also take care of the don't care value (this might actually be an
@@ -645,7 +650,7 @@ lightly.
  definition of an \hs{and} operator:

    \starthaskell
-    or :: Bit -> Bit -> Bit
+    and :: Bit -> Bit -> Bit
    and Low _ = Low
    and _ Low = Low
    and High High = High

  methods for describing don't care conditions. Possibly there are
  entirely different methods that work better.

-\section{Correctness proofs of the normalization system}
-As stated in \in{section}[sec:normalization:properties], there are a
-number of properties we would like to see verified about the
-normalization system. In particular, the \emph{termination} and
-\emph{completeness} of the system would be a good candidate for future
-research. Specifying formal semantics for the Core language in
-order to verify the \emph{soundness} of the system would be an even more
-challenging task.
-
% vim: set sw=2 sts=2 expandtab:
diff --git a/Chapters/HardwareDescription.tex b/Chapters/HardwareDescription.tex
index ada3817..9c148a5 100644
--- a/Chapters/HardwareDescription.tex
+++ b/Chapters/HardwareDescription.tex
@@ -1,8 +1,9 @@
 \chapter[chap:description]{Hardware description}
-  This chapter will provide an overview of the hardware description language
-  that was created and the issues that have arisen in the process. It will
-  focus on the issues of the language, not the implementation. The prototype
-  implementation will be discussed in \in{chapter}[chap:prototype].
+  In this chapter an overview will be provided of the hardware
+  description language that was created and the issues that have arisen
+  in the process. The focus will be on the issues of the language, not
+  the implementation. The prototype implementation will be discussed in
+  \in{chapter}[chap:prototype].

  \todo{Shortshort introduction to Cλash (Bit, Word, and, not, etc.)}

@@ -15,25 +16,65 @@ hardware whenever possible, to make working with Cλash as intuitive as
  possible.

+  \placeintermezzo{}{
+    \defref{top level binder}
+    \defref{top level function}
+    \startframedtext[width=8cm,background=box,frame=no]
+    \startalignment[center]
+      {\tfa Top level binders and functions}
+    \stopalignment
+    \blank[medium]
+    A top level binder is any binder (variable) that is declared in
+    the \quote{global} scope of a Haskell program (as opposed to a
+    binder that is bound inside a function).
+
+    In Haskell, there is no sharp distinction between a variable and a
+    function: A function is just a variable (binder) with a function
+    type. This means that a top level function is just any top level
+    binder with a function type.
+
+    As an example, consider the following Haskell snippet:
+
+    \starthaskell
+    foo :: Int -> Int
+    foo x = inc x
+      where
+        inc = \y -> y + 1
+    \stophaskell
+
+    Here, \hs{foo} is a top level binder, whereas \hs{inc} is a
+    function (since it is bound to a lambda abstraction, indicated by
+    the backslash) but is not a top level binder or function. Since
+    the type of \hs{foo} is a function type, namely \hs{Int -> Int},
+    it is also a top level function.
+    \stopframedtext
+  }
  In this chapter we describe how to interpret a Haskell program from a
  hardware perspective.
We provide a description of each
  Haskell language element that needs translation, to provide a clear
  picture of what is supported and how.

+
 \section[sec:description:application]{Function application}
  \todo{Sidenote: Inputs vs arguments}
  The basic syntactic elements of a functional program are functions and
-  function application. These have a single obvious \small{VHDL} translation: Each
-  function becomes a hardware component, where each argument is an input port
-  and the result value is the (single) output port. This output port can have
-  a complex type (such as a tuple), so having just a single output port does
-  not pose a limitation.
+  function application. These have a single obvious \small{VHDL}
+  translation: Each top level function becomes a hardware component, where each
+  argument is an input port and the result value is the (single) output
+  port. This output port can have a complex type (such as a tuple), so
+  having just a single output port does not pose a limitation.

  Each function application in turn becomes a component instantiation. Here,
  the result of each argument expression is assigned to a signal, which is
  mapped to the corresponding input port. The output port of the function is
  also mapped to a signal, which is used as the result of the application.

+  Since every top level function generates its own component, the
+  hierarchy of function calls is reflected in the final \VHDL output
+  as well, creating a hierarchical \VHDL description of the hardware.
+  This separation into different components makes the resulting \VHDL
+  output easier to read and debug.
+
  \in{Example}[ex:And3] shows a simple program using only function
  application and the corresponding architecture.

and3 :: Bit -> Bit -> Bit -> Bit
and3 a b c = and (and a b) c
\stopbuffer

-\todo{Mirror this image vertically}
\startuseMPgraphic{And3}
  save a, b, c, anda, andb, out;
@@ -82,9 +122,6 @@ and3 a b c = and (and a b) c

    {\boxedgraphic{And3}}{The architecture described by the Haskell description.}
  \stopcombination

-  \todo{This section should also mention hierarchy, top level functions and
-  subcircuits}
-
 \section{Choice}
  Although describing components and connections allows us to describe a lot
  of hardware designs already, there is an obvious thing missing: Choice. We
@@ -120,7 +157,7 @@ and3 a b c = and (and a b) c

  parallel, regardless of which output would be chosen eventually).

  If we would translate a Boolean to a bit value, we could of course remove
-  the comparator and directly feed 'in' into the multiplex (or even use an
+  the comparator and directly feed 'in' into the multiplexer (or even use an
  inverter instead of a multiplexer). However, we will try to make a general
  translation, which works for all possible \hs{case} expressions.
  Optimizations such as these are left for the \VHDL synthesizer, which
@@ -199,29 +236,30 @@ and3 a b c = and (and a b) c

  other logic, that check each pattern in turn.

 \section{Types}
-    We've seen the two most basic functional concepts translated to hardware:
-    Function application and choice. Before we look further into less obvious
-    concepts like higher order expressions and polymorphism, we will have a
-    look at the types of the values we use in our descriptions.
-
-    When working with values in our descriptions, we'll also need to provide
-    some way to translate the values used to hardware equivalents. In
-    particular, this means having to come up with a hardware equivalent for
-    every \emph{type} used in our program.
-
-    Since most functional languages have a lot of standard types that are
-    hard to translate (integers without a fixed size, lists without a static
-    length, etc.), we will start out by defining a number of \quote{builtin}
-    types ourselves. These types are builtin in the sense that our compiler
-    will have a fixed VHDL type for these. User defined types, on the other
-    hand, will have their hardware type derived directly from their Haskell
-    declaration automatically, according to the rules we sketch here.
+    Translation of the two most basic functional concepts has been
+    discussed: Function application and choice. Before looking further
+    into less obvious concepts like higher-order expressions and
+    polymorphism, the possible types that can be used in hardware
+    descriptions will be discussed.
+
+    Some way is needed to translate every value used to its hardware
+    equivalent. In particular, this means a hardware equivalent for
+    every \emph{type} used in a hardware description is needed.
+
+    Since most functional languages have a lot of standard types that
+    are hard to translate (integers without a fixed size, lists without
+    a static length, etc.), a number of \quote{built-in} types will be
+    defined first. These types are built-in in the sense that our
+    compiler will have a fixed VHDL type for these. User defined types,
+    on the other hand, will have their hardware type derived directly
+    from their Haskell declaration automatically, according to the rules
+    sketched here.

    \todo{Introduce Haskell type syntax (type constructors, type
    application, :: operator?)}

-    \subsection{Builtin types}
-      The language currently supports the following builtin types. Of these,
+    \subsection{Built-in types}
+      The language currently supports the following built-in types. Of these,
      only the \hs{Bool} type is supported by Haskell out of the box (the
      others are defined by the Cλash package, so they are user-defined
      types from Haskell's point of view).

@@ -236,7 +274,7 @@ and3 a b c = and (and a b) c
        \todo{Sidenote bit vs stdlogic}
      \stopdesc
      \startdesc{\hs{Bool}}
-        This is the only builtin Haskell type supported and is translated
+        This is the only built-in Haskell type supported and is translated
        exactly like the Bit type (where a value of \hs{True} corresponds to a
        value of \hs{High}). Supporting the Bool type is particularly useful
        to support \hs{if ... then ... else ...} expressions, which
@@ -309,7 +347,9 @@ and3 a b c = and (and a b) c
        This type is translated to the \type{unsigned} \small{VHDL} type.
      \stopdesc
-      \fxnote{There should be a reference to Christiaan's work here.}
+
+      The integer and vector built-in types are discussed in more detail
+      in \cite[baaij09].

    \subsection{User-defined types}
      There are three ways to define new types in Haskell: Algebraic

      completely new type, for which we provide the \VHDL translation
      below. Type synonyms and renamings only define new names for
      existing types (where synonyms are completely interchangeable and
-      renamings need explicit conversion). Therefore, these don't need
+      renamings need explicit conversion). Therefore, these do not need
      any particular \VHDL translation; a synonym or renamed type will
      just use the same representation as the original type. The
      distinction between a renaming and a synonym does no longer matter

      A product type is an algebraic datatype with a single constructor
      with two or more fields, denoted in practice like (a,b), (a,b,c),
      etc.
This is essentially a way to pack a few values together in a record-like
-      structure. In fact, the builtin tuple types are just algebraic product
+      structure. In fact, the built-in tuple types are just algebraic product
      types (and are thus supported in exactly the same way).

      The \quote{product} in its name refers to the collection of values belonging
-      to this type. The collection for a product type is the cartesian
+      to this type. The collection for a product type is the Cartesian
      product of the collections for the types of its fields.

      These types are translated to \VHDL record types, with one field for

      A sum type is an algebraic datatype with multiple constructors, where
      the constructors have one or more fields. Technically, a type with more
      than one field per constructor is a sum of products type, but
-      for our purposes this distinction does not really make a difference,
-      so we'll leave it out.
+      for our purposes this distinction does not really make a
+      difference, so it is not made here.

      The \quote{sum} in its name refers again to the collection of values
      belonging to this type. The collection for a sum type is the

      sure that the two \hs{Word}s in the \hs{Sum} type will never be valid
      at the same time, this is a waste of space.

-      Obviously, we could do some duplication detection here to reuse a
+      Obviously, duplication detection could be used to reuse a
      particular field for another constructor, but this would only
-      partially solve the problem. If I would, for example, have an array of
-      8 bits and an 8 bit unsiged word, these are different types and could
-      not be shared. However, in the final hardware, both of these types
-      would simply be 8 bit connections, so we have a 100\% size increase by
-      not sharing these.
+      partially solve the problem. If two fields were, for
+      example, an array of 8 bits and an 8 bit unsigned word, these would
+      be different types that could not be shared. However, in the final
+      hardware, both of these types would simply be 8 bit connections,
+      so we have a 100\% size increase by not sharing these.
    \stopdesc

    Another interesting case is that of recursive types. In Haskell, an

    immediately shows the problem with recursive types: What hardware type
    to allocate here?

-      If we would use the naive approach for sum types we described above, we
-      would create a record where the first field is an enumeration to
-      distinguish \hs{Empty} from \hs{Cons}. Furthermore, we would add two
-      more fields: One with the (\VHDL equivalent of) type \hs{t} (assuming we
-      actually know what type this is at compile time, this should not be a
-      problem) and a second one with type \hs{List t}. The latter one is of
-      course a problem: This is exactly the type we were trying to translate
-      in the first place.
+      If the naive approach for sum types described above would be used,
+      a record would be created where the first field is an enumeration
+      to distinguish \hs{Empty} from \hs{Cons}. Furthermore, two more
+      fields would be added: One with the (\VHDL equivalent of) type
+      \hs{t} (assuming this type is actually known at compile time, this
+      should not be a problem) and a second one with type \hs{List t}.
+      The latter one is of course a problem: This is exactly the type
+      that was to be translated in the first place.

-      Our \VHDL type will thus become infinitely deep.
In other words, there
-      is no way to statically determine how long (deep) the list will be
-      (it could even be infinite).
+      The resulting \VHDL type will thus become infinitely deep. In
+      other words, there is no way to statically determine how long
+      (deep) the list will be (it could even be infinite).

-      In general, we can say we can never properly translate recursive types:
-      All recursive types have a potentially infinite value (even though in
+      In general, recursive types can never be properly translated: All
+      recursive types have a potentially infinite value (even though in
      practice they will have a bounded value, there is no way for the
      compiler to automatically determine an upper bound on its size).

  \subsection{Partial application}
-      Now we've seen how to to translate application, choice and types, we will
-      get to a more complex concept: Partial applications. A \emph{partial
-      application} is any application whose (return) type is (again) a function
-      type.
+    Now that the translation of application, choice and types has been
+    discussed, a more complex concept can be considered: Partial
+    applications. A \emph{partial application} is any application whose
+    (return) type is (again) a function type.

-    From this, we can see that the translation rules for full application do not
-    apply to a partial application. \in{Example}[ex:Quadruple] shows an example
-    use of partial application and the corresponding architecture.
+    From this, it should be clear that the translation rules for full
+    application do not apply to a partial application: There are not
+    enough values for all the input ports in the resulting \VHDL.
+    \in{Example}[ex:Quadruple] shows an example use of partial application
+    and the corresponding architecture.

\startbuffer[Quadruple]
-- | Multiply the input word by four.
quadruple :: Word -> Word
quadruple n = mul (mul n)

  Here, the definition of mul is a partial function application: It applies
  the function \hs{(*) :: Word -> Word -> Word} to the value \hs{2 :: Word},
  resulting in the expression \hs{(*) 2 :: Word -> Word}. Since this resulting
-  expression is again a function, we can't generate hardware for it directly.
-  This is because the hardware to generate for \hs{mul} depends completely on
-  where and how it is used. In this example, it is even used twice.
-
-  However, it is clear that the above hardware description actually describes
-  valid hardware. In general, we can see that any partial applied function
-  must eventually become completely applied, at which point we can generate
-  hardware for it using the rules for function application given in
-  \in{section}[sec:description:application]. It might mean that a partial
-  application is passed around quite a bit (even beyond function boundaries),
-  but eventually, the partial application will become completely applied.
-  \todo{Provide a step-by-step example of how this works}
+  expression is again a function, hardware cannot be generated for it
+  directly. This is because the hardware to generate for \hs{mul}
+  depends completely on where and how it is used. In this example, it is
+  even used twice.
+
+  However, it is clear that the above hardware description actually
+  describes valid hardware. In general, any partially applied function
+  must eventually become completely applied, at which point hardware for
+  it can be generated using the rules for function application given in
+  \in{section}[sec:description:application]. It might mean that a
+  partial application is passed around quite a bit (even beyond function
+  boundaries), but eventually, the partial application will become
+  completely applied. An example of this principle is given in
+  \in{section}[sec:normalization:defunctionalization].
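+
+  As a rough illustration of this process for the example above (the
+  exact transformations applied are described in the section referenced
+  above; this only sketches their effect):
+
+  \starthaskell
+  -- The original description, with a partially applied (*):
+  --   quadruple n = mul (mul n)
+  --     where mul = (*) 2
+  --
+  -- After propagating the partial application to its use sites:
+  quadruple n = (*) 2 ((*) 2 n)
+  \stophaskell
+
+  In the rewritten description both applications of \hs{(*)} are
+  complete, so each of them can now be translated to a multiplier
+  component.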
 \section{Costless specialization}
  Each (complete) function application in our description generates a
@@ -572,14 +616,14 @@ quadruple n = mul (mul n)

  Specialization is useful for our hardware descriptions for functions that
  contain arguments that cannot be translated to hardware directly
-    (polymorphic or higher order arguments, for example). If we can create
+    (polymorphic or higher-order arguments, for example). If we can create
  specialized functions that remove the argument, or make it translatable,
  we can use specialization to make the original, untranslatable, function
  obsolete.

 \section{Higher order values}
  What holds for partial application can be easily generalized to any
-    higher order expression. This includes partial applications, plain
+    higher-order expression. This includes partial applications, plain
  variables (e.g., a binder referring to a top level function), lambda
  expressions and more complex expressions with a function type (a \hs{case}
  expression returning lambdas, for example).

  become complete applications, applied lambda expressions disappear by
  applying β-reduction, etc.

-    So any higher order value will be \quote{pushed down} towards its
+    So any higher-order value will be \quote{pushed down} towards its
  application just like partial applications. Whenever a function boundary
  needs to be crossed, the called function can be specialized.

@@ -600,7 +644,7 @@ quadruple n = mul (mul n)
 \section{Polymorphism}
  In Haskell, values can be \emph{polymorphic}: They can have multiple types. For
  example, the function \hs{fst :: (a, b) -> a} is an example of a
  polymorphic function: It works for tuples with any two element types. Haskell
-    typeclasses allow a function to work on a specific set of types, but the
+    type classes allow a function to work on a specific set of types, but the
  general idea is the same. The opposite of this is a \emph{monomorphic}
  value, which has a single, fixed, type.

%    the \hs{even :: (Integral a) => a -> Bool} function, which can map any
%    value of a type that is member of the \hs{Integral} type class

-    When generating hardware, polymorphism can't be easily translated. How
+    When generating hardware, polymorphism cannot be easily translated. How
  many wires will you lay down for a value that could have any type? When
  type classes are involved, what hardware components will you lay down for
  a class method (whose behaviour depends on the type of its arguments)?
-    Note that Cλash currently does not allow user-defined typeclasses,
-    but does partly support some of the builtin typeclasses (like \hs{Num}).
+    Note that Cλash currently does not allow user-defined type classes,
+    but does partly support some of the built-in type classes (like \hs{Num}).

  Fortunately, we can again use the principle of specialization: Since every
  function application generates a separate piece of hardware, we can know
-    the types of all arguments exactly. Provided that we don't use existential
-    typing, all of the polymorphic types in a function must depend on the
-    types of the arguments (In other words, the only way to introduce a type
-    variable is in a lambda abstraction).
+    the types of all arguments exactly. Provided that existential typing
+    (which is a \GHC extension) is not used, all of the
+    polymorphic types in a function must depend on the types of the
+    arguments (In other words, the only way to introduce a type variable
+    is in a lambda abstraction).
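+
+    As a sketch of such a specialization, consider a use of the
+    polymorphic function \hs{fst} mentioned above (the specialized name
+    is invented for illustration):
+
+    \starthaskell
+    -- Polymorphic: the width of the ports is unknown.
+    fst :: (a, b) -> a
+    fst (x, y) = x
+
+    -- Specialized for an application site where the argument is
+    -- known to have type (Word, Bool).
+    fstWordBool :: (Word, Bool) -> Word
+    fstWordBool (x, y) = x
+    \stophaskell
+
+    The specialized version is monomorphic, so the number of wires
+    needed for each port is known and hardware can be generated for it.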
    If a function is monomorphic, all values inside it are monomorphic as
    well, so any function that is applied within the function can only be

    specialized to work just for these specific types, removing the
    polymorphism from the applied functions as well.

-    \defref{top level function}Our top level function must not have a polymorphic type (otherwise we
-    wouldn't know the hardware interface to our top level function).
+    \defref{entry function}The entry function must not have a
+    polymorphic type (otherwise no hardware interface could be generated
+    for the entry function).

    By induction, this means that all functions that are (indirectly) called
    by our top level function (meaning all functions that are translated in

 \section{State}
    A very important concept in hardware designs is \emph{state}. In a
-    stateless (or, \emph{combinatoric}) design, every output is directly and solely dependent on the
+    stateless (or, \emph{combinational}) design, every output is directly and solely dependent on the
    inputs. In a stateful design, the outputs can depend on the history of
    inputs, or the \emph{state}. State is usually stored in \emph{registers},
    which retain their value during a clock cycle, and are typically updated at

    \todo{Sidenote? Registers can contain any (complex) type}

-    To make our hardware description language useful to describe more than
-    simple combinatoric designs, we'll need to be able to describe state in
-    some way.
+    To make Cλash useful to describe more than simple combinational
+    designs, it needs to be able to describe state in some way.

  \subsection{Approaches to state}
    In Haskell, functions are always pure (except when using unsafe

    evaluate a given function with given inputs, it will always provide
    the same output.

-    \todo{Define pure}
+    \placeintermezzo{}{
+      \defref{purity}
+      \startframedtext[width=8cm,background=box,frame=no]
+      \startalignment[center]
+        {\tfa Purity}
+      \stopalignment
+      \blank[medium]
+
+      A function is said to be pure if it satisfies two conditions:
+
+      \startitemize[KR]
+      \item When a pure function is called with the same arguments twice, it should
+      return the same value in both cases.
+      \item When a pure function is called, it should have no
+      observable side-effects.
+      \stopitemize
+
+      Purity is an important property in functional languages, since
+      it enables all kinds of mathematical reasoning and
+      optimizations with pure functions that are not guaranteed to
+      be correct for impure functions.
+
+      An example of a pure function is the square root function
+      \hs{sqrt}. An example of an impure function is the \hs{today}
+      function that returns the current date (which of course cannot
+      exist like this in Haskell).
+      \stopframedtext
+    }
+
+    This is a perfect match for a combinational circuit, where the output
    also solely depends on the inputs. However, when state is involved, this
-    no longer holds.
Since we're in charge of our own language (or at least
-    let's pretend we aren't using Haskell and we are), we could remove this
-    purity constraint and allow a function to return different values
-    depending on the cycle in which it is evaluated (or rather, the current
-    state). However, this means that all kinds of interesting properties of
-    our functional language get lost, and all kinds of transformations and
-    optimizations might no longer be meaning preserving.
-
-    Provided that we want to keep the function pure, the current state has
-    to be present in the function's arguments in some way. There seem to be
-    two obvious ways to do this: Adding the current state as an argument, or
-    including the full history of each argument.
+    no longer holds. Of course this purity constraint cannot just be
+    removed from Haskell. But even when designing a completely new (hardware
+    description) language, it does not seem to be a good idea to
+    remove this purity. This would mean that all kinds of interesting properties of
+    the functional language get lost, and all kinds of transformations
+    and optimizations are no longer meaning preserving.
+
+    So our functions must remain pure, meaning the current state has
+    to be present in the function's arguments in some way. There seem
+    to be two obvious ways to do this: Adding the current state as an
+    argument, or including the full history of each argument.
 
     \subsubsection{Stream arguments and results}
       Including the entire history of each input (\eg, the value of that
@@ -688,7 +759,7 @@ quadruple n = mul (mul n)
       An obvious downside of this solution is that on each cycle, all the
       previous cycles must be resimulated to obtain the current state. To do
       this, it might be needed to have a recursive helper function as well,
-      wich might be hard to be properly analyzed by the compiler.
+      which might be hard for the compiler to analyze properly.
 
       A slight variation on this approach is one taken by some of the other
       functional \small{HDL}s in the field: \todo{References to Lava,
@@ -708,6 +779,13 @@ quadruple n = mul (mul n)
       well (note that changing a single-element operation to a stream
       operation can done with \hs{map}, \hs{zipwith}, etc.).
 
+      This also means that primitive operations from an existing
+      language such as Haskell cannot be used directly (including some
+      syntax constructs, like \hs{case} expressions and \hs{if}
+      expressions). This makes this approach well suited for use in
+      \small{EDSL}s, since those already impose these same
+      limitations. \refdef{EDSL}
+
       Note that the concept of \emph{state} is no more than having some way
       to communicate a value from one cycle to the next. By introducing a
       \hs{delay} function, we can do exactly that: Delay (each value in) a
@@ -765,13 +843,9 @@ acc in = out
       definition of out), but is essentially easy to interpret. There is a
       single call to delay, resulting in a circuit with a single register,
       whose input is connected to \hs{out} (which is the output of the
-      adder), and it's output is the expression \hs{delay out 0} (which is
+      adder), and its output is the expression \hs{delay out 0} (which is
       connected to one of the adder inputs).
 
-      This notation has a number of downsides, amongst which are limited
-      readability and ambiguity in the interpretation. \todo{Reference
-      Christiaan, who has done further investigation}
-
     \subsubsection{Explicit state arguments and results}
       A more explicit way to model state, is to simply add an extra argument
      containing the current state value.
This allows an output to depend on
      both the inputs as well as the current state while keeping the
      function pure (letting the result depend only on the arguments), since
      the current state is now an argument.
 
      In Haskell, this would look like
-      \in{example}[ex:ExplicitAcc]\footnote[notfinalsyntax]{This example is not in the final
-      Cλash syntax}. \todo{Referencing notfinalsyntax from Introduction.tex doesn't
-      work}
+      \in{example}[ex:ExplicitAcc]\footnote[notfinalsyntax]{This
+      example is not in the final Cλash syntax}. \todo{Referencing
+      notfinalsyntax from Introduction.tex doesn't work}
 
 \startbuffer[ExplicitAcc]
 -- input -> current state -> (new state, output)
@@ -804,14 +878,18 @@ acc in s = (s', out)
      variables are used by a function can be completely determined from its
      type signature (as opposed to the stream approach, where a function
      looks the same from the outside, regardless of what state variables it
-      uses or whether it's stateful at all).
-
-      This approach is the one chosen for Cλash and will be examined more
-      closely below.
+      uses or whether it is stateful at all).
+
+      This approach to state has been one of the initial driving
+      forces behind this research. Unlike a stream-based approach, it
+      is well suited to reusing existing code and language features
+      (like \hs{if} and \hs{case} expressions), because it operates on
+      normal values. For these reasons, this is the approach chosen
+      for Cλash. It will be examined more closely below.
 
  \subsection{Explicit state specification}
-    We've seen the concept of explicit state in a simple example below, but
-    what are the implications of this approach?
+    The concept of explicit state has been introduced with some
+    examples above, but what are the implications of this approach?
 
    \subsubsection{Substates}
      Since a function's state is reflected directly in its type signature,
@@ -889,7 +967,7 @@ acc in s = (s', out)
      \item accept that the generated hardware might not be exactly what we
      intended, in some specific cases. In most cases, the hardware will be
      what we intended.
-      \item explicitely annotate state arguments and results in the input
+      \item explicitly annotate state arguments and results in the input
      description.
      \stopitemize
 
@@ -916,7 +994,7 @@ acc in s = (s', out)
      arguments and stateful (parts of) results a different type. This has
      the potential to make this annotation visible inside the function as
     well, such that when looking at a value inside the function body you can
-      tell if it's stateful by looking at its type. This could possibly make
+      tell if it is stateful by looking at its type. This could possibly make
      the translation process a lot easier, since less analysis of the
      program flow might be required.
    \stopitemize
 
@@ -928,7 +1006,7 @@ acc in s = (s', out)
  \todo{Note about conditions on state variables and checking them}
 
 \section[sec:recursion]{Recursion}
-  An import concept in functional languages is recursion. In it's most basic
+  An important concept in functional languages is recursion. In its most basic
  form, recursion is a definition that is described in terms of itself. A
  recursive function is thus a function that uses itself in its body. This
  usually requires multiple evaluations of this function, with changing
@@ -943,7 +1021,7 @@ acc in s = (s', out)
  problem for generating hardware.
 
  This is expected for functions that describe infinite recursion. In that
-  case, we can't generate hardware that shows correct behaviour in a single
+  case, we cannot generate hardware that shows correct behaviour in a single
  cycle (at best, we could generate hardware that needs an infinite number of
  cycles to complete).
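To make the difference concrete, the following plain Haskell sketch (not
Cλash syntax; the names are only illustrative) contrasts a computation of
fixed size with one whose recursion depth depends on a run-time value:

    -- Bounded repetition: the structure has a fixed size, so this could
    -- in principle be unrolled into a fixed chain of three adders.
    sum4 :: (Int, Int, Int, Int) -> Int
    sum4 (a, b, c, d) = a + (b + (c + d))

    -- Unbounded recursion: the depth depends on the length of the input
    -- list, so no finite circuit can compute it in a single cycle.
    sumN :: [Int] -> Int
    sumN []     = 0
    sumN (x:xs) = x + sumN xs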
@@ -999,8 +1077,8 @@ acc in s = (s', out)
  \stopitemize
 
  As you can see, using a simple \hs{case} expression at value level causes
-  the type checker to always typecheck both alternatives, which can't be
-  done! The typechecker is unable to distinguish the two case
+  the type checker to always typecheck both alternatives, which cannot be
+  done. The typechecker is unable to distinguish the two case
  alternatives (this is partly possible using \small{GADT}s, but that
  approach faced other problems \todo{ref christiaan?}).
 
@@ -1009,7 +1087,7 @@ acc in s = (s', out)
  implementations of the sum function, based on the type of the
  argument, this sounds like the perfect problem to solve with a type
  class. However, this approach has its own problems (not the least of
-  them that you need to define a new typeclass for every recursive
+  them that you need to define a new type class for every recursive
  function you want to define). \todo{This should reference Christiaan}
 
@@ -1020,17 +1098,19 @@ acc in s = (s', out)
  counter could be expressed, but only translated to hardware for a fixed
  number of iterations. Also, this would require extensive support for compile
  time simplification (constant propagation) and compile time evaluation
-  (evaluation of constant comparisons), to ensure non-termination. Even then, it
-  is hard to really guarantee termination, since the user (or GHC desugarer)
-  might use some obscure notation that results in a corner case of the
-  simplifier that is not caught and thus non-termination.
+  (evaluation of constant comparisons), to ensure termination.
+  Supporting general recursion will probably require strict conditions
+  on the input descriptions. Even then, it will be hard (if not
+  impossible) to really guarantee termination, since the user (or \GHC
+  desugarer) might use some obscure notation that results in a corner
+  case of the simplifier that is not caught and thus non-termination.
 
  Evaluating all possible (and non-possible) ways to add recursion to
  our descriptions, it seems better to limit the scope of this research
  to exclude recursion. This allows for focusing on other interesting
-  areas instead. By including (builtin) support for a number of higher
-  order functions like \hs{map} and \hs{fold}, we can still express most
-  of the things we would use (list) recursion for.
+  areas instead. By including (built-in) support for a number of
+  higher-order functions like \hs{map} and \hs{fold}, we can still
+  express most of the things we would use (list) recursion for.
 
 % vim: set sw=2 sts=2 expandtab:
diff --git a/Chapters/Introduction.tex b/Chapters/Introduction.tex
index 5b11f5d..2f7172d 100644
--- a/Chapters/Introduction.tex
+++ b/Chapters/Introduction.tex
@@ -6,6 +6,13 @@ compilers and introduce the hardware description language Cλash that will
 connect these worlds and puts a step towards making hardware programming
 on the whole easier, more maintainable and generally more pleasant.
 
+This assignment has been performed in close cooperation with Christiaan
+Baaij, whose Master's thesis \cite[baaij09] has been completed at the
+same time as this thesis. Where this thesis focuses on the
+interpretation of the Haskell language and the compilation process,
+\cite[baaij09] has a more thorough study of the field, explores more
+advanced types and provides a case study.
+ % Use \subject to hide this section from the toc \subject{Research goals} This research started out with the notion that a functional program is very @@ -141,7 +148,7 @@ on the whole easier, more maintainable and generally more pleasant. Or... is this the description of a single accumulating adder, that will add one element of each input each clock cycle and has a reset value of - 0\todo{normal 0}? In + \lam{0}? In that case, we would have described the architecture show in \in{example}[ex:RecursiveSumAlt] \startuseMPgraphic{RecursiveSumAlt} @@ -207,7 +214,7 @@ on the whole easier, more maintainable and generally more pleasant. \startitemize \item How to interpret recursion in descriptions? \item How to interpret polymorphism? - \item How to interpret higher order descriptions? + \item How to interpret higher-order descriptions? \stopitemize In addition to looking at designing a hardware description language, we @@ -241,7 +248,7 @@ on the whole easier, more maintainable and generally more pleasant. In the first chapter, we will sketch the context for this research. The current state and history of hardware description languages will be briefly discussed, as well as the state and history of functional -programming. Since we're not the first to have merged these approaches, +programming. Since we are not the first to have merged these approaches, a number of other functional hardware description languages are briefly described. diff --git a/Chapters/Normalization.tex b/Chapters/Normalization.tex index 4f4ee15..cbb634c 100644 --- a/Chapters/Normalization.tex +++ b/Chapters/Normalization.tex @@ -25,7 +25,7 @@ aim to bring the core description into a simpler form, which we can subsequently translate into \small{VHDL} easily. This normal form is needed because the full core language is more expressive than \small{VHDL} in some - areas (higher order expressions, limited polymorphism using type + areas (higher-order expressions, limited polymorphism using type classes, etc.) and because core can describe expressions that do not have a direct hardware interpretation. @@ -44,16 +44,16 @@ to see if this normal form actually has the properties we would like it to have. - But, before getting into more definitions and details about this normal form, - let's try to get a feeling for it first. The easiest way to do this is by - describing the things we want to not have in a normal form. + But, before getting into more definitions and details about this normal + form, let us try to get a feeling for it first. The easiest way to do this + is by describing the things that are unwanted in the intended normal form. \startitemize \item Any \emph{polymorphism} must be removed. When laying down hardware, we - can't generate any signals that can have multiple types. All types must be + cannot generate any signals that can have multiple types. All types must be completely known to generate hardware. - \item All \emph{higher order} constructions must be removed. We can't + \item All \emph{higher-order} constructions must be removed. We cannot generate a hardware signal that contains a function, so all values, arguments and return values used must be first order. @@ -204,7 +204,7 @@ As a more complete example, consider \in{example}[ex:NormalComplete]. This example shows everything that - is allowed in normal form, except for builtin higher order functions + is allowed in normal form, except for built-in higher-order functions (like \lam{map}). 
The graphical version of the architecture contains a
    slightly simplified version, since the state tuple packing and
    unpacking have been left out. Instead, two separate registers are
@@ -321,7 +321,7 @@
  Now we have some intuition for the normal form, we can describe how we want
  the normal form to look like in a slightly more formal manner. The
  following EBNF-like description captures most of the intended structure (and
-  generates a subset of GHC's core format).
+  generates a subset of \GHC's core format).
 
  There are two things missing: Cast expressions are sometimes
  allowed by the prototype, but not specified here and the below
@@ -365,8 +365,8 @@
  \italic{userarg} := var (lvar(var))
 
  \italic{builtinapp} := \italic{builtinfunc} | \italic{builtinapp} \italic{builtinarg}
-  \italic{builtinfunc} := var (bvar(var))
-  \italic{builtinarg} := var (representable(var) ∧ lvar(var))
+  \italic{builtinfunc} := var (bvar(var))
+  \italic{builtinarg} := var (representable(var) ∧ lvar(var))
                       | \italic{partapp} (partapp :: a -> b)
                       | \italic{coreexpr} (¬representable(coreexpr) ∧ ¬(coreexpr :: a -> b))
  \italic{partapp} := \italic{userapp}
@@ -384,8 +384,8 @@
  Most function applications bound by the let expression define a
  component instantiation, where the input and output ports are mapped to
  local signals or arguments. Some of the others use a
-  builtin construction (\eg the \lam{case} expression) or call a
-  builtin function (\eg \lam{+} or \lam{map}). For these, a
+  built-in construction (\eg the \lam{case} expression) or call a
+  built-in function (\eg \lam{+} or \lam{map}). For these, a
  hardcoded \small{VHDL} translation is available.
 
 \section[sec:normalization:transformation]{Transformation notation}
@@ -399,13 +399,13 @@
  <original expression>
  ~
  <context conditions>
  --------------------------
+  <transformed expression>
  ~
  <context additions>
  \stoptrans
 
-  This format desribes a transformation that applies to \lam{<original expression>} and transforms it into \lam{<transformed expression>}, assuming
+  This format describes a transformation that applies to \lam{<original expression>} and transforms it into \lam{<transformed expression>}, assuming
  that all conditions are satisfied. In this format, there are a number of placeholders
  in pointy brackets, most of which should be rather obvious in their
  meaning. Nevertheless, we will more precisely specify their meaning below:
@@ -474,8 +474,8 @@
 
  To understand this notation better, the step by step application of
  the η-abstraction transformation to a simple \small{ALU} will be
-  shown. Consider η-abstraction, described using above notation as
-  follows:
+  shown. Consider η-abstraction, which is a common transformation from
+  lambda calculus, described using the above notation as follows:
 
  \starttrans
  E                 \lam{E :: a -> b}
  -------------- \lam{E} does not occur on a function position in an application
  λx.E x         \lam{E} is not a lambda abstraction.
  \stoptrans
 
@@ -546,7 +546,7 @@
 
  By now, the placeholder \lam{E} is bound to the entire expression. The
  placeholder \lam{x}, which occurs in the replacement template, is not bound
-  yet, so we need to generate a fresh binder for that. Let's use the binder
+  yet, so we need to generate a fresh binder for that. Let us use the binder
  \lam{a}. This results in the following replacement expression:
 
  \startlambda
@@ -576,7 +576,7 @@
  \stoplambda
 
  The transformation does not apply to this lambda abstraction, so we
-  look at its body. For brevity, we'll put the case expression on one line from
+  look at its body. For brevity, we will put the case expression on one line from
  now on.
 
  \startlambda
@@ -598,7 +598,7 @@
  function position (which makes the second condition false).
In the same way the transformation does not apply to both components of this
  expression (\lam{case opcode of Low -> (+); High -> (-)} and \lam{a}), so
-  we'll skip to the components of the case expression: The scrutinee and
+  we will skip to the components of the case expression: The scrutinee and
  both alternatives. Since the opcode is not a function, it does not
  apply here.
 
@@ -677,12 +677,12 @@
  dictionaries, functions.
  \defref{representable}
 
-  A \emph{builtin function} is a function supplied by the Cλash framework, whose
+  A \emph{built-in function} is a function supplied by the Cλash framework, whose
  implementation is not valid Cλash. The implementation is of course valid
  Haskell, for simulation, but it is not expressable in Cλash.
-  \defref{builtin function} \defref{user-defined function}
+  \defref{built-in function} \defref{user-defined function}
 
-  For these functions, Cλash has a \emph{builtin hardware translation}, so calls
+  For these functions, Cλash has a \emph{built-in hardware translation}, so calls
  to these functions can still be translated. These are functions like
  \lam{map}, \lam{hwor} and \lam{length}.
 
@@ -750,9 +750,9 @@
  binder uniqueness problems in \small{GHC}.
 
  In our transformation system, maintaining this non-shadowing invariant is
-  a bit harder to do (mostly due to implementation issues, the prototype doesn't
-  use \small{GHC}'s subsitution code). Also, the following points can be
-  observed.
+  a bit harder to do (mostly due to implementation issues, the prototype
+  does not use \small{GHC}'s substitution code). Also, the following points
+  can be observed.
 
  \startitemize
  \item Deshadowing does not guarantee overall uniqueness. For example, the
@@ -891,6 +891,58 @@
 
  \transexample{beta-type}{β-reduction for type abstractions}{from}{to}
 
+  \subsubsection{Unused let binding removal}
+    This transformation removes let bindings that are never used.
+    Occasionally, \GHC's desugarer introduces some unused let bindings.
+
+    This normalization pass should not really be necessary to get
+    into the intended normal form (since the intended normal form
+    definition \refdef{intended normal form definition} does not
+    require that every binding is used), but in practice the
+    desugarer or simplifier emits some bindings that cannot be
+    normalized (e.g., calls to
+    \hs{Control.Exception.Base.patError}) but are not used anywhere
+    either. To prevent the \VHDL generation from breaking on these
+    artifacts, this transformation removes them.
+ + \todo{Do not use old-style numerals in transformations} + \starttrans + letrec + a0 = E0 + \vdots + ai = Ei + \vdots + an = En + in + M \lam{ai} does not occur free in \lam{M} + ---------------------------- \lam{\forall j, 0 ≤ j ≤ n, j ≠ i} (\lam{ai} does not occur free in \lam{Ej}) + letrec + a0 = E0 + \vdots + ai-1 = Ei-1 + ai+1 = Ei+1 + \vdots + an = En + in + M + \stoptrans + + % And an example + \startbuffer[from] + let + x = 1 + in + 2 + \stopbuffer + + \startbuffer[to] + let + in + 2 + \stopbuffer + + \transexample{unusedlet}{Unused let binding removal}{from}{to} + \subsubsection{Empty let removal} This transformation is simple: It removes recursive lets that have no bindings (which usually occurs when unused let binding removal removes the last @@ -905,7 +957,18 @@ M \stoptrans - \todo{Example} + % And an example + \startbuffer[from] + let + in + 2 + \stopbuffer + + \startbuffer[to] + 2 + \stopbuffer + + \transexample{emptylet}{Empty let removal}{from}{to} \subsubsection[sec:normalization:simplelet]{Simple let binding removal} This transformation inlines simple let bindings, that bind some @@ -940,44 +1003,6 @@ \todo{example} - \subsubsection{Unused let binding removal} - This transformation removes let bindings that are never used. - Occasionally, \GHC's desugarer introduces some unused let bindings. - - This normalization pass should really be not be necessary to get - into intended normal form (since the intended normal form - definition \refdef{intended normal form definition} does not - require that every binding is used), but in practice the - desugarer or simplifier emits some bindings that cannot be - normalized (e.g., calls to a - \hs{Control.Exception.Base.patError}) but are not used anywhere - either. To prevent the \VHDL generation from breaking on these - artifacts, this transformation removes them. - - \todo{Don't use old-style numerals in transformations} - \starttrans - letrec - a0 = E0 - \vdots - ai = Ei - \vdots - an = En - in - M \lam{ai} does not occur free in \lam{M} - ---------------------------- \forall j, 0 ≤ j ≤ n, j ≠ i (\lam{ai} does not occur free in \lam{Ej}) - letrec - a0 = E0 - \vdots - ai-1 = Ei-1 - ai+1 = Ei+1 - \vdots - an = En - in - M - \stoptrans - - \todo{Example} - \subsubsection{Cast propagation / simplification} This transform pushes casts down into the expression as far as possible. This transformation has been added to make a few @@ -1015,8 +1040,9 @@ Note that this transformation is completely optional. It is not required to get any function into intended normal form, but it does help making - the resulting VHDL output easier to read (since it removes a bunch of - components that are really boring). + the resulting VHDL output easier to read (since it removes components + that do not add any real structure, but do hide away operations and + cause extra clutter). This transform takes any top level binding generated by \GHC, whose normalized form contains only a single let binding. @@ -1054,9 +1080,9 @@ allowed in \VHDL architecture names\footnote{Technically, it is allowed to use non-alphanumerics when using extended identifiers, but it seems that none of the tooling likes - extended identifiers in filenames, so it effectively doesn't + extended identifiers in filenames, so it effectively does not work.}, so the entity would be called \quote{w7aA7f} or - something similarly unreadable and autogenerated). + something similarly meaningless and autogenerated). 
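As a small concrete illustration of the simple let binding removal
described above (a plain Haskell sketch rather than Core; the names are
only illustrative):

    -- Before: the binding x = y only gives y another name.
    before :: Int -> Int
    before y = let x = y in x + x

    -- After inlining the simple binding, x is replaced by y everywhere
    -- and the binding itself disappears.
    after :: Int -> Int
    after y = y + y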
\subsection{Program structure}
    These transformations are aimed at normalizing the overall structure
@@ -1161,8 +1187,9 @@
    This transformation makes all non-recursive lets recursive. In the
    end, we want a single recursive let in our normalized program, so all
    non-recursive lets can be converted. This also makes other
-    transformations simpler: They can simply assume all lets are
-    recursive.
+    transformations simpler: They only need to be specified for recursive
+    let expressions (and simply will not apply to non-recursive let
+    expressions until this transformation has been applied).
 
    \starttrans
    let
@@ -1337,10 +1364,10 @@
 
    \transexample{argsimpl}{Argument simplification}{from}{to}
 
-  \subsection[sec:normalization:builtins]{Builtin functions}
-    This section deals with (arguments to) builtin functions. In the
+  \subsection[sec:normalization:built-ins]{Built-in functions}
+    This section deals with (arguments to) built-in functions. In the
    intended normal form definition\refdef{intended normal form definition}
-    we can see that there are three sorts of arguments a builtin function
+    we can see that there are three sorts of arguments a built-in function
    can receive.
 
    \startitemize[KR]
@@ -1348,9 +1375,9 @@
      common argument to any function. The argument simplification
     transformation described in \in{section}[sec:normalization:argsimpl]
      makes sure that \emph{any} representable argument to \emph{any}
-      function (including builtin functions) is turned into a local variable
+      function (including built-in functions) is turned into a local variable
      reference.
-      \item (A partial application of) a top level function (either builtin on
+      \item (A partial application of) a top level function (either built-in or
      user-defined). The function extraction transformation described in
      this section takes care of turning every functiontyped argument into
      (a partial application of) a top level function.
@@ -1359,25 +1386,25 @@
      transformation needed. Note that this category is exactly all
      expressions that are not transformed by the transformations for the
      previous two categories. This means that \emph{any} core expression
-      that is used as an argument to a builtin function will be either
+      that is used as an argument to a built-in function will be either
      transformed into one of the above categories, or end up in this
      categorie. In any case, the result is in normal form.
    \stopitemize
 
    As noted, the argument simplification will handle any representable
-    arguments to a builtin function. The following transformation is needed
+    arguments to a built-in function. The following transformation is needed
    to handle non-representable arguments with a function type, all other
-    non-representable arguments don't need any special handling.
+    non-representable arguments do not need any special handling.
 
    \subsubsection[sec:normalization:funextract]{Function extraction}
-      This transform deals with function-typed arguments to builtin
+      This transform deals with function-typed arguments to built-in
      functions.
-      Since builtin functions cannot be specialized (see
+      Since built-in functions cannot be specialized (see
      \in{section}[sec:normalization:specialize]) to remove the arguments,
      these arguments are extracted into a new global function instead. In
      other words, we create a new top level function that has exactly the
      extracted argument as its body.
This greatly simplifies the
      translation rules needed for built-in functions, since they only need
      to handle (partial applications of) top level functions.
 
      Any free variables occuring in the extracted arguments will become
      parameters to the new global function. The original argument is replaced
      with a reference to the new function, applied to any free variables from
      the original argument.
 
-      This transformation is useful when applying higher order builtin functions
+      This transformation is useful when applying higher-order built-in functions
      like \hs{map} to a lambda abstraction, for example. In this case, the code
      that generates \small{VHDL} for \hs{map} only needs to handle top level functions and
      partial applications, not any other expression (such as lambda abstractions or
      even more complicated expressions).
 
      \starttrans
-      M N                     \lam{M} is (a partial aplication of) a builtin function.
+      M N                     \lam{M} is (a partial application of) a built-in function.
      ---------------------   \lam{f0 ... fn} are all free local variables of \lam{N}
      M (x f0 ... fn)         \lam{N :: a -> b}
      ~                       \lam{N} is not a (partial application of) a top level function
@@ -1472,7 +1499,7 @@
          C0 v0,0 ... v0,m -> E0
          \vdots
          Cn vn,0 ... vn,m -> En
-      --------------------------------------------------- \forall i \forall j, 0 ≤ i ≤ n, 0 ≤ i < m (\lam{wi,j} is a wild (unused) binder)
+      --------------------------------------------------- \lam{\forall i \forall j, 0 ≤ i ≤ n, 0 ≤ j < m} (\lam{wi,j} is a wild (unused) binder)
      letrec                                              The case expression is not an extractor case
        v0,0 = case E of C0 x0,0 .. x0,m -> x0,0          The case expression is not a selector case
        \vdots
@@ -1569,14 +1596,14 @@
    values used in our expression representable. There are two main
    transformations that are applied to \emph{all} unrepresentable let
    bindings and function arguments. These are meant to address three
-    different kinds of unrepresentable values: Polymorphic values, higher
-    order values and literals. The transformation are described generically:
-    They apply to all non-representable values. However, non-representable
-    values that don't fall into one of these three categories will be moved
-    around by these transformations but are unlikely to completely
-    disappear. They usually mean the program was not valid in the first
-    place, because unsupported types were used (for example, a program using
-    strings).
+    different kinds of unrepresentable values: Polymorphic values,
+    higher-order values and literals. The transformations are described
+    generically: They apply to all non-representable values. However,
+    non-representable values that do not fall into one of these three
+    categories will be moved around by these transformations but are
+    unlikely to completely disappear. They usually mean the program was not
+    valid in the first place, because unsupported types were used (for
+    example, a program using strings).
 
    Each of these three categories will be detailed below, followed by the
    actual transformations.
 
@@ -1600,13 +1627,13 @@
      take care of exactly this.
 
      There is one case where polymorphism cannot be completely
-      removed: Builtin functions are still allowed to be polymorphic
+      removed: Built-in functions are still allowed to be polymorphic
      (Since we have no function body that we could properly
-      specialize). However, the code that generates \VHDL for builtin
+      specialize). However, the code that generates \VHDL for built-in
      functions knows how to handle this, so this is not a problem.
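To sketch what this specialization amounts to, consider the following
plain Haskell fragment (illustrative names; the specialized copy is what
the normalization would derive, not code the user writes):

    -- A polymorphic function, used at exactly one type in the program.
    first :: (a, b) -> a
    first (x, _) = x

    -- The specialized copy for a (Bool, Int) argument: all types are
    -- now fixed, so the widths of the input and output wires are known.
    firstBoolInt :: (Bool, Int) -> Bool
    firstBoolInt (x, _) = x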
- \subsubsection{Defunctionalization} - These transformations remove higher order expressions from our + \subsubsection[sec:normalization:defunctionalization]{Defunctionalization} + These transformations remove higher-order expressions from our program, making all values first-order. Higher order values are always introduced by lambda abstractions, none @@ -1614,7 +1641,7 @@ However, other expressions can \emph{have} a function type, when they have a lambda expression in their body. - For example, the following expression is a higher order expression + For example, the following expression is a higher-order expression that is not a lambda expression itself: \refdef{id function} @@ -1625,24 +1652,24 @@ \stoplambda The reference to the \lam{id} function shows that we can introduce a - higher order expression in our program without using a lambda + higher-order expression in our program without using a lambda expression directly. However, inside the definition of the \lam{id} function, we can be sure that a lambda expression is present. Looking closely at the definition of our normal form in \in{section}[sec:normalization:intendednormalform], we can see that - there are three possibilities for higher order values to appear in our + there are three possibilities for higher-order values to appear in our intended normal form: \startitemize[KR] \item[item:toplambda] Lambda abstractions can appear at the highest level of a top level function. These lambda abstractions introduce the arguments (input ports / current state) of the function. - \item[item:builtinarg] (Partial applications of) top level functions can appear as an - argument to a builtin function. + \item[item:built-inarg] (Partial applications of) top level functions can appear as an + argument to a built-in function. \item[item:completeapp] (Partial applications of) top level functions can appear in function position of an application. Since a partial application - cannot appear anywhere else (except as builtin function arguments), + cannot appear anywhere else (except as built-in function arguments), all partial applications are applied, meaning that all applications will become complete applications. However, since application of arguments happens one by one, in the expression: @@ -1653,7 +1680,7 @@ allowed, since it is inside a complete application. \stopitemize - We will take a typical function with some higher order values as an + We will take a typical function with some higher-order values as an example. The following function takes two arguments: a \lam{Bit} and a list of numbers. Depending on the first argument, each number in the list is doubled, or the list is returned unmodified. For the sake of @@ -1667,15 +1694,15 @@ High -> λz. z \stoplambda - This example shows a number of higher order values that we cannot + This example shows a number of higher-order values that we cannot translate to \VHDL directly. The \lam{double} binder bound in the let expression has a function type, as well as both of the alternatives of the case expression. The first alternative is a partial application of - the \lam{map} builtin function, whereas the second alternative is a + the \lam{map} built-in function, whereas the second alternative is a lambda abstraction. - To reduce all higher order values to one of the above items, a number - of transformations we've already seen are used. The η-abstraction + To reduce all higher-order values to one of the above items, a number + of transformations we have already seen are used. 
The η-abstraction
    transformation from \in{section}[sec:normalization:eta] ensures all
    function arguments are introduced by lambda abstraction on the highest
    level of a function. These lambda arguments are allowed because of
@@ -1704,7 +1731,7 @@
          High -> (λz. z) q
    \stoplambda
 
-    This propagation makes higher order values become applied (in
+    This propagation makes higher-order values become applied (in
    particular both of the alternatives of the case now have a
    representable type). Completely applied top level functions (like the
    first alternative) are now no longer invalid (they fall under
@@ -1720,11 +1747,11 @@
    \stoplambda
 
    As you can see in our example, all of this moves applications towards
-    the higher order values, but misses higher order functions bound by
+    the higher-order values, but misses higher-order functions bound by
    let expressions. The applications cannot be moved towards these values
    (since they can be used in multiple places), so the values will have
    to be moved towards the applications. This is achieved by inlining all
-    higher order values bound by let applications, by the
+    higher-order values bound by let applications, by the
    non-representable binding inlining transformation below. When applying
    it to our example, we get the following:
 
@@ -1734,9 +1761,9 @@
          High -> q
    \stoplambda
 
-    We've nearly eliminated all unsupported higher order values from this
-    expressions. The one that's remaining is the first argument to the
-    \lam{map} function. Having higher order arguments to a builtin
+    We have nearly eliminated all unsupported higher-order values from this
+    expression. The one that is remaining is the first argument to the
+    \lam{map} function. Having higher-order arguments to a built-in
    function like \lam{map} is allowed in the intended normal form, but
    only if the argument is a (partial application) of a top level
    function. This is easily done by introducing a new top level function
@@ -1761,10 +1788,10 @@
    intended normal form.
 
    There is one case that has not been discussed yet. What if the
-    \lam{map} function in the example above was not a builtin function
+    \lam{map} function in the example above was not a built-in function
    but a user-defined function? Then extracting the lambda expression
    into a new function would not be enough, since user-defined functions
-    can never have higher order arguments. For example, the following
+    can never have higher-order arguments. The following
    expression shows an example:
 
    \startlambda
@@ -1776,13 +1803,13 @@
 
    This example shows a function \lam{twice} that takes a function as a
    first argument and applies that function twice to the second argument.
-    Again, we've made the function monomorphic for clarity, even though
+    Again, we have made the function monomorphic for clarity, even though
    this function would be a lot more useful if it was polymorphic.
    The function \lam{main} uses \lam{twice} to apply a lambda epression twice.
 
    When faced with a user defined function, a body is available for that
    function. This means we could create a specialized version of the
-    function that only works for this particular higher order argument
+    function that only works for this particular higher-order argument
    (\ie, we can just remove the argument and call the specialized
    function without the argument). This transformation is detailed below.
Applying this transformation to the example gives: @@ -1794,13 +1821,13 @@ main = λa.app' a \stoplambda - The \lam{main} function is now in normal form, since the only higher - order value there is the top level lambda expression. The new - \lam{twice'} function is a bit complex, but the entire original body of - the original \lam{twice} function is wrapped in a lambda abstraction - and applied to the argument we've specialized for (\lam{λx. x + x}) - and the other arguments. This complex expression can fortunately be - effectively reduced by repeatedly applying β-reduction: + The \lam{main} function is now in normal form, since the only + higher-order value there is the top level lambda expression. The new + \lam{twice'} function is a bit complex, but the entire original body + of the original \lam{twice} function is wrapped in a lambda + abstraction and applied to the argument we have specialized for + (\lam{λx. x + x}) and the other arguments. This complex expression can + fortunately be effectively reduced by repeatedly applying β-reduction: \startlambda twice' :: Word -> Word @@ -1824,7 +1851,7 @@ representable type: Integer literals. Cλash supports using integer literals for all three integer types supported (\hs{SizedWord}, \hs{SizedInt} and \hs{RangedWord}). This is implemented using - Haskell's \hs{Num} typeclass, which offers a \hs{fromInteger} method + Haskell's \hs{Num} type class, which offers a \hs{fromInteger} method that converts any \hs{Integer} to the Cλash datatypes. When \GHC sees integer literals, it will automatically insert calls to @@ -1850,7 +1877,7 @@ Both the \lam{GHC.Prim.Int\#} and \lam{Integer} types are not representable, and cannot be translated directly. Fortunately, there - is no need to translate them, since \lam{fromInteger} is a builtin + is no need to translate them, since \lam{fromInteger} is a built-in function that knows how to handle these values. However, this does require that the \lam{fromInteger} function is directly applied to these non-representable literal values, otherwise errors will occur. @@ -1864,7 +1891,7 @@ By inlining these let-bindings, we can ensure that unrepresentable literals bound by a let binding end up in an application of the - appropriate builtin function, where they are allowed. Since it is + appropriate built-in function, where they are allowed. Since it is possible that the application of that function is in a different function than the definition of the literal value, we will always need to specialize away any unrepresentable literals that are used as @@ -1878,7 +1905,7 @@ but to inline the binding to remove it. As we have seen in the previous sections, inlining these bindings - solves (part of) the polymorphism, higher order values and + solves (part of) the polymorphism, higher-order values and unrepresentable literals in an expression. \refdef{substitution notation} @@ -2046,8 +2073,8 @@ letrec x = M in E \stoptrans - This doesn't seem like much of an improvement, but it does get rid of - the lambda expression (and the associated higher order value), while + This does not seem like much of an improvement, but it does get rid of + the lambda expression (and the associated higher-order value), while at the same time introducing a new let binding. Since the result of every application or case expression must be bound by a let expression in the intended normal form anyway, this is probably not a problem. If @@ -2200,7 +2227,7 @@ possible proof strategies are shown below. 
\subsection{Graph representation} - Before looking into how to prove these properties, we'll look at + Before looking into how to prove these properties, we will look at transformation systems from a graph perspective. We will first define the graph view and then illustrate it using a simple example from lambda calculus (which is a different system than the Cλash normalization @@ -2259,7 +2286,7 @@ Of course the graph for Cλash is unbounded, since we can construct an infinite amount of Core expressions. Also, there might potentially be multiple edges between two given nodes (with different labels), though - seems unlikely to actually happen in our system. + this seems unlikely to actually happen in our system. See \in{example}[ex:TransformGraph] for the graph representation of a very simple lambda calculus that contains just the expressions \lam{(λx.λy. (+) x @@ -2278,8 +2305,9 @@ From such a graph, we can derive some properties easily: \startitemize[KR] - \item A system will \emph{terminate} if there is no path of infinite length - in the graph (this includes cycles, but can also happen without cycles). + \item A system will \emph{terminate} if there is no walk (sequence of + edges, or transformations) of infinite length in the graph (this + includes cycles, but can also happen without cycles). \item Soundness is not easily represented in the graph. \item A system is \emph{complete} if all of the nodes in the normal set have the intended normal form. The inverse (that all of the nodes outside of @@ -2294,8 +2322,8 @@ When looking at the \in{example}[ex:TransformGraph], we see that the system terminates for both the reduction and expansion systems (but note that, for - expansion, this is only true because we've limited the possible - expressions. In comlete lambda calculus, there would be a path from + expansion, this is only true because we have limited the possible + expressions. In complete lambda calculus, there would be a path from \lam{(λx.λy. (+) x y) 1} to \lam{(λx.λy.(λz.(+) z) x y) 1} to \lam{(λx.λy.(λz.(λq.(+) q) z) x y) 1} etc.) diff --git a/Chapters/Prototype.tex b/Chapters/Prototype.tex index 6a2683e..e5cbb9d 100644 --- a/Chapters/Prototype.tex +++ b/Chapters/Prototype.tex @@ -12,10 +12,11 @@ describe here. \section[sec:prototype:input]{Input language} - When implementing this prototype, the first question to ask is: What - (functional) language will we use to describe our hardware? (Note that - this does not concern the \emph{implementation language} of the compiler, - just the language \emph{translated by} the compiler). + When implementing this prototype, the first question to ask is: + Which (functional) language will be used to describe our hardware? + (Note that this does not concern the \emph{implementation language} + of the compiler, just the language \emph{translated by} the + compiler). Initially, we have two choices: @@ -40,7 +41,7 @@ Note that in this consideration, embedded domain-specific languages (\small{EDSL}) and Template Haskell (\small{TH}) - approaches have not been included. As we've seen in + approaches have not been included. As we have seen in \in{section}[sec:context:fhdls], these approaches have all kinds of limitations on the description language that we would like to avoid. @@ -52,7 +53,7 @@ lots of work!), using an existing language is the obvious choice. This also has the advantage that a large set of language features is available to experiment with and it is easy to find which features apply well and - which don't. 
A possible second prototype could use a custom language with + which do not. A possible second prototype could use a custom language with just the useful features (and possibly extra features that are specific to the domain of hardware description as well). @@ -66,10 +67,11 @@ Haskell an obvious choice. \section[sec:prototype:output]{Output format} - The second important question is: What will be our output format? Since - our prototype won't be able to program FPGA's directly, we'll have to have - output our hardware in some format that can be later processed and - programmed by other tools. + The second important question is: What will be our output format? + This output format should at least allow for programming the + hardware design into a field-programmable gate array (\small{FPGA}). + The choice of output format is thus limited by what hardware + synthesis and programming tools can process. Looking at other tools in the industry, the Electronic Design Interchange Format (\small{EDIF}) is commonly used for storing intermediate @@ -85,7 +87,7 @@ This means that when working with \small{EDIF}, our prototype would become technology dependent (\eg only work with \small{FPGA}s of a specific vendor, or even only with specific chips). This limits the applicability - of our prototype. Also, the tools we'd like to use for verifying, + of our prototype. Also, the tools we would like to use for verifying, simulating and draw pretty pictures of our output (like Precision, or QuestaSim) are designed for \small{VHDL} or Verilog input. @@ -97,31 +99,32 @@ An added advantage of using VHDL is that we can profit from existing optimizations in VHDL synthesizers. A lot of optimizations are done on the - VHDL level by existing tools. These tools have years of experience in this - field, so it would not be reasonable to assume we could achieve a similar - amount of optimization in our prototype (nor should it be a goal, - considering this is just a prototype). + VHDL level by existing tools. These tools have been under + development for years, so it would not be reasonable to assume we + could achieve a similar amount of optimization in our prototype (nor + should it be a goal, considering this is just a prototype). Note that we will be using \small{VHDL} as our output language, but will not use its full expressive power. Our output will be limited to using - simple, structural descriptions, without any behavioural descriptions - (which might not be supported by all tools). This ensures that any tool - that works with \VHDL will understand our output (most tools don't support - synthesis of more complex \VHDL). This also leaves open the option to - switch to \small{EDIF} in the future, with minimal changes to the + simple, structural descriptions, without any complex behavioural + descriptions like arbitrary sequential statements (which might not + be supported by all tools). This ensures that any tool that works + with \VHDL will understand our output (most tools do not support + synthesis of more complex \VHDL). This also leaves open the option + to switch to \small{EDIF} in the future, with minimal changes to the prototype. \section[sec:prototype:design]{Prototype design} As suggested above, we will use the Glasgow Haskell Compiler (\small{GHC}) to implement our prototype compiler. To understand the design of the - compiler, we will first dive into the \small{GHC} compiler a bit. It's + compiler, we will first dive into the \small{GHC} compiler a bit. 
Its
  compilation consists of the following steps (slightly simplified):
 
  \startuseMPgraphic{ghc-pipeline}
    % Create objects
    save inp, front, desugar, simpl, back, out;
    newEmptyBox.inp(0,0);
-    newBox.front(btex Fronted etex);
+    newBox.front(btex Frontend etex);
    newBox.desugar(btex Desugarer etex);
    newBox.simpl(btex Simplifier etex);
    newBox.back(btex Backend etex);
@@ -177,8 +180,8 @@
  \stopdesc
 
  In this process, there are a number of places where we can start our work.
-  Assuming that we don't want to deal with (or modify) parsing, typechecking
-  and other frontend business and that native code isn't really a useful
+  Assuming that we do not want to deal with (or modify) parsing, typechecking
+  and other frontend business and that native code is not really a useful
  format anymore, we are left with the choice between the full Haskell
  \small{AST}, or the smaller (simplified) core representation.
 
@@ -190,9 +193,9 @@
 
  Using the core representation gives us a much more compact datastructure (a
  core expression only uses 9 constructors). Note that this does not mean
-  that the core representation itself is smaller, on the contrary. Since the
-  core language has less constructs, a lot of things will take a larger
-  expression to express.
+  that the core representation itself is smaller, on the contrary.
+  Since the core language has fewer constructs, most Core expressions
+  are larger than the equivalent versions in Haskell.
 
  However, the fact that the core language is so much smaller, means it is a
  lot easier to analyze and translate it into something else. For the same
@@ -208,7 +211,7 @@
    % Create objects
    save inp, front, norm, vhdl, out;
    newEmptyBox.inp(0,0);
-    newBox.front(btex \small{GHC} frontend + desugarer etex);
+    newBox.front(btex \small{GHC} frontend etex);
    newBox.norm(btex Normalization etex);
    newBox.vhdl(btex \small{VHDL} generation etex);
    newEmptyBox.out(0,0);
@@ -235,16 +238,16 @@
  \placefigure[right]{Cλash compiler pipeline}{\useMPgraphic{clash-pipeline}}
 
  \startdesc{Frontend}
-    This is exactly the frontend and desugarer from the \small{GHC}
-    pipeline, that translates Haskell sources to a core representation.
+    This is exactly the frontend from the \small{GHC} pipeline, that
+    translates Haskell sources to a typed Core representation.
  \stopdesc
  \startdesc{Normalization}
    This is a step that transforms the core representation into a normal
   form. This normal form is still expressed in the core language, but has
-    to adhere to an extra set of constraints. This normal form is less
-    expressive than the full core language (e.g., it can have limited higher
-    order expressions, has a specific structure, etc.), but is also very
-    close to directly describing hardware.
+    to adhere to an additional set of constraints. This normal form is less
+    expressive than the full core language (e.g., it can have limited
+    higher-order expressions, has a specific structure, etc.), but is
+    also very close to directly describing hardware.
  \stopdesc
  \startdesc{\small{VHDL} generation}
    The last step takes the normal formed core representation and generates
@@ -257,8 +260,14 @@
    hardware interpretation, are removed and translated into hardware
    constructs. This step is described in a lot of detail at
    \in{chapter}[chap:normalization].
+
+    \defref{entry function}Translation of a hardware description always
+    starts at a single function, which is referred to as the \emph{entry
+    function}. \VHDL is generated for this function first, followed by
+    any functions used by the entry function (recursively).
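As a minimal sketch of such an entry function (plain Haskell; \hs{Bit}
is defined here only for illustration, in Cλash it is a built-in type):

    data Bit = High | Low

    -- A completely monomorphic entry function: its type fixes the
    -- hardware interface to two single-bit inputs and one single-bit
    -- output. Translation starts here and then descends (recursively)
    -- into the functions it uses.
    andGate :: Bit -> Bit -> Bit
    andGate High High = High
    andGate _    _    = Low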
-  \section{The Core language}
+  \section[sec:prototype:core]{The Core language}
    \defreftxt{core}{the Core language}
    Most of the prototype deals with handling the program in the Core
    language. In this section we will show what this language looks like and
@@ -276,7 +285,7 @@
 
    structure. In the \small{GHC} compiler, the Haskell module structure
    is used for the resulting Core code as well. Since this is not so
    relevant for understanding the Core language or the Normalization
-    process, we'll only look at the Core expression language here.
+    process, we will only look at the Core expression language here.
 
    Each Core expression consists of one of these possible expressions.
 
@@ -285,7 +294,7 @@
      \startlambda
      bndr :: T
      \stoplambda
-      This is a reference to a binder. It's written down as the
+      This is a reference to a binder. It is written down as the
      name of the binder that is being referred to along with its type. The
      binder name should of course be bound in a containing scope
      (including top level scope, so a reference to a top level function
@@ -314,7 +323,7 @@
      This is a literal. Only primitive types are supported, like
      chars, strings, ints and doubles. The types of these literals are the
      \quote{primitive}, unboxed versions, like \lam{Char\#} and \lam{Word\#}, not the
-      normal Haskell versions (but there are builtin conversion
+      normal Haskell versions (but there are built-in conversion
      functions). Without going into detail about these types, note that
      a few conversion functions exist to convert these to the normal
      (boxed) Haskell equivalents.
@@ -329,7 +338,25 @@
      parts: The function part and the argument part. Applications are used
      for normal function \quote{calls}, but also for applying type
      abstractions and data constructors.
+
+      In Core, there is no distinction between an operator and a
+      function. This means that, for example, the addition of two numbers
+      looks like the following in Core:
 
+      \startlambda
+      (+) 1 2
+      \stoplambda
+
+      Here, the function \quote{\lam{(+)}} is applied to the numbers 1
+      and 2. However, to increase readability, an application of an
+      operator like \lam{(+)} is sometimes written infix. In this case,
+      the parentheses are also left out, just like in Haskell. In other
+      words, the following means exactly the same as the addition above:
+
+      \startlambda
+      1 + 2
+      \stoplambda
+
      The value of an application is the value of the function part, with
      the first argument binder bound to the argument part.
    \stopdesc
@@ -365,7 +392,7 @@
      evaluating to some other value (for which that binder is in scope). This
      allows for sharing of subexpressions (you can use a binder twice) and
      explicit \quote{naming} of arbitrary expressions. A binder is not
-      in scope in the value bound it is bound to, so it's not possible
+      in scope in the value it is bound to, so it is not possible
      to make recursive definitions with a non-recursive let expression
      (see the recursive form below).
 
@@ -398,8 +425,9 @@
      Core implementation, non-recursive and recursive lets are not so
      distinct as we present them here, but this provides a clearer overview.
 
-      The main difference with the normal let expression is that each of the
-      binders is in scope in each of the values, in addition to the body. This
+      The main difference with the normal let expression is that it can
+      contain multiple bindings (or even none) and each of the binders
+      is in scope in each of the values, in addition to the body. This
      allows for self-recursive or mutually recursive definitions.
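For example, the following plain Haskell fragment (illustrative names)
desugars to a single recursive let in Core, since each binder is used
inside the other's body:

    -- Mutual recursion between two local definitions, which only a
    -- recursive let can express (assumes a non-negative argument).
    parity :: Int -> Bool
    parity n =
      let isEven k = k == 0 || isOdd  (k - 1)
          isOdd  k = k /= 0 && isEven (k - 1)
      in  isEven n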
It is also possible to express a recursive let expression using @@ -520,7 +548,7 @@ Here, there is only a single alternative (but no \lam{DEFAULT} alternative, since the single alternative is already exhaustive). When - it's body is evaluated, the arguments to the tuple constructor \lam{(,)} + its body is evaluated, the arguments to the tuple constructor \lam{(,)} (\eg, the elements of the tuple) are bound to \lam{a} and \lam{b}. \stopdesc @@ -555,7 +583,7 @@ Note that this syntax is also used sometimes to indicate that a particular expression has a particular type, even when no cast expression is involved. This is then purely informational, since the only elements that - are explicitely typed in the Core language are the binder references and + are explicitly typed in the Core language are the binder references and cast expressions, the types of all other elements are determined at runtime. \stopdesc @@ -563,7 +591,7 @@ \startdesc{Note} The Core language in \small{GHC} allows adding \emph{notes}, which serve as hints to the inliner or add custom (string) annotations to a core - expression. These shouldn't be generated normally, so these are not + expression. These should not be generated normally, so these are not handled in any way in the prototype. \stopdesc @@ -583,8 +611,8 @@ applications above. This means that a type expression in Core can only ever occur in the argument position of an application, and only if the type of the function that is applied to expects a type - as the first argument. This happens for all polymorphic functions, - for example, the \lam{fst} function: + as the first argument. This happens in applications of all + polymorphic functions. Consider the \lam{fst} function: \startlambda fst :: \forall t1. \forall t2. (t1, t2) ->t1 @@ -610,7 +638,7 @@ We will slightly limit our view on Core's type system, since the more complicated parts of it are only meant to support Haskell's (or rather, \GHC's) type extensions, such as existential types, \small{GADT}s, type - families and other non-standard Haskell stuff which we don't (plan to) + families and other non-standard Haskell stuff which we do not (plan to) support. In Core, every expression is typed. The translation to Core happens @@ -683,7 +711,7 @@ binder in the topmost lambda. When using a value with a forall type, the actual type - used must be applied first. For example haskell expression \hs{id + used must be applied first. For example Haskell expression \hs{id True} (the function \hs{id} appleid to the dataconstructor \hs{True}) translates to the following Core: @@ -878,7 +906,7 @@ \in{example}[ex:AvgState]. This circuit lets the accumulation of the inputs be done by a subcomponent, \hs{acc}, but keeps a count of value accumulated in its own state.\footnote{Currently, the prototype - is not able to compile this example, since the builtin function + is not able to compile this example, since the built-in function for division has not been added.} \startbuffer[AvgState] @@ -917,7 +945,7 @@ \section{Implementing state} Now its clear how to put state annotations in the Haskell source, there is the question of how to implement this state translation. As - we've seen in \in{section}[sec:prototype:design], the translation to + we have seen in \in{section}[sec:prototype:design], the translation to \VHDL happens as a simple, final step in the compilation process. This step works on a core expression in normal form. The specifics of normal form will be explained in @@ -1133,22 +1161,23 @@ called function. 
So, we can completely ignore substates when generating \VHDL for a function. - From this observation, we might think to remove the substates from a - function's states alltogether, and leave only the state components - which are actual states of the current function. While doing this - would not remove any information needed to generate \small{VHDL} from - the function, it would cause the function definition to become invalid - (since we won't have any substate to pass to the functions anymore). - We could solve the syntactic problems by passing \type{undefined} for - state variables, but that would still break the code on the semantic - level (\ie, the function would no longer be semantically equivalent to - the original input). + From this observation it might seem logical to remove the + substates from a function's states altogether and leave only the + state components which are actual states of the current function. + While doing this would not remove any information needed to + generate \small{VHDL} from the function, it would cause the + function definition to become invalid (since we will not have any + substate to pass to the functions anymore). We could solve the + syntactic problems by passing \type{undefined} for state + variables, but that would still break the code on the semantic + level (\ie, the function would no longer be semantically + equivalent to the original input). To keep the function definition correct until the very end of the process, we will not deal with (sub)states until we get to the \small{VHDL} generation. Then, we are translating from Core to \small{VHDL}, and we can simply ignore substates, effectively removing - the substate components alltogether. + the substate components altogether. But how will we know what exactly is a substate? Since any state argument or return value that represents state must be of the @@ -1199,7 +1228,7 @@ When applying these rules to the description in \in{example}[ex:AvgStateNormal], we are left with the description - in \in{example}[ex:AvgStateRemoved]. All the parts that don't + in \in{example}[ex:AvgStateRemoved]. All the parts that do not generate any \VHDL directly are crossed out, leaving just the actual flow of values in the final hardware. @@ -1228,7 +1257,7 @@ \lam{s'} is assigned the \quote{next} state. So, at the end of each clock cycle, \lam{s'} should be assigned to \lam{s}. - As you can see, the definition of \lam{s'} is still present, since + In the example, the definition of \lam{s'} is still present, since it does not have a state type. The \lam{accums'} substate has been removed, leaving us just with the state of \lam{avg} itself. diff --git a/Outline b/Outline index b9939ea..c0b7ad2 100644 --- a/Outline +++ b/Outline @@ -66,3 +66,4 @@ TODO: Abstract TODO: Preface TODO: Footnote font has no lambda TODO: eta-abstraction -> expansion +TODO: Top level function -> top level binder diff --git a/Report.bib b/Report.bib index 3e64e11..ec6846a 100644 --- a/Report.bib +++ b/Report.bib @@ -92,3 +92,61 @@ publisher = {ACM}, address = {New York, NY, USA}, } + +@phdthesis{sheeran83, + author = {Mary Sheeran}, + title = {µFP, an algebraic VLSI design language}, + year = {1983}, + school = {Programming Research Group, Oxford University}, +} + +@inproceedings{jones90, + address = {Lyngby, Denmark}, + author = {Jones, G.
and Sheeran, M.}, + booktitle = {Formal Methods for VLSI Design}, + publisher = {Elsevier Science Publishers}, + title = {Circuit Design in Ruby}, + year = {1990} +} + +@phdthesis{claessen00, + title = {An Embedded Language Approach to Hardware Description and Verification}, + author = {Koen Claessen}, + year = {2000}, + school = {Dept. of Computer Science and Engineering, Chalmers University of Technology}, +} + +@article{sander04, + author = {Sander, I. and Jantsch, A.}, + title = {System Modeling and Design Refinement in ForSyDe}, + journal = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems}, + year = {2004}, + volume = {23}, + number = {1}, + pages = {17--32}, + issn = {0278-0070}, +} + +@mastersthesis{baaij09, + author = {Christiaan Baaij}, + title = {CλasH: From Haskell to Hardware}, + year = {2009}, + school = {University of Twente}, +} + +@article{reynolds98, + author = {Reynolds, John C.}, + title = {Definitional Interpreters for Higher-Order Programming Languages}, + journal = {Higher Order Symbol. Comput.}, + volume = {11}, + number = {4}, + year = {1998}, + issn = {1388-3690}, + pages = {363--397}, + doi = {10.1023/A:1010027404223}, + publisher = {Kluwer Academic Publishers}, + address = {Hingham, MA, USA}, +} -- 2.30.2