Fix spelling as suggested by aspell.

[matthijs/master-project/report.git] / Chapters / Prototype.tex
diff --git a/Chapters/Prototype.tex b/Chapters/Prototype.tex

index eb66f002e1cb1731651164fd09e6754057ba9720..317e2dc774589ec5f2c877dc19f334bf4367e296 100644 (file)
--- a/Chapters/Prototype.tex
+++ b/Chapters/Prototype.tex
@@ -26,7 +26,7 @@
        are convenient for describing hardware and can contain special
        constructs that allow our hardware descriptions to be more powerful or
        concise.
-      \item Use an existing language and create a new backend for it. This has
+      \item Use an existing language and create a new back-end for it. This has
        the advantage that existing tools can be reused, which will speed up
        development.
      \stopitemize
@@ -49,7 +49,7 @@
      }
      Considering that we required a prototype which should be working quickly,
      and that implementing parsers, semantic checkers and especially
-    typcheckers is not exactly the core of this research (but it is lots and
+    type-checkers is not exactly the core of this research (but it is lots and
      lots of work!), using an existing language is the obvious choice. This
      also has the advantage that a large set of language features is available
      to experiment with and it is easy to find which features apply well and
@@ -117,7 +117,7 @@
        \stopalignment
        \blank[medium]
          In this thesis the words \emph{translation}, \emph{compilation} and
-        sometimes \emph{synthesis} will be used interchangedly to refer to the
+        sometimes \emph{synthesis} will be used interchangeably to refer to the
          process of translating the hardware description from the Haskell
          language to the \VHDL\ language.
  
@@ -131,7 +131,7 @@
  
      Note that we will be using \small{VHDL} as our output language, but will
      not use its full expressive power. Our output will be limited to using
-    simple, structural descriptions, without any complex behavioural
+    simple, structural descriptions, without any complex behavioral
      descriptions like arbitrary sequential statements (which might not
      be supported by all tools). This ensures that any tool that works
      with \VHDL\ will understand our output (most tools do not support
@@ -196,7 +196,7 @@
        complete Haskell language and is thus a very complex one (in contrast
        with the Core \small{AST}, later on). All identifiers in this
        \small{AST} are resolved by the renamer and all types are checked by the
-      typechecker.
+      type-checker.
      \stopdesc
      \startdesc{Desugaring}
        This step takes the full \small{AST} and translates it to the
@@ -207,7 +207,7 @@
      \stopdesc
      \startdesc{Simplification}
        Through a number of simplification steps (such as inlining, common
-      subexpression elimination, etc.) the Core program is simplified to make
+      sub-expression elimination, etc.) the Core program is simplified to make
        it faster or easier to process further.
      \stopdesc
      \startdesc{Backend}
@@ -217,26 +217,26 @@
      \stopdesc
  
      In this process, there are a number of places where we can start our work.
-    Assuming that we do not want to deal with (or modify) parsing, typechecking
-    and other frontend business and that native code is not really a useful
+    Assuming that we do not want to deal with (or modify) parsing, type-checking
+    and other front-end business and that native code is not really a useful
      format anymore, we are left with the choice between the full Haskell
-    \small{AST}, or the smaller (simplified) core representation.
+    \small{AST}, or the smaller (simplified) Core representation.
  
      The advantage of taking the full \small{AST} is that the exact structure
      of the source program is preserved. We can see exactly what the hardware
      description looks like and which syntax constructs were used. However,
-    the full \small{AST} is a very complicated datastructure. If we are to
+    the full \small{AST} is a very complicated data-structure. If we are to
      handle everything it offers, we will quickly get a big compiler.
  
-    Using the core representation gives us a much more compact datastructure
-    (a core expression only uses 9 constructors). Note that this does not mean
-    that the core representation itself is smaller, on the contrary.
-    Since the core language has less constructs, most Core expressions
+    Using the Core representation gives us a much more compact data-structure
+    (a Core expression only uses 9 constructors). Note that this does not mean
+    that the Core representation itself is smaller, on the contrary.
+    Since the Core language has fewer constructs, most Core expressions
      are larger than the equivalent versions in Haskell.
  
-    However, the fact that the core language is so much smaller, means it is a
+    However, the fact that the Core language is so much smaller, means it is a
      lot easier to analyze and translate it into something else. For the same
-    reason, \small{GHC} runs its simplifications and optimizations on the core
+    reason, \small{GHC} runs its simplifications and optimizations on the Core
      representation as well \cite[jones96].
  
      We will use the normal Core representation, not the simplified Core. Even
@@ -255,7 +255,7 @@
        % Create objects
        save inp, front, norm, vhdl, out;
        newEmptyBox.inp(0,0);
-      newBox.front(btex \small{GHC} frontend etex);
+      newBox.front(btex \small{GHC} front-end etex);
        newBox.norm(btex Normalization etex);
        newBox.vhdl(btex \small{VHDL} generation etex);
        newEmptyBox.out(0,0);
@@ -282,19 +282,19 @@
      \placefigure[right]{Cλash compiler pipeline}{\startboxed \useMPgraphic{clash-pipeline}\stopboxed}
  
      \startdesc{Frontend}
-      This is exactly the frontend from the \small{GHC} pipeline, that
+      This is exactly the front-end from the \small{GHC} pipeline, that
        translates Haskell sources to a typed Core representation.
      \stopdesc
      \startdesc{Normalization}
-      This is a step that transforms the core representation into a normal
-      form. This normal form is still expressed in the core language, but has
+      This is a step that transforms the Core representation into a normal
+      form. This normal form is still expressed in the Core language, but has
        to adhere to an additional set of constraints. This normal form is less
-      expressive than the full core language (e.g., it can have limited 
+      expressive than the full Core language (e.g., it can have limited 
        higher-order expressions, has a specific structure, etc.), but is
        also very close to directly describing hardware.
      \stopdesc
      \startdesc{\small{VHDL} generation}
-      The last step takes the normal formed core representation and generates
+      The last step takes the normal formed Core representation and generates
        \small{VHDL} for it. Since the normal form has a specific, hardware-like
        structure, this final step is very straightforward.
      \stopdesc
@@ -312,7 +312,7 @@
      any functions used by the entry functions (recursively).
      
    \section[sec:prototype:core]{The Core language}
-    \defreftxt{core}{the Core language}
+    \defreftxt{Core}{the Core language}
      Most of the prototype deals with handling the program in the Core
      language. In this section we will show what this language looks like and
      how it works.
@@ -343,7 +343,7 @@
        binder name should of course be bound in a containing scope
        (including top level scope, so a reference to a top level function
        is also a variable reference). Additionally, constructors from
-      algebraic datatypes also become variable references.
+      algebraic data-types also become variable references.
  
        In our examples, binders will commonly consist of a single
        character, but they can have any length.
@@ -365,7 +365,7 @@
        10
        \stoplambda
        This is a literal. Only primitive types are supported, like
-      chars, strings, ints and doubles. The types of these literals are the
+      chars, strings, integers and doubles. The types of these literals are the
        \quote{primitive}, unboxed versions, like \lam{Char\#} and \lam{Word\#}, not the
        normal Haskell versions (but there are built-in conversion
        functions). Without going into detail about these types, note that
@@ -383,7 +383,7 @@
        for normal function \quote{calls}, but also for applying type
        abstractions and data constructors.
  
-      In core, there is no distinction between an operator and a
+      In Core, there is no distinction between an operator and a
        function. This means that, for example, the addition of two numbers
        looks like the following in Core:
        
@@ -434,7 +434,7 @@
        \stoplambda
        A let expression allows you to bind a binder to some value, while
        evaluating to some other value (for which that binder is in scope). This
-      allows for sharing of subexpressions (you can use a binder twice) and
+      allows for sharing of sub-expressions (you can use a binder twice) and
        explicit \quote{naming} of arbitrary expressions. A binder is not
        in scope in the value it is bound to, so it is not possible
        to make recursive definitions with a non-recursive let expression
@@ -512,7 +512,7 @@
  
        A case expression is the only way in Core to choose between values. All
        \hs{if} expressions and pattern matchings from the original Haskell
-      PRogram have been translated to case expressions by the desugarer. 
+      program have been translated to case expressions by the desugarer. 
        
        A case expression evaluates its scrutinee, which should have an
        algebraic datatype, into weak head normal form (\small{WHNF}) and
@@ -525,7 +525,7 @@
  
        This is best illustrated with an example. Assume
        there is an algebraic datatype declared as follows\footnote{This
-      datatype is not suported by the current Cλash implementation, but
+      datatype is not supported by the current Cλash implementation, but
        serves well to illustrate the case expression}:
  
        \starthaskell
@@ -561,7 +561,7 @@
        
        To support strictness, the scrutinee is always evaluated into
        \small{WHNF}, even when there is only a \lam{DEFAULT} alternative. This
-      allows aplication of the strict function \lam{f} to the argument \lam{a}
+      allows application of the strict function \lam{f} to the argument \lam{a}
        to be written like:
  
        \startlambda
@@ -581,7 +581,7 @@
        in Haskell. Only the constructor of an expression can be matched,
        complex patterns are implemented using multiple nested case expressions.
  
-      Case expressions are also used for unpacking of algebraic datatypes, even
+      Case expressions are also used for unpacking of algebraic data-types, even
        when there is only a single constructor. For example, to add the elements
        of a tuple, the following Core is generated:
  
@@ -613,8 +613,8 @@
        different types (so a cast is needed) with the same representation (but
        no work is done by the cast).
  
-      More complex are types that are proven to be equal by the typechecker,
-      but look different at first glance. To ensure that, once the typechecker
+      More complex are types that are proven to be equal by the type-checker,
+      but look different at first glance. To ensure that, once the type-checker
        has proven equality, this information sticks around, explicit casts are
        added. In our notation we only write the target type, but in reality a
        cast expression carries around a \emph{coercion}, which can be seen as a
@@ -626,7 +626,7 @@
  
      \startdesc{Note}
        The Core language in \small{GHC} allows adding \emph{notes}, which serve
-      as hints to the inliner or add custom (string) annotations to a core
+      as hints to the inliner or add custom (string) annotations to a Core
        expression. These should not be generated normally, so these are not
        handled in any way in the prototype.
      \stopdesc
@@ -660,7 +660,7 @@
            
        The type of \lam{fst} has two universally quantified type variables. When
        \lam{fst} is applied in \lam{fstint}, it is first applied to two types
-      (which are substitued for \lam{t1} and \lam{t2} in the type of \lam{fst}, so
+      (which are substituted for \lam{t1} and \lam{t2} in the type of \lam{fst}, so
        the actual type of arguments and result of \lam{fst} can be found:
        \lam{fst @Int @Int :: (Int, Int) -> Int}).
      \stopdesc
@@ -678,6 +678,7 @@
        support.
  
        \placeintermezzo{}{
+        \defref{id function}
          \startframedtext[width=8cm,background=box,frame=no]
          \startalignment[center]
            {\tfa The \hs{id} function}
@@ -694,11 +695,11 @@
          \stopframedtext
        }
        In Core, every expression is typed. The translation to Core happens
-      after the typechecker, so types in Core are always correct as well
+      after the type-checker, so types in Core are always correct as well
        (though you could of course construct invalidly typed expressions
        through the \GHC\ API).
  
-      Any type in core is one of the following:
+      Any type in Core is one of the following:
  
        \startdesc{A type variable}
          \startlambda
@@ -764,7 +765,7 @@
  
          When using a value with a forall type, the actual type
          used must be applied first. For example, the Haskell expression \hs{id
-        True} (the function \hs{id} appleid to the dataconstructor \hs{True})
+        True} (the function \hs{id} applied to the data-constructor \hs{True})
          translates to the following Core:
  
          \startlambda
@@ -795,7 +796,7 @@
          A predicate type introduces a constraint on a type variable introduced
          by a forall type (or type lambda). In the example above, the type
          variable \lam{t} can only contain types that are an \emph{instance} of
-        the \emph{type class} \lam{Show}. \refdef{type class}
+        the \emph{type class} \lam{Show}.
  
          There are other sorts of predicate types, used for the type families
          extension, which we will not discuss here.
@@ -840,7 +841,7 @@
        Haskell provides type synonyms as a way to declare a new type that is
        equal to an existing type (or rather, a new name for an existing type).
        This allows both the original type and the synonym to be used
-      interchangedly in a Haskell program. This means no explicit conversion
+      interchangeably in a Haskell program. This means no explicit conversion
        is needed. For example, a simple accumulator would become:
  
        \starthaskell
@@ -867,8 +868,8 @@
      % section headings.
      \subsection{Type renaming (\type{newtype})}
        Haskell also supports type renamings as a way to declare a new type that
-      has the same (runtime) representation as an existing type (but is in
-      fact a different type to the typechecker). With type renaming,
+      has the same (run-time) representation as an existing type (but is in
+      fact a different type to the type-checker). With type renaming,
        explicit conversion between values of the two types is needed. The
        accumulator would then become:
  
@@ -902,14 +903,14 @@
        needed. For example, consider the following state type (this is just the
        state type, not the entire function type):
  
-      \starttyping
+      \starthaskell
        State (State Bit, State (State Word, Bit), Word)
-      \stoptyping
+      \stophaskell
  
        We cannot leave all these \hs{State} type constructors out, since that
        would change the type (unlike when using type synonyms). However, when
-      using type synonyms to hide away substates (see
-      \in{section}[sec:prototype:substatesynonyms] below), this
+      using type synonyms to hide away sub-states (see
+      \in{section}[sec:prototype:sub-statesynonyms] below), this
        disadvantage should be limited.
  
        \subsubsection{Different input and output types}
@@ -918,7 +919,7 @@
          then become something like:
  
          \starthaskell
-        -- These type renaminges would become part of Cλash, it is shown
+        -- These type renamings would become part of Cλash, it is shown
          -- here just for clarity.
          newtype StateIn s = StateIn s
          newtype StateOut s = StateOut s
@@ -931,12 +932,12 @@
          descriptions less error-prone (you can no longer \quote{forget} to
          unpack and repack a state variable and just return it directly, which
          can be a problem in the current prototype). However, it also means we
-        need twice as many type synonyms to hide away substates, making this
+        need twice as many type synonyms to hide away sub-states, making this
          approach a bit cumbersome. It also makes it harder to compare input
          and output state types, possibly reducing the type-safety of the
          descriptions.
  
-    \subsection[sec:prototype:substatesynonyms]{Type synonyms for substates}
+    \subsection[sec:prototype:sub-statesynonyms]{Type synonyms for sub-states}
        As noted above, when using nested (hierarchical) states, the state types
        of the \quote{upper} functions (those that call other functions, which
        call other functions, etc.) quickly become complicated. Also, when the
@@ -965,7 +966,7 @@
        \subsubsection{Example}
          As an example of the used approach, a simple averaging circuit
          is shown in \in{example}[ex:AvgState]. This circuit lets the
-        accumulation of the inputs be done by a subcomponent, \hs{acc},
+        accumulation of the inputs be done by a sub-component, \hs{acc},
          but keeps a count of values accumulated in its own
          state.\footnote{Currently, the prototype is not able to compile
          this example, since there is no built-in function for division.}
@@ -1071,7 +1072,7 @@
        equal to \lam{State Word}).
  
        We also use a distinction between \emph{input} and \emph{output
-      (state) variables} and \emph{substate variables}, which will be
+      (state) variables} and \emph{sub-state variables}, which will be
        defined in the rules themselves.
  
        These rules describe everything that can be done with state
@@ -1100,17 +1101,17 @@
          If the result of this unpacking does not have a state type and does
          not contain state variables, there are no limitations on its
          use (this is the function's own state).  Otherwise if it does
-        not have a state type but does contain substates, we refer to it
+        not have a state type but does contain sub-states, we refer to it
          as a \emph{state-containing input variable} and the limitations
          below apply. If it has a state type itself, we refer to it as an
-        \emph{input substate variable} and the below limitations apply
+        \emph{input sub-state variable} and the below limitations apply
          as well.
  
          It may seem strange to consider a variable that still has a state
          type directly after unpacking, but consider the case where a
          function does not have any state of its own, but does call a single
          stateful function. This means it must have a state argument that
-        contains just a substate. The function signature of such a function
+        contains just a sub-state. The function signature of such a function
          could look like:
  
          \starthaskell
@@ -1126,8 +1127,8 @@
          \stoplambda
  
          A state-containing input variable is typically a tuple containing
-        multiple elements (like the current function's state, substates or
-        more tuples containing substates). All of these can be extracted
+        multiple elements (like the current function's state, sub-states or
+        more tuples containing sub-states). All of these can be extracted
          from an input variable using an extractor case (or possibly
          multiple, when the input variable is nested).
  
@@ -1137,25 +1138,25 @@
          type but does contain state variables we refer to it as a
          \emph{state-containing input variable} and this limitation keeps
          applying. If the variable has a state type itself, we refer to
-        it as an \emph{input substate variable} and below limitations
+        it as an \emph{input sub-state variable} and the below limitations
          apply.
  
-      \startdesc{Input substate variables can be passed to functions.} 
+      \startdesc{Input sub-state variables can be passed to functions.} 
          \startlambda
            accres = acc i accs
            accs' = case accres of (e, f) -> e
          \stoplambda
          
-        An input substate variable can (only) be passed to a function.
-        Additionally, every input substate variable must be used in exactly
+        An input sub-state variable can (only) be passed to a function.
+        Additionally, every input sub-state variable must be used in exactly
          \emph{one} application, no more and no less.
  
          The function result should contain exactly one state variable, which
          can be extracted using (multiple) case expressions. The extracted
-        state variable is referred to the \emph{output substate}
+        state variable is referred to as the \emph{output sub-state}.
  
-        The type of this output substate must be identical to the type of
-        the input substate passed to the function.
+        The type of this output sub-state must be identical to the type of
+        the input sub-state passed to the function.
        \stopdesc
  
        \startdesc{Variables can be inserted into a state-containing output variable.}
@@ -1164,7 +1165,7 @@
          \stoplambda
          
          A function's output state is usually a tuple containing its own
-        updated state variables and all output substates. This result is
+        updated state variables and all output sub-states. This result is
          built up using any single-constructor algebraic datatype
          (possibly nested).
  
@@ -1178,7 +1179,7 @@
            spacked' = s' ▶ State (AccState, Word)
          \stoplambda
  
-        As soon as all a functions own update state and output substate
+        As soon as all of a function's own updated state and output sub-state
          variables have been joined together, the resulting
          state-containing output variable can be packed into an output
          state variable. Packing is done by casting to a state type.
@@ -1199,10 +1200,10 @@
        \stopdesc
  
        There is one final limitation that is hard to express in the above
-      itemization. Whenever substates are extracted from the input state
-      to be passed to functions, the corresponding output substates
+      itemization. Whenever sub-states are extracted from the input state
+      to be passed to functions, the corresponding output sub-states
        should be inserted into the output state in the same way. In other
-      words, each pair of corresponding substates in the input and
+      words, each pair of corresponding sub-states in the input and
        output states should be passed to / returned from the same called
        function.
  
@@ -1224,21 +1225,21 @@
        \in{section}[sec:normalization:stateproblems].
  
        This approach seems simple enough, but will this also work for more
-      complex stateful functions involving substates?  Observe that any
-      component of a function's state that is a substate, \ie\ passed on as
+      complex stateful functions involving sub-states?  Observe that any
+      component of a function's state that is a sub-state, \ie\ passed on as
        the state of another function, should have no influence on the
        hardware generated for the calling function. Any state-specific
        \small{VHDL} for this component can be generated entirely within the
-      called function.  So, we can completely ignore substates when
+      called function.  So, we can completely ignore sub-states when
        generating \VHDL\ for a function.
        
        From this observation it might seem logical to remove the
-      substates from a function's states altogether and leave only the
+      sub-states from a function's states altogether and leave only the
        state components which are actual states of the current function.
        While doing this would not remove any information needed to
        generate \small{VHDL} from the function, it would cause the
        function definition to become invalid (since we will not have any
-      substate to pass to the functions anymore).  We could solve the
+      sub-state to pass to the functions anymore).  We could solve the
        syntactic problems by passing \type{undefined} for state
        variables, but that would still break the code on the semantic
        level (\ie, the function would no longer be semantically
@@ -1247,24 +1248,24 @@
        To keep the function definition correct until the very end of the
        process, we will not deal with (sub)states until we get to the
        \small{VHDL} generation.  Then, we are translating from Core to
-      \small{VHDL}, and we can simply generate no \VHDL for substates,
+      \small{VHDL}, and we can simply generate no \VHDL\ for sub-states,
        effectively removing them altogether.
  
-      But, how will we know what exactly is a substate? Since any state
+      But, how will we know what exactly is a sub-state? Since any state
        argument or return value that represents state must be of the
        \type{State} type, we can look at the type of a value. However, we
-      must be careful to ignore only \emph{substates}, and not a
+      must be careful to ignore only \emph{sub-states}, and not a
        function's own state.
  
        For \in{example}[ex:AvgStateNormal] above, we should generate a register
        with its output connected to \lam{s} and its input connected
        to \lam{s'}. However, \lam{s'} is built up from both \lam{accs'} and
        \lam{count'}, while only \lam{count'} should end up in the register.
-      \lam{accs'} is a substate for the \lam{acc} function, for which a
+      \lam{accs'} is a sub-state for the \lam{acc} function, for which a
        register will be created when generating \VHDL\ for the \lam{acc}
        function.
  
-      Fortunately, the \lam{accs'} variable (and any other substate) has a
+      Fortunately, the \lam{accs'} variable (and any other sub-state) has a
        property that we can easily check: it has a \lam{State} type. This
        means that whenever \VHDL\ is generated for a tuple (or other
        algebraic type), we can simply leave out all elements that have a
@@ -1289,11 +1290,11 @@
          new state.
          \item Any values of a State type should not be translated to
          \small{VHDL}. In particular, State elements should be removed from
-        tuples (and other datatypes) and arguments with a state type should
+        tuples (and other data-types) and arguments with a state type should
          not generate ports.
          \item To make the state actually work, a simple \small{VHDL}
          (sequential) process should be generated. This process updates
-        the state at every clockcycle, by assigning the new state to the
+        the state at every clock cycle, by assigning the new state to the
          current state. This will be recognized by synthesis tools as a
          register specification.
        \stopitemize
@@ -1326,7 +1327,7 @@
              res
        \stopbuffer
        \placeexample[here][ex:AvgStateRemoved]{Normalized version of \in{example}[ex:AvgState] with ignored parts crossed out}
-          {\typebufferlam{AvgStatRemoved}}
+          {\typebufferlam{AvgStateRemoved}}
                
        When we actually leave out the crossed out parts, we get a slightly
        weird program: there is a variable \lam{s} which has no value, and there
@@ -1424,12 +1425,28 @@
          end architecture structural;
        \stopbuffer 
      
-      \placeexample[][ex:AvgStateTypes]{\VHDL\ types generated for acc and avg from \in{example}[ex:AvgState]}
+      \placeexample[][ex:AvgStateTypes]{\VHDL\ types generated for \hs{acc} and \hs{avg} from \in{example}[ex:AvgState]}
            {\typebuffervhdl{AvgStateTypes}}
-      \placeexample[][ex:AccStateVHDL]{\VHDL\ generated for acc from \in{example}[ex:AvgState]}
+      \placeexample[][ex:AccStateVHDL]{\VHDL\ generated for \hs{acc} from \in{example}[ex:AvgState]}
            {\typebuffervhdl{AccStateVHDL}}
-      \placeexample[][ex:AvgStateVHDL]{\VHDL\ generated for avg from \in{example}[ex:AvgState]}
+      \placeexample[][ex:AvgStateVHDL]{\VHDL\ generated for \hs{avg} from \in{example}[ex:AvgState]}
            {\typebuffervhdl{AvgStateVHDL}}
+  \section{Prototype implementation}
+    The prototype has been implemented using Haskell as its
+    implementation language, just like \GHC. This allows the prototype
+    to directly use parts of \GHC\ through the \small{API} it exposes
+    (which essentially taps directly into the internals of \GHC, making
+    this \small{API} not really a stable interface).
+
+    Cλash can be run from a separate library, but has also been
+    integrated into \type{ghci} \cite[baaij09]. The latter does require
+    a custom \GHC\ build, however.
+
+    The latest version and all history of the Cλash code can be browsed
+    on-line or retrieved using the \type{git} program.
+
+    http://git.stderr.nl/gitweb?p=matthijs/projects/cλash.git
+
  %    \subsection{Initial state}
  %      How to specify the initial state? Cannot be done inside a hardware
  %      function, since the initial state is its own state argument for the first