Define \epmh{foo} for easy emphasis.

[matthijs/master-project/report.git] / Chapters / Normalization.tex
diff --git a/Chapters/Normalization.tex b/Chapters/Normalization.tex

index 2acd1c8a14296539507ef8e153c19ff79a9498f5..6c9b43470dee4a4b714b08bdad4a0b5fcd23182f 100644 (file)
--- a/Chapters/Normalization.tex
+++ b/Chapters/Normalization.tex
@@ -93,6 +93,44 @@ An example of a program in canonical form would be:
      res
  \stoplambda
  
      res
  \stoplambda
  
+\startlambda
+\italic{normal} = \italic{lambda}
+\italic{lambda} = λvar.\italic{lambda} (representable(typeof(var)))
+                | \italic{toplet} 
+\italic{toplet} = let \italic{binding} in \italic{toplet} 
+                | letrec [\italic{binding}] in \italic{toplet}
+                | var (representable(typeof(var)), fvar(var))
+\italic{binding} = var = \italic{rhs} (representable(typeof(rhs)))
+                 -- State packing and unpacking by coercion
+                 | var0 = var1 :: State ty (fvar(var1))
+                 | var0 = var1 :: ty (var0 :: State ty) (fvar(var1))
+\italic{rhs} = userapp
+             | builtinapp
+             -- Extractor case
+             | case var of C a0 ... an -> ai (fvar(var))
+             -- Selector case
+             | case var of (fvar(var))
+                DEFAULT -> var0 (fvar(var0))
+                C w0 ... wn -> resvar (\forall{}i, wi \neq resvar, fvar(resvar))
+\italic{userapp} = \italic{userfunc}
+                 | \italic{userapp} {userarg}
+\italic{userfunc} = var (tvar(var))
+\italic{userarg} = var (fvar(var))
+\italic{builtinapp} = \italic{builtinfunc}
+                    | \italic{builtinapp} \italic{builtinarg}
+\italic{builtinfunc} = var (bvar(var))
+\italic{builtinarg} = \italic{coreexpr}
+\stoplambda
+
+-- TODO: Define tvar, fvar, typeof, representable
+-- TODO: Limit builtinarg further
+
+-- TODO: There can still be other casts around (which the code can handle,
+e.g., ignore), which still need to be documented here.
+
+-- TODO: Note about the selector case. It just supports Bit and Bool
+currently, perhaps it should be generalized in the normal form?
+
  When looking at such a program from a hardware perspective, the top level
  lambda's define the input ports. The value produced by the let expression is
  the output port. Most function applications bound by the let expression
  When looking at such a program from a hardware perspective, the top level
  lambda's define the input ports. The value produced by the let expression is
  the output port. Most function applications bound by the let expression
@@ -202,41 +240,44 @@ into expressions as far as possible. In lambda calculus, this reduction
  is known as β-reduction, but it is of course only defined for
  applications of lambda abstractions. We extend this reduction to also
  work for the rest of core (case and let expressions).
  is known as β-reduction, but it is of course only defined for
  applications of lambda abstractions. We extend this reduction to also
  work for the rest of core (case and let expressions).
-\startbuffer[from]
+
+For let expressions:
+\starttrans
+let binds in E) M
+-----------------
+let binds in E M
+\stoptrans
+
+For case statements:
+\starttrans
  (case x of
    p1 -> E1
    \vdots
    pn -> En) M
  (case x of
    p1 -> E1
    \vdots
    pn -> En) M
-\stopbuffer
-\startbuffer[to]
+-----------------
  case x of
    p1 -> E1 M
    \vdots
    pn -> En M
  case x of
    p1 -> E1 M
    \vdots
    pn -> En M
-\stopbuffer
+\stoptrans
  
  
-%\transform{Extended β-reduction}
-%{
-%\conclusion
-%\trans{(λx.E) M}{E[M/x]}
-%
-%\nextrule
-%\conclusion
-%\trans{(let binds in E) M}{let binds in E M}
-%
-%\nextrule
-%\conclusion
-%\transbuf{from}{to}
-%}
+For lambda expressions:
+\starttrans
+(λx.E) M
+-----------------
+E[M/x]
+\stoptrans
  
  
+% And an example
  \startbuffer[from]
  \startbuffer[from]
-let a = (case x of 
-           True -> id
-           False -> neg
-        ) 1
-    b = (let y = 3 in add y) 2
-in
-  (λz.add 1 z) 3
+( let a = (case x of 
+            True -> id
+            False -> neg
+          ) 1
+      b = (let y = 3 in add y) 2
+  in
+    (λz.add 1 z)
+) 3
  \stopbuffer
  
  \startbuffer[to]
  \stopbuffer
  
  \startbuffer[to]
@@ -250,6 +291,339 @@ in
  
  \transexample{Extended β-reduction}{from}{to}
  
  
  \transexample{Extended β-reduction}{from}{to}
  
+\subsection{Let derecursification}
+This transformation is meant to make lets non-recursive whenever possible.
+This might allow other optimizations to do their work better. TODO: Why is
+this needed exactly?
+
+\subsection{Let flattening}
+This transformation puts nested lets in the same scope, by lifting the
+binding(s) of the inner let into a new let around the outer let. Eventually,
+this will cause all let bindings to appear in the same scope (they will all be
+in scope for the function return value).
+
+Note that this transformation does not try to be smart when faced with
+recursive lets, it will just leave the lets recursive (possibly joining a
+recursive and non-recursive let into a single recursive let). The let
+rederursification transformation will do this instead.
+
+\starttrans
+letnonrec x = (let bindings in M) in N
+------------------------------------------
+let bindings in (letnonrec x = M) in N
+\stoptrans
+
+\starttrans
+letrec 
+  \vdots
+  x = (let bindings in M)
+  \vdots
+in
+  N
+------------------------------------------
+letrec
+  \vdots
+  bindings
+  x = M
+  \vdots
+in
+  N
+\stoptrans
+
+\startbuffer[from]
+let
+  a = letrec
+    x = 1
+    y = 2
+  in
+    x + y
+in
+  letrec
+    b = let c = 3 in a + c
+    d = 4
+  in
+    d + b
+\stopbuffer
+\startbuffer[to]
+letrec
+  x = 1
+  y = 2
+in
+  let
+    a = x + y
+  in
+    letrec
+      c = 3
+      b = a + c
+      d = 4
+    in
+      d + b
+\stopbuffer
+
+\transexample{Let flattening}{from}{to}
+
+\subsection{Empty let removal}
+This transformation is simple: It removes recursive lets that have no bindings
+(which usually occurs when let derecursification removes the last binding from
+it).
+
+\starttrans
+letrec in M
+--------------
+M
+\stoptrans
+
+\subsection{Simple let binding removal}
+This transformation inlines simple let bindings (\eg a = b).
+
+This transformation is not needed to get into normal form, but makes the
+resulting VHDL a lot shorter.
+
+\starttrans
+letnonrec
+  a = b
+in
+  M
+-----------------
+M[b/a]
+\stoptrans
+
+\starttrans
+letrec
+  \vdots
+  a = b
+  \vdots
+in
+  M
+-----------------
+let
+  \vdots [b/a]
+  \vdots [b/a]
+in
+  M[b/a]
+\stoptrans
+
+\subsection{Unused let binding removal}
+This transformation removes let bindings that are never used. Usually,
+the desugarer introduces some unused let bindings.
+
+This normalization pass should really be unneeded to get into normal form
+(since ununsed bindings are not forbidden by the normal form), but in practice
+the desugarer or simplifier emits some unused bindings that cannot be
+normalized (e.g., calls to a \type{PatError} (TODO: Check this name)). Also,
+this transformation makes the resulting VHDL a lot shorter.
+
+\starttrans
+let a = E in M
+----------------------------    \lam{a} does not occur free in \lam{M}
+M
+\stoptrans
+
+\starttrans
+letrec
+  \vdots
+  a = E
+  \vdots
+in
+  M
+----------------------------    \lam{a} does not occur free in \lam{M}
+letrec
+  \vdots
+  \vdots
+in
+  M
+\stoptrans
+
+\subsection{Non-representable binding inlining}
+This transform inlines let bindings that have a non-representable type. Since
+we can never generate a signal assignment for these bindings (we cannot
+declare a signal assignment with a non-representable type, for obvious
+reasons), we have no choice but to inline the binding to remove it.
+
+If the binding is non-representable because it is a lambda abstraction, it is
+likely that it will inlined into an application and β-reduction will remove
+the lambda abstraction and turn it into a representable expression at the
+inline site. The same holds for partial applications, which can be turned into
+full applications by inlining.
+
+Other cases of non-representable bindings we see in practice are primitive
+Haskell types. In most cases, these will not result in a valid normalized
+output, but then the input would have been invalid to start with. There is one
+exception to this: When a builtin function is applied to a non-representable
+expression, things might work out in some cases. For example, when you write a
+literal \hs{SizedInt} in Haskell, like \hs{1 :: SizedInt D8}, this results in
+the following core: \lam{fromInteger (smallInteger 10)}, where for example
+\lam{10 :: GHC.Prim.Int\#} and \lam{smallInteger 10 :: Integer} have
+non-representable types. TODO: This/these paragraph(s) should probably become a
+separate discussion somewhere else.
+
+\starttrans
+letnonrec a = E in M
+--------------------------    \lam{E} has a non-representable type.
+M[E/a]
+\stoptrans
+
+\starttrans
+letrec 
+  \vdots
+  a = E
+  \vdots
+in
+  M
+--------------------------    \lam{E} has a non-representable type.
+letrec
+  \vdots [E/a]
+  \vdots [E/a]
+in
+  M[E/a]
+\stoptrans
+
+\startbuffer[from]
+letrec
+  a = smallInteger 10
+  inc = λa -> add a 1
+  inc' = add 1
+  x = fromInteger a 
+in
+  inc (inc' x)
+\stopbuffer
+
+\startbuffer[to]
+letrec
+  x = fromInteger (smallInteger 10)
+in
+  (λa -> add a 1) (add 1 x)
+\stopbuffer
+
+\transexample{Let flattening}{from}{to}
+
+\subsection{Compiler generated top level binding inlining}
+TODO
+
+\subsection{Scrutinee simplification}
+This transform ensures that the scrutinee of a case expression is always
+a simple variable reference.
+
+\starttrans
+case E of
+  alts
+-----------------        \lam{E} is not a local variable reference
+let x = E in 
+  case E of
+    alts
+\stoptrans
+
+\startbuffer[from]
+case (foo a) of
+  True -> a
+  False -> b
+\stopbuffer
+
+\startbuffer[to]
+let x = foo a in
+  case x of
+    True -> a
+    False -> b
+\stopbuffer
+
+\transexample{Let flattening}{from}{to}
+
+
+\subsection{Case simplification}
+This transformation ensures that all case expressions become normal form. This
+means they will become one of:
+\startitemize
+\item An extractor case with a single alternative that picks a single field
+from a datatype, \eg \lam{case x of (a, b) -> a}.
+\item A selector case with multiple alternatives and only wild binders, that
+makes a choice between expressions based on the constructor of another
+expression, \eg \lam{case x of Low -> a; High -> b}.
+\stopitemize
+
+\starttrans
+case E of
+  C0 v0,0 ... v0,m -> E0
+  \vdots
+  Cn vn,0 ... vn,m -> En
+--------------------------------------------------- \forall i \forall j, 0 <= i <= n, 0 <= i < m (\lam{wi,j} is a wild (unused) binder)
+letnonrec
+  v0,0 = case x of C0 v0,0 .. v0,m -> v0,0
+  \vdots
+  v0,m = case x of C0 v0,0 .. v0,m -> v0,m
+  x0 = E0
+  \dots
+  vn,m = case x of Cn vn,0 .. vn,m -> vn,m
+  xn = En
+in
+  case E of
+    C0 w0,0 ... w0,m -> x0
+    \vdots
+    Cn wn,0 ... wn,m -> xn
+\stoptrans
+
+TODO: This transformation specified like this is complicated and misses
+conditions to prevent looping with itself. Perhaps we should split it here for
+discussion?
+
+\startbuffer[from]
+case a of
+  True -> add b 1
+  False -> add b 2
+\stopbuffer
+
+\startbuffer[to]
+letnonrec
+  x0 = add b 1
+  x1 = add b 2
+in
+  case a of
+    True -> x0
+    False -> x1
+\stopbuffer
+
+\transexample{Selector case simplification}{from}{to}
+
+\startbuffer[from]
+case a of
+  (,) b c -> add b c
+\stopbuffer
+\startbuffer[to]
+letnonrec
+  b = case a of (,) b c -> b
+  c = case a of (,) b c -> c
+  x0 = add b c
+in
+  case a of
+    (,) w0 w1 -> x0
+\stopbuffer
+
+\transexample{Extractor case simplification}{from}{to}
+
+\subsection{Case removal}
+This transform removes any case statements with a single alternative and
+only wild binders.
+
+These "useless" case statements are usually leftovers from case simplification
+on extractor case (see the previous example).
+
+\starttrans
+case x of
+  C v0 ... vm -> E
+----------------------     \lam{\forall i, 0 <= i <= m} (\lam{vi} does not occur free in E)
+E
+\stoptrans
+
+\startbuffer[from]
+case a of
+  (,) w0 w1 -> x0
+\stopbuffer
+
+\startbuffer[to]
+x0
+\stopbuffer
+
+\transexample{Case removal}{from}{to}
+
  \subsection{Argument simplification}
  The transforms in this section deal with simplifying application
  arguments into normal form. The goal here is to:
  \subsection{Argument simplification}
  The transforms in this section deal with simplifying application
  arguments into normal form. The goal here is to:
@@ -257,8 +631,9 @@ arguments into normal form. The goal here is to:
  \startitemize
   \item Make all arguments of user-defined functions (\eg, of which
   we have a function body) simple variable references of a runtime
  \startitemize
   \item Make all arguments of user-defined functions (\eg, of which
   we have a function body) simple variable references of a runtime
- representable type.
- \item Make all arguments of builtin functions either:
+ representable type. This is needed, since these applications will be turned
+ into component instantiations.
+ \item Make all arguments of builtin functions one of:
     \startitemize
      \item A type argument.
      \item A dictionary argument.
     \startitemize
      \item A type argument.
      \item A dictionary argument.
@@ -286,8 +661,23 @@ divide them into two categories:
          VHDL. To remove them, we create a specialized version of the
          called function with these arguments filled in. This is done by
          the argument propagation transform.
          VHDL. To remove them, we create a specialized version of the
          called function with these arguments filled in. This is done by
          the argument propagation transform.
+
+        Typically, these arguments are type and dictionary arguments that are
+        used to make functions polymorphic. By propagating these arguments, we
+        are essentially doing the same which GHC does when it specializes
+        functions: Creating multiple variants of the same function, one for
+        each type for which it is used. Other common non-representable
+        arguments are functions, e.g. when calling a higher order function
+        with another function or a lambda abstraction as an argument.
+
+        The reason for doing this is similar to the reasoning provided for
+        the inlining of non-representable let bindings above. In fact, this
+        argument propagation could be viewed as a form of cross-function
+        inlining.
  \stopitemize
  
  \stopitemize
  
+TODO: Check the following itemization.
+
  When looking at the arguments of a builtin function, we can divide them
  into categories: 
  
  When looking at the arguments of a builtin function, we can divide them
  into categories: 
  
@@ -370,9 +760,10 @@ into categories:
          categories instead.
  \stopitemize
  
          categories instead.
  \stopitemize
  
-\subsubsection{Argument extraction}
+\subsubsection{Argument simplification}
  This transform deals with arguments to functions that
  This transform deals with arguments to functions that
-are of a runtime representable type. 
+are of a runtime representable type. It ensures that they will all become
+references to global variables, or local signals in the resulting VHDL. 
  
  TODO: It seems we can map an expression to a port, not only a signal.
  Perhaps this makes this transformation not needed?
  
  TODO: It seems we can map an expression to a port, not only a signal.
  Perhaps this makes this transformation not needed?
@@ -385,16 +776,21 @@ a new let expression around the application, which binds the complex
  expression to a new variable. The original function is then applied to
  this variable.
  
  expression to a new variable. The original function is then applied to
  this variable.
  
-%\transform{Argument extract}
-%{
-%\lam{Y} is of a hardware representable type
-%
-%\lam{Y} is not a variable referene
-%
-%\conclusion
-%
-%\trans{X Y}{let z = Y in X z}
-%}
+\starttrans
+M N
+--------------------    \lam{N} is of a representable type
+let x = N in M x        \lam{N} is not a local variable reference
+\stoptrans
+
+\startbuffer[from]
+add (add a 1) 1
+\stopbuffer
+
+\startbuffer[to]
+let x = add a 1 in add x 1
+\stopbuffer
+
+\transexample{Argument extraction}{from}{to}
  
  \subsubsection{Function extraction}
  This transform deals with function-typed arguments to builtin functions.
  
  \subsubsection{Function extraction}
  This transform deals with function-typed arguments to builtin functions.
@@ -406,24 +802,36 @@ parameters to the new global function. The original argument is replaced
  with a reference to the new function, applied to any free variables from
  the original argument.
  
  with a reference to the new function, applied to any free variables from
  the original argument.
  
-%\transform{Function extraction}
-%{
-%\lam{X} is a (partial application of) a builtin function
-%
-%\lam{Y} is not an application
-%
-%\lam{Y} is not a variable reference
-%
-%\conclusion
-%
-%\lam{f0 ... fm} = free local vars of \lam{Y}
-%
-%\lam{y} is a new global variable
-%
-%\lam{y = λf0 ... fn.Y}
-%
-%\trans{X Y}{X (y f0 ... fn)}
-%}
+This transformation is useful when applying higher order builtin functions
+like \hs{map} to a lambda abstraction, for example. In this case, the code
+that generates VHDL for \hs{map} only needs to handle top level functions and
+partial applications, not any other expression (such as lambda abstractions or
+even more complicated expressions).
+
+\starttrans
+M N                     \lam{M} is a (partial aplication of a) builtin function.
+---------------------   \lam{f0 ... fn} = free local variables of \lam{N}
+M x f0 ... fn           \lam{N :: a -> b}
+~                       \lam{N} is not a (partial application of) a top level function
+x = λf0 ... λfn.N
+\stoptrans
+
+\startbuffer[from]
+map (λa . add a b) xs
+
+map (add b) ys
+\stopbuffer
+
+\startbuffer[to]
+x0 = λb.λa.add a b
+~
+map x0 xs
+
+x1 = λb.add b
+map x1 ys
+\stopbuffer
+
+\transexample{Function extraction}{from}{to}
  
  \subsubsection{Argument propagation}
  This transform deals with arguments to user-defined functions that are
  
  \subsubsection{Argument propagation}
  This transform deals with arguments to user-defined functions that are
@@ -480,69 +888,67 @@ translatable. A user-defined function is any other function.
  \starttrans
  x = E
  ~
  \starttrans
  x = E
  ~
-x Y0 ... Yi ... Yn                               \lam{Y_i} is not of a runtime representable type
----------------------------------------------    \lam{Y_i} is not a local variable reference
-x' = λy0 ... yi-1 f0 ... fm yi+1 ... yn .        \lam{f0 ... fm} = free local vars of \lam{Y_i}
-      E y0 ... yi-1 Yi yi+1 ... yn   
+x Y0 ... Yi ... Yn                               \lam{Yi} is not of a runtime representable type
+---------------------------------------------    \lam{Yi} is not a local variable reference
+x' y0 ... yi-1 f0 ...  fm Yi+1 ... Yn            \lam{f0 ... fm} = free local vars of \lam{Yi}
  ~
  ~
-x' y0 ... yi-1 f0 ...  fm Yi+1 ... Yn
+x' = λy0 ... yi-1 f0 ... fm yi+1 ... yn .       
+      E y0 ... yi-1 Yi yi+1 ... yn   
+
  \stoptrans
  
  \stoptrans
  
-%\transform{Argument propagation}
-%{
-%\lam{x} is a global variable, bound to a user-defined function
-%
-%\lam{x = E}
-%
-%\lam{Y_i} is not of a runtime representable type
-%
-%\lam{Y_i} is not a local variable reference
-%
-%\conclusion
-%
-%\lam{f0 ... fm} = free local vars of \lam{Y_i}
-%
-%\lam{x'} is a new global variable
-%
-%\lam{x' = λy0 ... yi-1 f0 ... fm yi+1 ... yn . E y0 ... yi-1 Yi yi+1 ... yn}
-%
-%\trans{x Y0 ... Yi ... Yn}{x' y0 ... yi-1 f0 ... fm Yi+1 ... Yn}
-%}
-%
-%TODO: The above definition looks too complicated... Can we find
-%something more concise?
-
-\subsection{Cast propagation}
-This transform pushes casts down into the expression as far as possible.
-\subsection{Let recursification}
-This transform makes all lets recursive.
-\subsection{Let simplification}
-This transform makes the result value of all let expressions a simple
-variable reference.
-\subsection{Let flattening}
-This transform turns two nested lets (\lam{let x = (let ... in ...) in
-...}) into a single let.
-\subsection{Simple let binding removal}
-This transforms inlines simple let bindings (\eg a = b).
-\subsection{Function inlining}
-This transform inlines let bindings of a funtion type. TODO: This should
-be generelized to anything that is non representable at runtime, or
-something like that.
-\subsection{Scrutinee simplification}
-This transform ensures that the scrutinee of a case expression is always
-a simple variable reference.
-\subsection{Case binder wildening}
-This transform replaces all binders of a each case alternative with a
-wild binder (\ie, one that is never referred to). This will possibly
-introduce a number of new "selector" case statements, that only select
-one element from an algebraic datatype and bind it to a variable.
-\subsection{Case value simplification}
-This transform simplifies the result value of each case alternative by
-binding the value in a let expression and replacing the value by a
-simple variable reference.
-\subsection{Case removal}
-This transform removes any case statements with a single alternative and
-only wild binders.
+TODO: Example
+
+\subsection{Cast propagation / simplification}
+This transform pushes casts down into the expression as far as possible. Since
+its exact role and need is not clear yet, this transformation is not yet
+specified.
+
+\subsection{Return value simplification}
+This transformation ensures that the return value of a function is always a
+simple local variable reference.
+
+Currently implemented using lambda simplification, let simplification, and
+top simplification. Should change into something like the following, which
+works only on the result of a function instead of any subexpression. This is
+achieved by the contexts, like \lam{x = E}, though this is strictly not
+correct (you could read this as "if there is any function \lam{x} that binds
+\lam{E}, any \lam{E} can be transformed, while we only mean the \lam{E} that
+is bound by \lam{x}. This might need some extra notes or something).
+
+\starttrans
+x = E                            \lam{E} is representable
+~                                \lam{E} is not a lambda abstraction
+E                                \lam{E} is not a let expression
+---------------------------      \lam{E} is not a local variable reference
+let x = E in x
+\stoptrans
+
+\starttrans
+x = λv0 ... λvn.E
+~                                \lam{E} is representable
+E                                \lam{E} is not a let expression
+---------------------------      \lam{E} is not a local variable reference
+let x = E in x
+\stoptrans
+
+\starttrans
+x = λv0 ... λvn.let ... in E
+~                                \lam{E} is representable
+E                                \lam{E} is not a local variable reference
+---------------------------
+let x = E in x
+\stoptrans
+
+\startbuffer[from]
+x = add 1 2
+\stopbuffer
+
+\startbuffer[to]
+x = let x = add 1 2 in x
+\stopbuffer
+
+\transexample{Return value simplification}{from}{to}
  
  \subsection{Example sequence}
  
  
  \subsection{Example sequence}