Promote the Output format subsection to a section.

[matthijs/master-project/report.git] / Chapters / Normalization.tex
diff --git a/Chapters/Normalization.tex b/Chapters/Normalization.tex

index d93717a1337e968253320a41d5a25cf32971ae7f..1e5b200c0a82922b4f1b0c0350b09d6212372980 100644 (file)
--- a/Chapters/Normalization.tex
+++ b/Chapters/Normalization.tex
@@ -7,9 +7,9 @@
    % space at the start of the frame.
    \define[1]\example{
      \framed[offset=1mm,align=right,strut=no,background=box,frame=off]{
-      \setuptyping[option=LAM,style=sans,before=,after=]
+      \setuptyping[option=LAM,style=sans,before=,after=,strip=auto]
        \typebuffer[#1]
-      \setuptyping[option=none,style=\tttf]
+      \setuptyping[option=none,style=\tttf,strip=auto]
      }
    }
  
@@ -317,9 +317,7 @@
        \italic{normal} = \italic{lambda}
        \italic{lambda} = λvar.\italic{lambda} (representable(var))
                        | \italic{toplet} 
-      \italic{toplet} = let \italic{binding} in \italic{toplet} 
-                      | letrec [\italic{binding}] in \italic{toplet}
-                      | var (representable(varvar))
+      \italic{toplet} = letrec [\italic{binding}...] in var (representable(varvar))
        \italic{binding} = var = \italic{rhs} (representable(rhs))
                         -- State packing and unpacking by coercion
                         | var0 = var1 :: State ty (lvar(var1))
@@ -590,6 +588,8 @@
        In the following sections, we will be using a number of functions and
        notations, which we will define here.
  
+      TODO: Define substitution
+
        \subsubsection{Other concepts}
          A \emph{global variable} is any variable that is bound at the
          top level of a program, or an external module. A \emph{local variable} is any
@@ -744,6 +744,15 @@
      this notation are still a bit fuzzy, so comments are welcom.
  
      \subsection{General cleanup}
+      These transformations are general cleanup transformations, that aim to
+      make expressions simpler. These transformations usually clean up the
+       mess left behind by other transformations or clean up expressions to
+       expose new transformation opportunities for other transformations.
+
+       Most of these transformations are standard optimizations in other
+       compilers as well. However, in our compiler, most of these are not just
+       optimizations, but they are required to get our program into normal
+       form.
  
        \subsubsection{β-reduction}
          β-reduction is a well known transformation from lambda calculus, where it is
@@ -756,8 +765,6 @@
          sure that most lambda abstractions will eventually be reducable by
          β-reduction.
  
-        TODO: Define substitution syntax
-
          \starttrans
          (λx.E) M
          -----------------
@@ -775,68 +782,10 @@
  
          \transexample{β-reduction}{from}{to}
  
-      \subsubsection{Application propagation}
-        This transformation is meant to propagate application expressions downwards
-        into expressions as far as possible. This allows partial applications inside
-        expressions to become fully applied and exposes new transformation
-        possibilities for other transformations (like β-reduction).
-
-        \starttrans
-        let binds in E) M
-        -----------------
-        let binds in E M
-        \stoptrans
-
-        % And an example
-        \startbuffer[from]
-        ( let 
-            val = 1
-          in 
-            add val
-        ) 3
-        \stopbuffer
-
-        \startbuffer[to]
-        let 
-          val = 1
-        in 
-          add val 3
-        \stopbuffer
-
-        \transexample{Application propagation for a let expression}{from}{to}
-
-        \starttrans
-        (case x of
-          p1 -> E1
-          \vdots
-          pn -> En) M
-        -----------------
-        case x of
-          p1 -> E1 M
-          \vdots
-          pn -> En M
-        \stoptrans
-
-        % And an example
-        \startbuffer[from]
-        ( case x of 
-            True -> id
-            False -> neg
-        ) 1
-        \stopbuffer
-
-        \startbuffer[to]
-        case x of 
-          True -> id 1
-          False -> neg 1
-        \stopbuffer
-
-        \transexample{Application propagation for a case expression}{from}{to}
-
        \subsubsection{Empty let removal}
          This transformation is simple: It removes recursive lets that have no bindings
-        (which usually occurs when let derecursification removes the last binding from
-        it).
+        (which usually occurs when unused let binding removal removes the last
+        binding from it).
  
          \starttrans
          letrec in M
@@ -844,82 +793,132 @@
          M
          \stoptrans
  
-        \subsubsection{Simple let binding removal}
+        TODO: Example
+
+      \subsubsection{Simple let binding removal}
          This transformation inlines simple let bindings (\eg a = b).
  
          This transformation is not needed to get into normal form, but makes the
          resulting \small{VHDL} a lot shorter.
  
-        \starttrans
-        letnonrec
-          a = b
-        in
-          M
-        -----------------
-        M[b/a]
-        \stoptrans
-
          \starttrans
          letrec
+          a0 = E0
            \vdots
-          a = b
+          ai = b
            \vdots
+          an = En
          in
            M
-        -----------------
-        let
-          \vdots [b/a]
-          \vdots [b/a]
+        -----------------------------  \lam{b} is a variable reference
+        letrec
+          a0 = E0 [b/ai]
+          \vdots
+          ai-1 = Ei-1 [b/ai]
+          ai+1 = Ei+1 [b/ai]
+          \vdots
+          an = En [b/ai]
          in
-          M[b/a]
+          M[b/ai]
          \stoptrans
  
+        TODO: Example
+
        \subsubsection{Unused let binding removal}
          This transformation removes let bindings that are never used. Usually,
          the desugarer introduces some unused let bindings.
  
          This normalization pass should really be unneeded to get into normal form
-        (since ununsed bindings are not forbidden by the normal form), but in practice
+        (since unused bindings are not forbidden by the normal form), but in practice
          the desugarer or simplifier emits some unused bindings that cannot be
          normalized (e.g., calls to a \type{PatError} (TODO: Check this name)). Also,
          this transformation makes the resulting \small{VHDL} a lot shorter.
  
-        \starttrans
-        let a = E in M
-        ----------------------------    \lam{a} does not occur free in \lam{M}
-        M
-        \stoptrans
-
          \starttrans
          letrec
+          a0 = E0
            \vdots
-          a = E
+          ai = Ei
            \vdots
+          an = En
          in
-          M
-        ----------------------------    \lam{a} does not occur free in \lam{M}
+          M                             \lam{a} does not occur free in \lam{M}
+        ----------------------------    \forall j, 0 <= j <= n, j ≠ i (\lam{a} does not occur free in \lam{Ej})
          letrec
+          a0 = E0
            \vdots
+          ai-1 = Ei-1
+          ai+1 = Ei+1
            \vdots
+          an = En
          in
            M
          \stoptrans
  
+        TODO: Example
+
        \subsubsection{Cast propagation / simplification}
          This transform pushes casts down into the expression as far as possible.
          Since its exact role and need is not clear yet, this transformation is
          not yet specified.
  
-      \subsubsection{Compiler generated top level binding inlining}
-        TODO
+      \subsubsection{Top level binding inlining}
+        This transform takes simple top level bindings generated by the
+        \small{GHC} compiler. \small{GHC} sometimes generates very simple
+        \quote{wrapper} bindings, which are bound to just a variable
+        reference, or a partial application to constants or other variable
+        references.
  
-    \section{Program structure}
+        Note that this transformation is completely optional. It is not
+        required to get any function into normal form, but it does help making
+        the resulting VHDL output easier to read (since it removes a bunch of
+        components that are really boring).
+
+        This transform takes any top level binding generated by the compiler,
+        whose normalized form contains only a single let binding.
+
+        \starttrans
+        x = λa0 ... λan.let y = E in y
+        ~
+        x
+        --------------------------------------         \lam{x} is generated by the compiler
+        λa0 ... λan.let y = E in y
+        \stoptrans
+
+        \startbuffer[from]
+        (+) :: Word -> Word -> Word
+        (+) = GHC.Num.(+) @Word $dNum
+        ~
+        (+) a b
+        \stopbuffer
+        \startbuffer[to]
+        GHC.Num.(+) @ Alu.Word $dNum a b
+        \stopbuffer
+
+        \transexample{Top level binding inlining}{from}{to}
+       
+        Without this transformation, the (+) function would generate an
+        architecture which would just add its inputs. This generates a lot of
+        overhead in the VHDL, which is particularly annoying when browsing the
+        generated RTL schematic (especially since + is not allowed in VHDL
+        architecture names\footnote{Technically, it is allowed to use
+        non-alphanumerics when using extended identifiers, but it seems that
+        none of the tooling likes extended identifiers in filenames, so it
+        effectively doesn't work}, so the entity would be called
+        \quote{w7aA7f} or something similarly unreadable and autogenerated).
+
+    \subsection{Program structure}
+      These transformations are aimed at normalizing the overall structure
+      into the intended form. This means ensuring there is a lambda abstraction
+      at the top for every argument (input port), putting all of the other
+      value definitions in let bindings and making the final return value a
+      simple variable reference.
  
        \subsubsection{η-abstraction}
          This transformation makes sure that all arguments of a function-typed
          expression are named, by introducing lambda expressions. When combined with
-        β-reduction and function inlining below, all function-typed expressions should
-        be lambda abstractions or global identifiers.
+        β-reduction and non-representable binding inlining, all function-typed
+        expressions should be lambda abstractions or global identifiers.
  
          \starttrans
          E                 \lam{E :: a -> b}
@@ -942,32 +941,94 @@
  
          \transexample{η-abstraction}{from}{to}
  
-      \subsubsection{Let derecursification}
-        This transformation is meant to make lets non-recursive whenever possible.
-        This might allow other optimizations to do their work better. TODO: Why is
-        this needed exactly?
+      \subsubsection{Application propagation}
+        This transformation is meant to propagate application expressions downwards
+        into expressions as far as possible. This allows partial applications inside
+        expressions to become fully applied and exposes new transformation
+        opportunities for other transformations (like β-reduction and
+        specialization).
  
-      \subsubsection{Let flattening}
-        This transformation puts nested lets in the same scope, by lifting the
-        binding(s) of the inner let into a new let around the outer let. Eventually,
-        this will cause all let bindings to appear in the same scope (they will all be
-        in scope for the function return value).
+        \starttrans
+        (letrec binds in E) M
+        ------------------------
+        letrec binds in E M
+        \stoptrans
+
+        % And an example
+        \startbuffer[from]
+        ( letrec
+            val = 1
+          in 
+            add val
+        ) 3
+        \stopbuffer
  
-        Note that this transformation does not try to be smart when faced with
-        recursive lets, it will just leave the lets recursive (possibly joining a
-        recursive and non-recursive let into a single recursive let). The let
-        rederecursification transformation will do this instead.
+        \startbuffer[to]
+        letrec
+          val = 1
+        in 
+          add val 3
+        \stopbuffer
+
+        \transexample{Application propagation for a let expression}{from}{to}
  
          \starttrans
-        letnonrec x = (let bindings in M) in N
+        (case x of
+          p1 -> E1
+          \vdots
+          pn -> En) M
+        -----------------
+        case x of
+          p1 -> E1 M
+          \vdots
+          pn -> En M
+        \stoptrans
+
+        % And an example
+        \startbuffer[from]
+        ( case x of 
+            True -> id
+            False -> neg
+        ) 1
+        \stopbuffer
+
+        \startbuffer[to]
+        case x of 
+          True -> id 1
+          False -> neg 1
+        \stopbuffer
+
+        \transexample{Application propagation for a case expression}{from}{to}
+
+      \subsubsection{Let recursification}
+        This transformation makes all non-recursive lets recursive. In the
+        end, we want a single recursive let in our normalized program, so all
+        non-recursive lets can be converted. This also makes other
+        transformations simpler: They can simply assume all lets are
+        recursive.
+
+        \starttrans
+        let
+          a = E
+        in
+          M
          ------------------------------------------
-        let bindings in (letnonrec x = M) in N
+        letrec
+          a = E
+        in
+          M
          \stoptrans
  
+      \subsubsection{Let flattening}
+        This transformation puts nested lets in the same scope, by lifting the
+        binding(s) of the inner let into a new let around the outer let. Eventually,
+        this will cause all let bindings to appear in the same scope (they will all be
+        in scope for the function return value).
+
          \starttrans
          letrec 
            \vdots
-          x = (let bindings in M)
+          x = (letrec bindings in M)
            \vdots
          in
            N
@@ -982,33 +1043,22 @@
          \stoptrans
  
          \startbuffer[from]
-        let
+        letrec
            a = letrec
              x = 1
              y = 2
            in
              x + y
          in
-          letrec
-            b = let c = 3 in a + c
-            d = 4
-          in
-            d + b
+          a
          \stopbuffer
          \startbuffer[to]
          letrec
            x = 1
            y = 2
+          a = x + y
          in
-          let
-            a = x + y
-          in
-            letrec
-              c = 3
-              b = a + c
-              d = 4
-            in
-              d + b
+          a
          \stopbuffer
  
          \transexample{Let flattening}{from}{to}
@@ -1025,12 +1075,20 @@
          \lam{E}, any \lam{E} can be transformed, while we only mean the \lam{E} that
          is bound by \lam{x}. This might need some extra notes or something).
  
+        Note that the return value is not simplified if its not representable.
+        Otherwise, this would cause a direct loop with the inlining of
+        unrepresentable bindings, of course. If the return value is not
+        representable because it has a function type, η-abstraction should
+        make sure that this transformation will eventually apply. If the value
+        is not representable for other reasons, the function result itself is
+        not representable, meaning this function is not representable anyway!
+
          \starttrans
          x = E                            \lam{E} is representable
          ~                                \lam{E} is not a lambda abstraction
          E                                \lam{E} is not a let expression
          ---------------------------      \lam{E} is not a local variable reference
-        let x = E in x
+        letrec x = E in x
          \stoptrans
  
          \starttrans
@@ -1038,7 +1096,7 @@
          ~                                \lam{E} is representable
          E                                \lam{E} is not a let expression
          ---------------------------      \lam{E} is not a local variable reference
-        let x = E in x
+        letrec x = E in x
          \stoptrans
  
          \starttrans
@@ -1046,7 +1104,7 @@
          ~                                \lam{E} is representable
          E                                \lam{E} is not a local variable reference
          ---------------------------
-        let x = E in x
+        letrec x = E in x
          \stoptrans
  
          \startbuffer[from]
@@ -1054,7 +1112,7 @@
          \stopbuffer
  
          \startbuffer[to]
-        x = let x = add 1 2 in x
+        x = letrec x = add 1 2 in x
          \stopbuffer
  
          \transexample{Return value simplification}{from}{to}
@@ -1214,7 +1272,7 @@
          \starttrans
          M N
          --------------------    \lam{N} is of a representable type
-        let x = N in M x        \lam{N} is not a local variable reference
+        letrec x = N in M x     \lam{N} is not a local variable reference
          \stoptrans
  
          \startbuffer[from]
@@ -1222,7 +1280,7 @@
          \stopbuffer
  
          \startbuffer[to]
-        let x = add a 1 in add x 1
+        letrec x = add a 1 in add x 1
          \stopbuffer
  
          \transexample{Argument extraction}{from}{to}
@@ -1246,7 +1304,7 @@
          \starttrans
          M N                     \lam{M} is a (partial aplication of a) builtin function.
          ---------------------   \lam{f0 ... fn} = free local variables of \lam{N}
-        M x f0 ... fn           \lam{N :: a -> b}
+        M (x f0 ... fn)         \lam{N :: a -> b}
          ~                       \lam{N} is not a (partial application of) a top level function
          x = λf0 ... λfn.N
          \stoptrans
@@ -1258,16 +1316,18 @@
          \stopbuffer
  
          \startbuffer[to]
-        x0 = λb.λa.add a b
-        ~
-        map x0 xs
+        map (x0 b) xs
  
-        x1 = λb.add b
          map x1 ys
+        ~
+        x0 = λb.λa.add a b
+        x1 = λb.add b
          \stopbuffer
  
          \transexample{Function extraction}{from}{to}
  
+        Note that \lam{x0} and {x1} will still need normalization after this.
+
        \subsubsection{Argument propagation}
          This transform deals with arguments to user-defined functions that are
          not representable at runtime. This means these arguments cannot be
@@ -1282,7 +1342,7 @@
          inc = λa.f a 1
          \stoplambda
  
-        we could {\em propagate} the constant argument 1, with the following
+        We could {\em propagate} the constant argument 1, with the following
          result:
  
          \startlambda
@@ -1329,7 +1389,7 @@
          case E of
            alts
          -----------------        \lam{E} is not a local variable reference
-        let x = E in 
+        letrec x = E in 
            case E of
              alts
          \stoptrans
@@ -1341,7 +1401,7 @@
          \stopbuffer
  
          \startbuffer[to]
-        let x = foo a in
+        letrec x = foo a in
            case x of
              True -> a
              False -> b
@@ -1367,7 +1427,7 @@
            \vdots
            Cn vn,0 ... vn,m -> En
          --------------------------------------------------- \forall i \forall j, 0 <= i <= n, 0 <= i < m (\lam{wi,j} is a wild (unused) binder)
-        letnonrec
+        letrec
            v0,0 = case x of C0 v0,0 .. v0,m -> v0,0
            \vdots
            v0,m = case x of C0 v0,0 .. v0,m -> v0,m
@@ -1409,7 +1469,7 @@
            (,) b c -> add b c
          \stopbuffer
          \startbuffer[to]
-        letnonrec
+        letrec
            b = case a of (,) b c -> b
            c = case a of (,) b c -> c
            x0 = add b c
@@ -1445,12 +1505,18 @@
  
          \transexample{Case removal}{from}{to}
  
-  \subsection{Monomorphisation}
-    TODO: Better name for this section
-
+  \subsection{Removing polymorphism}
      Reference type-specialization (== argument propagation)
  
-  \subsubsection{Defunctionalization}
+    Reference polymporphic binding inlining (== non-representable binding
+    inlining).
+
+  \subsection{Defunctionalization}
+    These transformations remove most higher order expressions from our
+    program, making it completely first-order (the only exception here is for
+    arguments to builtin functions, since we can't specialize builtin
+    function. TODO: Talk more about this somewhere).
+
      Reference higher-order-specialization (== argument propagation)
  
        \subsubsection{Non-representable binding inlining}
@@ -1476,31 +1542,32 @@
          non-representable types. TODO: This/these paragraph(s) should probably become a
          separate discussion somewhere else.
  
-        \starttrans
-        letnonrec a = E in M
-        --------------------------    \lam{E} has a non-representable type.
-        M[E/a]
-        \stoptrans
  
          \starttrans
          letrec 
+          a0 = E0
            \vdots
-          a = E
+          ai = Ei
            \vdots
+          an = En
          in
            M
-        --------------------------    \lam{E} has a non-representable type.
+        --------------------------    \lam{Ei} has a non-representable type.
          letrec
-          \vdots [E/a]
-          \vdots [E/a]
+          a0 = E0 [Ei/ai]
+          \vdots
+          ai-1 = Ei-1 [Ei/ai]
+          ai+1 = Ei+1 [Ei/ai]
+          \vdots
+          an = En [Ei/ai]
          in
-          M[E/a]
+          M[Ei/ai]
          \stoptrans
  
          \startbuffer[from]
          letrec
            a = smallInteger 10
-          inc = λa -> add a 1
+          inc = λb -> add b 1
            inc' = add 1
            x = fromInteger a 
          in
@@ -1511,10 +1578,10 @@
          letrec
            x = fromInteger (smallInteger 10)
          in
-          (λa -> add a 1) (add 1 x)
+          (λb -> add b 1) (add 1 x)
          \stopbuffer
  
-        \transexample{Let flattening}{from}{to}
+        \transexample{None representable binding inlining}{from}{to}
  
  
    \section{Provable properties}