\mainlanguage [en]
\setuppapersize[A4][A4]

% Define a custom typescript. We could also have put the \definetypeface's
% directly in the script, without a typescript, but I guess this is more
% elegant...
\starttypescript[Custom]
% This is a sans font that supports greek symbols
\definetypeface [Custom] [ss] [sans]  [iwona]                [default]
\definetypeface [Custom] [rm] [serif] [antykwa-torunska]     [default]
\definetypeface [Custom] [tt] [mono]  [modern]               [default]
\definetypeface [Custom] [mm] [math]  [modern]               [default]
\stoptypescript
\usetypescript [Custom]

% Use our custom typeface
\switchtotypeface [Custom] [10pt]

% The function application operator, which expands to a space in math mode
\define[1]\expr{|#1|}
\define[2]\app{#1\;#2}
\define[2]\lam{λ#1 \xrightarrow #2}
\define[2]\letexpr{{\bold let}\;#1\;{\bold in}\;#2}
\define[2]\case{{\bold case}\;#1\;{\bold of}\;#2}
\define[2]\alt{#1 \xrightarrow #2}
\define[2]\bind{#1:#2}
\define[1]\where{{\bold where}\;#1}
% A transformation
\definefloat[transformation][transformations]
\define[2]\transform{
  \startframedtext[width=\textwidth]
  #2
  \stopframedtext
}

\define\conclusion{\blackrule[height=0.5pt,depth=0pt,width=.5\textwidth]}
\define\nextrule{\vskip1cm}

\define[2]\transformold{
  %\placetransformation[here]{#1}
  \startframedtext[width=\textwidth]
  \startformula \startalign
  #2
  \stopalign \stopformula
  \stopframedtext
}

% A shortcut for italicized e.g. and i.e.
\define[0]\eg{{\em e.g.}}
\define[0]\ie{{\em i.e.}}

\definedescription 
  [desc]
  [location=hanging,hang=20,width=broad]
   %command=\hskip-1cm,margin=1cm]

% Install the lambda calculus pretty-printer, as defined in pret-lam.lua.
\installprettytype [LAM] [LAM]

\definetyping[lambda][option=LAM,style=sans]
\definetype[lam][option=LAM,style=sans]

% An (invisible) frame to hold a lambda expression
\define[1]\lamframe{
        % Put a frame around lambda expressions, so they can have multiple
        % lines and still appear inline.
        % The align=right option really does left-alignment, but without the
        % program will end up on a single line. The strut=no option prevents a
        % bunch of empty space at the start of the frame.
        \framed[offset=0mm,location=middle,strut=no,align=right,frame=off]{#1}
}

\define[2]\transbuf{
        % Make \typebuffer uses the LAM pretty printer and a sans-serif font
        % Also prevent any extra spacing above and below caused by the default
        % before=\blank and after=\blank.
        \setuptyping[option=LAM,style=sans,before=,after=]
        % Prevent the arrow from ending up below the first frame (a \framed
        % at the start of a line defaults to using vmode).
        \dontleavehmode
        % Put the elements in frames, so they can have multiple lines and be
        % middle-aligned
        \lamframe{\typebuffer[#1]}
        \lamframe{\Rightarrow}
        \lamframe{\typebuffer[#2]}
        % Reset the typing settings to their defaults
        \setuptyping[option=none,style=\tttf]
}
% This is the same as \transbuf above, but it accepts text directly instead
% of through buffers. This only works for single lines, however.
\define[2]\trans{
        \dontleavehmode
        \lamframe{\lam{#1}}
        \lamframe{\Rightarrow}
        \lamframe{\lam{#2}}
}


% A helper to print a single example in the half the page width. The example
% text should be in a buffer whose name is given in an argument.
%
% The align=right option really does left-alignment, but without the program
% will end up on a single line. The strut=no option prevents a bunch of empty
% space at the start of the frame.
\define[1]\example{
  \framed[offset=1mm,align=right,strut=no]{
    \setuptyping[option=LAM,style=sans,before=,after=]
    \typebuffer[#1]
    \setuptyping[option=none,style=\tttf]
  }
}


% A transformation example
\definefloat[example][examples]
\setupcaption[example][location=top] % Put captions on top

\define[3]\transexample{
  \placeexample[here]{#1}
  \startcombination[2*1]
    {\example{#2}}{Original program}
    {\example{#3}}{Transformed program}
  \stopcombination
}

\define[3]\transexampleh{
%  \placeexample[here]{#1}
%  \startcombination[1*2]
%    {\example{#2}}{Original program}
%    {\example{#3}}{Transformed program}
%  \stopcombination
}

% Define a custom description format for the builtinexprs below
\definedescription[builtindesc][headstyle=bold,style=normal,location=top]

\starttext
\title {Core transformations for hardware generation}
Matthijs Kooijman

\section{Introduction}
As a new approach to translating Core to VHDL, we investigate a number of
transforms on our Core program, which should bring the program into a
well-defined {\em normal} form, which is subsequently trivial to
translate to VHDL.

The transforms as presented here are far from complete, but are meant as
an exploration of possible transformations.

\section{Goal}
The transformations described here have a well-defined goal: To bring the
program in a well-defined form that is directly translatable to hardware,
while fully preserving the semantics of the program.

This {\em normal form} is again a Core program, but with a very specific
structure. A function in normal form has nested lambda's at the top, which
produce a let expression. This let expression binds every function application
in the function and produces a simple identifier. Every bound value in
the let expression is either a simple function application or a case
expression to extract a single element from a tuple returned by a
function.

An example of a program in canonical form would be:

\startlambda
  -- All arguments are an inital lambda
  λx.λc.λd.
  -- There is one let expression at the top level
  let
    -- Calling some other user-defined function.
    s = foo x
    -- Extracting result values from a tuple
    a = case s of (a, b) -> a
    b = case s of (a, b) -> b
    -- Some builtin expressions
    rh = add c d
    rhh = sub d c
    -- Conditional connections
    rl = case b of
      High -> rhh
      Low -> d
    r = case a of
      High -> rh
      Low -> rl
  in
    -- The actual result
    r
\stoplambda

When looking at such a program from a hardware perspective, the top level
lambda's define the input ports. The value produced by the let expression is
the output port. Most function applications bound by the let expression
define a component instantiation, where the input and output ports are mapped
to local signals or arguments. Some of the others use a builtin
construction (\eg the \lam{case} statement) or call a builtin function
(\eg \lam{add} or \lam{sub}). For these, a hardcoded VHDL translation is
available.

\subsection{Normal definition}
Formally, the normal form is a core program obeying the following
constraints. TODO: Update this section, this is probably not completely
accurate or relevant anymore.

\startitemize[R,inmargin]
\item All top level binds must have the form $\expr{\bind{fun}{lamexpr}}$.
$fun$ is an identifier that will be bound as a global identifier.
\item A $lamexpr$ has the form $\expr{\lam{arg}{lamexpr}}$ or
$\expr{letexpr}$. $arg$ is an identifier which will be bound as an $argument$.
\item[letexpr] A $letexpr$ has the form $\expr{\letexpr{letbinds}{retexpr}}$.
\item $letbinds$ is a list with elements of the form
$\expr{\bind{res}{appexpr}}$ or $\expr{\bind{res}{builtinexpr}}$, where $res$ is
an identifier that will be bound as local identifier. The type of the bound
value must be a $hardware\;type$.
\item[builtinexpr] A $builtinexpr$ is an expression that can be mapped to an
equivalent VHDL expression. Since there are many supported forms for this,
these are defined in a separate table.
\item An $appexpr$ has the form $\expr{fun}$ or $\expr{\app{appexpr}{x}}$,
where $fun$ is a global identifier and $x$ is a local identifier.
\item[retexpr] A $retexpr$ has the form $\expr{x}$ or $\expr{tupexpr}$, where $x$ is a local identifier that is bound as an $argument$ or $result$.  A $retexpr$ must
be of a $hardware\;type$.
\item A $tupexpr$ has the form $\expr{con}$ or $\expr{\app{tupexpr}{x}}$,
where $con$ is a tuple constructor ({\em e.g.} $(,)$ or $(,,,)$) and $x$ is
a local identifier.
\item A $hardware\;type$ is a type that can be directly translated to
hardware. This includes the types $Bit$, $SizedWord$, tuples containing
elements of $hardware\;type$s, and will include others. This explicitely
excludes function types.
\stopitemize

TODO: Say something about uniqueness of identifiers

\subsection{Builtin expressions}
A $builtinexpr$, as defined at \in[builtinexpr] can have any of the following forms.

\startitemize[m,inmargin]
\item
$tuple\_extract=\expr{\case{t}{\alt{\app{con}{x_0\;x_1\;..\;x_n}}{x_i}}}$,
where $t$ can be any local identifier, $con$ is a tuple constructor ({\em
e.g.} $(,)$ or $(,,,)$), $x_0$ to $x_n$ can be any identifier, and $x_i$ can
be any of $x_0$ to $x_n$. A case expression must have a $hardware\;type$.
\item TODO: Many more!
\stopitemize

\section{Transform passes}

In this section we describe the actual transforms. Here we're using
the core language in a notation that resembles lambda calculus.

Each of these transforms is meant to be applied to every (sub)expression
in a program, for as long as it applies. Only when none of the
expressions can be applied anymore, the program is in normal form. We
hope to be able to prove that this form will obey all of the constraints
defined above, but this has yet to happen (though it seems likely that
it will).

Each of the transforms will be described informally first, explaining
the need for and goal of the transform. Then, a formal definition is
given, using a familiar syntax from the world of logic. Each transform
is specified as a number of conditions (above the horizontal line) and a
number of conclusions (below the horizontal line). The details of using
this notation are still a bit fuzzy, so comments are welcom.

TODO: Formally describe the "apply to every (sub)expression" in terms of
rules with full transformations in the conditions.

\subsection{η-abstraction}
This transformation makes sure that all arguments of a function-typed
expression are named, by introducing lambda expressions. When combined with
β-reduction and function inlining below, all function-typed expressions should
be lambda abstractions or global identifiers.

\transform{η-abstraction}
{
\lam{E :: * -> *}

\lam{E} is not the first argument of an application.

\lam{E} is not a lambda abstraction.

\lam{x} is a variable that does not occur free in E.

\conclusion

\trans{E}{λx.E x}
}

\startbuffer[from]
foo = λa -> case a of 
  True -> λb.mul b b
  False -> id
\stopbuffer

\startbuffer[to]
foo = λa.λx -> (case a of 
    True -> λb.mul b b
    False -> λy.id y) x
\stopbuffer

\transexample{η-abstraction}{from}{to}

\subsection{Extended β-reduction}
This transformation is meant to propagate application expressions downwards
into expressions as far as possible. In lambda calculus, this reduction
is known as β-reduction, but it is of course only defined for
applications of lambda abstractions. We extend this reduction to also
work for the rest of core (case and let expressions).
\startbuffer[from]
(case x of
  p1 -> E1
  \vdots
  pn -> En) M
\stopbuffer
\startbuffer[to]
case x of
  p1 -> E1 M
  \vdots
  pn -> En M
\stopbuffer

\transform{Extended β-reduction}
{
\conclusion
\trans{(λx.E) M}{E[M/x]}

\nextrule
\conclusion
\trans{(let binds in E) M}{let binds in E M}

\nextrule
\conclusion
\transbuf{from}{to}
}

\startbuffer[from]
let a = (case x of 
           True -> id
           False -> neg
        ) 1
    b = (let y = 3 in add y) 2
in
  (λz.add 1 z) 3
\stopbuffer

\startbuffer[to]
let a = case x of 
           True -> id 1
           False -> neg 1
    b = let y = 3 in add y 2
in
  add 1 3
\stopbuffer

\transexample{Extended β-reduction}{from}{to}

\subsection{Argument simplification}
The transforms in this section deal with simplifying application
arguments into normal form. The goal here is to:

\startitemize
 \item Make all arguments of user-defined functions (\eg, of which
 we have a function body) simple variable references of a runtime
 representable type.
 \item Make all arguments of builtin functions either:
   \startitemize
    \item A type argument.
    \item A dictionary argument.
    \item A type level expression.
    \item A variable reference of a runtime representable type.
    \item A variable reference or partial application of a function type.
   \stopitemize
\stopitemize

When looking at the arguments of a user-defined function, we can
divide them into two categories:
\startitemize
  \item Arguments with a runtime representable type (\eg bits or vectors).

        These arguments can be preserved in the program, since they can
        be translated to input ports later on.  However, since we can
        only connect signals to input ports, these arguments must be
        reduced to simple variables (for which signals will be
        produced). This is taken care of by the argument extraction
        transform.
  \item Non-runtime representable typed arguments.
        
        These arguments cannot be preserved in the program, since we
        cannot represent them as input or output ports in the resulting
        VHDL. To remove them, we create a specialized version of the
        called function with these arguments filled in. This is done by
        the argument propagation transform.
\stopitemize

When looking at the arguments of a builtin function, we can divide them
into categories: 

\startitemize
  \item Arguments with a runtime representable type.
        
        As we have seen with user-defined functions, these arguments can
        always be reduced to a simple variable reference, by the
        argument extraction transform. Performing this transform for
        builtin functions as well, means that the translation of builtin
        functions can be limited to signal references, instead of
        needing to support all possible expressions.

  \item Arguments with a function type.
        
        These arguments are functions passed to higher order builtins,
        like \lam{map} and \lam{foldl}. Since implementing these
        functions for arbitrary function-typed expressions (\eg, lambda
        expressions) is rather comlex, we reduce these arguments to
        (partial applications of) global functions.
        
        We can still support arbitrary expressions from the user code,
        by creating a new global function containing that expression.
        This way, we can simply replace the argument with a reference to
        that new function. However, since the expression can contain any
        number of free variables we also have to include partial
        applications in our normal form.

        This category of arguments is handled by the function extraction
        transform.
  \item Other unrepresentable arguments.
        
        These arguments can take a few different forms:
        \startdesc{Type arguments}
          In the core language, type arguments can only take a single
          form: A type wrapped in the Type constructor. Also, there is
          nothing that can be done with type expressions, except for
          applying functions to them, so we can simply leave type
          arguments as they are.
        \stopdesc
        \startdesc{Dictionary arguments}
          In the core language, dictionary arguments are used to find
          operations operating on one of the type arguments (mostly for
          finding class methods). Since we will not actually evaluatie
          the function body for builtin functions and can generate
          code for builtin functions by just looking at the type
          arguments, these arguments can be ignored and left as they
          are.
        \stopdesc
        \startdesc{Type level arguments}
          Sometimes, we want to pass a value to a builtin function, but
          we need to know the value at compile time. Additionally, the
          value has an impact on the type of the function. This is
          encoded using type-level values, where the actual value of the
          argument is not important, but the type encodes some integer,
          for example. Since the value is not important, the actual form
          of the expression does not matter either and we can leave
          these arguments as they are.
        \stopdesc
        \startdesc{Other arguments}
          Technically, there is still a wide array of arguments that can
          be passed, but does not fall into any of the above categories.
          However, none of the supported builtin functions requires such
          an argument. This leaves use with passing unsupported types to
          a function, such as calling \lam{head} on a list of functions.

          In these cases, it would be impossible to generate hardware
          for such a function call anyway, so we can ignore these
          arguments.

          The only way to generate hardware for builtin functions with
          arguments like these, is to expand the function call into an
          equivalent core expression (\eg, expand map into a series of
          function applications). But for now, we choose to simply not
          support expressions like these.
        \stopdesc

        From the above, we can conclude that we can simply ignore these
        other unrepresentable arguments and focus on the first two
        categories instead.
\stopitemize

\subsubsection{Argument extraction}
This transform deals with arguments to functions that
are of a runtime representable type. 

TODO: It seems we can map an expression to a port, not only a signal.
Perhaps this makes this transformation not needed?
TODO: Say something about dataconstructors (without arguments, like True
or False), which are variable references of a runtime representable
type, but do not result in a signal.

To reduce a complex expression to a simple variable reference, we create
a new let expression around the application, which binds the complex
expression to a new variable. The original function is then applied to
this variable.

\transform{Argument extract}
{
\lam{Y} is of a hardware representable type

\lam{Y} is not a variable referene

\conclusion

\trans{X Y}{let z = Y in X z}
}

\subsubsection{Function extraction}
This transform deals with function-typed arguments to builtin functions.
Since these arguments cannot be propagated, we choose to extract them
into a new global function instead.

Any free variables occuring in the extracted arguments will become
parameters to the new global function. The original argument is replaced
with a reference to the new function, applied to any free variables from
the original argument.

\transform{Function extraction}
{
\lam{X} is a (partial application of) a builtin function

\lam{Y} is not an application

\lam{Y} is not a variable reference

\conclusion

\lam{f0 ... fm} = free local vars of \lam{Y}

\lam{y} is a new global variable

\lam{y = λf0 ... fn.Y}

\trans{X Y}{X (y f0 ... fn)}
}

\subsubsection{Argument propagation}
This transform deals with arguments to user-defined functions that are
not representable at runtime. This means these arguments cannot be
preserved in the final form and most be {\em propagated}.

Propagation means to create a specialized version of the called
function, with the propagated argument already filled in. As a simple
example, in the following program:

\startlambda
f = λa.λb.a + b
inc = λa.f a 1
\stoplambda

we could {\em propagate} the constant argument 1, with the following
result:

\startlambda
f' = λa.a + 1
inc = λa.f' a
\stoplambda

Special care must be taken when the to-be-propagated expression has any
free variables. If this is the case, the original argument should not be
removed alltogether, but replaced by all the free variables of the
expression. In this way, the original expression can still be evaluated
inside the new function. Also, this brings us closer to our goal: All
these free variables will be simple variable references.

To prevent us from propagating the same argument over and over, a simple
local variable reference is not propagated (since is has exactly one
free variable, itself, we would only replace that argument with itself).

This shows that any free local variables that are not runtime representable
cannot be brought into normal form by this transform. We rely on an
inlining transformation to replace such a variable with an expression we
can propagate again.

TODO: Move these definitions somewhere sensible.

Definition: A global variable is any variable that is bound at the
top level of a program. A local variable is any other variable.

Definition: A hardware representable type is a type that we can generate
a signal for in hardware. For example, a bit, a vector of bits, a 32 bit
unsigned word, etc. Types that are not runtime representable notably
include (but are not limited to): Types, dictionaries, functions.

Definition: A builtin function is a function for which a builtin
hardware translation is available, because its actual definition is not
translatable. A user-defined function is any other function.

\transform{Argument propagation}
{
\lam{x} is a global variable, bound to a user-defined function

\lam{x = E}

\lam{Y_i} is not of a runtime representable type

\lam{Y_i} is not a local variable reference

\conclusion

\lam{f0 ... fm} = free local vars of \lam{Y_i}

\lam{x'} is a new global variable

\lam{x' = λy0 ... yi-1 f0 ... fm yi+1 ... yn . E y0 ... yi-1 Yi yi+1 ... yn}

\trans{x Y0 ... Yi ... Yn}{x' y0 ... yi-1 f0 ... fm Yi+1 ... Yn}
}

TODO: The above definition looks too complicated... Can we find
something more concise?

\subsection{Cast propagation}
This transform pushes casts down into the expression as far as possible.
\subsection{Let recursification}
This transform makes all lets recursive.
\subsection{Let simplification}
This transform makes the result value of all let expressions a simple
variable reference.
\subsection{Let flattening}
This transform turns two nested lets (\lam{let x = (let ... in ...) in
...}) into a single let.
\subsection{Simple let binding removal}
This transforms inlines simple let bindings (\eg a = b).
\subsection{Function inlining}
This transform inlines let bindings of a funtion type. TODO: This should
be generelized to anything that is non representable at runtime, or
something like that.
\subsection{Scrutinee simplification}
This transform ensures that the scrutinee of a case expression is always
a simple variable reference.
\subsection{Case binder wildening}
This transform replaces all binders of a each case alternative with a
wild binder (\ie, one that is never referred to). This will possibly
introduce a number of new "selector" case statements, that only select
one element from an algebraic datatype and bind it to a variable.
\subsection{Case value simplification}
This transform simplifies the result value of each case alternative by
binding the value in a let expression and replacing the value by a
simple variable reference.
\subsection{Case removal}
This transform removes any case statements with a single alternative and
only wild binders.

\subsection{Example sequence}

This section lists an example expression, with a sequence of transforms
applied to it. The exact transforms given here probably don't exactly
match the transforms given above anymore, but perhaps this can clarify
the big picture a bit.

TODO: Update or remove this section.

\startlambda
  λx.
    let s = foo x
    in
      case s of
        (a, b) ->
          case a of
            High -> add
            Low -> let
              op' = case b of
                High -> sub
                Low  -> λc.λd.c
              in
                λc.λd.op' d c
\stoplambda

After top-level η-abstraction:

\startlambda
  λx.λc.λd.
    (let s = foo x
    in
      case s of
        (a, b) ->
          case a of
            High -> add
            Low -> let
              op' = case b of
                High -> sub
                Low  -> λc.λd.c
              in
                λc.λd.op' d c
    ) c d
\stoplambda

After (extended) β-reduction:

\startlambda
  λx.λc.λd.
    let s = foo x
    in
      case s of
        (a, b) ->
          case a of
            High -> add c d
            Low -> let
              op' = case b of
                High -> sub
                Low  -> λc.λd.c
              in
                op' d c
\stoplambda

After return value extraction:

\startlambda
  λx.λc.λd.
    let s = foo x
        r = case s of
              (a, b) ->
                case a of
                  High -> add c d
                  Low -> let
                    op' = case b of
                      High -> sub
                      Low  -> λc.λd.c
                    in
                      op' d c
    in
      r
\stoplambda

Scrutinee simplification does not apply.

After case binder wildening:

\startlambda
  λx.λc.λd.
    let s = foo x
        a = case s of (a, _) -> a
        b = case s of (_, b) -> b
        r = case s of (_, _) ->
              case a of
                High -> add c d
                Low -> let op' = case b of
                             High -> sub
                             Low  -> λc.λd.c
                       in
                         op' d c
    in
      r
\stoplambda

After case value simplification

\startlambda
  λx.λc.λd.
    let s = foo x
        a = case s of (a, _) -> a
        b = case s of (_, b) -> b
        r = case s of (_, _) -> r'
        rh = add c d
        rl = let rll = λc.λd.c
                 op' = case b of
                   High -> sub
                   Low  -> rll
             in
               op' d c
        r' = case a of
               High -> rh
               Low -> rl
    in
      r
\stoplambda

After let flattening:

\startlambda
  λx.λc.λd.
    let s = foo x
        a = case s of (a, _) -> a
        b = case s of (_, b) -> b
        r = case s of (_, _) -> r'
        rh = add c d
        rl = op' d c
        rll = λc.λd.c
        op' = case b of
          High -> sub
          Low  -> rll
        r' = case a of
               High -> rh
               Low -> rl
    in
      r
\stoplambda

After function inlining:

\startlambda
  λx.λc.λd.
    let s = foo x
        a = case s of (a, _) -> a
        b = case s of (_, b) -> b
        r = case s of (_, _) -> r'
        rh = add c d
        rl = (case b of
          High -> sub
          Low  -> λc.λd.c) d c
        r' = case a of
          High -> rh
          Low -> rl
    in
      r
\stoplambda

After (extended) β-reduction again:

\startlambda
  λx.λc.λd.
    let s = foo x
        a = case s of (a, _) -> a
        b = case s of (_, b) -> b
        r = case s of (_, _) -> r'
        rh = add c d
        rl = case b of
          High -> sub d c
          Low  -> d
        r' = case a of
          High -> rh
          Low -> rl
    in
      r
\stoplambda

After case value simplification again:

\startlambda
  λx.λc.λd.
    let s = foo x
        a = case s of (a, _) -> a
        b = case s of (_, b) -> b
        r = case s of (_, _) -> r'
        rh = add c d
        rlh = sub d c
        rl = case b of
          High -> rlh
          Low  -> d
        r' = case a of
          High -> rh
          Low -> rl
    in
      r
\stoplambda

After case removal:

\startlambda
  λx.λc.λd.
    let s = foo x
        a = case s of (a, _) -> a
        b = case s of (_, b) -> b
        r = r'
        rh = add c d
        rlh = sub d c
        rl = case b of
          High -> rlh
          Low  -> d
        r' = case a of
          High -> rh
          Low -> rl
    in
      r
\stoplambda

After let bind removal:

\startlambda
  λx.λc.λd.
    let s = foo x
        a = case s of (a, _) -> a
        b = case s of (_, b) -> b
        rh = add c d
        rlh = sub d c
        rl = case b of
          High -> rlh
          Low  -> d
        r' = case a of
          High -> rh
          Low -> rl
    in
      r'
\stoplambda

Application simplification is not applicable.
\stoptext