Core2Core.tex

   1 \mainlanguage [en]
   2 \setuppapersize[A4][A4]
   3
   4 % Define a custom typescript. We could also have put the \definetypeface's
   5 % directly in the script, without a typescript, but I guess this is more
   6 % elegant...
   7 \starttypescript[Custom]
   8 % This is a sans font that supports greek symbols
   9 \definetypeface [Custom] [ss] [sans]  [iwona]                [default]
  10 \definetypeface [Custom] [rm] [serif] [antykwa-torunska]     [default]
  11 \definetypeface [Custom] [tt] [mono]  [modern]               [default]
  12 \definetypeface [Custom] [mm] [math]  [modern]               [default]
  13 \stoptypescript
  14 \usetypescript [Custom]
  15
  16 % Use our custom typeface
  17 \switchtotypeface [Custom] [10pt]
  18
  19 % The function application operator, which expands to a space in math mode
  20 \define[1]\expr{|#1|}
  21 \define[2]\app{#1\;#2}
  22 \define[2]\lam{λ#1 \xrightarrow #2}
  23 \define[2]\letexpr{{\bold let}\;#1\;{\bold in}\;#2}
  24 \define[2]\case{{\bold case}\;#1\;{\bold of}\;#2}
  25 \define[2]\alt{#1 \xrightarrow #2}
  26 \define[2]\bind{#1:#2}
  27 \define[1]\where{{\bold where}\;#1}
  28 % A transformation
  29 \definefloat[transformation][transformations]
  30 \define[2]\transform{
  31   \startframedtext[width=\textwidth]
  32   #2
  33   \stopframedtext
  34 }
  35
  36 \define\conclusion{\blackrule[height=0.5pt,depth=0pt,width=.5\textwidth]}
  37 \define\nextrule{\vskip1cm}
  38
  39 \define[2]\transformold{
  40   %\placetransformation[here]{#1}
  41   \startframedtext[width=\textwidth]
  42   \startformula \startalign
  43   #2
  44   \stopalign \stopformula
  45   \stopframedtext
  46 }
  47
  48 % A shortcut for italicized e.g. and i.e.
  49 \define[0]\eg{{\em e.g.}}
  50 \define[0]\ie{{\em i.e.}}
  51
  52 \definedescription
  53   [desc]
  54   [location=hanging,hang=20,width=broad]
  55    %command=\hskip-1cm,margin=1cm]
  56
  57 % Install the lambda calculus pretty-printer, as defined in pret-lam.lua.
  58 \installprettytype [LAM] [LAM]
  59
  60 \definetyping[lambda][option=LAM,style=sans]
  61 \definetype[lam][option=LAM,style=sans]
  62
  63 % An (invisible) frame to hold a lambda expression
  64 \define[1]\lamframe{
  65         % Put a frame around lambda expressions, so they can have multiple
  66         % lines and still appear inline.
  67         % The align=right option really does left-alignment, but without the
  68         % program will end up on a single line. The strut=no option prevents a
  69         % bunch of empty space at the start of the frame.
  70         \framed[offset=0mm,location=middle,strut=no,align=right,frame=off]{#1}
  71 }
  72
  73 \define[2]\transbuf{
  74         % Make \typebuffer uses the LAM pretty printer and a sans-serif font
  75         % Also prevent any extra spacing above and below caused by the default
  76         % before=\blank and after=\blank.
  77         \setuptyping[option=LAM,style=sans,before=,after=]
  78         % Prevent the arrow from ending up below the first frame (a \framed
  79         % at the start of a line defaults to using vmode).
  80         \dontleavehmode
  81         % Put the elements in frames, so they can have multiple lines and be
  82         % middle-aligned
  83         \lamframe{\typebuffer[#1]}
  84         \lamframe{\Rightarrow}
  85         \lamframe{\typebuffer[#2]}
  86         % Reset the typing settings to their defaults
  87         \setuptyping[option=none,style=\tttf]
  88 }
  89 % This is the same as \transbuf above, but it accepts text directly instead
  90 % of through buffers. This only works for single lines, however.
  91 \define[2]\trans{
  92         \dontleavehmode
  93         \lamframe{\lam{#1}}
  94         \lamframe{\Rightarrow}
  95         \lamframe{\lam{#2}}
  96 }
  97
  98
  99 % A helper to print a single example in the half the page width. The example
 100 % text should be in a buffer whose name is given in an argument.
 101 %
 102 % The align=right option really does left-alignment, but without the program
 103 % will end up on a single line. The strut=no option prevents a bunch of empty
 104 % space at the start of the frame.
 105 \define[1]\example{
 106   \framed[offset=1mm,align=right,strut=no]{
 107     \setuptyping[option=LAM,style=sans,before=,after=]
 108     \typebuffer[#1]
 109     \setuptyping[option=none,style=\tttf]
 110   }
 111 }
 112
 113
 114 % A transformation example
 115 \definefloat[example][examples]
 116 \setupcaption[example][location=top] % Put captions on top
 117
 118 \define[3]\transexample{
 119   \placeexample[here]{#1}
 120   \startcombination[2*1]
 121     {\example{#2}}{Original program}
 122     {\example{#3}}{Transformed program}
 123   \stopcombination
 124 }
 125
 126 \define[3]\transexampleh{
 127 %  \placeexample[here]{#1}
 128 %  \startcombination[1*2]
 129 %    {\example{#2}}{Original program}
 130 %    {\example{#3}}{Transformed program}
 131 %  \stopcombination
 132 }
 133
 134 % Define a custom description format for the builtinexprs below
 135 \definedescription[builtindesc][headstyle=bold,style=normal,location=top]
 136
 137 \starttext
 138 \title {Core transformations for hardware generation}
 139 Matthijs Kooijman
 140
 141 \section{Introduction}
 142 As a new approach to translating Core to VHDL, we investigate a number of
 143 transformations on our Core program, which should bring the program into a
 144 well-defined "canonical" form, which is subsequently trivial to translate to
 145 VHDL.
 146
 147 The transformations as presented here are far from complete, but are meant as
 148 an exploration of possible transformations. In the running example below, we
 149 apply each of the transformations exactly once, in sequence. As will be
 150 apparent from the end result, there will be additional transformations needed
 151 to fully reach our goal, and some transformations must be applied more than
 152 once. How exactly to (efficiently) do this, has not been investigated.
 153
 154 Lastly, I hope to be able to state a number of pre- and postconditions for
 155 each transformation. If these can be proven for each transformation, and it
 156 can be shown that there exists some ordering of transformations for which the
 157 postcondition implies the canonical form, we can show that the transformations
 158 do indeed transform any program (probably satisfying a number of
 159 preconditions) to the canonical form.
 160
 161 \section{Goal}
 162 The transformations described here have a well-defined goal: To bring the
 163 program in a well-defined form that is directly translatable to hardware,
 164 while fully preserving the semantics of the program.
 165
 166 This {\em canonical form} is again a Core program, but with a very specific
 167 structure. A function in canonical form has nested lambda's at the top, which
 168 produce a let expression. This let expression binds every function application
 169 in the function and produces either a simple identifier or a tuple of
 170 identifiers. Every bound value in the let expression is either a simple
 171 function application or a case expression to extract a single element from a
 172 tuple returned by a function.
 173
 174 An example of a program in canonical form would be:
 175
 176 \starttyping
 177   -- All arguments are an inital lambda
 178   \x c d ->
 179   -- There is one let expression at the top level
 180   let
 181     -- Calling some other user-defined function.
 182     s = foo x
 183     -- Extracting result values from a tuple
 184     a = case s of (a, b) -> a
 185     b = case s of (a, b) -> b
 186     -- Some builtin expressions
 187     rh = add c d
 188     rhh = sub d c
 189     -- Conditional connections
 190     rl = case b of
 191       High -> rhh
 192       Low -> d
 193     r = case a of
 194       High -> rh
 195       Low -> rl
 196   in
 197     -- The actual result
 198     r
 199 \stoptyping
 200
 201 In this and all following programs, the following definitions are assumed to
 202 be available:
 203
 204 \starttyping
 205 data Bit = Low | High
 206
 207 foo :: Int -> (Bit, Bit)
 208 add :: Int -> Int -> Int
 209 sub :: Int -> Int -> Int
 210 \stoptyping
 211
 212 When looking at such a program from a hardware perspective, the top level
 213 lambda's define the input ports. The value produced by the let expression are
 214 the output ports. Each function application bound by the let expression
 215 defines a component instantiation, where the input and output ports are mapped
 216 to local signals or arguments. The tuple extracting case expressions don't map
 217 to
 218
 219 \subsection{Canonical form definition}
 220 Formally, the canonical form is a core program obeying the following
 221 constraints.
 222
 223 \startitemize[R,inmargin]
 224 \item All top level binds must have the form $\expr{\bind{fun}{lamexpr}}$.
 225 $fun$ is an identifier that will be bound as a global identifier.
 226 \item A $lamexpr$ has the form $\expr{\lam{arg}{lamexpr}}$ or
 227 $\expr{letexpr}$. $arg$ is an identifier which will be bound as an $argument$.
 228 \item[letexpr] A $letexpr$ has the form $\expr{\letexpr{letbinds}{retexpr}}$.
 229 \item $letbinds$ is a list with elements of the form
 230 $\expr{\bind{res}{appexpr}}$ or $\expr{\bind{res}{builtinexpr}}$, where $res$ is
 231 an identifier that will be bound as local identifier. The type of the bound
 232 value must be a $hardware\;type$.
 233 \item[builtinexpr] A $builtinexpr$ is an expression that can be mapped to an
 234 equivalent VHDL expression. Since there are many supported forms for this,
 235 these are defined in a separate table.
 236 \item An $appexpr$ has the form $\expr{fun}$ or $\expr{\app{appexpr}{x}}$,
 237 where $fun$ is a global identifier and $x$ is a local identifier.
 238 \item[retexpr] A $retexpr$ has the form $\expr{x}$ or $\expr{tupexpr}$, where $x$ is a local identifier that is bound as an $argument$ or $result$.  A $retexpr$ must
 239 be of a $hardware\;type$.
 240 \item A $tupexpr$ has the form $\expr{con}$ or $\expr{\app{tupexpr}{x}}$,
 241 where $con$ is a tuple constructor ({\em e.g.} $(,)$ or $(,,,)$) and $x$ is
 242 a local identifier.
 243 \item A $hardware\;type$ is a type that can be directly translated to
 244 hardware. This includes the types $Bit$, $SizedWord$, tuples containing
 245 elements of $hardware\;type$s, and will include others. This explicitely
 246 excludes function types.
 247 \stopitemize
 248
 249 TODO: Say something about uniqueness of identifiers
 250
 251 \subsection{Builtin expressions}
 252 A $builtinexpr$, as defined at \in[builtinexpr] can have any of the following forms.
 253
 254 \startitemize[m,inmargin]
 255 \item
 256 $tuple\_extract=\expr{\case{t}{\alt{\app{con}{x_0\;x_1\;..\;x_n}}{x_i}}}$,
 257 where $t$ can be any local identifier, $con$ is a tuple constructor ({\em
 258 e.g.} $(,)$ or $(,,,)$), $x_0$ to $x_n$ can be any identifier, and $x_i$ can
 259 be any of $x_0$ to $x_n$. A case expression must have a $hardware\;type$.
 260 \item TODO: Many more!
 261 \stopitemize
 262
 263 \section{Transformation passes}
 264
 265 In this section we describe the actual transformations. Here we're using
 266 mostly Core-like notation, with a few notable points.
 267
 268 \startitemize
 269 \item A core expression (in contrast with a transformation function, for
 270 example), is enclosed in pipes. For example, $\app{transform}{\expr{\lam{z}{z+1}}}$
 271 is the transform function applied to a lambda core expression.
 272
 273 Note that this notation might not be consistently applied everywhere. In
 274 particular where a non-core function is used inside a core expression, things
 275 get slightly confusing.
 276 \item A bind is written as $\expr{\bind{x}{expr}}$. This means binding the identifier
 277 $x$ to the expression $expr$.
 278 \item In the core examples, the layout rule from Haskell is loosely applied.
 279 It should be evident what was meant from indentation, even though it might nog
 280 be strictly correct.
 281 \stopitemize
 282
 283 \subsection{Example}
 284 In the descriptions of transformations below, the following (slightly
 285 contrived) example program will be used as the running example. Note that this
 286 the example for the canonical form given above is the same program, but in
 287 canonical form.
 288
 289 \starttyping
 290   \x ->
 291     let s = foo x
 292     in
 293       case s of
 294         (a, b) ->
 295           case a of
 296             High -> add
 297             Low -> let
 298               op' = case b of
 299                 High -> sub
 300                 Low  -> \c d -> c
 301               in
 302                 \c d -> op' d c
 303 \stoptyping
 304
 305 \subsection{η-abstraction}
 306 This transformation makes sure that all arguments of a function-typed
 307 expression are named, by introducing lambda expressions. When combined with
 308 β-reduction and function inlining below, all function-typed expressions should
 309 be lambda abstractions or global identifiers.
 310
 311 \transform{η-abstraction}
 312 {
 313 \lam{E :: * -> *}
 314
 315 \lam{E} is not the first argument of an application.
 316
 317 \lam{E} is not a lambda abstraction.
 318
 319 \lam{x} is a variable that does not occur free in E.
 320
 321 \conclusion
 322
 323 \trans{E}{λx.E x}
 324 }
 325
 326 \startbuffer[from]
 327 foo = λa -> case a of
 328   True -> λb.mul b b
 329   False -> id
 330 \stopbuffer
 331
 332 \startbuffer[to]
 333 foo = λa.λx -> (case a of
 334     True -> λb.mul b b
 335     False -> λy.id y) x
 336 \stopbuffer
 337
 338 \transexample{η-abstraction}{from}{to}
 339
 340 \subsection{Extended β-reduction}
 341 This transformation is meant to propagate application expressions downwards
 342 into expressions as far as possible. In lambda calculus, this reduction
 343 is known as β-reduction, but it is of course only defined for
 344 applications of lambda abstractions. We extend this reduction to also
 345 work for the rest of core (case and let expressions).
 346 \startbuffer[from]
 347 (case x of
 348   p1 -> E1
 349   \vdots
 350   pn -> En) M
 351 \stopbuffer
 352 \startbuffer[to]
 353 case x of
 354   p1 -> E1 M
 355   \vdots
 356   pn -> En M
 357 \stopbuffer
 358
 359 \transform{Extended β-reduction}
 360 {
 361 \conclusion
 362 \trans{(λx.E) M}{E[M/x]}
 363
 364 \nextrule
 365 \conclusion
 366 \trans{(let binds in E) M}{let binds in E M}
 367
 368 \nextrule
 369 \conclusion
 370 \transbuf{from}{to}
 371 }
 372
 373 \startbuffer[from]
 374 let a = (case x of
 375            True -> id
 376            False -> neg
 377         ) 1
 378     b = (let y = 3 in add y) 2
 379 in
 380   (λz.add 1 z) 3
 381 \stopbuffer
 382
 383 \startbuffer[to]
 384 let a = case x of
 385            True -> id 1
 386            False -> neg 1
 387     b = let y = 3 in add y 2
 388 in
 389   add 1 3
 390 \stopbuffer
 391
 392 \transexample{Extended β-reduction}{from}{to}
 393
 394 \subsection{Argument simplification}
 395 The transforms in this section deal with simplifying application
 396 arguments into normal form. The goal here is to:
 397
 398 \startitemize
 399  \item Make all arguments of user-defined functions (\eg, of which
 400  we have a function body) simple variable references of a runtime
 401  representable type.
 402  \item Make all arguments of builtin functions either:
 403    \startitemize
 404     \item A type argument.
 405     \item A dictionary argument.
 406     \item A type level expression.
 407     \item A variable reference of a runtime representable type.
 408     \item A variable reference or partial application of a function type.
 409    \stopitemize
 410 \stopitemize
 411
 412 When looking at the arguments of a user-defined function, we can
 413 divide them into two categories:
 414 \startitemize
 415   \item Arguments with a runtime representable type (\eg bits or vectors).
 416
 417         These arguments can be preserved in the program, since they can
 418         be translated to input ports later on.  However, since we can
 419         only connect signals to input ports, these arguments must be
 420         reduced to simple variables (for which signals will be
 421         produced). This is taken care of by the argument extraction
 422         transform.
 423   \item Non-runtime representable typed arguments.
 424
 425         These arguments cannot be preserved in the program, since we
 426         cannot represent them as input or output ports in the resulting
 427         VHDL. To remove them, we create a specialized version of the
 428         called function with these arguments filled in. This is done by
 429         the argument propagation transform.
 430 \stopitemize
 431
 432 When looking at the arguments of a builtin function, we can divide them
 433 into categories:
 434
 435 \startitemize
 436   \item Arguments with a runtime representable type.
 437
 438         As we have seen with user-defined functions, these arguments can
 439         always be reduced to a simple variable reference, by the
 440         argument extraction transform. Performing this transform for
 441         builtin functions as well, means that the translation of builtin
 442         functions can be limited to signal references, instead of
 443         needing to support all possible expressions.
 444
 445   \item Arguments with a function type.
 446
 447         These arguments are functions passed to higher order builtins,
 448         like \lam{map} and \lam{foldl}. Since implementing these
 449         functions for arbitrary function-typed expressions (\eg, lambda
 450         expressions) is rather comlex, we reduce these arguments to
 451         (partial applications of) global functions.
 452
 453         We can still support arbitrary expressions from the user code,
 454         by creating a new global function containing that expression.
 455         This way, we can simply replace the argument with a reference to
 456         that new function. However, since the expression can contain any
 457         number of free variables we also have to include partial
 458         applications in our normal form.
 459
 460         This category of arguments is handled by the function extraction
 461         transform.
 462   \item Other unrepresentable arguments.
 463
 464         These arguments can take a few different forms:
 465         \startdesc{Type arguments}
 466           In the core language, type arguments can only take a single
 467           form: A type wrapped in the Type constructor. Also, there is
 468           nothing that can be done with type expressions, except for
 469           applying functions to them, so we can simply leave type
 470           arguments as they are.
 471         \stopdesc
 472         \startdesc{Dictionary arguments}
 473           In the core language, dictionary arguments are used to find
 474           operations operating on one of the type arguments (mostly for
 475           finding class methods). Since we will not actually evaluatie
 476           the function body for builtin functions and can generate
 477           code for builtin functions by just looking at the type
 478           arguments, these arguments can be ignored and left as they
 479           are.
 480         \stopdesc
 481         \startdesc{Type level arguments}
 482           Sometimes, we want to pass a value to a builtin function, but
 483           we need to know the value at compile time. Additionally, the
 484           value has an impact on the type of the function. This is
 485           encoded using type-level values, where the actual value of the
 486           argument is not important, but the type encodes some integer,
 487           for example. Since the value is not important, the actual form
 488           of the expression does not matter either and we can leave
 489           these arguments as they are.
 490         \stopdesc
 491         \startdesc{Other arguments}
 492           Technically, there is still a wide array of arguments that can
 493           be passed, but does not fall into any of the above categories.
 494           However, none of the supported builtin functions requires such
 495           an argument. This leaves use with passing unsupported types to
 496           a function, such as calling \lam{head} on a list of functions.
 497
 498           In these cases, it would be impossible to generate hardware
 499           for such a function call anyway, so we can ignore these
 500           arguments.
 501
 502           The only way to generate hardware for builtin functions with
 503           arguments like these, is to expand the function call into an
 504           equivalent core expression (\eg, expand map into a series of
 505           function applications). But for now, we choose to simply not
 506           support expressions like these.
 507         \stopdesc
 508
 509         From the above, we can conclude that we can simply ignore these
 510         other unrepresentable arguments and focus on the first two
 511         categories instead.
 512 \stopitemize
 513
 514 \subsubsection{Argument extraction}
 515 This transform deals with arguments to functions that
 516 are of a runtime representable type.
 517
 518 TODO: It seems we can map an expression to a port, not only a signal.
 519 Perhaps this makes this transformation not needed?
 520 TODO: Say something about dataconstructors (without arguments, like True
 521 or False), which are variable references of a runtime representable
 522 type, but do not result in a signal.
 523
 524 To reduce a complex expression to a simple variable reference, we create
 525 a new let expression around the application, which binds the complex
 526 expression to a new variable. The original function is then applied to
 527 this variable.
 528
 529 \transform{Argument extract}
 530 {
 531 \lam{Y} is of a hardware representable type
 532
 533 \lam{Y} is not a variable referene
 534
 535 \conclusion
 536
 537 \trans{X Y}{let z = Y in X z}
 538 }
 539
 540 \subsubsection{Function extraction}
 541 This transform deals with function-typed arguments to builtin functions.
 542 Since these arguments cannot be propagated, we choose to extract them
 543 into a new global function instead.
 544
 545 Any free variables occuring in the extracted arguments will become
 546 parameters to the new global function. The original argument is replaced
 547 with a reference to the new function, applied to any free variables from
 548 the original argument.
 549
 550 \transform{Function extraction}
 551 {
 552 \lam{X} is a (partial application of) a builtin function
 553
 554 \lam{Y} is not an application
 555
 556 \lam{Y} is not a variable reference
 557
 558 \conclusion
 559
 560 \lam{f0 ... fm} = free local vars of \lam{Y}
 561
 562 \lam{y} is a new global variable
 563
 564 \lam{y = λf0 ... fn.Y}
 565
 566 \trans{X Y}{X (y f0 ... fn)}
 567 }
 568
 569 \subsubsection{Argument propagation}
 570 This transform deals with arguments to user-defined functions that are
 571 not representable at runtime. This means these arguments cannot be
 572 preserved in the final form and most be {\em propagated}.
 573
 574 Propagation means to create a specialized version of the called
 575 function, with the propagated argument already filled in. As a simple
 576 example, in the following program:
 577
 578 \startlambda
 579 f = λa.λb.a + b
 580 inc = λa.f a 1
 581 \stoplambda
 582
 583 we could {\em propagate} the constant argument 1, with the following
 584 result:
 585
 586 \startlambda
 587 f' = λa.a + 1
 588 inc = λa.f' a
 589 \stoplambda
 590
 591 Special care must be taken when the to-be-propagated expression has any
 592 free variables. If this is the case, the original argument should not be
 593 removed alltogether, but replaced by all the free variables of the
 594 expression. In this way, the original expression can still be evaluated
 595 inside the new function. Also, this brings us closer to our goal: All
 596 these free variables will be simple variable references.
 597
 598 To prevent us from propagating the same argument over and over, a simple
 599 local variable reference is not propagated (since is has exactly one
 600 free variable, itself, we would only replace that argument with itself).
 601
 602 This shows that any free local variables that are not runtime representable
 603 cannot be brought into normal form by this transform. We rely on an
 604 inlining transformation to replace such a variable with an expression we
 605 can propagate again.
 606
 607 TODO: Move these definitions somewhere sensible.
 608
 609 Definition: A global variable is any variable that is bound at the
 610 top level of a program. A local variable is any other variable.
 611
 612 Definition: A hardware representable type is a type that we can generate
 613 a signal for in hardware. For example, a bit, a vector of bits, a 32 bit
 614 unsigned word, etc. Types that are not runtime representable notably
 615 include (but are not limited to): Types, dictionaries, functions.
 616
 617 Definition: A builtin function is a function for which a builtin
 618 hardware translation is available, because its actual definition is not
 619 translatable. A user-defined function is any other function.
 620
 621 \transform{Argument propagation}
 622 {
 623 \lam{x} is a global variable, bound to a user-defined function
 624
 625 \lam{x = E}
 626
 627 \lam{Y_i} is not of a runtime representable type
 628
 629 \lam{Y_i} is not a local variable reference
 630
 631 \conclusion
 632
 633 \lam{f0 ... fm} = free local vars of \lam{Y_i}
 634
 635 \lam{x'} is a new global variable
 636
 637 \lam{x' = λy0 ... yi-1 f0 ... fm yi+1 ... yn . E y0 ... yi-1 Yi yi+1 ... yn}
 638
 639 \trans{x Y0 ... Yi ... Yn}{x' y0 ... yi-1 f0 ... fm Yi+1 ... Yn}
 640 }
 641
 642 TODO: The above definition looks too complicated... Can we find
 643 something more concise?
 644
 645 \subsection{Introducing main scope}
 646 This transformation is meant to introduce a single let expression that will be
 647 the "main scope". This is the let expression as described under requirement
 648 \ref[letexpr]. This let expression will contain only a single binding, which
 649 binds the original expression to some identifier and then evalutes to just
 650 this identifier (to comply with requirement \in[retexpr]).
 651
 652 Formally, we can describe the transformation as follows.
 653
 654 \transformold{Main scope introduction}
 655 {
 656 \NC \app{transform}{\expr{\bind{f}{expr}}} \NC = \expr{\bind{f}{\app{transform'(expr)}}}\NR
 657 \NR
 658 \NC \app{transform'}{\expr{\lam{v}{expr}}} \NC = \expr{\lam{v}{\app{transform'}{expr}}}\NR
 659 \NC \app{transform'}{\expr{expr}} \NC = \expr{\letexpr{\bind{x}{expr}}{x}} \NR
 660 }
 661
 662 When applying this transformation to our running example, we get the following
 663 program.
 664
 665 \starttyping
 666   \x c d ->
 667     let r = (let s = foo x
 668               in
 669                 case s of
 670                   (a, b) ->
 671                     case a of
 672                       High -> add c d
 673                       Low -> let
 674                         op' = case b of
 675                           High -> sub
 676                           Low  -> \c d -> c
 677                         in
 678                           op' d c
 679             )
 680     in
 681       r
 682 \stoptyping
 683
 684 \subsection{Scope flattening}
 685 This transformation tries to flatten the topmost let expression in a bind,
 686 {\em i.e.}, bind all identifiers in the same scope, and bind them to simple
 687 expressions only (so simplify deeply nested expressions).
 688
 689 Formally, we can describe the transformation as follows.
 690
 691 \transformold{Main scope introduction} { \NC \app{transform}{\expr{\bind{f}{expr}}} \NC = \expr{\bind{f}{\app{transform'(expr)}}}\NR
 692 \NR
 693 \NC \app{transform'}{\expr{\lam{v}{expr}}} \NC = \expr{\lam{v}{\app{transform'}{expr}}}\NR
 694 \NC \app{transform'}{\expr{\letexpr{binds}{expr}}} \NC = \expr{\letexpr{\app{concat . map . flatten}{binds}}{expr}}\NR
 695 \NR
 696 \NC \app{flatten}{\expr{\bind{x}{\letexpr{binds}{expr}}}} \NC = (\app{concat . map . flatten}{binds}) \cup \{\app{flatten}{\expr{\bind{x}{expr}}}\}\NR
 697 \NC \app{flatten}{\expr{\bind{x}{\case{s}{alts}}}} \NC = \app{concat}{binds'} \cup \{\bind{x}{\case{s}{alts'}}\}\NR
 698 \NC                                                \NC  \where{(binds', alts')=\app{unzip.map.(flattenalt s)}{alts}}\NR
 699 \NC \app{\app{flattenalt}{s}}{\expr{\alt{\app{con}{x_0\;..\;x_n}}{expr}}} \NC = (extracts \cup \{\app{flatten}{bind}\}, alt)\NR
 700 \NC \NC \where{extracts =\{\expr{\case{s}{\alt{\app{con}{x_0\;..\;x_n}}{x_0}}},}\NR
 701 \NC \NC \;..\;,\expr{\case{s}{\alt{\app{con}{x_0\;..\;x_n}}{x_n}}}\} \NR
 702 \NC \NC bind = \expr{\bind{y}{expr}}\NR
 703 \NC \NC alt = \expr{\alt{\app{con}{\_\;..\;\_}}{y}}\NR
 704 }
 705
 706 When applying this transformation to our running example, we get the following
 707 program.
 708
 709 \starttyping
 710   \x c d ->
 711     let s = foo x
 712         r = case s of
 713               (_, _) -> y
 714         a = case s of (a, b) -> a
 715         b = case s of (a, b) -> b
 716         y = case a of
 717               High -> g
 718               Low -> h
 719         g = add c d
 720         h = op' d c
 721         op' = case b of
 722           High -> i
 723           Low  -> j
 724         i = sub
 725         j = \c d -> c
 726     in
 727       r
 728 \stoptyping
 729
 730 \subsection{More transformations}
 731 As noted before, the above transformations are not complete. Other needed
 732 transformations include:
 733 \startitemize
 734 \item Inlining of local identifiers with a function type. Since these cannot
 735 be represented in hardware directly, they must be transformed into something
 736 else. Inlining them should always be possible without loss of semantics (TODO:
 737 How true is this?) and can expose new possibilities for other transformations
 738 passes (such as application propagation when inlining {\tt j} above).
 739 \item A variation on inlining local identifiers is the propagation of
 740 function arguments with a function type. This will probably be initiated when
 741 transforming the caller (and not the callee), but it will also deal with
 742 identifiers with a function type that are unrepresentable in hardware.
 743
 744 Special care must be taken here, since the expression that is propagated into
 745 the callee comes from a different scope. The function typed argument must thus
 746 be replaced by any identifiers from the callers scope that the propagated
 747 expression uses.
 748
 749 Note that propagating an argument will change both a function's interface and
 750 implementation. For this to work, a new function should be created instead of
 751 modifying the original function, so any other callers will not be broken.
 752 \item Something similar should happen with return values with function types.
 753 \item Polymorphism must be removed from all user-defined functions. This is
 754 again similar to propagation function typed arguments, since this will also
 755 create duplicates of functions (for a specific type). This is an operation
 756 that is commonly known as "specialization" and already happens in GHC (since
 757 non-polymorph functions can be a lot faster than generic ones).
 758 \item More builtin expressions should be added and these should be evaluated
 759 by the compiler. For example, map, fold, +.
 760 \stopitemize
 761
 762 Initial example
 763
 764 \startlambda
 765   λx.
 766     let s = foo x
 767     in
 768       case s of
 769         (a, b) ->
 770           case a of
 771             High -> add
 772             Low -> let
 773               op' = case b of
 774                 High -> sub
 775                 Low  -> λc.λd.c
 776               in
 777                 λc.λd.op' d c
 778 \stoplambda
 779
 780 After top-level η-abstraction:
 781
 782 \startlambda
 783   λx.λc.λd.
 784     (let s = foo x
 785     in
 786       case s of
 787         (a, b) ->
 788           case a of
 789             High -> add
 790             Low -> let
 791               op' = case b of
 792                 High -> sub
 793                 Low  -> λc.λd.c
 794               in
 795                 λc.λd.op' d c
 796     ) c d
 797 \stoplambda
 798
 799 After (extended) β-reduction:
 800
 801 \startlambda
 802   λx.λc.λd.
 803     let s = foo x
 804     in
 805       case s of
 806         (a, b) ->
 807           case a of
 808             High -> add c d
 809             Low -> let
 810               op' = case b of
 811                 High -> sub
 812                 Low  -> λc.λd.c
 813               in
 814                 op' d c
 815 \stoplambda
 816
 817 After return value extraction:
 818
 819 \startlambda
 820   λx.λc.λd.
 821     let s = foo x
 822         r = case s of
 823               (a, b) ->
 824                 case a of
 825                   High -> add c d
 826                   Low -> let
 827                     op' = case b of
 828                       High -> sub
 829                       Low  -> λc.λd.c
 830                     in
 831                       op' d c
 832     in
 833       r
 834 \stoplambda
 835
 836 Scrutinee simplification does not apply.
 837
 838 After case binder wildening:
 839
 840 \startlambda
 841   λx.λc.λd.
 842     let s = foo x
 843         a = case s of (a, _) -> a
 844         b = case s of (_, b) -> b
 845         r = case s of (_, _) ->
 846               case a of
 847                 High -> add c d
 848                 Low -> let op' = case b of
 849                              High -> sub
 850                              Low  -> λc.λd.c
 851                        in
 852                          op' d c
 853     in
 854       r
 855 \stoplambda
 856
 857 After case value simplification
 858
 859 \startlambda
 860   λx.λc.λd.
 861     let s = foo x
 862         a = case s of (a, _) -> a
 863         b = case s of (_, b) -> b
 864         r = case s of (_, _) -> r'
 865         rh = add c d
 866         rl = let rll = λc.λd.c
 867                  op' = case b of
 868                    High -> sub
 869                    Low  -> rll
 870              in
 871                op' d c
 872         r' = case a of
 873                High -> rh
 874                Low -> rl
 875     in
 876       r
 877 \stoplambda
 878
 879 After let flattening:
 880
 881 \startlambda
 882   λx.λc.λd.
 883     let s = foo x
 884         a = case s of (a, _) -> a
 885         b = case s of (_, b) -> b
 886         r = case s of (_, _) -> r'
 887         rh = add c d
 888         rl = op' d c
 889         rll = λc.λd.c
 890         op' = case b of
 891           High -> sub
 892           Low  -> rll
 893         r' = case a of
 894                High -> rh
 895                Low -> rl
 896     in
 897       r
 898 \stoplambda
 899
 900 After function inlining:
 901
 902 \startlambda
 903   λx.λc.λd.
 904     let s = foo x
 905         a = case s of (a, _) -> a
 906         b = case s of (_, b) -> b
 907         r = case s of (_, _) -> r'
 908         rh = add c d
 909         rl = (case b of
 910           High -> sub
 911           Low  -> λc.λd.c) d c
 912         r' = case a of
 913           High -> rh
 914           Low -> rl
 915     in
 916       r
 917 \stoplambda
 918
 919 After (extended) β-reduction again:
 920
 921 \startlambda
 922   λx.λc.λd.
 923     let s = foo x
 924         a = case s of (a, _) -> a
 925         b = case s of (_, b) -> b
 926         r = case s of (_, _) -> r'
 927         rh = add c d
 928         rl = case b of
 929           High -> sub d c
 930           Low  -> d
 931         r' = case a of
 932           High -> rh
 933           Low -> rl
 934     in
 935       r
 936 \stoplambda
 937
 938 After case value simplification again:
 939
 940 \startlambda
 941   λx.λc.λd.
 942     let s = foo x
 943         a = case s of (a, _) -> a
 944         b = case s of (_, b) -> b
 945         r = case s of (_, _) -> r'
 946         rh = add c d
 947         rlh = sub d c
 948         rl = case b of
 949           High -> rlh
 950           Low  -> d
 951         r' = case a of
 952           High -> rh
 953           Low -> rl
 954     in
 955       r
 956 \stoplambda
 957
 958 After case removal:
 959
 960 \startlambda
 961   λx.λc.λd.
 962     let s = foo x
 963         a = case s of (a, _) -> a
 964         b = case s of (_, b) -> b
 965         r = r'
 966         rh = add c d
 967         rlh = sub d c
 968         rl = case b of
 969           High -> rlh
 970           Low  -> d
 971         r' = case a of
 972           High -> rh
 973           Low -> rl
 974     in
 975       r
 976 \stoplambda
 977
 978 After let bind removal:
 979
 980 \startlambda
 981   λx.λc.λd.
 982     let s = foo x
 983         a = case s of (a, _) -> a
 984         b = case s of (_, b) -> b
 985         rh = add c d
 986         rlh = sub d c
 987         rl = case b of
 988           High -> rlh
 989           Low  -> d
 990         r' = case a of
 991           High -> rh
 992           Low -> rl
 993     in
 994       r'
 995 \stoplambda
 996
 997 Application simplification is not applicable.
 998 \stoptext