Chapters/Normalization.tex

   1 \chapter[chap:normalization]{Normalization}
   2
   3 % A helper to print a single example in the half the page width. The example
   4 % text should be in a buffer whose name is given in an argument.
   5 %
   6 % The align=right option really does left-alignment, but without the program
   7 % will end up on a single line. The strut=no option prevents a bunch of empty
   8 % space at the start of the frame.
   9 \define[1]\example{
  10   \framed[offset=1mm,align=right,strut=no,background=box,frame=off]{
  11     \setuptyping[option=LAM,style=sans,before=,after=]
  12     \typebuffer[#1]
  13     \setuptyping[option=none,style=\tttf]
  14   }
  15 }
  16
  17
  18 % A transformation example
  19 \definefloat[example][examples]
  20 \setupcaption[example][location=top] % Put captions on top
  21
  22 \define[3]\transexample{
  23   \placeexample[here]{#1}
  24   \startcombination[2*1]
  25     {\example{#2}}{Original program}
  26     {\example{#3}}{Transformed program}
  27   \stopcombination
  28 }
  29 %
  30 %\define[3]\transexampleh{
  31 %%  \placeexample[here]{#1}
  32 %%  \startcombination[1*2]
  33 %%    {\example{#2}}{Original program}
  34 %%    {\example{#3}}{Transformed program}
  35 %%  \stopcombination
  36 %}
  37
  38 The first step in the core to VHDL translation process, is normalization. We
  39 aim to bring the core description into a simpler form, which we can
  40 subsequently translate into VHDL easily. This normal form is needed because
  41 the full core language is more expressive than VHDL in some areas and because
  42 core can describe expressions that do not have a direct hardware
  43 interpretation.
  44
  45 TODO: Describe core properties not supported in VHDL, and describe how the
  46 VHDL we want to generate should look like.
  47
  48 \section{Goal}
  49 The transformations described here have a well-defined goal: To bring the
  50 program in a well-defined form that is directly translatable to hardware,
  51 while fully preserving the semantics of the program.
  52
  53 This {\em normal form} is again a Core program, but with a very specific
  54 structure. A function in normal form has nested lambda's at the top, which
  55 produce a number of nested let expressions. These let expressions binds a
  56 number of simple expressions in the function and produces a simple identifier.
  57 Every bound value in the let expression is either a simple function
  58 application, a case expression to extract a single element from a tuple
  59 returned by a function or a case expression to choose between two signals
  60 based on some other signal.
  61
  62 This structure is easy to translate to VHDL, since each top level lambda will
  63 be an input port, every bound value will become a concurrent statement (such
  64 as a component instantiation or conditional signal assignment) and the result
  65 variable will become the output port.
  66
  67 \startbuffer[MulSum]
  68 alu :: Bit -> Word -> Word -> Word
  69 alu = λa.λb.λc.
  70     let
  71       mul = (*) a b
  72       sum = (+) mul c
  73     in
  74       sum
  75 \stopbuffer
  76
  77 \startuseMPgraphic{MulSum}
  78   save a, b, c, mul, add, sum;
  79
  80   % I/O ports
  81   newCircle.a(btex $a$ etex) "framed(false)";
  82   newCircle.b(btex $b$ etex) "framed(false)";
  83   newCircle.c(btex $c$ etex) "framed(false)";
  84   newCircle.sum(btex $res$ etex) "framed(false)";
  85
  86   % Components
  87   newCircle.mul(btex - etex);
  88   newCircle.add(btex + etex);
  89
  90   a.c      - b.c   = (0cm, 2cm);
  91   b.c      - c.c   = (0cm, 2cm);
  92   add.c            = c.c + (2cm, 0cm);
  93   mul.c            = midpoint(a.c, b.c) + (2cm, 0cm);
  94   sum.c            = add.c + (2cm, 0cm);
  95   c.c              = origin;
  96
  97   % Draw objects and lines
  98   drawObj(a, b, c, mul, add, sum);
  99
 100   ncarc(a)(mul) "arcangle(15)";
 101   ncarc(b)(mul) "arcangle(-15)";
 102   ncline(c)(add);
 103   ncline(mul)(add);
 104   ncline(add)(sum);
 105 \stopuseMPgraphic
 106
 107 \placeexample[ex:MulSum]{\small{ALU} described in normal form}
 108   \startcombination[2*1]
 109     {\typebufferlam{MulSum}}{Description in normal form}
 110     {\boxedgraphic{MulSum}}{Described architecture}
 111   \stopcombination
 112
 113 \startbuffer[AddSubAlu]
 114 alu :: Bit -> Word -> Word -> Word
 115 alu = λopcode.λa.λb.
 116     let
 117       res1 = (+) a b
 118       res2 = (-) a b
 119       res = case op of
 120         Low -> res1
 121         High -> res2
 122     in
 123       res
 124 \stopbuffer
 125
 126 \startuseMPgraphic{AddSubAlu}
 127   save opcode, a, b, add, sub, mux, res;
 128
 129   % I/O ports
 130   newCircle.opcode(btex $opcode$ etex) "framed(false)";
 131   newCircle.a(btex $a$ etex) "framed(false)";
 132   newCircle.b(btex $b$ etex) "framed(false)";
 133   newCircle.res(btex $res$ etex) "framed(false)";
 134   % Components
 135   newCircle.add(btex + etex);
 136   newCircle.sub(btex - etex);
 137   newMux.mux;
 138
 139   opcode.c - a.c   = (0cm, 2cm);
 140   add.c    - a.c   = (4cm, 0cm);
 141   sub.c    - b.c   = (4cm, 0cm);
 142   a.c      - b.c   = (0cm, 3cm);
 143   mux.c            = midpoint(add.c, sub.c) + (1.5cm, 0cm);
 144   res.c    - mux.c = (1.5cm, 0cm);
 145   b.c              = origin;
 146
 147   % Draw objects and lines
 148   drawObj(opcode, a, b, res, add, sub, mux);
 149
 150   ncline(a)(add) "posA(e)";
 151   ncline(b)(sub) "posA(e)";
 152   nccurve(a)(sub) "posA(e)", "angleA(0)";
 153   nccurve(b)(add) "posA(e)", "angleA(0)";
 154   nccurve(add)(mux) "posB(inpa)", "angleB(0)";
 155   nccurve(sub)(mux) "posB(inpb)", "angleB(0)";
 156   nccurve(opcode)(mux) "posB(n)", "angleA(0)", "angleB(-90)";
 157   ncline(mux)(res) "posA(out)";
 158 \stopuseMPgraphic
 159
 160 \placeexample[ex:AddSubAlu]{\small{ALU} described in normal form}
 161   \startcombination[2*1]
 162     {\typebufferlam{AddSubAlu}}{Description in normal form}
 163     {\boxedgraphic{AddSubAlu}}{Described architecture}
 164   \stopcombination
 165
 166 \startlambda
 167   -- All arguments are an inital lambda
 168   λa.λd.λsp.
 169   -- There are nested let expressions at top level
 170   let
 171     -- Unpack the state by coercion
 172     s = sp :: (Word, Word)
 173     -- Extract both registers from the state
 174     r1 = case s of (fst, snd) -> fst
 175     r2 = case s of (fst, snd) -> snd
 176     -- Calling some other user-defined function.
 177     d' = foo d
 178     -- Conditional connections
 179     out = case a of
 180       High -> r1
 181       Low -> r2
 182     r1' = case a of
 183       High -> d
 184       Low -> r1
 185     r2' = case a of
 186       High -> r2
 187       Low -> d
 188     -- Packing a tuple
 189     s' = (,) r1' r2'
 190     -- Packing the state by coercion
 191     sp' = s' :: State (Word, Word)
 192     -- Pack our return value
 193     res = (,) sp' out
 194   in
 195     -- The actual result
 196     res
 197 \stoplambda
 198
 199 \subsection{Definitions}
 200 In the following sections, we will be using a number of functions and
 201 notations, which we will define here.
 202
 203 \subsubsection{Transformations}
 204 The most important notation is the one for transformation, which looks like
 205 the following:
 206
 207 \starttrans
 208 context conditions
 209 ~
 210 from
 211 ------------------------            expression conditions
 212 to
 213 ~
 214 context additions
 215 \stoptrans
 216
 217 Here, we describe a transformation. The most import parts are \lam{from} and
 218 \lam{to}, which describe the Core expresssion that should be matched and the
 219 expression that it should be replaced with. This matching can occur anywhere
 220 in function that is being normalized, so it applies to any subexpression as
 221 well.
 222
 223 The \lam{expression conditions} list a number of conditions on the \lam{from}
 224 expression that must hold for the transformation to apply.
 225
 226 Furthermore, there is some way to look into the environment (\eg, other top
 227 level bindings).  The \lam{context conditions} part specifies any number of
 228 top level bindings that must be present for the transformation to apply.
 229 Usually, this lists a top level binding that binds an identfier that is also
 230 used in the \lam{from} expression, allowing us to "access" the value of a top
 231 level binding in the \lam{to} expression (\eg, for inlining).
 232
 233 Finally, there is a way to influence the environment. The \lam{context
 234 additions} part lists any number of new top level bindings that should be
 235 added.
 236
 237 If there are no \lam{context conditions} or \lam{context additions}, they can
 238 be left out alltogether, along with the separator \lam{~}.
 239
 240 TODO: Example
 241
 242 \subsubsection{Other concepts}
 243 A \emph{global variable} is any variable that is bound at the
 244 top level of a program, or an external module. A local variable is any other
 245 variable (\eg, variables local to a function, which can be bound by lambda
 246 abstractions, let expressions and case expressions).
 247
 248 A \emph{hardware representable} type is a type that we can generate
 249 a signal for in hardware. For example, a bit, a vector of bits, a 32 bit
 250 unsigned word, etc. Types that are not runtime representable notably
 251 include (but are not limited to): Types, dictionaries, functions.
 252
 253 A \emph{builtin function} is a function for which a builtin
 254 hardware translation is available, because its actual definition is not
 255 translatable. A user-defined function is any other function.
 256
 257 \subsubsection{Functions}
 258 Here, we define a number of functions that can be used below to concisely
 259 specify conditions.
 260
 261 \emph{gvar(expr)} is true when \emph{expr} is a variable that references a
 262 global variable. It is false when it references a local variable.
 263
 264 \emph{lvar(expr)} is the inverse of \emph{gvar}; it is true when \emph{expr}
 265 references a local variable, false when it references a global variable.
 266
 267 \emph{representable(expr)} or \emph{representable(var)} is true when
 268 \emph{expr} or \emph{var} has a type that is representable at runtime.
 269
 270 \subsection{Normal form definition}
 271 We can describe this normal form in a slightly more formal manner. The
 272 following EBNF-like description completely captures the intended structure
 273 (and generates a subset of GHC's core format).
 274
 275 Some clauses have an expression listed in parentheses. These are conditions
 276 that need to apply to the clause.
 277
 278 \startlambda
 279 \italic{normal} = \italic{lambda}
 280 \italic{lambda} = λvar.\italic{lambda} (representable(var))
 281                 | \italic{toplet}
 282 \italic{toplet} = let \italic{binding} in \italic{toplet}
 283                 | letrec [\italic{binding}] in \italic{toplet}
 284                 | var (representable(varvar))
 285 \italic{binding} = var = \italic{rhs} (representable(rhs))
 286                  -- State packing and unpacking by coercion
 287                  | var0 = var1 :: State ty (lvar(var1))
 288                  | var0 = var1 :: ty (var0 :: State ty) (lvar(var1))
 289 \italic{rhs} = userapp
 290              | builtinapp
 291              -- Extractor case
 292              | case var of C a0 ... an -> ai (lvar(var))
 293              -- Selector case
 294              | case var of (lvar(var))
 295                 DEFAULT -> var0 (lvar(var0))
 296                 C w0 ... wn -> resvar (\forall{}i, wi \neq resvar, lvar(resvar))
 297 \italic{userapp} = \italic{userfunc}
 298                  | \italic{userapp} {userarg}
 299 \italic{userfunc} = var (gvar(var))
 300 \italic{userarg} = var (lvar(var))
 301 \italic{builtinapp} = \italic{builtinfunc}
 302                     | \italic{builtinapp} \italic{builtinarg}
 303 \italic{builtinfunc} = var (bvar(var))
 304 \italic{builtinarg} = \italic{coreexpr}
 305 \stoplambda
 306
 307 -- TODO: Limit builtinarg further
 308
 309 -- TODO: There can still be other casts around (which the code can handle,
 310 e.g., ignore), which still need to be documented here.
 311
 312 -- TODO: Note about the selector case. It just supports Bit and Bool
 313 currently, perhaps it should be generalized in the normal form?
 314
 315 When looking at such a program from a hardware perspective, the top level
 316 lambda's define the input ports. The value produced by the let expression is
 317 the output port. Most function applications bound by the let expression
 318 define a component instantiation, where the input and output ports are mapped
 319 to local signals or arguments. Some of the others use a builtin
 320 construction (\eg the \lam{case} statement) or call a builtin function
 321 (\eg \lam{add} or \lam{sub}). For these, a hardcoded VHDL translation is
 322 available.
 323
 324 \section{Transform passes}
 325 In this section we describe the actual transforms. Here we're using
 326 the core language in a notation that resembles lambda calculus.
 327
 328 Each of these transforms is meant to be applied to every (sub)expression
 329 in a program, for as long as it applies. Only when none of the
 330 transformations can be applied anymore, the program is in normal form (by
 331 definition). We hope to be able to prove that this form will obey all of the
 332 constraints defined above, but this has yet to happen (though it seems likely
 333 that it will).
 334
 335 Each of the transforms will be described informally first, explaining
 336 the need for and goal of the transform. Then, a formal definition is
 337 given, using a familiar syntax from the world of logic. Each transform
 338 is specified as a number of conditions (above the horizontal line) and a
 339 number of conclusions (below the horizontal line). The details of using
 340 this notation are still a bit fuzzy, so comments are welcom.
 341
 342 TODO: Formally describe the "apply to every (sub)expression" in terms of
 343 rules with full transformations in the conditions.
 344
 345 \subsection{η-abstraction}
 346 This transformation makes sure that all arguments of a function-typed
 347 expression are named, by introducing lambda expressions. When combined with
 348 β-reduction and function inlining below, all function-typed expressions should
 349 be lambda abstractions or global identifiers.
 350
 351 \starttrans
 352 E                 \lam{E :: * -> *}
 353 --------------    \lam{E} is not the first argument of an application.
 354 λx.E x            \lam{E} is not a lambda abstraction.
 355                   \lam{x} is a variable that does not occur free in \lam{E}.
 356 \stoptrans
 357
 358 \startbuffer[from]
 359 foo = λa.case a of
 360   True -> λb.mul b b
 361   False -> id
 362 \stopbuffer
 363
 364 \startbuffer[to]
 365 foo = λa.λx.(case a of
 366     True -> λb.mul b b
 367     False -> λy.id y) x
 368 \stopbuffer
 369
 370 \transexample{η-abstraction}{from}{to}
 371
 372 \subsection{Extended β-reduction}
 373 This transformation is meant to propagate application expressions downwards
 374 into expressions as far as possible. In lambda calculus, this reduction
 375 is known as β-reduction, but it is of course only defined for
 376 applications of lambda abstractions. We extend this reduction to also
 377 work for the rest of core (case and let expressions).
 378
 379 For let expressions:
 380 \starttrans
 381 let binds in E) M
 382 -----------------
 383 let binds in E M
 384 \stoptrans
 385
 386 For case statements:
 387 \starttrans
 388 (case x of
 389   p1 -> E1
 390   \vdots
 391   pn -> En) M
 392 -----------------
 393 case x of
 394   p1 -> E1 M
 395   \vdots
 396   pn -> En M
 397 \stoptrans
 398
 399 For lambda expressions:
 400 \starttrans
 401 (λx.E) M
 402 -----------------
 403 E[M/x]
 404 \stoptrans
 405
 406 % And an example
 407 \startbuffer[from]
 408 ( let a = (case x of
 409             True -> id
 410             False -> neg
 411           ) 1
 412       b = (let y = 3 in add y) 2
 413   in
 414     (λz.add 1 z)
 415 ) 3
 416 \stopbuffer
 417
 418 \startbuffer[to]
 419 let a = case x of
 420            True -> id 1
 421            False -> neg 1
 422     b = let y = 3 in add y 2
 423 in
 424   add 1 3
 425 \stopbuffer
 426
 427 \transexample{Extended β-reduction}{from}{to}
 428
 429 \subsection{Let derecursification}
 430 This transformation is meant to make lets non-recursive whenever possible.
 431 This might allow other optimizations to do their work better. TODO: Why is
 432 this needed exactly?
 433
 434 \subsection{Let flattening}
 435 This transformation puts nested lets in the same scope, by lifting the
 436 binding(s) of the inner let into a new let around the outer let. Eventually,
 437 this will cause all let bindings to appear in the same scope (they will all be
 438 in scope for the function return value).
 439
 440 Note that this transformation does not try to be smart when faced with
 441 recursive lets, it will just leave the lets recursive (possibly joining a
 442 recursive and non-recursive let into a single recursive let). The let
 443 rederursification transformation will do this instead.
 444
 445 \starttrans
 446 letnonrec x = (let bindings in M) in N
 447 ------------------------------------------
 448 let bindings in (letnonrec x = M) in N
 449 \stoptrans
 450
 451 \starttrans
 452 letrec
 453   \vdots
 454   x = (let bindings in M)
 455   \vdots
 456 in
 457   N
 458 ------------------------------------------
 459 letrec
 460   \vdots
 461   bindings
 462   x = M
 463   \vdots
 464 in
 465   N
 466 \stoptrans
 467
 468 \startbuffer[from]
 469 let
 470   a = letrec
 471     x = 1
 472     y = 2
 473   in
 474     x + y
 475 in
 476   letrec
 477     b = let c = 3 in a + c
 478     d = 4
 479   in
 480     d + b
 481 \stopbuffer
 482 \startbuffer[to]
 483 letrec
 484   x = 1
 485   y = 2
 486 in
 487   let
 488     a = x + y
 489   in
 490     letrec
 491       c = 3
 492       b = a + c
 493       d = 4
 494     in
 495       d + b
 496 \stopbuffer
 497
 498 \transexample{Let flattening}{from}{to}
 499
 500 \subsection{Empty let removal}
 501 This transformation is simple: It removes recursive lets that have no bindings
 502 (which usually occurs when let derecursification removes the last binding from
 503 it).
 504
 505 \starttrans
 506 letrec in M
 507 --------------
 508 M
 509 \stoptrans
 510
 511 \subsection{Simple let binding removal}
 512 This transformation inlines simple let bindings (\eg a = b).
 513
 514 This transformation is not needed to get into normal form, but makes the
 515 resulting VHDL a lot shorter.
 516
 517 \starttrans
 518 letnonrec
 519   a = b
 520 in
 521   M
 522 -----------------
 523 M[b/a]
 524 \stoptrans
 525
 526 \starttrans
 527 letrec
 528   \vdots
 529   a = b
 530   \vdots
 531 in
 532   M
 533 -----------------
 534 let
 535   \vdots [b/a]
 536   \vdots [b/a]
 537 in
 538   M[b/a]
 539 \stoptrans
 540
 541 \subsection{Unused let binding removal}
 542 This transformation removes let bindings that are never used. Usually,
 543 the desugarer introduces some unused let bindings.
 544
 545 This normalization pass should really be unneeded to get into normal form
 546 (since ununsed bindings are not forbidden by the normal form), but in practice
 547 the desugarer or simplifier emits some unused bindings that cannot be
 548 normalized (e.g., calls to a \type{PatError} (TODO: Check this name)). Also,
 549 this transformation makes the resulting VHDL a lot shorter.
 550
 551 \starttrans
 552 let a = E in M
 553 ----------------------------    \lam{a} does not occur free in \lam{M}
 554 M
 555 \stoptrans
 556
 557 \starttrans
 558 letrec
 559   \vdots
 560   a = E
 561   \vdots
 562 in
 563   M
 564 ----------------------------    \lam{a} does not occur free in \lam{M}
 565 letrec
 566   \vdots
 567   \vdots
 568 in
 569   M
 570 \stoptrans
 571
 572 \subsection{Non-representable binding inlining}
 573 This transform inlines let bindings that have a non-representable type. Since
 574 we can never generate a signal assignment for these bindings (we cannot
 575 declare a signal assignment with a non-representable type, for obvious
 576 reasons), we have no choice but to inline the binding to remove it.
 577
 578 If the binding is non-representable because it is a lambda abstraction, it is
 579 likely that it will inlined into an application and β-reduction will remove
 580 the lambda abstraction and turn it into a representable expression at the
 581 inline site. The same holds for partial applications, which can be turned into
 582 full applications by inlining.
 583
 584 Other cases of non-representable bindings we see in practice are primitive
 585 Haskell types. In most cases, these will not result in a valid normalized
 586 output, but then the input would have been invalid to start with. There is one
 587 exception to this: When a builtin function is applied to a non-representable
 588 expression, things might work out in some cases. For example, when you write a
 589 literal \hs{SizedInt} in Haskell, like \hs{1 :: SizedInt D8}, this results in
 590 the following core: \lam{fromInteger (smallInteger 10)}, where for example
 591 \lam{10 :: GHC.Prim.Int\#} and \lam{smallInteger 10 :: Integer} have
 592 non-representable types. TODO: This/these paragraph(s) should probably become a
 593 separate discussion somewhere else.
 594
 595 \starttrans
 596 letnonrec a = E in M
 597 --------------------------    \lam{E} has a non-representable type.
 598 M[E/a]
 599 \stoptrans
 600
 601 \starttrans
 602 letrec
 603   \vdots
 604   a = E
 605   \vdots
 606 in
 607   M
 608 --------------------------    \lam{E} has a non-representable type.
 609 letrec
 610   \vdots [E/a]
 611   \vdots [E/a]
 612 in
 613   M[E/a]
 614 \stoptrans
 615
 616 \startbuffer[from]
 617 letrec
 618   a = smallInteger 10
 619   inc = λa -> add a 1
 620   inc' = add 1
 621   x = fromInteger a
 622 in
 623   inc (inc' x)
 624 \stopbuffer
 625
 626 \startbuffer[to]
 627 letrec
 628   x = fromInteger (smallInteger 10)
 629 in
 630   (λa -> add a 1) (add 1 x)
 631 \stopbuffer
 632
 633 \transexample{Let flattening}{from}{to}
 634
 635 \subsection{Compiler generated top level binding inlining}
 636 TODO
 637
 638 \subsection{Scrutinee simplification}
 639 This transform ensures that the scrutinee of a case expression is always
 640 a simple variable reference.
 641
 642 \starttrans
 643 case E of
 644   alts
 645 -----------------        \lam{E} is not a local variable reference
 646 let x = E in
 647   case E of
 648     alts
 649 \stoptrans
 650
 651 \startbuffer[from]
 652 case (foo a) of
 653   True -> a
 654   False -> b
 655 \stopbuffer
 656
 657 \startbuffer[to]
 658 let x = foo a in
 659   case x of
 660     True -> a
 661     False -> b
 662 \stopbuffer
 663
 664 \transexample{Let flattening}{from}{to}
 665
 666
 667 \subsection{Case simplification}
 668 This transformation ensures that all case expressions become normal form. This
 669 means they will become one of:
 670 \startitemize
 671 \item An extractor case with a single alternative that picks a single field
 672 from a datatype, \eg \lam{case x of (a, b) -> a}.
 673 \item A selector case with multiple alternatives and only wild binders, that
 674 makes a choice between expressions based on the constructor of another
 675 expression, \eg \lam{case x of Low -> a; High -> b}.
 676 \stopitemize
 677
 678 \starttrans
 679 case E of
 680   C0 v0,0 ... v0,m -> E0
 681   \vdots
 682   Cn vn,0 ... vn,m -> En
 683 --------------------------------------------------- \forall i \forall j, 0 <= i <= n, 0 <= i < m (\lam{wi,j} is a wild (unused) binder)
 684 letnonrec
 685   v0,0 = case x of C0 v0,0 .. v0,m -> v0,0
 686   \vdots
 687   v0,m = case x of C0 v0,0 .. v0,m -> v0,m
 688   x0 = E0
 689   \dots
 690   vn,m = case x of Cn vn,0 .. vn,m -> vn,m
 691   xn = En
 692 in
 693   case E of
 694     C0 w0,0 ... w0,m -> x0
 695     \vdots
 696     Cn wn,0 ... wn,m -> xn
 697 \stoptrans
 698
 699 TODO: This transformation specified like this is complicated and misses
 700 conditions to prevent looping with itself. Perhaps we should split it here for
 701 discussion?
 702
 703 \startbuffer[from]
 704 case a of
 705   True -> add b 1
 706   False -> add b 2
 707 \stopbuffer
 708
 709 \startbuffer[to]
 710 letnonrec
 711   x0 = add b 1
 712   x1 = add b 2
 713 in
 714   case a of
 715     True -> x0
 716     False -> x1
 717 \stopbuffer
 718
 719 \transexample{Selector case simplification}{from}{to}
 720
 721 \startbuffer[from]
 722 case a of
 723   (,) b c -> add b c
 724 \stopbuffer
 725 \startbuffer[to]
 726 letnonrec
 727   b = case a of (,) b c -> b
 728   c = case a of (,) b c -> c
 729   x0 = add b c
 730 in
 731   case a of
 732     (,) w0 w1 -> x0
 733 \stopbuffer
 734
 735 \transexample{Extractor case simplification}{from}{to}
 736
 737 \subsection{Case removal}
 738 This transform removes any case statements with a single alternative and
 739 only wild binders.
 740
 741 These "useless" case statements are usually leftovers from case simplification
 742 on extractor case (see the previous example).
 743
 744 \starttrans
 745 case x of
 746   C v0 ... vm -> E
 747 ----------------------     \lam{\forall i, 0 <= i <= m} (\lam{vi} does not occur free in E)
 748 E
 749 \stoptrans
 750
 751 \startbuffer[from]
 752 case a of
 753   (,) w0 w1 -> x0
 754 \stopbuffer
 755
 756 \startbuffer[to]
 757 x0
 758 \stopbuffer
 759
 760 \transexample{Case removal}{from}{to}
 761
 762 \subsection{Argument simplification}
 763 The transforms in this section deal with simplifying application
 764 arguments into normal form. The goal here is to:
 765
 766 \startitemize
 767  \item Make all arguments of user-defined functions (\eg, of which
 768  we have a function body) simple variable references of a runtime
 769  representable type. This is needed, since these applications will be turned
 770  into component instantiations.
 771  \item Make all arguments of builtin functions one of:
 772    \startitemize
 773     \item A type argument.
 774     \item A dictionary argument.
 775     \item A type level expression.
 776     \item A variable reference of a runtime representable type.
 777     \item A variable reference or partial application of a function type.
 778    \stopitemize
 779 \stopitemize
 780
 781 When looking at the arguments of a user-defined function, we can
 782 divide them into two categories:
 783 \startitemize
 784   \item Arguments of a runtime representable type (\eg bits or vectors).
 785
 786         These arguments can be preserved in the program, since they can
 787         be translated to input ports later on.  However, since we can
 788         only connect signals to input ports, these arguments must be
 789         reduced to simple variables (for which signals will be
 790         produced). This is taken care of by the argument extraction
 791         transform.
 792   \item Non-runtime representable typed arguments.
 793
 794         These arguments cannot be preserved in the program, since we
 795         cannot represent them as input or output ports in the resulting
 796         VHDL. To remove them, we create a specialized version of the
 797         called function with these arguments filled in. This is done by
 798         the argument propagation transform.
 799
 800         Typically, these arguments are type and dictionary arguments that are
 801         used to make functions polymorphic. By propagating these arguments, we
 802         are essentially doing the same which GHC does when it specializes
 803         functions: Creating multiple variants of the same function, one for
 804         each type for which it is used. Other common non-representable
 805         arguments are functions, e.g. when calling a higher order function
 806         with another function or a lambda abstraction as an argument.
 807
 808         The reason for doing this is similar to the reasoning provided for
 809         the inlining of non-representable let bindings above. In fact, this
 810         argument propagation could be viewed as a form of cross-function
 811         inlining.
 812 \stopitemize
 813
 814 TODO: Check the following itemization.
 815
 816 When looking at the arguments of a builtin function, we can divide them
 817 into categories:
 818
 819 \startitemize
 820   \item Arguments of a runtime representable type.
 821
 822         As we have seen with user-defined functions, these arguments can
 823         always be reduced to a simple variable reference, by the
 824         argument extraction transform. Performing this transform for
 825         builtin functions as well, means that the translation of builtin
 826         functions can be limited to signal references, instead of
 827         needing to support all possible expressions.
 828
 829   \item Arguments of a function type.
 830
 831         These arguments are functions passed to higher order builtins,
 832         like \lam{map} and \lam{foldl}. Since implementing these
 833         functions for arbitrary function-typed expressions (\eg, lambda
 834         expressions) is rather comlex, we reduce these arguments to
 835         (partial applications of) global functions.
 836
 837         We can still support arbitrary expressions from the user code,
 838         by creating a new global function containing that expression.
 839         This way, we can simply replace the argument with a reference to
 840         that new function. However, since the expression can contain any
 841         number of free variables we also have to include partial
 842         applications in our normal form.
 843
 844         This category of arguments is handled by the function extraction
 845         transform.
 846   \item Other unrepresentable arguments.
 847
 848         These arguments can take a few different forms:
 849         \startdesc{Type arguments}
 850           In the core language, type arguments can only take a single
 851           form: A type wrapped in the Type constructor. Also, there is
 852           nothing that can be done with type expressions, except for
 853           applying functions to them, so we can simply leave type
 854           arguments as they are.
 855         \stopdesc
 856         \startdesc{Dictionary arguments}
 857           In the core language, dictionary arguments are used to find
 858           operations operating on one of the type arguments (mostly for
 859           finding class methods). Since we will not actually evaluatie
 860           the function body for builtin functions and can generate
 861           code for builtin functions by just looking at the type
 862           arguments, these arguments can be ignored and left as they
 863           are.
 864         \stopdesc
 865         \startdesc{Type level arguments}
 866           Sometimes, we want to pass a value to a builtin function, but
 867           we need to know the value at compile time. Additionally, the
 868           value has an impact on the type of the function. This is
 869           encoded using type-level values, where the actual value of the
 870           argument is not important, but the type encodes some integer,
 871           for example. Since the value is not important, the actual form
 872           of the expression does not matter either and we can leave
 873           these arguments as they are.
 874         \stopdesc
 875         \startdesc{Other arguments}
 876           Technically, there is still a wide array of arguments that can
 877           be passed, but does not fall into any of the above categories.
 878           However, none of the supported builtin functions requires such
 879           an argument. This leaves use with passing unsupported types to
 880           a function, such as calling \lam{head} on a list of functions.
 881
 882           In these cases, it would be impossible to generate hardware
 883           for such a function call anyway, so we can ignore these
 884           arguments.
 885
 886           The only way to generate hardware for builtin functions with
 887           arguments like these, is to expand the function call into an
 888           equivalent core expression (\eg, expand map into a series of
 889           function applications). But for now, we choose to simply not
 890           support expressions like these.
 891         \stopdesc
 892
 893         From the above, we can conclude that we can simply ignore these
 894         other unrepresentable arguments and focus on the first two
 895         categories instead.
 896 \stopitemize
 897
 898 \subsubsection{Argument simplification}
 899 This transform deals with arguments to functions that
 900 are of a runtime representable type. It ensures that they will all become
 901 references to global variables, or local signals in the resulting VHDL.
 902
 903 TODO: It seems we can map an expression to a port, not only a signal.
 904 Perhaps this makes this transformation not needed?
 905 TODO: Say something about dataconstructors (without arguments, like True
 906 or False), which are variable references of a runtime representable
 907 type, but do not result in a signal.
 908
 909 To reduce a complex expression to a simple variable reference, we create
 910 a new let expression around the application, which binds the complex
 911 expression to a new variable. The original function is then applied to
 912 this variable.
 913
 914 \starttrans
 915 M N
 916 --------------------    \lam{N} is of a representable type
 917 let x = N in M x        \lam{N} is not a local variable reference
 918 \stoptrans
 919
 920 \startbuffer[from]
 921 add (add a 1) 1
 922 \stopbuffer
 923
 924 \startbuffer[to]
 925 let x = add a 1 in add x 1
 926 \stopbuffer
 927
 928 \transexample{Argument extraction}{from}{to}
 929
 930 \subsubsection{Function extraction}
 931 This transform deals with function-typed arguments to builtin functions.
 932 Since these arguments cannot be propagated, we choose to extract them
 933 into a new global function instead.
 934
 935 Any free variables occuring in the extracted arguments will become
 936 parameters to the new global function. The original argument is replaced
 937 with a reference to the new function, applied to any free variables from
 938 the original argument.
 939
 940 This transformation is useful when applying higher order builtin functions
 941 like \hs{map} to a lambda abstraction, for example. In this case, the code
 942 that generates VHDL for \hs{map} only needs to handle top level functions and
 943 partial applications, not any other expression (such as lambda abstractions or
 944 even more complicated expressions).
 945
 946 \starttrans
 947 M N                     \lam{M} is a (partial aplication of a) builtin function.
 948 ---------------------   \lam{f0 ... fn} = free local variables of \lam{N}
 949 M x f0 ... fn           \lam{N :: a -> b}
 950 ~                       \lam{N} is not a (partial application of) a top level function
 951 x = λf0 ... λfn.N
 952 \stoptrans
 953
 954 \startbuffer[from]
 955 map (λa . add a b) xs
 956
 957 map (add b) ys
 958 \stopbuffer
 959
 960 \startbuffer[to]
 961 x0 = λb.λa.add a b
 962 ~
 963 map x0 xs
 964
 965 x1 = λb.add b
 966 map x1 ys
 967 \stopbuffer
 968
 969 \transexample{Function extraction}{from}{to}
 970
 971 \subsubsection{Argument propagation}
 972 This transform deals with arguments to user-defined functions that are
 973 not representable at runtime. This means these arguments cannot be
 974 preserved in the final form and most be {\em propagated}.
 975
 976 Propagation means to create a specialized version of the called
 977 function, with the propagated argument already filled in. As a simple
 978 example, in the following program:
 979
 980 \startlambda
 981 f = λa.λb.a + b
 982 inc = λa.f a 1
 983 \stoplambda
 984
 985 we could {\em propagate} the constant argument 1, with the following
 986 result:
 987
 988 \startlambda
 989 f' = λa.a + 1
 990 inc = λa.f' a
 991 \stoplambda
 992
 993 Special care must be taken when the to-be-propagated expression has any
 994 free variables. If this is the case, the original argument should not be
 995 removed alltogether, but replaced by all the free variables of the
 996 expression. In this way, the original expression can still be evaluated
 997 inside the new function. Also, this brings us closer to our goal: All
 998 these free variables will be simple variable references.
 999
1000 To prevent us from propagating the same argument over and over, a simple
1001 local variable reference is not propagated (since is has exactly one
1002 free variable, itself, we would only replace that argument with itself).
1003
1004 This shows that any free local variables that are not runtime representable
1005 cannot be brought into normal form by this transform. We rely on an
1006 inlining transformation to replace such a variable with an expression we
1007 can propagate again.
1008
1009 \starttrans
1010 x = E
1011 ~
1012 x Y0 ... Yi ... Yn                               \lam{Yi} is not of a runtime representable type
1013 ---------------------------------------------    \lam{Yi} is not a local variable reference
1014 x' y0 ... yi-1 f0 ...  fm Yi+1 ... Yn            \lam{f0 ... fm} = free local vars of \lam{Yi}
1015 ~
1016 x' = λy0 ... yi-1 f0 ... fm yi+1 ... yn .
1017       E y0 ... yi-1 Yi yi+1 ... yn
1018
1019 \stoptrans
1020
1021 TODO: Example
1022
1023 \subsection{Cast propagation / simplification}
1024 This transform pushes casts down into the expression as far as possible. Since
1025 its exact role and need is not clear yet, this transformation is not yet
1026 specified.
1027
1028 \subsection{Return value simplification}
1029 This transformation ensures that the return value of a function is always a
1030 simple local variable reference.
1031
1032 Currently implemented using lambda simplification, let simplification, and
1033 top simplification. Should change into something like the following, which
1034 works only on the result of a function instead of any subexpression. This is
1035 achieved by the contexts, like \lam{x = E}, though this is strictly not
1036 correct (you could read this as "if there is any function \lam{x} that binds
1037 \lam{E}, any \lam{E} can be transformed, while we only mean the \lam{E} that
1038 is bound by \lam{x}. This might need some extra notes or something).
1039
1040 \starttrans
1041 x = E                            \lam{E} is representable
1042 ~                                \lam{E} is not a lambda abstraction
1043 E                                \lam{E} is not a let expression
1044 ---------------------------      \lam{E} is not a local variable reference
1045 let x = E in x
1046 \stoptrans
1047
1048 \starttrans
1049 x = λv0 ... λvn.E
1050 ~                                \lam{E} is representable
1051 E                                \lam{E} is not a let expression
1052 ---------------------------      \lam{E} is not a local variable reference
1053 let x = E in x
1054 \stoptrans
1055
1056 \starttrans
1057 x = λv0 ... λvn.let ... in E
1058 ~                                \lam{E} is representable
1059 E                                \lam{E} is not a local variable reference
1060 ---------------------------
1061 let x = E in x
1062 \stoptrans
1063
1064 \startbuffer[from]
1065 x = add 1 2
1066 \stopbuffer
1067
1068 \startbuffer[to]
1069 x = let x = add 1 2 in x
1070 \stopbuffer
1071
1072 \transexample{Return value simplification}{from}{to}