X-Git-Url: https://git.stderr.nl/gitweb?p=matthijs%2Fmaster-project%2Freport.git;a=blobdiff_plain;f=Chapters%2FNormalization.tex;h=4b3a08c821404729476f02aefdba5af96c0c1585;hp=6c9b43470dee4a4b714b08bdad4a0b5fcd23182f;hb=eca9391c2835722ba61218e5a0c13d25872f0a03;hpb=dbbb3487185b8f524a18b315094f50e6c3a42c2b diff --git a/Chapters/Normalization.tex b/Chapters/Normalization.tex index 6c9b434..4b3a08c 100644 --- a/Chapters/Normalization.tex +++ b/Chapters/Normalization.tex @@ -1,4 +1,4 @@ -\chapter{Normalization} +\chapter[chap:normalization]{Normalization} % A helper to print a single example in the half the page width. The example % text should be in a buffer whose name is given in an argument. @@ -7,7 +7,7 @@ % will end up on a single line. The strut=no option prevents a bunch of empty % space at the start of the frame. \define[1]\example{ - \framed[offset=1mm,align=right,strut=no]{ + \framed[offset=1mm,align=right,strut=no,background=box,frame=off]{ \setuptyping[option=LAM,style=sans,before=,after=] \typebuffer[#1] \setuptyping[option=none,style=\tttf] @@ -52,13 +52,116 @@ while fully preserving the semantics of the program. This {\em normal form} is again a Core program, but with a very specific structure. A function in normal form has nested lambda's at the top, which -produce a let expression. This let expression binds every function application -in the function and produces a simple identifier. Every bound value in -the let expression is either a simple function application or a case -expression to extract a single element from a tuple returned by a -function. +produce a number of nested let expressions. These let expressions binds a +number of simple expressions in the function and produces a simple identifier. +Every bound value in the let expression is either a simple function +application, a case expression to extract a single element from a tuple +returned by a function or a case expression to choose between two signals +based on some other signal. + +This structure is easy to translate to VHDL, since each top level lambda will +be an input port, every bound value will become a concurrent statement (such +as a component instantiation or conditional signal assignment) and the result +variable will become the output port. + +\startbuffer[MulSum] +alu :: Bit -> Word -> Word -> Word +alu = λa.λb.λc. + let + mul = (*) a b + sum = (+) mul c + in + sum +\stopbuffer + +\startuseMPgraphic{MulSum} + save a, b, c, mul, add, sum; + + % I/O ports + newCircle.a(btex $a$ etex) "framed(false)"; + newCircle.b(btex $b$ etex) "framed(false)"; + newCircle.c(btex $c$ etex) "framed(false)"; + newCircle.sum(btex $res$ etex) "framed(false)"; + + % Components + newCircle.mul(btex - etex); + newCircle.add(btex + etex); + + a.c - b.c = (0cm, 2cm); + b.c - c.c = (0cm, 2cm); + add.c = c.c + (2cm, 0cm); + mul.c = midpoint(a.c, b.c) + (2cm, 0cm); + sum.c = add.c + (2cm, 0cm); + c.c = origin; + + % Draw objects and lines + drawObj(a, b, c, mul, add, sum); + + ncarc(a)(mul) "arcangle(15)"; + ncarc(b)(mul) "arcangle(-15)"; + ncline(c)(add); + ncline(mul)(add); + ncline(add)(sum); +\stopuseMPgraphic + +\placeexample[ex:MulSum]{\small{ALU} described in normal form} + \startcombination[2*1] + {\typebufferlam{MulSum}}{Description in normal form} + {\boxedgraphic{MulSum}}{Described architecture} + \stopcombination -An example of a program in canonical form would be: +\startbuffer[AddSubAlu] +alu :: Bit -> Word -> Word -> Word +alu = λopcode.λa.λb. + let + res1 = (+) a b + res2 = (-) a b + res = case op of + Low -> res1 + High -> res2 + in + res +\stopbuffer + +\startuseMPgraphic{AddSubAlu} + save opcode, a, b, add, sub, mux, res; + + % I/O ports + newCircle.opcode(btex $opcode$ etex) "framed(false)"; + newCircle.a(btex $a$ etex) "framed(false)"; + newCircle.b(btex $b$ etex) "framed(false)"; + newCircle.res(btex $res$ etex) "framed(false)"; + % Components + newCircle.add(btex + etex); + newCircle.sub(btex - etex); + newMux.mux; + + opcode.c - a.c = (0cm, 2cm); + add.c - a.c = (4cm, 0cm); + sub.c - b.c = (4cm, 0cm); + a.c - b.c = (0cm, 3cm); + mux.c = midpoint(add.c, sub.c) + (1.5cm, 0cm); + res.c - mux.c = (1.5cm, 0cm); + b.c = origin; + + % Draw objects and lines + drawObj(opcode, a, b, res, add, sub, mux); + + ncline(a)(add) "posA(e)"; + ncline(b)(sub) "posA(e)"; + nccurve(a)(sub) "posA(e)", "angleA(0)"; + nccurve(b)(add) "posA(e)", "angleA(0)"; + nccurve(add)(mux) "posB(inpa)", "angleB(0)"; + nccurve(sub)(mux) "posB(inpb)", "angleB(0)"; + nccurve(opcode)(mux) "posB(n)", "angleA(0)", "angleB(-90)"; + ncline(mux)(res) "posA(out)"; +\stopuseMPgraphic + +\placeexample[ex:AddSubAlu]{\small{ALU} described in normal form} + \startcombination[2*1] + {\typebufferlam{AddSubAlu}}{Description in normal form} + {\boxedgraphic{AddSubAlu}}{Described architecture} + \stopcombination \startlambda -- All arguments are an inital lambda @@ -93,36 +196,114 @@ An example of a program in canonical form would be: res \stoplambda +\subsection{Definitions} +In the following sections, we will be using a number of functions and +notations, which we will define here. + +\subsubsection{Transformations} +The most important notation is the one for transformation, which looks like +the following: + +\starttrans +context conditions +~ +from +------------------------ expression conditions +to +~ +context additions +\stoptrans + +Here, we describe a transformation. The most import parts are \lam{from} and +\lam{to}, which describe the Core expresssion that should be matched and the +expression that it should be replaced with. This matching can occur anywhere +in function that is being normalized, so it applies to any subexpression as +well. + +The \lam{expression conditions} list a number of conditions on the \lam{from} +expression that must hold for the transformation to apply. + +Furthermore, there is some way to look into the environment (\eg, other top +level bindings). The \lam{context conditions} part specifies any number of +top level bindings that must be present for the transformation to apply. +Usually, this lists a top level binding that binds an identfier that is also +used in the \lam{from} expression, allowing us to "access" the value of a top +level binding in the \lam{to} expression (\eg, for inlining). + +Finally, there is a way to influence the environment. The \lam{context +additions} part lists any number of new top level bindings that should be +added. + +If there are no \lam{context conditions} or \lam{context additions}, they can +be left out alltogether, along with the separator \lam{~}. + +TODO: Example + +\subsubsection{Other concepts} +A \emph{global variable} is any variable that is bound at the +top level of a program, or an external module. A local variable is any other +variable (\eg, variables local to a function, which can be bound by lambda +abstractions, let expressions and case expressions). + +A \emph{hardware representable} type is a type that we can generate +a signal for in hardware. For example, a bit, a vector of bits, a 32 bit +unsigned word, etc. Types that are not runtime representable notably +include (but are not limited to): Types, dictionaries, functions. + +A \emph{builtin function} is a function for which a builtin +hardware translation is available, because its actual definition is not +translatable. A user-defined function is any other function. + +\subsubsection{Functions} +Here, we define a number of functions that can be used below to concisely +specify conditions. + +\emph{gvar(expr)} is true when \emph{expr} is a variable that references a +global variable. It is false when it references a local variable. + +\emph{lvar(expr)} is the inverse of \emph{gvar}; it is true when \emph{expr} +references a local variable, false when it references a global variable. + +\emph{representable(expr)} or \emph{representable(var)} is true when +\emph{expr} or \emph{var} has a type that is representable at runtime. + +\subsection{Normal form definition} +We can describe this normal form in a slightly more formal manner. The +following EBNF-like description completely captures the intended structure +(and generates a subset of GHC's core format). + +Some clauses have an expression listed in parentheses. These are conditions +that need to apply to the clause. + \startlambda \italic{normal} = \italic{lambda} -\italic{lambda} = λvar.\italic{lambda} (representable(typeof(var))) +\italic{lambda} = λvar.\italic{lambda} (representable(var)) | \italic{toplet} \italic{toplet} = let \italic{binding} in \italic{toplet} | letrec [\italic{binding}] in \italic{toplet} - | var (representable(typeof(var)), fvar(var)) -\italic{binding} = var = \italic{rhs} (representable(typeof(rhs))) + | var (representable(varvar)) +\italic{binding} = var = \italic{rhs} (representable(rhs)) -- State packing and unpacking by coercion - | var0 = var1 :: State ty (fvar(var1)) - | var0 = var1 :: ty (var0 :: State ty) (fvar(var1)) + | var0 = var1 :: State ty (lvar(var1)) + | var0 = var1 :: ty (var0 :: State ty) (lvar(var1)) \italic{rhs} = userapp | builtinapp -- Extractor case - | case var of C a0 ... an -> ai (fvar(var)) + | case var of C a0 ... an -> ai (lvar(var)) -- Selector case - | case var of (fvar(var)) - DEFAULT -> var0 (fvar(var0)) - C w0 ... wn -> resvar (\forall{}i, wi \neq resvar, fvar(resvar)) + | case var of (lvar(var)) + DEFAULT -> var0 (lvar(var0)) + C w0 ... wn -> resvar (\forall{}i, wi \neq resvar, lvar(resvar)) \italic{userapp} = \italic{userfunc} | \italic{userapp} {userarg} -\italic{userfunc} = var (tvar(var)) -\italic{userarg} = var (fvar(var)) +\italic{userfunc} = var (gvar(var)) +\italic{userarg} = var (lvar(var)) \italic{builtinapp} = \italic{builtinfunc} | \italic{builtinapp} \italic{builtinarg} \italic{builtinfunc} = var (bvar(var)) \italic{builtinarg} = \italic{coreexpr} \stoplambda --- TODO: Define tvar, fvar, typeof, representable -- TODO: Limit builtinarg further -- TODO: There can still be other casts around (which the code can handle, @@ -140,62 +321,16 @@ construction (\eg the \lam{case} statement) or call a builtin function (\eg \lam{add} or \lam{sub}). For these, a hardcoded VHDL translation is available. -\subsection{Normal definition} -Formally, the normal form is a core program obeying the following -constraints. TODO: Update this section, this is probably not completely -accurate or relevant anymore. - -\startitemize[R,inmargin] -%\item All top level binds must have the form $\expr{\bind{fun}{lamexpr}}$. -%$fun$ is an identifier that will be bound as a global identifier. -%\item A $lamexpr$ has the form $\expr{\lam{arg}{lamexpr}}$ or -%$\expr{letexpr}$. $arg$ is an identifier which will be bound as an $argument$. -%\item[letexpr] A $letexpr$ has the form $\expr{\letexpr{letbinds}{retexpr}}$. -%\item $letbinds$ is a list with elements of the form -%$\expr{\bind{res}{appexpr}}$ or $\expr{\bind{res}{builtinexpr}}$, where $res$ is -%an identifier that will be bound as local identifier. The type of the bound -%value must be a $hardware\;type$. -%\item[builtinexpr] A $builtinexpr$ is an expression that can be mapped to an -%equivalent VHDL expression. Since there are many supported forms for this, -%these are defined in a separate table. -%\item An $appexpr$ has the form $\expr{fun}$ or $\expr{\app{appexpr}{x}}$, -%where $fun$ is a global identifier and $x$ is a local identifier. -%\item[retexpr] A $retexpr$ has the form $\expr{x}$ or $\expr{tupexpr}$, where $x$ is a local identifier that is bound as an $argument$ or $result$. A $retexpr$ must -%be of a $hardware\;type$. -%\item A $tupexpr$ has the form $\expr{con}$ or $\expr{\app{tupexpr}{x}}$, -%where $con$ is a tuple constructor ({\em e.g.} $(,)$ or $(,,,)$) and $x$ is -%a local identifier. -%\item A $hardware\;type$ is a type that can be directly translated to -%hardware. This includes the types $Bit$, $SizedWord$, tuples containing -%elements of $hardware\;type$s, and will include others. This explicitely -%excludes function types. -\stopitemize - -TODO: Say something about uniqueness of identifiers - -\subsection{Builtin expressions} -A $builtinexpr$, as defined at \in[builtinexpr] can have any of the following forms. - -\startitemize[m,inmargin] -%\item -%$tuple\_extract=\expr{\case{t}{\alt{\app{con}{x_0\;x_1\;..\;x_n}}{x_i}}}$, -%where $t$ can be any local identifier, $con$ is a tuple constructor ({\em -%e.g.} $(,)$ or $(,,,)$), $x_0$ to $x_n$ can be any identifier, and $x_i$ can -%be any of $x_0$ to $x_n$. A case expression must have a $hardware\;type$. -%\item TODO: Many more! -\stopitemize - \section{Transform passes} - In this section we describe the actual transforms. Here we're using the core language in a notation that resembles lambda calculus. Each of these transforms is meant to be applied to every (sub)expression in a program, for as long as it applies. Only when none of the -expressions can be applied anymore, the program is in normal form. We -hope to be able to prove that this form will obey all of the constraints -defined above, but this has yet to happen (though it seems likely that -it will). +transformations can be applied anymore, the program is in normal form (by +definition). We hope to be able to prove that this form will obey all of the +constraints defined above, but this has yet to happen (though it seems likely +that it will). Each of the transforms will be described informally first, explaining the need for and goal of the transform. Then, a formal definition is @@ -221,13 +356,13 @@ E \lam{E :: * -> *} \stoptrans \startbuffer[from] -foo = λa -> case a of +foo = λa.case a of True -> λb.mul b b False -> id \stopbuffer \startbuffer[to] -foo = λa.λx -> (case a of +foo = λa.λx.(case a of True -> λb.mul b b False -> λy.id y) x \stopbuffer @@ -646,7 +781,7 @@ arguments into normal form. The goal here is to: When looking at the arguments of a user-defined function, we can divide them into two categories: \startitemize - \item Arguments with a runtime representable type (\eg bits or vectors). + \item Arguments of a runtime representable type (\eg bits or vectors). These arguments can be preserved in the program, since they can be translated to input ports later on. However, since we can @@ -682,7 +817,7 @@ When looking at the arguments of a builtin function, we can divide them into categories: \startitemize - \item Arguments with a runtime representable type. + \item Arguments of a runtime representable type. As we have seen with user-defined functions, these arguments can always be reduced to a simple variable reference, by the @@ -691,7 +826,7 @@ into categories: functions can be limited to signal references, instead of needing to support all possible expressions. - \item Arguments with a function type. + \item Arguments of a function type. These arguments are functions passed to higher order builtins, like \lam{map} and \lam{foldl}. Since implementing these @@ -871,20 +1006,6 @@ cannot be brought into normal form by this transform. We rely on an inlining transformation to replace such a variable with an expression we can propagate again. -TODO: Move these definitions somewhere sensible. - -Definition: A global variable is any variable that is bound at the -top level of a program. A local variable is any other variable. - -Definition: A hardware representable type is a type that we can generate -a signal for in hardware. For example, a bit, a vector of bits, a 32 bit -unsigned word, etc. Types that are not runtime representable notably -include (but are not limited to): Types, dictionaries, functions. - -Definition: A builtin function is a function for which a builtin -hardware translation is available, because its actual definition is not -translatable. A user-defined function is any other function. - \starttrans x = E ~ @@ -949,247 +1070,3 @@ x = let x = add 1 2 in x \stopbuffer \transexample{Return value simplification}{from}{to} - -\subsection{Example sequence} - -This section lists an example expression, with a sequence of transforms -applied to it. The exact transforms given here probably don't exactly -match the transforms given above anymore, but perhaps this can clarify -the big picture a bit. - -TODO: Update or remove this section. - -\startlambda - λx. - let s = foo x - in - case s of - (a, b) -> - case a of - High -> add - Low -> let - op' = case b of - High -> sub - Low -> λc.λd.c - in - λc.λd.op' d c -\stoplambda - -After top-level η-abstraction: - -\startlambda - λx.λc.λd. - (let s = foo x - in - case s of - (a, b) -> - case a of - High -> add - Low -> let - op' = case b of - High -> sub - Low -> λc.λd.c - in - λc.λd.op' d c - ) c d -\stoplambda - -After (extended) β-reduction: - -\startlambda - λx.λc.λd. - let s = foo x - in - case s of - (a, b) -> - case a of - High -> add c d - Low -> let - op' = case b of - High -> sub - Low -> λc.λd.c - in - op' d c -\stoplambda - -After return value extraction: - -\startlambda - λx.λc.λd. - let s = foo x - r = case s of - (a, b) -> - case a of - High -> add c d - Low -> let - op' = case b of - High -> sub - Low -> λc.λd.c - in - op' d c - in - r -\stoplambda - -Scrutinee simplification does not apply. - -After case binder wildening: - -\startlambda - λx.λc.λd. - let s = foo x - a = case s of (a, _) -> a - b = case s of (_, b) -> b - r = case s of (_, _) -> - case a of - High -> add c d - Low -> let op' = case b of - High -> sub - Low -> λc.λd.c - in - op' d c - in - r -\stoplambda - -After case value simplification - -\startlambda - λx.λc.λd. - let s = foo x - a = case s of (a, _) -> a - b = case s of (_, b) -> b - r = case s of (_, _) -> r' - rh = add c d - rl = let rll = λc.λd.c - op' = case b of - High -> sub - Low -> rll - in - op' d c - r' = case a of - High -> rh - Low -> rl - in - r -\stoplambda - -After let flattening: - -\startlambda - λx.λc.λd. - let s = foo x - a = case s of (a, _) -> a - b = case s of (_, b) -> b - r = case s of (_, _) -> r' - rh = add c d - rl = op' d c - rll = λc.λd.c - op' = case b of - High -> sub - Low -> rll - r' = case a of - High -> rh - Low -> rl - in - r -\stoplambda - -After function inlining: - -\startlambda - λx.λc.λd. - let s = foo x - a = case s of (a, _) -> a - b = case s of (_, b) -> b - r = case s of (_, _) -> r' - rh = add c d - rl = (case b of - High -> sub - Low -> λc.λd.c) d c - r' = case a of - High -> rh - Low -> rl - in - r -\stoplambda - -After (extended) β-reduction again: - -\startlambda - λx.λc.λd. - let s = foo x - a = case s of (a, _) -> a - b = case s of (_, b) -> b - r = case s of (_, _) -> r' - rh = add c d - rl = case b of - High -> sub d c - Low -> d - r' = case a of - High -> rh - Low -> rl - in - r -\stoplambda - -After case value simplification again: - -\startlambda - λx.λc.λd. - let s = foo x - a = case s of (a, _) -> a - b = case s of (_, b) -> b - r = case s of (_, _) -> r' - rh = add c d - rlh = sub d c - rl = case b of - High -> rlh - Low -> d - r' = case a of - High -> rh - Low -> rl - in - r -\stoplambda - -After case removal: - -\startlambda - λx.λc.λd. - let s = foo x - a = case s of (a, _) -> a - b = case s of (_, b) -> b - r = r' - rh = add c d - rlh = sub d c - rl = case b of - High -> rlh - Low -> d - r' = case a of - High -> rh - Low -> rl - in - r -\stoplambda - -After let bind removal: - -\startlambda - λx.λc.λd. - let s = foo x - a = case s of (a, _) -> a - b = case s of (_, b) -> b - rh = add c d - rlh = sub d c - rl = case b of - High -> rlh - Low -> d - r' = case a of - High -> rh - Low -> rl - in - r' -\stoplambda - -Application simplification is not applicable.