From: Matthijs Kooijman Date: Thu, 27 Aug 2009 11:17:32 +0000 (+0200) Subject: Improve subscript handling in pret-lam. X-Git-Tag: final-thesis~282 X-Git-Url: https://git.stderr.nl/gitweb?a=commitdiff_plain;h=1864c65fe74e332c5aca3ccb9878b98aa1aa93e7;p=matthijs%2Fmaster-project%2Freport.git Improve subscript handling in pret-lam. The handling is now a bit more strict when automatically detecting multiple character subscripts, to prevent things like "return" from being turned into "r_eturn". --- diff --git a/pret-lam.lua b/pret-lam.lua index aff6988..ea07811 100644 --- a/pret-lam.lua +++ b/pret-lam.lua @@ -63,7 +63,7 @@ end local function take_word(str) -- A word must always start with a-z (in particular, λ is not a valid -- start of a word). - res, newstr = utf.match(str, "^([a-zA-Z][%a%d_]+)(.*)") + res, newstr = utf.match(str, "^([a-zA-Z][%a%d%+%-%,_]+)(.*)") return res, newstr or str end @@ -90,8 +90,12 @@ local function do_subscripts(word) if not bases[base] then -- Register that we've added this base bases[base] = true - -- Add a pattern for this base - submatches[#submatches+1] = "^(" .. base .. ")([%a%d,]+)$" + -- Add patterns for this base. First, the base with a single + -- letter or number subscript. + submatches[#submatches+1] = "^(" .. base .. ")([%a%d])$" + -- Second, the base with a longer subscript that includes at least + -- one of +-, (to catch things like ri+1, but not return). + submatches[#submatches+1] = "^(" .. base .. ")([%a%d]*[%-%+%,]+[%a%d%-%+%,]*)$" end end return word @@ -100,7 +104,7 @@ end function vis.begin_of_display() -- Initially allow subscripts using _ or just appending a number (later, -- we will add extra patterns here. - submatches = {"^(.*)_([%a%d,]+)$", "^(.*[^%d])(%d+)$"} + submatches = {"^(%a*)_([%a%d,]+)$", "^(%a+)(%d+)$"} -- This stores all the bases we've encountered so far (to prevent -- duplicates). For each of them there will be a pattern in submatches -- above.