From: Matthijs Kooijman <matthijs@stdin.nl>
Date: Thu, 27 Aug 2009 11:17:32 +0000 (+0200)
Subject: Improve subscript handling in pret-lam.
X-Git-Tag: final-thesis~282
X-Git-Url: https://git.stderr.nl/gitweb?a=commitdiff_plain;h=1864c65fe74e332c5aca3ccb9878b98aa1aa93e7;p=matthijs%2Fmaster-project%2Freport.git

Improve subscript handling in pret-lam.

The handling is now a bit more strict when automatically detecting
multiple character subscripts, to prevent things like "return" from being
turned into "r_eturn".
---

diff --git a/pret-lam.lua b/pret-lam.lua
index aff6988..ea07811 100644
--- a/pret-lam.lua
+++ b/pret-lam.lua
@@ -63,7 +63,7 @@ end
 local function take_word(str)
         -- A word must always start with a-z (in particular, λ is not a valid
         -- start of a word).
-        res, newstr = utf.match(str, "^([a-zA-Z][%a%d_]+)(.*)")
+        res, newstr = utf.match(str, "^([a-zA-Z][%a%d%+%-%,_]+)(.*)")
         return res, newstr or str
 end
 
@@ -90,8 +90,12 @@ local function do_subscripts(word)
         if not bases[base] then
             -- Register that we've added this base
             bases[base] = true
-            -- Add a pattern for this base
-            submatches[#submatches+1] = "^(" .. base .. ")([%a%d,]+)$"
+            -- Add patterns for this base. First, the base with a single
+            -- letter or number subscript.
+            submatches[#submatches+1] = "^(" .. base .. ")([%a%d])$"
+            -- Second, the base with a longer subscript that includes at least
+            -- one of +-, (to catch things like ri+1, but not return).
+            submatches[#submatches+1] = "^(" .. base .. ")([%a%d]*[%-%+%,]+[%a%d%-%+%,]*)$"
         end
     end
     return word
@@ -100,7 +104,7 @@ end
 function vis.begin_of_display()
     -- Initially allow subscripts using _ or just appending a number (later,
     -- we will add extra patterns here.
-    submatches = {"^(.*)_([%a%d,]+)$", "^(.*[^%d])(%d+)$"}
+    submatches = {"^(%a*)_([%a%d,]+)$", "^(%a+)(%d+)$"}
     -- This stores all the bases we've encountered so far (to prevent
     -- duplicates). For each of them there will be a pattern in submatches
     -- above.