local function take_word(str)
-- Split the leading word off str.
-- A word must start with an ASCII letter, a-z or A-Z (in particular, λ
-- is not a valid start of a word). The pattern requires at least one
-- further character after that letter, so a lone letter is not taken as
-- a word.
-- Returns the word and the remainder of str. When str does not start
-- with a word, returns nil and str unchanged (assuming utf.match
-- returns nil on a failed match, like string.match).
- res, newstr = utf.match(str, "^([a-zA-Z][%a%d_]+)(.*)")
+ local res, newstr = utf.match(str, "^([a-zA-Z][%a%d%+%-%,_]+)(.*)")
return res, newstr or str
end
if not bases[base] then
-- Register that we've added this base
bases[base] = true
- -- Add a pattern for this base
- submatches[#submatches+1] = "^(" .. base .. ")([%a%d,]+)$"
+ -- Add patterns for this base. First, the base with a single
+ -- letter or number subscript.
+ submatches[#submatches+1] = "^(" .. base .. ")([%a%d])$"
+ -- Second, the base with a longer subscript that includes at least
+ -- one of +-, (to catch things like ri+1, but not return).
+ submatches[#submatches+1] = "^(" .. base .. ")([%a%d]*[%-%+%,]+[%a%d%-%+%,]*)$"
end
end
return word
function vis.begin_of_display()
-- Initially allow subscripts using _ or just appending a number (later,
-- we will add extra patterns here).
- submatches = {"^(.*)_([%a%d,]+)$", "^(.*[^%d])(%d+)$"}
+ submatches = {"^(%a*)_([%a%d,]+)$", "^(%a+)(%d+)$"}
-- This stores all the bases we've encountered so far (to prevent
-- duplicates). For each of them there will be a pattern in submatches
-- above.