# From: Matthijs Kooijman
# Date: Tue, 4 Nov 2008 10:53:45 +0000 (+0100)
# Subject: Add rewrap function for wrapping comments.
# X-Git-Url: https://git.stderr.nl/gitweb?a=commitdiff_plain;h=1bc04fef27c360b73d1cbe3a83fa45ff438d1f93;p=matthijs%2Fprojects%2Fxerxes.git
#
# Add rewrap function for wrapping comments.
#
# File: tools/text.py

from django.utils.encoding import force_unicode
from django.utils.functional import allow_lazy

import re


def splitline(line, prefix_regex):
    """
    Splits the line into a prefix and the rest of the line, using the given
    regex. The regex should contain exactly two groups. If no match was
    found, the prefix is assumed empty.

    prefix_regex may be a pattern string or an already compiled pattern.
    It is matched at the start of the line only: a prefix is by
    definition the leading part of a line, and matching further on would
    silently throw away the text in front of the match.

    Returns a (prefix, rest) tuple. When the regex does not match, or
    does not define exactly two groups, ('', line) is returned.
    """
    pattern = re.compile(prefix_regex)
    # Anything other than two groups cannot describe a (prefix, rest)
    # split, so treat the line as unprefixed instead of guessing.
    if pattern.groups != 2:
        return ('', line)

    match = pattern.match(line)
    if match is None:
        return ('', line)

    # Groups that did not take part in the match are reported as '' so
    # callers always get two strings back.
    return match.groups('')


def rewrap(text, width, prefix_regex):
    """
    Rewraps the given text into the given width, properly preserving the
    prefix identified by prefix_regex. This is similar to vim's gqgq
    command.

    This command tries to preserve any empty lines in input (also lines
    only containing a prefix). Additionally, unprefixed lines that are
    longer than width are taken to be paragraphs, and are separated by
    an empty line after wrapping.

    The aim of this command is to get proper formatting when wrapping a
    text and then adding a prefix to every line, such as is customary
    for email quoting.

    prefix_regex should be a regex that defines exactly two groups, the
    first of which matches the prefix and the second matches the rest of
    the line.

    width is the maximum length of an output line, including its prefix.
    A single word that is longer than the available room is never split
    and will still end up on a line of its own that exceeds width.

    Returns the rewrapped text as a unicode string.
    """
    text = force_unicode(text)
    # Compile once here instead of once per input line.
    pattern = re.compile(prefix_regex)

    def _generator():
        # Number of characters on the current output line, not counting
        # the prefix.
        length = 0
        prefix = ''
        # We want to know whether the previous input line was wrapped
        wrapped = False
        # True while nothing has been written since the start of the
        # text or since the last preserved empty line. In that state
        # there is no unfinished output line that needs terminating.
        at_line_start = True
        for line in text.split('\n'):
            # Save the previous prefix and find the next one
            oldprefix = prefix
            (prefix, line) = splitline(line, pattern)

            # Unprefixed lines in the input are paragraphs and should be
            # separated by an empty line to make sure they don't get
            # merged after being prefixed later on. This only happens
            # when we have two non-prefixed lines after each other, the
            # second is not an empty line and the first line was long
            # enough to wrap (to preserve already properly hardwrapped
            # input).
            if wrapped and oldprefix == '' and prefix == '' and line:
                yield '\n\n'
                length = 0

            # Keep track if this line wrapped
            wrapped = False

            # Preserve empty lines in the input
            if not line:
                # Only terminate the line we were building if there is
                # one. Doing so unconditionally would insert an extra
                # blank line for every consecutive empty input line.
                if not at_line_start:
                    yield '\n'
                yield prefix
                yield '\n'
                length = 0
                at_line_start = True
                # Fake the prefix to be empty, so any prefix in the next
                # line will be yielded first
                prefix = ''
                continue

            at_line_start = False

            # New line has a different prefix? Terminate line if needed,
            # and yield the new prefix
            if prefix != oldprefix:
                if length > 0:
                    yield '\n'
                    length = 0
                yield prefix

            for word in line.split(' '):
                # The "+ 1" accounts for the space that separates this
                # word from the previous one. Without it a line could
                # end up one character longer than width.
                if (length != 0
                        and len(prefix) + length + 1 + len(word) > width):
                    # This word would go over width, terminate the line.
                    yield '\n'
                    yield prefix
                    length = 0
                    wrapped = True
                if length != 0:
                    yield ' '
                    length += 1
                yield word
                length += len(word)
    return u''.join(_generator())
rewrap = allow_lazy(rewrap, unicode)