tools/text.py

   1 from django.utils.encoding import force_unicode
   2 from django.utils.functional import allow_lazy
   3
   4 import re
   5
   6 def splitline(line, prefix_regex):
   7     """
   8     Splits the line into a prefix and the rest of the line, using the given
   9     regex. The regex should contain exactly two groups. If no match was
  10     found, the prefix is assumed empty.
  11     """
  12     match = re.findall(re.compile(prefix_regex), line)
  13     if match and len(match) == 1 and len(match[0]) == 2:
  14         return match[0]
  15     else:
  16         return ('', line)
  17
  18 def rewrap(text, width, prefix_regex):
  19     """
  20     Rewraps the given text into the given width, properly preserving the
  21     prefix identified by prefix_regex. This is similar to vim's gqgq
  22     command.
  23
  24     This command tries to preserve any empty lines in input (also lines
  25     only containing a prefix). Additionally, unprefixed lines that are
  26     longer than width are taken to be paragraphs, and are separated by
  27     an empty line after wrapping.
  28
  29     The aim of this command is to get proper formatting when wrapping a
  30     text and then adding a prefix to every line, such as is customary
  31     for email quoting.
  32
  33     prefix_regex should be a regex that defines exactly two groups, the
  34     first of which matches the prefix and the second matches the rest of
  35     the line.
  36     """
  37     text = force_unicode(text)
  38     def _generator():
  39         length = 0
  40         prefix = ''
  41         # We want to know if we're in the previous line wrapped
  42         wrapped = False
  43         for line in text.split('\n'):
  44             # Save the previous prefix and find the next one
  45             oldprefix = prefix
  46             (prefix, line) = splitline(line, prefix_regex)
  47
  48             # Unprefixed lines in the input are paragraphs and should be
  49             # separated by an empty line to make sure they don't get
  50             # merged after being prefixed later on.  This only happens
  51             # when we have two non-prefixed lines after each other, the
  52             # second is not an empty line and the first line was long
  53             # enough to wrap (to preserver already properly hardwrapped
  54             # input).
  55             if wrapped and oldprefix == '' and prefix == '' and line:
  56                 yield '\n\n'
  57                 length = 0
  58
  59             # Keep track if this line wrapped
  60             wrapped = False
  61
  62             # Preserver empty lines in the input
  63             if not line:
  64                 yield '\n'
  65                 yield prefix
  66                 yield '\n'
  67                 length = 0
  68                 # Fake the prefix to be empty, so any prefix in the next
  69                 # line will be yielded first
  70                 prefix = ''
  71                 continue
  72
  73             # New line has a different prefix? Terminate line if needed,
  74             # and yield the new prefix
  75             if prefix != oldprefix:
  76                 if length > 0:
  77                     yield '\n'
  78                 length = 0
  79                 yield prefix
  80
  81             for word in line.split(' '):
  82                 if length != 0 and len(prefix) + length + len(word) > width:
  83                     # This word would go over width, terminate the line.
  84                     yield '\n'
  85                     yield prefix
  86                     length = 0
  87                     wrapped = True
  88                 if length != 0:
  89                     yield ' '
  90                     length += 1
  91                 yield word
  92                 length += len(word)
  93     return u''.join(_generator())
  94 rewrap = allow_lazy(rewrap, unicode)