lib/bup/shquote.py

   1
   2 from __future__ import absolute_import
   3 import re
   4
   5 from bup.compat import bytes_from_byte
   6
   7 q = b"'"
   8 qq = b'"'
   9
  10
  11 class QuoteError(Exception):
  12     pass
  13
  14
  15 def _quotesplit(line):
  16     inquote = None
  17     inescape = None
  18     wordstart = 0
  19     word = b''
  20     for i in range(len(line)):
  21         c = bytes_from_byte(line[i])
  22         if inescape:
  23             if inquote == q and c != q:
  24                 word += b'\\'  # single-q backslashes can only quote single-q
  25             word += c
  26             inescape = False
  27         elif c == b'\\':
  28             inescape = True
  29         elif c == inquote:
  30             inquote = None
  31             # this is un-sh-like, but do it for sanity when autocompleting
  32             yield (wordstart, word)
  33             word = b''
  34             wordstart = i+1
  35         elif not inquote and not word and c in (q, qq):
  36             # the 'not word' constraint on this is un-sh-like, but do it
  37             # for sanity when autocompleting
  38             inquote = c
  39             wordstart = i
  40         elif not inquote and c in [b' ', b'\n', b'\r', b'\t']:
  41             if word:
  42                 yield (wordstart, word)
  43             word = b''
  44             wordstart = i+1
  45         else:
  46             word += c
  47     if word:
  48         yield (wordstart, word)
  49     if inquote or inescape or word:
  50         raise QuoteError()
  51
  52
  53 def quotesplit(line):
  54     """Split 'line' into a list of offset,word tuples.
  55
  56     The words are produced after removing doublequotes, singlequotes, and
  57     backslash escapes.
  58
  59     Note that this implementation isn't entirely sh-compatible.  It only
  60     dequotes words that *start* with a quote character, that is, bytes like
  61        hello"world"
  62     will not have its quotes removed, while bytes like
  63        hello "world"
  64     will be turned into [(0, 'hello'), (6, 'world')] (ie. quotes removed).
  65     """
  66     l = []
  67     try:
  68         for i in _quotesplit(line):
  69             l.append(i)
  70     except QuoteError:
  71         pass
  72     return l
  73
  74
  75 def unfinished_word(line):
  76     """Returns the quotechar,word of any unfinished word at the end of 'line'.
  77
  78     You can use this to determine if 'line' is a completely parseable line
  79     (ie. one that quotesplit() will finish successfully) or if you need
  80     to read more bytes first.
  81
  82     Args:
  83       line: bytes
  84     Returns:
  85       quotechar,word: the initial quote char (or None), and the partial word.
  86     """
  87     try:
  88         for (wordstart,word) in _quotesplit(line):
  89             pass
  90     except QuoteError:
  91         firstchar = bytes_from_byte(line[wordstart])
  92         if firstchar in [q, qq]:
  93             return (firstchar, word)
  94         else:
  95             return (None, word)
  96     else:
  97         return (None, b'')
  98
  99 def quotify(qtype, word, terminate):
 100     """Return a bytes corresponding to given word, quoted using qtype.
 101
 102     The resulting bytes are dequotable using quotesplit() and can be
 103     joined with other quoted bytes by adding arbitrary whitespace
 104     separators.
 105
 106     Args:
 107       qtype: one of '', shquote.qq, or shquote.q
 108       word: the bytes to quote.  May contain arbitrary characters.
 109       terminate: include the trailing quote character, if any.
 110     Returns:
 111       The quoted bytes.
 112     """
 113     if qtype == qq:
 114         return qq + word.replace(qq, b'\\"') + (terminate and qq or b'')
 115     elif qtype == q:
 116         return q + word.replace(q, b"\\'") + (terminate and q or b'')
 117     else:
 118         return re.sub(br'([\"\' \t\n\r])', br'\\\1', word)
 119
 120
 121 def quotify_list(words):
 122   """Return minimally-quoted bytes produced by quoting each word.
 123
 124   This calculates the qtype for each word depending on whether the word
 125   already includes singlequote characters, doublequote characters, both,
 126   or neither.
 127
 128   Args:
 129     words: the list of words to quote.
 130   Returns:
 131     The resulting bytes, with quoted words separated by ' '.
 132   """
 133   wordout = []
 134   for word in words:
 135     qtype = q
 136     if word and not re.search(br'[\s\"\']', word):
 137       qtype = b''
 138     elif q in word and qq not in word:
 139       qtype = qq
 140     wordout.append(quotify(qtype, word, True))
 141   return b' '.join(wordout)
 142
 143
 144 def what_to_add(qtype, origword, newword, terminate):
 145     """Return a qtype that is needed to finish a partial word.
 146
 147     For example, given an origword of '\"frog' and a newword of '\"frogston',
 148     returns either:
 149        terminate=False: 'ston'
 150        terminate=True:  'ston\"'
 151
 152     This is useful when calculating tab completions for readline.
 153
 154     Args:
 155       qtype: the type of quoting to use (ie. the first character of origword)
 156       origword: the original word that needs completion.
 157       newword: the word we want it to be after completion.  Must start with
 158         origword.
 159       terminate: true if we should add the actual quote character at the end.
 160     Returns:
 161       The bytes to append to origword to produce (quoted) newword.
 162     """
 163     if not newword.startswith(origword):
 164         return b''
 165     else:
 166         qold = quotify(qtype, origword, terminate=False)
 167         return quotify(qtype, newword, terminate=terminate)[len(qold):]