lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from math import floor
   8 from os import environ
   9 from subprocess import PIPE, Popen
  10 import sys, os, subprocess, errno, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 from bup.compat import argv_bytes, byte_int, pending_raise
  16 from bup.io import byte_stream, path_msg
  17 # This function should really be in helpers, not in bup.options.  But we
  18 # want options.py to be standalone so people can include it in other projects.
  19 from bup.options import _tty_width as tty_width
  20
  21
# Debug verbosity, read once at import time from the BUP_DEBUG
# environment variable (0 when unset).  debug1() logs when this is
# >= 1, debug2() when it is >= 2.
buglvl = int(os.environ.get('BUP_DEBUG', 0))
  23
  24
  25 class Nonlocal:
  26     """Helper to deal with Python scoping issues"""
  27     pass
  28
  29
@contextmanager
def finalized(enter_result=None, finalize=None):
    """Context manager that yields enter_result and then runs
    finalize(enter_result) when the block is left.

    finalize is mandatory (asserted) even though it defaults to None.
    If the managed block raises, finalize is called inside
    pending_raise(ex).
    """
    assert finalize
    try:
        yield enter_result
    except BaseException as ex:
        # NOTE(review): presumably pending_raise() re-raises ex when
        # its block exits, so the trailing finalize() below is not
        # reached on this path -- confirm against bup.compat.
        with pending_raise(ex):
            finalize(enter_result)
    finalize(enter_result)
  39
  40
# System page size as reported by sysconf(SC_PAGE_SIZE); used as the
# granularity for the mincore() bookkeeping in fmincore().
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Limit on the size of a subprocess argument list as reported by
# sysconf(SC_ARG_MAX); the default arg_max for batchpipe().
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  47
  48 def last(iterable):
  49     result = None
  50     for result in iterable:
  51         pass
  52     return result
  53
# Pick the best available "flush file data to disk" primitive:
# os.fdatasync where the platform provides it, os.fsync otherwise.
try:
    _fdatasync = os.fdatasync
except AttributeError:
    _fdatasync = os.fsync

if sys.platform.startswith('darwin'):
    # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
    import fcntl
    def fdatasync(fd):
        """Flush fd via fcntl(F_FULLFSYNC), falling back to the plain
        sync primitive when the file system reports ENOTSUP."""
        try:
            return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
        except IOError as e:
            # Fallback for file systems (SMB) that do not support F_FULLFSYNC
            if e.errno == errno.ENOTSUP:
                return _fdatasync(fd)
            else:
                raise
else:
    # Everywhere else the primitive chosen above is used directly.
    fdatasync = _fdatasync
  73
  74
  75 def partition(predicate, stream):
  76     """Returns (leading_matches_it, rest_it), where leading_matches_it
  77     must be completely exhausted before traversing rest_it.
  78
  79     """
  80     stream = iter(stream)
  81     ns = Nonlocal()
  82     ns.first_nonmatch = None
  83     def leading_matches():
  84         for x in stream:
  85             if predicate(x):
  86                 yield x
  87             else:
  88                 ns.first_nonmatch = (x,)
  89                 break
  90     def rest():
  91         if ns.first_nonmatch:
  92             yield ns.first_nonmatch[0]
  93             for x in stream:
  94                 yield x
  95     return (leading_matches(), rest())
  96
  97
  98 def merge_dict(*xs):
  99     result = {}
 100     for x in xs:
 101         result.update(x)
 102     return result
 103
 104
 105 def lines_until_sentinel(f, sentinel, ex_type):
 106     # sentinel must end with \n and must contain only one \n
 107     while True:
 108         line = f.readline()
 109         if not (line and line.endswith(b'\n')):
 110             raise ex_type('Hit EOF while reading line')
 111         if line == sentinel:
 112             return
 113         yield line
 114
 115
 116 def stat_if_exists(path):
 117     try:
 118         return os.stat(path)
 119     except OSError as e:
 120         if e.errno != errno.ENOENT:
 121             raise
 122     return None
 123
 124
 125 # Write (blockingly) to sockets that may or may not be in blocking mode.
 126 # We need this because our stderr is sometimes eaten by subprocesses
 127 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 128 # leading to race conditions.  Ick.  We'll do it the hard way.
 129 def _hard_write(fd, buf):
 130     while buf:
 131         (r,w,x) = select.select([], [fd], [], None)
 132         if not w:
 133             raise IOError('select(fd) returned without being writable')
 134         try:
 135             sz = os.write(fd, buf)
 136         except OSError as e:
 137             if e.errno != errno.EAGAIN:
 138                 raise
 139         assert(sz >= 0)
 140         buf = buf[sz:]
 141
 142
 143 _last_prog = 0
 144 def log(s):
 145     """Print a log message to stderr."""
 146     global _last_prog
 147     sys.stdout.flush()
 148     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 149     _last_prog = 0
 150
 151
 152 def debug1(s):
 153     if buglvl >= 1:
 154         log(s)
 155
 156
 157 def debug2(s):
 158     if buglvl >= 2:
 159         log(s)
 160
 161
# Whether stdout (istty1) / stderr (istty2) should be treated as a
# terminal: truthy when the descriptor is a tty, or when the matching
# bit of the BUP_FORCE_TTY environment variable is set (bit 1 for
# stdout, bit 2 for stderr).
istty1 = os.isatty(1) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 1)
istty2 = os.isatty(2) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 2)
# Most recent message that progress() actually displayed.
_last_progress = ''
 165 def progress(s):
 166     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 167     global _last_progress
 168     if istty2:
 169         log(s)
 170         _last_progress = s
 171
 172
 173 def qprogress(s):
 174     """Calls progress() only if we haven't printed progress in a while.
 175
 176     This avoids overloading the stderr buffer with excess junk.
 177     """
 178     global _last_prog
 179     now = time.time()
 180     if now - _last_prog > 0.1:
 181         progress(s)
 182         _last_prog = now
 183
 184
 185 def reprogress():
 186     """Calls progress() to redisplay the most recent progress message.
 187
 188     Useful after you've printed some other message that wipes out the
 189     progress line.
 190     """
 191     if _last_progress and _last_progress.endswith('\r'):
 192         progress(_last_progress)
 193
 194
 195 def mkdirp(d, mode=None):
 196     """Recursively create directories on path 'd'.
 197
 198     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 199     the path already exists.
 200     """
 201     try:
 202         if mode:
 203             os.makedirs(d, mode)
 204         else:
 205             os.makedirs(d)
 206     except OSError as e:
 207         if e.errno == errno.EEXIST:
 208             pass
 209         else:
 210             raise
 211
 212
 213 class MergeIterItem:
 214     def __init__(self, entry, read_it):
 215         self.entry = entry
 216         self.read_it = read_it
 217     def __lt__(self, x):
 218         return self.entry < x.entry
 219
 220 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 221     if key:
 222         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 223     else:
 224         samekey = operator.eq
 225     count = 0
 226     total = sum(len(it) for it in iters)
 227     iters = (iter(it) for it in iters)
 228     heap = ((next(it, None),it) for it in iters)
 229     heap = [MergeIterItem(e, it) for e, it in heap if e]
 230
 231     heapq.heapify(heap)
 232     pe = None
 233     while heap:
 234         if not count % pfreq:
 235             pfunc(count, total)
 236         e, it = heap[0].entry, heap[0].read_it
 237         if not samekey(e, pe):
 238             pe = e
 239             yield e
 240         count += 1
 241         try:
 242             e = next(it)
 243         except StopIteration:
 244             heapq.heappop(heap) # remove current
 245         else:
 246             # shift current to new location
 247             heapq.heapreplace(heap, MergeIterItem(e, it))
 248     pfinal(count, total)
 249
 250
 251 def unlink(f):
 252     """Delete a file at path 'f' if it currently exists.
 253
 254     Unlike os.unlink(), does not throw an exception if the file didn't already
 255     exist.
 256     """
 257     try:
 258         os.unlink(f)
 259     except OSError as e:
 260         if e.errno != errno.ENOENT:
 261             raise
 262
 263
 264 _bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
 265 _sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
 266
 267 def bquote(x):
 268     if x == b'':
 269         return b"''"
 270     if _bq_simple_id_rx.match(x):
 271         return x
 272     return b"'%s'" % x.replace(b"'", b"'\"'\"'")
 273
 274 def squote(x):
 275     if x == '':
 276         return "''"
 277     if _sq_simple_id_rx.match(x):
 278         return x
 279     return "'%s'" % x.replace("'", "'\"'\"'")
 280
 281 def quote(x):
 282     if isinstance(x, bytes):
 283         return bquote(x)
 284     if isinstance(x, compat.str_type):
 285         return squote(x)
 286     assert False
 287     # some versions of pylint get confused
 288     return None
 289
 290 def shstr(cmd):
 291     """Return a shell quoted string for cmd if it's a sequence, else cmd.
 292
 293     cmd must be a string, bytes, or a sequence of one or the other,
 294     and the assumption is that if cmd is a string or bytes, then it's
 295     already quoted (because it's what's actually being passed to
 296     call() and friends.  e.g. log(shstr(cmd)); call(cmd)
 297
 298     """
 299     if isinstance(cmd, (bytes, compat.str_type)):
 300         return cmd
 301     elif all(isinstance(x, bytes) for x in cmd):
 302         return b' '.join(map(bquote, cmd))
 303     elif all(isinstance(x, compat.str_type) for x in cmd):
 304         return ' '.join(map(squote, cmd))
 305     raise TypeError('unsupported shstr argument: ' + repr(cmd))
 306
 307
 308 exc = subprocess.check_call
 309
 310 def exo(cmd,
 311         input=None,
 312         stdin=None,
 313         stderr=None,
 314         shell=False,
 315         check=True,
 316         preexec_fn=None,
 317         close_fds=True):
 318     if input:
 319         assert stdin in (None, PIPE)
 320         stdin = PIPE
 321     p = Popen(cmd,
 322               stdin=stdin, stdout=PIPE, stderr=stderr,
 323               shell=shell,
 324               preexec_fn=preexec_fn,
 325               close_fds=close_fds)
 326     out, err = p.communicate(input)
 327     if check and p.returncode != 0:
 328         raise Exception('subprocess %r failed with status %d%s'
 329                         % (b' '.join(map(quote, cmd)), p.returncode,
 330                            ', stderr: %r' % err if err else ''))
 331     return out, err, p
 332
 333 def readpipe(argv, preexec_fn=None, shell=False):
 334     """Run a subprocess and return its output."""
 335     return exo(argv, preexec_fn=preexec_fn, shell=shell)[0]
 336
 337
 338 def _argmax_base(command):
 339     base_size = 2048
 340     for c in command:
 341         base_size += len(command) + 1
 342     for k, v in compat.items(environ):
 343         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 344     return base_size
 345
 346
 347 def _argmax_args_size(args):
 348     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 349
 350
 351 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 352     """If args is not empty, yield the output produced by calling the
 353 command list with args as a sequence of strings (It may be necessary
 354 to return multiple strings in order to respect ARG_MAX)."""
 355     # The optional arg_max arg is a workaround for an issue with the
 356     # current wvtest behavior.
 357     base_size = _argmax_base(command)
 358     while args:
 359         room = arg_max - base_size
 360         i = 0
 361         while i < len(args):
 362             next_size = _argmax_args_size(args[i:i+1])
 363             if room - next_size < 0:
 364                 break
 365             room -= next_size
 366             i += 1
 367         sub_args = args[:i]
 368         args = args[i:]
 369         assert(len(sub_args))
 370         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 371
 372
 373 def resolve_parent(p):
 374     """Return the absolute path of a file without following any final symlink.
 375
 376     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 377     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 378     will follow symlinks in p's directory)
 379     """
 380     try:
 381         st = os.lstat(p)
 382     except OSError:
 383         st = None
 384     if st and stat.S_ISLNK(st.st_mode):
 385         (dir, name) = os.path.split(p)
 386         dir = os.path.realpath(dir)
 387         out = os.path.join(dir, name)
 388     else:
 389         out = os.path.realpath(p)
 390     #log('realpathing:%r,%r\n' % (p, out))
 391     return out
 392
 393
 394 def detect_fakeroot():
 395     "Return True if we appear to be running under fakeroot."
 396     return os.getenv("FAKEROOTKEY") != None
 397
 398
 399 if sys.platform.startswith('cygwin'):
 400     def is_superuser():
 401         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 402         groups = os.getgroups()
 403         return 544 in groups or 0 in groups
 404 else:
 405     def is_superuser():
 406         return os.geteuid() == 0
 407
 408
 409 def cache_key_value(get_value, key, cache):
 410     """Return (value, was_cached).  If there is a value in the cache
 411     for key, use that, otherwise, call get_value(key) which should
 412     throw a KeyError if there is no value -- in which case the cached
 413     and returned value will be None.
 414     """
 415     try: # Do we already have it (or know there wasn't one)?
 416         value = cache[key]
 417         return value, True
 418     except KeyError:
 419         pass
 420     value = None
 421     try:
 422         cache[key] = value = get_value(key)
 423     except KeyError:
 424         cache[key] = None
 425     return value, False
 426
 427
 428 _hostname = None
 429 def hostname():
 430     """Get the FQDN of this machine."""
 431     global _hostname
 432     if not _hostname:
 433         _hostname = _helpers.gethostname()
 434     return _hostname
 435
 436
 437 def format_filesize(size):
 438     unit = 1024.0
 439     size = float(size)
 440     if size < unit:
 441         return "%d" % (size)
 442     exponent = int(math.log(size) // math.log(unit))
 443     size_prefix = "KMGTPE"[exponent - 1]
 444     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 445
 446
 447 class NotOk(Exception):
 448     pass
 449
 450
 451 class BaseConn:
 452     def __init__(self, outp):
 453         self.outp = outp
 454
 455     def close(self):
 456         while self._read(65536): pass
 457
 458     def _read(self, size):
 459         raise NotImplementedError("Subclasses must implement _read")
 460
 461     def read(self, size):
 462         """Read 'size' bytes from input stream."""
 463         self.outp.flush()
 464         return self._read(size)
 465
 466     def _readline(self, size):
 467         raise NotImplementedError("Subclasses must implement _readline")
 468
 469     def readline(self):
 470         """Read from input stream until a newline is found."""
 471         self.outp.flush()
 472         return self._readline()
 473
 474     def write(self, data):
 475         """Write 'data' to output stream."""
 476         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 477         self.outp.write(data)
 478
 479     def has_input(self):
 480         """Return true if input stream is readable."""
 481         raise NotImplementedError("Subclasses must implement has_input")
 482
 483     def ok(self):
 484         """Indicate end of output from last sent command."""
 485         self.write(b'\nok\n')
 486
 487     def error(self, s):
 488         """Indicate server error to the client."""
 489         s = re.sub(br'\s+', b' ', s)
 490         self.write(b'\nerror %s\n' % s)
 491
 492     def _check_ok(self, onempty):
 493         self.outp.flush()
 494         rl = b''
 495         for rl in linereader(self):
 496             #log('%d got line: %r\n' % (os.getpid(), rl))
 497             if not rl:  # empty line
 498                 continue
 499             elif rl == b'ok':
 500                 return None
 501             elif rl.startswith(b'error '):
 502                 #log('client: error: %s\n' % rl[6:])
 503                 return NotOk(rl[6:])
 504             else:
 505                 onempty(rl)
 506         raise Exception('server exited unexpectedly; see errors above')
 507
 508     def drain_and_check_ok(self):
 509         """Remove all data for the current command from input stream."""
 510         def onempty(rl):
 511             pass
 512         return self._check_ok(onempty)
 513
 514     def check_ok(self):
 515         """Verify that server action completed successfully."""
 516         def onempty(rl):
 517             raise Exception('expected "ok", got %r' % rl)
 518         return self._check_ok(onempty)
 519
 520
 521 class Conn(BaseConn):
 522     def __init__(self, inp, outp):
 523         BaseConn.__init__(self, outp)
 524         self.inp = inp
 525
 526     def _read(self, size):
 527         return self.inp.read(size)
 528
 529     def _readline(self):
 530         return self.inp.readline()
 531
 532     def has_input(self):
 533         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 534         if rl:
 535             assert(rl[0] == self.inp.fileno())
 536             return True
 537         else:
 538             return None
 539
 540
 541 def checked_reader(fd, n):
 542     while n > 0:
 543         rl, _, _ = select.select([fd], [], [])
 544         assert(rl[0] == fd)
 545         buf = os.read(fd, n)
 546         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 547         yield buf
 548         n -= len(buf)
 549
 550
# Largest payload mux() places in a single packet; DemuxConn treats a
# larger advertised length as a broken connection.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex two descriptors onto outfd while process p runs.

    Each packet is a 5 byte header packed as '!IB' (payload length,
    channel) followed by the payload.  Data read from outr goes out on
    channel 1 and data read from errr on channel 2.  A zero-length
    channel 3 packet is always written last, even when an exception
    is propagating.
    """
    try:
        fds = [outr, errr]
        # NOTE(review): the loop stops as soon as p.poll() reports an
        # exit status; nothing here forwards data that is still
        # buffered in outr/errr at that point -- confirm callers are
        # fine with that.
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # An empty read only leaves the for loop; the
                    # while loop keeps going until p has exited.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        os.write(outfd, struct.pack('!IB', 0, 3))
 568
 569
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a stream of '!IB'-framed packets (payload length, channel)
    from the descriptor infd: channel 1 payloads become this
    connection's input, channel 2 payloads are copied to stderr, and
    a channel 3 packet marks the connection closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        stderr = byte_stream(sys.stderr)
        while tail != b'BUPMUX':
            # Make sure to write all pre-BUPMUX output to stderr
            # Read enough to fill the 6 byte window, then one byte at
            # a time while sliding it along.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                ex = IOError('demux: unexpected EOF during initialization')
                # NOTE(review): presumably pending_raise() raises ex
                # once this block exits -- confirm against bup.compat.
                with pending_raise(ex):
                    stderr.write(tail)
                    stderr.flush()
            tail += b
            # Everything except the last 6 bytes can no longer be part
            # of the sync string, so pass it on.
            stderr.write(tail[:-6])
            tail = tail[-6:]
        stderr.flush()
        self.infd = infd
        # Generator over the chunks of the channel 1 packet currently
        # being consumed, or None between packets.
        self.reader = None
        # Channel 1 data read but not yet handed to a caller, or None.
        self.buf = None
        # Set once a channel 3 packet has been seen.
        self.closed = False

    def write(self, data):
        # Process whatever input is already waiting (without
        # blocking) before writing.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and act on it.

        Returns False if the connection is closed or nothing became
        readable within timeout, True once a packet header has been
        handled.  Raises Exception if the advertised length exceeds
        MAX_PACKET.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        if n > MAX_PACKET:
            # assume that something went wrong and print stuff
            ns += os.read(self.infd, 1024)
            stderr = byte_stream(sys.stderr)
            stderr.write(ns)
            stderr.flush()
            raise Exception("Connection broken")
        if fdw == 1:
            # Payload is consumed lazily by _load_buf().
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds channel 1 data.

        Returns True when data is available, False when the
        connection is closed or no packet arrived within timeout.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current packet fully consumed; go on to the next.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of buffered input until ix_fn finds an end.

        ix_fn(buf) returns the number of leading bytes of buf that
        complete the read, or None if all of buf is wanted and more
        should follow.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                # Keep the unread remainder for the next call.
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        # One-element list so the nested function can update the
        # number of bytes still wanted.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if channel 1 data is available without blocking."""
        return self._load_buf(0)
 671
 672
 673 def linereader(f):
 674     """Generate a list of input lines from 'f' without terminating newlines."""
 675     while 1:
 676         line = f.readline()
 677         if not line:
 678             break
 679         yield line[:-1]
 680
 681
 682 def chunkyreader(f, count = None):
 683     """Generate a list of chunks of data read from 'f'.
 684
 685     If count is None, read until EOF is reached.
 686
 687     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 688     reached while reading, raise IOError.
 689     """
 690     if count != None:
 691         while count > 0:
 692             b = f.read(min(count, 65536))
 693             if not b:
 694                 raise IOError('EOF with %d bytes remaining' % count)
 695             yield b
 696             count -= len(b)
 697     else:
 698         while 1:
 699             b = f.read(65536)
 700             if not b: break
 701             yield b
 702
 703
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed name when leaving the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.

    The temporary file is created in the same directory as name.  If
    the block raises, the temporary file is removed and name is left
    untouched.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so the raw descriptor is still ours to close.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Only reached when the block finished without raising.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 737         unlink(tempname)  # nonexistant file is ignored
 738
 739
 740 def slashappend(s):
 741     """Append "/" to 's' if it doesn't aleady end in "/"."""
 742     assert isinstance(s, bytes)
 743     if s and not s.endswith(b'/'):
 744         return s + b'/'
 745     else:
 746         return s
 747
 748
 749 def _mmap_do(f, sz, flags, prot, close):
 750     if not sz:
 751         st = os.fstat(f.fileno())
 752         sz = st.st_size
 753     if not sz:
 754         # trying to open a zero-length map gives an error, but an empty
 755         # string has all the same behaviour of a zero-length map, ie. it has
 756         # no elements :)
 757         return ''
 758     map = compat.mmap(f.fileno(), sz, flags, prot)
 759     if close:
 760         f.close()  # map will persist beyond file close
 761     return map
 762
 763
 764 def mmap_read(f, sz = 0, close=True):
 765     """Create a read-only memory mapped region on file 'f'.
 766     If sz is 0, the region will cover the entire file.
 767     """
 768     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 769
 770
 771 def mmap_readwrite(f, sz = 0, close=True):
 772     """Create a read-write memory mapped region on file 'f'.
 773     If sz is 0, the region will cover the entire file.
 774     """
 775     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 776                     close)
 777
 778
 779 def mmap_readwrite_private(f, sz = 0, close=True):
 780     """Create a read-write memory mapped region on file 'f'.
 781     If sz is 0, the region will cover the entire file.
 782     The map is private, which means the changes are never flushed back to the
 783     file.
 784     """
 785     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 786                     close)
 787
 788
# fmincore() is only defined when the _helpers extension module
# provides mincore.
_mincore = getattr(_helpers, 'mincore', None)
if _mincore:
    # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
    MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)

    # Number of bytes mapped per mincore() call; computed on first use.
    _fmincore_chunk_size = None
    def _set_fmincore_chunk_size():
        """Set _fmincore_chunk_size to the largest multiple of the
        page size that is no bigger than 64MiB, or to one page if a
        page is at least that large."""
        global _fmincore_chunk_size
        pref_chunk_size = 64 * 1024 * 1024
        chunk_size = sc_page_size
        if (sc_page_size < pref_chunk_size):
            chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
        _fmincore_chunk_size = chunk_size

    def fmincore(fd):
        """Return the mincore() data for fd as a bytearray whose values can be
        tested via MINCORE_INCORE, or None if fd does not fully
        support the operation."""
        st = os.fstat(fd)
        if (st.st_size == 0):
            return bytearray(0)
        if not _fmincore_chunk_size:
            _set_fmincore_chunk_size()
        pages_per_chunk = _fmincore_chunk_size // sc_page_size;
        # Both counts are rounded up so a partial trailing page/chunk
        # is included.
        page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
        chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size
        # One entry per page of the file.
        result = bytearray(page_count)
        for ci in compat.range(chunk_count):
            pos = _fmincore_chunk_size * ci;
            # The final chunk may be shorter than the rest.
            msize = min(_fmincore_chunk_size, st.st_size - pos)
            try:
                # NOTE(review): the trailing arguments are presumably
                # prot, access and offset -- confirm against
                # compat.mmap.
                m = compat.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
            except mmap.error as ex:
                if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
                    # Perhaps the file was a pipe, i.e. "... | bup split ..."
                    return None
                raise ex
            try:
                # The last argument is this chunk's starting index in
                # result.
                _mincore(m, msize, 0, result, ci * pages_per_chunk)
            except OSError as ex:
                if ex.errno == errno.ENOSYS:
                    return None
                raise
        return result
 831                 raise
 832         return result
 833
 834
 835 def parse_timestamp(epoch_str):
 836     """Return the number of nanoseconds since the epoch that are described
 837 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 838 throw a ValueError that may contain additional information."""
 839     ns_per = {'s' :  1000000000,
 840               'ms' : 1000000,
 841               'us' : 1000,
 842               'ns' : 1}
 843     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 844     if not match:
 845         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 846             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 847         raise ValueError()
 848     (n, units) = match.group(1, 2)
 849     if not n:
 850         n = 1
 851     n = int(n)
 852     return n * ns_per[units]
 853
 854
 855 def parse_num(s):
 856     """Parse string or bytes as a possibly unit suffixed number.
 857
 858     For example:
 859         199.2k means 203981 bytes
 860         1GB means 1073741824 bytes
 861         2.1 tb means 2199023255552 bytes
 862     """
 863     if isinstance(s, bytes):
 864         # FIXME: should this raise a ValueError for UnicodeDecodeError
 865         # (perhaps with the latter as the context).
 866         s = s.decode('ascii')
 867     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 868     if not g:
 869         raise ValueError("can't parse %r as a number" % s)
 870     (val, unit) = g.groups()
 871     num = float(val)
 872     unit = unit.lower()
 873     if unit in ['t', 'tb']:
 874         mult = 1024*1024*1024*1024
 875     elif unit in ['g', 'gb']:
 876         mult = 1024*1024*1024
 877     elif unit in ['m', 'mb']:
 878         mult = 1024*1024
 879     elif unit in ['k', 'kb']:
 880         mult = 1024
 881     elif unit in ['', 'b']:
 882         mult = 1
 883     else:
 884         raise ValueError("invalid unit %r in number %r" % (unit, s))
 885     return int(num*mult)
 886
 887
 888 saved_errors = []
 889 def add_error(e):
 890     """Append an error message to the list of saved errors.
 891
 892     Once processing is able to stop and output the errors, the saved errors are
 893     accessible in the module variable helpers.saved_errors.
 894     """
 895     saved_errors.append(e)
 896     log('%-70s\n' % e)
 897
 898
 899 def clear_errors():
 900     global saved_errors
 901     saved_errors = []
 902
 903
 904 def die_if_errors(msg=None, status=1):
 905     global saved_errors
 906     if saved_errors:
 907         if not msg:
 908             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 909         log(msg)
 910         sys.exit(status)
 911
 912
 913 def handle_ctrl_c():
 914     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 915
 916     The new exception handler will make sure that bup will exit without an ugly
 917     stacktrace when Ctrl-C is hit.
 918     """
 919     oldhook = sys.excepthook
 920     def newhook(exctype, value, traceback):
 921         if exctype == KeyboardInterrupt:
 922             log('\nInterrupted.\n')
 923         else:
 924             oldhook(exctype, value, traceback)
 925     sys.excepthook = newhook
 926
 927
 928 def columnate(l, prefix):
 929     """Format elements of 'l' in columns with 'prefix' leading each line.
 930
 931     The number of columns is determined automatically based on the string
 932     lengths.
 933     """
 934     binary = isinstance(prefix, bytes)
 935     nothing = b'' if binary else ''
 936     nl = b'\n' if binary else '\n'
 937     if not l:
 938         return nothing
 939     l = l[:]
 940     clen = max(len(s) for s in l)
 941     ncols = (tty_width() - len(prefix)) // (clen + 2)
 942     if ncols <= 1:
 943         ncols = 1
 944         clen = 0
 945     cols = []
 946     while len(l) % ncols:
 947         l.append(nothing)
 948     rows = len(l) // ncols
 949     for s in compat.range(0, len(l), rows):
 950         cols.append(l[s:s+rows])
 951     out = nothing
 952     fmt = b'%-*s' if binary else '%-*s'
 953     for row in zip(*cols):
 954         out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
 955     return out
 956
 957
 958 def parse_date_or_fatal(str, fatal):
 959     """Parses the given date or calls Option.fatal().
 960     For now we expect a string that contains a float."""
 961     try:
 962         date = float(str)
 963     except ValueError as e:
 964         raise fatal('invalid date format (should be a float): %r' % e)
 965     else:
 966         return date
 967
 968
 969 def parse_excludes(options, fatal):
 970     """Traverse the options and extract all excludes, or call Option.fatal()."""
 971     excluded_paths = []
 972
 973     for flag in options:
 974         (option, parameter) = flag
 975         if option == '--exclude':
 976             excluded_paths.append(resolve_parent(argv_bytes(parameter)))
 977         elif option == '--exclude-from':
 978             try:
 979                 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
 980             except IOError as e:
 981                 raise fatal("couldn't read %r" % parameter)
 982             for exclude_path in f.readlines():
 983                 # FIXME: perhaps this should be rstrip('\n')
 984                 exclude_path = resolve_parent(exclude_path.strip())
 985                 if exclude_path:
 986                     excluded_paths.append(exclude_path)
 987     return sorted(frozenset(excluded_paths))
 988
 989
 990 def parse_rx_excludes(options, fatal):
 991     """Traverse the options and extract all rx excludes, or call
 992     Option.fatal()."""
 993     excluded_patterns = []
 994
 995     for flag in options:
 996         (option, parameter) = flag
 997         if option == '--exclude-rx':
 998             try:
 999                 excluded_patterns.append(re.compile(argv_bytes(parameter)))
1000             except re.error as ex:
1001                 fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
1002         elif option == '--exclude-rx-from':
1003             try:
1004                 f = open(resolve_parent(parameter), 'rb')
1005             except IOError as e:
1006                 raise fatal("couldn't read %r" % parameter)
1007             for pattern in f.readlines():
1008                 spattern = pattern.rstrip(b'\n')
1009                 if not spattern:
1010                     continue
1011                 try:
1012                     excluded_patterns.append(re.compile(spattern))
1013                 except re.error as ex:
1014                     fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
1015     return excluded_patterns
1016
1017
def should_rx_exclude_path(path, exclude_rxs):
    """Return True if path matches a regular expression in exclude_rxs.

    The patterns are tried in order with search(); the first one that
    matches is reported via debug1().  Returns False when none match.
    """
    hit = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if hit is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, hit.pattern))
    return True
1026
1027
1028 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1029 # that resolve against the current filesystem in the strip/graft
1030 # functions for example, but elsewhere as well.  I suspect bup's not
1031 # always being careful about that.  For some cases, the contents of
1032 # the current filesystem should be irrelevant, and consulting it might
1033 # produce the wrong result, perhaps via unintended symlink resolution,
1034 # for example.
1035
def path_components(path):
    """Break path into a list of pairs of the form (name,
    full_path_to_name).  Path must be bytes and start with b'/'; it is
    normalized with os.path.abspath() before being split.
    Example:
      b'/home/foo' -> [(b'', b'/'), (b'home', b'/home'),
                       (b'foo', b'/home/foo')]"""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    # The root is always the first component.
    components = [(b'', b'/')]
    norm_path = os.path.abspath(path)
    if norm_path != b'/':
        trail = b''
        # Skip the empty element in front of the leading slash.
        for name in norm_path.split(b'/')[1:]:
            trail = trail + b'/' + name
            components.append((name, trail))
    return components
1053
1054
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Prefixes are tried longest first, and a prefix that normalizes to
    b'/' is ignored.  The first component of a stripped result is
    (b'', normalized_prefix).  If no prefix applies, the result is
    path_components(path).  See thelpers.py for examples."""
    target = os.path.abspath(path)
    for candidate in sorted(strip_prefixes, key=len, reverse=True):
        root = os.path.abspath(candidate)
        if root == b'/' or not target.startswith(root):
            continue
        trail = root
        components = []
        for name in target[len(root):].split(b'/'):
            if name:  # not root
                trail = trail + b'/' + name
            components.append((name, trail))
        return components
    # Nothing to strip.
    return path_components(path)
1077
1078
def grafted_path_components(graft_points, path):
    """Return the (name, fs_path_or_None) components of path after
    applying the first matching graft point.

    graft_points is an iterable of (old_prefix, new_prefix) pairs, as
    in --graft old=new.  Components that exist only because of
    new_prefix carry None instead of a filesystem path.  If no graft
    point matches, the result is path_components() of the normalized
    path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Expand prefixes iff not absolute paths.
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if clean_path.startswith(old_prefix):
            # Swap the prefix by slicing rather than re.sub(): as a
            # regex replacement template, any backslash in new_prefix
            # would be interpreted as an escape or group reference.
            grafted_path = new_prefix + clean_path[len(old_prefix):]
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = b'/' + grafted_path.lstrip(b'/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
            new_prefix_parts = new_prefix.split(b'/')
            result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            graft_point = len(result_prefix)
            result[graft_point] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == b'/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
1120
1121
# Alias for hashlib.sha1.
Sha1 = hashlib.sha1


# _helpers.localtime when the _helpers module provides it, otherwise None.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Named tuple with eleven time fields, the last two being tm_gmtoff
    # and tm_zone.  Only defined when _helpers.localtime is available.
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])
1132
# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the bup_time for time, with any fractional part
        rounded down first."""
        return bup_time(*_helpers.localtime(int(floor(time))))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        h, m = divmod(offmin, 60)
        # Emit the sign separately: carrying it on the hour value
        # loses it whenever the hour is 0 (e.g. -1800s must be
        # "-0030").  An offset that truncates to zero minutes stays
        # "+0000".
        sign = b'-' if off < 0 and offmin else b'+'
        return b'%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any items past
        the first nine."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the '%z' rendering of the local UTC offset for
        time t, as bytes."""
        # time.strftime() rejects a bytes format on Python 3, so
        # format with a native string and convert the result to bytes
        # only when it isn't bytes already (as it is on Python 2).
        offset = time.strftime('%z', localtime(t))
        return offset if isinstance(offset, bytes) else offset.encode('ascii')
    def to_py_time(x):
        """Return x unchanged."""
        return x
1160
1161
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if the bytes name passes every check below, and
    False otherwise.

    A name is rejected when it is b'@', starts or ends with b'/',
    ends with b'.', matches _some_invalid_save_parts_rx, contains a
    byte below 0x20 or equal to 0x7f, or has a '/'-separated part
    that starts with b'.' or ends with b'.lock'."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if name == b'@' or name.startswith(b'/') or name.endswith((b'/', b'.')):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    # No control characters, and no DEL.
    if any(byte_int(c) < 0x20 or byte_int(c) == 0x7f for c in name):
        return False
    return not any(part.startswith(b'.') or part.endswith(b'.lock')
                   for part in name.split(b'/'))
1179
1180
_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Return the number of seconds described by the bytes period s.

    s is either b'forever', which yields float('inf'), or a decimal
    count directly followed by one of the units s, min, h, d, w, m
    (counted as 31 days), or y (counted as 366 days).  Returns None
    when s has any other form."""
    if s == b'forever':
        return float('inf')
    match = _period_rx.match(s)
    if match is None:
        return None
    count, unit = match.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        b's': 1,
        b'min': minute,
        b'h': hour,
        b'd': day,
        b'w': 7 * day,
        b'm': 31 * day,
        b'y': 366 * day,
    }
    return int(count) * secs_per_unit[unit]