lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from math import floor
   8 from os import environ
   9 from subprocess import PIPE, Popen
  10 import sys, os, subprocess, errno, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 from bup import io
  16 from bup.compat import argv_bytes, byte_int, nullcontext, pending_raise
  17 from bup.io import byte_stream, path_msg
  18 # This function should really be in helpers, not in bup.options.  But we
  19 # want options.py to be standalone so people can include it in other projects.
  20 from bup.options import _tty_width as tty_width
  21
  22
# Debug verbosity, read once at import time from the BUP_DEBUG environment
# variable (0 when it is unset).  debug1() and debug2() compare against it.
buglvl = int(os.environ.get('BUP_DEBUG', 0))
  24
  25
  26 class Nonlocal:
  27     """Helper to deal with Python scoping issues"""
  28     pass
  29
  30
  31 def nullcontext_if_not(manager):
  32     return manager if manager is not None else nullcontext()
  33
  34
@contextmanager
def finalized(enter_result=None, finalize=None):
    """Yield enter_result and call finalize(enter_result) when the with
    block is left, whether it finished normally or raised.

    finalize is required, even though its default is None.
    """
    assert finalize
    try:
        yield enter_result
    except BaseException as ex:
        # The block raised: run the finalizer under pending_raise(ex).
        # NOTE(review): this relies on pending_raise() re-raising ex when
        # its block ends, so that the finalize() call below is not reached
        # a second time on this path -- confirm against bup.compat.
        with pending_raise(ex):
            finalize(enter_result)
    # Normal exit (no exception from the block).
    finalize(enter_result)
  44
  45
# Page size reported by sysconf(SC_PAGE_SIZE); must be positive.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Value reported by sysconf(SC_ARG_MAX); batchpipe() uses it as its default
# arg_max when deciding how many arguments fit into one command.
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  52
  53 def last(iterable):
  54     result = None
  55     for result in iterable:
  56         pass
  57     return result
  58
  59 try:
  60     _fdatasync = os.fdatasync
  61 except AttributeError:
  62     _fdatasync = os.fsync
  63
  64 if sys.platform.startswith('darwin'):
  65     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  66     import fcntl
  67     def fdatasync(fd):
  68         try:
  69             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  70         except IOError as e:
  71             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  72             if e.errno == errno.ENOTSUP:
  73                 return _fdatasync(fd)
  74             else:
  75                 raise
  76 else:
  77     fdatasync = _fdatasync
  78
  79
  80 def partition(predicate, stream):
  81     """Returns (leading_matches_it, rest_it), where leading_matches_it
  82     must be completely exhausted before traversing rest_it.
  83
  84     """
  85     stream = iter(stream)
  86     ns = Nonlocal()
  87     ns.first_nonmatch = None
  88     def leading_matches():
  89         for x in stream:
  90             if predicate(x):
  91                 yield x
  92             else:
  93                 ns.first_nonmatch = (x,)
  94                 break
  95     def rest():
  96         if ns.first_nonmatch:
  97             yield ns.first_nonmatch[0]
  98             for x in stream:
  99                 yield x
 100     return (leading_matches(), rest())
 101
 102
 103 def merge_dict(*xs):
 104     result = {}
 105     for x in xs:
 106         result.update(x)
 107     return result
 108
 109
 110 def lines_until_sentinel(f, sentinel, ex_type):
 111     # sentinel must end with \n and must contain only one \n
 112     while True:
 113         line = f.readline()
 114         if not (line and line.endswith(b'\n')):
 115             raise ex_type('Hit EOF while reading line')
 116         if line == sentinel:
 117             return
 118         yield line
 119
 120
 121 def stat_if_exists(path):
 122     try:
 123         return os.stat(path)
 124     except OSError as e:
 125         if e.errno != errno.ENOENT:
 126             raise
 127     return None
 128
 129
 130 # Write (blockingly) to sockets that may or may not be in blocking mode.
 131 # We need this because our stderr is sometimes eaten by subprocesses
 132 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 133 # leading to race conditions.  Ick.  We'll do it the hard way.
 134 def _hard_write(fd, buf):
 135     while buf:
 136         (r,w,x) = select.select([], [fd], [], None)
 137         if not w:
 138             raise IOError('select(fd) returned without being writable')
 139         try:
 140             sz = os.write(fd, buf)
 141         except OSError as e:
 142             if e.errno != errno.EAGAIN:
 143                 raise
 144         assert(sz >= 0)
 145         buf = buf[sz:]
 146
 147
 148 _last_prog = 0
 149 def log(s):
 150     """Print a log message to stderr."""
 151     global _last_prog
 152     sys.stdout.flush()
 153     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 154     _last_prog = 0
 155
 156
 157 def debug1(s):
 158     if buglvl >= 1:
 159         log(s)
 160
 161
 162 def debug2(s):
 163     if buglvl >= 2:
 164         log(s)
 165
 166
 167 istty1 = os.isatty(1) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 1)
 168 istty2 = os.isatty(2) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 2)
 169 _last_progress = ''
 170 def progress(s):
 171     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 172     global _last_progress
 173     if istty2:
 174         log(s)
 175         _last_progress = s
 176
 177
 178 def qprogress(s):
 179     """Calls progress() only if we haven't printed progress in a while.
 180
 181     This avoids overloading the stderr buffer with excess junk.
 182     """
 183     global _last_prog
 184     now = time.time()
 185     if now - _last_prog > 0.1:
 186         progress(s)
 187         _last_prog = now
 188
 189
 190 def reprogress():
 191     """Calls progress() to redisplay the most recent progress message.
 192
 193     Useful after you've printed some other message that wipes out the
 194     progress line.
 195     """
 196     if _last_progress and _last_progress.endswith('\r'):
 197         progress(_last_progress)
 198
 199
 200 def mkdirp(d, mode=None):
 201     """Recursively create directories on path 'd'.
 202
 203     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 204     the path already exists.
 205     """
 206     try:
 207         if mode:
 208             os.makedirs(d, mode)
 209         else:
 210             os.makedirs(d)
 211     except OSError as e:
 212         if e.errno == errno.EEXIST:
 213             pass
 214         else:
 215             raise
 216
 217
 218 class MergeIterItem:
 219     def __init__(self, entry, read_it):
 220         self.entry = entry
 221         self.read_it = read_it
 222     def __lt__(self, x):
 223         return self.entry < x.entry
 224
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge the entries of several iterables through a heap, yielding
    them in ascending order and skipping an entry that matches the one
    yielded just before it.

    iters  -- a re-iterable collection of sized iterables (len() is taken
              of each, then each is iterated); presumably each is already
              sorted -- TODO confirm against callers
    pfreq  -- pfunc(count, total) is called whenever count is a multiple
              of pfreq
    pfunc  -- progress callback taking (count, total)
    pfinal -- called once with (count, total) after the merge completes
    key    -- optional attribute name; when given, two entries match if
              that attribute is equal, otherwise entries are compared
              with ==
    """
    if key:
        # getattr default covers the initial pe of None
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    # Pair every iterator with its first entry (None when it is empty) ...
    heap = ((next(it, None),it) for it in iters)
    # ... and drop the iterators whose first entry is falsy.
    heap = [MergeIterItem(e, it) for e, it in heap if e]

    heapq.heapify(heap)
    pe = None  # previously yielded entry
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        # The smallest pending entry is always at the top of the heap.
        e, it = heap[0].entry, heap[0].read_it
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = next(it)
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            # shift current to new location
            heapq.heapreplace(heap, MergeIterItem(e, it))
    pfinal(count, total)
 254
 255
 256 def unlink(f):
 257     """Delete a file at path 'f' if it currently exists.
 258
 259     Unlike os.unlink(), does not throw an exception if the file didn't already
 260     exist.
 261     """
 262     try:
 263         os.unlink(f)
 264     except OSError as e:
 265         if e.errno != errno.ENOENT:
 266             raise
 267
 268
 269 _bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
 270 _sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
 271
 272 def bquote(x):
 273     if x == b'':
 274         return b"''"
 275     if _bq_simple_id_rx.match(x):
 276         return x
 277     return b"'%s'" % x.replace(b"'", b"'\"'\"'")
 278
 279 def squote(x):
 280     if x == '':
 281         return "''"
 282     if _sq_simple_id_rx.match(x):
 283         return x
 284     return "'%s'" % x.replace("'", "'\"'\"'")
 285
 286 def quote(x):
 287     if isinstance(x, bytes):
 288         return bquote(x)
 289     if isinstance(x, compat.str_type):
 290         return squote(x)
 291     assert False
 292     # some versions of pylint get confused
 293     return None
 294
 295 def shstr(cmd):
 296     """Return a shell quoted string for cmd if it's a sequence, else cmd.
 297
 298     cmd must be a string, bytes, or a sequence of one or the other,
 299     and the assumption is that if cmd is a string or bytes, then it's
 300     already quoted (because it's what's actually being passed to
 301     call() and friends.  e.g. log(shstr(cmd)); call(cmd)
 302
 303     """
 304     if isinstance(cmd, (bytes, compat.str_type)):
 305         return cmd
 306     elif all(isinstance(x, bytes) for x in cmd):
 307         return b' '.join(map(bquote, cmd))
 308     elif all(isinstance(x, compat.str_type) for x in cmd):
 309         return ' '.join(map(squote, cmd))
 310     raise TypeError('unsupported shstr argument: ' + repr(cmd))
 311
 312
# Alias for subprocess.check_call.
exc = subprocess.check_call
 314
 315 def exo(cmd,
 316         input=None,
 317         stdin=None,
 318         stderr=None,
 319         shell=False,
 320         check=True,
 321         preexec_fn=None,
 322         close_fds=True):
 323     if input:
 324         assert stdin in (None, PIPE)
 325         stdin = PIPE
 326     p = Popen(cmd,
 327               stdin=stdin, stdout=PIPE, stderr=stderr,
 328               shell=shell,
 329               preexec_fn=preexec_fn,
 330               close_fds=close_fds)
 331     out, err = p.communicate(input)
 332     if check and p.returncode != 0:
 333         raise Exception('subprocess %r failed with status %d%s'
 334                         % (b' '.join(map(quote, cmd)), p.returncode,
 335                            ', stderr: %r' % err if err else ''))
 336     return out, err, p
 337
 338 def readpipe(argv, preexec_fn=None, shell=False):
 339     """Run a subprocess and return its output."""
 340     return exo(argv, preexec_fn=preexec_fn, shell=shell)[0]
 341
 342
 343 def _argmax_base(command):
 344     base_size = 2048
 345     for c in command:
 346         base_size += len(command) + 1
 347     for k, v in compat.items(environ):
 348         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 349     return base_size
 350
 351
 352 def _argmax_args_size(args):
 353     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 354
 355
 356 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 357     """If args is not empty, yield the output produced by calling the
 358 command list with args as a sequence of strings (It may be necessary
 359 to return multiple strings in order to respect ARG_MAX)."""
 360     # The optional arg_max arg is a workaround for an issue with the
 361     # current wvtest behavior.
 362     base_size = _argmax_base(command)
 363     while args:
 364         room = arg_max - base_size
 365         i = 0
 366         while i < len(args):
 367             next_size = _argmax_args_size(args[i:i+1])
 368             if room - next_size < 0:
 369                 break
 370             room -= next_size
 371             i += 1
 372         sub_args = args[:i]
 373         args = args[i:]
 374         assert(len(sub_args))
 375         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 376
 377
 378 def resolve_parent(p):
 379     """Return the absolute path of a file without following any final symlink.
 380
 381     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 382     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 383     will follow symlinks in p's directory)
 384     """
 385     try:
 386         st = os.lstat(p)
 387     except OSError:
 388         st = None
 389     if st and stat.S_ISLNK(st.st_mode):
 390         (dir, name) = os.path.split(p)
 391         dir = os.path.realpath(dir)
 392         out = os.path.join(dir, name)
 393     else:
 394         out = os.path.realpath(p)
 395     #log('realpathing:%r,%r\n' % (p, out))
 396     return out
 397
 398
 399 def detect_fakeroot():
 400     "Return True if we appear to be running under fakeroot."
 401     return os.getenv("FAKEROOTKEY") != None
 402
 403
 404 if sys.platform.startswith('cygwin'):
 405     def is_superuser():
 406         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 407         groups = os.getgroups()
 408         return 544 in groups or 0 in groups
 409 else:
 410     def is_superuser():
 411         return os.geteuid() == 0
 412
 413
 414 def cache_key_value(get_value, key, cache):
 415     """Return (value, was_cached).  If there is a value in the cache
 416     for key, use that, otherwise, call get_value(key) which should
 417     throw a KeyError if there is no value -- in which case the cached
 418     and returned value will be None.
 419     """
 420     try: # Do we already have it (or know there wasn't one)?
 421         value = cache[key]
 422         return value, True
 423     except KeyError:
 424         pass
 425     value = None
 426     try:
 427         cache[key] = value = get_value(key)
 428     except KeyError:
 429         cache[key] = None
 430     return value, False
 431
 432
 433 _hostname = None
 434 def hostname():
 435     """Get the FQDN of this machine."""
 436     global _hostname
 437     if not _hostname:
 438         _hostname = _helpers.gethostname()
 439     return _hostname
 440
 441
 442 def format_filesize(size):
 443     unit = 1024.0
 444     size = float(size)
 445     if size < unit:
 446         return "%d" % (size)
 447     exponent = int(math.log(size) // math.log(unit))
 448     size_prefix = "KMGTPE"[exponent - 1]
 449     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 450
 451
 452 class NotOk(Exception):
 453     pass
 454
 455
 456 class BaseConn:
 457     def __init__(self, outp):
 458         self._base_closed = False
 459         self.outp = outp
 460
 461     def close(self):
 462         self._base_closed = True
 463
 464     def __enter__(self):
 465         return self
 466
 467     def __exit__(self, exc_type, exc_value, tb):
 468         with pending_raise(exc_value, rethrow=False):
 469             self.close()
 470
 471     def __del__(self):
 472         assert self._base_closed
 473
 474     def _read(self, size):
 475         raise NotImplementedError("Subclasses must implement _read")
 476
 477     def read(self, size):
 478         """Read 'size' bytes from input stream."""
 479         self.outp.flush()
 480         return self._read(size)
 481
 482     def _readline(self, size):
 483         raise NotImplementedError("Subclasses must implement _readline")
 484
 485     def readline(self):
 486         """Read from input stream until a newline is found."""
 487         self.outp.flush()
 488         return self._readline()
 489
 490     def write(self, data):
 491         """Write 'data' to output stream."""
 492         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 493         self.outp.write(data)
 494
 495     def has_input(self):
 496         """Return true if input stream is readable."""
 497         raise NotImplementedError("Subclasses must implement has_input")
 498
 499     def ok(self):
 500         """Indicate end of output from last sent command."""
 501         self.write(b'\nok\n')
 502
 503     def error(self, s):
 504         """Indicate server error to the client."""
 505         s = re.sub(br'\s+', b' ', s)
 506         self.write(b'\nerror %s\n' % s)
 507
 508     def _check_ok(self, onempty):
 509         self.outp.flush()
 510         rl = b''
 511         for rl in linereader(self):
 512             #log('%d got line: %r\n' % (os.getpid(), rl))
 513             if not rl:  # empty line
 514                 continue
 515             elif rl == b'ok':
 516                 return None
 517             elif rl.startswith(b'error '):
 518                 #log('client: error: %s\n' % rl[6:])
 519                 return NotOk(rl[6:])
 520             else:
 521                 onempty(rl)
 522         raise Exception('server exited unexpectedly; see errors above')
 523
 524     def drain_and_check_ok(self):
 525         """Remove all data for the current command from input stream."""
 526         def onempty(rl):
 527             pass
 528         return self._check_ok(onempty)
 529
 530     def check_ok(self):
 531         """Verify that server action completed successfully."""
 532         def onempty(rl):
 533             raise Exception('expected "ok", got %r' % rl)
 534         return self._check_ok(onempty)
 535
 536
 537 class Conn(BaseConn):
 538     def __init__(self, inp, outp):
 539         BaseConn.__init__(self, outp)
 540         self.inp = inp
 541
 542     def _read(self, size):
 543         return self.inp.read(size)
 544
 545     def _readline(self):
 546         return self.inp.readline()
 547
 548     def has_input(self):
 549         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 550         if rl:
 551             assert(rl[0] == self.inp.fileno())
 552             return True
 553         else:
 554             return None
 555
 556
 557 def checked_reader(fd, n):
 558     while n > 0:
 559         rl, _, _ = select.select([fd], [], [])
 560         assert(rl[0] == fd)
 561         buf = os.read(fd, n)
 562         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 563         yield buf
 564         n -= len(buf)
 565
 566
# Largest payload mux() reads from outr for a single packet; DemuxConn
# treats a larger announced length as a broken connection.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex the output of process p onto outfd.

    While p is running, data read from the descriptors outr and errr is
    written to outfd as packets: a '!IB' header (payload length, channel)
    followed by the payload.  Channel 1 carries data from outr, channel 2
    data from errr.  A zero-length packet on channel 3 is always written
    last, even if an exception is propagating.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # An empty read leaves only this for loop; the outer
                    # loop keeps running until p has exited.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        # End-of-stream marker.
        os.write(outfd, struct.pack('!IB', 0, 3))
 584
 585
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a packet stream from the descriptor infd in which every packet
    is a '!IB' header (payload length, channel) followed by the payload.
    Channel 1 payloads are the data returned by read()/readline(),
    channel 2 payloads are copied to stderr, and a channel 3 packet
    marks the connection as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        stderr = byte_stream(sys.stderr)
        while tail != b'BUPMUX':
            # Make sure to write all pre-BUPMUX output to stderr
            # Read up to 6 bytes while tail is short, then 1 at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                ex = IOError('demux: unexpected EOF during initialization')
                # Flush what was seen so far before the error goes out.
                # NOTE(review): relies on pending_raise() raising ex when
                # its block ends -- confirm against bup.compat.
                with pending_raise(ex):
                    stderr.write(tail)
                    stderr.flush()
            tail += b
            # Keep only the last 6 bytes as the sync candidate; everything
            # before them is passed through to stderr.
            stderr.write(tail[:-6])
            tail = tail[-6:]
        stderr.flush()
        self.infd = infd
        self.reader = None   # checked_reader over the current data packet
        self.buf = None      # data read but not yet handed to the caller
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        # Poll for pending input (zero timeout) before writing.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and act on it.

        Returns False if the connection is closed or nothing became
        readable within timeout, True once a packet has been handled.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        # 5-byte header: 4-byte length and 1-byte channel.
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        if n > MAX_PACKET:
            # assume that something went wrong and print stuff
            ns += os.read(self.infd, 1024)
            stderr = byte_stream(sys.stderr)
            stderr.write(ns)
            stderr.flush()
            raise Exception("Connection broken")
        if fdw == 1:
            # Data packet: consumed lazily via _load_buf().
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: copied straight through.
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return True if it does.

        Returns False when the connection is closed or no packet
        arrived within timeout.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current data packet is exhausted; look for the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of buffered data as directed by ix_fn.

        ix_fn(buf) returns the number of leading bytes of buf to take,
        which ends the read, or None to take all of buf and continue
        with the next buffer.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        csize = [size]  # bytes still wanted; a list so the closure can update it
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        return self._load_buf(0)
 687
 688
 689 def linereader(f):
 690     """Generate a list of input lines from 'f' without terminating newlines."""
 691     while 1:
 692         line = f.readline()
 693         if not line:
 694             break
 695         yield line[:-1]
 696
 697
 698 def chunkyreader(f, count = None):
 699     """Generate a list of chunks of data read from 'f'.
 700
 701     If count is None, read until EOF is reached.
 702
 703     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 704     reached while reading, raise IOError.
 705     """
 706     if count != None:
 707         while count > 0:
 708             b = f.read(min(count, 65536))
 709             if not b:
 710                 raise IOError('EOF with %d bytes remaining' % count)
 711             yield b
 712             count -= len(b)
 713     else:
 714         while 1:
 715             b = f.read(65536)
 716             if not b: break
 717             yield b
 718
 719
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed to name when leaving
    the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.  The temporary file is created in the
    same directory as name.  If the block raises, the target is left
    untouched and the temporary file is removed.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen() failed, so the descriptor is still ours to close.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Reached only when the block did not raise.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 754
 755
 756 def slashappend(s):
 757     """Append "/" to 's' if it doesn't aleady end in "/"."""
 758     assert isinstance(s, bytes)
 759     if s and not s.endswith(b'/'):
 760         return s + b'/'
 761     else:
 762         return s
 763
 764
 765 def _mmap_do(f, sz, flags, prot, close):
 766     if not sz:
 767         st = os.fstat(f.fileno())
 768         sz = st.st_size
 769     if not sz:
 770         # trying to open a zero-length map gives an error, but an empty
 771         # string has all the same behaviour of a zero-length map, ie. it has
 772         # no elements :)
 773         return ''
 774     map = io.mmap(f.fileno(), sz, flags, prot)
 775     if close:
 776         f.close()  # map will persist beyond file close
 777     return map
 778
 779
 780 def mmap_read(f, sz = 0, close=True):
 781     """Create a read-only memory mapped region on file 'f'.
 782     If sz is 0, the region will cover the entire file.
 783     """
 784     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 785
 786
 787 def mmap_readwrite(f, sz = 0, close=True):
 788     """Create a read-write memory mapped region on file 'f'.
 789     If sz is 0, the region will cover the entire file.
 790     """
 791     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 792                     close)
 793
 794
 795 def mmap_readwrite_private(f, sz = 0, close=True):
 796     """Create a read-write memory mapped region on file 'f'.
 797     If sz is 0, the region will cover the entire file.
 798     The map is private, which means the changes are never flushed back to the
 799     file.
 800     """
 801     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 802                     close)
 803
 804
# fmincore() and MINCORE_INCORE are defined only when the _helpers extension
# provides mincore.
_mincore = getattr(_helpers, 'mincore', None)
if _mincore:
    # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
    MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)

    # Bytes mapped per mincore() call; computed lazily by
    # _set_fmincore_chunk_size().
    _fmincore_chunk_size = None
    def _set_fmincore_chunk_size():
        """Set _fmincore_chunk_size to the largest multiple of the page
        size not exceeding 64MiB (or to one page if pages are larger)."""
        global _fmincore_chunk_size
        pref_chunk_size = 64 * 1024 * 1024
        chunk_size = sc_page_size
        if (sc_page_size < pref_chunk_size):
            chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
        _fmincore_chunk_size = chunk_size

    def fmincore(fd):
        """Return the mincore() data for fd as a bytearray whose values can be
        tested via MINCORE_INCORE, or None if fd does not fully
        support the operation."""
        st = os.fstat(fd)
        if (st.st_size == 0):
            return bytearray(0)
        if not _fmincore_chunk_size:
            _set_fmincore_chunk_size()
        pages_per_chunk = _fmincore_chunk_size // sc_page_size;
        # Both counts round up so a partial last page/chunk is included.
        page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
        chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size
        result = bytearray(page_count)  # one entry per page of the file
        for ci in compat.range(chunk_count):
            pos = _fmincore_chunk_size * ci;
            # The last chunk may be shorter than a full chunk.
            msize = min(_fmincore_chunk_size, st.st_size - pos)
            try:
                m = io.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
            except mmap.error as ex:
                if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
                    # Perhaps the file was a pipe, i.e. "... | bup split ..."
                    return None
                raise ex
            with m:
                try:
                    # Fills result starting at this chunk's first page index.
                    _mincore(m, msize, 0, result, ci * pages_per_chunk)
                except OSError as ex:
                    if ex.errno == errno.ENOSYS:
                        return None
                    raise
        return result
 850
 851
 852 def parse_timestamp(epoch_str):
 853     """Return the number of nanoseconds since the epoch that are described
 854 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 855 throw a ValueError that may contain additional information."""
 856     ns_per = {'s' :  1000000000,
 857               'ms' : 1000000,
 858               'us' : 1000,
 859               'ns' : 1}
 860     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 861     if not match:
 862         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 863             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 864         raise ValueError()
 865     (n, units) = match.group(1, 2)
 866     if not n:
 867         n = 1
 868     n = int(n)
 869     return n * ns_per[units]
 870
 871
 872 def parse_num(s):
 873     """Parse string or bytes as a possibly unit suffixed number.
 874
 875     For example:
 876         199.2k means 203981 bytes
 877         1GB means 1073741824 bytes
 878         2.1 tb means 2199023255552 bytes
 879     """
 880     if isinstance(s, bytes):
 881         # FIXME: should this raise a ValueError for UnicodeDecodeError
 882         # (perhaps with the latter as the context).
 883         s = s.decode('ascii')
 884     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 885     if not g:
 886         raise ValueError("can't parse %r as a number" % s)
 887     (val, unit) = g.groups()
 888     num = float(val)
 889     unit = unit.lower()
 890     if unit in ['t', 'tb']:
 891         mult = 1024*1024*1024*1024
 892     elif unit in ['g', 'gb']:
 893         mult = 1024*1024*1024
 894     elif unit in ['m', 'mb']:
 895         mult = 1024*1024
 896     elif unit in ['k', 'kb']:
 897         mult = 1024
 898     elif unit in ['', 'b']:
 899         mult = 1
 900     else:
 901         raise ValueError("invalid unit %r in number %r" % (unit, s))
 902     return int(num*mult)
 903
 904
 905 saved_errors = []
 906 def add_error(e):
 907     """Append an error message to the list of saved errors.
 908
 909     Once processing is able to stop and output the errors, the saved errors are
 910     accessible in the module variable helpers.saved_errors.
 911     """
 912     saved_errors.append(e)
 913     log('%-70s\n' % e)
 914
 915
 916 def clear_errors():
 917     global saved_errors
 918     saved_errors = []
 919
 920
 921 def die_if_errors(msg=None, status=1):
 922     global saved_errors
 923     if saved_errors:
 924         if not msg:
 925             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 926         log(msg)
 927         sys.exit(status)
 928
 929
 930 def handle_ctrl_c():
 931     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 932
 933     The new exception handler will make sure that bup will exit without an ugly
 934     stacktrace when Ctrl-C is hit.
 935     """
 936     oldhook = sys.excepthook
 937     def newhook(exctype, value, traceback):
 938         if exctype == KeyboardInterrupt:
 939             log('\nInterrupted.\n')
 940         else:
 941             oldhook(exctype, value, traceback)
 942     sys.excepthook = newhook
 943
 944
 945 def columnate(l, prefix):
 946     """Format elements of 'l' in columns with 'prefix' leading each line.
 947
 948     The number of columns is determined automatically based on the string
 949     lengths.
 950     """
 951     binary = isinstance(prefix, bytes)
 952     nothing = b'' if binary else ''
 953     nl = b'\n' if binary else '\n'
 954     if not l:
 955         return nothing
 956     l = l[:]
 957     clen = max(len(s) for s in l)
 958     ncols = (tty_width() - len(prefix)) // (clen + 2)
 959     if ncols <= 1:
 960         ncols = 1
 961         clen = 0
 962     cols = []
 963     while len(l) % ncols:
 964         l.append(nothing)
 965     rows = len(l) // ncols
 966     for s in compat.range(0, len(l), rows):
 967         cols.append(l[s:s+rows])
 968     out = nothing
 969     fmt = b'%-*s' if binary else '%-*s'
 970     for row in zip(*cols):
 971         out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
 972     return out
 973
 974
 975 def parse_date_or_fatal(str, fatal):
 976     """Parses the given date or calls Option.fatal().
 977     For now we expect a string that contains a float."""
 978     try:
 979         date = float(str)
 980     except ValueError as e:
 981         raise fatal('invalid date format (should be a float): %r' % e)
 982     else:
 983         return date
 984
 985
 986 def parse_excludes(options, fatal):
 987     """Traverse the options and extract all excludes, or call Option.fatal()."""
 988     excluded_paths = []
 989
 990     for flag in options:
 991         (option, parameter) = flag
 992         if option == '--exclude':
 993             excluded_paths.append(resolve_parent(argv_bytes(parameter)))
 994         elif option == '--exclude-from':
 995             try:
 996                 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
 997             except IOError as e:
 998                 raise fatal("couldn't read %r" % parameter)
 999             for exclude_path in f.readlines():
1000                 # FIXME: perhaps this should be rstrip('\n')
1001                 exclude_path = resolve_parent(exclude_path.strip())
1002                 if exclude_path:
1003                     excluded_paths.append(exclude_path)
1004     return sorted(frozenset(excluded_paths))
1005
1006
def parse_rx_excludes(options, fatal):
    """Traverse the options and extract all rx excludes, or call
    Option.fatal().

    options is an iterable of (option, parameter) pairs.  Each
    '--exclude-rx' parameter is a regular expression; each
    '--exclude-rx-from' parameter names a file with one regular
    expression per line (empty lines are ignored, and only the
    trailing newline is removed from each line).

    Returns the list of compiled patterns, in the order encountered.
    fatal() is called for an invalid pattern or an unreadable file.
    """
    excluded_patterns = []

    for option, parameter in options:
        if option == '--exclude-rx':
            try:
                excluded_patterns.append(re.compile(argv_bytes(parameter)))
            except re.error as ex:
                fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
        elif option == '--exclude-rx-from':
            # NOTE(review): unlike --exclude-from in parse_excludes(), the
            # parameter isn't passed through argv_bytes() here -- confirm
            # whether that's intentional.
            try:
                f = open(resolve_parent(parameter), 'rb')
            except IOError:
                raise fatal("couldn't read %r" % parameter)
            # Make sure the file is closed even if fatal() raises below.
            with f:
                for pattern in f:
                    spattern = pattern.rstrip(b'\n')
                    if not spattern:
                        continue
                    try:
                        excluded_patterns.append(re.compile(spattern))
                    except re.error as ex:
                        fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
    return excluded_patterns
1033
1034
def should_rx_exclude_path(path, exclude_rxs):
    """Return True if any compiled pattern in exclude_rxs is found in path.

    The patterns are tried in order with search(); the first one that
    matches is reported via debug1().  Returns False when none match.
    """
    matched = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if matched is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, matched.pattern))
    return True
1043
1044
1045 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1046 # that resolve against the current filesystem in the strip/graft
1047 # functions for example, but elsewhere as well.  I suspect bup's not
1048 # always being careful about that.  For some cases, the contents of
1049 # the current filesystem should be irrelevant, and consulting it might
1050 # produce the wrong result, perhaps via unintended symlink resolution,
1051 # for example.
1052
def path_components(path):
    """Split path into a list of (name, full_path_to_name) pairs.

    path is a bytes path that must start with b'/'; it is normalized
    with os.path.abspath() before being split.  The first pair always
    describes the root.
    Example:
      b'/home/foo' -> [(b'', b'/'), (b'home', b'/home'),
                       (b'foo', b'/home/foo')]
    Raises Exception if path doesn't start with b'/'."""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    # The root is always the first component.
    components = [(b'', b'/')]
    absolute = os.path.abspath(path)
    if absolute != b'/':
        so_far = b''
        # The element before the leading '/' is empty; skip it.
        for name in absolute.split(b'/')[1:]:
            so_far = b'/'.join((so_far, name))
            components.append((name, so_far))
    return components
1070
1071
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Both path and the prefixes are normalized with os.path.abspath().
    The longest prefixes are tried first, and a prefix only applies
    when it ends on a component boundary of path (b'/a/bc' is not
    treated as a prefix of b'/a/bcd').  A prefix of b'/' is ignored.
    The first returned component has an empty name and refers to the
    stripped prefix itself.  When no prefix applies, the result is
    path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == b'/':
            continue
        # Require a whole-component match; a plain startswith() would
        # also accept prefixes that end in the middle of a name and
        # then produce filesystem paths that don't correspond to path.
        if normalized_path == normalized_bp \
           or normalized_path.startswith(normalized_bp + b'/'):
            prefix = normalized_bp
            result = []
            # The remainder is either empty or starts with '/', so the
            # first split element is always b'' and stands for the
            # stripped prefix.
            for p in normalized_path[len(normalized_bp):].split(b'/'):
                if p: # not root
                    prefix += b'/'
                prefix += p
                result.append((p, prefix))
            return result
    # Nothing to strip.
    return path_components(path)
1094
1095
def grafted_path_components(graft_points, path):
    """Return the components of path after applying the first matching graft.

    graft_points is an iterable of (old_prefix, new_prefix) pairs, as
    given by --graft old=new.  The result has the same form as that of
    path_components(): a list of (name, full_path_or_None) pairs,
    where the faked directories leading up to the graft point have
    None as their path.  When no graft point matches, the result is
    path_components() of the normalized path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize the prefixes (this doesn't make them absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # NOTE(review): this is a plain string-prefix test, so an
        # old_prefix ending in the middle of a component name also
        # matches -- confirm whether that's intended.
        if clean_path.startswith(old_prefix):
            # Swap the leading old_prefix for new_prefix.  Slice rather
            # than re.sub(): as a regex replacement template, any
            # backslash in new_prefix would be interpreted as an
            # escape or group reference.
            grafted_path = new_prefix + clean_path[len(old_prefix):]
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = b'/' + grafted_path.lstrip(b'/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
            new_prefix_parts = new_prefix.split(b'/')
            result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            graft_index = len(result_prefix)
            result[graft_index] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == b'/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
1137
1138
# Module-level alias for hashlib's SHA-1 constructor.
Sha1 = hashlib.sha1
1140
1141
# _helpers.localtime when the C helper module provides it, else None.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Field order matches what _helpers.localtime() returns: the nine
    # leading items used for time.struct_time (see to_py_time()),
    # followed by the UTC offset and zone name.
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the local time for 'time' (seconds, rounded down to
        an integer) as a bup_time."""
        return bup_time(*_helpers.localtime(int(floor(time))))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        # Emit the sign separately: folding it into the hours loses it
        # when the hours are zero (-00:30 would come out as +0030).  An
        # offset that truncates to zero minutes is reported as +0000.
        sign = b'-' if off < 0 and offmin else b'+'
        return b'%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any items beyond
        the first nine (i.e. the extra bup_time fields)."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as bytes,
        formatted by strftime's %z."""
        # time.strftime() requires a text format on Python 3, so format
        # as text and then encode to get bytes, matching the other
        # definition of utc_offset_str().
        return time.strftime('%z', localtime(t)).encode('ascii')
    def to_py_time(x):
        """Return x unchanged; localtime() results are already
        suitable for python's time module here."""
        return x
1177
1178
# Byte sequences that may not appear anywhere in a save name.
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if the bytes 'name' is acceptable as a save name.

    The rules are a superset of the restrictions in
    git-check-ref-format(1): among other things, no leading or
    trailing '/', no trailing '.', no control characters, and no
    '/'-separated part that starts with '.' or ends with '.lock'.
    """
    if name == b'@':
        return False
    if name.startswith(b'/') or name.endswith((b'/', b'.')):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    # Reject ASCII control characters, including DEL.
    if any(byte_int(c) < 0x20 or byte_int(c) == 0x7f for c in name):
        return False
    return not any(part.startswith(b'.') or part.endswith(b'.lock')
                   for part in name.split(b'/'))
1196
1197
# A non-negative integer immediately followed by a unit suffix.
_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period given as bytes (e.g. b'3d') to a number of seconds.

    b'forever' yields float('inf').  Otherwise s must be an integer
    followed by one of the units s, min, h, d, w, m (counted as 31
    days) or y (counted as 366 days).  Returns None when s isn't a
    valid period.
    """
    if s == b'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        b's': 1,
        b'min': minute,
        b'h': hour,
        b'd': day,
        b'w': 7 * day,
        b'm': 31 * day,
        b'y': 366 * day,
    }
    count, unit = parsed.groups()
    return int(count) * secs_per_unit[unit]