lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from math import floor
   8 from os import environ
   9 from subprocess import PIPE, Popen
  10 import sys, os, subprocess, errno, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 from bup.compat import argv_bytes, byte_int, nullcontext, pending_raise
  16 from bup.io import byte_stream, path_msg
  17 # This function should really be in helpers, not in bup.options.  But we
  18 # want options.py to be standalone so people can include it in other projects.
  19 from bup.options import _tty_width as tty_width
  20
  21
# Debug verbosity, read once at import time from the BUP_DEBUG environment
# variable (0 when unset).  debug1() logs at >= 1, debug2() at >= 2.
buglvl = int(os.environ.get('BUP_DEBUG', 0))
  23
  24
  25 class Nonlocal:
  26     """Helper to deal with Python scoping issues"""
  27     pass
  28
  29
  30 def nullcontext_if_not(manager):
  31     return manager if manager is not None else nullcontext()
  32
  33
@contextmanager
def finalized(enter_result=None, finalize=None):
    """Yield enter_result and call finalize(enter_result) when the
    block is left, whether normally or via an exception.

    finalize is required despite its None default (asserted below).
    When the block raises, finalize runs inside pending_raise(ex);
    presumably that helper re-raises ex on exit -- confirm against
    bup.compat.pending_raise.
    """
    assert finalize
    try:
        yield enter_result
    except BaseException as ex:
        with pending_raise(ex):
            finalize(enter_result)
    # Reached when the block finished without raising.
    finalize(enter_result)
  43
  44
  45 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  46 assert(sc_page_size > 0)
  47
  48 sc_arg_max = os.sysconf('SC_ARG_MAX')
  49 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  50     sc_arg_max = 2 * 1024 * 1024
  51
  52 def last(iterable):
  53     result = None
  54     for result in iterable:
  55         pass
  56     return result
  57
  58 try:
  59     _fdatasync = os.fdatasync
  60 except AttributeError:
  61     _fdatasync = os.fsync
  62
  63 if sys.platform.startswith('darwin'):
  64     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  65     import fcntl
  66     def fdatasync(fd):
  67         try:
  68             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  69         except IOError as e:
  70             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  71             if e.errno == errno.ENOTSUP:
  72                 return _fdatasync(fd)
  73             else:
  74                 raise
  75 else:
  76     fdatasync = _fdatasync
  77
  78
  79 def partition(predicate, stream):
  80     """Returns (leading_matches_it, rest_it), where leading_matches_it
  81     must be completely exhausted before traversing rest_it.
  82
  83     """
  84     stream = iter(stream)
  85     ns = Nonlocal()
  86     ns.first_nonmatch = None
  87     def leading_matches():
  88         for x in stream:
  89             if predicate(x):
  90                 yield x
  91             else:
  92                 ns.first_nonmatch = (x,)
  93                 break
  94     def rest():
  95         if ns.first_nonmatch:
  96             yield ns.first_nonmatch[0]
  97             for x in stream:
  98                 yield x
  99     return (leading_matches(), rest())
 100
 101
 102 def merge_dict(*xs):
 103     result = {}
 104     for x in xs:
 105         result.update(x)
 106     return result
 107
 108
 109 def lines_until_sentinel(f, sentinel, ex_type):
 110     # sentinel must end with \n and must contain only one \n
 111     while True:
 112         line = f.readline()
 113         if not (line and line.endswith(b'\n')):
 114             raise ex_type('Hit EOF while reading line')
 115         if line == sentinel:
 116             return
 117         yield line
 118
 119
 120 def stat_if_exists(path):
 121     try:
 122         return os.stat(path)
 123     except OSError as e:
 124         if e.errno != errno.ENOENT:
 125             raise
 126     return None
 127
 128
 129 # Write (blockingly) to sockets that may or may not be in blocking mode.
 130 # We need this because our stderr is sometimes eaten by subprocesses
 131 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 132 # leading to race conditions.  Ick.  We'll do it the hard way.
 133 def _hard_write(fd, buf):
 134     while buf:
 135         (r,w,x) = select.select([], [fd], [], None)
 136         if not w:
 137             raise IOError('select(fd) returned without being writable')
 138         try:
 139             sz = os.write(fd, buf)
 140         except OSError as e:
 141             if e.errno != errno.EAGAIN:
 142                 raise
 143         assert(sz >= 0)
 144         buf = buf[sz:]
 145
 146
 147 _last_prog = 0
 148 def log(s):
 149     """Print a log message to stderr."""
 150     global _last_prog
 151     sys.stdout.flush()
 152     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 153     _last_prog = 0
 154
 155
 156 def debug1(s):
 157     if buglvl >= 1:
 158         log(s)
 159
 160
 161 def debug2(s):
 162     if buglvl >= 2:
 163         log(s)
 164
 165
 166 istty1 = os.isatty(1) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 1)
 167 istty2 = os.isatty(2) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 2)
 168 _last_progress = ''
 169 def progress(s):
 170     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 171     global _last_progress
 172     if istty2:
 173         log(s)
 174         _last_progress = s
 175
 176
 177 def qprogress(s):
 178     """Calls progress() only if we haven't printed progress in a while.
 179
 180     This avoids overloading the stderr buffer with excess junk.
 181     """
 182     global _last_prog
 183     now = time.time()
 184     if now - _last_prog > 0.1:
 185         progress(s)
 186         _last_prog = now
 187
 188
 189 def reprogress():
 190     """Calls progress() to redisplay the most recent progress message.
 191
 192     Useful after you've printed some other message that wipes out the
 193     progress line.
 194     """
 195     if _last_progress and _last_progress.endswith('\r'):
 196         progress(_last_progress)
 197
 198
 199 def mkdirp(d, mode=None):
 200     """Recursively create directories on path 'd'.
 201
 202     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 203     the path already exists.
 204     """
 205     try:
 206         if mode:
 207             os.makedirs(d, mode)
 208         else:
 209             os.makedirs(d)
 210     except OSError as e:
 211         if e.errno == errno.EEXIST:
 212             pass
 213         else:
 214             raise
 215
 216
 217 class MergeIterItem:
 218     def __init__(self, entry, read_it):
 219         self.entry = entry
 220         self.read_it = read_it
 221     def __lt__(self, x):
 222         return self.entry < x.entry
 223
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge iters through a heap, yielding entries in heap order and
    skipping any entry equal to the one yielded just before it.

    iters must be a reusable sequence of sized iterables: len() is
    taken of each, and the sequence is traversed twice.  Presumably
    each iterable is already sorted -- verify against callers.

    Equality is operator.eq, or equality of the attribute named by key
    when key is given.  pfunc(count, total) is called whenever count is
    a multiple of pfreq, and pfinal(count, total) once at the end,
    where count is the number of entries consumed so far and total is
    the sum of the input lengths.
    """
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    heap = ((next(it, None),it) for it in iters)
    # NOTE: the `if e` filter drops an iterator whose first entry is
    # falsy, not only one that is exhausted.
    heap = [MergeIterItem(e, it) for e, it in heap if e]

    heapq.heapify(heap)
    pe = None  # previously yielded entry
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0].entry, heap[0].read_it
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = next(it)
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            # shift current to new location
            heapq.heapreplace(heap, MergeIterItem(e, it))
    pfinal(count, total)
 253
 254
 255 def unlink(f):
 256     """Delete a file at path 'f' if it currently exists.
 257
 258     Unlike os.unlink(), does not throw an exception if the file didn't already
 259     exist.
 260     """
 261     try:
 262         os.unlink(f)
 263     except OSError as e:
 264         if e.errno != errno.ENOENT:
 265             raise
 266
 267
 268 _bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
 269 _sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
 270
 271 def bquote(x):
 272     if x == b'':
 273         return b"''"
 274     if _bq_simple_id_rx.match(x):
 275         return x
 276     return b"'%s'" % x.replace(b"'", b"'\"'\"'")
 277
 278 def squote(x):
 279     if x == '':
 280         return "''"
 281     if _sq_simple_id_rx.match(x):
 282         return x
 283     return "'%s'" % x.replace("'", "'\"'\"'")
 284
 285 def quote(x):
 286     if isinstance(x, bytes):
 287         return bquote(x)
 288     if isinstance(x, compat.str_type):
 289         return squote(x)
 290     assert False
 291     # some versions of pylint get confused
 292     return None
 293
 294 def shstr(cmd):
 295     """Return a shell quoted string for cmd if it's a sequence, else cmd.
 296
 297     cmd must be a string, bytes, or a sequence of one or the other,
 298     and the assumption is that if cmd is a string or bytes, then it's
 299     already quoted (because it's what's actually being passed to
 300     call() and friends.  e.g. log(shstr(cmd)); call(cmd)
 301
 302     """
 303     if isinstance(cmd, (bytes, compat.str_type)):
 304         return cmd
 305     elif all(isinstance(x, bytes) for x in cmd):
 306         return b' '.join(map(bquote, cmd))
 307     elif all(isinstance(x, compat.str_type) for x in cmd):
 308         return ' '.join(map(squote, cmd))
 309     raise TypeError('unsupported shstr argument: ' + repr(cmd))
 310
 311
# Alias for subprocess.check_call: run a command and raise
# CalledProcessError when it exits with a non-zero status.
exc = subprocess.check_call
 313
 314 def exo(cmd,
 315         input=None,
 316         stdin=None,
 317         stderr=None,
 318         shell=False,
 319         check=True,
 320         preexec_fn=None,
 321         close_fds=True):
 322     if input:
 323         assert stdin in (None, PIPE)
 324         stdin = PIPE
 325     p = Popen(cmd,
 326               stdin=stdin, stdout=PIPE, stderr=stderr,
 327               shell=shell,
 328               preexec_fn=preexec_fn,
 329               close_fds=close_fds)
 330     out, err = p.communicate(input)
 331     if check and p.returncode != 0:
 332         raise Exception('subprocess %r failed with status %d%s'
 333                         % (b' '.join(map(quote, cmd)), p.returncode,
 334                            ', stderr: %r' % err if err else ''))
 335     return out, err, p
 336
 337 def readpipe(argv, preexec_fn=None, shell=False):
 338     """Run a subprocess and return its output."""
 339     return exo(argv, preexec_fn=preexec_fn, shell=shell)[0]
 340
 341
 342 def _argmax_base(command):
 343     base_size = 2048
 344     for c in command:
 345         base_size += len(command) + 1
 346     for k, v in compat.items(environ):
 347         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 348     return base_size
 349
 350
 351 def _argmax_args_size(args):
 352     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 353
 354
 355 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 356     """If args is not empty, yield the output produced by calling the
 357 command list with args as a sequence of strings (It may be necessary
 358 to return multiple strings in order to respect ARG_MAX)."""
 359     # The optional arg_max arg is a workaround for an issue with the
 360     # current wvtest behavior.
 361     base_size = _argmax_base(command)
 362     while args:
 363         room = arg_max - base_size
 364         i = 0
 365         while i < len(args):
 366             next_size = _argmax_args_size(args[i:i+1])
 367             if room - next_size < 0:
 368                 break
 369             room -= next_size
 370             i += 1
 371         sub_args = args[:i]
 372         args = args[i:]
 373         assert(len(sub_args))
 374         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 375
 376
 377 def resolve_parent(p):
 378     """Return the absolute path of a file without following any final symlink.
 379
 380     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 381     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 382     will follow symlinks in p's directory)
 383     """
 384     try:
 385         st = os.lstat(p)
 386     except OSError:
 387         st = None
 388     if st and stat.S_ISLNK(st.st_mode):
 389         (dir, name) = os.path.split(p)
 390         dir = os.path.realpath(dir)
 391         out = os.path.join(dir, name)
 392     else:
 393         out = os.path.realpath(p)
 394     #log('realpathing:%r,%r\n' % (p, out))
 395     return out
 396
 397
 398 def detect_fakeroot():
 399     "Return True if we appear to be running under fakeroot."
 400     return os.getenv("FAKEROOTKEY") != None
 401
 402
 403 if sys.platform.startswith('cygwin'):
 404     def is_superuser():
 405         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 406         groups = os.getgroups()
 407         return 544 in groups or 0 in groups
 408 else:
 409     def is_superuser():
 410         return os.geteuid() == 0
 411
 412
 413 def cache_key_value(get_value, key, cache):
 414     """Return (value, was_cached).  If there is a value in the cache
 415     for key, use that, otherwise, call get_value(key) which should
 416     throw a KeyError if there is no value -- in which case the cached
 417     and returned value will be None.
 418     """
 419     try: # Do we already have it (or know there wasn't one)?
 420         value = cache[key]
 421         return value, True
 422     except KeyError:
 423         pass
 424     value = None
 425     try:
 426         cache[key] = value = get_value(key)
 427     except KeyError:
 428         cache[key] = None
 429     return value, False
 430
 431
 432 _hostname = None
 433 def hostname():
 434     """Get the FQDN of this machine."""
 435     global _hostname
 436     if not _hostname:
 437         _hostname = _helpers.gethostname()
 438     return _hostname
 439
 440
 441 def format_filesize(size):
 442     unit = 1024.0
 443     size = float(size)
 444     if size < unit:
 445         return "%d" % (size)
 446     exponent = int(math.log(size) // math.log(unit))
 447     size_prefix = "KMGTPE"[exponent - 1]
 448     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 449
 450
 451 class NotOk(Exception):
 452     pass
 453
 454
 455 class BaseConn:
 456     def __init__(self, outp):
 457         self._base_closed = False
 458         self.outp = outp
 459
 460     def close(self):
 461         self._base_closed = True
 462
 463     def __enter__(self):
 464         return self
 465
 466     def __exit__(self, exc_type, exc_value, tb):
 467         with pending_raise(exc_value, rethrow=False):
 468             self.close()
 469
 470     def __del__(self):
 471         assert self._base_closed
 472
 473     def _read(self, size):
 474         raise NotImplementedError("Subclasses must implement _read")
 475
 476     def read(self, size):
 477         """Read 'size' bytes from input stream."""
 478         self.outp.flush()
 479         return self._read(size)
 480
 481     def _readline(self, size):
 482         raise NotImplementedError("Subclasses must implement _readline")
 483
 484     def readline(self):
 485         """Read from input stream until a newline is found."""
 486         self.outp.flush()
 487         return self._readline()
 488
 489     def write(self, data):
 490         """Write 'data' to output stream."""
 491         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 492         self.outp.write(data)
 493
 494     def has_input(self):
 495         """Return true if input stream is readable."""
 496         raise NotImplementedError("Subclasses must implement has_input")
 497
 498     def ok(self):
 499         """Indicate end of output from last sent command."""
 500         self.write(b'\nok\n')
 501
 502     def error(self, s):
 503         """Indicate server error to the client."""
 504         s = re.sub(br'\s+', b' ', s)
 505         self.write(b'\nerror %s\n' % s)
 506
 507     def _check_ok(self, onempty):
 508         self.outp.flush()
 509         rl = b''
 510         for rl in linereader(self):
 511             #log('%d got line: %r\n' % (os.getpid(), rl))
 512             if not rl:  # empty line
 513                 continue
 514             elif rl == b'ok':
 515                 return None
 516             elif rl.startswith(b'error '):
 517                 #log('client: error: %s\n' % rl[6:])
 518                 return NotOk(rl[6:])
 519             else:
 520                 onempty(rl)
 521         raise Exception('server exited unexpectedly; see errors above')
 522
 523     def drain_and_check_ok(self):
 524         """Remove all data for the current command from input stream."""
 525         def onempty(rl):
 526             pass
 527         return self._check_ok(onempty)
 528
 529     def check_ok(self):
 530         """Verify that server action completed successfully."""
 531         def onempty(rl):
 532             raise Exception('expected "ok", got %r' % rl)
 533         return self._check_ok(onempty)
 534
 535
 536 class Conn(BaseConn):
 537     def __init__(self, inp, outp):
 538         BaseConn.__init__(self, outp)
 539         self.inp = inp
 540
 541     def _read(self, size):
 542         return self.inp.read(size)
 543
 544     def _readline(self):
 545         return self.inp.readline()
 546
 547     def has_input(self):
 548         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 549         if rl:
 550             assert(rl[0] == self.inp.fileno())
 551             return True
 552         else:
 553             return None
 554
 555
 556 def checked_reader(fd, n):
 557     while n > 0:
 558         rl, _, _ = select.select([fd], [], [])
 559         assert(rl[0] == fd)
 560         buf = os.read(fd, n)
 561         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 562         yield buf
 563         n -= len(buf)
 564
 565
# Largest read, and therefore largest payload, that mux() puts in one
# data packet; DemuxConn treats a larger announced length as a broken
# connection.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex the file descriptors outr and errr onto outfd for as
    long as p.poll() returns None.

    Each packet is a 5-byte '!IB' header (payload length, channel)
    followed by the payload: channel 1 carries data read from outr,
    channel 2 data read from errr.  A zero-length channel 3 packet is
    always written last, even when an exception is propagating.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # EOF: stop handling this select() batch
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    # EOF: stop handling this select() batch
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        os.write(outfd, struct.pack('!IB', 0, 3))
 583
 584
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads packets from the file descriptor infd, each a 5-byte '!IB'
    header (payload length, channel) followed by the payload.
    Channel 1 payloads are the data stream served by read() and
    readline(), channel 2 payloads are copied to stderr, and a
    channel 3 packet marks the stream closed.
    """
    def __init__(self, infd, outp):
        """Consume input from infd up to and including the b'BUPMUX'
        sync marker, copying everything before it to stderr."""
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        stderr = byte_stream(sys.stderr)
        while tail != b'BUPMUX':
            # Make sure to write all pre-BUPMUX output to stderr
            # Read what is still missing for a full 6-byte window, then
            # one byte at a time once the window is full.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                ex = IOError('demux: unexpected EOF during initialization')
                # presumably pending_raise re-raises ex on exit --
                # confirm against bup.compat
                with pending_raise(ex):
                    stderr.write(tail)
                    stderr.flush()
            tail += b
            stderr.write(tail[:-6])
            tail = tail[-6:]
        stderr.flush()
        self.infd = infd
        self.reader = None  # generator over the current data packet
        self.buf = None     # data read but not yet handed out
        self.closed = False  # set once a channel 3 packet is seen

    def write(self, data):
        # Try, without blocking, to process a pending input packet
        # before writing.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Wait up to timeout for a packet header and dispatch on its
        channel.  Return False if the stream is closed or nothing
        arrived in time, True otherwise."""
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        if n > MAX_PACKET:
            # assume that something went wrong and print stuff
            ns += os.read(self.infd, 1024)
            stderr = byte_stream(sys.stderr)
            stderr.write(ns)
            stderr.flush()
            raise Exception("Connection broken")
        if fdw == 1:
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Ensure self.buf holds data.  Return True if it does, False if
        the stream is closed or no packet arrived within timeout."""
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # current data packet exhausted; fetch another
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of the data stream.

        ix_fn(buf) returns the number of leading bytes of buf to take
        as the final piece, or None to take all of buf and continue
        with the next buffer.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        # One-element list so the nested function can update the count.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        return self._load_buf(0)
 686
 687
 688 def linereader(f):
 689     """Generate a list of input lines from 'f' without terminating newlines."""
 690     while 1:
 691         line = f.readline()
 692         if not line:
 693             break
 694         yield line[:-1]
 695
 696
 697 def chunkyreader(f, count = None):
 698     """Generate a list of chunks of data read from 'f'.
 699
 700     If count is None, read until EOF is reached.
 701
 702     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 703     reached while reading, raise IOError.
 704     """
 705     if count != None:
 706         while count > 0:
 707             b = f.read(min(count, 65536))
 708             if not b:
 709                 raise IOError('EOF with %d bytes remaining' % count)
 710             yield b
 711             count -= len(b)
 712     else:
 713         while 1:
 714             b = f.read(65536)
 715             if not b: break
 716             yield b
 717
 718
 719 @contextmanager
 720 def atomically_replaced_file(name, mode='w', buffering=-1):
 721     """Yield a file that will be atomically renamed name when leaving the block.
 722
 723     This contextmanager yields an open file object that is backed by a
 724     temporary file which will be renamed (atomically) to the target
 725     name if everything succeeds.
 726
 727     The mode and buffering arguments are handled exactly as with open,
 728     and the yielded file will have very restrictive permissions, as
 729     per mkstemp.
 730
 731     E.g.::
 732
 733         with atomically_replaced_file('foo.txt', 'w') as f:
 734             f.write('hello jack.')
 735
 736     """
 737
 738     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 739                                        text=('b' not in mode))
 740     try:
 741         try:
 742             f = os.fdopen(ffd, mode, buffering)
 743         except:
 744             os.close(ffd)
 745             raise
 746         try:
 747             yield f
 748         finally:
 749             f.close()
 750         os.rename(tempname, name)
 751     finally:
 752         unlink(tempname)  # nonexistant file is ignored
 753
 754
 755 def slashappend(s):
 756     """Append "/" to 's' if it doesn't aleady end in "/"."""
 757     assert isinstance(s, bytes)
 758     if s and not s.endswith(b'/'):
 759         return s + b'/'
 760     else:
 761         return s
 762
 763
 764 def _mmap_do(f, sz, flags, prot, close):
 765     if not sz:
 766         st = os.fstat(f.fileno())
 767         sz = st.st_size
 768     if not sz:
 769         # trying to open a zero-length map gives an error, but an empty
 770         # string has all the same behaviour of a zero-length map, ie. it has
 771         # no elements :)
 772         return ''
 773     map = compat.mmap(f.fileno(), sz, flags, prot)
 774     if close:
 775         f.close()  # map will persist beyond file close
 776     return map
 777
 778
 779 def mmap_read(f, sz = 0, close=True):
 780     """Create a read-only memory mapped region on file 'f'.
 781     If sz is 0, the region will cover the entire file.
 782     """
 783     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 784
 785
 786 def mmap_readwrite(f, sz = 0, close=True):
 787     """Create a read-write memory mapped region on file 'f'.
 788     If sz is 0, the region will cover the entire file.
 789     """
 790     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 791                     close)
 792
 793
 794 def mmap_readwrite_private(f, sz = 0, close=True):
 795     """Create a read-write memory mapped region on file 'f'.
 796     If sz is 0, the region will cover the entire file.
 797     The map is private, which means the changes are never flushed back to the
 798     file.
 799     """
 800     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 801                     close)
 802
 803
# fmincore() is only defined when the C helper module provides mincore.
_mincore = getattr(_helpers, 'mincore', None)
if _mincore:
    # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
    MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)

    # Bytes mapped per mincore call; computed on first use of fmincore().
    _fmincore_chunk_size = None
    def _set_fmincore_chunk_size():
        """Set _fmincore_chunk_size to the largest multiple of the page
        size not exceeding 64MiB, or to one page if pages are larger."""
        global _fmincore_chunk_size
        pref_chunk_size = 64 * 1024 * 1024
        chunk_size = sc_page_size
        if (sc_page_size < pref_chunk_size):
            chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
        _fmincore_chunk_size = chunk_size

    def fmincore(fd):
        """Return the mincore() data for fd as a bytearray whose values can be
        tested via MINCORE_INCORE, or None if fd does not fully
        support the operation."""
        st = os.fstat(fd)
        if (st.st_size == 0):
            return bytearray(0)
        if not _fmincore_chunk_size:
            _set_fmincore_chunk_size()
        pages_per_chunk = _fmincore_chunk_size // sc_page_size;
        # Round up so that a partial trailing page/chunk is counted.
        page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
        chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size
        result = bytearray(page_count)
        for ci in compat.range(chunk_count):
            pos = _fmincore_chunk_size * ci;
            # The final chunk may be shorter than the chunk size.
            msize = min(_fmincore_chunk_size, st.st_size - pos)
            try:
                m = compat.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
            except mmap.error as ex:
                if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
                    # Perhaps the file was a pipe, i.e. "... | bup split ..."
                    return None
                raise ex
            with m:
                try:
                    # presumably fills result starting at page index
                    # ci * pages_per_chunk -- confirm against _helpers.mincore
                    _mincore(m, msize, 0, result, ci * pages_per_chunk)
                except OSError as ex:
                    if ex.errno == errno.ENOSYS:
                        return None
                    raise
        return result
 849
 850
 851 def parse_timestamp(epoch_str):
 852     """Return the number of nanoseconds since the epoch that are described
 853 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 854 throw a ValueError that may contain additional information."""
 855     ns_per = {'s' :  1000000000,
 856               'ms' : 1000000,
 857               'us' : 1000,
 858               'ns' : 1}
 859     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 860     if not match:
 861         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 862             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 863         raise ValueError()
 864     (n, units) = match.group(1, 2)
 865     if not n:
 866         n = 1
 867     n = int(n)
 868     return n * ns_per[units]
 869
 870
 871 def parse_num(s):
 872     """Parse string or bytes as a possibly unit suffixed number.
 873
 874     For example:
 875         199.2k means 203981 bytes
 876         1GB means 1073741824 bytes
 877         2.1 tb means 2199023255552 bytes
 878     """
 879     if isinstance(s, bytes):
 880         # FIXME: should this raise a ValueError for UnicodeDecodeError
 881         # (perhaps with the latter as the context).
 882         s = s.decode('ascii')
 883     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 884     if not g:
 885         raise ValueError("can't parse %r as a number" % s)
 886     (val, unit) = g.groups()
 887     num = float(val)
 888     unit = unit.lower()
 889     if unit in ['t', 'tb']:
 890         mult = 1024*1024*1024*1024
 891     elif unit in ['g', 'gb']:
 892         mult = 1024*1024*1024
 893     elif unit in ['m', 'mb']:
 894         mult = 1024*1024
 895     elif unit in ['k', 'kb']:
 896         mult = 1024
 897     elif unit in ['', 'b']:
 898         mult = 1
 899     else:
 900         raise ValueError("invalid unit %r in number %r" % (unit, s))
 901     return int(num*mult)
 902
 903
 904 saved_errors = []
 905 def add_error(e):
 906     """Append an error message to the list of saved errors.
 907
 908     Once processing is able to stop and output the errors, the saved errors are
 909     accessible in the module variable helpers.saved_errors.
 910     """
 911     saved_errors.append(e)
 912     log('%-70s\n' % e)
 913
 914
 915 def clear_errors():
 916     global saved_errors
 917     saved_errors = []
 918
 919
 920 def die_if_errors(msg=None, status=1):
 921     global saved_errors
 922     if saved_errors:
 923         if not msg:
 924             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 925         log(msg)
 926         sys.exit(status)
 927
 928
 929 def handle_ctrl_c():
 930     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 931
 932     The new exception handler will make sure that bup will exit without an ugly
 933     stacktrace when Ctrl-C is hit.
 934     """
 935     oldhook = sys.excepthook
 936     def newhook(exctype, value, traceback):
 937         if exctype == KeyboardInterrupt:
 938             log('\nInterrupted.\n')
 939         else:
 940             oldhook(exctype, value, traceback)
 941     sys.excepthook = newhook
 942
 943
 944 def columnate(l, prefix):
 945     """Format elements of 'l' in columns with 'prefix' leading each line.
 946
 947     The number of columns is determined automatically based on the string
 948     lengths.
 949     """
 950     binary = isinstance(prefix, bytes)
 951     nothing = b'' if binary else ''
 952     nl = b'\n' if binary else '\n'
 953     if not l:
 954         return nothing
 955     l = l[:]
 956     clen = max(len(s) for s in l)
 957     ncols = (tty_width() - len(prefix)) // (clen + 2)
 958     if ncols <= 1:
 959         ncols = 1
 960         clen = 0
 961     cols = []
 962     while len(l) % ncols:
 963         l.append(nothing)
 964     rows = len(l) // ncols
 965     for s in compat.range(0, len(l), rows):
 966         cols.append(l[s:s+rows])
 967     out = nothing
 968     fmt = b'%-*s' if binary else '%-*s'
 969     for row in zip(*cols):
 970         out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
 971     return out
 972
 973
 974 def parse_date_or_fatal(str, fatal):
 975     """Parses the given date or calls Option.fatal().
 976     For now we expect a string that contains a float."""
 977     try:
 978         date = float(str)
 979     except ValueError as e:
 980         raise fatal('invalid date format (should be a float): %r' % e)
 981     else:
 982         return date
 983
 984
 985 def parse_excludes(options, fatal):
 986     """Traverse the options and extract all excludes, or call Option.fatal()."""
 987     excluded_paths = []
 988
 989     for flag in options:
 990         (option, parameter) = flag
 991         if option == '--exclude':
 992             excluded_paths.append(resolve_parent(argv_bytes(parameter)))
 993         elif option == '--exclude-from':
 994             try:
 995                 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
 996             except IOError as e:
 997                 raise fatal("couldn't read %r" % parameter)
 998             for exclude_path in f.readlines():
 999                 # FIXME: perhaps this should be rstrip('\n')
1000                 exclude_path = resolve_parent(exclude_path.strip())
1001                 if exclude_path:
1002                     excluded_paths.append(exclude_path)
1003     return sorted(frozenset(excluded_paths))
1004
1005
def parse_rx_excludes(options, fatal):
    """Traverse the options and extract all rx excludes, or call
    Option.fatal().

    options is an iterable of (option, parameter) pairs; pairs for other
    options are ignored.  The parameter of --exclude-rx is a pattern,
    and the parameter of --exclude-rx-from names a file holding one
    pattern per line (only the trailing newline is removed, and empty
    lines are skipped).

    Returns the compiled patterns as a list, in the order encountered.
    fatal is called for a pattern that does not compile, and for an
    --exclude-rx-from file that cannot be opened.
    """
    excluded_patterns = []

    for option, parameter in options:
        if option == '--exclude-rx':
            try:
                excluded_patterns.append(re.compile(argv_bytes(parameter)))
            except re.error as ex:
                fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
        elif option == '--exclude-rx-from':
            try:
                f = open(resolve_parent(parameter), 'rb')
            except IOError:
                raise fatal("couldn't read %r" % parameter)
            # Make sure the file is closed again, even when fatal()
            # aborts in the middle of it.
            with f:
                for pattern in f:
                    spattern = pattern.rstrip(b'\n')
                    if not spattern:
                        continue
                    try:
                        excluded_patterns.append(re.compile(spattern))
                    except re.error as ex:
                        fatal('invalid --exclude-rx pattern (%r): %s'
                              % (spattern, ex))
    return excluded_patterns
1032
1033
def should_rx_exclude_path(path, exclude_rxs):
    """Return True if any regular expression in exclude_rxs is found in path.

    The patterns are tried in order with search(); the first one that
    matches is reported via debug1().  Returns False when none match.
    """
    hit = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if hit is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, hit.pattern))
    return True
1042
1043
1044 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1045 # that resolve against the current filesystem in the strip/graft
1046 # functions for example, but elsewhere as well.  I suspect bup's not
1047 # always being careful about that.  For some cases, the contents of
1048 # the current filesystem should be irrelevant, and consulting it might
1049 # produce the wrong result, perhaps via unintended symlink resolution,
1050 # for example.
1051
def path_components(path):
    """Split path into a list of (name, full_path_to_name) pairs.

    The path must be bytes starting with b'/'; it is normalized with
    os.path.abspath() first, and the root is always the first pair.
    Example:
      b'/home/foo' -> [(b'', b'/'), (b'home', b'/home'),
                       (b'foo', b'/home/foo')]"""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    components = [(b'', b'/')]
    normalized = os.path.abspath(path)
    if normalized != b'/':
        so_far = b''
        # The piece before the leading '/' is empty and already covered
        # by the root entry, so start with the second piece.
        for name in normalized.split(b'/')[1:]:
            so_far = so_far + b'/' + name
            components.append((name, so_far))
    return components
1069
1070
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    The path and the prefixes are normalized with os.path.abspath().
    Longer prefixes are tried first, a prefix of b'/' is ignored, and a
    prefix only applies if it is the path itself or one of its parent
    directories.  The first entry of the result is (b'', prefix).  If
    no prefix applies the result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    for bp in sorted(strip_prefixes, key=len, reverse=True):
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == b'/':
            continue
        # Only strip whole components: b'/foo' is a prefix of
        # b'/foo/bar', but not of b'/foobar'.
        if normalized_path != normalized_bp \
           and not normalized_path.startswith(normalized_bp + b'/'):
            continue
        prefix = normalized_bp
        result = []
        for p in normalized_path[len(normalized_bp):].split(b'/'):
            if p: # not root
                prefix += b'/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1091     # Nothing to strip.
1092     return path_components(path)
1093
1094
def grafted_path_components(graft_points, path):
    """Return the components of path, relocated by the first matching graft.

    graft_points is a sequence of (old_prefix, new_prefix) pairs of
    bytes paths, as given by --graft old=new.  The result is a list of
    (name, full_path_or_None) pairs like that of path_components(); the
    directories that only exist because of new_prefix have None as
    their path.  If no graft point applies, the result is
    path_components() of the normalized path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        # Normalize the prefixes (relative ones are not made absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # Only graft whole components: b'/foo' covers b'/foo' and
        # b'/foo/bar', but not b'/foobar'.
        if old_prefix.endswith(b'/'):
            old_dir = old_prefix
        else:
            old_dir = old_prefix + b'/'
        if clean_path != old_prefix and not clean_path.startswith(old_dir):
            continue
        # Swap the prefixes by slicing rather than with re.sub(), which
        # would interpret backslashes in new_prefix as escapes.
        grafted_path = new_prefix + clean_path[len(old_prefix):]
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = b'/' + grafted_path.lstrip(b'/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
        new_prefix_parts = new_prefix.split(b'/')
        result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == b'/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1136
1137
# Alias for hashlib's SHA-1 constructor.
Sha1 = hashlib.sha1
1139
1140
# The localtime() provided by the _helpers module, or None if that
# module does not have one.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Result type of helpers.localtime(): the nine fields of
    # time.struct_time, followed by tm_gmtoff and tm_zone.
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the bup_time for time, after rounding it down to an
        integer."""
        return bup_time(*_helpers.localtime(int(floor(time))))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        h, m = divmod(offmin, 60)
        # Format the sign separately: with %+03d an offset like -00:30
        # would come out as "+0030", since -0 hours is just 0.  An
        # offset that truncates to zero minutes stays "+0000".
        sign = b'-' if off < 0 and offmin else b'+'
        return b'%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any fields after
        the first nine."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted by
        strftime's %z."""
        # Under Python 3 strftime() only accepts (and returns) str;
        # encode so that both variants of this function return bytes.
        return time.strftime('%z', localtime(t)).encode('ascii')
    def to_py_time(x):
        """Return x unchanged; localtime() results need no conversion
        here."""
        return x
1176
1177
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')


def valid_save_name(name):
    """Return True if the bytes name is acceptable as a save name.

    The checks enforce a superset of the restrictions in
    git-check-ref-format(1).
    """
    if name == b'@' or name.startswith(b'/') or name.endswith((b'/', b'.')):
        return False
    if _some_invalid_save_parts_rx.search(name) is not None:
        return False
    # No ASCII control characters, DEL included.
    if any(byte_int(c) < 0x20 or byte_int(c) == 0x7f for c in name):
        return False
    # No '/'-separated part may start with '.' or end with '.lock'.
    return not any(part.startswith(b'.') or part.endswith(b'.lock')
                   for part in name.split(b'/'))
1195
1196
_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$')


def period_as_secs(s):
    """Convert a period such as b'3d' or b'12h' into a number of seconds.

    The period is a decimal count followed by one of the units s, min,
    h, d, w, m or y; a month counts as 31 days and a year as 366 days.
    Returns float('inf') for b'forever', and None if s is not a valid
    period.
    """
    if s == b'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {b's': 1,
                     b'min': minute,
                     b'h': hour,
                     b'd': day,
                     b'w': 7 * day,
                     b'm': 31 * day,
                     b'y': 366 * day}
    return int(count) * secs_per_unit[unit]