lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from math import floor
   8 from os import environ
   9 from subprocess import PIPE, Popen
  10 import sys, os, subprocess, errno, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 from bup.compat import argv_bytes, byte_int, nullcontext, pending_raise
  16 from bup.io import byte_stream, path_msg
  17 # This function should really be in helpers, not in bup.options.  But we
  18 # want options.py to be standalone so people can include it in other projects.
  19 from bup.options import _tty_width as tty_width
  20
  21
# Debug verbosity, read once at import time from the BUP_DEBUG
# environment variable (0 when unset).  debug1() logs when this is >= 1
# and debug2() when it is >= 2.
buglvl = int(os.environ.get('BUP_DEBUG', 0))
  23
  24
class Nonlocal:
    """Helper to deal with Python scoping issues"""
    # Instances are plain attribute holders: nested functions can rebind
    # attributes on a shared instance (see partition()) where they could
    # not rebind a local of the enclosing function without 'nonlocal'.
    pass
  28
  29
  30 def nullcontext_if_not(manager):
  31     return manager if manager is not None else nullcontext()
  32
  33
@contextmanager
def finalized(enter_result=None, finalize=None):
    """Yield enter_result and call finalize(enter_result) on the way out.

    finalize is required (asserted) despite its None default.  It is
    called with enter_result both when the with-body completes normally
    and when the body raises.
    """
    assert finalize
    try:
        yield enter_result
    except BaseException as ex:
        # Run finalize while the body's exception is pending.
        # NOTE(review): presumably pending_raise re-raises ex when its
        # block exits, so the finalize call below is only reached on the
        # success path -- confirm against bup.compat.pending_raise.
        with pending_raise(ex):
            finalize(enter_result)
    finalize(enter_result)
  43
  44
# Value of sysconf(SC_PAGE_SIZE); used by fmincore() to size its chunks
# and to compute page counts.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Value of sysconf(SC_ARG_MAX); the default arg_max for batchpipe().
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  51
  52 def last(iterable):
  53     result = None
  54     for result in iterable:
  55         pass
  56     return result
  57
# Use os.fdatasync where the platform provides it; otherwise fall back
# to os.fsync.
try:
    _fdatasync = os.fdatasync
except AttributeError:
    _fdatasync = os.fsync

if sys.platform.startswith('darwin'):
    # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
    import fcntl
    def fdatasync(fd):
        """Sync fd with F_FULLFSYNC, falling back to _fdatasync() when
        the file system reports ENOTSUP."""
        try:
            return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
        except IOError as e:
            # Fallback for file systems (SMB) that do not support F_FULLFSYNC
            if e.errno == errno.ENOTSUP:
                return _fdatasync(fd)
            else:
                raise
else:
    # Everywhere else the plain (f)datasync selected above is used as is.
    fdatasync = _fdatasync
  77
  78
  79 def partition(predicate, stream):
  80     """Returns (leading_matches_it, rest_it), where leading_matches_it
  81     must be completely exhausted before traversing rest_it.
  82
  83     """
  84     stream = iter(stream)
  85     ns = Nonlocal()
  86     ns.first_nonmatch = None
  87     def leading_matches():
  88         for x in stream:
  89             if predicate(x):
  90                 yield x
  91             else:
  92                 ns.first_nonmatch = (x,)
  93                 break
  94     def rest():
  95         if ns.first_nonmatch:
  96             yield ns.first_nonmatch[0]
  97             for x in stream:
  98                 yield x
  99     return (leading_matches(), rest())
 100
 101
 102 def merge_dict(*xs):
 103     result = {}
 104     for x in xs:
 105         result.update(x)
 106     return result
 107
 108
 109 def lines_until_sentinel(f, sentinel, ex_type):
 110     # sentinel must end with \n and must contain only one \n
 111     while True:
 112         line = f.readline()
 113         if not (line and line.endswith(b'\n')):
 114             raise ex_type('Hit EOF while reading line')
 115         if line == sentinel:
 116             return
 117         yield line
 118
 119
 120 def stat_if_exists(path):
 121     try:
 122         return os.stat(path)
 123     except OSError as e:
 124         if e.errno != errno.ENOENT:
 125             raise
 126     return None
 127
 128
 129 # Write (blockingly) to sockets that may or may not be in blocking mode.
 130 # We need this because our stderr is sometimes eaten by subprocesses
 131 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 132 # leading to race conditions.  Ick.  We'll do it the hard way.
 133 def _hard_write(fd, buf):
 134     while buf:
 135         (r,w,x) = select.select([], [fd], [], None)
 136         if not w:
 137             raise IOError('select(fd) returned without being writable')
 138         try:
 139             sz = os.write(fd, buf)
 140         except OSError as e:
 141             if e.errno != errno.EAGAIN:
 142                 raise
 143         assert(sz >= 0)
 144         buf = buf[sz:]
 145
 146
 147 _last_prog = 0
 148 def log(s):
 149     """Print a log message to stderr."""
 150     global _last_prog
 151     sys.stdout.flush()
 152     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 153     _last_prog = 0
 154
 155
 156 def debug1(s):
 157     if buglvl >= 1:
 158         log(s)
 159
 160
 161 def debug2(s):
 162     if buglvl >= 2:
 163         log(s)
 164
 165
# Whether fd 1 (stdout) / fd 2 (stderr) should be treated as a terminal:
# true for a real tty, or when the matching bit (1 or 2) is set in the
# BUP_FORCE_TTY environment variable.
istty1 = os.isatty(1) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 1)
istty2 = os.isatty(2) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 2)
# Most recent message shown by progress(); replayed by reprogress().
_last_progress = ''
def progress(s):
    """Calls log() if stderr is a TTY.  Does nothing otherwise."""
    global _last_progress
    if istty2:
        log(s)
        # Remember the message so reprogress() can redisplay it.
        _last_progress = s
 175
 176
 177 def qprogress(s):
 178     """Calls progress() only if we haven't printed progress in a while.
 179
 180     This avoids overloading the stderr buffer with excess junk.
 181     """
 182     global _last_prog
 183     now = time.time()
 184     if now - _last_prog > 0.1:
 185         progress(s)
 186         _last_prog = now
 187
 188
def reprogress():
    """Calls progress() to redisplay the most recent progress message.

    Useful after you've printed some other message that wipes out the
    progress line.
    """
    # Only messages ending in a carriage return are replayed; anything
    # else is left alone.
    if _last_progress and _last_progress.endswith('\r'):
        progress(_last_progress)
 197
 198
 199 def mkdirp(d, mode=None):
 200     """Recursively create directories on path 'd'.
 201
 202     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 203     the path already exists.
 204     """
 205     try:
 206         if mode:
 207             os.makedirs(d, mode)
 208         else:
 209             os.makedirs(d)
 210     except OSError as e:
 211         if e.errno == errno.EEXIST:
 212             pass
 213         else:
 214             raise
 215
 216
 217 class MergeIterItem:
 218     def __init__(self, entry, read_it):
 219         self.entry = entry
 220         self.read_it = read_it
 221     def __lt__(self, x):
 222         return self.entry < x.entry
 223
 224 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 225     if key:
 226         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 227     else:
 228         samekey = operator.eq
 229     count = 0
 230     total = sum(len(it) for it in iters)
 231     iters = (iter(it) for it in iters)
 232     heap = ((next(it, None),it) for it in iters)
 233     heap = [MergeIterItem(e, it) for e, it in heap if e]
 234
 235     heapq.heapify(heap)
 236     pe = None
 237     while heap:
 238         if not count % pfreq:
 239             pfunc(count, total)
 240         e, it = heap[0].entry, heap[0].read_it
 241         if not samekey(e, pe):
 242             pe = e
 243             yield e
 244         count += 1
 245         try:
 246             e = next(it)
 247         except StopIteration:
 248             heapq.heappop(heap) # remove current
 249         else:
 250             # shift current to new location
 251             heapq.heapreplace(heap, MergeIterItem(e, it))
 252     pfinal(count, total)
 253
 254
 255 def unlink(f):
 256     """Delete a file at path 'f' if it currently exists.
 257
 258     Unlike os.unlink(), does not throw an exception if the file didn't already
 259     exist.
 260     """
 261     try:
 262         os.unlink(f)
 263     except OSError as e:
 264         if e.errno != errno.ENOENT:
 265             raise
 266
 267
 268 _bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
 269 _sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
 270
 271 def bquote(x):
 272     if x == b'':
 273         return b"''"
 274     if _bq_simple_id_rx.match(x):
 275         return x
 276     return b"'%s'" % x.replace(b"'", b"'\"'\"'")
 277
 278 def squote(x):
 279     if x == '':
 280         return "''"
 281     if _sq_simple_id_rx.match(x):
 282         return x
 283     return "'%s'" % x.replace("'", "'\"'\"'")
 284
def quote(x):
    """Shell-quote x, dispatching on its type.

    bytes go through bquote() and text strings (compat.str_type)
    through squote(); any other type trips the assertion.
    """
    if isinstance(x, bytes):
        return bquote(x)
    if isinstance(x, compat.str_type):
        return squote(x)
    assert False
    # some versions of pylint get confused
    return None
 293
 294 def shstr(cmd):
 295     """Return a shell quoted string for cmd if it's a sequence, else cmd.
 296
 297     cmd must be a string, bytes, or a sequence of one or the other,
 298     and the assumption is that if cmd is a string or bytes, then it's
 299     already quoted (because it's what's actually being passed to
 300     call() and friends.  e.g. log(shstr(cmd)); call(cmd)
 301
 302     """
 303     if isinstance(cmd, (bytes, compat.str_type)):
 304         return cmd
 305     elif all(isinstance(x, bytes) for x in cmd):
 306         return b' '.join(map(bquote, cmd))
 307     elif all(isinstance(x, compat.str_type) for x in cmd):
 308         return ' '.join(map(squote, cmd))
 309     raise TypeError('unsupported shstr argument: ' + repr(cmd))
 310
 311
# Alias for subprocess.check_call.
exc = subprocess.check_call
 313
 314 def exo(cmd,
 315         input=None,
 316         stdin=None,
 317         stderr=None,
 318         shell=False,
 319         check=True,
 320         preexec_fn=None,
 321         close_fds=True):
 322     if input:
 323         assert stdin in (None, PIPE)
 324         stdin = PIPE
 325     p = Popen(cmd,
 326               stdin=stdin, stdout=PIPE, stderr=stderr,
 327               shell=shell,
 328               preexec_fn=preexec_fn,
 329               close_fds=close_fds)
 330     out, err = p.communicate(input)
 331     if check and p.returncode != 0:
 332         raise Exception('subprocess %r failed with status %d%s'
 333                         % (b' '.join(map(quote, cmd)), p.returncode,
 334                            ', stderr: %r' % err if err else ''))
 335     return out, err, p
 336
 337 def readpipe(argv, preexec_fn=None, shell=False):
 338     """Run a subprocess and return its output."""
 339     return exo(argv, preexec_fn=preexec_fn, shell=shell)[0]
 340
 341
 342 def _argmax_base(command):
 343     base_size = 2048
 344     for c in command:
 345         base_size += len(command) + 1
 346     for k, v in compat.items(environ):
 347         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 348     return base_size
 349
 350
 351 def _argmax_args_size(args):
 352     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 353
 354
 355 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 356     """If args is not empty, yield the output produced by calling the
 357 command list with args as a sequence of strings (It may be necessary
 358 to return multiple strings in order to respect ARG_MAX)."""
 359     # The optional arg_max arg is a workaround for an issue with the
 360     # current wvtest behavior.
 361     base_size = _argmax_base(command)
 362     while args:
 363         room = arg_max - base_size
 364         i = 0
 365         while i < len(args):
 366             next_size = _argmax_args_size(args[i:i+1])
 367             if room - next_size < 0:
 368                 break
 369             room -= next_size
 370             i += 1
 371         sub_args = args[:i]
 372         args = args[i:]
 373         assert(len(sub_args))
 374         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 375
 376
 377 def resolve_parent(p):
 378     """Return the absolute path of a file without following any final symlink.
 379
 380     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 381     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 382     will follow symlinks in p's directory)
 383     """
 384     try:
 385         st = os.lstat(p)
 386     except OSError:
 387         st = None
 388     if st and stat.S_ISLNK(st.st_mode):
 389         (dir, name) = os.path.split(p)
 390         dir = os.path.realpath(dir)
 391         out = os.path.join(dir, name)
 392     else:
 393         out = os.path.realpath(p)
 394     #log('realpathing:%r,%r\n' % (p, out))
 395     return out
 396
 397
 398 def detect_fakeroot():
 399     "Return True if we appear to be running under fakeroot."
 400     return os.getenv("FAKEROOTKEY") != None
 401
 402
if sys.platform.startswith('cygwin'):
    def is_superuser():
        """Return whether the process belongs to group 544 or group 0."""
        # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
        groups = os.getgroups()
        return 544 in groups or 0 in groups
else:
    def is_superuser():
        """Return whether the effective user id is 0."""
        return os.geteuid() == 0
 411
 412
 413 def cache_key_value(get_value, key, cache):
 414     """Return (value, was_cached).  If there is a value in the cache
 415     for key, use that, otherwise, call get_value(key) which should
 416     throw a KeyError if there is no value -- in which case the cached
 417     and returned value will be None.
 418     """
 419     try: # Do we already have it (or know there wasn't one)?
 420         value = cache[key]
 421         return value, True
 422     except KeyError:
 423         pass
 424     value = None
 425     try:
 426         cache[key] = value = get_value(key)
 427     except KeyError:
 428         cache[key] = None
 429     return value, False
 430
 431
# Result of the first hostname() lookup, reused by later calls.
_hostname = None
def hostname():
    """Get the FQDN of this machine."""
    global _hostname
    # Ask _helpers.gethostname() only until it returns a non-empty name.
    if not _hostname:
        _hostname = _helpers.gethostname()
    return _hostname
 439
 440
 441 def format_filesize(size):
 442     unit = 1024.0
 443     size = float(size)
 444     if size < unit:
 445         return "%d" % (size)
 446     exponent = int(math.log(size) // math.log(unit))
 447     size_prefix = "KMGTPE"[exponent - 1]
 448     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 449
 450
class NotOk(Exception):
    # Returned (not raised) by BaseConn._check_ok() when the peer
    # answers with an "error ..." line; carries the text after "error ".
    pass
 453
 454
 455 class BaseConn:
 456     def __init__(self, outp):
 457         self._base_closed = False
 458         self.outp = outp
 459
 460     def close(self):
 461         self._base_closed = True
 462         while self._read(65536): pass
 463
 464     def __del__(self):
 465         assert self._base_closed
 466
 467     def _read(self, size):
 468         raise NotImplementedError("Subclasses must implement _read")
 469
 470     def read(self, size):
 471         """Read 'size' bytes from input stream."""
 472         self.outp.flush()
 473         return self._read(size)
 474
 475     def _readline(self, size):
 476         raise NotImplementedError("Subclasses must implement _readline")
 477
 478     def readline(self):
 479         """Read from input stream until a newline is found."""
 480         self.outp.flush()
 481         return self._readline()
 482
 483     def write(self, data):
 484         """Write 'data' to output stream."""
 485         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 486         self.outp.write(data)
 487
 488     def has_input(self):
 489         """Return true if input stream is readable."""
 490         raise NotImplementedError("Subclasses must implement has_input")
 491
 492     def ok(self):
 493         """Indicate end of output from last sent command."""
 494         self.write(b'\nok\n')
 495
 496     def error(self, s):
 497         """Indicate server error to the client."""
 498         s = re.sub(br'\s+', b' ', s)
 499         self.write(b'\nerror %s\n' % s)
 500
 501     def _check_ok(self, onempty):
 502         self.outp.flush()
 503         rl = b''
 504         for rl in linereader(self):
 505             #log('%d got line: %r\n' % (os.getpid(), rl))
 506             if not rl:  # empty line
 507                 continue
 508             elif rl == b'ok':
 509                 return None
 510             elif rl.startswith(b'error '):
 511                 #log('client: error: %s\n' % rl[6:])
 512                 return NotOk(rl[6:])
 513             else:
 514                 onempty(rl)
 515         raise Exception('server exited unexpectedly; see errors above')
 516
 517     def drain_and_check_ok(self):
 518         """Remove all data for the current command from input stream."""
 519         def onempty(rl):
 520             pass
 521         return self._check_ok(onempty)
 522
 523     def check_ok(self):
 524         """Verify that server action completed successfully."""
 525         def onempty(rl):
 526             raise Exception('expected "ok", got %r' % rl)
 527         return self._check_ok(onempty)
 528
 529
 530 class Conn(BaseConn):
 531     def __init__(self, inp, outp):
 532         BaseConn.__init__(self, outp)
 533         self.inp = inp
 534
 535     def _read(self, size):
 536         return self.inp.read(size)
 537
 538     def _readline(self):
 539         return self.inp.readline()
 540
 541     def has_input(self):
 542         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 543         if rl:
 544             assert(rl[0] == self.inp.fileno())
 545             return True
 546         else:
 547             return None
 548
 549
 550 def checked_reader(fd, n):
 551     while n > 0:
 552         rl, _, _ = select.select([fd], [], [])
 553         assert(rl[0] == fd)
 554         buf = os.read(fd, n)
 555         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 556         yield buf
 557         n -= len(buf)
 558
 559
# Largest payload mux() puts in one stdout packet; DemuxConn treats a
# larger announced length as a broken connection.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex a subprocess's stdout and stderr onto outfd.

    While process p is running (p.poll() is None), data read from the
    file descriptors outr and errr is written to outfd as packets: a
    5-byte '!IB' header (payload length, channel) followed by the
    payload.  Channel 1 carries data from outr and channel 2 data from
    errr.  A final zero-length channel 3 packet is always written, even
    if an exception ends the loop.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # An empty read abandons this round of ready fds;
                    # the outer loop then re-checks p.poll().
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        # Channel 3 with length 0 tells the reader the stream is closed.
        os.write(outfd, struct.pack('!IB', 0, 3))
 577
 578
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol."""
    # Reads the packet stream produced by mux() from the file descriptor
    # infd: channel 1 payloads are returned by the read methods, channel
    # 2 payloads are copied to stderr, and a channel 3 packet marks the
    # stream closed.
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        stderr = byte_stream(sys.stderr)
        while tail != b'BUPMUX':
            # Make sure to write all pre-BUPMUX output to stderr
            # Read enough to fill tail up to 6 bytes, then one byte at a
            # time so nothing past the sync string is consumed.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                ex = IOError('demux: unexpected EOF during initialization')
                # NOTE(review): presumably pending_raise raises ex once
                # the pending output has been written -- confirm against
                # bup.compat.pending_raise.
                with pending_raise(ex):
                    stderr.write(tail)
                    stderr.flush()
            tail += b
            # Keep only the last 6 bytes as the sync candidate; anything
            # before them cannot be part of the sync string.
            stderr.write(tail[:-6])
            tail = tail[-6:]
        stderr.flush()
        self.infd = infd
        self.reader = None  # checked_reader over the current data packet
        self.buf = None     # data read from a packet but not yet returned
        self.closed = False # set once the close packet has been seen

    def write(self, data):
        # Poll for (and process) a pending packet without blocking
        # before writing.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and act on it.

        Returns False if the stream is closed or no input arrived
        within timeout seconds (None waits indefinitely), True
        otherwise.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        # Header: 4-byte big-endian payload length and 1-byte channel.
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        if n > MAX_PACKET:
            # assume that something went wrong and print stuff
            ns += os.read(self.infd, 1024)
            stderr = byte_stream(sys.stderr)
            stderr.write(ns)
            stderr.flush()
            raise Exception("Connection broken")
        if fdw == 1:
            # Data packet: the payload is read lazily by _load_buf().
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # Remote stderr: pass it straight through.
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return whether it does.

        Returns False when the stream is closed or no packet arrived
        within timeout.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current data packet is used up; go get the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of input until ix_fn says where to stop.

        ix_fn(buf) returns None to consume all of buf and continue, or
        an index i: buf[:i] is then yielded, the rest is kept for the
        next read, and the generator ends.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            # Index just past the first newline, or None if there is none.
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        csize = [size]
        def until_size(buf): # Closes on csize
            # csize[0] counts the bytes still wanted.
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        # Non-blocking: only reports data that can be had without waiting.
        return self._load_buf(0)
 680
 681
 682 def linereader(f):
 683     """Generate a list of input lines from 'f' without terminating newlines."""
 684     while 1:
 685         line = f.readline()
 686         if not line:
 687             break
 688         yield line[:-1]
 689
 690
 691 def chunkyreader(f, count = None):
 692     """Generate a list of chunks of data read from 'f'.
 693
 694     If count is None, read until EOF is reached.
 695
 696     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 697     reached while reading, raise IOError.
 698     """
 699     if count != None:
 700         while count > 0:
 701             b = f.read(min(count, 65536))
 702             if not b:
 703                 raise IOError('EOF with %d bytes remaining' % count)
 704             yield b
 705             count -= len(b)
 706     else:
 707         while 1:
 708             b = f.read(65536)
 709             if not b: break
 710             yield b
 711
 712
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed to name when leaving the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.

    The temporary file is created in the same directory as name.  If
    the block raises, the target is left untouched and the temporary
    file is removed.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so nothing owns the descriptor yet; close
            # it here before re-raising.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Only reached if the block and the close both succeeded.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 747
 748
 749 def slashappend(s):
 750     """Append "/" to 's' if it doesn't aleady end in "/"."""
 751     assert isinstance(s, bytes)
 752     if s and not s.endswith(b'/'):
 753         return s + b'/'
 754     else:
 755         return s
 756
 757
 758 def _mmap_do(f, sz, flags, prot, close):
 759     if not sz:
 760         st = os.fstat(f.fileno())
 761         sz = st.st_size
 762     if not sz:
 763         # trying to open a zero-length map gives an error, but an empty
 764         # string has all the same behaviour of a zero-length map, ie. it has
 765         # no elements :)
 766         return ''
 767     map = compat.mmap(f.fileno(), sz, flags, prot)
 768     if close:
 769         f.close()  # map will persist beyond file close
 770     return map
 771
 772
def mmap_read(f, sz = 0, close=True):
    """Create a read-only memory mapped region on file 'f'.
    If sz is 0, the region will cover the entire file.
    If close is true (the default), 'f' is closed once it is mapped.
    """
    return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 778
 779
def mmap_readwrite(f, sz = 0, close=True):
    """Create a read-write memory mapped region on file 'f'.
    If sz is 0, the region will cover the entire file.
    The map is shared (MAP_SHARED).
    If close is true (the default), 'f' is closed once it is mapped.
    """
    return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
                    close)
 786
 787
def mmap_readwrite_private(f, sz = 0, close=True):
    """Create a read-write memory mapped region on file 'f'.
    If sz is 0, the region will cover the entire file.
    The map is private, which means the changes are never flushed back to the
    file.
    If close is true (the default), 'f' is closed once it is mapped.
    """
    return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
                    close)
 796
 797
# fmincore() is only defined when the C helper module provides mincore.
_mincore = getattr(_helpers, 'mincore', None)
if _mincore:
    # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
    MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)

    # Number of bytes mapped per mincore() call; computed on first use.
    _fmincore_chunk_size = None
    def _set_fmincore_chunk_size():
        """Set _fmincore_chunk_size to the largest multiple of the page
        size not above 64MiB, or to one page if pages are larger."""
        global _fmincore_chunk_size
        pref_chunk_size = 64 * 1024 * 1024
        chunk_size = sc_page_size
        if (sc_page_size < pref_chunk_size):
            chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
        _fmincore_chunk_size = chunk_size

    def fmincore(fd):
        """Return the mincore() data for fd as a bytearray whose values can be
        tested via MINCORE_INCORE, or None if fd does not fully
        support the operation."""
        st = os.fstat(fd)
        if (st.st_size == 0):
            return bytearray(0)
        if not _fmincore_chunk_size:
            _set_fmincore_chunk_size()
        pages_per_chunk = _fmincore_chunk_size // sc_page_size;
        # Round up so a trailing partial page / chunk is included.
        page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
        chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size
        # One entry per page of the file, filled in chunk by chunk.
        result = bytearray(page_count)
        for ci in compat.range(chunk_count):
            pos = _fmincore_chunk_size * ci;
            msize = min(_fmincore_chunk_size, st.st_size - pos)
            try:
                m = compat.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
            except mmap.error as ex:
                if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
                    # Perhaps the file was a pipe, i.e. "... | bup split ..."
                    return None
                raise ex
            try:
                # Results for this chunk go into result starting at the
                # chunk's first page index.
                _mincore(m, msize, 0, result, ci * pages_per_chunk)
            except OSError as ex:
                if ex.errno == errno.ENOSYS:
                    return None
                raise
        return result
 842
 843
 844 def parse_timestamp(epoch_str):
 845     """Return the number of nanoseconds since the epoch that are described
 846 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 847 throw a ValueError that may contain additional information."""
 848     ns_per = {'s' :  1000000000,
 849               'ms' : 1000000,
 850               'us' : 1000,
 851               'ns' : 1}
 852     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 853     if not match:
 854         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 855             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 856         raise ValueError()
 857     (n, units) = match.group(1, 2)
 858     if not n:
 859         n = 1
 860     n = int(n)
 861     return n * ns_per[units]
 862
 863
 864 def parse_num(s):
 865     """Parse string or bytes as a possibly unit suffixed number.
 866
 867     For example:
 868         199.2k means 203981 bytes
 869         1GB means 1073741824 bytes
 870         2.1 tb means 2199023255552 bytes
 871     """
 872     if isinstance(s, bytes):
 873         # FIXME: should this raise a ValueError for UnicodeDecodeError
 874         # (perhaps with the latter as the context).
 875         s = s.decode('ascii')
 876     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 877     if not g:
 878         raise ValueError("can't parse %r as a number" % s)
 879     (val, unit) = g.groups()
 880     num = float(val)
 881     unit = unit.lower()
 882     if unit in ['t', 'tb']:
 883         mult = 1024*1024*1024*1024
 884     elif unit in ['g', 'gb']:
 885         mult = 1024*1024*1024
 886     elif unit in ['m', 'mb']:
 887         mult = 1024*1024
 888     elif unit in ['k', 'kb']:
 889         mult = 1024
 890     elif unit in ['', 'b']:
 891         mult = 1
 892     else:
 893         raise ValueError("invalid unit %r in number %r" % (unit, s))
 894     return int(num*mult)
 895
 896
# Errors recorded via add_error(); reset by clear_errors() and checked
# by die_if_errors().
saved_errors = []
def add_error(e):
    """Append an error message to the list of saved errors.

    Once processing is able to stop and output the errors, the saved errors are
    accessible in the module variable helpers.saved_errors.
    """
    saved_errors.append(e)
    # Also log it right away, left-justified and padded to 70 columns.
    log('%-70s\n' % e)
 905     log('%-70s\n' % e)
 906
 907
def clear_errors():
    """Forget all saved errors.

    saved_errors is rebound to a new empty list; a previously obtained
    reference to the old list is left unchanged.
    """
    global saved_errors
    saved_errors = []
 911
 912
 913 def die_if_errors(msg=None, status=1):
 914     global saved_errors
 915     if saved_errors:
 916         if not msg:
 917             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 918         log(msg)
 919         sys.exit(status)
 920
 921
 922 def handle_ctrl_c():
 923     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 924
 925     The new exception handler will make sure that bup will exit without an ugly
 926     stacktrace when Ctrl-C is hit.
 927     """
 928     oldhook = sys.excepthook
 929     def newhook(exctype, value, traceback):
 930         if exctype == KeyboardInterrupt:
 931             log('\nInterrupted.\n')
 932         else:
 933             oldhook(exctype, value, traceback)
 934     sys.excepthook = newhook
 935
 936
 937 def columnate(l, prefix):
 938     """Format elements of 'l' in columns with 'prefix' leading each line.
 939
 940     The number of columns is determined automatically based on the string
 941     lengths.
 942     """
 943     binary = isinstance(prefix, bytes)
 944     nothing = b'' if binary else ''
 945     nl = b'\n' if binary else '\n'
 946     if not l:
 947         return nothing
 948     l = l[:]
 949     clen = max(len(s) for s in l)
 950     ncols = (tty_width() - len(prefix)) // (clen + 2)
 951     if ncols <= 1:
 952         ncols = 1
 953         clen = 0
 954     cols = []
 955     while len(l) % ncols:
 956         l.append(nothing)
 957     rows = len(l) // ncols
 958     for s in compat.range(0, len(l), rows):
 959         cols.append(l[s:s+rows])
 960     out = nothing
 961     fmt = b'%-*s' if binary else '%-*s'
 962     for row in zip(*cols):
 963         out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
 964     return out
 965
 966
 967 def parse_date_or_fatal(str, fatal):
 968     """Parses the given date or calls Option.fatal().
 969     For now we expect a string that contains a float."""
 970     try:
 971         date = float(str)
 972     except ValueError as e:
 973         raise fatal('invalid date format (should be a float): %r' % e)
 974     else:
 975         return date
 976
 977
 978 def parse_excludes(options, fatal):
 979     """Traverse the options and extract all excludes, or call Option.fatal()."""
 980     excluded_paths = []
 981
 982     for flag in options:
 983         (option, parameter) = flag
 984         if option == '--exclude':
 985             excluded_paths.append(resolve_parent(argv_bytes(parameter)))
 986         elif option == '--exclude-from':
 987             try:
 988                 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
 989             except IOError as e:
 990                 raise fatal("couldn't read %r" % parameter)
 991             for exclude_path in f.readlines():
 992                 # FIXME: perhaps this should be rstrip('\n')
 993                 exclude_path = resolve_parent(exclude_path.strip())
 994                 if exclude_path:
 995                     excluded_paths.append(exclude_path)
 996     return sorted(frozenset(excluded_paths))
 997
 998
 999 def parse_rx_excludes(options, fatal):
1000     """Traverse the options and extract all rx excludes, or call
1001     Option.fatal()."""
1002     excluded_patterns = []
1003
1004     for flag in options:
1005         (option, parameter) = flag
1006         if option == '--exclude-rx':
1007             try:
1008                 excluded_patterns.append(re.compile(argv_bytes(parameter)))
1009             except re.error as ex:
1010                 fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
1011         elif option == '--exclude-rx-from':
1012             try:
1013                 f = open(resolve_parent(parameter), 'rb')
1014             except IOError as e:
1015                 raise fatal("couldn't read %r" % parameter)
1016             for pattern in f.readlines():
1017                 spattern = pattern.rstrip(b'\n')
1018                 if not spattern:
1019                     continue
1020                 try:
1021                     excluded_patterns.append(re.compile(spattern))
1022                 except re.error as ex:
1023                     fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
1024     return excluded_patterns
1025
1026
def should_rx_exclude_path(path, exclude_rxs):
    """Return True if a regular expression in exclude_rxs matches
    anywhere in path, and False otherwise.  The first matching pattern
    is reported via debug1()."""
    matching_rx = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if matching_rx is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, matching_rx.pattern))
    return True
1035
1036
1037 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1038 # that resolve against the current filesystem in the strip/graft
1039 # functions for example, but elsewhere as well.  I suspect bup's not
1040 # always being careful about that.  For some cases, the contents of
1041 # the current filesystem should be irrelevant, and consulting it might
1042 # produce the wrong result, perhaps via unintended symlink resolution,
1043 # for example.
1044
def path_components(path):
    """Split path into a list of (name, full_path_to_name) pairs, one
    per component, starting with the root.  Path must start with '/';
    it is normalized with os.path.abspath() before being split.
    Example:
      b'/home/foo' -> [(b'', b'/'), (b'home', b'/home'),
                       (b'foo', b'/home/foo')]"""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    normalized = os.path.abspath(path)
    # The root is always the first component.
    components = [(b'', b'/')]
    if normalized != b'/':
        walked = b''
        # The text before the leading '/' is empty, so drop that element.
        for name in normalized.split(b'/')[1:]:
            walked = b'/'.join((walked, name))
            components.append((name, walked))
    return components
1062
1063
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Prefixes are tried longest first, and the root ('/') is never
    stripped.  A prefix only applies when it is the path itself or one
    of its ancestor directories, i.e. it must end on a component
    boundary: b'/foo/bar' does not strip b'/foo/barbaz'.  When no
    prefix applies, the result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == b'/':
            continue
        # Require a whole-component match; a bare startswith() would let
        # /foo/bar "match" /foo/barbaz and fabricate the path /foo/bar/baz.
        if normalized_path != normalized_bp \
           and not normalized_path.startswith(normalized_bp + b'/'):
            continue
        prefix = normalized_bp
        result = []
        # The remainder is either empty or starts with '/', so the first
        # element of the split is b'' and stands for the stripped prefix.
        for p in normalized_path[len(normalized_bp):].split(b'/'):
            if p: # not root
                prefix += b'/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1086
1087
def grafted_path_components(graft_points, path):
    """Return the components of path after applying the first matching
    graft in graft_points.

    graft_points is an iterable of (old_prefix, new_prefix) pairs, as
    given by --graft old=new.  A graft applies when old_prefix is '/',
    the path itself, or one of its ancestor directories.  The result has
    the same form as that of path_components(), except that the faked
    directories leading up to the graft point have None in place of a
    filesystem path.  When no graft applies, the result is
    path_components() of the absolute path."""
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize the prefixes (note: this does not make them absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # Only graft on a component boundary, so that --graft /foo=...
        # doesn't also capture /foobar.
        if not (old_prefix == b'/'
                or clean_path == old_prefix
                or clean_path.startswith(old_prefix + b'/')):
            continue
        # Swap the prefixes by slicing.  (Using new_prefix as an re.sub()
        # replacement would interpret any backslashes it contains.)
        grafted_path = new_prefix + clean_path[len(old_prefix):]
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = b'/' + grafted_path.lstrip(b'/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
        new_prefix_parts = new_prefix.split(b'/')
        result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == b'/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1129
1130
# Module-level alias for hashlib's SHA-1 hash constructor.
Sha1 = hashlib.sha1
1132
1133
# The C helper module may or may not provide a localtime(); when it
# doesn't, fall back to python's time module below.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Like time.struct_time's leading nine fields, plus the UTC offset
    # and zone name.
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the local time for 'time' as a bup_time.  Any
        fractional part of 'time' is rounded down first."""
        return bup_time(*_helpers.localtime(int(floor(time))))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        # Format the sign separately: negating the hours loses it for
        # negative offsets of less than an hour, since -0 == 0.
        sign = b'-' if off < 0 else b'+'
        return b'%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x in a form python's time module accepts: x itself if
        it is already a time.struct_time, otherwise a struct_time built
        from its first nine items."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted by
        strftime's %z, as bytes."""
        # strftime() wants a native string format; encode the result so
        # that both definitions of utc_offset_str() return bytes.
        return time.strftime('%z', localtime(t)).encode('ascii')
    def to_py_time(x):
        """Return x unchanged; time.localtime() results need no
        conversion."""
        return x
1169
1170
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name (bytes) is acceptable as a save name, and
    False otherwise."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if name == b'@' or name.startswith(b'/') or name.endswith((b'/', b'.')):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    # No ASCII control characters (including DEL).
    if any(byte_int(c) < 0x20 or byte_int(c) == 0x7f for c in name):
        return False
    # No '/'-separated part may start with '.' or end with '.lock'.
    return not any(part.startswith(b'.') or part.endswith(b'.lock')
                   for part in name.split(b'/'))
1188
1189
_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period given as bytes to a number of seconds.

    b'forever' yields float('inf').  Otherwise s must be a decimal count
    immediately followed by one of the units s, min, h, d, w, m (counted
    as 31 days) or y (counted as 366 days).  Returns None if s has no
    such form."""
    if s == b'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        b's': 1,
        b'min': minute,
        b'h': hour,
        b'd': day,
        b'w': 7 * day,
        b'm': 31 * day,
        b'y': 366 * day,
    }
    return int(count) * secs_per_unit[unit]