lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from math import floor
   8 from os import environ
   9 from subprocess import PIPE, Popen
  10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, grp, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 from bup.compat import argv_bytes, byte_int
  16 from bup.io import byte_stream, path_msg
  17 # This function should really be in helpers, not in bup.options.  But we
  18 # want options.py to be standalone so people can include it in other projects.
  19 from bup.options import _tty_width as tty_width
  20
  21
  22 class Nonlocal:
  23     """Helper to deal with Python scoping issues"""
  24     pass
  25
  26
  27 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  28 assert(sc_page_size > 0)
  29
  30 sc_arg_max = os.sysconf('SC_ARG_MAX')
  31 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  32     sc_arg_max = 2 * 1024 * 1024
  33
  34 def last(iterable):
  35     result = None
  36     for result in iterable:
  37         pass
  38     return result
  39
  40
  41 def atoi(s):
  42     """Convert s (ascii bytes) to an integer. Return 0 if s is not a number."""
  43     try:
  44         return int(s or b'0')
  45     except ValueError:
  46         return 0
  47
  48
  49 def atof(s):
  50     """Convert s (ascii bytes) to a float. Return 0 if s is not a number."""
  51     try:
  52         return float(s or b'0')
  53     except ValueError:
  54         return 0
  55
  56
# Debug verbosity read from $BUP_DEBUG; atoi() turns a missing or
# non-numeric value into 0.  debug1() and debug2() compare against it.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  58
  59
  60 try:
  61     _fdatasync = os.fdatasync
  62 except AttributeError:
  63     _fdatasync = os.fsync
  64
  65 if sys.platform.startswith('darwin'):
  66     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  67     import fcntl
  68     def fdatasync(fd):
  69         try:
  70             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  71         except IOError as e:
  72             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  73             if e.errno == errno.ENOTSUP:
  74                 return _fdatasync(fd)
  75             else:
  76                 raise
  77 else:
  78     fdatasync = _fdatasync
  79
  80
  81 def partition(predicate, stream):
  82     """Returns (leading_matches_it, rest_it), where leading_matches_it
  83     must be completely exhausted before traversing rest_it.
  84
  85     """
  86     stream = iter(stream)
  87     ns = Nonlocal()
  88     ns.first_nonmatch = None
  89     def leading_matches():
  90         for x in stream:
  91             if predicate(x):
  92                 yield x
  93             else:
  94                 ns.first_nonmatch = (x,)
  95                 break
  96     def rest():
  97         if ns.first_nonmatch:
  98             yield ns.first_nonmatch[0]
  99             for x in stream:
 100                 yield x
 101     return (leading_matches(), rest())
 102
 103
 104 def merge_dict(*xs):
 105     result = {}
 106     for x in xs:
 107         result.update(x)
 108     return result
 109
 110
 111 def lines_until_sentinel(f, sentinel, ex_type):
 112     # sentinel must end with \n and must contain only one \n
 113     while True:
 114         line = f.readline()
 115         if not (line and line.endswith(b'\n')):
 116             raise ex_type('Hit EOF while reading line')
 117         if line == sentinel:
 118             return
 119         yield line
 120
 121
 122 def stat_if_exists(path):
 123     try:
 124         return os.stat(path)
 125     except OSError as e:
 126         if e.errno != errno.ENOENT:
 127             raise
 128     return None
 129
 130
 131 # Write (blockingly) to sockets that may or may not be in blocking mode.
 132 # We need this because our stderr is sometimes eaten by subprocesses
 133 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 134 # leading to race conditions.  Ick.  We'll do it the hard way.
 135 def _hard_write(fd, buf):
 136     while buf:
 137         (r,w,x) = select.select([], [fd], [], None)
 138         if not w:
 139             raise IOError('select(fd) returned without being writable')
 140         try:
 141             sz = os.write(fd, buf)
 142         except OSError as e:
 143             if e.errno != errno.EAGAIN:
 144                 raise
 145         assert(sz >= 0)
 146         buf = buf[sz:]
 147
 148
 149 _last_prog = 0
 150 def log(s):
 151     """Print a log message to stderr."""
 152     global _last_prog
 153     sys.stdout.flush()
 154     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 155     _last_prog = 0
 156
 157
 158 def debug1(s):
 159     if buglvl >= 1:
 160         log(s)
 161
 162
 163 def debug2(s):
 164     if buglvl >= 2:
 165         log(s)
 166
 167
# Whether fd 1 (stdout) and fd 2 (stderr) are to be treated as
# terminals.  Bits 1 and 2 of the integer in $BUP_FORCE_TTY force the
# respective value to be true even when os.isatty() says otherwise.
istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 170 _last_progress = ''
 171 def progress(s):
 172     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 173     global _last_progress
 174     if istty2:
 175         log(s)
 176         _last_progress = s
 177
 178
 179 def qprogress(s):
 180     """Calls progress() only if we haven't printed progress in a while.
 181
 182     This avoids overloading the stderr buffer with excess junk.
 183     """
 184     global _last_prog
 185     now = time.time()
 186     if now - _last_prog > 0.1:
 187         progress(s)
 188         _last_prog = now
 189
 190
 191 def reprogress():
 192     """Calls progress() to redisplay the most recent progress message.
 193
 194     Useful after you've printed some other message that wipes out the
 195     progress line.
 196     """
 197     if _last_progress and _last_progress.endswith('\r'):
 198         progress(_last_progress)
 199
 200
 201 def mkdirp(d, mode=None):
 202     """Recursively create directories on path 'd'.
 203
 204     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 205     the path already exists.
 206     """
 207     try:
 208         if mode:
 209             os.makedirs(d, mode)
 210         else:
 211             os.makedirs(d)
 212     except OSError as e:
 213         if e.errno == errno.EEXIST:
 214             pass
 215         else:
 216             raise
 217
 218
 219 class MergeIterItem:
 220     def __init__(self, entry, read_it):
 221         self.entry = entry
 222         self.read_it = read_it
 223     def __lt__(self, x):
 224         return self.entry < x.entry
 225
 226 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 227     if key:
 228         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 229     else:
 230         samekey = operator.eq
 231     count = 0
 232     total = sum(len(it) for it in iters)
 233     iters = (iter(it) for it in iters)
 234     heap = ((next(it, None),it) for it in iters)
 235     heap = [MergeIterItem(e, it) for e, it in heap if e]
 236
 237     heapq.heapify(heap)
 238     pe = None
 239     while heap:
 240         if not count % pfreq:
 241             pfunc(count, total)
 242         e, it = heap[0].entry, heap[0].read_it
 243         if not samekey(e, pe):
 244             pe = e
 245             yield e
 246         count += 1
 247         try:
 248             e = next(it)
 249         except StopIteration:
 250             heapq.heappop(heap) # remove current
 251         else:
 252             # shift current to new location
 253             heapq.heapreplace(heap, MergeIterItem(e, it))
 254     pfinal(count, total)
 255
 256
 257 def unlink(f):
 258     """Delete a file at path 'f' if it currently exists.
 259
 260     Unlike os.unlink(), does not throw an exception if the file didn't already
 261     exist.
 262     """
 263     try:
 264         os.unlink(f)
 265     except OSError as e:
 266         if e.errno != errno.ENOENT:
 267             raise
 268
 269
# Arguments made up solely of these characters are returned unquoted;
# bquote() uses the bytes pattern and squote() the string one.
_bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
_sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
 272
 273 def bquote(x):
 274     if x == b'':
 275         return b"''"
 276     if _bq_simple_id_rx.match(x):
 277         return x
 278     return b"'%s'" % x.replace(b"'", b"'\"'\"'")
 279
 280 def squote(x):
 281     if x == '':
 282         return "''"
 283     if _sq_simple_id_rx.match(x):
 284         return x
 285     return "'%s'" % x.replace("'", "'\"'\"'")
 286
 287 def quote(x):
 288     if isinstance(x, bytes):
 289         return bquote(x)
 290     if isinstance(x, compat.str_type):
 291         return squote(x)
 292     assert False
 293
 294 def shstr(cmd):
 295     """Return a shell quoted string for cmd if it's a sequence, else cmd.
 296
 297     cmd must be a string, bytes, or a sequence of one or the other,
 298     and the assumption is that if cmd is a string or bytes, then it's
 299     already quoted (because it's what's actually being passed to
 300     call() and friends.  e.g. log(shstr(cmd)); call(cmd)
 301
 302     """
 303     if isinstance(cmd, (bytes, compat.str_type)):
 304         return cmd
 305     elif all(isinstance(x, bytes) for x in cmd):
 306         return b' '.join(map(bquote, cmd))
 307     elif all(isinstance(x, compat.str_type) for x in cmd):
 308         return ' '.join(map(squote, cmd))
 309     raise TypeError('unsupported shstr argument: ' + repr(cmd))
 310
 311
# exc(cmd, ...) is simply subprocess.check_call under a shorter name.
exc = subprocess.check_call
 313
 314 def exo(cmd,
 315         input=None,
 316         stdin=None,
 317         stderr=None,
 318         shell=False,
 319         check=True,
 320         preexec_fn=None,
 321         close_fds=True):
 322     if input:
 323         assert stdin in (None, PIPE)
 324         stdin = PIPE
 325     p = Popen(cmd,
 326               stdin=stdin, stdout=PIPE, stderr=stderr,
 327               shell=shell,
 328               preexec_fn=preexec_fn,
 329               close_fds=close_fds)
 330     out, err = p.communicate(input)
 331     if check and p.returncode != 0:
 332         raise Exception('subprocess %r failed with status %d%s'
 333                         % (b' '.join(map(quote, cmd)), p.returncode,
 334                            ', stderr: %r' % err if err else ''))
 335     return out, err, p
 336
 337 def readpipe(argv, preexec_fn=None, shell=False):
 338     """Run a subprocess and return its output."""
 339     return exo(argv, preexec_fn=preexec_fn, shell=shell)[0]
 340
 341
 342 def _argmax_base(command):
 343     base_size = 2048
 344     for c in command:
 345         base_size += len(command) + 1
 346     for k, v in compat.items(environ):
 347         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 348     return base_size
 349
 350
 351 def _argmax_args_size(args):
 352     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 353
 354
 355 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 356     """If args is not empty, yield the output produced by calling the
 357 command list with args as a sequence of strings (It may be necessary
 358 to return multiple strings in order to respect ARG_MAX)."""
 359     # The optional arg_max arg is a workaround for an issue with the
 360     # current wvtest behavior.
 361     base_size = _argmax_base(command)
 362     while args:
 363         room = arg_max - base_size
 364         i = 0
 365         while i < len(args):
 366             next_size = _argmax_args_size(args[i:i+1])
 367             if room - next_size < 0:
 368                 break
 369             room -= next_size
 370             i += 1
 371         sub_args = args[:i]
 372         args = args[i:]
 373         assert(len(sub_args))
 374         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 375
 376
 377 def resolve_parent(p):
 378     """Return the absolute path of a file without following any final symlink.
 379
 380     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 381     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 382     will follow symlinks in p's directory)
 383     """
 384     try:
 385         st = os.lstat(p)
 386     except OSError:
 387         st = None
 388     if st and stat.S_ISLNK(st.st_mode):
 389         (dir, name) = os.path.split(p)
 390         dir = os.path.realpath(dir)
 391         out = os.path.join(dir, name)
 392     else:
 393         out = os.path.realpath(p)
 394     #log('realpathing:%r,%r\n' % (p, out))
 395     return out
 396
 397
 398 def detect_fakeroot():
 399     "Return True if we appear to be running under fakeroot."
 400     return os.getenv("FAKEROOTKEY") != None
 401
 402
 403 if sys.platform.startswith('cygwin'):
 404     def is_superuser():
 405         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 406         groups = os.getgroups()
 407         return 544 in groups or 0 in groups
 408 else:
 409     def is_superuser():
 410         return os.geteuid() == 0
 411
 412
 413 def cache_key_value(get_value, key, cache):
 414     """Return (value, was_cached).  If there is a value in the cache
 415     for key, use that, otherwise, call get_value(key) which should
 416     throw a KeyError if there is no value -- in which case the cached
 417     and returned value will be None.
 418     """
 419     try: # Do we already have it (or know there wasn't one)?
 420         value = cache[key]
 421         return value, True
 422     except KeyError:
 423         pass
 424     value = None
 425     try:
 426         cache[key] = value = get_value(key)
 427     except KeyError:
 428         cache[key] = None
 429     return value, False
 430
 431
 432 _hostname = None
 433 def hostname():
 434     """Get the FQDN of this machine."""
 435     global _hostname
 436     if not _hostname:
 437         _hostname = _helpers.gethostname()
 438     return _hostname
 439
 440
 441 def format_filesize(size):
 442     unit = 1024.0
 443     size = float(size)
 444     if size < unit:
 445         return "%d" % (size)
 446     exponent = int(math.log(size) // math.log(unit))
 447     size_prefix = "KMGTPE"[exponent - 1]
 448     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 449
 450
 451 class NotOk(Exception):
 452     pass
 453
 454
 455 class BaseConn:
 456     def __init__(self, outp):
 457         self.outp = outp
 458
 459     def close(self):
 460         while self._read(65536): pass
 461
 462     def _read(self, size):
 463         raise NotImplementedError("Subclasses must implement _read")
 464
 465     def read(self, size):
 466         """Read 'size' bytes from input stream."""
 467         self.outp.flush()
 468         return self._read(size)
 469
 470     def _readline(self, size):
 471         raise NotImplementedError("Subclasses must implement _readline")
 472
 473     def readline(self):
 474         """Read from input stream until a newline is found."""
 475         self.outp.flush()
 476         return self._readline()
 477
 478     def write(self, data):
 479         """Write 'data' to output stream."""
 480         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 481         self.outp.write(data)
 482
 483     def has_input(self):
 484         """Return true if input stream is readable."""
 485         raise NotImplementedError("Subclasses must implement has_input")
 486
 487     def ok(self):
 488         """Indicate end of output from last sent command."""
 489         self.write(b'\nok\n')
 490
 491     def error(self, s):
 492         """Indicate server error to the client."""
 493         s = re.sub(br'\s+', b' ', s)
 494         self.write(b'\nerror %s\n' % s)
 495
 496     def _check_ok(self, onempty):
 497         self.outp.flush()
 498         rl = b''
 499         for rl in linereader(self):
 500             #log('%d got line: %r\n' % (os.getpid(), rl))
 501             if not rl:  # empty line
 502                 continue
 503             elif rl == b'ok':
 504                 return None
 505             elif rl.startswith(b'error '):
 506                 #log('client: error: %s\n' % rl[6:])
 507                 return NotOk(rl[6:])
 508             else:
 509                 onempty(rl)
 510         raise Exception('server exited unexpectedly; see errors above')
 511
 512     def drain_and_check_ok(self):
 513         """Remove all data for the current command from input stream."""
 514         def onempty(rl):
 515             pass
 516         return self._check_ok(onempty)
 517
 518     def check_ok(self):
 519         """Verify that server action completed successfully."""
 520         def onempty(rl):
 521             raise Exception('expected "ok", got %r' % rl)
 522         return self._check_ok(onempty)
 523
 524
 525 class Conn(BaseConn):
 526     def __init__(self, inp, outp):
 527         BaseConn.__init__(self, outp)
 528         self.inp = inp
 529
 530     def _read(self, size):
 531         return self.inp.read(size)
 532
 533     def _readline(self):
 534         return self.inp.readline()
 535
 536     def has_input(self):
 537         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 538         if rl:
 539             assert(rl[0] == self.inp.fileno())
 540             return True
 541         else:
 542             return None
 543
 544
 545 def checked_reader(fd, n):
 546     while n > 0:
 547         rl, _, _ = select.select([fd], [], [])
 548         assert(rl[0] == fd)
 549         buf = os.read(fd, n)
 550         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 551         yield buf
 552         n -= len(buf)
 553
 554
# Largest payload mux() reads from outr for a single packet; DemuxConn
# asserts that no incoming packet is longer than this.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex the descriptors outr and errr onto outfd while p runs.

    Every chunk is written to outfd as one packet: a header packed
    with '!IB' (payload length, channel number) followed by the
    payload.  Channel 1 carries data read from outr, channel 2 data
    read from errr, and a zero-length channel 3 packet is always
    written last, even when an exception is propagating.

    The loop runs for as long as p.poll() returns None (p is
    presumably a subprocess.Popen -- confirm against callers).
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # NOTE: this break only leaves the for loop; the
                    # while loop keeps running until p.poll() changes.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        # End-of-stream marker: DemuxConn marks itself closed on it.
        os.write(outfd, struct.pack('!IB', 0, 3))
 572
 573
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the packet stream produced by mux() from the descriptor
    infd: channel 1 payloads become the connection's input, channel 2
    payloads are copied to stderr, and a channel 3 packet marks the
    connection closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        while tail != b'BUPMUX':
            # Read enough to have six bytes in hand, then one byte at a
            # time, so that nothing past the sync string is consumed.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            byte_stream(sys.stderr).write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None  # checked_reader() over the current data packet
        self.buf = None  # data already read but not yet handed out
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        """Write 'data' to output stream, polling the input first."""
        # NOTE(review): the zero-timeout poll presumably lets pending
        # stderr packets be processed before we write -- confirm.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and act on its channel.

        Returns False if the connection is closed or nothing becomes
        readable within timeout (as for select()), True otherwise.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        # Header: 4-byte payload length and 1-byte channel number.
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: read lazily, through _load_buf().
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: copy it straight through.
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return whether it does.

        timeout is passed on to _next_packet() whenever another packet
        has to be fetched.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current data packet is used up; look for the next.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of the input as directed by ix_fn.

        ix_fn(buf) returns the index at which to cut the current
        buffer, which ends the read, or None to take the whole buffer
        and continue with the next one.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                # Keep what lies beyond the cut for the next read.
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            # Cut just after the first newline, if there is one.
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        # One-element list so that the closure below can update it.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        """Return true if input data is available without waiting."""
        return self._load_buf(0)
 663
 664
 665 def linereader(f):
 666     """Generate a list of input lines from 'f' without terminating newlines."""
 667     while 1:
 668         line = f.readline()
 669         if not line:
 670             break
 671         yield line[:-1]
 672
 673
 674 def chunkyreader(f, count = None):
 675     """Generate a list of chunks of data read from 'f'.
 676
 677     If count is None, read until EOF is reached.
 678
 679     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 680     reached while reading, raise IOError.
 681     """
 682     if count != None:
 683         while count > 0:
 684             b = f.read(min(count, 65536))
 685             if not b:
 686                 raise IOError('EOF with %d bytes remaining' % count)
 687             yield b
 688             count -= len(b)
 689     else:
 690         while 1:
 691             b = f.read(65536)
 692             if not b: break
 693             yield b
 694
 695
 696 @contextmanager
 697 def atomically_replaced_file(name, mode='w', buffering=-1):
 698     """Yield a file that will be atomically renamed name when leaving the block.
 699
 700     This contextmanager yields an open file object that is backed by a
 701     temporary file which will be renamed (atomically) to the target
 702     name if everything succeeds.
 703
 704     The mode and buffering arguments are handled exactly as with open,
 705     and the yielded file will have very restrictive permissions, as
 706     per mkstemp.
 707
 708     E.g.::
 709
 710         with atomically_replaced_file('foo.txt', 'w') as f:
 711             f.write('hello jack.')
 712
 713     """
 714
 715     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 716                                        text=('b' not in mode))
 717     try:
 718         try:
 719             f = os.fdopen(ffd, mode, buffering)
 720         except:
 721             os.close(ffd)
 722             raise
 723         try:
 724             yield f
 725         finally:
 726             f.close()
 727         os.rename(tempname, name)
 728     finally:
 729         unlink(tempname)  # nonexistant file is ignored
 730
 731
 732 def slashappend(s):
 733     """Append "/" to 's' if it doesn't aleady end in "/"."""
 734     assert isinstance(s, bytes)
 735     if s and not s.endswith(b'/'):
 736         return s + b'/'
 737     else:
 738         return s
 739
 740
 741 def _mmap_do(f, sz, flags, prot, close):
 742     if not sz:
 743         st = os.fstat(f.fileno())
 744         sz = st.st_size
 745     if not sz:
 746         # trying to open a zero-length map gives an error, but an empty
 747         # string has all the same behaviour of a zero-length map, ie. it has
 748         # no elements :)
 749         return ''
 750     map = mmap.mmap(f.fileno(), sz, flags, prot)
 751     if close:
 752         f.close()  # map will persist beyond file close
 753     return map
 754
 755
 756 def mmap_read(f, sz = 0, close=True):
 757     """Create a read-only memory mapped region on file 'f'.
 758     If sz is 0, the region will cover the entire file.
 759     """
 760     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 761
 762
 763 def mmap_readwrite(f, sz = 0, close=True):
 764     """Create a read-write memory mapped region on file 'f'.
 765     If sz is 0, the region will cover the entire file.
 766     """
 767     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 768                     close)
 769
 770
 771 def mmap_readwrite_private(f, sz = 0, close=True):
 772     """Create a read-write memory mapped region on file 'f'.
 773     If sz is 0, the region will cover the entire file.
 774     The map is private, which means the changes are never flushed back to the
 775     file.
 776     """
 777     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 778                     close)
 779
 780
# fmincore() is only defined when the C helper module provides mincore.
_mincore = getattr(_helpers, 'mincore', None)
if _mincore:
    # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
    MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)

    # Bytes examined per mmap/mincore call; set on first use.
    _fmincore_chunk_size = None
    def _set_fmincore_chunk_size():
        """Choose the chunk size: the largest multiple of the page size
        that does not exceed 64MiB, or one page if pages are larger."""
        global _fmincore_chunk_size
        pref_chunk_size = 64 * 1024 * 1024
        chunk_size = sc_page_size
        if (sc_page_size < pref_chunk_size):
            chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
        _fmincore_chunk_size = chunk_size

    def fmincore(fd):
        """Return the mincore() data for fd as a bytearray whose values can be
        tested via MINCORE_INCORE, or None if fd does not fully
        support the operation."""
        st = os.fstat(fd)
        if (st.st_size == 0):
            return bytearray(0)
        if not _fmincore_chunk_size:
            _set_fmincore_chunk_size()
        pages_per_chunk = _fmincore_chunk_size // sc_page_size;
        # Both counts round up so that a partial final page/chunk is
        # included.
        page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
        chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size
        result = bytearray(page_count)
        for ci in compat.range(chunk_count):
            pos = _fmincore_chunk_size * ci;
            msize = min(_fmincore_chunk_size, st.st_size - pos)
            try:
                # Positional arguments after the length: flags, prot,
                # access, offset.
                m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
            except mmap.error as ex:
                if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
                    # Perhaps the file was a pipe, i.e. "... | bup split ..."
                    return None
                raise ex
            try:
                # Results for this chunk are stored in result starting
                # at the index of the chunk's first page.
                _mincore(m, msize, 0, result, ci * pages_per_chunk)
            except OSError as ex:
                if ex.errno == errno.ENOSYS:
                    return None
                raise
        return result
 825
 826
 827 def parse_timestamp(epoch_str):
 828     """Return the number of nanoseconds since the epoch that are described
 829 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 830 throw a ValueError that may contain additional information."""
 831     ns_per = {'s' :  1000000000,
 832               'ms' : 1000000,
 833               'us' : 1000,
 834               'ns' : 1}
 835     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 836     if not match:
 837         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 838             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 839         raise ValueError()
 840     (n, units) = match.group(1, 2)
 841     if not n:
 842         n = 1
 843     n = int(n)
 844     return n * ns_per[units]
 845
 846
 847 def parse_num(s):
 848     """Parse string or bytes as a possibly unit suffixed number.
 849
 850     For example:
 851         199.2k means 203981 bytes
 852         1GB means 1073741824 bytes
 853         2.1 tb means 2199023255552 bytes
 854     """
 855     if isinstance(s, bytes):
 856         # FIXME: should this raise a ValueError for UnicodeDecodeError
 857         # (perhaps with the latter as the context).
 858         s = s.decode('ascii')
 859     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 860     if not g:
 861         raise ValueError("can't parse %r as a number" % s)
 862     (val, unit) = g.groups()
 863     num = float(val)
 864     unit = unit.lower()
 865     if unit in ['t', 'tb']:
 866         mult = 1024*1024*1024*1024
 867     elif unit in ['g', 'gb']:
 868         mult = 1024*1024*1024
 869     elif unit in ['m', 'mb']:
 870         mult = 1024*1024
 871     elif unit in ['k', 'kb']:
 872         mult = 1024
 873     elif unit in ['', 'b']:
 874         mult = 1
 875     else:
 876         raise ValueError("invalid unit %r in number %r" % (unit, s))
 877     return int(num*mult)
 878
 879
 880 saved_errors = []
 881 def add_error(e):
 882     """Append an error message to the list of saved errors.
 883
 884     Once processing is able to stop and output the errors, the saved errors are
 885     accessible in the module variable helpers.saved_errors.
 886     """
 887     saved_errors.append(e)
 888     log('%-70s\n' % e)
 889
 890
 891 def clear_errors():
 892     global saved_errors
 893     saved_errors = []
 894
 895
 896 def die_if_errors(msg=None, status=1):
 897     global saved_errors
 898     if saved_errors:
 899         if not msg:
 900             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 901         log(msg)
 902         sys.exit(status)
 903
 904
 905 def handle_ctrl_c():
 906     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 907
 908     The new exception handler will make sure that bup will exit without an ugly
 909     stacktrace when Ctrl-C is hit.
 910     """
 911     oldhook = sys.excepthook
 912     def newhook(exctype, value, traceback):
 913         if exctype == KeyboardInterrupt:
 914             log('\nInterrupted.\n')
 915         else:
 916             return oldhook(exctype, value, traceback)
 917     sys.excepthook = newhook
 918
 919
 920 def columnate(l, prefix):
 921     """Format elements of 'l' in columns with 'prefix' leading each line.
 922
 923     The number of columns is determined automatically based on the string
 924     lengths.
 925     """
 926     binary = isinstance(prefix, bytes)
 927     nothing = b'' if binary else ''
 928     nl = b'\n' if binary else '\n'
 929     if not l:
 930         return nothing
 931     l = l[:]
 932     clen = max(len(s) for s in l)
 933     ncols = (tty_width() - len(prefix)) // (clen + 2)
 934     if ncols <= 1:
 935         ncols = 1
 936         clen = 0
 937     cols = []
 938     while len(l) % ncols:
 939         l.append(nothing)
 940     rows = len(l) // ncols
 941     for s in compat.range(0, len(l), rows):
 942         cols.append(l[s:s+rows])
 943     out = nothing
 944     fmt = b'%-*s' if binary else '%-*s'
 945     for row in zip(*cols):
 946         out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
 947     return out
 948
 949
 950 def parse_date_or_fatal(str, fatal):
 951     """Parses the given date or calls Option.fatal().
 952     For now we expect a string that contains a float."""
 953     try:
 954         date = float(str)
 955     except ValueError as e:
 956         raise fatal('invalid date format (should be a float): %r' % e)
 957     else:
 958         return date
 959
 960
 961 def parse_excludes(options, fatal):
 962     """Traverse the options and extract all excludes, or call Option.fatal()."""
 963     excluded_paths = []
 964
 965     for flag in options:
 966         (option, parameter) = flag
 967         if option == '--exclude':
 968             excluded_paths.append(resolve_parent(argv_bytes(parameter)))
 969         elif option == '--exclude-from':
 970             try:
 971                 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
 972             except IOError as e:
 973                 raise fatal("couldn't read %r" % parameter)
 974             for exclude_path in f.readlines():
 975                 # FIXME: perhaps this should be rstrip('\n')
 976                 exclude_path = resolve_parent(exclude_path.strip())
 977                 if exclude_path:
 978                     excluded_paths.append(exclude_path)
 979     return sorted(frozenset(excluded_paths))
 980
 981
 982 def parse_rx_excludes(options, fatal):
 983     """Traverse the options and extract all rx excludes, or call
 984     Option.fatal()."""
 985     excluded_patterns = []
 986
 987     for flag in options:
 988         (option, parameter) = flag
 989         if option == '--exclude-rx':
 990             try:
 991                 excluded_patterns.append(re.compile(argv_bytes(parameter)))
 992             except re.error as ex:
 993                 fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
 994         elif option == '--exclude-rx-from':
 995             try:
 996                 f = open(resolve_parent(parameter), 'rb')
 997             except IOError as e:
 998                 raise fatal("couldn't read %r" % parameter)
 999             for pattern in f.readlines():
1000                 spattern = pattern.rstrip(b'\n')
1001                 if not spattern:
1002                     continue
1003                 try:
1004                     excluded_patterns.append(re.compile(spattern))
1005                 except re.error as ex:
1006                     fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
1007     return excluded_patterns
1008
1009
def should_rx_exclude_path(path, exclude_rxs):
    """Return True if some regular expression in exclude_rxs is found
    in path, and False otherwise.

    The patterns are tried in order; the first one found is reported
    via debug1() and the remaining ones are not tried.
    """
    hit = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if hit is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, hit.pattern))
    return True
1018
1019
1020 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1021 # that resolve against the current filesystem in the strip/graft
1022 # functions for example, but elsewhere as well.  I suspect bup's not
1023 # always being careful about that.  For some cases, the contents of
1024 # the current filesystem should be irrelevant, and consulting it might
1025 # produce the wrong result, perhaps via unintended symlink resolution,
1026 # for example.
1027
def path_components(path):
    """Split path (bytes) into a list of (name, full_path_to_name) pairs.

    The path must start with b'/'; otherwise an Exception is raised.
    It is normalized with os.path.abspath() before being split, and
    the first pair always stands for the root.
    Example:
      b'/home/foo' -> [(b'', b'/'), (b'home', b'/home'),
                       (b'foo', b'/home/foo')]"""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    components = [(b'', b'/')]
    absolute = os.path.abspath(path)
    if absolute != b'/':
        so_far = b''
        # The element before the leading '/' is empty and already
        # accounted for by the root pair above, so skip it.
        for name in absolute.split(b'/')[1:]:
            so_far = so_far + b'/' + name
            components.append((name, so_far))
    return components
1045
1046
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Both path and the prefixes are normalized with os.path.abspath().
    Longer prefixes are tried first, and a prefix that normalizes to
    b'/' is ignored.  A prefix only applies when it matches whole path
    components, i.e. b'/foo/bar' applies to b'/foo/bar' and
    b'/foo/bar/baz', but not to b'/foo/barbaz'.  The first pair of a
    stripped result has an empty name and the prefix as its path.  If
    no prefix applies, the result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    for bp in sorted(strip_prefixes, key=len, reverse=True):
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == b'/':
            continue
        if not normalized_path.startswith(normalized_bp):
            continue
        remainder = normalized_path[len(normalized_bp):]
        if remainder and not remainder.startswith(b'/'):
            # Only the leading bytes of a component matched (e.g. prefix
            # /foo/bar against /foo/barbaz); stripping here would
            # produce paths that don't exist.
            continue
        prefix = normalized_bp
        result = []
        for p in remainder.split(b'/'):
            if p:  # not root
                prefix += b'/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1069
1070
def grafted_path_components(graft_points, path):
    """Return the (name, fs_path_or_None) components of path after
    applying the first matching graft point.

    graft_points is an iterable of (old_prefix, new_prefix) pairs as
    given by --graft old=new.  Both prefixes are normalized with
    os.path.normpath() and path with os.path.abspath().  A graft point
    matches when old_prefix is a whole-component prefix of the path.
    If none matches, the result is path_components() of the
    normalized path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if not clean_path.startswith(old_prefix):
            continue
        remainder = clean_path[len(old_prefix):]
        if remainder and not (old_prefix.endswith(b'/')
                              or remainder.startswith(b'/')):
            # Only the leading bytes of a component matched (e.g.
            # /foo/bar against /foo/barbaz), so this graft point does
            # not apply.
            continue
        # Splice the prefixes directly.  Going through re.sub() would
        # interpret any backslashes in new_prefix as escape sequences.
        grafted_path = new_prefix + remainder
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = b'/' + grafted_path.lstrip(b'/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
        new_prefix_parts = new_prefix.split(b'/')
        result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == b'/':  # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1112
1113
Sha1 = hashlib.sha1


_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return _helpers.localtime() of floor(time) as a bup_time."""
        return bup_time(*_helpers.localtime(floor(time)))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        # Format the sign separately: a negative offset of less than
        # an hour has h == 0, and a sign attached to h would come out
        # as "+".  An offset that truncates to zero minutes stays "+0000".
        sign = b'-' if off < 0 and offmin else b'+'
        return b'%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any fields past
        the first nine."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return strftime's %z rendering of the local UTC offset for
        time t, as ASCII bytes."""
        # time.strftime() wants a native str format (bytes raises
        # TypeError on Python 3), so encode the result instead in
        # order to match the bytes returned by the variant above.
        return time.strftime('%z', localtime(t)).encode('ascii')
    def to_py_time(x):
        """Return x unchanged; time.localtime results need no conversion."""
        return x
1152
1153
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name (bytes) is acceptable as a save name, and
    False otherwise."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if name == b'@' or name.endswith(b'.'):
        return False
    if name.startswith(b'/') or name.endswith(b'/'):
        return False
    if _some_invalid_save_parts_rx.search(name) is not None:
        return False
    # No ASCII control characters (below 0x20) and no DEL (0x7f).
    if any(byte_int(c) < 0x20 or byte_int(c) == 0x7f for c in name):
        return False
    # No slash-separated part may start with '.' or end with '.lock'.
    return not any(part.startswith(b'.') or part.endswith(b'.lock')
                   for part in name.split(b'/'))
1171
1172
_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert the period s (bytes) to a number of seconds.

    A period is a decimal count followed by one of the units s, min,
    h, d, w, m (counted as 31 days) or y (counted as 366 days).
    b'forever' yields float('inf'); anything that isn't a valid
    period yields None.
    """
    if s == b'forever':
        return float('inf')
    match = _period_rx.match(s)
    if match is None:
        return None
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        b's': 1,
        b'min': minute,
        b'h': hour,
        b'd': day,
        b'w': 7 * day,
        b'm': 31 * day,
        b'y': 366 * day,
    }
    count, unit = match.groups()
    return int(count) * secs_per_unit[unit]