lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from os import environ
   8 from pipes import quote
   9 from subprocess import PIPE, Popen
  10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, grp, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 # This function should really be in helpers, not in bup.options.  But we
  16 # want options.py to be standalone so people can include it in other projects.
  17 from bup.options import _tty_width as tty_width
  18
  19
  20 class Nonlocal:
  21     """Helper to deal with Python scoping issues"""
  22     pass
  23
  24
# System page size as reported by sysconf(SC_PAGE_SIZE); must be positive.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Value of sysconf(SC_ARG_MAX); this is the default arg_max for batchpipe().
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  31
  32 def last(iterable):
  33     result = None
  34     for result in iterable:
  35         pass
  36     return result
  37
  38
  39 def atoi(s):
  40     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  41     try:
  42         return int(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 def atof(s):
  48     """Convert the string 's' to a float. Return 0 if s is not a number."""
  49     try:
  50         return float(s or '0')
  51     except ValueError:
  52         return 0
  53
  54
# Debug verbosity, read once at import time from the BUP_DEBUG environment
# variable (0 when unset or not an integer).  debug1() and debug2() log only
# when this is >= 1 and >= 2 respectively.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  56
  57
  58 try:
  59     _fdatasync = os.fdatasync
  60 except AttributeError:
  61     _fdatasync = os.fsync
  62
  63 if sys.platform.startswith('darwin'):
  64     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  65     import fcntl
  66     def fdatasync(fd):
  67         try:
  68             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  69         except IOError as e:
  70             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  71             if e.errno == errno.ENOTSUP:
  72                 return _fdatasync(fd)
  73             else:
  74                 raise
  75 else:
  76     fdatasync = _fdatasync
  77
  78
  79 def partition(predicate, stream):
  80     """Returns (leading_matches_it, rest_it), where leading_matches_it
  81     must be completely exhausted before traversing rest_it.
  82
  83     """
  84     stream = iter(stream)
  85     ns = Nonlocal()
  86     ns.first_nonmatch = None
  87     def leading_matches():
  88         for x in stream:
  89             if predicate(x):
  90                 yield x
  91             else:
  92                 ns.first_nonmatch = (x,)
  93                 break
  94     def rest():
  95         if ns.first_nonmatch:
  96             yield ns.first_nonmatch[0]
  97             for x in stream:
  98                 yield x
  99     return (leading_matches(), rest())
 100
 101
 102 def merge_dict(*xs):
 103     result = {}
 104     for x in xs:
 105         result.update(x)
 106     return result
 107
 108
 109 def lines_until_sentinel(f, sentinel, ex_type):
 110     # sentinel must end with \n and must contain only one \n
 111     while True:
 112         line = f.readline()
 113         if not (line and line.endswith('\n')):
 114             raise ex_type('Hit EOF while reading line')
 115         if line == sentinel:
 116             return
 117         yield line
 118
 119
 120 def stat_if_exists(path):
 121     try:
 122         return os.stat(path)
 123     except OSError as e:
 124         if e.errno != errno.ENOENT:
 125             raise
 126     return None
 127
 128
 129 # Write (blockingly) to sockets that may or may not be in blocking mode.
 130 # We need this because our stderr is sometimes eaten by subprocesses
 131 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 132 # leading to race conditions.  Ick.  We'll do it the hard way.
 133 def _hard_write(fd, buf):
 134     while buf:
 135         (r,w,x) = select.select([], [fd], [], None)
 136         if not w:
 137             raise IOError('select(fd) returned without being writable')
 138         try:
 139             sz = os.write(fd, buf)
 140         except OSError as e:
 141             if e.errno != errno.EAGAIN:
 142                 raise
 143         assert(sz >= 0)
 144         buf = buf[sz:]
 145
 146
 147 _last_prog = 0
 148 def log(s):
 149     """Print a log message to stderr."""
 150     global _last_prog
 151     sys.stdout.flush()
 152     _hard_write(sys.stderr.fileno(), s)
 153     _last_prog = 0
 154
 155
 156 def debug1(s):
 157     if buglvl >= 1:
 158         log(s)
 159
 160
 161 def debug2(s):
 162     if buglvl >= 2:
 163         log(s)
 164
 165
 166 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 167 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 168 _last_progress = ''
 169 def progress(s):
 170     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 171     global _last_progress
 172     if istty2:
 173         log(s)
 174         _last_progress = s
 175
 176
 177 def qprogress(s):
 178     """Calls progress() only if we haven't printed progress in a while.
 179
 180     This avoids overloading the stderr buffer with excess junk.
 181     """
 182     global _last_prog
 183     now = time.time()
 184     if now - _last_prog > 0.1:
 185         progress(s)
 186         _last_prog = now
 187
 188
 189 def reprogress():
 190     """Calls progress() to redisplay the most recent progress message.
 191
 192     Useful after you've printed some other message that wipes out the
 193     progress line.
 194     """
 195     if _last_progress and _last_progress.endswith('\r'):
 196         progress(_last_progress)
 197
 198
 199 def mkdirp(d, mode=None):
 200     """Recursively create directories on path 'd'.
 201
 202     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 203     the path already exists.
 204     """
 205     try:
 206         if mode:
 207             os.makedirs(d, mode)
 208         else:
 209             os.makedirs(d)
 210     except OSError as e:
 211         if e.errno == errno.EEXIST:
 212             pass
 213         else:
 214             raise
 215
 216
 217 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 218     if key:
 219         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 220     else:
 221         samekey = operator.eq
 222     count = 0
 223     total = sum(len(it) for it in iters)
 224     iters = (iter(it) for it in iters)
 225     heap = ((next(it, None),it) for it in iters)
 226     heap = [(e,it) for e,it in heap if e]
 227
 228     heapq.heapify(heap)
 229     pe = None
 230     while heap:
 231         if not count % pfreq:
 232             pfunc(count, total)
 233         e, it = heap[0]
 234         if not samekey(e, pe):
 235             pe = e
 236             yield e
 237         count += 1
 238         try:
 239             e = next(it)
 240         except StopIteration:
 241             heapq.heappop(heap) # remove current
 242         else:
 243             heapq.heapreplace(heap, (e, it)) # shift current to new location
 244     pfinal(count, total)
 245
 246
 247 def unlink(f):
 248     """Delete a file at path 'f' if it currently exists.
 249
 250     Unlike os.unlink(), does not throw an exception if the file didn't already
 251     exist.
 252     """
 253     try:
 254         os.unlink(f)
 255     except OSError as e:
 256         if e.errno != errno.ENOENT:
 257             raise
 258
 259
 260 def shstr(cmd):
 261     if isinstance(cmd, compat.str_type):
 262         return cmd
 263     else:
 264         return ' '.join(map(quote, cmd))
 265
# Alias for subprocess.check_call: run a command and raise
# CalledProcessError if it exits with a non-zero status.
exc = subprocess.check_call
 267
 268 def exo(cmd,
 269         input=None,
 270         stdin=None,
 271         stderr=None,
 272         shell=False,
 273         check=True,
 274         preexec_fn=None):
 275     if input:
 276         assert stdin in (None, PIPE)
 277         stdin = PIPE
 278     p = Popen(cmd,
 279               stdin=stdin, stdout=PIPE, stderr=stderr,
 280               shell=shell,
 281               preexec_fn=preexec_fn)
 282     out, err = p.communicate(input)
 283     if check and p.returncode != 0:
 284         raise Exception('subprocess %r failed with status %d, stderr: %r'
 285                         % (' '.join(map(quote, cmd)), p.returncode, err))
 286     return out, err, p
 287
 288 def readpipe(argv, preexec_fn=None, shell=False):
 289     """Run a subprocess and return its output."""
 290     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 291                          shell=shell)
 292     out, err = p.communicate()
 293     if p.returncode != 0:
 294         raise Exception('subprocess %r failed with status %d'
 295                         % (' '.join(argv), p.returncode))
 296     return out
 297
 298
 299 def _argmax_base(command):
 300     base_size = 2048
 301     for c in command:
 302         base_size += len(command) + 1
 303     for k, v in compat.items(environ):
 304         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 305     return base_size
 306
 307
 308 def _argmax_args_size(args):
 309     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 310
 311
 312 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 313     """If args is not empty, yield the output produced by calling the
 314 command list with args as a sequence of strings (It may be necessary
 315 to return multiple strings in order to respect ARG_MAX)."""
 316     # The optional arg_max arg is a workaround for an issue with the
 317     # current wvtest behavior.
 318     base_size = _argmax_base(command)
 319     while args:
 320         room = arg_max - base_size
 321         i = 0
 322         while i < len(args):
 323             next_size = _argmax_args_size(args[i:i+1])
 324             if room - next_size < 0:
 325                 break
 326             room -= next_size
 327             i += 1
 328         sub_args = args[:i]
 329         args = args[i:]
 330         assert(len(sub_args))
 331         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 332
 333
 334 def resolve_parent(p):
 335     """Return the absolute path of a file without following any final symlink.
 336
 337     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 338     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 339     will follow symlinks in p's directory)
 340     """
 341     try:
 342         st = os.lstat(p)
 343     except OSError:
 344         st = None
 345     if st and stat.S_ISLNK(st.st_mode):
 346         (dir, name) = os.path.split(p)
 347         dir = os.path.realpath(dir)
 348         out = os.path.join(dir, name)
 349     else:
 350         out = os.path.realpath(p)
 351     #log('realpathing:%r,%r\n' % (p, out))
 352     return out
 353
 354
 355 def detect_fakeroot():
 356     "Return True if we appear to be running under fakeroot."
 357     return os.getenv("FAKEROOTKEY") != None
 358
 359
 360 if sys.platform.startswith('cygwin'):
 361     def is_superuser():
 362         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 363         groups = os.getgroups()
 364         return 544 in groups or 0 in groups
 365 else:
 366     def is_superuser():
 367         return os.geteuid() == 0
 368
 369
 370 def _cache_key_value(get_value, key, cache):
 371     """Return (value, was_cached).  If there is a value in the cache
 372     for key, use that, otherwise, call get_value(key) which should
 373     throw a KeyError if there is no value -- in which case the cached
 374     and returned value will be None.
 375     """
 376     try: # Do we already have it (or know there wasn't one)?
 377         value = cache[key]
 378         return value, True
 379     except KeyError:
 380         pass
 381     value = None
 382     try:
 383         cache[key] = value = get_value(key)
 384     except KeyError:
 385         cache[key] = None
 386     return value, False
 387
 388
 389 _uid_to_pwd_cache = {}
 390 _name_to_pwd_cache = {}
 391
 392 def pwd_from_uid(uid):
 393     """Return password database entry for uid (may be a cached value).
 394     Return None if no entry is found.
 395     """
 396     global _uid_to_pwd_cache, _name_to_pwd_cache
 397     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 398     if entry and not cached:
 399         _name_to_pwd_cache[entry.pw_name] = entry
 400     return entry
 401
 402
 403 def pwd_from_name(name):
 404     """Return password database entry for name (may be a cached value).
 405     Return None if no entry is found.
 406     """
 407     global _uid_to_pwd_cache, _name_to_pwd_cache
 408     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 409     if entry and not cached:
 410         _uid_to_pwd_cache[entry.pw_uid] = entry
 411     return entry
 412
 413
 414 _gid_to_grp_cache = {}
 415 _name_to_grp_cache = {}
 416
 417 def grp_from_gid(gid):
 418     """Return password database entry for gid (may be a cached value).
 419     Return None if no entry is found.
 420     """
 421     global _gid_to_grp_cache, _name_to_grp_cache
 422     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 423     if entry and not cached:
 424         _name_to_grp_cache[entry.gr_name] = entry
 425     return entry
 426
 427
 428 def grp_from_name(name):
 429     """Return password database entry for name (may be a cached value).
 430     Return None if no entry is found.
 431     """
 432     global _gid_to_grp_cache, _name_to_grp_cache
 433     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 434     if entry and not cached:
 435         _gid_to_grp_cache[entry.gr_gid] = entry
 436     return entry
 437
 438
 439 _username = None
 440 def username():
 441     """Get the user's login name."""
 442     global _username
 443     if not _username:
 444         uid = os.getuid()
 445         _username = pwd_from_uid(uid).pw_name or b'user%d' % uid
 446     return _username
 447
 448
 449 _userfullname = None
 450 def userfullname():
 451     """Get the user's full name."""
 452     global _userfullname
 453     if not _userfullname:
 454         uid = os.getuid()
 455         entry = pwd_from_uid(uid)
 456         if entry:
 457             _userfullname = entry.pw_gecos.split(b',')[0] or entry.pw_name
 458         if not _userfullname:
 459             _userfullname = b'user%d' % uid
 460     return _userfullname
 461
 462
 463 _hostname = None
 464 def hostname():
 465     """Get the FQDN of this machine."""
 466     global _hostname
 467     if not _hostname:
 468         _hostname = socket.getfqdn()
 469     return _hostname
 470
 471
 472 _resource_path = None
 473 def resource_path(subdir=''):
 474     global _resource_path
 475     if not _resource_path:
 476         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 477     return os.path.join(_resource_path, subdir)
 478
 479 def format_filesize(size):
 480     unit = 1024.0
 481     size = float(size)
 482     if size < unit:
 483         return "%d" % (size)
 484     exponent = int(math.log(size) // math.log(unit))
 485     size_prefix = "KMGTPE"[exponent - 1]
 486     return "%.1f%s" % (size // math.pow(unit, exponent), size_prefix)
 487
 488
 489 class NotOk(Exception):
 490     pass
 491
 492
 493 class BaseConn:
 494     def __init__(self, outp):
 495         self.outp = outp
 496
 497     def close(self):
 498         while self._read(65536): pass
 499
 500     def read(self, size):
 501         """Read 'size' bytes from input stream."""
 502         self.outp.flush()
 503         return self._read(size)
 504
 505     def readline(self):
 506         """Read from input stream until a newline is found."""
 507         self.outp.flush()
 508         return self._readline()
 509
 510     def write(self, data):
 511         """Write 'data' to output stream."""
 512         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 513         self.outp.write(data)
 514
 515     def has_input(self):
 516         """Return true if input stream is readable."""
 517         raise NotImplemented("Subclasses must implement has_input")
 518
 519     def ok(self):
 520         """Indicate end of output from last sent command."""
 521         self.write('\nok\n')
 522
 523     def error(self, s):
 524         """Indicate server error to the client."""
 525         s = re.sub(r'\s+', ' ', str(s))
 526         self.write('\nerror %s\n' % s)
 527
 528     def _check_ok(self, onempty):
 529         self.outp.flush()
 530         rl = ''
 531         for rl in linereader(self):
 532             #log('%d got line: %r\n' % (os.getpid(), rl))
 533             if not rl:  # empty line
 534                 continue
 535             elif rl == 'ok':
 536                 return None
 537             elif rl.startswith('error '):
 538                 #log('client: error: %s\n' % rl[6:])
 539                 return NotOk(rl[6:])
 540             else:
 541                 onempty(rl)
 542         raise Exception('server exited unexpectedly; see errors above')
 543
 544     def drain_and_check_ok(self):
 545         """Remove all data for the current command from input stream."""
 546         def onempty(rl):
 547             pass
 548         return self._check_ok(onempty)
 549
 550     def check_ok(self):
 551         """Verify that server action completed successfully."""
 552         def onempty(rl):
 553             raise Exception('expected "ok", got %r' % rl)
 554         return self._check_ok(onempty)
 555
 556
 557 class Conn(BaseConn):
 558     def __init__(self, inp, outp):
 559         BaseConn.__init__(self, outp)
 560         self.inp = inp
 561
 562     def _read(self, size):
 563         return self.inp.read(size)
 564
 565     def _readline(self):
 566         return self.inp.readline()
 567
 568     def has_input(self):
 569         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 570         if rl:
 571             assert(rl[0] == self.inp.fileno())
 572             return True
 573         else:
 574             return None
 575
 576
 577 def checked_reader(fd, n):
 578     while n > 0:
 579         rl, _, _ = select.select([fd], [], [])
 580         assert(rl[0] == fd)
 581         buf = os.read(fd, n)
 582         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 583         yield buf
 584         n -= len(buf)
 585
 586
# Largest payload read from outr for a single packet; DemuxConn asserts
# that incoming packet lengths do not exceed it.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex the descriptors outr and errr onto outfd while p runs.

    Every packet written to outfd is a '!IB' header (4-byte payload
    length, 1-byte channel) followed by the payload.  Channel 1 carries
    data read from outr, channel 2 data read from errr.  A final
    zero-length packet on channel 3 is always written, even when an
    exception ends the loop.

    p only needs a poll() method; the loop runs while it returns None.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # NOTE(review): this break only leaves the for loop;
                    # the while loop keeps going until p.poll() reports
                    # an exit -- confirm that is intended after EOF.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        # End-of-stream marker: zero-length packet on channel 3.
        os.write(outfd, struct.pack('!IB', 0, 3))
 604
 605
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a multiplexed packet stream from the file descriptor 'infd':
    each packet is a 5-byte '!IB' header (payload length, channel)
    followed by the payload.  Channel 1 payloads are served to read()
    and readline(), channel 2 payloads are copied to sys.stderr, and a
    channel 3 packet marks the connection as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to fill tail up to 6 characters, then one
            # character at a time, keeping a sliding 6-character window.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        # Generator over the payload of the current channel 1 packet.
        self.reader = None
        # Data already read from the current packet but not yet consumed.
        self.buf = None
        # Set once a channel 3 packet has been seen.
        self.closed = False

    def write(self, data):
        """Write 'data' to the output stream after polling the input once."""
        # Non-blocking (timeout 0) attempt to load the next input chunk,
        # which also handles any stderr/close packet already waiting.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read the next packet header and act on its channel.

        Returns False if the connection is closed or nothing became
        readable within 'timeout' (passed to select()), True otherwise.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily by _load_buf().
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: pass the payload straight through.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds unconsumed data.

        Returns True if data is available in self.buf, False if the
        connection is closed or no packet arrived within 'timeout'.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current packet exhausted; go on to the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield consecutive pieces of input, split as directed by ix_fn.

        ix_fn(buf) returns None to take all of buf and continue with
        more input, or an index i to take buf[:i] and stop; the rest of
        buf is kept for the next read.  Blocks while waiting for data.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        """Return input up to and including the next newline."""
        def find_eol(buf):
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        """Return up to 'size' characters of input (fewer if the stream closes)."""
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return true if input data is available without blocking."""
        return self._load_buf(0)
 695
 696
 697 def linereader(f):
 698     """Generate a list of input lines from 'f' without terminating newlines."""
 699     while 1:
 700         line = f.readline()
 701         if not line:
 702             break
 703         yield line[:-1]
 704
 705
 706 def chunkyreader(f, count = None):
 707     """Generate a list of chunks of data read from 'f'.
 708
 709     If count is None, read until EOF is reached.
 710
 711     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 712     reached while reading, raise IOError.
 713     """
 714     if count != None:
 715         while count > 0:
 716             b = f.read(min(count, 65536))
 717             if not b:
 718                 raise IOError('EOF with %d bytes remaining' % count)
 719             yield b
 720             count -= len(b)
 721     else:
 722         while 1:
 723             b = f.read(65536)
 724             if not b: break
 725             yield b
 726
 727
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed to name when leaving the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.  The temporary file is created in the
    same directory as name.  If the block raises, the target is left
    untouched and the temporary file is removed.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so nothing owns the descriptor yet; close
            # it here before re-raising.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Only reached when the with-block finished without an exception.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 762
 763
 764 def slashappend(s):
 765     """Append "/" to 's' if it doesn't aleady end in "/"."""
 766     if s and not s.endswith('/'):
 767         return s + '/'
 768     else:
 769         return s
 770
 771
 772 def _mmap_do(f, sz, flags, prot, close):
 773     if not sz:
 774         st = os.fstat(f.fileno())
 775         sz = st.st_size
 776     if not sz:
 777         # trying to open a zero-length map gives an error, but an empty
 778         # string has all the same behaviour of a zero-length map, ie. it has
 779         # no elements :)
 780         return ''
 781     map = mmap.mmap(f.fileno(), sz, flags, prot)
 782     if close:
 783         f.close()  # map will persist beyond file close
 784     return map
 785
 786
 787 def mmap_read(f, sz = 0, close=True):
 788     """Create a read-only memory mapped region on file 'f'.
 789     If sz is 0, the region will cover the entire file.
 790     """
 791     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 792
 793
 794 def mmap_readwrite(f, sz = 0, close=True):
 795     """Create a read-write memory mapped region on file 'f'.
 796     If sz is 0, the region will cover the entire file.
 797     """
 798     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 799                     close)
 800
 801
 802 def mmap_readwrite_private(f, sz = 0, close=True):
 803     """Create a read-write memory mapped region on file 'f'.
 804     If sz is 0, the region will cover the entire file.
 805     The map is private, which means the changes are never flushed back to the
 806     file.
 807     """
 808     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 809                     close)
 810
 811
 812 _mincore = getattr(_helpers, 'mincore', None)
 813 if _mincore:
 814     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 815     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 816
 817     _fmincore_chunk_size = None
 818     def _set_fmincore_chunk_size():
 819         global _fmincore_chunk_size
 820         pref_chunk_size = 64 * 1024 * 1024
 821         chunk_size = sc_page_size
 822         if (sc_page_size < pref_chunk_size):
 823             chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
 824         _fmincore_chunk_size = chunk_size
 825
 826     def fmincore(fd):
 827         """Return the mincore() data for fd as a bytearray whose values can be
 828         tested via MINCORE_INCORE, or None if fd does not fully
 829         support the operation."""
 830         st = os.fstat(fd)
 831         if (st.st_size == 0):
 832             return bytearray(0)
 833         if not _fmincore_chunk_size:
 834             _set_fmincore_chunk_size()
 835         pages_per_chunk = _fmincore_chunk_size // sc_page_size;
 836         page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
 837         chunk_count = page_count // _fmincore_chunk_size
 838         if chunk_count < 1:
 839             chunk_count = 1
 840         result = bytearray(page_count)
 841         for ci in compat.range(chunk_count):
 842             pos = _fmincore_chunk_size * ci;
 843             msize = min(_fmincore_chunk_size, st.st_size - pos)
 844             try:
 845                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 846             except mmap.error as ex:
 847                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 848                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 849                     return None
 850                 raise ex
 851             try:
 852                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 853             except OSError as ex:
 854                 if ex.errno == errno.ENOSYS:
 855                     return None
 856                 raise
 857         return result
 858
 859
 860 def parse_timestamp(epoch_str):
 861     """Return the number of nanoseconds since the epoch that are described
 862 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 863 throw a ValueError that may contain additional information."""
 864     ns_per = {'s' :  1000000000,
 865               'ms' : 1000000,
 866               'us' : 1000,
 867               'ns' : 1}
 868     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 869     if not match:
 870         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 871             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 872         raise ValueError()
 873     (n, units) = match.group(1, 2)
 874     if not n:
 875         n = 1
 876     n = int(n)
 877     return n * ns_per[units]
 878
 879
 880 def parse_num(s):
 881     """Parse data size information into a float number.
 882
 883     Here are some examples of conversions:
 884         199.2k means 203981 bytes
 885         1GB means 1073741824 bytes
 886         2.1 tb means 2199023255552 bytes
 887     """
 888     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 889     if not g:
 890         raise ValueError("can't parse %r as a number" % s)
 891     (val, unit) = g.groups()
 892     num = float(val)
 893     unit = unit.lower()
 894     if unit in ['t', 'tb']:
 895         mult = 1024*1024*1024*1024
 896     elif unit in ['g', 'gb']:
 897         mult = 1024*1024*1024
 898     elif unit in ['m', 'mb']:
 899         mult = 1024*1024
 900     elif unit in ['k', 'kb']:
 901         mult = 1024
 902     elif unit in ['', 'b']:
 903         mult = 1
 904     else:
 905         raise ValueError("invalid unit %r in number %r" % (unit, s))
 906     return int(num*mult)
 907
 908
 909 def count(l):
 910     """Count the number of elements in an iterator. (consumes the iterator)"""
 911     return reduce(lambda x,y: x+1, l)
 912
 913
 914 saved_errors = []
 915 def add_error(e):
 916     """Append an error message to the list of saved errors.
 917
 918     Once processing is able to stop and output the errors, the saved errors are
 919     accessible in the module variable helpers.saved_errors.
 920     """
 921     saved_errors.append(e)
 922     log('%-70s\n' % e)
 923
 924
 925 def clear_errors():
 926     global saved_errors
 927     saved_errors = []
 928
 929
 930 def die_if_errors(msg=None, status=1):
 931     global saved_errors
 932     if saved_errors:
 933         if not msg:
 934             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 935         log(msg)
 936         sys.exit(status)
 937
 938
 939 def handle_ctrl_c():
 940     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 941
 942     The new exception handler will make sure that bup will exit without an ugly
 943     stacktrace when Ctrl-C is hit.
 944     """
 945     oldhook = sys.excepthook
 946     def newhook(exctype, value, traceback):
 947         if exctype == KeyboardInterrupt:
 948             log('\nInterrupted.\n')
 949         else:
 950             return oldhook(exctype, value, traceback)
 951     sys.excepthook = newhook
 952
 953
 954 def columnate(l, prefix):
 955     """Format elements of 'l' in columns with 'prefix' leading each line.
 956
 957     The number of columns is determined automatically based on the string
 958     lengths.
 959     """
 960     if not l:
 961         return ""
 962     l = l[:]
 963     clen = max(len(s) for s in l)
 964     ncols = (tty_width() - len(prefix)) // (clen + 2)
 965     if ncols <= 1:
 966         ncols = 1
 967         clen = 0
 968     cols = []
 969     while len(l) % ncols:
 970         l.append('')
 971     rows = len(l) // ncols
 972     for s in compat.range(0, len(l), rows):
 973         cols.append(l[s:s+rows])
 974     out = ''
 975     for row in zip(*cols):
 976         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 977     return out
 978
 979
 980 def parse_date_or_fatal(str, fatal):
 981     """Parses the given date or calls Option.fatal().
 982     For now we expect a string that contains a float."""
 983     try:
 984         date = float(str)
 985     except ValueError as e:
 986         raise fatal('invalid date format (should be a float): %r' % e)
 987     else:
 988         return date
 989
 990
 991 def parse_excludes(options, fatal):
 992     """Traverse the options and extract all excludes, or call Option.fatal()."""
 993     excluded_paths = []
 994
 995     for flag in options:
 996         (option, parameter) = flag
 997         if option == '--exclude':
 998             excluded_paths.append(resolve_parent(parameter))
 999         elif option == '--exclude-from':
1000             try:
1001                 f = open(resolve_parent(parameter))
1002             except IOError as e:
1003                 raise fatal("couldn't read %s" % parameter)
1004             for exclude_path in f.readlines():
1005                 # FIXME: perhaps this should be rstrip('\n')
1006                 exclude_path = resolve_parent(exclude_path.strip())
1007                 if exclude_path:
1008                     excluded_paths.append(exclude_path)
1009     return sorted(frozenset(excluded_paths))
1010
1011
def parse_rx_excludes(options, fatal):
    """Traverse the options and extract all rx excludes, or call
    Option.fatal().

    'options' is an iterable of (option, parameter) pairs.  Every
    --exclude-rx parameter, and every non-empty line (minus trailing
    newlines) of each file named by an --exclude-rx-from parameter, is
    compiled as a regular expression.  Returns the list of compiled
    patterns in the order encountered.  fatal(message) is called for
    each pattern that doesn't compile; if an --exclude-rx-from file
    can't be opened, the result of fatal(message) is raised.
    """
    excluded_patterns = []

    for flag in options:
        (option, parameter) = flag
        if option == '--exclude-rx':
            try:
                excluded_patterns.append(re.compile(parameter))
            except re.error as ex:
                fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
        elif option == '--exclude-rx-from':
            try:
                f = open(resolve_parent(parameter))
            except IOError:
                raise fatal("couldn't read %s" % parameter)
            # Close the file as soon as it has been read rather than
            # leaving the descriptor open until garbage collection.
            with f:
                for pattern in f.readlines():
                    spattern = pattern.rstrip('\n')
                    if not spattern:
                        continue
                    try:
                        excluded_patterns.append(re.compile(spattern))
                    except re.error as ex:
                        fatal('invalid --exclude-rx pattern (%s): %s'
                              % (spattern, ex))
    return excluded_patterns
1038
1039
def should_rx_exclude_path(path, exclude_rxs):
    """Return True if any compiled pattern in exclude_rxs is found in path.

    Only the first matching pattern is considered; it is reported via
    debug1() before returning.  Returns False when nothing matches.
    """
    matched = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if matched is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, matched.pattern))
    return True
1048
1049
1050 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1051 # that resolve against the current filesystem in the strip/graft
1052 # functions for example, but elsewhere as well.  I suspect bup's not
1053 # always being careful about that.  For some cases, the contents of
1054 # the current filesystem should be irrelevant, and consulting it might
1055 # produce the wrong result, perhaps via unintended symlink resolution,
1056 # for example.
1057
def path_components(path):
    """Split path into a list of (name, full_path_to_name) pairs.

    The path must start with '/' (an Exception is raised otherwise) and
    is normalized with os.path.abspath() first.  The first pair always
    stands for the root.
    Example:
      '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
    if not path.startswith('/'):
        raise Exception('path must start with "/": %s' % path)
    components = [('', '/')]
    normalized = os.path.abspath(path)
    if normalized != '/':
        trail = ''
        # The piece before the leading '/' is empty; the root pair above
        # already accounts for it.
        for name in normalized.split('/')[1:]:
            trail = trail + '/' + name
            components.append((name, trail))
    return components
1075
1076
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Prefixes are tried longest first, and '/' is never stripped.  A
    prefix only applies when it covers whole leading components of the
    path, so '/foo/bar' strips '/foo/bar/baz' but not '/foo/barbaz'.
    The first pair of a stripped result is ('', prefix).  When no
    prefix applies, the result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == '/':
            continue
        # Require the match to end on a component boundary; a plain
        # startswith() would also accept a sibling like '/foo/barbaz'
        # and produce a path that was never given.
        if normalized_path != normalized_bp \
           and not normalized_path.startswith(normalized_bp + '/'):
            continue
        prefix = normalized_bp
        result = []
        for p in normalized_path[len(normalized_bp):].split('/'):
            if p: # not root
                prefix += '/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1099
1100
def grafted_path_components(graft_points, path):
    """Return the (name, fs_path_or_None) components for path after
    applying the first matching graft in graft_points.

    graft_points is a sequence of (old_prefix, new_prefix) pairs, as
    in --graft old=new.  A graft matches when old_prefix covers whole
    leading components of the absolute form of path.  When no graft
    matches, the result is path_components() of the absolute path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize the prefixes (note: this does not make a relative
        # prefix absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # Only graft at a component boundary; a plain startswith()
        # would also let /foo/bar capture a sibling like /foo/barbaz.
        if old_prefix != '/' and clean_path != old_prefix \
           and not clean_path.startswith(old_prefix + '/'):
            continue
        # Swap the prefix by slicing so that new_prefix is taken
        # literally (as a regex replacement template, any backslashes
        # in it would be interpreted).
        grafted_path = new_prefix + clean_path[len(old_prefix):]
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = '/' + grafted_path.lstrip('/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
        new_prefix_parts = new_prefix.split('/')
        result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == '/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1142
1143
# Alias for hashlib's SHA-1 hash constructor.
Sha1 = hashlib.sha1
1145
1146
# The _helpers extension's localtime(), or None if it doesn't provide one.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Field names for the values returned by _helpers.localtime().
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return _helpers.localtime(time) as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds
        # down, so work with the magnitude and add the sign afterwards.
        offmin = abs(off) // 60
        h, m = divmod(offmin, 60)
        # Format the sign separately: negating h loses it when h == 0,
        # which would render an offset like -00:30 as "+0030".  An
        # offset that truncates to zero minutes stays "+0000".
        sign = '-' if off < 0 and offmin else '+'
        return "%s%02d%02d" % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, built from its first nine
        items unless it already is one."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return time.strftime('%z') for the local time t."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged."""
        return x
1182
1183
# Substrings that may not appear anywhere in a save name.
_some_invalid_save_parts_rx = re.compile(r'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name passes all of the save name checks below."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if name == '@' or name.startswith('/') or name.endswith(('/', '.')):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    # No ASCII control characters (including DEL).
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    # No '/'-separated component may start with '.' or end with '.lock'.
    return not any(part.startswith('.') or part.endswith('.lock')
                   for part in name.split('/'))
1201
1202
# A whole number immediately followed by one of the unit suffixes.
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period such as '10s', '2min', or '3w' to seconds.

    Returns float('inf') for 'forever' and None when s is not a valid
    period.  A month ('m') counts as 31 days and a year ('y') as 366.
    """
    if s == 'forever':
        return float('inf')
    match = _period_rx.match(s)
    if match is None:
        return None
    magnitude, unit = match.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {'s': 1,
                     'min': minute,
                     'h': hour,
                     'd': day,
                     'w': 7 * day,
                     'm': 31 * day,
                     'y': 366 * day}
    return int(magnitude) * secs_per_unit[unit]