lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from os import environ
   8 from pipes import quote
   9 from subprocess import PIPE, Popen
  10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, grp, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 # This function should really be in helpers, not in bup.options.  But we
  16 # want options.py to be standalone so people can include it in other projects.
  17 from bup.options import _tty_width as tty_width
  18
  19
  20 class Nonlocal:
  21     """Helper to deal with Python scoping issues"""
  22     pass
  23
  24
  25 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  26 assert(sc_page_size > 0)
  27
  28 sc_arg_max = os.sysconf('SC_ARG_MAX')
  29 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  30     sc_arg_max = 2 * 1024 * 1024
  31
  32 def last(iterable):
  33     result = None
  34     for result in iterable:
  35         pass
  36     return result
  37
  38
  39 def atoi(s):
  40     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  41     try:
  42         return int(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 def atof(s):
  48     """Convert the string 's' to a float. Return 0 if s is not a number."""
  49     try:
  50         return float(s or '0')
  51     except ValueError:
  52         return 0
  53
  54
  55 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  56
  57
  58 try:
  59     _fdatasync = os.fdatasync
  60 except AttributeError:
  61     _fdatasync = os.fsync
  62
  63 if sys.platform.startswith('darwin'):
  64     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  65     import fcntl
  66     def fdatasync(fd):
  67         try:
  68             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  69         except IOError as e:
  70             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  71             if e.errno == errno.ENOTSUP:
  72                 return _fdatasync(fd)
  73             else:
  74                 raise
  75 else:
  76     fdatasync = _fdatasync
  77
  78
  79 def partition(predicate, stream):
  80     """Returns (leading_matches_it, rest_it), where leading_matches_it
  81     must be completely exhausted before traversing rest_it.
  82
  83     """
  84     stream = iter(stream)
  85     ns = Nonlocal()
  86     ns.first_nonmatch = None
  87     def leading_matches():
  88         for x in stream:
  89             if predicate(x):
  90                 yield x
  91             else:
  92                 ns.first_nonmatch = (x,)
  93                 break
  94     def rest():
  95         if ns.first_nonmatch:
  96             yield ns.first_nonmatch[0]
  97             for x in stream:
  98                 yield x
  99     return (leading_matches(), rest())
 100
 101
 102 def lines_until_sentinel(f, sentinel, ex_type):
 103     # sentinel must end with \n and must contain only one \n
 104     while True:
 105         line = f.readline()
 106         if not (line and line.endswith('\n')):
 107             raise ex_type('Hit EOF while reading line')
 108         if line == sentinel:
 109             return
 110         yield line
 111
 112
 113 def stat_if_exists(path):
 114     try:
 115         return os.stat(path)
 116     except OSError as e:
 117         if e.errno != errno.ENOENT:
 118             raise
 119     return None
 120
 121
 122 # Write (blockingly) to sockets that may or may not be in blocking mode.
 123 # We need this because our stderr is sometimes eaten by subprocesses
 124 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 125 # leading to race conditions.  Ick.  We'll do it the hard way.
 126 def _hard_write(fd, buf):
 127     while buf:
 128         (r,w,x) = select.select([], [fd], [], None)
 129         if not w:
 130             raise IOError('select(fd) returned without being writable')
 131         try:
 132             sz = os.write(fd, buf)
 133         except OSError as e:
 134             if e.errno != errno.EAGAIN:
 135                 raise
 136         assert(sz >= 0)
 137         buf = buf[sz:]
 138
 139
 140 _last_prog = 0
 141 def log(s):
 142     """Print a log message to stderr."""
 143     global _last_prog
 144     sys.stdout.flush()
 145     _hard_write(sys.stderr.fileno(), s)
 146     _last_prog = 0
 147
 148
 149 def debug1(s):
 150     if buglvl >= 1:
 151         log(s)
 152
 153
 154 def debug2(s):
 155     if buglvl >= 2:
 156         log(s)
 157
 158
 159 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 160 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 161 _last_progress = ''
 162 def progress(s):
 163     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 164     global _last_progress
 165     if istty2:
 166         log(s)
 167         _last_progress = s
 168
 169
 170 def qprogress(s):
 171     """Calls progress() only if we haven't printed progress in a while.
 172
 173     This avoids overloading the stderr buffer with excess junk.
 174     """
 175     global _last_prog
 176     now = time.time()
 177     if now - _last_prog > 0.1:
 178         progress(s)
 179         _last_prog = now
 180
 181
 182 def reprogress():
 183     """Calls progress() to redisplay the most recent progress message.
 184
 185     Useful after you've printed some other message that wipes out the
 186     progress line.
 187     """
 188     if _last_progress and _last_progress.endswith('\r'):
 189         progress(_last_progress)
 190
 191
 192 def mkdirp(d, mode=None):
 193     """Recursively create directories on path 'd'.
 194
 195     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 196     the path already exists.
 197     """
 198     try:
 199         if mode:
 200             os.makedirs(d, mode)
 201         else:
 202             os.makedirs(d)
 203     except OSError as e:
 204         if e.errno == errno.EEXIST:
 205             pass
 206         else:
 207             raise
 208
 209
 210 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 211     if key:
 212         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 213     else:
 214         samekey = operator.eq
 215     count = 0
 216     total = sum(len(it) for it in iters)
 217     iters = (iter(it) for it in iters)
 218     heap = ((next(it, None),it) for it in iters)
 219     heap = [(e,it) for e,it in heap if e]
 220
 221     heapq.heapify(heap)
 222     pe = None
 223     while heap:
 224         if not count % pfreq:
 225             pfunc(count, total)
 226         e, it = heap[0]
 227         if not samekey(e, pe):
 228             pe = e
 229             yield e
 230         count += 1
 231         try:
 232             e = next(it)
 233         except StopIteration:
 234             heapq.heappop(heap) # remove current
 235         else:
 236             heapq.heapreplace(heap, (e, it)) # shift current to new location
 237     pfinal(count, total)
 238
 239
 240 def unlink(f):
 241     """Delete a file at path 'f' if it currently exists.
 242
 243     Unlike os.unlink(), does not throw an exception if the file didn't already
 244     exist.
 245     """
 246     try:
 247         os.unlink(f)
 248     except OSError as e:
 249         if e.errno != errno.ENOENT:
 250             raise
 251
 252
 253 def shstr(cmd):
 254     if isinstance(cmd, compat.str_type):
 255         return cmd
 256     else:
 257         return ' '.join(map(quote, cmd))
 258
 259 exc = subprocess.check_call
 260
 261 def exo(cmd,
 262         input=None,
 263         stdin=None,
 264         stderr=None,
 265         shell=False,
 266         check=True,
 267         preexec_fn=None):
 268     if input:
 269         assert stdin in (None, PIPE)
 270         stdin = PIPE
 271     p = Popen(cmd,
 272               stdin=stdin, stdout=PIPE, stderr=stderr,
 273               shell=shell,
 274               preexec_fn=preexec_fn)
 275     out, err = p.communicate(input)
 276     if check and p.returncode != 0:
 277         raise Exception('subprocess %r failed with status %d, stderr: %r'
 278                         % (' '.join(map(quote, cmd)), p.returncode, err))
 279     return out, err, p
 280
 281 def readpipe(argv, preexec_fn=None, shell=False):
 282     """Run a subprocess and return its output."""
 283     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 284                          shell=shell)
 285     out, err = p.communicate()
 286     if p.returncode != 0:
 287         raise Exception('subprocess %r failed with status %d'
 288                         % (' '.join(argv), p.returncode))
 289     return out
 290
 291
 292 def _argmax_base(command):
 293     base_size = 2048
 294     for c in command:
 295         base_size += len(command) + 1
 296     for k, v in compat.items(environ):
 297         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 298     return base_size
 299
 300
 301 def _argmax_args_size(args):
 302     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 303
 304
 305 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 306     """If args is not empty, yield the output produced by calling the
 307 command list with args as a sequence of strings (It may be necessary
 308 to return multiple strings in order to respect ARG_MAX)."""
 309     # The optional arg_max arg is a workaround for an issue with the
 310     # current wvtest behavior.
 311     base_size = _argmax_base(command)
 312     while args:
 313         room = arg_max - base_size
 314         i = 0
 315         while i < len(args):
 316             next_size = _argmax_args_size(args[i:i+1])
 317             if room - next_size < 0:
 318                 break
 319             room -= next_size
 320             i += 1
 321         sub_args = args[:i]
 322         args = args[i:]
 323         assert(len(sub_args))
 324         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 325
 326
 327 def resolve_parent(p):
 328     """Return the absolute path of a file without following any final symlink.
 329
 330     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 331     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 332     will follow symlinks in p's directory)
 333     """
 334     try:
 335         st = os.lstat(p)
 336     except OSError:
 337         st = None
 338     if st and stat.S_ISLNK(st.st_mode):
 339         (dir, name) = os.path.split(p)
 340         dir = os.path.realpath(dir)
 341         out = os.path.join(dir, name)
 342     else:
 343         out = os.path.realpath(p)
 344     #log('realpathing:%r,%r\n' % (p, out))
 345     return out
 346
 347
 348 def detect_fakeroot():
 349     "Return True if we appear to be running under fakeroot."
 350     return os.getenv("FAKEROOTKEY") != None
 351
 352
 353 _warned_about_superuser_detection = None
 354 def is_superuser():
 355     if sys.platform.startswith('cygwin'):
 356         if sys.getwindowsversion()[0] > 5:
 357             # Sounds like situation is much more complicated here
 358             global _warned_about_superuser_detection
 359             if not _warned_about_superuser_detection:
 360                 log("can't detect root status for OS version > 5; assuming not root")
 361                 _warned_about_superuser_detection = True
 362             return False
 363         import ctypes
 364         return ctypes.cdll.shell32.IsUserAnAdmin()
 365     else:
 366         return os.geteuid() == 0
 367
 368
 369 def _cache_key_value(get_value, key, cache):
 370     """Return (value, was_cached).  If there is a value in the cache
 371     for key, use that, otherwise, call get_value(key) which should
 372     throw a KeyError if there is no value -- in which case the cached
 373     and returned value will be None.
 374     """
 375     try: # Do we already have it (or know there wasn't one)?
 376         value = cache[key]
 377         return value, True
 378     except KeyError:
 379         pass
 380     value = None
 381     try:
 382         cache[key] = value = get_value(key)
 383     except KeyError:
 384         cache[key] = None
 385     return value, False
 386
 387
 388 _uid_to_pwd_cache = {}
 389 _name_to_pwd_cache = {}
 390
 391 def pwd_from_uid(uid):
 392     """Return password database entry for uid (may be a cached value).
 393     Return None if no entry is found.
 394     """
 395     global _uid_to_pwd_cache, _name_to_pwd_cache
 396     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 397     if entry and not cached:
 398         _name_to_pwd_cache[entry.pw_name] = entry
 399     return entry
 400
 401
 402 def pwd_from_name(name):
 403     """Return password database entry for name (may be a cached value).
 404     Return None if no entry is found.
 405     """
 406     global _uid_to_pwd_cache, _name_to_pwd_cache
 407     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 408     if entry and not cached:
 409         _uid_to_pwd_cache[entry.pw_uid] = entry
 410     return entry
 411
 412
 413 _gid_to_grp_cache = {}
 414 _name_to_grp_cache = {}
 415
 416 def grp_from_gid(gid):
 417     """Return password database entry for gid (may be a cached value).
 418     Return None if no entry is found.
 419     """
 420     global _gid_to_grp_cache, _name_to_grp_cache
 421     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 422     if entry and not cached:
 423         _name_to_grp_cache[entry.gr_name] = entry
 424     return entry
 425
 426
 427 def grp_from_name(name):
 428     """Return password database entry for name (may be a cached value).
 429     Return None if no entry is found.
 430     """
 431     global _gid_to_grp_cache, _name_to_grp_cache
 432     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 433     if entry and not cached:
 434         _gid_to_grp_cache[entry.gr_gid] = entry
 435     return entry
 436
 437
 438 _username = None
 439 def username():
 440     """Get the user's login name."""
 441     global _username
 442     if not _username:
 443         uid = os.getuid()
 444         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 445     return _username
 446
 447
 448 _userfullname = None
 449 def userfullname():
 450     """Get the user's full name."""
 451     global _userfullname
 452     if not _userfullname:
 453         uid = os.getuid()
 454         entry = pwd_from_uid(uid)
 455         if entry:
 456             _userfullname = entry[4].split(',')[0] or entry[0]
 457         if not _userfullname:
 458             _userfullname = 'user%d' % uid
 459     return _userfullname
 460
 461
 462 _hostname = None
 463 def hostname():
 464     """Get the FQDN of this machine."""
 465     global _hostname
 466     if not _hostname:
 467         _hostname = socket.getfqdn()
 468     return _hostname
 469
 470
 471 _resource_path = None
 472 def resource_path(subdir=''):
 473     global _resource_path
 474     if not _resource_path:
 475         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 476     return os.path.join(_resource_path, subdir)
 477
 478 def format_filesize(size):
 479     unit = 1024.0
 480     size = float(size)
 481     if size < unit:
 482         return "%d" % (size)
 483     exponent = int(math.log(size) / math.log(unit))
 484     size_prefix = "KMGTPE"[exponent - 1]
 485     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 486
 487
 488 class NotOk(Exception):
 489     pass
 490
 491
 492 class BaseConn:
 493     def __init__(self, outp):
 494         self.outp = outp
 495
 496     def close(self):
 497         while self._read(65536): pass
 498
 499     def read(self, size):
 500         """Read 'size' bytes from input stream."""
 501         self.outp.flush()
 502         return self._read(size)
 503
 504     def readline(self):
 505         """Read from input stream until a newline is found."""
 506         self.outp.flush()
 507         return self._readline()
 508
 509     def write(self, data):
 510         """Write 'data' to output stream."""
 511         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 512         self.outp.write(data)
 513
 514     def has_input(self):
 515         """Return true if input stream is readable."""
 516         raise NotImplemented("Subclasses must implement has_input")
 517
 518     def ok(self):
 519         """Indicate end of output from last sent command."""
 520         self.write('\nok\n')
 521
 522     def error(self, s):
 523         """Indicate server error to the client."""
 524         s = re.sub(r'\s+', ' ', str(s))
 525         self.write('\nerror %s\n' % s)
 526
 527     def _check_ok(self, onempty):
 528         self.outp.flush()
 529         rl = ''
 530         for rl in linereader(self):
 531             #log('%d got line: %r\n' % (os.getpid(), rl))
 532             if not rl:  # empty line
 533                 continue
 534             elif rl == 'ok':
 535                 return None
 536             elif rl.startswith('error '):
 537                 #log('client: error: %s\n' % rl[6:])
 538                 return NotOk(rl[6:])
 539             else:
 540                 onempty(rl)
 541         raise Exception('server exited unexpectedly; see errors above')
 542
 543     def drain_and_check_ok(self):
 544         """Remove all data for the current command from input stream."""
 545         def onempty(rl):
 546             pass
 547         return self._check_ok(onempty)
 548
 549     def check_ok(self):
 550         """Verify that server action completed successfully."""
 551         def onempty(rl):
 552             raise Exception('expected "ok", got %r' % rl)
 553         return self._check_ok(onempty)
 554
 555
 556 class Conn(BaseConn):
 557     def __init__(self, inp, outp):
 558         BaseConn.__init__(self, outp)
 559         self.inp = inp
 560
 561     def _read(self, size):
 562         return self.inp.read(size)
 563
 564     def _readline(self):
 565         return self.inp.readline()
 566
 567     def has_input(self):
 568         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 569         if rl:
 570             assert(rl[0] == self.inp.fileno())
 571             return True
 572         else:
 573             return None
 574
 575
 576 def checked_reader(fd, n):
 577     while n > 0:
 578         rl, _, _ = select.select([fd], [], [])
 579         assert(rl[0] == fd)
 580         buf = os.read(fd, n)
 581         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 582         yield buf
 583         n -= len(buf)
 584
 585
 586 MAX_PACKET = 128 * 1024
 587 def mux(p, outfd, outr, errr):
 588     try:
 589         fds = [outr, errr]
 590         while p.poll() is None:
 591             rl, _, _ = select.select(fds, [], [])
 592             for fd in rl:
 593                 if fd == outr:
 594                     buf = os.read(outr, MAX_PACKET)
 595                     if not buf: break
 596                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 597                 elif fd == errr:
 598                     buf = os.read(errr, 1024)
 599                     if not buf: break
 600                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 601     finally:
 602         os.write(outfd, struct.pack('!IB', 0, 3))
 603
 604
 605 class DemuxConn(BaseConn):
 606     """A helper class for bup's client-server protocol."""
 607     def __init__(self, infd, outp):
 608         BaseConn.__init__(self, outp)
 609         # Anything that comes through before the sync string was not
 610         # multiplexed and can be assumed to be debug/log before mux init.
 611         tail = ''
 612         while tail != 'BUPMUX':
 613             b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
 614             if not b:
 615                 raise IOError('demux: unexpected EOF during initialization')
 616             tail += b
 617             sys.stderr.write(tail[:-6])  # pre-mux log messages
 618             tail = tail[-6:]
 619         self.infd = infd
 620         self.reader = None
 621         self.buf = None
 622         self.closed = False
 623
 624     def write(self, data):
 625         self._load_buf(0)
 626         BaseConn.write(self, data)
 627
 628     def _next_packet(self, timeout):
 629         if self.closed: return False
 630         rl, wl, xl = select.select([self.infd], [], [], timeout)
 631         if not rl: return False
 632         assert(rl[0] == self.infd)
 633         ns = ''.join(checked_reader(self.infd, 5))
 634         n, fdw = struct.unpack('!IB', ns)
 635         assert(n <= MAX_PACKET)
 636         if fdw == 1:
 637             self.reader = checked_reader(self.infd, n)
 638         elif fdw == 2:
 639             for buf in checked_reader(self.infd, n):
 640                 sys.stderr.write(buf)
 641         elif fdw == 3:
 642             self.closed = True
 643             debug2("DemuxConn: marked closed\n")
 644         return True
 645
 646     def _load_buf(self, timeout):
 647         if self.buf is not None:
 648             return True
 649         while not self.closed:
 650             while not self.reader:
 651                 if not self._next_packet(timeout):
 652                     return False
 653             try:
 654                 self.buf = next(self.reader)
 655                 return True
 656             except StopIteration:
 657                 self.reader = None
 658         return False
 659
 660     def _read_parts(self, ix_fn):
 661         while self._load_buf(None):
 662             assert(self.buf is not None)
 663             i = ix_fn(self.buf)
 664             if i is None or i == len(self.buf):
 665                 yv = self.buf
 666                 self.buf = None
 667             else:
 668                 yv = self.buf[:i]
 669                 self.buf = self.buf[i:]
 670             yield yv
 671             if i is not None:
 672                 break
 673
 674     def _readline(self):
 675         def find_eol(buf):
 676             try:
 677                 return buf.index('\n')+1
 678             except ValueError:
 679                 return None
 680         return ''.join(self._read_parts(find_eol))
 681
 682     def _read(self, size):
 683         csize = [size]
 684         def until_size(buf): # Closes on csize
 685             if len(buf) < csize[0]:
 686                 csize[0] -= len(buf)
 687                 return None
 688             else:
 689                 return csize[0]
 690         return ''.join(self._read_parts(until_size))
 691
 692     def has_input(self):
 693         return self._load_buf(0)
 694
 695
 696 def linereader(f):
 697     """Generate a list of input lines from 'f' without terminating newlines."""
 698     while 1:
 699         line = f.readline()
 700         if not line:
 701             break
 702         yield line[:-1]
 703
 704
 705 def chunkyreader(f, count = None):
 706     """Generate a list of chunks of data read from 'f'.
 707
 708     If count is None, read until EOF is reached.
 709
 710     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 711     reached while reading, raise IOError.
 712     """
 713     if count != None:
 714         while count > 0:
 715             b = f.read(min(count, 65536))
 716             if not b:
 717                 raise IOError('EOF with %d bytes remaining' % count)
 718             yield b
 719             count -= len(b)
 720     else:
 721         while 1:
 722             b = f.read(65536)
 723             if not b: break
 724             yield b
 725
 726
 727 @contextmanager
 728 def atomically_replaced_file(name, mode='w', buffering=-1):
 729     """Yield a file that will be atomically renamed name when leaving the block.
 730
 731     This contextmanager yields an open file object that is backed by a
 732     temporary file which will be renamed (atomically) to the target
 733     name if everything succeeds.
 734
 735     The mode and buffering arguments are handled exactly as with open,
 736     and the yielded file will have very restrictive permissions, as
 737     per mkstemp.
 738
 739     E.g.::
 740
 741         with atomically_replaced_file('foo.txt', 'w') as f:
 742             f.write('hello jack.')
 743
 744     """
 745
 746     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 747                                        text=('b' not in mode))
 748     try:
 749         try:
 750             f = os.fdopen(ffd, mode, buffering)
 751         except:
 752             os.close(ffd)
 753             raise
 754         try:
 755             yield f
 756         finally:
 757             f.close()
 758         os.rename(tempname, name)
 759     finally:
 760         unlink(tempname)  # nonexistant file is ignored
 761
 762
 763 def slashappend(s):
 764     """Append "/" to 's' if it doesn't aleady end in "/"."""
 765     if s and not s.endswith('/'):
 766         return s + '/'
 767     else:
 768         return s
 769
 770
 771 def _mmap_do(f, sz, flags, prot, close):
 772     if not sz:
 773         st = os.fstat(f.fileno())
 774         sz = st.st_size
 775     if not sz:
 776         # trying to open a zero-length map gives an error, but an empty
 777         # string has all the same behaviour of a zero-length map, ie. it has
 778         # no elements :)
 779         return ''
 780     map = mmap.mmap(f.fileno(), sz, flags, prot)
 781     if close:
 782         f.close()  # map will persist beyond file close
 783     return map
 784
 785
 786 def mmap_read(f, sz = 0, close=True):
 787     """Create a read-only memory mapped region on file 'f'.
 788     If sz is 0, the region will cover the entire file.
 789     """
 790     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 791
 792
 793 def mmap_readwrite(f, sz = 0, close=True):
 794     """Create a read-write memory mapped region on file 'f'.
 795     If sz is 0, the region will cover the entire file.
 796     """
 797     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 798                     close)
 799
 800
 801 def mmap_readwrite_private(f, sz = 0, close=True):
 802     """Create a read-write memory mapped region on file 'f'.
 803     If sz is 0, the region will cover the entire file.
 804     The map is private, which means the changes are never flushed back to the
 805     file.
 806     """
 807     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 808                     close)
 809
 810
 811 _mincore = getattr(_helpers, 'mincore', None)
 812 if _mincore:
 813     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 814     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 815
 816     _fmincore_chunk_size = None
 817     def _set_fmincore_chunk_size():
 818         global _fmincore_chunk_size
 819         pref_chunk_size = 64 * 1024 * 1024
 820         chunk_size = sc_page_size
 821         if (sc_page_size < pref_chunk_size):
 822             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 823         _fmincore_chunk_size = chunk_size
 824
 825     def fmincore(fd):
 826         """Return the mincore() data for fd as a bytearray whose values can be
 827         tested via MINCORE_INCORE, or None if fd does not fully
 828         support the operation."""
 829         st = os.fstat(fd)
 830         if (st.st_size == 0):
 831             return bytearray(0)
 832         if not _fmincore_chunk_size:
 833             _set_fmincore_chunk_size()
 834         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 835         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 836         chunk_count = page_count / _fmincore_chunk_size
 837         if chunk_count < 1:
 838             chunk_count = 1
 839         result = bytearray(page_count)
 840         for ci in xrange(chunk_count):
 841             pos = _fmincore_chunk_size * ci;
 842             msize = min(_fmincore_chunk_size, st.st_size - pos)
 843             try:
 844                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 845             except mmap.error as ex:
 846                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 847                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 848                     return None
 849                 raise ex
 850             try:
 851                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 852             except OSError as ex:
 853                 if ex.errno == errno.ENOSYS:
 854                     return None
 855                 raise
 856         return result
 857
 858
 859 def parse_timestamp(epoch_str):
 860     """Return the number of nanoseconds since the epoch that are described
 861 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 862 throw a ValueError that may contain additional information."""
 863     ns_per = {'s' :  1000000000,
 864               'ms' : 1000000,
 865               'us' : 1000,
 866               'ns' : 1}
 867     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 868     if not match:
 869         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 870             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 871         raise ValueError()
 872     (n, units) = match.group(1, 2)
 873     if not n:
 874         n = 1
 875     n = int(n)
 876     return n * ns_per[units]
 877
 878
 879 def parse_num(s):
 880     """Parse data size information into a float number.
 881
 882     Here are some examples of conversions:
 883         199.2k means 203981 bytes
 884         1GB means 1073741824 bytes
 885         2.1 tb means 2199023255552 bytes
 886     """
 887     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 888     if not g:
 889         raise ValueError("can't parse %r as a number" % s)
 890     (val, unit) = g.groups()
 891     num = float(val)
 892     unit = unit.lower()
 893     if unit in ['t', 'tb']:
 894         mult = 1024*1024*1024*1024
 895     elif unit in ['g', 'gb']:
 896         mult = 1024*1024*1024
 897     elif unit in ['m', 'mb']:
 898         mult = 1024*1024
 899     elif unit in ['k', 'kb']:
 900         mult = 1024
 901     elif unit in ['', 'b']:
 902         mult = 1
 903     else:
 904         raise ValueError("invalid unit %r in number %r" % (unit, s))
 905     return int(num*mult)
 906
 907
 908 def count(l):
 909     """Count the number of elements in an iterator. (consumes the iterator)"""
 910     return reduce(lambda x,y: x+1, l)
 911
 912
 913 saved_errors = []
 914 def add_error(e):
 915     """Append an error message to the list of saved errors.
 916
 917     Once processing is able to stop and output the errors, the saved errors are
 918     accessible in the module variable helpers.saved_errors.
 919     """
 920     saved_errors.append(e)
 921     log('%-70s\n' % e)
 922
 923
 924 def clear_errors():
 925     global saved_errors
 926     saved_errors = []
 927
 928
 929 def die_if_errors(msg=None, status=1):
 930     global saved_errors
 931     if saved_errors:
 932         if not msg:
 933             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 934         log(msg)
 935         sys.exit(status)
 936
 937
 938 def handle_ctrl_c():
 939     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 940
 941     The new exception handler will make sure that bup will exit without an ugly
 942     stacktrace when Ctrl-C is hit.
 943     """
 944     oldhook = sys.excepthook
 945     def newhook(exctype, value, traceback):
 946         if exctype == KeyboardInterrupt:
 947             log('\nInterrupted.\n')
 948         else:
 949             return oldhook(exctype, value, traceback)
 950     sys.excepthook = newhook
 951
 952
 953 def columnate(l, prefix):
 954     """Format elements of 'l' in columns with 'prefix' leading each line.
 955
 956     The number of columns is determined automatically based on the string
 957     lengths.
 958     """
 959     if not l:
 960         return ""
 961     l = l[:]
 962     clen = max(len(s) for s in l)
 963     ncols = (tty_width() - len(prefix)) // (clen + 2)
 964     if ncols <= 1:
 965         ncols = 1
 966         clen = 0
 967     cols = []
 968     while len(l) % ncols:
 969         l.append('')
 970     rows = len(l) // ncols
 971     for s in range(0, len(l), rows):
 972         cols.append(l[s:s+rows])
 973     out = ''
 974     for row in zip(*cols):
 975         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 976     return out
 977
 978
 979 def parse_date_or_fatal(str, fatal):
 980     """Parses the given date or calls Option.fatal().
 981     For now we expect a string that contains a float."""
 982     try:
 983         date = float(str)
 984     except ValueError as e:
 985         raise fatal('invalid date format (should be a float): %r' % e)
 986     else:
 987         return date
 988
 989
 990 def parse_excludes(options, fatal):
 991     """Traverse the options and extract all excludes, or call Option.fatal()."""
 992     excluded_paths = []
 993
 994     for flag in options:
 995         (option, parameter) = flag
 996         if option == '--exclude':
 997             excluded_paths.append(resolve_parent(parameter))
 998         elif option == '--exclude-from':
 999             try:
1000                 f = open(resolve_parent(parameter))
1001             except IOError as e:
1002                 raise fatal("couldn't read %s" % parameter)
1003             for exclude_path in f.readlines():
1004                 # FIXME: perhaps this should be rstrip('\n')
1005                 exclude_path = resolve_parent(exclude_path.strip())
1006                 if exclude_path:
1007                     excluded_paths.append(exclude_path)
1008     return sorted(frozenset(excluded_paths))
1009
1010
1011 def parse_rx_excludes(options, fatal):
1012     """Traverse the options and extract all rx excludes, or call
1013     Option.fatal()."""
1014     excluded_patterns = []
1015
1016     for flag in options:
1017         (option, parameter) = flag
1018         if option == '--exclude-rx':
1019             try:
1020                 excluded_patterns.append(re.compile(parameter))
1021             except re.error as ex:
1022                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
1023         elif option == '--exclude-rx-from':
1024             try:
1025                 f = open(resolve_parent(parameter))
1026             except IOError as e:
1027                 raise fatal("couldn't read %s" % parameter)
1028             for pattern in f.readlines():
1029                 spattern = pattern.rstrip('\n')
1030                 if not spattern:
1031                     continue
1032                 try:
1033                     excluded_patterns.append(re.compile(spattern))
1034                 except re.error as ex:
1035                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
1036     return excluded_patterns
1037
1038
1039 def should_rx_exclude_path(path, exclude_rxs):
1040     """Return True if path matches a regular expression in exclude_rxs."""
1041     for rx in exclude_rxs:
1042         if rx.search(path):
1043             debug1('Skipping %r: excluded by rx pattern %r.\n'
1044                    % (path, rx.pattern))
1045             return True
1046     return False
1047
1048
1049 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1050 # that resolve against the current filesystem in the strip/graft
1051 # functions for example, but elsewhere as well.  I suspect bup's not
1052 # always being careful about that.  For some cases, the contents of
1053 # the current filesystem should be irrelevant, and consulting it might
1054 # produce the wrong result, perhaps via unintended symlink resolution,
1055 # for example.
1056
1057 def path_components(path):
1058     """Break path into a list of pairs of the form (name,
1059     full_path_to_name).  Path must start with '/'.
1060     Example:
1061       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
1062     if not path.startswith('/'):
1063         raise Exception('path must start with "/": %s' % path)
1064     # Since we assume path startswith('/'), we can skip the first element.
1065     result = [('', '/')]
1066     norm_path = os.path.abspath(path)
1067     if norm_path == '/':
1068         return result
1069     full_path = ''
1070     for p in norm_path.split('/')[1:]:
1071         full_path += '/' + p
1072         result.append((p, full_path))
1073     return result
1074
1075
1076 def stripped_path_components(path, strip_prefixes):
1077     """Strip any prefix in strip_prefixes from path and return a list
1078     of path components where each component is (name,
1079     none_or_full_fs_path_to_name).  Assume path startswith('/').
1080     See thelpers.py for examples."""
1081     normalized_path = os.path.abspath(path)
1082     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
1083     for bp in sorted_strip_prefixes:
1084         normalized_bp = os.path.abspath(bp)
1085         if normalized_bp == '/':
1086             continue
1087         if normalized_path.startswith(normalized_bp):
1088             prefix = normalized_path[:len(normalized_bp)]
1089             result = []
1090             for p in normalized_path[len(normalized_bp):].split('/'):
1091                 if p: # not root
1092                     prefix += '/'
1093                 prefix += p
1094                 result.append((p, prefix))
1095             return result
1096     # Nothing to strip.
1097     return path_components(path)
1098
1099
1100 def grafted_path_components(graft_points, path):
1101     # Create a result that consists of some number of faked graft
1102     # directories before the graft point, followed by all of the real
1103     # directories from path that are after the graft point.  Arrange
1104     # for the directory at the graft point in the result to correspond
1105     # to the "orig" directory in --graft orig=new.  See t/thelpers.py
1106     # for some examples.
1107
1108     # Note that given --graft orig=new, orig and new have *nothing* to
1109     # do with each other, even if some of their component names
1110     # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
1111     # equivalent to --graft /foo/bar/baz=/x/y/z, or even
1112     # /foo/bar/baz=/x.
1113
1114     # FIXME: This can't be the best solution...
1115     clean_path = os.path.abspath(path)
1116     for graft_point in graft_points:
1117         old_prefix, new_prefix = graft_point
1118         # Expand prefixes iff not absolute paths.
1119         old_prefix = os.path.normpath(old_prefix)
1120         new_prefix = os.path.normpath(new_prefix)
1121         if clean_path.startswith(old_prefix):
1122             escaped_prefix = re.escape(old_prefix)
1123             grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
1124             # Handle /foo=/ (at least) -- which produces //whatever.
1125             grafted_path = '/' + grafted_path.lstrip('/')
1126             clean_path_components = path_components(clean_path)
1127             # Count the components that were stripped.
1128             strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
1129             new_prefix_parts = new_prefix.split('/')
1130             result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
1131             result = [(p, None) for p in result_prefix] \
1132                 + clean_path_components[strip_count:]
1133             # Now set the graft point name to match the end of new_prefix.
1134             graft_point = len(result_prefix)
1135             result[graft_point] = \
1136                 (new_prefix_parts[-1], clean_path_components[strip_count][1])
1137             if new_prefix == '/': # --graft ...=/ is a special case.
1138                 return result[1:]
1139             return result
1140     return path_components(clean_path)
1141
1142
1143 Sha1 = hashlib.sha1
1144
1145
1146 _localtime = getattr(_helpers, 'localtime', None)
1147
1148 if _localtime:
1149     bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
1150                                        'tm_hour', 'tm_min', 'tm_sec',
1151                                        'tm_wday', 'tm_yday',
1152                                        'tm_isdst', 'tm_gmtoff', 'tm_zone'])
1153
1154 # Define a localtime() that returns bup_time when possible.  Note:
1155 # this means that any helpers.localtime() results may need to be
1156 # passed through to_py_time() before being passed to python's time
1157 # module, which doesn't appear willing to ignore the extra items.
1158 if _localtime:
1159     def localtime(time):
1160         return bup_time(*_helpers.localtime(time))
1161     def utc_offset_str(t):
1162         """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
1163         If the current UTC offset does not represent an integer number
1164         of minutes, the fractional component will be truncated."""
1165         off = localtime(t).tm_gmtoff
1166         # Note: // doesn't truncate like C for negative values, it rounds down.
1167         offmin = abs(off) // 60
1168         m = offmin % 60
1169         h = (offmin - m) // 60
1170         return "%+03d%02d" % (-h if off < 0 else h, m)
1171     def to_py_time(x):
1172         if isinstance(x, time.struct_time):
1173             return x
1174         return time.struct_time(x[:9])
1175 else:
1176     localtime = time.localtime
1177     def utc_offset_str(t):
1178         return time.strftime('%z', localtime(t))
1179     def to_py_time(x):
1180         return x
1181
1182
1183 _some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{')
1184
1185 def valid_save_name(name):
1186     # Enforce a superset of the restrictions in git-check-ref-format(1)
1187     if name == '@' \
1188        or name.startswith('/') or name.endswith('/') \
1189        or name.endswith('.'):
1190         return False
1191     if _some_invalid_save_parts_rx.search(name):
1192         return False
1193     for c in name:
1194         if ord(c) < 0x20 or ord(c) == 0x7f:
1195             return False
1196     for part in name.split('/'):
1197         if part.startswith('.') or part.endswith('.lock'):
1198             return False
1199     return True
1200
1201
1202 _period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')
1203
1204 def period_as_secs(s):
1205     if s == 'forever':
1206         return float('inf')
1207     match = _period_rx.match(s)
1208     if not match:
1209         return None
1210     mag = int(match.group(1))
1211     scale = match.group(2)
1212     return mag * {'s': 1,
1213                   'min': 60,
1214                   'h': 60 * 60,
1215                   'd': 60 * 60 * 24,
1216                   'w': 60 * 60 * 24 * 7,
1217                   'm': 60 * 60 * 24 * 31,
1218                   'y': 60 * 60 * 24 * 366}[scale]