lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from contextlib import contextmanager
   5 from ctypes import sizeof, c_void_p
   6 from os import environ
   7 from pipes import quote
   8 from subprocess import PIPE, Popen
   9 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  10 import hashlib, heapq, math, operator, time, grp, tempfile
  11
  12 from bup import _helpers
  13 from bup import compat
  14
class Nonlocal:
    """Helper to deal with Python scoping issues.

    An empty attribute container: nested functions can assign to
    attributes of a shared instance instead of rebinding a variable in
    the enclosing scope (see partition() for a use).
    """
    pass
  18
  19
# Value of sysconf(SC_PAGE_SIZE); must be positive.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Value of sysconf(SC_ARG_MAX); used as the default arg_max of batchpipe().
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  26
# This function should really live in helpers, not in bup.options.  But we
# want options.py to be standalone so people can include it in other
# projects, so it is defined there and re-exported here as tty_width.
from bup.options import _tty_width
tty_width = _tty_width
  31
  32
  33 def last(iterable):
  34     result = None
  35     for result in iterable:
  36         pass
  37     return result
  38
  39
  40 def atoi(s):
  41     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  42     try:
  43         return int(s or '0')
  44     except ValueError:
  45         return 0
  46
  47
  48 def atof(s):
  49     """Convert the string 's' to a float. Return 0 if s is not a number."""
  50     try:
  51         return float(s or '0')
  52     except ValueError:
  53         return 0
  54
  55
# Debug verbosity taken from the BUP_DEBUG environment variable (0 when it
# is unset or not a number); debug1() and debug2() compare against it.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  57
  58
  59 try:
  60     _fdatasync = os.fdatasync
  61 except AttributeError:
  62     _fdatasync = os.fsync
  63
  64 if sys.platform.startswith('darwin'):
  65     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  66     import fcntl
  67     def fdatasync(fd):
  68         try:
  69             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  70         except IOError as e:
  71             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  72             if e.errno == errno.ENOTSUP:
  73                 return _fdatasync(fd)
  74             else:
  75                 raise
  76 else:
  77     fdatasync = _fdatasync
  78
  79
  80 def partition(predicate, stream):
  81     """Returns (leading_matches_it, rest_it), where leading_matches_it
  82     must be completely exhausted before traversing rest_it.
  83
  84     """
  85     stream = iter(stream)
  86     ns = Nonlocal()
  87     ns.first_nonmatch = None
  88     def leading_matches():
  89         for x in stream:
  90             if predicate(x):
  91                 yield x
  92             else:
  93                 ns.first_nonmatch = (x,)
  94                 break
  95     def rest():
  96         if ns.first_nonmatch:
  97             yield ns.first_nonmatch[0]
  98             for x in stream:
  99                 yield x
 100     return (leading_matches(), rest())
 101
 102
 103 def lines_until_sentinel(f, sentinel, ex_type):
 104     # sentinel must end with \n and must contain only one \n
 105     while True:
 106         line = f.readline()
 107         if not (line and line.endswith('\n')):
 108             raise ex_type('Hit EOF while reading line')
 109         if line == sentinel:
 110             return
 111         yield line
 112
 113
 114 def stat_if_exists(path):
 115     try:
 116         return os.stat(path)
 117     except OSError as e:
 118         if e.errno != errno.ENOENT:
 119             raise
 120     return None
 121
 122
 123 # Write (blockingly) to sockets that may or may not be in blocking mode.
 124 # We need this because our stderr is sometimes eaten by subprocesses
 125 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 126 # leading to race conditions.  Ick.  We'll do it the hard way.
 127 def _hard_write(fd, buf):
 128     while buf:
 129         (r,w,x) = select.select([], [fd], [], None)
 130         if not w:
 131             raise IOError('select(fd) returned without being writable')
 132         try:
 133             sz = os.write(fd, buf)
 134         except OSError as e:
 135             if e.errno != errno.EAGAIN:
 136                 raise
 137         assert(sz >= 0)
 138         buf = buf[sz:]
 139
 140
 141 _last_prog = 0
 142 def log(s):
 143     """Print a log message to stderr."""
 144     global _last_prog
 145     sys.stdout.flush()
 146     _hard_write(sys.stderr.fileno(), s)
 147     _last_prog = 0
 148
 149
 150 def debug1(s):
 151     if buglvl >= 1:
 152         log(s)
 153
 154
 155 def debug2(s):
 156     if buglvl >= 2:
 157         log(s)
 158
 159
 160 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 161 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 162 _last_progress = ''
 163 def progress(s):
 164     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 165     global _last_progress
 166     if istty2:
 167         log(s)
 168         _last_progress = s
 169
 170
 171 def qprogress(s):
 172     """Calls progress() only if we haven't printed progress in a while.
 173
 174     This avoids overloading the stderr buffer with excess junk.
 175     """
 176     global _last_prog
 177     now = time.time()
 178     if now - _last_prog > 0.1:
 179         progress(s)
 180         _last_prog = now
 181
 182
 183 def reprogress():
 184     """Calls progress() to redisplay the most recent progress message.
 185
 186     Useful after you've printed some other message that wipes out the
 187     progress line.
 188     """
 189     if _last_progress and _last_progress.endswith('\r'):
 190         progress(_last_progress)
 191
 192
 193 def mkdirp(d, mode=None):
 194     """Recursively create directories on path 'd'.
 195
 196     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 197     the path already exists.
 198     """
 199     try:
 200         if mode:
 201             os.makedirs(d, mode)
 202         else:
 203             os.makedirs(d)
 204     except OSError as e:
 205         if e.errno == errno.EEXIST:
 206             pass
 207         else:
 208             raise
 209
 210
 211 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 212     if key:
 213         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 214     else:
 215         samekey = operator.eq
 216     count = 0
 217     total = sum(len(it) for it in iters)
 218     iters = (iter(it) for it in iters)
 219     heap = ((next(it, None),it) for it in iters)
 220     heap = [(e,it) for e,it in heap if e]
 221
 222     heapq.heapify(heap)
 223     pe = None
 224     while heap:
 225         if not count % pfreq:
 226             pfunc(count, total)
 227         e, it = heap[0]
 228         if not samekey(e, pe):
 229             pe = e
 230             yield e
 231         count += 1
 232         try:
 233             e = next(it)
 234         except StopIteration:
 235             heapq.heappop(heap) # remove current
 236         else:
 237             heapq.heapreplace(heap, (e, it)) # shift current to new location
 238     pfinal(count, total)
 239
 240
 241 def unlink(f):
 242     """Delete a file at path 'f' if it currently exists.
 243
 244     Unlike os.unlink(), does not throw an exception if the file didn't already
 245     exist.
 246     """
 247     try:
 248         os.unlink(f)
 249     except OSError as e:
 250         if e.errno != errno.ENOENT:
 251             raise
 252
 253
 254 def shstr(cmd):
 255     if isinstance(cmd, compat.str_type):
 256         return cmd
 257     else:
 258         return ' '.join(map(quote, cmd))
 259
# exc(...) is an alias for subprocess.check_call.
exc = subprocess.check_call
 261
 262 def exo(cmd,
 263         input=None,
 264         stdin=None,
 265         stderr=None,
 266         shell=False,
 267         check=True,
 268         preexec_fn=None):
 269     if input:
 270         assert stdin in (None, PIPE)
 271         stdin = PIPE
 272     p = Popen(cmd,
 273               stdin=stdin, stdout=PIPE, stderr=stderr,
 274               shell=shell,
 275               preexec_fn=preexec_fn)
 276     out, err = p.communicate(input)
 277     if check and p.returncode != 0:
 278         raise Exception('subprocess %r failed with status %d, stderr: %r'
 279                         % (' '.join(map(quote, cmd)), p.returncode, err))
 280     return out, err, p
 281
 282 def readpipe(argv, preexec_fn=None, shell=False):
 283     """Run a subprocess and return its output."""
 284     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 285                          shell=shell)
 286     out, err = p.communicate()
 287     if p.returncode != 0:
 288         raise Exception('subprocess %r failed with status %d'
 289                         % (' '.join(argv), p.returncode))
 290     return out
 291
 292
 293 def _argmax_base(command):
 294     base_size = 2048
 295     for c in command:
 296         base_size += len(command) + 1
 297     for k, v in environ.iteritems():
 298         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 299     return base_size
 300
 301
 302 def _argmax_args_size(args):
 303     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 304
 305
 306 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 307     """If args is not empty, yield the output produced by calling the
 308 command list with args as a sequence of strings (It may be necessary
 309 to return multiple strings in order to respect ARG_MAX)."""
 310     # The optional arg_max arg is a workaround for an issue with the
 311     # current wvtest behavior.
 312     base_size = _argmax_base(command)
 313     while args:
 314         room = arg_max - base_size
 315         i = 0
 316         while i < len(args):
 317             next_size = _argmax_args_size(args[i:i+1])
 318             if room - next_size < 0:
 319                 break
 320             room -= next_size
 321             i += 1
 322         sub_args = args[:i]
 323         args = args[i:]
 324         assert(len(sub_args))
 325         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 326
 327
 328 def resolve_parent(p):
 329     """Return the absolute path of a file without following any final symlink.
 330
 331     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 332     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 333     will follow symlinks in p's directory)
 334     """
 335     try:
 336         st = os.lstat(p)
 337     except OSError:
 338         st = None
 339     if st and stat.S_ISLNK(st.st_mode):
 340         (dir, name) = os.path.split(p)
 341         dir = os.path.realpath(dir)
 342         out = os.path.join(dir, name)
 343     else:
 344         out = os.path.realpath(p)
 345     #log('realpathing:%r,%r\n' % (p, out))
 346     return out
 347
 348
 349 def detect_fakeroot():
 350     "Return True if we appear to be running under fakeroot."
 351     return os.getenv("FAKEROOTKEY") != None
 352
 353
 354 _warned_about_superuser_detection = None
 355 def is_superuser():
 356     if sys.platform.startswith('cygwin'):
 357         if sys.getwindowsversion()[0] > 5:
 358             # Sounds like situation is much more complicated here
 359             global _warned_about_superuser_detection
 360             if not _warned_about_superuser_detection:
 361                 log("can't detect root status for OS version > 5; assuming not root")
 362                 _warned_about_superuser_detection = True
 363             return False
 364         import ctypes
 365         return ctypes.cdll.shell32.IsUserAnAdmin()
 366     else:
 367         return os.geteuid() == 0
 368
 369
 370 def _cache_key_value(get_value, key, cache):
 371     """Return (value, was_cached).  If there is a value in the cache
 372     for key, use that, otherwise, call get_value(key) which should
 373     throw a KeyError if there is no value -- in which case the cached
 374     and returned value will be None.
 375     """
 376     try: # Do we already have it (or know there wasn't one)?
 377         value = cache[key]
 378         return value, True
 379     except KeyError:
 380         pass
 381     value = None
 382     try:
 383         cache[key] = value = get_value(key)
 384     except KeyError:
 385         cache[key] = None
 386     return value, False
 387
 388
 389 _uid_to_pwd_cache = {}
 390 _name_to_pwd_cache = {}
 391
 392 def pwd_from_uid(uid):
 393     """Return password database entry for uid (may be a cached value).
 394     Return None if no entry is found.
 395     """
 396     global _uid_to_pwd_cache, _name_to_pwd_cache
 397     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 398     if entry and not cached:
 399         _name_to_pwd_cache[entry.pw_name] = entry
 400     return entry
 401
 402
 403 def pwd_from_name(name):
 404     """Return password database entry for name (may be a cached value).
 405     Return None if no entry is found.
 406     """
 407     global _uid_to_pwd_cache, _name_to_pwd_cache
 408     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 409     if entry and not cached:
 410         _uid_to_pwd_cache[entry.pw_uid] = entry
 411     return entry
 412
 413
 414 _gid_to_grp_cache = {}
 415 _name_to_grp_cache = {}
 416
 417 def grp_from_gid(gid):
 418     """Return password database entry for gid (may be a cached value).
 419     Return None if no entry is found.
 420     """
 421     global _gid_to_grp_cache, _name_to_grp_cache
 422     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 423     if entry and not cached:
 424         _name_to_grp_cache[entry.gr_name] = entry
 425     return entry
 426
 427
 428 def grp_from_name(name):
 429     """Return password database entry for name (may be a cached value).
 430     Return None if no entry is found.
 431     """
 432     global _gid_to_grp_cache, _name_to_grp_cache
 433     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 434     if entry and not cached:
 435         _gid_to_grp_cache[entry.gr_gid] = entry
 436     return entry
 437
 438
 439 _username = None
 440 def username():
 441     """Get the user's login name."""
 442     global _username
 443     if not _username:
 444         uid = os.getuid()
 445         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 446     return _username
 447
 448
 449 _userfullname = None
 450 def userfullname():
 451     """Get the user's full name."""
 452     global _userfullname
 453     if not _userfullname:
 454         uid = os.getuid()
 455         entry = pwd_from_uid(uid)
 456         if entry:
 457             _userfullname = entry[4].split(',')[0] or entry[0]
 458         if not _userfullname:
 459             _userfullname = 'user%d' % uid
 460     return _userfullname
 461
 462
 463 _hostname = None
 464 def hostname():
 465     """Get the FQDN of this machine."""
 466     global _hostname
 467     if not _hostname:
 468         _hostname = socket.getfqdn()
 469     return _hostname
 470
 471
 472 _resource_path = None
 473 def resource_path(subdir=''):
 474     global _resource_path
 475     if not _resource_path:
 476         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 477     return os.path.join(_resource_path, subdir)
 478
 479 def format_filesize(size):
 480     unit = 1024.0
 481     size = float(size)
 482     if size < unit:
 483         return "%d" % (size)
 484     exponent = int(math.log(size) / math.log(unit))
 485     size_prefix = "KMGTPE"[exponent - 1]
 486     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 487
 488
class NotOk(Exception):
    """Carries the text of an 'error ...' line; built by BaseConn._check_ok()."""
    pass
 491
 492
 493 class BaseConn:
 494     def __init__(self, outp):
 495         self.outp = outp
 496
 497     def close(self):
 498         while self._read(65536): pass
 499
 500     def read(self, size):
 501         """Read 'size' bytes from input stream."""
 502         self.outp.flush()
 503         return self._read(size)
 504
 505     def readline(self):
 506         """Read from input stream until a newline is found."""
 507         self.outp.flush()
 508         return self._readline()
 509
 510     def write(self, data):
 511         """Write 'data' to output stream."""
 512         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 513         self.outp.write(data)
 514
 515     def has_input(self):
 516         """Return true if input stream is readable."""
 517         raise NotImplemented("Subclasses must implement has_input")
 518
 519     def ok(self):
 520         """Indicate end of output from last sent command."""
 521         self.write('\nok\n')
 522
 523     def error(self, s):
 524         """Indicate server error to the client."""
 525         s = re.sub(r'\s+', ' ', str(s))
 526         self.write('\nerror %s\n' % s)
 527
 528     def _check_ok(self, onempty):
 529         self.outp.flush()
 530         rl = ''
 531         for rl in linereader(self):
 532             #log('%d got line: %r\n' % (os.getpid(), rl))
 533             if not rl:  # empty line
 534                 continue
 535             elif rl == 'ok':
 536                 return None
 537             elif rl.startswith('error '):
 538                 #log('client: error: %s\n' % rl[6:])
 539                 return NotOk(rl[6:])
 540             else:
 541                 onempty(rl)
 542         raise Exception('server exited unexpectedly; see errors above')
 543
 544     def drain_and_check_ok(self):
 545         """Remove all data for the current command from input stream."""
 546         def onempty(rl):
 547             pass
 548         return self._check_ok(onempty)
 549
 550     def check_ok(self):
 551         """Verify that server action completed successfully."""
 552         def onempty(rl):
 553             raise Exception('expected "ok", got %r' % rl)
 554         return self._check_ok(onempty)
 555
 556
 557 class Conn(BaseConn):
 558     def __init__(self, inp, outp):
 559         BaseConn.__init__(self, outp)
 560         self.inp = inp
 561
 562     def _read(self, size):
 563         return self.inp.read(size)
 564
 565     def _readline(self):
 566         return self.inp.readline()
 567
 568     def has_input(self):
 569         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 570         if rl:
 571             assert(rl[0] == self.inp.fileno())
 572             return True
 573         else:
 574             return None
 575
 576
 577 def checked_reader(fd, n):
 578     while n > 0:
 579         rl, _, _ = select.select([fd], [], [])
 580         assert(rl[0] == fd)
 581         buf = os.read(fd, n)
 582         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 583         yield buf
 584         n -= len(buf)
 585
 586
# Largest payload read from outr per packet; DemuxConn asserts that no
# incoming packet claims to be larger.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex the descriptors outr and errr onto outfd while p runs.

    Each packet is a header packed as '!IB' (payload length, channel)
    followed by the payload: channel 1 carries data read from outr and
    channel 2 data read from errr.  A zero-length channel 3 packet is
    always written last, even if an exception occurs.  The loop runs
    until p.poll() stops returning None.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # An empty read only leaves this for loop; the
                    # enclosing while keeps polling p.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        # Channel 3 is the close marker (see DemuxConn._next_packet).
        os.write(outfd, struct.pack('!IB', 0, 3))
 604
 605
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the packet stream produced by mux() from the descriptor infd:
    channel 1 payloads become the connection's input, channel 2
    payloads are copied to stderr, and a channel 3 packet marks the
    stream closed.
    """
    def __init__(self, infd, outp):
        """Consume infd up to and including the 'BUPMUX' sync string.

        Raises IOError if infd reaches EOF before the sync string.
        """
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to fill tail to 6 bytes, then one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None  # checked_reader over the current data packet
        self.buf = None     # data chunk not yet handed to the caller
        self.closed = False # set once a channel 3 packet has been seen

    def write(self, data):
        """Process any packet that is already waiting, then write data."""
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and dispatch on its channel.

        Returns False if the connection is closed or select() reports
        nothing readable within timeout; True after handling a packet.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        # Header is 5 bytes: payload length and channel, packed as '!IB'.
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily through self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: copy the payload straight to our stderr.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Ensure self.buf holds data; return whether it does.

        timeout is passed on to _next_packet() while waiting for a data
        packet.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current data packet is used up; wait for the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of input until ix_fn(buf) returns a cut index.

        While ix_fn returns None the whole buffer is yielded and reading
        continues; once it returns an index, buf[:index] is yielded, the
        remainder is kept in self.buf, and iteration stops.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        """Return input up to and including the next newline."""
        def find_eol(buf):
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        """Return up to 'size' bytes of input (fewer if the stream closes)."""
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return whether data is available, waiting with a zero timeout."""
        return self._load_buf(0)
 695
 696
 697 def linereader(f):
 698     """Generate a list of input lines from 'f' without terminating newlines."""
 699     while 1:
 700         line = f.readline()
 701         if not line:
 702             break
 703         yield line[:-1]
 704
 705
 706 def chunkyreader(f, count = None):
 707     """Generate a list of chunks of data read from 'f'.
 708
 709     If count is None, read until EOF is reached.
 710
 711     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 712     reached while reading, raise IOError.
 713     """
 714     if count != None:
 715         while count > 0:
 716             b = f.read(min(count, 65536))
 717             if not b:
 718                 raise IOError('EOF with %d bytes remaining' % count)
 719             yield b
 720             count -= len(b)
 721     else:
 722         while 1:
 723             b = f.read(65536)
 724             if not b: break
 725             yield b
 726
 727
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed to name when leaving the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.  If the block raises, the temporary
    file is removed and the target is left untouched.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    # Create the temporary file next to the target.
    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so nothing owns ffd yet; close it ourselves.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Only reached when the with-block finished without raising.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 762
 763
 764 def slashappend(s):
 765     """Append "/" to 's' if it doesn't aleady end in "/"."""
 766     if s and not s.endswith('/'):
 767         return s + '/'
 768     else:
 769         return s
 770
 771
 772 def _mmap_do(f, sz, flags, prot, close):
 773     if not sz:
 774         st = os.fstat(f.fileno())
 775         sz = st.st_size
 776     if not sz:
 777         # trying to open a zero-length map gives an error, but an empty
 778         # string has all the same behaviour of a zero-length map, ie. it has
 779         # no elements :)
 780         return ''
 781     map = mmap.mmap(f.fileno(), sz, flags, prot)
 782     if close:
 783         f.close()  # map will persist beyond file close
 784     return map
 785
 786
 787 def mmap_read(f, sz = 0, close=True):
 788     """Create a read-only memory mapped region on file 'f'.
 789     If sz is 0, the region will cover the entire file.
 790     """
 791     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 792
 793
 794 def mmap_readwrite(f, sz = 0, close=True):
 795     """Create a read-write memory mapped region on file 'f'.
 796     If sz is 0, the region will cover the entire file.
 797     """
 798     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 799                     close)
 800
 801
 802 def mmap_readwrite_private(f, sz = 0, close=True):
 803     """Create a read-write memory mapped region on file 'f'.
 804     If sz is 0, the region will cover the entire file.
 805     The map is private, which means the changes are never flushed back to the
 806     file.
 807     """
 808     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 809                     close)
 810
 811
 812 _mincore = getattr(_helpers, 'mincore', None)
 813 if _mincore:
 814     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 815     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 816
 817     _fmincore_chunk_size = None
 818     def _set_fmincore_chunk_size():
 819         global _fmincore_chunk_size
 820         pref_chunk_size = 64 * 1024 * 1024
 821         chunk_size = sc_page_size
 822         if (sc_page_size < pref_chunk_size):
 823             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 824         _fmincore_chunk_size = chunk_size
 825
 826     def fmincore(fd):
 827         """Return the mincore() data for fd as a bytearray whose values can be
 828         tested via MINCORE_INCORE, or None if fd does not fully
 829         support the operation."""
 830         st = os.fstat(fd)
 831         if (st.st_size == 0):
 832             return bytearray(0)
 833         if not _fmincore_chunk_size:
 834             _set_fmincore_chunk_size()
 835         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 836         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 837         chunk_count = page_count / _fmincore_chunk_size
 838         if chunk_count < 1:
 839             chunk_count = 1
 840         result = bytearray(page_count)
 841         for ci in xrange(chunk_count):
 842             pos = _fmincore_chunk_size * ci;
 843             msize = min(_fmincore_chunk_size, st.st_size - pos)
 844             try:
 845                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 846             except mmap.error as ex:
 847                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 848                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 849                     return None
 850                 raise ex
 851             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 852         return result
 853
 854
 855 def parse_timestamp(epoch_str):
 856     """Return the number of nanoseconds since the epoch that are described
 857 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 858 throw a ValueError that may contain additional information."""
 859     ns_per = {'s' :  1000000000,
 860               'ms' : 1000000,
 861               'us' : 1000,
 862               'ns' : 1}
 863     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 864     if not match:
 865         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 866             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 867         raise ValueError()
 868     (n, units) = match.group(1, 2)
 869     if not n:
 870         n = 1
 871     n = int(n)
 872     return n * ns_per[units]
 873
 874
 875 def parse_num(s):
 876     """Parse data size information into a float number.
 877
 878     Here are some examples of conversions:
 879         199.2k means 203981 bytes
 880         1GB means 1073741824 bytes
 881         2.1 tb means 2199023255552 bytes
 882     """
 883     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 884     if not g:
 885         raise ValueError("can't parse %r as a number" % s)
 886     (val, unit) = g.groups()
 887     num = float(val)
 888     unit = unit.lower()
 889     if unit in ['t', 'tb']:
 890         mult = 1024*1024*1024*1024
 891     elif unit in ['g', 'gb']:
 892         mult = 1024*1024*1024
 893     elif unit in ['m', 'mb']:
 894         mult = 1024*1024
 895     elif unit in ['k', 'kb']:
 896         mult = 1024
 897     elif unit in ['', 'b']:
 898         mult = 1
 899     else:
 900         raise ValueError("invalid unit %r in number %r" % (unit, s))
 901     return int(num*mult)
 902
 903
 904 def count(l):
 905     """Count the number of elements in an iterator. (consumes the iterator)"""
 906     return reduce(lambda x,y: x+1, l)
 907
 908
 909 saved_errors = []
 910 def add_error(e):
 911     """Append an error message to the list of saved errors.
 912
 913     Once processing is able to stop and output the errors, the saved errors are
 914     accessible in the module variable helpers.saved_errors.
 915     """
 916     saved_errors.append(e)
 917     log('%-70s\n' % e)
 918
 919
def clear_errors():
    """Forget all saved errors by rebinding saved_errors to a new list."""
    global saved_errors
    # NOTE(review): this rebinds the module global rather than emptying
    # the list in place, so a reference to the old list obtained earlier
    # keeps its contents and will not see later add_error() calls.
    saved_errors = []
 923
 924
 925 def die_if_errors(msg=None, status=1):
 926     global saved_errors
 927     if saved_errors:
 928         if not msg:
 929             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 930         log(msg)
 931         sys.exit(status)
 932
 933
 934 def handle_ctrl_c():
 935     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 936
 937     The new exception handler will make sure that bup will exit without an ugly
 938     stacktrace when Ctrl-C is hit.
 939     """
 940     oldhook = sys.excepthook
 941     def newhook(exctype, value, traceback):
 942         if exctype == KeyboardInterrupt:
 943             log('\nInterrupted.\n')
 944         else:
 945             return oldhook(exctype, value, traceback)
 946     sys.excepthook = newhook
 947
 948
 949 def columnate(l, prefix):
 950     """Format elements of 'l' in columns with 'prefix' leading each line.
 951
 952     The number of columns is determined automatically based on the string
 953     lengths.
 954     """
 955     if not l:
 956         return ""
 957     l = l[:]
 958     clen = max(len(s) for s in l)
 959     ncols = (tty_width() - len(prefix)) / (clen + 2)
 960     if ncols <= 1:
 961         ncols = 1
 962         clen = 0
 963     cols = []
 964     while len(l) % ncols:
 965         l.append('')
 966     rows = len(l)/ncols
 967     for s in range(0, len(l), rows):
 968         cols.append(l[s:s+rows])
 969     out = ''
 970     for row in zip(*cols):
 971         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 972     return out
 973
 974
 975 def parse_date_or_fatal(str, fatal):
 976     """Parses the given date or calls Option.fatal().
 977     For now we expect a string that contains a float."""
 978     try:
 979         date = float(str)
 980     except ValueError as e:
 981         raise fatal('invalid date format (should be a float): %r' % e)
 982     else:
 983         return date
 984
 985
 986 def parse_excludes(options, fatal):
 987     """Traverse the options and extract all excludes, or call Option.fatal()."""
 988     excluded_paths = []
 989
 990     for flag in options:
 991         (option, parameter) = flag
 992         if option == '--exclude':
 993             excluded_paths.append(resolve_parent(parameter))
 994         elif option == '--exclude-from':
 995             try:
 996                 f = open(resolve_parent(parameter))
 997             except IOError as e:
 998                 raise fatal("couldn't read %s" % parameter)
 999             for exclude_path in f.readlines():
1000                 # FIXME: perhaps this should be rstrip('\n')
1001                 exclude_path = resolve_parent(exclude_path.strip())
1002                 if exclude_path:
1003                     excluded_paths.append(exclude_path)
1004     return sorted(frozenset(excluded_paths))
1005
1006
def parse_rx_excludes(options, fatal):
    """Traverse the options and extract all rx excludes, or call
    Option.fatal().

    'options' is an iterable of (option, parameter) pairs.  The parameter
    of every '--exclude-rx' option, and every non-empty line (trailing
    newline removed) of each file named by an '--exclude-rx-from' option,
    is compiled as a regular expression.  Other options are ignored.

    Returns the list of compiled patterns, in the order encountered.

    'fatal' is called with a message for every pattern that fails to
    compile.  If an '--exclude-rx-from' file can't be opened, 'fatal' is
    called and whatever it returns is raised.
    """
    excluded_patterns = []

    for option, parameter in options:
        if option == '--exclude-rx':
            try:
                excluded_patterns.append(re.compile(parameter))
            except re.error as ex:
                fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
        elif option == '--exclude-rx-from':
            try:
                f = open(resolve_parent(parameter))
            except IOError:
                raise fatal("couldn't read %s" % parameter)
            # Make sure the file is closed again, whatever happens while
            # its patterns are being compiled.
            with f:
                for pattern in f:
                    spattern = pattern.rstrip('\n')
                    if not spattern:
                        continue
                    try:
                        excluded_patterns.append(re.compile(spattern))
                    except re.error as ex:
                        fatal('invalid --exclude-rx pattern (%s): %s'
                              % (spattern, ex))
    return excluded_patterns
1033
1034
def should_rx_exclude_path(path, exclude_rxs):
    """Tell whether any of the compiled patterns in exclude_rxs is found
    in path.

    The first pattern that matches is reported via debug1() and True is
    returned; False is returned when no pattern matches.
    """
    hit = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if hit is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, hit.pattern))
    return True
1043
1044
1045 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1046 # that resolve against the current filesystem in the strip/graft
1047 # functions for example, but elsewhere as well.  I suspect bup's not
1048 # always being careful about that.  For some cases, the contents of
1049 # the current filesystem should be irrelevant, and consulting it might
1050 # produce the wrong result, perhaps via unintended symlink resolution,
1051 # for example.
1052
def path_components(path):
    """Break path into a list of pairs of the form (name,
    full_path_to_name).  Path must start with '/'.

    The path is normalized with os.path.abspath() before it is split,
    and the first pair always stands for the root directory.

    Raises Exception if path does not start with '/'.

    Example:
      '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
    if not path.startswith('/'):
        # Call syntax rather than "raise Exception, msg", which only
        # Python 2 accepts.
        raise Exception('path must start with "/": %s' % path)
    # Since we assume path startswith('/'), we can skip the first element.
    result = [('', '/')]
    norm_path = os.path.abspath(path)
    if norm_path == '/':
        return result
    full_path = ''
    for p in norm_path.split('/')[1:]:
        full_path += '/' + p
        result.append((p, full_path))
    return result
1070
1071
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Both path and the prefixes are normalized with os.path.abspath().
    Prefixes are tried longest first, and a prefix only applies when it
    is the path itself or one of its parent directories; '/' is never
    stripped.  The first component of a stripped result is
    ('', stripped_prefix).  When no prefix applies, the result is
    path_components(path).

    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == '/':
            continue
        # Only match on a component boundary: '/foo' must not be taken
        # for a prefix of '/foobar'.
        if normalized_path != normalized_bp \
           and not normalized_path.startswith(normalized_bp + '/'):
            continue
        prefix = normalized_bp
        result = []
        for p in normalized_path[len(normalized_bp):].split('/'):
            if p: # not root
                prefix += '/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1094
1095
def grafted_path_components(graft_points, path):
    """Return the components of path with the first applicable graft
    point applied.

    'graft_points' is a sequence of (old_prefix, new_prefix) pairs, as
    given by --graft old=new.  The result is a list of (name, fs_path)
    pairs like the one path_components() returns, except that the faked
    directories leading up to the graft point have None as their
    fs_path.  A graft point applies when its old prefix is path itself
    or one of its parent directories.  When none applies, the result is
    path_components() of the normalized path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # Only match on a component boundary: '/foo' must not be taken
        # for a prefix of '/foobar'.
        if old_prefix != '/' and clean_path != old_prefix \
           and not clean_path.startswith(old_prefix + '/'):
            continue
        # Swap the prefixes by slicing; a regex substitution would
        # interpret backslashes in new_prefix as escapes.
        grafted_path = new_prefix + clean_path[len(old_prefix):]
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = '/' + grafted_path.lstrip('/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
        new_prefix_parts = new_prefix.split('/')
        result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == '/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1137
1138
# Alias for hashlib's SHA-1 constructor.
Sha1 = hashlib.sha1
1140
1141
# _helpers.localtime, or None when the _helpers module doesn't offer it.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Field names for the items returned by _helpers.localtime().
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the result of _helpers.localtime(time) as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds
        # down, so split the magnitude and handle the sign separately.
        h, m = divmod(abs(off) // 60, 60)
        # The sign can't be attached to the hour field: for offsets
        # between -1h and 0 the hour is 0 and "-0" would be formatted
        # as "+00".  Offsets that truncate to zero minutes stay "+0000".
        sign = '-' if off <= -60 else '+'
        return "%s%02d%02d" % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, built from its first nine
        items unless it already is one."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted by
        time.strftime('%z')."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged; localtime() is time.localtime here."""
        return x
1177
1178
# Characters and sequences that may not appear anywhere in a save name.
# '[' and '{' are escaped so that they are unambiguously literal.
_some_invalid_save_parts_rx = re.compile(r'[\[ ~^:?*\\]|\.\.|//|@\{')

def valid_save_name(name):
    """Return True if name is acceptable as a save name, False otherwise.

    A name is rejected when it is empty or just '@'; starts or ends with
    '/'; ends with '.'; contains a space, '[', '~', '^', ':', '?', '*',
    a backslash, '..', '//' or '@{'; contains a control character (below
    0x20, or 0x7f); or has a '/'-separated part that starts with '.' or
    ends with '.lock'.
    """
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if not name or name == '@' \
       or name.startswith('/') or name.endswith('/') \
       or name.endswith('.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    return not any(part.startswith('.') or part.endswith('.lock')
                   for part in name.split('/'))
1196
1197
# A period is a decimal count immediately followed by a unit suffix.
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period such as '30s', '5min' or '2w' to seconds.

    'forever' gives float('inf').  Anything that is not a count followed
    by one of the units s, min, h, d, w, m or y gives None.  A month (m)
    counts as 31 days and a year (y) as 366 days.
    """
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    magnitude, unit = parsed.groups()
    return int(magnitude) * secs_per_unit[unit]