lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from ctypes import sizeof, c_void_p
   4 from os import environ
   5 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   6 import hashlib, heapq, operator, time, grp
   7
   8 from bup import _version, _helpers
   9 import bup._helpers as _helpers
  10 import math
  11
  12 # This function should really be in helpers, not in bup.options.  But we
  13 # want options.py to be standalone so people can include it in other projects.
  14 from bup.options import _tty_width
  15 tty_width = _tty_width
  16
  17
  18 def atoi(s):
  19     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  20     try:
  21         return int(s or '0')
  22     except ValueError:
  23         return 0
  24
  25
  26 def atof(s):
  27     """Convert the string 's' to a float. Return 0 if s is not a number."""
  28     try:
  29         return float(s or '0')
  30     except ValueError:
  31         return 0
  32
  33
# Debug verbosity, read from the BUP_DEBUG environment variable (0 when the
# variable is unset or not a number).  Consulted by debug1() and debug2().
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  35
  36
# If the platform doesn't have fdatasync (OS X), fall back to fsync.
# Either way the module-level name 'fdatasync' ends up bound to a function
# from the os module.
try:
    fdatasync = os.fdatasync
except AttributeError:
    fdatasync = os.fsync
  42
  43
  44 # Write (blockingly) to sockets that may or may not be in blocking mode.
  45 # We need this because our stderr is sometimes eaten by subprocesses
  46 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
  47 # leading to race conditions.  Ick.  We'll do it the hard way.
  48 def _hard_write(fd, buf):
  49     while buf:
  50         (r,w,x) = select.select([], [fd], [], None)
  51         if not w:
  52             raise IOError('select(fd) returned without being writable')
  53         try:
  54             sz = os.write(fd, buf)
  55         except OSError, e:
  56             if e.errno != errno.EAGAIN:
  57                 raise
  58         assert(sz >= 0)
  59         buf = buf[sz:]
  60
  61
  62 _last_prog = 0
  63 def log(s):
  64     """Print a log message to stderr."""
  65     global _last_prog
  66     sys.stdout.flush()
  67     _hard_write(sys.stderr.fileno(), s)
  68     _last_prog = 0
  69
  70
  71 def debug1(s):
  72     if buglvl >= 1:
  73         log(s)
  74
  75
  76 def debug2(s):
  77     if buglvl >= 2:
  78         log(s)
  79
  80
# Whether file descriptors 1 and 2 should be treated as terminals.
# BUP_FORCE_TTY is parsed as an integer bit mask: bit value 1 forces istty1
# and bit value 2 forces istty2 even when os.isatty() says otherwise.
istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
  83 _last_progress = ''
  84 def progress(s):
  85     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
  86     global _last_progress
  87     if istty2:
  88         log(s)
  89         _last_progress = s
  90
  91
  92 def qprogress(s):
  93     """Calls progress() only if we haven't printed progress in a while.
  94
  95     This avoids overloading the stderr buffer with excess junk.
  96     """
  97     global _last_prog
  98     now = time.time()
  99     if now - _last_prog > 0.1:
 100         progress(s)
 101         _last_prog = now
 102
 103
 104 def reprogress():
 105     """Calls progress() to redisplay the most recent progress message.
 106
 107     Useful after you've printed some other message that wipes out the
 108     progress line.
 109     """
 110     if _last_progress and _last_progress.endswith('\r'):
 111         progress(_last_progress)
 112
 113
 114 def mkdirp(d, mode=None):
 115     """Recursively create directories on path 'd'.
 116
 117     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 118     the path already exists.
 119     """
 120     try:
 121         if mode:
 122             os.makedirs(d, mode)
 123         else:
 124             os.makedirs(d)
 125     except OSError, e:
 126         if e.errno == errno.EEXIST:
 127             pass
 128         else:
 129             raise
 130
 131
 132 _unspecified_next_default = object()
 133
 134 def _fallback_next(it, default=_unspecified_next_default):
 135     """Retrieve the next item from the iterator by calling its
 136     next() method. If default is given, it is returned if the
 137     iterator is exhausted, otherwise StopIteration is raised."""
 138
 139     if default is _unspecified_next_default:
 140         return it.next()
 141     else:
 142         try:
 143             return it.next()
 144         except StopIteration:
 145             return default
 146
# On Python older than 2.6, bind the module-level name next() to
# _fallback_next.
if sys.version_info < (2, 6):
    next =  _fallback_next
 149
 150
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge several iterables into one stream via a heap, skipping items
    equal to the one yielded just before.

    iters  -- a sequence of sized iterables (len() is taken of each one);
              presumably each is already sorted -- verify against callers.
    pfreq  -- pfunc is called whenever the number of consumed items is a
              multiple of pfreq.
    pfunc  -- progress callback, called as pfunc(count, total).
    pfinal -- called once as pfinal(count, total) when the merge is done.
    key    -- optional attribute name; when given, two items count as the
              same if that attribute compares equal, otherwise the items
              themselves are compared with ==.
    """
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    # Pair every iterator with its first item (None when it is empty)...
    heap = ((next(it, None),it) for it in iters)
    # ...and keep only the pairs whose first item is a true value.
    heap = [(e,it) for e,it in heap if e]

    heapq.heapify(heap)
    pe = None  # the most recently yielded item
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0]  # smallest pending item and the iterator it came from
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = it.next() # Don't use next() function, it's too expensive
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            heapq.heapreplace(heap, (e, it)) # shift current to new location
    pfinal(count, total)
 179
 180
 181 def unlink(f):
 182     """Delete a file at path 'f' if it currently exists.
 183
 184     Unlike os.unlink(), does not throw an exception if the file didn't already
 185     exist.
 186     """
 187     try:
 188         os.unlink(f)
 189     except OSError, e:
 190         if e.errno == errno.ENOENT:
 191             pass  # it doesn't exist, that's what you asked for
 192
 193
 194 def readpipe(argv, preexec_fn=None):
 195     """Run a subprocess and return its output."""
 196     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn)
 197     out, err = p.communicate()
 198     if p.returncode != 0:
 199         raise Exception('subprocess %r failed with status %d'
 200                         % (' '.join(argv), p.returncode))
 201     return out
 202
 203
# Look up the system's SC_ARG_MAX limit once at import time; batchpipe()
# uses it as the default size budget for a command line.  If the value
# cannot be determined, print a message and exit with status 1.
try:
    _arg_max = os.sysconf('SC_ARG_MAX')
    if _arg_max == -1:  # handled the same way as a failed lookup
        raise ValueError()
except ValueError, ex:
    print >> sys.stderr, 'Cannot find SC_ARG_MAX, please report a bug.'
    sys.exit(1)
 211
 212
 213 def _argmax_base(command):
 214     base_size = 2048
 215     for c in command:
 216         base_size += len(command) + 1
 217     for k, v in environ.iteritems():
 218         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 219     return base_size
 220
 221
 222 def _argmax_args_size(args):
 223     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 224
 225
 226 def batchpipe(command, args, preexec_fn=None, arg_max=None):
 227     """If args is not empty, yield the output produced by calling the
 228 command list with args as a sequence of strings (It may be necessary
 229 to return multiple strings in order to respect ARG_MAX)."""
 230     # The optional arg_max arg is a workaround for an issue with the
 231     # current wvtest behavior.
 232     if not arg_max:
 233         arg_max = _arg_max
 234     base_size = _argmax_base(command)
 235     while args:
 236         room = arg_max - base_size
 237         i = 0
 238         while i < len(args):
 239             next_size = _argmax_args_size(args[i:i+1])
 240             if room - next_size < 0:
 241                 break
 242             room -= next_size
 243             i += 1
 244         sub_args = args[:i]
 245         args = args[i:]
 246         assert(len(sub_args))
 247         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 248
 249
 250 def realpath(p):
 251     """Get the absolute path of a file.
 252
 253     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 254     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 255     will follow symlinks in p's directory)
 256     """
 257     try:
 258         st = os.lstat(p)
 259     except OSError:
 260         st = None
 261     if st and stat.S_ISLNK(st.st_mode):
 262         (dir, name) = os.path.split(p)
 263         dir = os.path.realpath(dir)
 264         out = os.path.join(dir, name)
 265     else:
 266         out = os.path.realpath(p)
 267     #log('realpathing:%r,%r\n' % (p, out))
 268     return out
 269
 270
 271 def detect_fakeroot():
 272     "Return True if we appear to be running under fakeroot."
 273     return os.getenv("FAKEROOTKEY") != None
 274
 275
 276 def is_superuser():
 277     if sys.platform.startswith('cygwin'):
 278         import ctypes
 279         return ctypes.cdll.shell32.IsUserAnAdmin()
 280     else:
 281         return os.geteuid() == 0
 282
 283
 284 def _cache_key_value(get_value, key, cache):
 285     """Return (value, was_cached).  If there is a value in the cache
 286     for key, use that, otherwise, call get_value(key) which should
 287     throw a KeyError if there is no value -- in which case the cached
 288     and returned value will be None.
 289     """
 290     try: # Do we already have it (or know there wasn't one)?
 291         value = cache[key]
 292         return value, True
 293     except KeyError:
 294         pass
 295     value = None
 296     try:
 297         cache[key] = value = get_value(key)
 298     except KeyError:
 299         cache[key] = None
 300     return value, False
 301
 302
 303 _uid_to_pwd_cache = {}
 304 _name_to_pwd_cache = {}
 305
 306 def pwd_from_uid(uid):
 307     """Return password database entry for uid (may be a cached value).
 308     Return None if no entry is found.
 309     """
 310     global _uid_to_pwd_cache, _name_to_pwd_cache
 311     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 312     if entry and not cached:
 313         _name_to_pwd_cache[entry.pw_name] = entry
 314     return entry
 315
 316
 317 def pwd_from_name(name):
 318     """Return password database entry for name (may be a cached value).
 319     Return None if no entry is found.
 320     """
 321     global _uid_to_pwd_cache, _name_to_pwd_cache
 322     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 323     if entry and not cached:
 324         _uid_to_pwd_cache[entry.pw_uid] = entry
 325     return entry
 326
 327
 328 _gid_to_grp_cache = {}
 329 _name_to_grp_cache = {}
 330
 331 def grp_from_gid(gid):
 332     """Return password database entry for gid (may be a cached value).
 333     Return None if no entry is found.
 334     """
 335     global _gid_to_grp_cache, _name_to_grp_cache
 336     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 337     if entry and not cached:
 338         _name_to_grp_cache[entry.gr_name] = entry
 339     return entry
 340
 341
 342 def grp_from_name(name):
 343     """Return password database entry for name (may be a cached value).
 344     Return None if no entry is found.
 345     """
 346     global _gid_to_grp_cache, _name_to_grp_cache
 347     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 348     if entry and not cached:
 349         _gid_to_grp_cache[entry.gr_gid] = entry
 350     return entry
 351
 352
 353 _username = None
 354 def username():
 355     """Get the user's login name."""
 356     global _username
 357     if not _username:
 358         uid = os.getuid()
 359         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 360     return _username
 361
 362
 363 _userfullname = None
 364 def userfullname():
 365     """Get the user's full name."""
 366     global _userfullname
 367     if not _userfullname:
 368         uid = os.getuid()
 369         entry = pwd_from_uid(uid)
 370         if entry:
 371             _userfullname = entry[4].split(',')[0] or entry[0]
 372         if not _userfullname:
 373             _userfullname = 'user%d' % uid
 374     return _userfullname
 375
 376
 377 _hostname = None
 378 def hostname():
 379     """Get the FQDN of this machine."""
 380     global _hostname
 381     if not _hostname:
 382         _hostname = socket.getfqdn()
 383     return _hostname
 384
 385
 386 _resource_path = None
 387 def resource_path(subdir=''):
 388     global _resource_path
 389     if not _resource_path:
 390         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 391     return os.path.join(_resource_path, subdir)
 392
 393 def format_filesize(size):
 394     unit = 1024.0
 395     size = float(size)
 396     if size < unit:
 397         return "%d" % (size)
 398     exponent = int(math.log(size) / math.log(unit))
 399     size_prefix = "KMGTPE"[exponent - 1]
 400     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 401
 402
 403 class NotOk(Exception):
 404     pass
 405
 406
 407 class BaseConn:
 408     def __init__(self, outp):
 409         self.outp = outp
 410
 411     def close(self):
 412         while self._read(65536): pass
 413
 414     def read(self, size):
 415         """Read 'size' bytes from input stream."""
 416         self.outp.flush()
 417         return self._read(size)
 418
 419     def readline(self):
 420         """Read from input stream until a newline is found."""
 421         self.outp.flush()
 422         return self._readline()
 423
 424     def write(self, data):
 425         """Write 'data' to output stream."""
 426         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 427         self.outp.write(data)
 428
 429     def has_input(self):
 430         """Return true if input stream is readable."""
 431         raise NotImplemented("Subclasses must implement has_input")
 432
 433     def ok(self):
 434         """Indicate end of output from last sent command."""
 435         self.write('\nok\n')
 436
 437     def error(self, s):
 438         """Indicate server error to the client."""
 439         s = re.sub(r'\s+', ' ', str(s))
 440         self.write('\nerror %s\n' % s)
 441
 442     def _check_ok(self, onempty):
 443         self.outp.flush()
 444         rl = ''
 445         for rl in linereader(self):
 446             #log('%d got line: %r\n' % (os.getpid(), rl))
 447             if not rl:  # empty line
 448                 continue
 449             elif rl == 'ok':
 450                 return None
 451             elif rl.startswith('error '):
 452                 #log('client: error: %s\n' % rl[6:])
 453                 return NotOk(rl[6:])
 454             else:
 455                 onempty(rl)
 456         raise Exception('server exited unexpectedly; see errors above')
 457
 458     def drain_and_check_ok(self):
 459         """Remove all data for the current command from input stream."""
 460         def onempty(rl):
 461             pass
 462         return self._check_ok(onempty)
 463
 464     def check_ok(self):
 465         """Verify that server action completed successfully."""
 466         def onempty(rl):
 467             raise Exception('expected "ok", got %r' % rl)
 468         return self._check_ok(onempty)
 469
 470
 471 class Conn(BaseConn):
 472     def __init__(self, inp, outp):
 473         BaseConn.__init__(self, outp)
 474         self.inp = inp
 475
 476     def _read(self, size):
 477         return self.inp.read(size)
 478
 479     def _readline(self):
 480         return self.inp.readline()
 481
 482     def has_input(self):
 483         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 484         if rl:
 485             assert(rl[0] == self.inp.fileno())
 486             return True
 487         else:
 488             return None
 489
 490
 491 def checked_reader(fd, n):
 492     while n > 0:
 493         rl, _, _ = select.select([fd], [], [])
 494         assert(rl[0] == fd)
 495         buf = os.read(fd, n)
 496         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 497         yield buf
 498         n -= len(buf)
 499
 500
# Largest payload read from 'outr' for one packet in mux(); DemuxConn
# asserts that no incoming packet is longer than this.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex the output of the subprocess 'p' onto the descriptor
    'outfd'.

    While p.poll() reports the process as still running, whatever becomes
    readable on the descriptors 'outr' and 'errr' is forwarded to 'outfd'
    as packets.  Every packet starts with a header packed as '!IB'
    (payload length, channel number) followed by the payload: channel 1
    carries data read from 'outr', channel 2 data read from 'errr'.  A
    final packet with length 0 on channel 3 is always written, even when
    an exception ends the loop.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # NOTE(review): an empty read (EOF) only leaves the
                    # for loop; the while loop carries on polling --
                    # confirm this cannot spin while 'p' is still alive.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
        # NOTE(review): once p.poll() reports an exit status the loop ends
        # without another read, so data still buffered in the pipes looks
        # like it is not forwarded -- confirm.
    finally:
        # End-of-stream marker: empty payload on channel 3.
        os.write(outfd, struct.pack('!IB', 0, 3))
 518
 519
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the packet stream produced by mux() from the descriptor 'infd':
    channel 1 payloads become this connection's input, channel 2 payloads
    are copied to stderr, and a channel 3 packet marks the stream closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to fill 'tail' up to 6 bytes, then one byte at
            # a time while sliding the 6-byte window along.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # chunk generator for the current data packet
        self.buf = None      # data received but not yet handed to a reader
        self.closed = False  # set once the channel 3 packet was seen

    def write(self, data):
        # Poll for pending input (zero timeout) before writing.
        # NOTE(review): presumably so that queued stderr packets get
        # relayed before more output is sent -- confirm.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Wait up to 'timeout' for a packet header and dispatch on its
        channel.  Return False if the stream is closed or nothing arrived
        in time, True once a header has been processed."""
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        # 5-byte header: payload length and channel, packed as '!IB'.
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily through self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: relay the payload straight away.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data.  Return True if it does, False
        if the stream is closed or no packet arrived within 'timeout'."""
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = self.reader.next()
                return True
            except StopIteration:
                # Current data packet fully consumed; go on to the next.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield successive pieces of input as directed by 'ix_fn'.

        ix_fn(buf) returns None to take all of buf and carry on, or an
        index i: buf[:i] is yielded, the remainder is kept in self.buf
        for later, and the iteration stops."""
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            # Index just past the first newline, or None if there is none.
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        # One-element list so that the nested function can update the
        # number of bytes still wanted.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if input is available without waiting."""
        return self._load_buf(0)
 609
 610
 611 def linereader(f):
 612     """Generate a list of input lines from 'f' without terminating newlines."""
 613     while 1:
 614         line = f.readline()
 615         if not line:
 616             break
 617         yield line[:-1]
 618
 619
 620 def chunkyreader(f, count = None):
 621     """Generate a list of chunks of data read from 'f'.
 622
 623     If count is None, read until EOF is reached.
 624
 625     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 626     reached while reading, raise IOError.
 627     """
 628     if count != None:
 629         while count > 0:
 630             b = f.read(min(count, 65536))
 631             if not b:
 632                 raise IOError('EOF with %d bytes remaining' % count)
 633             yield b
 634             count -= len(b)
 635     else:
 636         while 1:
 637             b = f.read(65536)
 638             if not b: break
 639             yield b
 640
 641
 642 def slashappend(s):
 643     """Append "/" to 's' if it doesn't aleady end in "/"."""
 644     if s and not s.endswith('/'):
 645         return s + '/'
 646     else:
 647         return s
 648
 649
 650 def _mmap_do(f, sz, flags, prot, close):
 651     if not sz:
 652         st = os.fstat(f.fileno())
 653         sz = st.st_size
 654     if not sz:
 655         # trying to open a zero-length map gives an error, but an empty
 656         # string has all the same behaviour of a zero-length map, ie. it has
 657         # no elements :)
 658         return ''
 659     map = mmap.mmap(f.fileno(), sz, flags, prot)
 660     if close:
 661         f.close()  # map will persist beyond file close
 662     return map
 663
 664
 665 def mmap_read(f, sz = 0, close=True):
 666     """Create a read-only memory mapped region on file 'f'.
 667     If sz is 0, the region will cover the entire file.
 668     """
 669     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 670
 671
 672 def mmap_readwrite(f, sz = 0, close=True):
 673     """Create a read-write memory mapped region on file 'f'.
 674     If sz is 0, the region will cover the entire file.
 675     """
 676     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 677                     close)
 678
 679
 680 def mmap_readwrite_private(f, sz = 0, close=True):
 681     """Create a read-write memory mapped region on file 'f'.
 682     If sz is 0, the region will cover the entire file.
 683     The map is private, which means the changes are never flushed back to the
 684     file.
 685     """
 686     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 687                     close)
 688
 689
 690 def parse_timestamp(epoch_str):
 691     """Return the number of nanoseconds since the epoch that are described
 692 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 693 throw a ValueError that may contain additional information."""
 694     ns_per = {'s' :  1000000000,
 695               'ms' : 1000000,
 696               'us' : 1000,
 697               'ns' : 1}
 698     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 699     if not match:
 700         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 701             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 702         raise ValueError()
 703     (n, units) = match.group(1, 2)
 704     if not n:
 705         n = 1
 706     n = int(n)
 707     return n * ns_per[units]
 708
 709
 710 def parse_num(s):
 711     """Parse data size information into a float number.
 712
 713     Here are some examples of conversions:
 714         199.2k means 203981 bytes
 715         1GB means 1073741824 bytes
 716         2.1 tb means 2199023255552 bytes
 717     """
 718     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 719     if not g:
 720         raise ValueError("can't parse %r as a number" % s)
 721     (val, unit) = g.groups()
 722     num = float(val)
 723     unit = unit.lower()
 724     if unit in ['t', 'tb']:
 725         mult = 1024*1024*1024*1024
 726     elif unit in ['g', 'gb']:
 727         mult = 1024*1024*1024
 728     elif unit in ['m', 'mb']:
 729         mult = 1024*1024
 730     elif unit in ['k', 'kb']:
 731         mult = 1024
 732     elif unit in ['', 'b']:
 733         mult = 1
 734     else:
 735         raise ValueError("invalid unit %r in number %r" % (unit, s))
 736     return int(num*mult)
 737
 738
 739 def count(l):
 740     """Count the number of elements in an iterator. (consumes the iterator)"""
 741     return reduce(lambda x,y: x+1, l)
 742
 743
 744 saved_errors = []
 745 def add_error(e):
 746     """Append an error message to the list of saved errors.
 747
 748     Once processing is able to stop and output the errors, the saved errors are
 749     accessible in the module variable helpers.saved_errors.
 750     """
 751     saved_errors.append(e)
 752     log('%-70s\n' % e)
 753
 754
 755 def clear_errors():
 756     global saved_errors
 757     saved_errors = []
 758
 759
 760 def handle_ctrl_c():
 761     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 762
 763     The new exception handler will make sure that bup will exit without an ugly
 764     stacktrace when Ctrl-C is hit.
 765     """
 766     oldhook = sys.excepthook
 767     def newhook(exctype, value, traceback):
 768         if exctype == KeyboardInterrupt:
 769             log('\nInterrupted.\n')
 770         else:
 771             return oldhook(exctype, value, traceback)
 772     sys.excepthook = newhook
 773
 774
 775 def columnate(l, prefix):
 776     """Format elements of 'l' in columns with 'prefix' leading each line.
 777
 778     The number of columns is determined automatically based on the string
 779     lengths.
 780     """
 781     if not l:
 782         return ""
 783     l = l[:]
 784     clen = max(len(s) for s in l)
 785     ncols = (tty_width() - len(prefix)) / (clen + 2)
 786     if ncols <= 1:
 787         ncols = 1
 788         clen = 0
 789     cols = []
 790     while len(l) % ncols:
 791         l.append('')
 792     rows = len(l)/ncols
 793     for s in range(0, len(l), rows):
 794         cols.append(l[s:s+rows])
 795     out = ''
 796     for row in zip(*cols):
 797         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 798     return out
 799
 800
 801 def parse_date_or_fatal(str, fatal):
 802     """Parses the given date or calls Option.fatal().
 803     For now we expect a string that contains a float."""
 804     try:
 805         date = atof(str)
 806     except ValueError, e:
 807         raise fatal('invalid date format (should be a float): %r' % e)
 808     else:
 809         return date
 810
 811
 812 def parse_excludes(options, fatal):
 813     """Traverse the options and extract all excludes, or call Option.fatal()."""
 814     excluded_paths = []
 815
 816     for flag in options:
 817         (option, parameter) = flag
 818         if option == '--exclude':
 819             excluded_paths.append(realpath(parameter))
 820         elif option == '--exclude-from':
 821             try:
 822                 f = open(realpath(parameter))
 823             except IOError, e:
 824                 raise fatal("couldn't read %s" % parameter)
 825             for exclude_path in f.readlines():
 826                 excluded_paths.append(realpath(exclude_path.strip()))
 827     return sorted(frozenset(excluded_paths))
 828
 829
 830 def parse_rx_excludes(options, fatal):
 831     """Traverse the options and extract all rx excludes, or call
 832     Option.fatal()."""
 833     excluded_patterns = []
 834
 835     for flag in options:
 836         (option, parameter) = flag
 837         if option == '--exclude-rx':
 838             try:
 839                 excluded_patterns.append(re.compile(parameter))
 840             except re.error, ex:
 841                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 842         elif option == '--exclude-rx-from':
 843             try:
 844                 f = open(realpath(parameter))
 845             except IOError, e:
 846                 raise fatal("couldn't read %s" % parameter)
 847             for pattern in f.readlines():
 848                 spattern = pattern.rstrip('\n')
 849                 try:
 850                     excluded_patterns.append(re.compile(spattern))
 851                 except re.error, ex:
 852                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 853     return excluded_patterns
 854
 855
 856 def should_rx_exclude_path(path, exclude_rxs):
 857     """Return True if path matches a regular expression in exclude_rxs."""
 858     for rx in exclude_rxs:
 859         if rx.search(path):
 860             debug1('Skipping %r: excluded by rx pattern %r.\n'
 861                    % (path, rx.pattern))
 862             return True
 863     return False
 864
 865
 866 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 867 # that resolve against the current filesystem in the strip/graft
 868 # functions for example, but elsewhere as well.  I suspect bup's not
 869 # always being careful about that.  For some cases, the contents of
 870 # the current filesystem should be irrelevant, and consulting it might
 871 # produce the wrong result, perhaps via unintended symlink resolution,
 872 # for example.
 873
 874 def path_components(path):
 875     """Break path into a list of pairs of the form (name,
 876     full_path_to_name).  Path must start with '/'.
 877     Example:
 878       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 879     if not path.startswith('/'):
 880         raise Exception, 'path must start with "/": %s' % path
 881     # Since we assume path startswith('/'), we can skip the first element.
 882     result = [('', '/')]
 883     norm_path = os.path.abspath(path)
 884     if norm_path == '/':
 885         return result
 886     full_path = ''
 887     for p in norm_path.split('/')[1:]:
 888         full_path += '/' + p
 889         result.append((p, full_path))
 890     return result
 891
 892
 893 def stripped_path_components(path, strip_prefixes):
 894     """Strip any prefix in strip_prefixes from path and return a list
 895     of path components where each component is (name,
 896     none_or_full_fs_path_to_name).  Assume path startswith('/').
 897     See thelpers.py for examples."""
 898     normalized_path = os.path.abspath(path)
 899     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
 900     for bp in sorted_strip_prefixes:
 901         normalized_bp = os.path.abspath(bp)
 902         if normalized_path.startswith(normalized_bp):
 903             prefix = normalized_path[:len(normalized_bp)]
 904             result = []
 905             for p in normalized_path[len(normalized_bp):].split('/'):
 906                 if p: # not root
 907                     prefix += '/'
 908                 prefix += p
 909                 result.append((p, prefix))
 910             return result
 911     # Nothing to strip.
 912     return path_components(path)
 913
 914
def grafted_path_components(graft_points, path):
    """Return the components of 'path' as (name, fs_path_or_None) pairs
    after applying the first matching entry of 'graft_points'.

    'graft_points' is a sequence of (old_prefix, new_prefix) pairs.  For
    the first pair whose normalized old_prefix 'path' starts with, the
    components before the graft point are taken from new_prefix and carry
    None as their filesystem path; the graft point and everything below it
    keep their real paths.  If no pair matches, the result is that of
    path_components() on the absolute path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Expand prefixes iff not absolute paths.
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if clean_path.startswith(old_prefix):
            escaped_prefix = re.escape(old_prefix)
            grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = '/' + grafted_path.lstrip('/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
            new_prefix_parts = new_prefix.split('/')
            # Names of the faked directories that precede the graft point.
            result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            graft_point = len(result_prefix)
            result[graft_point] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == '/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
 956
# Alias for hashlib.sha1.
Sha1 = hashlib.sha1
 958
 959 def version_date():
 960     """Format bup's version date string for output."""
 961     return _version.DATE.split(' ')[0]
 962
 963
 964 def version_commit():
 965     """Get the commit hash of bup's current version."""
 966     return _version.COMMIT
 967
 968
 969 def version_tag():
 970     """Format bup's version tag (the official version number).
 971
 972     When generated from a commit other than one pointed to with a tag, the
 973     returned string will be "unknown-" followed by the first seven positions of
 974     the commit hash.
 975     """
 976     names = _version.NAMES.strip()
 977     assert(names[0] == '(')
 978     assert(names[-1] == ')')
 979     names = names[1:-1]
 980     l = [n.strip() for n in names.split(',')]
 981     for n in l:
 982         if n.startswith('tag: bup-'):
 983             return n[9:]
 984     return 'unknown-%s' % _version.COMMIT[:7]