1 """Helper functions and classes for bup."""
3 from ctypes import sizeof, c_void_p
5 from contextlib import contextmanager
6 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
7 import hashlib, heapq, operator, time, grp, tempfile
9 from bup import _helpers
10 import bup._helpers as _helpers
13 # This function should really be in helpers, not in bup.options. But we
14 # want options.py to be standalone so people can include it in other projects.
15 from bup.options import _tty_width
16 tty_width = _tty_width
20 """Convert the string 's' to an integer. Return 0 if s is not a number."""
28 """Convert the string 's' to a float. Return 0 if s is not a number."""
30 return float(s or '0')
35 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
38 # If the platform doesn't have fdatasync (OS X), fall back to fsync.
40 fdatasync = os.fdatasync
41 except AttributeError:
45 # Write (blockingly) to sockets that may or may not be in blocking mode.
46 # We need this because our stderr is sometimes eaten by subprocesses
47 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
48 # leading to race conditions. Ick. We'll do it the hard way.
49 def _hard_write(fd, buf):
51 (r,w,x) = select.select([], [fd], [], None)
53 raise IOError('select(fd) returned without being writable')
55 sz = os.write(fd, buf)
57 if e.errno != errno.EAGAIN:
63 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
64 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
69 def log(s, is_prog=False, is_prog_end=False):
70 """Print a log message to stderr."""
84 _hard_write(sys.stderr.fileno(), s)
89 """Write a level 1 debug message to stderr"""
95 """Write a level 2 debug message to stderr"""
100 def progress_update(s, always=True):
101 """Update progress messages on stderr.
103 Calls log() if stderr is connected to a TTY and we haven't printed
104 progress messages in a while. This avoids overloading the stderr buffer
110 if always or now - _last_prog > 0.1:
116 """End progress messages on stderr.
    Calls log() if stderr is connected to a TTY and makes sure that this
119 message is kept even if more/other messages follow using progress_update().
125 def mkdirp(d, mode=None):
126 """Recursively create directories on path 'd'.
128 Unlike os.makedirs(), it doesn't raise an exception if the last element of
129 the path already exists.
137 if e.errno == errno.EEXIST:
143 _unspecified_next_default = object()
145 def _fallback_next(it, default=_unspecified_next_default):
146 """Retrieve the next item from the iterator by calling its
147 next() method. If default is given, it is returned if the
148 iterator is exhausted, otherwise StopIteration is raised."""
150 if default is _unspecified_next_default:
155 except StopIteration:
# Python < 2.6 has no builtin next(); install the compatibility
# implementation so callers can rely on next(it[, default]) everywhere.
if sys.version_info < (2, 6):
    next = _fallback_next
162 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
164 samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
166 samekey = operator.eq
168 total = sum(len(it) for it in iters)
169 iters = (iter(it) for it in iters)
170 heap = ((next(it, None),it) for it in iters)
171 heap = [(e,it) for e,it in heap if e]
176 if not count % pfreq:
179 if not samekey(e, pe):
184 e = it.next() # Don't use next() function, it's too expensive
185 except StopIteration:
186 heapq.heappop(heap) # remove current
188 heapq.heapreplace(heap, (e, it)) # shift current to new location
193 """Delete a file at path 'f' if it currently exists.
195 Unlike os.unlink(), does not throw an exception if the file didn't already
201 if e.errno != errno.ENOENT:
205 def readpipe(argv, preexec_fn=None):
206 """Run a subprocess and return its output."""
207 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn)
208 out, err = p.communicate()
209 if p.returncode != 0:
210 raise Exception('subprocess %r failed with status %d'
211 % (' '.join(argv), p.returncode))
215 def _argmax_base(command):
218 base_size += len(command) + 1
219 for k, v in environ.iteritems():
220 base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
224 def _argmax_args_size(args):
225 return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
228 def batchpipe(command, args, preexec_fn=None, arg_max=_helpers.SC_ARG_MAX):
229 """If args is not empty, yield the output produced by calling the
230 command list with args as a sequence of strings (It may be necessary
231 to return multiple strings in order to respect ARG_MAX)."""
232 # The optional arg_max arg is a workaround for an issue with the
233 # current wvtest behavior.
234 base_size = _argmax_base(command)
236 room = arg_max - base_size
239 next_size = _argmax_args_size(args[i:i+1])
240 if room - next_size < 0:
246 assert(len(sub_args))
247 yield readpipe(command + sub_args, preexec_fn=preexec_fn)
251 """Get the absolute path of a file.
253 Behaves like os.path.realpath, but doesn't follow a symlink for the last
254 element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
255 will follow symlinks in p's directory)
261 if st and stat.S_ISLNK(st.st_mode):
262 (dir, name) = os.path.split(p)
263 dir = os.path.realpath(dir)
264 out = os.path.join(dir, name)
266 out = os.path.realpath(p)
267 #log('realpathing:%r,%r' % (p, out))
def detect_fakeroot():
    """Return True if we appear to be running under fakeroot."""
    # fakeroot sets FAKEROOTKEY in its children's environment; the mere
    # presence of the variable (any value) is the signal.  Use an identity
    # test against None (PEP 8) rather than '!= None'.
    return os.getenv("FAKEROOTKEY") is not None
277 if sys.platform.startswith('cygwin'):
279 return ctypes.cdll.shell32.IsUserAnAdmin()
281 return os.geteuid() == 0
284 def _cache_key_value(get_value, key, cache):
285 """Return (value, was_cached). If there is a value in the cache
286 for key, use that, otherwise, call get_value(key) which should
287 throw a KeyError if there is no value -- in which case the cached
288 and returned value will be None.
290 try: # Do we already have it (or know there wasn't one)?
297 cache[key] = value = get_value(key)
303 _uid_to_pwd_cache = {}
304 _name_to_pwd_cache = {}
306 def pwd_from_uid(uid):
307 """Return password database entry for uid (may be a cached value).
308 Return None if no entry is found.
310 global _uid_to_pwd_cache, _name_to_pwd_cache
311 entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
312 if entry and not cached:
313 _name_to_pwd_cache[entry.pw_name] = entry
317 def pwd_from_name(name):
318 """Return password database entry for name (may be a cached value).
319 Return None if no entry is found.
321 global _uid_to_pwd_cache, _name_to_pwd_cache
322 entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
323 if entry and not cached:
324 _uid_to_pwd_cache[entry.pw_uid] = entry
328 _gid_to_grp_cache = {}
329 _name_to_grp_cache = {}
331 def grp_from_gid(gid):
    """Return group database entry for gid (may be a cached value).
333 Return None if no entry is found.
335 global _gid_to_grp_cache, _name_to_grp_cache
336 entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
337 if entry and not cached:
338 _name_to_grp_cache[entry.gr_name] = entry
342 def grp_from_name(name):
    """Return group database entry for name (may be a cached value).
344 Return None if no entry is found.
346 global _gid_to_grp_cache, _name_to_grp_cache
347 entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
348 if entry and not cached:
349 _gid_to_grp_cache[entry.gr_gid] = entry
355 """Get the user's login name."""
359 _username = pwd_from_uid(uid)[0] or 'user%d' % uid
365 """Get the user's full name."""
367 if not _userfullname:
369 entry = pwd_from_uid(uid)
371 _userfullname = entry[4].split(',')[0] or entry[0]
372 if not _userfullname:
373 _userfullname = 'user%d' % uid
379 """Get the FQDN of this machine."""
382 _hostname = socket.getfqdn()
_resource_path = None
def resource_path(subdir=''):
    """Return the path of bup's resource directory joined with 'subdir'.

    The base directory comes from $BUP_RESOURCE_PATH, falling back to the
    current directory, and is looked up only once per process.
    """
    global _resource_path
    if not _resource_path:
        base = os.environ.get('BUP_RESOURCE_PATH')
        _resource_path = base if base else '.'
    return os.path.join(_resource_path, subdir)
393 def format_filesize(size):
398 exponent = int(math.log(size) / math.log(unit))
399 size_prefix = "KMGTPE"[exponent - 1]
400 return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
403 class NotOk(Exception):
408 def __init__(self, outp):
412 while self._read(65536): pass
414 def read(self, size):
415 """Read 'size' bytes from input stream."""
417 return self._read(size)
420 """Read from input stream until a newline is found."""
422 return self._readline()
    def write(self, data):
        """Write 'data' to output stream.

        Delegates directly to the underlying output file object; no
        flushing or extra buffering is done here.
        """
        #log('%d writing: %d bytes' % (os.getpid(), len(data)))
        self.outp.write(data)
430 """Return true if input stream is readable."""
431 raise NotImplemented("Subclasses must implement has_input")
434 """Indicate end of output from last sent command."""
438 """Indicate server error to the client."""
439 s = re.sub(r'\s+', ' ', str(s))
440 self.write('\nerror %s\n' % s)
442 def _check_ok(self, onempty):
445 for rl in linereader(self):
446 #log('%d got line: %r' % (os.getpid(), rl))
447 if not rl: # empty line
451 elif rl.startswith('error '):
452 #log('client: error: %s' % rl[6:])
456 raise Exception('server exited unexpectedly; see errors above')
458 def drain_and_check_ok(self):
459 """Remove all data for the current command from input stream."""
462 return self._check_ok(onempty)
465 """Verify that server action completed successfully."""
467 raise Exception('expected "ok", got %r' % rl)
468 return self._check_ok(onempty)
471 class Conn(BaseConn):
472 def __init__(self, inp, outp):
473 BaseConn.__init__(self, outp)
    def _read(self, size):
        """Read up to 'size' bytes from the underlying input file object."""
        return self.inp.read(size)
480 return self.inp.readline()
483 [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
485 assert(rl[0] == self.inp.fileno())
491 def checked_reader(fd, n):
493 rl, _, _ = select.select([fd], [], [])
496 if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
501 MAX_PACKET = 128 * 1024
502 def mux(p, outfd, outr, errr):
505 while p.poll() is None:
506 rl, _, _ = select.select(fds, [], [])
509 buf = os.read(outr, MAX_PACKET)
511 os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
513 buf = os.read(errr, 1024)
515 os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
517 os.write(outfd, struct.pack('!IB', 0, 3))
520 class DemuxConn(BaseConn):
521 """A helper class for bup's client-server protocol."""
522 def __init__(self, infd, outp):
523 BaseConn.__init__(self, outp)
524 # Anything that comes through before the sync string was not
525 # multiplexed and can be assumed to be debug/log before mux init.
527 while tail != 'BUPMUX':
528 b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
530 raise IOError('demux: unexpected EOF during initialization')
532 sys.stderr.write(tail[:-6]) # pre-mux log messages
539 def write(self, data):
541 BaseConn.write(self, data)
543 def _next_packet(self, timeout):
544 if self.closed: return False
545 rl, wl, xl = select.select([self.infd], [], [], timeout)
546 if not rl: return False
547 assert(rl[0] == self.infd)
548 ns = ''.join(checked_reader(self.infd, 5))
549 n, fdw = struct.unpack('!IB', ns)
550 assert(n <= MAX_PACKET)
552 self.reader = checked_reader(self.infd, n)
554 for buf in checked_reader(self.infd, n):
555 sys.stderr.write(buf)
558 debug2("DemuxConn: Marked closed.")
561 def _load_buf(self, timeout):
562 if self.buf is not None:
564 while not self.closed:
565 while not self.reader:
566 if not self._next_packet(timeout):
569 self.buf = self.reader.next()
571 except StopIteration:
575 def _read_parts(self, ix_fn):
576 while self._load_buf(None):
577 assert(self.buf is not None)
579 if i is None or i == len(self.buf):
584 self.buf = self.buf[i:]
592 return buf.index('\n')+1
595 return ''.join(self._read_parts(find_eol))
597 def _read(self, size):
599 def until_size(buf): # Closes on csize
600 if len(buf) < csize[0]:
605 return ''.join(self._read_parts(until_size))
608 return self._load_buf(0)
612 """Generate a list of input lines from 'f' without terminating newlines."""
620 def chunkyreader(f, count = None):
621 """Generate a list of chunks of data read from 'f'.
623 If count is None, read until EOF is reached.
625 If count is a positive integer, read 'count' bytes from 'f'. If EOF is
626 reached while reading, raise IOError.
630 b = f.read(min(count, 65536))
632 raise IOError('EOF with %d bytes remaining' % count)
643 def atomically_replaced_file(name, mode='w', buffering=-1):
644 """Yield a file that will be atomically renamed name when leaving the block.
646 This contextmanager yields an open file object that is backed by a
647 temporary file which will be renamed (atomically) to the target
648 name if everything succeeds.
650 The mode and buffering arguments are handled exactly as with open,
651 and the yielded file will have very restrictive permissions, as
656 with atomically_replaced_file('foo.txt', 'w') as f:
657 f.write('hello jack.')
661 (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
662 text=('b' not in mode))
665 f = os.fdopen(ffd, mode, buffering)
673 os.rename(tempname, name)
        unlink(tempname)  # nonexistent file is ignored
    """Append "/" to 's' if it doesn't already end in "/"."""
680 if s and not s.endswith('/'):
686 def _mmap_do(f, sz, flags, prot, close):
688 st = os.fstat(f.fileno())
691 # trying to open a zero-length map gives an error, but an empty
692 # string has all the same behaviour of a zero-length map, ie. it has
695 map = mmap.mmap(f.fileno(), sz, flags, prot)
697 f.close() # map will persist beyond file close
701 def mmap_read(f, sz = 0, close=True):
702 """Create a read-only memory mapped region on file 'f'.
703 If sz is 0, the region will cover the entire file.
705 return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
708 def mmap_readwrite(f, sz = 0, close=True):
709 """Create a read-write memory mapped region on file 'f'.
710 If sz is 0, the region will cover the entire file.
712 return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
716 def mmap_readwrite_private(f, sz = 0, close=True):
717 """Create a read-write memory mapped region on file 'f'.
718 If sz is 0, the region will cover the entire file.
719 The map is private, which means the changes are never flushed back to the
722 return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
726 def parse_timestamp(epoch_str):
727 """Return the number of nanoseconds since the epoch that are described
728 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
729 throw a ValueError that may contain additional information."""
730 ns_per = {'s' : 1000000000,
734 match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
736 if re.match(r'^([-+]?[0-9]+)$', epoch_str):
737 raise ValueError('must include units, i.e. 100ns, 100ms, ...')
739 (n, units) = match.group(1, 2)
743 return n * ns_per[units]
747 """Parse data size information into a float number.
749 Here are some examples of conversions:
750 199.2k means 203981 bytes
751 1GB means 1073741824 bytes
752 2.1 tb means 2199023255552 bytes
754 g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
756 raise ValueError("can't parse %r as a number" % s)
757 (val, unit) = g.groups()
760 if unit in ['t', 'tb']:
761 mult = 1024*1024*1024*1024
762 elif unit in ['g', 'gb']:
763 mult = 1024*1024*1024
764 elif unit in ['m', 'mb']:
766 elif unit in ['k', 'kb']:
768 elif unit in ['', 'b']:
771 raise ValueError("invalid unit %r in number %r" % (unit, s))
776 """Count the number of elements in an iterator. (consumes the iterator)"""
777 return reduce(lambda x,y: x+1, l)
782 """Append an error message to the list of saved errors.
784 Once processing is able to stop and output the errors, the saved errors are
785 accessible in the module variable helpers.saved_errors.
787 saved_errors.append(e)
796 def check_saved_errors():
799 log('Warning: %d error%s encountered!'
800 % (len(saved_errors), len(saved_errors)!=1 and 's' or ''))
807 """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
809 The new exception handler will make sure that bup will exit without an ugly
810 stacktrace when Ctrl-C is hit.
812 oldhook = sys.excepthook
813 def newhook(exctype, value, traceback):
814 if exctype == KeyboardInterrupt:
817 return oldhook(exctype, value, traceback)
818 sys.excepthook = newhook
821 def columnate(l, prefix):
822 """Format elements of 'l' in columns with 'prefix' leading each line.
824 The number of columns is determined automatically based on the string
830 clen = max(len(s) for s in l)
831 ncols = (tty_width() - len(prefix)) / (clen + 2)
836 while len(l) % ncols:
839 for s in range(0, len(l), rows):
840 cols.append(l[s:s+rows])
842 for row in zip(*cols):
843 out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
847 def parse_date_or_fatal(str, fatal):
848 """Parses the given date or calls Option.fatal().
849 For now we expect a string that contains a float."""
852 except ValueError, e:
853 raise fatal('invalid date format (should be a float): %r' % e)
858 def parse_excludes(options, fatal):
859 """Traverse the options and extract all excludes, or call Option.fatal()."""
863 (option, parameter) = flag
864 if option == '--exclude':
865 excluded_paths.append(realpath(parameter))
866 elif option == '--exclude-from':
868 f = open(realpath(parameter))
870 raise fatal("couldn't read %s" % parameter)
871 for exclude_path in f.readlines():
872 # FIXME: perhaps this should be rstrip('\n')
873 exclude_path = realpath(exclude_path.strip())
875 excluded_paths.append(exclude_path)
876 return sorted(frozenset(excluded_paths))
879 def parse_rx_excludes(options, fatal):
880 """Traverse the options and extract all rx excludes, or call
882 excluded_patterns = []
885 (option, parameter) = flag
886 if option == '--exclude-rx':
888 excluded_patterns.append(re.compile(parameter))
890 fatal('Invalid --exclude-rx pattern (%s): %s!' % (parameter, ex))
891 elif option == '--exclude-rx-from':
893 f = open(realpath(parameter))
895 raise fatal("couldn't read %s" % parameter)
896 for pattern in f.readlines():
897 spattern = pattern.rstrip('\n')
901 excluded_patterns.append(re.compile(spattern))
903 fatal('Invalid --exclude-rx pattern (%s): %s!' % (spattern, ex))
904 return excluded_patterns
907 def should_rx_exclude_path(path, exclude_rxs):
908 """Return True if path matches a regular expression in exclude_rxs."""
909 for rx in exclude_rxs:
911 debug1('Skipping %r: excluded by rx pattern %r.'
912 % (path, rx.pattern))
917 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
918 # that resolve against the current filesystem in the strip/graft
919 # functions for example, but elsewhere as well. I suspect bup's not
920 # always being careful about that. For some cases, the contents of
921 # the current filesystem should be irrelevant, and consulting it might
922 # produce the wrong result, perhaps via unintended symlink resolution,
925 def path_components(path):
926 """Break path into a list of pairs of the form (name,
927 full_path_to_name). Path must start with '/'.
929 '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
930 if not path.startswith('/'):
931 raise Exception, 'path must start with "/": %s' % path
932 # Since we assume path startswith('/'), we can skip the first element.
934 norm_path = os.path.abspath(path)
938 for p in norm_path.split('/')[1:]:
940 result.append((p, full_path))
944 def stripped_path_components(path, strip_prefixes):
945 """Strip any prefix in strip_prefixes from path and return a list
946 of path components where each component is (name,
947 none_or_full_fs_path_to_name). Assume path startswith('/').
948 See thelpers.py for examples."""
949 normalized_path = os.path.abspath(path)
950 sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
951 for bp in sorted_strip_prefixes:
952 normalized_bp = os.path.abspath(bp)
953 if normalized_path.startswith(normalized_bp):
954 prefix = normalized_path[:len(normalized_bp)]
956 for p in normalized_path[len(normalized_bp):].split('/'):
960 result.append((p, prefix))
963 return path_components(path)
966 def grafted_path_components(graft_points, path):
967 # Create a result that consists of some number of faked graft
968 # directories before the graft point, followed by all of the real
969 # directories from path that are after the graft point. Arrange
970 # for the directory at the graft point in the result to correspond
971 # to the "orig" directory in --graft orig=new. See t/thelpers.py
974 # Note that given --graft orig=new, orig and new have *nothing* to
975 # do with each other, even if some of their component names
976 # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
977 # equivalent to --graft /foo/bar/baz=/x/y/z, or even
980 # FIXME: This can't be the best solution...
981 clean_path = os.path.abspath(path)
982 for graft_point in graft_points:
983 old_prefix, new_prefix = graft_point
984 # Expand prefixes iff not absolute paths.
985 old_prefix = os.path.normpath(old_prefix)
986 new_prefix = os.path.normpath(new_prefix)
987 if clean_path.startswith(old_prefix):
988 escaped_prefix = re.escape(old_prefix)
989 grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
990 # Handle /foo=/ (at least) -- which produces //whatever.
991 grafted_path = '/' + grafted_path.lstrip('/')
992 clean_path_components = path_components(clean_path)
993 # Count the components that were stripped.
994 strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
995 new_prefix_parts = new_prefix.split('/')
996 result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
997 result = [(p, None) for p in result_prefix] \
998 + clean_path_components[strip_count:]
999 # Now set the graft point name to match the end of new_prefix.
1000 graft_point = len(result_prefix)
1001 result[graft_point] = \
1002 (new_prefix_parts[-1], clean_path_components[strip_count][1])
1003 if new_prefix == '/': # --graft ...=/ is a special case.
1006 return path_components(clean_path)