lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from ctypes import sizeof, c_void_p
   5 from os import environ
   6 from contextlib import contextmanager
   7 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   8 import hashlib, heapq, math, operator, time, grp, tempfile
   9
  10 from bup import _helpers
  11
  12 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  13 assert(sc_page_size > 0)
  14
  15 sc_arg_max = os.sysconf('SC_ARG_MAX')
  16 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  17     sc_arg_max = 2 * 1024 * 1024
  18
  19 # This function should really be in helpers, not in bup.options.  But we
  20 # want options.py to be standalone so people can include it in other projects.
  21 from bup.options import _tty_width
  22 tty_width = _tty_width
  23
  24
  25 def atoi(s):
  26     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  27     try:
  28         return int(s or '0')
  29     except ValueError:
  30         return 0
  31
  32
  33 def atof(s):
  34     """Convert the string 's' to a float. Return 0 if s is not a number."""
  35     try:
  36         return float(s or '0')
  37     except ValueError:
  38         return 0
  39
  40
  41 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  42
  43
  44 # If the platform doesn't have fdatasync (OS X), fall back to fsync.
  45 try:
  46     fdatasync = os.fdatasync
  47 except AttributeError:
  48     fdatasync = os.fsync
  49
  50
  51 # Write (blockingly) to sockets that may or may not be in blocking mode.
  52 # We need this because our stderr is sometimes eaten by subprocesses
  53 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
  54 # leading to race conditions.  Ick.  We'll do it the hard way.
  55 def _hard_write(fd, buf):
  56     while buf:
  57         (r,w,x) = select.select([], [fd], [], None)
  58         if not w:
  59             raise IOError('select(fd) returned without being writable')
  60         try:
  61             sz = os.write(fd, buf)
  62         except OSError as e:
  63             if e.errno != errno.EAGAIN:
  64                 raise
  65         assert(sz >= 0)
  66         buf = buf[sz:]
  67
  68
  69 _last_prog = 0
  70 def log(s):
  71     """Print a log message to stderr."""
  72     global _last_prog
  73     sys.stdout.flush()
  74     _hard_write(sys.stderr.fileno(), s)
  75     _last_prog = 0
  76
  77
  78 def debug1(s):
  79     if buglvl >= 1:
  80         log(s)
  81
  82
  83 def debug2(s):
  84     if buglvl >= 2:
  85         log(s)
  86
  87
  88 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
  89 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
  90 _last_progress = ''
  91 def progress(s):
  92     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
  93     global _last_progress
  94     if istty2:
  95         log(s)
  96         _last_progress = s
  97
  98
  99 def qprogress(s):
 100     """Calls progress() only if we haven't printed progress in a while.
 101
 102     This avoids overloading the stderr buffer with excess junk.
 103     """
 104     global _last_prog
 105     now = time.time()
 106     if now - _last_prog > 0.1:
 107         progress(s)
 108         _last_prog = now
 109
 110
 111 def reprogress():
 112     """Calls progress() to redisplay the most recent progress message.
 113
 114     Useful after you've printed some other message that wipes out the
 115     progress line.
 116     """
 117     if _last_progress and _last_progress.endswith('\r'):
 118         progress(_last_progress)
 119
 120
 121 def mkdirp(d, mode=None):
 122     """Recursively create directories on path 'd'.
 123
 124     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 125     the path already exists.
 126     """
 127     try:
 128         if mode:
 129             os.makedirs(d, mode)
 130         else:
 131             os.makedirs(d)
 132     except OSError as e:
 133         if e.errno == errno.EEXIST:
 134             pass
 135         else:
 136             raise
 137
 138
 139 _unspecified_next_default = object()
 140
 141 def _fallback_next(it, default=_unspecified_next_default):
 142     """Retrieve the next item from the iterator by calling its
 143     next() method. If default is given, it is returned if the
 144     iterator is exhausted, otherwise StopIteration is raised."""
 145
 146     if default is _unspecified_next_default:
 147         return it.next()
 148     else:
 149         try:
 150             return it.next()
 151         except StopIteration:
 152             return default
 153
 154 if sys.version_info < (2, 6):
 155     next =  _fallback_next
 156
 157
 158 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 159     if key:
 160         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 161     else:
 162         samekey = operator.eq
 163     count = 0
 164     total = sum(len(it) for it in iters)
 165     iters = (iter(it) for it in iters)
 166     heap = ((next(it, None),it) for it in iters)
 167     heap = [(e,it) for e,it in heap if e]
 168
 169     heapq.heapify(heap)
 170     pe = None
 171     while heap:
 172         if not count % pfreq:
 173             pfunc(count, total)
 174         e, it = heap[0]
 175         if not samekey(e, pe):
 176             pe = e
 177             yield e
 178         count += 1
 179         try:
 180             e = it.next() # Don't use next() function, it's too expensive
 181         except StopIteration:
 182             heapq.heappop(heap) # remove current
 183         else:
 184             heapq.heapreplace(heap, (e, it)) # shift current to new location
 185     pfinal(count, total)
 186
 187
 188 def unlink(f):
 189     """Delete a file at path 'f' if it currently exists.
 190
 191     Unlike os.unlink(), does not throw an exception if the file didn't already
 192     exist.
 193     """
 194     try:
 195         os.unlink(f)
 196     except OSError as e:
 197         if e.errno != errno.ENOENT:
 198             raise
 199
 200
 201 def readpipe(argv, preexec_fn=None):
 202     """Run a subprocess and return its output."""
 203     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn)
 204     out, err = p.communicate()
 205     if p.returncode != 0:
 206         raise Exception('subprocess %r failed with status %d'
 207                         % (' '.join(argv), p.returncode))
 208     return out
 209
 210
 211 def _argmax_base(command):
 212     base_size = 2048
 213     for c in command:
 214         base_size += len(command) + 1
 215     for k, v in environ.iteritems():
 216         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 217     return base_size
 218
 219
 220 def _argmax_args_size(args):
 221     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 222
 223
 224 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 225     """If args is not empty, yield the output produced by calling the
 226 command list with args as a sequence of strings (It may be necessary
 227 to return multiple strings in order to respect ARG_MAX)."""
 228     # The optional arg_max arg is a workaround for an issue with the
 229     # current wvtest behavior.
 230     base_size = _argmax_base(command)
 231     while args:
 232         room = arg_max - base_size
 233         i = 0
 234         while i < len(args):
 235             next_size = _argmax_args_size(args[i:i+1])
 236             if room - next_size < 0:
 237                 break
 238             room -= next_size
 239             i += 1
 240         sub_args = args[:i]
 241         args = args[i:]
 242         assert(len(sub_args))
 243         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 244
 245
 246 def realpath(p):
 247     """Get the absolute path of a file.
 248
 249     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 250     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 251     will follow symlinks in p's directory)
 252     """
 253     try:
 254         st = os.lstat(p)
 255     except OSError:
 256         st = None
 257     if st and stat.S_ISLNK(st.st_mode):
 258         (dir, name) = os.path.split(p)
 259         dir = os.path.realpath(dir)
 260         out = os.path.join(dir, name)
 261     else:
 262         out = os.path.realpath(p)
 263     #log('realpathing:%r,%r\n' % (p, out))
 264     return out
 265
 266
 267 def detect_fakeroot():
 268     "Return True if we appear to be running under fakeroot."
 269     return os.getenv("FAKEROOTKEY") != None
 270
 271
 272 def is_superuser():
 273     if sys.platform.startswith('cygwin'):
 274         import ctypes
 275         return ctypes.cdll.shell32.IsUserAnAdmin()
 276     else:
 277         return os.geteuid() == 0
 278
 279
 280 def _cache_key_value(get_value, key, cache):
 281     """Return (value, was_cached).  If there is a value in the cache
 282     for key, use that, otherwise, call get_value(key) which should
 283     throw a KeyError if there is no value -- in which case the cached
 284     and returned value will be None.
 285     """
 286     try: # Do we already have it (or know there wasn't one)?
 287         value = cache[key]
 288         return value, True
 289     except KeyError:
 290         pass
 291     value = None
 292     try:
 293         cache[key] = value = get_value(key)
 294     except KeyError:
 295         cache[key] = None
 296     return value, False
 297
 298
 299 _uid_to_pwd_cache = {}
 300 _name_to_pwd_cache = {}
 301
 302 def pwd_from_uid(uid):
 303     """Return password database entry for uid (may be a cached value).
 304     Return None if no entry is found.
 305     """
 306     global _uid_to_pwd_cache, _name_to_pwd_cache
 307     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 308     if entry and not cached:
 309         _name_to_pwd_cache[entry.pw_name] = entry
 310     return entry
 311
 312
 313 def pwd_from_name(name):
 314     """Return password database entry for name (may be a cached value).
 315     Return None if no entry is found.
 316     """
 317     global _uid_to_pwd_cache, _name_to_pwd_cache
 318     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 319     if entry and not cached:
 320         _uid_to_pwd_cache[entry.pw_uid] = entry
 321     return entry
 322
 323
 324 _gid_to_grp_cache = {}
 325 _name_to_grp_cache = {}
 326
 327 def grp_from_gid(gid):
 328     """Return password database entry for gid (may be a cached value).
 329     Return None if no entry is found.
 330     """
 331     global _gid_to_grp_cache, _name_to_grp_cache
 332     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 333     if entry and not cached:
 334         _name_to_grp_cache[entry.gr_name] = entry
 335     return entry
 336
 337
 338 def grp_from_name(name):
 339     """Return password database entry for name (may be a cached value).
 340     Return None if no entry is found.
 341     """
 342     global _gid_to_grp_cache, _name_to_grp_cache
 343     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 344     if entry and not cached:
 345         _gid_to_grp_cache[entry.gr_gid] = entry
 346     return entry
 347
 348
 349 _username = None
 350 def username():
 351     """Get the user's login name."""
 352     global _username
 353     if not _username:
 354         uid = os.getuid()
 355         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 356     return _username
 357
 358
 359 _userfullname = None
 360 def userfullname():
 361     """Get the user's full name."""
 362     global _userfullname
 363     if not _userfullname:
 364         uid = os.getuid()
 365         entry = pwd_from_uid(uid)
 366         if entry:
 367             _userfullname = entry[4].split(',')[0] or entry[0]
 368         if not _userfullname:
 369             _userfullname = 'user%d' % uid
 370     return _userfullname
 371
 372
 373 _hostname = None
 374 def hostname():
 375     """Get the FQDN of this machine."""
 376     global _hostname
 377     if not _hostname:
 378         _hostname = socket.getfqdn()
 379     return _hostname
 380
 381
 382 _resource_path = None
 383 def resource_path(subdir=''):
 384     global _resource_path
 385     if not _resource_path:
 386         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 387     return os.path.join(_resource_path, subdir)
 388
 389 def format_filesize(size):
 390     unit = 1024.0
 391     size = float(size)
 392     if size < unit:
 393         return "%d" % (size)
 394     exponent = int(math.log(size) / math.log(unit))
 395     size_prefix = "KMGTPE"[exponent - 1]
 396     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 397
 398
 399 class NotOk(Exception):
 400     pass
 401
 402
 403 class BaseConn:
 404     def __init__(self, outp):
 405         self.outp = outp
 406
 407     def close(self):
 408         while self._read(65536): pass
 409
 410     def read(self, size):
 411         """Read 'size' bytes from input stream."""
 412         self.outp.flush()
 413         return self._read(size)
 414
 415     def readline(self):
 416         """Read from input stream until a newline is found."""
 417         self.outp.flush()
 418         return self._readline()
 419
 420     def write(self, data):
 421         """Write 'data' to output stream."""
 422         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 423         self.outp.write(data)
 424
 425     def has_input(self):
 426         """Return true if input stream is readable."""
 427         raise NotImplemented("Subclasses must implement has_input")
 428
 429     def ok(self):
 430         """Indicate end of output from last sent command."""
 431         self.write('\nok\n')
 432
 433     def error(self, s):
 434         """Indicate server error to the client."""
 435         s = re.sub(r'\s+', ' ', str(s))
 436         self.write('\nerror %s\n' % s)
 437
 438     def _check_ok(self, onempty):
 439         self.outp.flush()
 440         rl = ''
 441         for rl in linereader(self):
 442             #log('%d got line: %r\n' % (os.getpid(), rl))
 443             if not rl:  # empty line
 444                 continue
 445             elif rl == 'ok':
 446                 return None
 447             elif rl.startswith('error '):
 448                 #log('client: error: %s\n' % rl[6:])
 449                 return NotOk(rl[6:])
 450             else:
 451                 onempty(rl)
 452         raise Exception('server exited unexpectedly; see errors above')
 453
 454     def drain_and_check_ok(self):
 455         """Remove all data for the current command from input stream."""
 456         def onempty(rl):
 457             pass
 458         return self._check_ok(onempty)
 459
 460     def check_ok(self):
 461         """Verify that server action completed successfully."""
 462         def onempty(rl):
 463             raise Exception('expected "ok", got %r' % rl)
 464         return self._check_ok(onempty)
 465
 466
 467 class Conn(BaseConn):
 468     def __init__(self, inp, outp):
 469         BaseConn.__init__(self, outp)
 470         self.inp = inp
 471
 472     def _read(self, size):
 473         return self.inp.read(size)
 474
 475     def _readline(self):
 476         return self.inp.readline()
 477
 478     def has_input(self):
 479         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 480         if rl:
 481             assert(rl[0] == self.inp.fileno())
 482             return True
 483         else:
 484             return None
 485
 486
 487 def checked_reader(fd, n):
 488     while n > 0:
 489         rl, _, _ = select.select([fd], [], [])
 490         assert(rl[0] == fd)
 491         buf = os.read(fd, n)
 492         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 493         yield buf
 494         n -= len(buf)
 495
 496
 497 MAX_PACKET = 128 * 1024
 498 def mux(p, outfd, outr, errr):
 499     try:
 500         fds = [outr, errr]
 501         while p.poll() is None:
 502             rl, _, _ = select.select(fds, [], [])
 503             for fd in rl:
 504                 if fd == outr:
 505                     buf = os.read(outr, MAX_PACKET)
 506                     if not buf: break
 507                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 508                 elif fd == errr:
 509                     buf = os.read(errr, 1024)
 510                     if not buf: break
 511                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 512     finally:
 513         os.write(outfd, struct.pack('!IB', 0, 3))
 514
 515
 516 class DemuxConn(BaseConn):
 517     """A helper class for bup's client-server protocol."""
 518     def __init__(self, infd, outp):
 519         BaseConn.__init__(self, outp)
 520         # Anything that comes through before the sync string was not
 521         # multiplexed and can be assumed to be debug/log before mux init.
 522         tail = ''
 523         while tail != 'BUPMUX':
 524             b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
 525             if not b:
 526                 raise IOError('demux: unexpected EOF during initialization')
 527             tail += b
 528             sys.stderr.write(tail[:-6])  # pre-mux log messages
 529             tail = tail[-6:]
 530         self.infd = infd
 531         self.reader = None
 532         self.buf = None
 533         self.closed = False
 534
 535     def write(self, data):
 536         self._load_buf(0)
 537         BaseConn.write(self, data)
 538
 539     def _next_packet(self, timeout):
 540         if self.closed: return False
 541         rl, wl, xl = select.select([self.infd], [], [], timeout)
 542         if not rl: return False
 543         assert(rl[0] == self.infd)
 544         ns = ''.join(checked_reader(self.infd, 5))
 545         n, fdw = struct.unpack('!IB', ns)
 546         assert(n <= MAX_PACKET)
 547         if fdw == 1:
 548             self.reader = checked_reader(self.infd, n)
 549         elif fdw == 2:
 550             for buf in checked_reader(self.infd, n):
 551                 sys.stderr.write(buf)
 552         elif fdw == 3:
 553             self.closed = True
 554             debug2("DemuxConn: marked closed\n")
 555         return True
 556
 557     def _load_buf(self, timeout):
 558         if self.buf is not None:
 559             return True
 560         while not self.closed:
 561             while not self.reader:
 562                 if not self._next_packet(timeout):
 563                     return False
 564             try:
 565                 self.buf = self.reader.next()
 566                 return True
 567             except StopIteration:
 568                 self.reader = None
 569         return False
 570
 571     def _read_parts(self, ix_fn):
 572         while self._load_buf(None):
 573             assert(self.buf is not None)
 574             i = ix_fn(self.buf)
 575             if i is None or i == len(self.buf):
 576                 yv = self.buf
 577                 self.buf = None
 578             else:
 579                 yv = self.buf[:i]
 580                 self.buf = self.buf[i:]
 581             yield yv
 582             if i is not None:
 583                 break
 584
 585     def _readline(self):
 586         def find_eol(buf):
 587             try:
 588                 return buf.index('\n')+1
 589             except ValueError:
 590                 return None
 591         return ''.join(self._read_parts(find_eol))
 592
 593     def _read(self, size):
 594         csize = [size]
 595         def until_size(buf): # Closes on csize
 596             if len(buf) < csize[0]:
 597                 csize[0] -= len(buf)
 598                 return None
 599             else:
 600                 return csize[0]
 601         return ''.join(self._read_parts(until_size))
 602
 603     def has_input(self):
 604         return self._load_buf(0)
 605
 606
 607 def linereader(f):
 608     """Generate a list of input lines from 'f' without terminating newlines."""
 609     while 1:
 610         line = f.readline()
 611         if not line:
 612             break
 613         yield line[:-1]
 614
 615
 616 def chunkyreader(f, count = None):
 617     """Generate a list of chunks of data read from 'f'.
 618
 619     If count is None, read until EOF is reached.
 620
 621     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 622     reached while reading, raise IOError.
 623     """
 624     if count != None:
 625         while count > 0:
 626             b = f.read(min(count, 65536))
 627             if not b:
 628                 raise IOError('EOF with %d bytes remaining' % count)
 629             yield b
 630             count -= len(b)
 631     else:
 632         while 1:
 633             b = f.read(65536)
 634             if not b: break
 635             yield b
 636
 637
 638 @contextmanager
 639 def atomically_replaced_file(name, mode='w', buffering=-1):
 640     """Yield a file that will be atomically renamed name when leaving the block.
 641
 642     This contextmanager yields an open file object that is backed by a
 643     temporary file which will be renamed (atomically) to the target
 644     name if everything succeeds.
 645
 646     The mode and buffering arguments are handled exactly as with open,
 647     and the yielded file will have very restrictive permissions, as
 648     per mkstemp.
 649
 650     E.g.::
 651
 652         with atomically_replaced_file('foo.txt', 'w') as f:
 653             f.write('hello jack.')
 654
 655     """
 656
 657     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 658                                        text=('b' not in mode))
 659     try:
 660         try:
 661             f = os.fdopen(ffd, mode, buffering)
 662         except:
 663             os.close(ffd)
 664             raise
 665         try:
 666             yield f
 667         finally:
 668             f.close()
 669         os.rename(tempname, name)
 670     finally:
 671         unlink(tempname)  # nonexistant file is ignored
 672
 673
 674 def slashappend(s):
 675     """Append "/" to 's' if it doesn't aleady end in "/"."""
 676     if s and not s.endswith('/'):
 677         return s + '/'
 678     else:
 679         return s
 680
 681
 682 def _mmap_do(f, sz, flags, prot, close):
 683     if not sz:
 684         st = os.fstat(f.fileno())
 685         sz = st.st_size
 686     if not sz:
 687         # trying to open a zero-length map gives an error, but an empty
 688         # string has all the same behaviour of a zero-length map, ie. it has
 689         # no elements :)
 690         return ''
 691     map = mmap.mmap(f.fileno(), sz, flags, prot)
 692     if close:
 693         f.close()  # map will persist beyond file close
 694     return map
 695
 696
 697 def mmap_read(f, sz = 0, close=True):
 698     """Create a read-only memory mapped region on file 'f'.
 699     If sz is 0, the region will cover the entire file.
 700     """
 701     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 702
 703
 704 def mmap_readwrite(f, sz = 0, close=True):
 705     """Create a read-write memory mapped region on file 'f'.
 706     If sz is 0, the region will cover the entire file.
 707     """
 708     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 709                     close)
 710
 711
 712 def mmap_readwrite_private(f, sz = 0, close=True):
 713     """Create a read-write memory mapped region on file 'f'.
 714     If sz is 0, the region will cover the entire file.
 715     The map is private, which means the changes are never flushed back to the
 716     file.
 717     """
 718     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 719                     close)
 720
 721
 722 _mincore = getattr(_helpers, 'mincore', None)
 723 if _mincore:
 724     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 725     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 726
 727     _fmincore_chunk_size = None
 728     def _set_fmincore_chunk_size():
 729         global _fmincore_chunk_size
 730         pref_chunk_size = 64 * 1024 * 1024
 731         chunk_size = sc_page_size
 732         if (sc_page_size < pref_chunk_size):
 733             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 734         _fmincore_chunk_size = chunk_size
 735
 736     def fmincore(fd):
 737         """Return the mincore() data for fd as a bytearray whose values can be
 738         tested via MINCORE_INCORE, or None if fd does not fully
 739         support the operation."""
 740         st = os.fstat(fd)
 741         if (st.st_size == 0):
 742             return bytearray(0)
 743         if not _fmincore_chunk_size:
 744             _set_fmincore_chunk_size()
 745         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 746         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 747         chunk_count = page_count / _fmincore_chunk_size
 748         if chunk_count < 1:
 749             chunk_count = 1
 750         result = bytearray(page_count)
 751         for ci in xrange(chunk_count):
 752             pos = _fmincore_chunk_size * ci;
 753             msize = min(_fmincore_chunk_size, st.st_size - pos)
 754             try:
 755                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 756             except mmap.error as ex:
 757                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 758                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 759                     return None
 760                 raise ex
 761             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 762         return result
 763
 764
 765 def parse_timestamp(epoch_str):
 766     """Return the number of nanoseconds since the epoch that are described
 767 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 768 throw a ValueError that may contain additional information."""
 769     ns_per = {'s' :  1000000000,
 770               'ms' : 1000000,
 771               'us' : 1000,
 772               'ns' : 1}
 773     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 774     if not match:
 775         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 776             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 777         raise ValueError()
 778     (n, units) = match.group(1, 2)
 779     if not n:
 780         n = 1
 781     n = int(n)
 782     return n * ns_per[units]
 783
 784
 785 def parse_num(s):
 786     """Parse data size information into a float number.
 787
 788     Here are some examples of conversions:
 789         199.2k means 203981 bytes
 790         1GB means 1073741824 bytes
 791         2.1 tb means 2199023255552 bytes
 792     """
 793     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 794     if not g:
 795         raise ValueError("can't parse %r as a number" % s)
 796     (val, unit) = g.groups()
 797     num = float(val)
 798     unit = unit.lower()
 799     if unit in ['t', 'tb']:
 800         mult = 1024*1024*1024*1024
 801     elif unit in ['g', 'gb']:
 802         mult = 1024*1024*1024
 803     elif unit in ['m', 'mb']:
 804         mult = 1024*1024
 805     elif unit in ['k', 'kb']:
 806         mult = 1024
 807     elif unit in ['', 'b']:
 808         mult = 1
 809     else:
 810         raise ValueError("invalid unit %r in number %r" % (unit, s))
 811     return int(num*mult)
 812
 813
 814 def count(l):
 815     """Count the number of elements in an iterator. (consumes the iterator)"""
 816     return reduce(lambda x,y: x+1, l)
 817
 818
 819 saved_errors = []
 820 def add_error(e):
 821     """Append an error message to the list of saved errors.
 822
 823     Once processing is able to stop and output the errors, the saved errors are
 824     accessible in the module variable helpers.saved_errors.
 825     """
 826     saved_errors.append(e)
 827     log('%-70s\n' % e)
 828
 829
 830 def clear_errors():
 831     global saved_errors
 832     saved_errors = []
 833
 834
 835 def handle_ctrl_c():
 836     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 837
 838     The new exception handler will make sure that bup will exit without an ugly
 839     stacktrace when Ctrl-C is hit.
 840     """
 841     oldhook = sys.excepthook
 842     def newhook(exctype, value, traceback):
 843         if exctype == KeyboardInterrupt:
 844             log('\nInterrupted.\n')
 845         else:
 846             return oldhook(exctype, value, traceback)
 847     sys.excepthook = newhook
 848
 849
 850 def columnate(l, prefix):
 851     """Format elements of 'l' in columns with 'prefix' leading each line.
 852
 853     The number of columns is determined automatically based on the string
 854     lengths.
 855     """
 856     if not l:
 857         return ""
 858     l = l[:]
 859     clen = max(len(s) for s in l)
 860     ncols = (tty_width() - len(prefix)) / (clen + 2)
 861     if ncols <= 1:
 862         ncols = 1
 863         clen = 0
 864     cols = []
 865     while len(l) % ncols:
 866         l.append('')
 867     rows = len(l)/ncols
 868     for s in range(0, len(l), rows):
 869         cols.append(l[s:s+rows])
 870     out = ''
 871     for row in zip(*cols):
 872         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 873     return out
 874
 875
 876 def parse_date_or_fatal(str, fatal):
 877     """Parses the given date or calls Option.fatal().
 878     For now we expect a string that contains a float."""
 879     try:
 880         date = float(str)
 881     except ValueError as e:
 882         raise fatal('invalid date format (should be a float): %r' % e)
 883     else:
 884         return date
 885
 886
 887 def parse_excludes(options, fatal):
 888     """Traverse the options and extract all excludes, or call Option.fatal()."""
 889     excluded_paths = []
 890
 891     for flag in options:
 892         (option, parameter) = flag
 893         if option == '--exclude':
 894             excluded_paths.append(realpath(parameter))
 895         elif option == '--exclude-from':
 896             try:
 897                 f = open(realpath(parameter))
 898             except IOError as e:
 899                 raise fatal("couldn't read %s" % parameter)
 900             for exclude_path in f.readlines():
 901                 # FIXME: perhaps this should be rstrip('\n')
 902                 exclude_path = realpath(exclude_path.strip())
 903                 if exclude_path:
 904                     excluded_paths.append(exclude_path)
 905     return sorted(frozenset(excluded_paths))
 906
 907
 908 def parse_rx_excludes(options, fatal):
 909     """Traverse the options and extract all rx excludes, or call
 910     Option.fatal()."""
 911     excluded_patterns = []
 912
 913     for flag in options:
 914         (option, parameter) = flag
 915         if option == '--exclude-rx':
 916             try:
 917                 excluded_patterns.append(re.compile(parameter))
 918             except re.error as ex:
 919                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 920         elif option == '--exclude-rx-from':
 921             try:
 922                 f = open(realpath(parameter))
 923             except IOError as e:
 924                 raise fatal("couldn't read %s" % parameter)
 925             for pattern in f.readlines():
 926                 spattern = pattern.rstrip('\n')
 927                 if not spattern:
 928                     continue
 929                 try:
 930                     excluded_patterns.append(re.compile(spattern))
 931                 except re.error as ex:
 932                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 933     return excluded_patterns
 934
 935
 936 def should_rx_exclude_path(path, exclude_rxs):
 937     """Return True if path matches a regular expression in exclude_rxs."""
 938     for rx in exclude_rxs:
 939         if rx.search(path):
 940             debug1('Skipping %r: excluded by rx pattern %r.\n'
 941                    % (path, rx.pattern))
 942             return True
 943     return False
 944
 945
 946 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 947 # that resolve against the current filesystem in the strip/graft
 948 # functions for example, but elsewhere as well.  I suspect bup's not
 949 # always being careful about that.  For some cases, the contents of
 950 # the current filesystem should be irrelevant, and consulting it might
 951 # produce the wrong result, perhaps via unintended symlink resolution,
 952 # for example.
 953
 954 def path_components(path):
 955     """Break path into a list of pairs of the form (name,
 956     full_path_to_name).  Path must start with '/'.
 957     Example:
 958       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 959     if not path.startswith('/'):
 960         raise Exception, 'path must start with "/": %s' % path
 961     # Since we assume path startswith('/'), we can skip the first element.
 962     result = [('', '/')]
 963     norm_path = os.path.abspath(path)
 964     if norm_path == '/':
 965         return result
 966     full_path = ''
 967     for p in norm_path.split('/')[1:]:
 968         full_path += '/' + p
 969         result.append((p, full_path))
 970     return result
 971
 972
 973 def stripped_path_components(path, strip_prefixes):
 974     """Strip any prefix in strip_prefixes from path and return a list
 975     of path components where each component is (name,
 976     none_or_full_fs_path_to_name).  Assume path startswith('/').
 977     See thelpers.py for examples."""
 978     normalized_path = os.path.abspath(path)
 979     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
 980     for bp in sorted_strip_prefixes:
 981         normalized_bp = os.path.abspath(bp)
 982         if normalized_bp == '/':
 983             continue
 984         if normalized_path.startswith(normalized_bp):
 985             prefix = normalized_path[:len(normalized_bp)]
 986             result = []
 987             for p in normalized_path[len(normalized_bp):].split('/'):
 988                 if p: # not root
 989                     prefix += '/'
 990                 prefix += p
 991                 result.append((p, prefix))
 992             return result
 993     # Nothing to strip.
 994     return path_components(path)
 995
 996
 997 def grafted_path_components(graft_points, path):
 998     # Create a result that consists of some number of faked graft
 999     # directories before the graft point, followed by all of the real
1000     # directories from path that are after the graft point.  Arrange
1001     # for the directory at the graft point in the result to correspond
1002     # to the "orig" directory in --graft orig=new.  See t/thelpers.py
1003     # for some examples.
1004
1005     # Note that given --graft orig=new, orig and new have *nothing* to
1006     # do with each other, even if some of their component names
1007     # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
1008     # equivalent to --graft /foo/bar/baz=/x/y/z, or even
1009     # /foo/bar/baz=/x.
1010
1011     # FIXME: This can't be the best solution...
1012     clean_path = os.path.abspath(path)
1013     for graft_point in graft_points:
1014         old_prefix, new_prefix = graft_point
1015         # Expand prefixes iff not absolute paths.
1016         old_prefix = os.path.normpath(old_prefix)
1017         new_prefix = os.path.normpath(new_prefix)
1018         if clean_path.startswith(old_prefix):
1019             escaped_prefix = re.escape(old_prefix)
1020             grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
1021             # Handle /foo=/ (at least) -- which produces //whatever.
1022             grafted_path = '/' + grafted_path.lstrip('/')
1023             clean_path_components = path_components(clean_path)
1024             # Count the components that were stripped.
1025             strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
1026             new_prefix_parts = new_prefix.split('/')
1027             result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
1028             result = [(p, None) for p in result_prefix] \
1029                 + clean_path_components[strip_count:]
1030             # Now set the graft point name to match the end of new_prefix.
1031             graft_point = len(result_prefix)
1032             result[graft_point] = \
1033                 (new_prefix_parts[-1], clean_path_components[strip_count][1])
1034             if new_prefix == '/': # --graft ...=/ is a special case.
1035                 return result[1:]
1036             return result
1037     return path_components(clean_path)
1038
1039
1040 Sha1 = hashlib.sha1
1041
1042
1043 _localtime = getattr(_helpers, 'localtime', None)
1044
1045 if _localtime:
1046     bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
1047                                        'tm_hour', 'tm_min', 'tm_sec',
1048                                        'tm_wday', 'tm_yday',
1049                                        'tm_isdst', 'tm_gmtoff', 'tm_zone'])
1050
1051 # Define a localtime() that returns bup_time when possible.  Note:
1052 # this means that any helpers.localtime() results may need to be
1053 # passed through to_py_time() before being passed to python's time
1054 # module, which doesn't appear willing to ignore the extra items.
1055 if _localtime:
1056     def localtime(time):
1057         return bup_time(*_helpers.localtime(time))
1058     def utc_offset_str(t):
1059         """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
1060         If the current UTC offset does not represent an integer number
1061         of minutes, the fractional component will be truncated."""
1062         off = localtime(t).tm_gmtoff
1063         # Note: // doesn't truncate like C for negative values, it rounds down.
1064         offmin = abs(off) // 60
1065         m = offmin % 60
1066         h = (offmin - m) // 60
1067         return "%+03d%02d" % (-h if off < 0 else h, m)
1068     def to_py_time(x):
1069         if isinstance(x, time.struct_time):
1070             return x
1071         return time.struct_time(x[:9])
1072 else:
1073     localtime = time.localtime
1074     def utc_offset_str(t):
1075         return time.strftime('%z', localtime(t))
1076     def to_py_time(x):
1077         return x