lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from ctypes import sizeof, c_void_p
   5 from os import environ
   6 from contextlib import contextmanager
   7 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   8 import hashlib, heapq, math, operator, time, grp, tempfile
   9
  10 from bup import _helpers
  11
# Memory page size in bytes as reported by sysconf(); fmincore() below
# sizes its chunks and its per-page result array from this value.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Value of sysconf(SC_ARG_MAX); used as the default arg_max for batchpipe().
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  18
  19 # This function should really be in helpers, not in bup.options.  But we
  20 # want options.py to be standalone so people can include it in other projects.
  21 from bup.options import _tty_width
  22 tty_width = _tty_width
  23
  24
  25 def atoi(s):
  26     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  27     try:
  28         return int(s or '0')
  29     except ValueError:
  30         return 0
  31
  32
  33 def atof(s):
  34     """Convert the string 's' to a float. Return 0 if s is not a number."""
  35     try:
  36         return float(s or '0')
  37     except ValueError:
  38         return 0
  39
  40
# Debug verbosity read from the BUP_DEBUG environment variable (0 when it
# is unset or not a number); debug1() and debug2() compare against it.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  42
  43
  44 try:
  45     _fdatasync = os.fdatasync
  46 except AttributeError:
  47     _fdatasync = os.fsync
  48
  49 if sys.platform.startswith('darwin'):
  50     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  51     import fcntl
  52     def fdatasync(fd):
  53         try:
  54             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  55         except IOError as e:
  56             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  57             if e.errno == errno.ENOTSUP:
  58                 return _fdatasync(fd)
  59             else:
  60                 raise
  61 else:
  62     fdatasync = _fdatasync
  63
  64
  65 # Write (blockingly) to sockets that may or may not be in blocking mode.
  66 # We need this because our stderr is sometimes eaten by subprocesses
  67 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
  68 # leading to race conditions.  Ick.  We'll do it the hard way.
  69 def _hard_write(fd, buf):
  70     while buf:
  71         (r,w,x) = select.select([], [fd], [], None)
  72         if not w:
  73             raise IOError('select(fd) returned without being writable')
  74         try:
  75             sz = os.write(fd, buf)
  76         except OSError as e:
  77             if e.errno != errno.EAGAIN:
  78                 raise
  79         assert(sz >= 0)
  80         buf = buf[sz:]
  81
  82
  83 _last_prog = 0
  84 def log(s):
  85     """Print a log message to stderr."""
  86     global _last_prog
  87     sys.stdout.flush()
  88     _hard_write(sys.stderr.fileno(), s)
  89     _last_prog = 0
  90
  91
  92 def debug1(s):
  93     if buglvl >= 1:
  94         log(s)
  95
  96
  97 def debug2(s):
  98     if buglvl >= 2:
  99         log(s)
 100
 101
 102 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 103 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 104 _last_progress = ''
 105 def progress(s):
 106     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 107     global _last_progress
 108     if istty2:
 109         log(s)
 110         _last_progress = s
 111
 112
 113 def qprogress(s):
 114     """Calls progress() only if we haven't printed progress in a while.
 115
 116     This avoids overloading the stderr buffer with excess junk.
 117     """
 118     global _last_prog
 119     now = time.time()
 120     if now - _last_prog > 0.1:
 121         progress(s)
 122         _last_prog = now
 123
 124
 125 def reprogress():
 126     """Calls progress() to redisplay the most recent progress message.
 127
 128     Useful after you've printed some other message that wipes out the
 129     progress line.
 130     """
 131     if _last_progress and _last_progress.endswith('\r'):
 132         progress(_last_progress)
 133
 134
 135 def mkdirp(d, mode=None):
 136     """Recursively create directories on path 'd'.
 137
 138     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 139     the path already exists.
 140     """
 141     try:
 142         if mode:
 143             os.makedirs(d, mode)
 144         else:
 145             os.makedirs(d)
 146     except OSError as e:
 147         if e.errno == errno.EEXIST:
 148             pass
 149         else:
 150             raise
 151
 152
 153 _unspecified_next_default = object()
 154
 155 def _fallback_next(it, default=_unspecified_next_default):
 156     """Retrieve the next item from the iterator by calling its
 157     next() method. If default is given, it is returned if the
 158     iterator is exhausted, otherwise StopIteration is raised."""
 159
 160     if default is _unspecified_next_default:
 161         return it.next()
 162     else:
 163         try:
 164             return it.next()
 165         except StopIteration:
 166             return default
 167
 168 if sys.version_info < (2, 6):
 169     next =  _fallback_next
 170
 171
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge several iterables through a heap, yielding de-duplicated items.

    'iters' is traversed twice (once for len(), once for iter()), so it
    must be re-iterable and each member must support len().  An item is
    yielded only when it differs from the previously yielded one: by
    equality, or by comparing attribute 'key' when 'key' is given.
    pfunc(count, total) is called every 'pfreq' processed items and
    pfinal(count, total) once at the end.

    NOTE(review): presumably every input is already sorted, otherwise the
    adjacent-duplicate check cannot catch all duplicates -- confirm.
    """
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    heap = ((next(it, None),it) for it in iters)
    # Inputs whose first element is falsy (exhausted, or e.g. '') are dropped.
    heap = [(e,it) for e,it in heap if e]

    heapq.heapify(heap)
    pe = None  # the most recently yielded element
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0]
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = it.next() # Don't use next() function, it's too expensive
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            heapq.heapreplace(heap, (e, it)) # shift current to new location
    pfinal(count, total)
 200
 201
 202 def unlink(f):
 203     """Delete a file at path 'f' if it currently exists.
 204
 205     Unlike os.unlink(), does not throw an exception if the file didn't already
 206     exist.
 207     """
 208     try:
 209         os.unlink(f)
 210     except OSError as e:
 211         if e.errno != errno.ENOENT:
 212             raise
 213
 214
 215 def readpipe(argv, preexec_fn=None, shell=False):
 216     """Run a subprocess and return its output."""
 217     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 218                          shell=shell)
 219     out, err = p.communicate()
 220     if p.returncode != 0:
 221         raise Exception('subprocess %r failed with status %d'
 222                         % (' '.join(argv), p.returncode))
 223     return out
 224
 225
 226 def _argmax_base(command):
 227     base_size = 2048
 228     for c in command:
 229         base_size += len(command) + 1
 230     for k, v in environ.iteritems():
 231         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 232     return base_size
 233
 234
 235 def _argmax_args_size(args):
 236     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 237
 238
 239 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 240     """If args is not empty, yield the output produced by calling the
 241 command list with args as a sequence of strings (It may be necessary
 242 to return multiple strings in order to respect ARG_MAX)."""
 243     # The optional arg_max arg is a workaround for an issue with the
 244     # current wvtest behavior.
 245     base_size = _argmax_base(command)
 246     while args:
 247         room = arg_max - base_size
 248         i = 0
 249         while i < len(args):
 250             next_size = _argmax_args_size(args[i:i+1])
 251             if room - next_size < 0:
 252                 break
 253             room -= next_size
 254             i += 1
 255         sub_args = args[:i]
 256         args = args[i:]
 257         assert(len(sub_args))
 258         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 259
 260
 261 def resolve_parent(p):
 262     """Return the absolute path of a file without following any final symlink.
 263
 264     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 265     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 266     will follow symlinks in p's directory)
 267     """
 268     try:
 269         st = os.lstat(p)
 270     except OSError:
 271         st = None
 272     if st and stat.S_ISLNK(st.st_mode):
 273         (dir, name) = os.path.split(p)
 274         dir = os.path.realpath(dir)
 275         out = os.path.join(dir, name)
 276     else:
 277         out = os.path.realpath(p)
 278     #log('realpathing:%r,%r\n' % (p, out))
 279     return out
 280
 281
 282 def detect_fakeroot():
 283     "Return True if we appear to be running under fakeroot."
 284     return os.getenv("FAKEROOTKEY") != None
 285
 286
 287 _warned_about_superuser_detection = None
 288 def is_superuser():
 289     if sys.platform.startswith('cygwin'):
 290         if sys.getwindowsversion()[0] > 5:
 291             # Sounds like situation is much more complicated here
 292             global _warned_about_superuser_detection
 293             if not _warned_about_superuser_detection:
 294                 log("can't detect root status for OS version > 5; assuming not root")
 295                 _warned_about_superuser_detection = True
 296             return False
 297         import ctypes
 298         return ctypes.cdll.shell32.IsUserAnAdmin()
 299     else:
 300         return os.geteuid() == 0
 301
 302
 303 def _cache_key_value(get_value, key, cache):
 304     """Return (value, was_cached).  If there is a value in the cache
 305     for key, use that, otherwise, call get_value(key) which should
 306     throw a KeyError if there is no value -- in which case the cached
 307     and returned value will be None.
 308     """
 309     try: # Do we already have it (or know there wasn't one)?
 310         value = cache[key]
 311         return value, True
 312     except KeyError:
 313         pass
 314     value = None
 315     try:
 316         cache[key] = value = get_value(key)
 317     except KeyError:
 318         cache[key] = None
 319     return value, False
 320
 321
 322 _uid_to_pwd_cache = {}
 323 _name_to_pwd_cache = {}
 324
 325 def pwd_from_uid(uid):
 326     """Return password database entry for uid (may be a cached value).
 327     Return None if no entry is found.
 328     """
 329     global _uid_to_pwd_cache, _name_to_pwd_cache
 330     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 331     if entry and not cached:
 332         _name_to_pwd_cache[entry.pw_name] = entry
 333     return entry
 334
 335
 336 def pwd_from_name(name):
 337     """Return password database entry for name (may be a cached value).
 338     Return None if no entry is found.
 339     """
 340     global _uid_to_pwd_cache, _name_to_pwd_cache
 341     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 342     if entry and not cached:
 343         _uid_to_pwd_cache[entry.pw_uid] = entry
 344     return entry
 345
 346
 347 _gid_to_grp_cache = {}
 348 _name_to_grp_cache = {}
 349
 350 def grp_from_gid(gid):
 351     """Return password database entry for gid (may be a cached value).
 352     Return None if no entry is found.
 353     """
 354     global _gid_to_grp_cache, _name_to_grp_cache
 355     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 356     if entry and not cached:
 357         _name_to_grp_cache[entry.gr_name] = entry
 358     return entry
 359
 360
 361 def grp_from_name(name):
 362     """Return password database entry for name (may be a cached value).
 363     Return None if no entry is found.
 364     """
 365     global _gid_to_grp_cache, _name_to_grp_cache
 366     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 367     if entry and not cached:
 368         _gid_to_grp_cache[entry.gr_gid] = entry
 369     return entry
 370
 371
 372 _username = None
 373 def username():
 374     """Get the user's login name."""
 375     global _username
 376     if not _username:
 377         uid = os.getuid()
 378         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 379     return _username
 380
 381
 382 _userfullname = None
 383 def userfullname():
 384     """Get the user's full name."""
 385     global _userfullname
 386     if not _userfullname:
 387         uid = os.getuid()
 388         entry = pwd_from_uid(uid)
 389         if entry:
 390             _userfullname = entry[4].split(',')[0] or entry[0]
 391         if not _userfullname:
 392             _userfullname = 'user%d' % uid
 393     return _userfullname
 394
 395
 396 _hostname = None
 397 def hostname():
 398     """Get the FQDN of this machine."""
 399     global _hostname
 400     if not _hostname:
 401         _hostname = socket.getfqdn()
 402     return _hostname
 403
 404
 405 _resource_path = None
 406 def resource_path(subdir=''):
 407     global _resource_path
 408     if not _resource_path:
 409         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 410     return os.path.join(_resource_path, subdir)
 411
 412 def format_filesize(size):
 413     unit = 1024.0
 414     size = float(size)
 415     if size < unit:
 416         return "%d" % (size)
 417     exponent = int(math.log(size) / math.log(unit))
 418     size_prefix = "KMGTPE"[exponent - 1]
 419     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 420
 421
 422 class NotOk(Exception):
 423     pass
 424
 425
 426 class BaseConn:
 427     def __init__(self, outp):
 428         self.outp = outp
 429
 430     def close(self):
 431         while self._read(65536): pass
 432
 433     def read(self, size):
 434         """Read 'size' bytes from input stream."""
 435         self.outp.flush()
 436         return self._read(size)
 437
 438     def readline(self):
 439         """Read from input stream until a newline is found."""
 440         self.outp.flush()
 441         return self._readline()
 442
 443     def write(self, data):
 444         """Write 'data' to output stream."""
 445         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 446         self.outp.write(data)
 447
 448     def has_input(self):
 449         """Return true if input stream is readable."""
 450         raise NotImplemented("Subclasses must implement has_input")
 451
 452     def ok(self):
 453         """Indicate end of output from last sent command."""
 454         self.write('\nok\n')
 455
 456     def error(self, s):
 457         """Indicate server error to the client."""
 458         s = re.sub(r'\s+', ' ', str(s))
 459         self.write('\nerror %s\n' % s)
 460
 461     def _check_ok(self, onempty):
 462         self.outp.flush()
 463         rl = ''
 464         for rl in linereader(self):
 465             #log('%d got line: %r\n' % (os.getpid(), rl))
 466             if not rl:  # empty line
 467                 continue
 468             elif rl == 'ok':
 469                 return None
 470             elif rl.startswith('error '):
 471                 #log('client: error: %s\n' % rl[6:])
 472                 return NotOk(rl[6:])
 473             else:
 474                 onempty(rl)
 475         raise Exception('server exited unexpectedly; see errors above')
 476
 477     def drain_and_check_ok(self):
 478         """Remove all data for the current command from input stream."""
 479         def onempty(rl):
 480             pass
 481         return self._check_ok(onempty)
 482
 483     def check_ok(self):
 484         """Verify that server action completed successfully."""
 485         def onempty(rl):
 486             raise Exception('expected "ok", got %r' % rl)
 487         return self._check_ok(onempty)
 488
 489
 490 class Conn(BaseConn):
 491     def __init__(self, inp, outp):
 492         BaseConn.__init__(self, outp)
 493         self.inp = inp
 494
 495     def _read(self, size):
 496         return self.inp.read(size)
 497
 498     def _readline(self):
 499         return self.inp.readline()
 500
 501     def has_input(self):
 502         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 503         if rl:
 504             assert(rl[0] == self.inp.fileno())
 505             return True
 506         else:
 507             return None
 508
 509
 510 def checked_reader(fd, n):
 511     while n > 0:
 512         rl, _, _ = select.select([fd], [], [])
 513         assert(rl[0] == fd)
 514         buf = os.read(fd, n)
 515         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 516         yield buf
 517         n -= len(buf)
 518
 519
 520 MAX_PACKET = 128 * 1024
 521 def mux(p, outfd, outr, errr):
 522     try:
 523         fds = [outr, errr]
 524         while p.poll() is None:
 525             rl, _, _ = select.select(fds, [], [])
 526             for fd in rl:
 527                 if fd == outr:
 528                     buf = os.read(outr, MAX_PACKET)
 529                     if not buf: break
 530                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 531                 elif fd == errr:
 532                     buf = os.read(errr, 1024)
 533                     if not buf: break
 534                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 535     finally:
 536         os.write(outfd, struct.pack('!IB', 0, 3))
 537
 538
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the packet stream produced by mux() from the descriptor 'infd':
    each packet is a 5-byte '!IB' header (payload length, channel)
    followed by the payload.  Channel 1 carries data for the reader,
    channel 2 is copied to stderr, and channel 3 marks the stream closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to fill a 6-byte window, then one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # checked_reader over the current data packet
        self.buf = None      # data read from a packet but not yet consumed
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        # Poll the input (timeout 0) before writing.
        # NOTE(review): presumably so pending stderr packets get shown
        # promptly -- confirm.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and dispatch on its channel.

        Returns False if the connection is closed or nothing arrived
        within 'timeout' (as given to select()); True otherwise.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily through self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: forward the payload right away.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return True if it does.

        Returns False when no data packet became available within
        'timeout' or when the connection has been closed.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = self.reader.next()
                return True
            except StopIteration:
                # Current packet exhausted; go look for the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of input until ix_fn(buf) returns a split index.

        While ix_fn returns None the whole buffer is yielded and reading
        continues; once it returns an index, buf[:index] is yielded, the
        remainder is kept in self.buf, and the generator stops.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        """Return input up to and including the next newline."""
        def find_eol(buf):
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        """Return up to 'size' bytes; fewer only if the stream closes."""
        # One-element list so the nested function can update the count.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if buffered or immediately readable data exists."""
        return self._load_buf(0)
 628
 629
 630 def linereader(f):
 631     """Generate a list of input lines from 'f' without terminating newlines."""
 632     while 1:
 633         line = f.readline()
 634         if not line:
 635             break
 636         yield line[:-1]
 637
 638
 639 def chunkyreader(f, count = None):
 640     """Generate a list of chunks of data read from 'f'.
 641
 642     If count is None, read until EOF is reached.
 643
 644     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 645     reached while reading, raise IOError.
 646     """
 647     if count != None:
 648         while count > 0:
 649             b = f.read(min(count, 65536))
 650             if not b:
 651                 raise IOError('EOF with %d bytes remaining' % count)
 652             yield b
 653             count -= len(b)
 654     else:
 655         while 1:
 656             b = f.read(65536)
 657             if not b: break
 658             yield b
 659
 660
 661 @contextmanager
 662 def atomically_replaced_file(name, mode='w', buffering=-1):
 663     """Yield a file that will be atomically renamed name when leaving the block.
 664
 665     This contextmanager yields an open file object that is backed by a
 666     temporary file which will be renamed (atomically) to the target
 667     name if everything succeeds.
 668
 669     The mode and buffering arguments are handled exactly as with open,
 670     and the yielded file will have very restrictive permissions, as
 671     per mkstemp.
 672
 673     E.g.::
 674
 675         with atomically_replaced_file('foo.txt', 'w') as f:
 676             f.write('hello jack.')
 677
 678     """
 679
 680     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 681                                        text=('b' not in mode))
 682     try:
 683         try:
 684             f = os.fdopen(ffd, mode, buffering)
 685         except:
 686             os.close(ffd)
 687             raise
 688         try:
 689             yield f
 690         finally:
 691             f.close()
 692         os.rename(tempname, name)
 693     finally:
 694         unlink(tempname)  # nonexistant file is ignored
 695
 696
 697 def slashappend(s):
 698     """Append "/" to 's' if it doesn't aleady end in "/"."""
 699     if s and not s.endswith('/'):
 700         return s + '/'
 701     else:
 702         return s
 703
 704
 705 def _mmap_do(f, sz, flags, prot, close):
 706     if not sz:
 707         st = os.fstat(f.fileno())
 708         sz = st.st_size
 709     if not sz:
 710         # trying to open a zero-length map gives an error, but an empty
 711         # string has all the same behaviour of a zero-length map, ie. it has
 712         # no elements :)
 713         return ''
 714     map = mmap.mmap(f.fileno(), sz, flags, prot)
 715     if close:
 716         f.close()  # map will persist beyond file close
 717     return map
 718
 719
 720 def mmap_read(f, sz = 0, close=True):
 721     """Create a read-only memory mapped region on file 'f'.
 722     If sz is 0, the region will cover the entire file.
 723     """
 724     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 725
 726
 727 def mmap_readwrite(f, sz = 0, close=True):
 728     """Create a read-write memory mapped region on file 'f'.
 729     If sz is 0, the region will cover the entire file.
 730     """
 731     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 732                     close)
 733
 734
 735 def mmap_readwrite_private(f, sz = 0, close=True):
 736     """Create a read-write memory mapped region on file 'f'.
 737     If sz is 0, the region will cover the entire file.
 738     The map is private, which means the changes are never flushed back to the
 739     file.
 740     """
 741     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 742                     close)
 743
 744
 745 _mincore = getattr(_helpers, 'mincore', None)
 746 if _mincore:
 747     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 748     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 749
 750     _fmincore_chunk_size = None
 751     def _set_fmincore_chunk_size():
 752         global _fmincore_chunk_size
 753         pref_chunk_size = 64 * 1024 * 1024
 754         chunk_size = sc_page_size
 755         if (sc_page_size < pref_chunk_size):
 756             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 757         _fmincore_chunk_size = chunk_size
 758
 759     def fmincore(fd):
 760         """Return the mincore() data for fd as a bytearray whose values can be
 761         tested via MINCORE_INCORE, or None if fd does not fully
 762         support the operation."""
 763         st = os.fstat(fd)
 764         if (st.st_size == 0):
 765             return bytearray(0)
 766         if not _fmincore_chunk_size:
 767             _set_fmincore_chunk_size()
 768         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 769         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 770         chunk_count = page_count / _fmincore_chunk_size
 771         if chunk_count < 1:
 772             chunk_count = 1
 773         result = bytearray(page_count)
 774         for ci in xrange(chunk_count):
 775             pos = _fmincore_chunk_size * ci;
 776             msize = min(_fmincore_chunk_size, st.st_size - pos)
 777             try:
 778                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 779             except mmap.error as ex:
 780                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 781                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 782                     return None
 783                 raise ex
 784             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 785         return result
 786
 787
 788 def parse_timestamp(epoch_str):
 789     """Return the number of nanoseconds since the epoch that are described
 790 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 791 throw a ValueError that may contain additional information."""
 792     ns_per = {'s' :  1000000000,
 793               'ms' : 1000000,
 794               'us' : 1000,
 795               'ns' : 1}
 796     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 797     if not match:
 798         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 799             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 800         raise ValueError()
 801     (n, units) = match.group(1, 2)
 802     if not n:
 803         n = 1
 804     n = int(n)
 805     return n * ns_per[units]
 806
 807
 808 def parse_num(s):
 809     """Parse data size information into a float number.
 810
 811     Here are some examples of conversions:
 812         199.2k means 203981 bytes
 813         1GB means 1073741824 bytes
 814         2.1 tb means 2199023255552 bytes
 815     """
 816     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 817     if not g:
 818         raise ValueError("can't parse %r as a number" % s)
 819     (val, unit) = g.groups()
 820     num = float(val)
 821     unit = unit.lower()
 822     if unit in ['t', 'tb']:
 823         mult = 1024*1024*1024*1024
 824     elif unit in ['g', 'gb']:
 825         mult = 1024*1024*1024
 826     elif unit in ['m', 'mb']:
 827         mult = 1024*1024
 828     elif unit in ['k', 'kb']:
 829         mult = 1024
 830     elif unit in ['', 'b']:
 831         mult = 1
 832     else:
 833         raise ValueError("invalid unit %r in number %r" % (unit, s))
 834     return int(num*mult)
 835
 836
 837 def count(l):
 838     """Count the number of elements in an iterator. (consumes the iterator)"""
 839     return reduce(lambda x,y: x+1, l)
 840
 841
 842 saved_errors = []
 843 def add_error(e):
 844     """Append an error message to the list of saved errors.
 845
 846     Once processing is able to stop and output the errors, the saved errors are
 847     accessible in the module variable helpers.saved_errors.
 848     """
 849     saved_errors.append(e)
 850     log('%-70s\n' % e)
 851
 852
 853 def clear_errors():
 854     global saved_errors
 855     saved_errors = []
 856
 857
 858 def die_if_errors(msg=None, status=1):
 859     global saved_errors
 860     if saved_errors:
 861         if not msg:
 862             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 863         log(msg)
 864         sys.exit(status)
 865
 866
 867 def handle_ctrl_c():
 868     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 869
 870     The new exception handler will make sure that bup will exit without an ugly
 871     stacktrace when Ctrl-C is hit.
 872     """
 873     oldhook = sys.excepthook
 874     def newhook(exctype, value, traceback):
 875         if exctype == KeyboardInterrupt:
 876             log('\nInterrupted.\n')
 877         else:
 878             return oldhook(exctype, value, traceback)
 879     sys.excepthook = newhook
 880
 881
 882 def columnate(l, prefix):
 883     """Format elements of 'l' in columns with 'prefix' leading each line.
 884
 885     The number of columns is determined automatically based on the string
 886     lengths.
 887     """
 888     if not l:
 889         return ""
 890     l = l[:]
 891     clen = max(len(s) for s in l)
 892     ncols = (tty_width() - len(prefix)) / (clen + 2)
 893     if ncols <= 1:
 894         ncols = 1
 895         clen = 0
 896     cols = []
 897     while len(l) % ncols:
 898         l.append('')
 899     rows = len(l)/ncols
 900     for s in range(0, len(l), rows):
 901         cols.append(l[s:s+rows])
 902     out = ''
 903     for row in zip(*cols):
 904         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 905     return out
 906
 907
 908 def parse_date_or_fatal(str, fatal):
 909     """Parses the given date or calls Option.fatal().
 910     For now we expect a string that contains a float."""
 911     try:
 912         date = float(str)
 913     except ValueError as e:
 914         raise fatal('invalid date format (should be a float): %r' % e)
 915     else:
 916         return date
 917
 918
 919 def parse_excludes(options, fatal):
 920     """Traverse the options and extract all excludes, or call Option.fatal()."""
 921     excluded_paths = []
 922
 923     for flag in options:
 924         (option, parameter) = flag
 925         if option == '--exclude':
 926             excluded_paths.append(resolve_parent(parameter))
 927         elif option == '--exclude-from':
 928             try:
 929                 f = open(resolve_parent(parameter))
 930             except IOError as e:
 931                 raise fatal("couldn't read %s" % parameter)
 932             for exclude_path in f.readlines():
 933                 # FIXME: perhaps this should be rstrip('\n')
 934                 exclude_path = resolve_parent(exclude_path.strip())
 935                 if exclude_path:
 936                     excluded_paths.append(exclude_path)
 937     return sorted(frozenset(excluded_paths))
 938
 939
 940 def parse_rx_excludes(options, fatal):
 941     """Traverse the options and extract all rx excludes, or call
 942     Option.fatal()."""
 943     excluded_patterns = []
 944
 945     for flag in options:
 946         (option, parameter) = flag
 947         if option == '--exclude-rx':
 948             try:
 949                 excluded_patterns.append(re.compile(parameter))
 950             except re.error as ex:
 951                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 952         elif option == '--exclude-rx-from':
 953             try:
 954                 f = open(resolve_parent(parameter))
 955             except IOError as e:
 956                 raise fatal("couldn't read %s" % parameter)
 957             for pattern in f.readlines():
 958                 spattern = pattern.rstrip('\n')
 959                 if not spattern:
 960                     continue
 961                 try:
 962                     excluded_patterns.append(re.compile(spattern))
 963                 except re.error as ex:
 964                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 965     return excluded_patterns
 966
 967
 968 def should_rx_exclude_path(path, exclude_rxs):
 969     """Return True if path matches a regular expression in exclude_rxs."""
 970     for rx in exclude_rxs:
 971         if rx.search(path):
 972             debug1('Skipping %r: excluded by rx pattern %r.\n'
 973                    % (path, rx.pattern))
 974             return True
 975     return False
 976
 977
 978 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 979 # that resolve against the current filesystem in the strip/graft
 980 # functions for example, but elsewhere as well.  I suspect bup's not
 981 # always being careful about that.  For some cases, the contents of
 982 # the current filesystem should be irrelevant, and consulting it might
 983 # produce the wrong result, perhaps via unintended symlink resolution,
 984 # for example.
 985
 986 def path_components(path):
 987     """Break path into a list of pairs of the form (name,
 988     full_path_to_name).  Path must start with '/'.
 989     Example:
 990       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 991     if not path.startswith('/'):
 992         raise Exception, 'path must start with "/": %s' % path
 993     # Since we assume path startswith('/'), we can skip the first element.
 994     result = [('', '/')]
 995     norm_path = os.path.abspath(path)
 996     if norm_path == '/':
 997         return result
 998     full_path = ''
 999     for p in norm_path.split('/')[1:]:
1000         full_path += '/' + p
1001         result.append((p, full_path))
1002     return result
1003
1004
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Both path and the prefixes are normalized with os.path.abspath().
    Prefixes are tried longest first (by their given length), and a
    prefix that normalizes to '/' is ignored.  A prefix only applies
    when it is the path itself or one of its leading directories; the
    stripped prefix then becomes the first component, with an empty
    name.  If no prefix applies, the result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == '/':
            continue
        if not normalized_path.startswith(normalized_bp):
            continue
        remainder = normalized_path[len(normalized_bp):]
        if remainder and not remainder.startswith('/'):
            # Only part of a component matched (e.g. prefix '/foo'
            # against '/foobar'); that is not a real path prefix.
            continue
        prefix = normalized_bp
        result = []
        for p in remainder.split('/'):
            if p: # not root
                prefix += '/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1027
1028
def grafted_path_components(graft_points, path):
    """Return the (name, fs_path_or_None) components of path after
    applying the first matching graft.

    graft_points is a sequence of (old_prefix, new_prefix) pairs, as
    given by --graft old=new.  The first pair whose old_prefix is the
    (absolute, normalized) path itself or one of its leading
    directories is applied.  If none applies, the result is
    path_components() of the normalized path."""
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        # Normalize the prefixes (normpath does not make them absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if not clean_path.startswith(old_prefix):
            continue
        remainder = clean_path[len(old_prefix):]
        if remainder and not (remainder.startswith('/')
                              or old_prefix.endswith('/')):
            # Only part of a component matched (e.g. /foo vs /foobar),
            # so old_prefix is not really a parent of this path.
            continue
        # Plain concatenation: substituting via re.sub() would treat
        # backslashes in new_prefix as regex replacement escapes.
        grafted_path = new_prefix + remainder
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = '/' + grafted_path.lstrip('/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
        new_prefix_parts = new_prefix.split('/')
        # Faked directories leading up to the graft point have no
        # filesystem path, hence the None.
        result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == '/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1070
1071
# Alias for hashlib's SHA-1 constructor.
Sha1 = hashlib.sha1


# The localtime() provided by the _helpers module, or None if it has none.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # The nine leading fields mirror time.struct_time; tm_gmtoff and
    # tm_zone are the extra items.
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the _helpers.localtime() result for 'time' as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        # Format the sign separately: negating h loses it when h is 0,
        # which turned an offset such as -00:30 into "+0030".
        return "%s%02d%02d" % ('-' if off < 0 else '+', h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any items past the
        first nine."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return strftime's '%z' rendering of the local time for t."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged."""
        return x
1110
1111
# Substrings that may not appear anywhere in a save name: '[', space,
# '~', '^', ':', '?', '*', backslash, '..', '//' and '@{'.  The '[' is
# escaped inside the character class so newer Pythons do not warn about
# a possible nested set; the set of matched characters is unchanged.
_some_invalid_save_parts_rx = re.compile(r'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a save (branch) name.

    The name is rejected if it is empty or '@', starts or ends with
    '/', ends with '.', contains any sequence matched by
    _some_invalid_save_parts_rx, contains a control character (below
    0x20, or 0x7f), or has a '/'-separated part that starts with '.'
    or ends with '.lock'."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if not name or name == '@' \
       or name.startswith('/') or name.endswith('/') \
       or name.endswith('.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    for part in name.split('/'):
        if part.startswith('.') or part.endswith('.lock'):
            return False
    return True