lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from contextlib import contextmanager
   5 from ctypes import sizeof, c_void_p
   6 from os import environ
   7 from pipes import quote
   8 from subprocess import PIPE, Popen
   9 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  10 import hashlib, heapq, math, operator, time, grp, tempfile
  11
  12 from bup import _helpers
  13 from bup import compat
  14 # This function should really be in helpers, not in bup.options.  But we
  15 # want options.py to be standalone so people can include it in other projects.
  16 from bup.options import _tty_width as tty_width
  17
  18
  19 class Nonlocal:
  20     """Helper to deal with Python scoping issues"""
  21     pass
  22
  23
  24 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  25 assert(sc_page_size > 0)
  26
  27 sc_arg_max = os.sysconf('SC_ARG_MAX')
  28 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  29     sc_arg_max = 2 * 1024 * 1024
  30
  31
  32 def last(iterable):
  33     result = None
  34     for result in iterable:
  35         pass
  36     return result
  37
  38
  39 def atoi(s):
  40     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  41     try:
  42         return int(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 def atof(s):
  48     """Convert the string 's' to a float. Return 0 if s is not a number."""
  49     try:
  50         return float(s or '0')
  51     except ValueError:
  52         return 0
  53
  54
  55 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  56
  57
  58 try:
  59     _fdatasync = os.fdatasync
  60 except AttributeError:
  61     _fdatasync = os.fsync
  62
  63 if sys.platform.startswith('darwin'):
  64     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  65     import fcntl
  66     def fdatasync(fd):
  67         try:
  68             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  69         except IOError as e:
  70             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  71             if e.errno == errno.ENOTSUP:
  72                 return _fdatasync(fd)
  73             else:
  74                 raise
  75 else:
  76     fdatasync = _fdatasync
  77
  78
  79 def partition(predicate, stream):
  80     """Returns (leading_matches_it, rest_it), where leading_matches_it
  81     must be completely exhausted before traversing rest_it.
  82
  83     """
  84     stream = iter(stream)
  85     ns = Nonlocal()
  86     ns.first_nonmatch = None
  87     def leading_matches():
  88         for x in stream:
  89             if predicate(x):
  90                 yield x
  91             else:
  92                 ns.first_nonmatch = (x,)
  93                 break
  94     def rest():
  95         if ns.first_nonmatch:
  96             yield ns.first_nonmatch[0]
  97             for x in stream:
  98                 yield x
  99     return (leading_matches(), rest())
 100
 101
 102 def lines_until_sentinel(f, sentinel, ex_type):
 103     # sentinel must end with \n and must contain only one \n
 104     while True:
 105         line = f.readline()
 106         if not (line and line.endswith('\n')):
 107             raise ex_type('Hit EOF while reading line')
 108         if line == sentinel:
 109             return
 110         yield line
 111
 112
 113 def stat_if_exists(path):
 114     try:
 115         return os.stat(path)
 116     except OSError as e:
 117         if e.errno != errno.ENOENT:
 118             raise
 119     return None
 120
 121
 122 # Write (blockingly) to sockets that may or may not be in blocking mode.
 123 # We need this because our stderr is sometimes eaten by subprocesses
 124 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 125 # leading to race conditions.  Ick.  We'll do it the hard way.
 126 def _hard_write(fd, buf):
 127     while buf:
 128         (r,w,x) = select.select([], [fd], [], None)
 129         if not w:
 130             raise IOError('select(fd) returned without being writable')
 131         try:
 132             sz = os.write(fd, buf)
 133         except OSError as e:
 134             if e.errno != errno.EAGAIN:
 135                 raise
 136         assert(sz >= 0)
 137         buf = buf[sz:]
 138
 139
 140 _last_prog = 0
 141 def log(s):
 142     """Print a log message to stderr."""
 143     global _last_prog
 144     sys.stdout.flush()
 145     _hard_write(sys.stderr.fileno(), s)
 146     _last_prog = 0
 147
 148
 149 def debug1(s):
 150     if buglvl >= 1:
 151         log(s)
 152
 153
 154 def debug2(s):
 155     if buglvl >= 2:
 156         log(s)
 157
 158
 159 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 160 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 161 _last_progress = ''
 162 def progress(s):
 163     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 164     global _last_progress
 165     if istty2:
 166         log(s)
 167         _last_progress = s
 168
 169
 170 def qprogress(s):
 171     """Calls progress() only if we haven't printed progress in a while.
 172
 173     This avoids overloading the stderr buffer with excess junk.
 174     """
 175     global _last_prog
 176     now = time.time()
 177     if now - _last_prog > 0.1:
 178         progress(s)
 179         _last_prog = now
 180
 181
 182 def reprogress():
 183     """Calls progress() to redisplay the most recent progress message.
 184
 185     Useful after you've printed some other message that wipes out the
 186     progress line.
 187     """
 188     if _last_progress and _last_progress.endswith('\r'):
 189         progress(_last_progress)
 190
 191
 192 def mkdirp(d, mode=None):
 193     """Recursively create directories on path 'd'.
 194
 195     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 196     the path already exists.
 197     """
 198     try:
 199         if mode:
 200             os.makedirs(d, mode)
 201         else:
 202             os.makedirs(d)
 203     except OSError as e:
 204         if e.errno == errno.EEXIST:
 205             pass
 206         else:
 207             raise
 208
 209
 210 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 211     if key:
 212         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 213     else:
 214         samekey = operator.eq
 215     count = 0
 216     total = sum(len(it) for it in iters)
 217     iters = (iter(it) for it in iters)
 218     heap = ((next(it, None),it) for it in iters)
 219     heap = [(e,it) for e,it in heap if e]
 220
 221     heapq.heapify(heap)
 222     pe = None
 223     while heap:
 224         if not count % pfreq:
 225             pfunc(count, total)
 226         e, it = heap[0]
 227         if not samekey(e, pe):
 228             pe = e
 229             yield e
 230         count += 1
 231         try:
 232             e = next(it)
 233         except StopIteration:
 234             heapq.heappop(heap) # remove current
 235         else:
 236             heapq.heapreplace(heap, (e, it)) # shift current to new location
 237     pfinal(count, total)
 238
 239
 240 def unlink(f):
 241     """Delete a file at path 'f' if it currently exists.
 242
 243     Unlike os.unlink(), does not throw an exception if the file didn't already
 244     exist.
 245     """
 246     try:
 247         os.unlink(f)
 248     except OSError as e:
 249         if e.errno != errno.ENOENT:
 250             raise
 251
 252
 253 def shstr(cmd):
 254     if isinstance(cmd, compat.str_type):
 255         return cmd
 256     else:
 257         return ' '.join(map(quote, cmd))
 258
 259 exc = subprocess.check_call
 260
 261 def exo(cmd,
 262         input=None,
 263         stdin=None,
 264         stderr=None,
 265         shell=False,
 266         check=True,
 267         preexec_fn=None):
 268     if input:
 269         assert stdin in (None, PIPE)
 270         stdin = PIPE
 271     p = Popen(cmd,
 272               stdin=stdin, stdout=PIPE, stderr=stderr,
 273               shell=shell,
 274               preexec_fn=preexec_fn)
 275     out, err = p.communicate(input)
 276     if check and p.returncode != 0:
 277         raise Exception('subprocess %r failed with status %d, stderr: %r'
 278                         % (' '.join(map(quote, cmd)), p.returncode, err))
 279     return out, err, p
 280
 281 def readpipe(argv, preexec_fn=None, shell=False):
 282     """Run a subprocess and return its output."""
 283     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 284                          shell=shell)
 285     out, err = p.communicate()
 286     if p.returncode != 0:
 287         raise Exception('subprocess %r failed with status %d'
 288                         % (' '.join(argv), p.returncode))
 289     return out
 290
 291
 292 def _argmax_base(command):
 293     base_size = 2048
 294     for c in command:
 295         base_size += len(command) + 1
 296     for k, v in environ.iteritems():
 297         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 298     return base_size
 299
 300
 301 def _argmax_args_size(args):
 302     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 303
 304
 305 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 306     """If args is not empty, yield the output produced by calling the
 307 command list with args as a sequence of strings (It may be necessary
 308 to return multiple strings in order to respect ARG_MAX)."""
 309     # The optional arg_max arg is a workaround for an issue with the
 310     # current wvtest behavior.
 311     base_size = _argmax_base(command)
 312     while args:
 313         room = arg_max - base_size
 314         i = 0
 315         while i < len(args):
 316             next_size = _argmax_args_size(args[i:i+1])
 317             if room - next_size < 0:
 318                 break
 319             room -= next_size
 320             i += 1
 321         sub_args = args[:i]
 322         args = args[i:]
 323         assert(len(sub_args))
 324         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 325
 326
 327 def resolve_parent(p):
 328     """Return the absolute path of a file without following any final symlink.
 329
 330     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 331     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 332     will follow symlinks in p's directory)
 333     """
 334     try:
 335         st = os.lstat(p)
 336     except OSError:
 337         st = None
 338     if st and stat.S_ISLNK(st.st_mode):
 339         (dir, name) = os.path.split(p)
 340         dir = os.path.realpath(dir)
 341         out = os.path.join(dir, name)
 342     else:
 343         out = os.path.realpath(p)
 344     #log('realpathing:%r,%r\n' % (p, out))
 345     return out
 346
 347
 348 def detect_fakeroot():
 349     "Return True if we appear to be running under fakeroot."
 350     return os.getenv("FAKEROOTKEY") != None
 351
 352
 353 _warned_about_superuser_detection = None
 354 def is_superuser():
 355     if sys.platform.startswith('cygwin'):
 356         if sys.getwindowsversion()[0] > 5:
 357             # Sounds like situation is much more complicated here
 358             global _warned_about_superuser_detection
 359             if not _warned_about_superuser_detection:
 360                 log("can't detect root status for OS version > 5; assuming not root")
 361                 _warned_about_superuser_detection = True
 362             return False
 363         import ctypes
 364         return ctypes.cdll.shell32.IsUserAnAdmin()
 365     else:
 366         return os.geteuid() == 0
 367
 368
 369 def _cache_key_value(get_value, key, cache):
 370     """Return (value, was_cached).  If there is a value in the cache
 371     for key, use that, otherwise, call get_value(key) which should
 372     throw a KeyError if there is no value -- in which case the cached
 373     and returned value will be None.
 374     """
 375     try: # Do we already have it (or know there wasn't one)?
 376         value = cache[key]
 377         return value, True
 378     except KeyError:
 379         pass
 380     value = None
 381     try:
 382         cache[key] = value = get_value(key)
 383     except KeyError:
 384         cache[key] = None
 385     return value, False
 386
 387
 388 _uid_to_pwd_cache = {}
 389 _name_to_pwd_cache = {}
 390
 391 def pwd_from_uid(uid):
 392     """Return password database entry for uid (may be a cached value).
 393     Return None if no entry is found.
 394     """
 395     global _uid_to_pwd_cache, _name_to_pwd_cache
 396     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 397     if entry and not cached:
 398         _name_to_pwd_cache[entry.pw_name] = entry
 399     return entry
 400
 401
 402 def pwd_from_name(name):
 403     """Return password database entry for name (may be a cached value).
 404     Return None if no entry is found.
 405     """
 406     global _uid_to_pwd_cache, _name_to_pwd_cache
 407     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 408     if entry and not cached:
 409         _uid_to_pwd_cache[entry.pw_uid] = entry
 410     return entry
 411
 412
 413 _gid_to_grp_cache = {}
 414 _name_to_grp_cache = {}
 415
 416 def grp_from_gid(gid):
 417     """Return password database entry for gid (may be a cached value).
 418     Return None if no entry is found.
 419     """
 420     global _gid_to_grp_cache, _name_to_grp_cache
 421     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 422     if entry and not cached:
 423         _name_to_grp_cache[entry.gr_name] = entry
 424     return entry
 425
 426
 427 def grp_from_name(name):
 428     """Return password database entry for name (may be a cached value).
 429     Return None if no entry is found.
 430     """
 431     global _gid_to_grp_cache, _name_to_grp_cache
 432     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 433     if entry and not cached:
 434         _gid_to_grp_cache[entry.gr_gid] = entry
 435     return entry
 436
 437
 438 _username = None
 439 def username():
 440     """Get the user's login name."""
 441     global _username
 442     if not _username:
 443         uid = os.getuid()
 444         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 445     return _username
 446
 447
 448 _userfullname = None
 449 def userfullname():
 450     """Get the user's full name."""
 451     global _userfullname
 452     if not _userfullname:
 453         uid = os.getuid()
 454         entry = pwd_from_uid(uid)
 455         if entry:
 456             _userfullname = entry[4].split(',')[0] or entry[0]
 457         if not _userfullname:
 458             _userfullname = 'user%d' % uid
 459     return _userfullname
 460
 461
 462 _hostname = None
 463 def hostname():
 464     """Get the FQDN of this machine."""
 465     global _hostname
 466     if not _hostname:
 467         _hostname = socket.getfqdn()
 468     return _hostname
 469
 470
 471 _resource_path = None
 472 def resource_path(subdir=''):
 473     global _resource_path
 474     if not _resource_path:
 475         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 476     return os.path.join(_resource_path, subdir)
 477
 478 def format_filesize(size):
 479     unit = 1024.0
 480     size = float(size)
 481     if size < unit:
 482         return "%d" % (size)
 483     exponent = int(math.log(size) / math.log(unit))
 484     size_prefix = "KMGTPE"[exponent - 1]
 485     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 486
 487
 488 class NotOk(Exception):
 489     pass
 490
 491
 492 class BaseConn:
 493     def __init__(self, outp):
 494         self.outp = outp
 495
 496     def close(self):
 497         while self._read(65536): pass
 498
 499     def read(self, size):
 500         """Read 'size' bytes from input stream."""
 501         self.outp.flush()
 502         return self._read(size)
 503
 504     def readline(self):
 505         """Read from input stream until a newline is found."""
 506         self.outp.flush()
 507         return self._readline()
 508
 509     def write(self, data):
 510         """Write 'data' to output stream."""
 511         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 512         self.outp.write(data)
 513
 514     def has_input(self):
 515         """Return true if input stream is readable."""
 516         raise NotImplemented("Subclasses must implement has_input")
 517
 518     def ok(self):
 519         """Indicate end of output from last sent command."""
 520         self.write('\nok\n')
 521
 522     def error(self, s):
 523         """Indicate server error to the client."""
 524         s = re.sub(r'\s+', ' ', str(s))
 525         self.write('\nerror %s\n' % s)
 526
 527     def _check_ok(self, onempty):
 528         self.outp.flush()
 529         rl = ''
 530         for rl in linereader(self):
 531             #log('%d got line: %r\n' % (os.getpid(), rl))
 532             if not rl:  # empty line
 533                 continue
 534             elif rl == 'ok':
 535                 return None
 536             elif rl.startswith('error '):
 537                 #log('client: error: %s\n' % rl[6:])
 538                 return NotOk(rl[6:])
 539             else:
 540                 onempty(rl)
 541         raise Exception('server exited unexpectedly; see errors above')
 542
 543     def drain_and_check_ok(self):
 544         """Remove all data for the current command from input stream."""
 545         def onempty(rl):
 546             pass
 547         return self._check_ok(onempty)
 548
 549     def check_ok(self):
 550         """Verify that server action completed successfully."""
 551         def onempty(rl):
 552             raise Exception('expected "ok", got %r' % rl)
 553         return self._check_ok(onempty)
 554
 555
 556 class Conn(BaseConn):
 557     def __init__(self, inp, outp):
 558         BaseConn.__init__(self, outp)
 559         self.inp = inp
 560
 561     def _read(self, size):
 562         return self.inp.read(size)
 563
 564     def _readline(self):
 565         return self.inp.readline()
 566
 567     def has_input(self):
 568         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 569         if rl:
 570             assert(rl[0] == self.inp.fileno())
 571             return True
 572         else:
 573             return None
 574
 575
 576 def checked_reader(fd, n):
 577     while n > 0:
 578         rl, _, _ = select.select([fd], [], [])
 579         assert(rl[0] == fd)
 580         buf = os.read(fd, n)
 581         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 582         yield buf
 583         n -= len(buf)
 584
 585
 586 MAX_PACKET = 128 * 1024
 587 def mux(p, outfd, outr, errr):
 588     try:
 589         fds = [outr, errr]
 590         while p.poll() is None:
 591             rl, _, _ = select.select(fds, [], [])
 592             for fd in rl:
 593                 if fd == outr:
 594                     buf = os.read(outr, MAX_PACKET)
 595                     if not buf: break
 596                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 597                 elif fd == errr:
 598                     buf = os.read(errr, 1024)
 599                     if not buf: break
 600                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 601     finally:
 602         os.write(outfd, struct.pack('!IB', 0, 3))
 603
 604
 605 class DemuxConn(BaseConn):
 606     """A helper class for bup's client-server protocol."""
 607     def __init__(self, infd, outp):
 608         BaseConn.__init__(self, outp)
 609         # Anything that comes through before the sync string was not
 610         # multiplexed and can be assumed to be debug/log before mux init.
 611         tail = ''
 612         while tail != 'BUPMUX':
 613             b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
 614             if not b:
 615                 raise IOError('demux: unexpected EOF during initialization')
 616             tail += b
 617             sys.stderr.write(tail[:-6])  # pre-mux log messages
 618             tail = tail[-6:]
 619         self.infd = infd
 620         self.reader = None
 621         self.buf = None
 622         self.closed = False
 623
 624     def write(self, data):
 625         self._load_buf(0)
 626         BaseConn.write(self, data)
 627
 628     def _next_packet(self, timeout):
 629         if self.closed: return False
 630         rl, wl, xl = select.select([self.infd], [], [], timeout)
 631         if not rl: return False
 632         assert(rl[0] == self.infd)
 633         ns = ''.join(checked_reader(self.infd, 5))
 634         n, fdw = struct.unpack('!IB', ns)
 635         assert(n <= MAX_PACKET)
 636         if fdw == 1:
 637             self.reader = checked_reader(self.infd, n)
 638         elif fdw == 2:
 639             for buf in checked_reader(self.infd, n):
 640                 sys.stderr.write(buf)
 641         elif fdw == 3:
 642             self.closed = True
 643             debug2("DemuxConn: marked closed\n")
 644         return True
 645
 646     def _load_buf(self, timeout):
 647         if self.buf is not None:
 648             return True
 649         while not self.closed:
 650             while not self.reader:
 651                 if not self._next_packet(timeout):
 652                     return False
 653             try:
 654                 self.buf = next(self.reader)
 655                 return True
 656             except StopIteration:
 657                 self.reader = None
 658         return False
 659
 660     def _read_parts(self, ix_fn):
 661         while self._load_buf(None):
 662             assert(self.buf is not None)
 663             i = ix_fn(self.buf)
 664             if i is None or i == len(self.buf):
 665                 yv = self.buf
 666                 self.buf = None
 667             else:
 668                 yv = self.buf[:i]
 669                 self.buf = self.buf[i:]
 670             yield yv
 671             if i is not None:
 672                 break
 673
 674     def _readline(self):
 675         def find_eol(buf):
 676             try:
 677                 return buf.index('\n')+1
 678             except ValueError:
 679                 return None
 680         return ''.join(self._read_parts(find_eol))
 681
 682     def _read(self, size):
 683         csize = [size]
 684         def until_size(buf): # Closes on csize
 685             if len(buf) < csize[0]:
 686                 csize[0] -= len(buf)
 687                 return None
 688             else:
 689                 return csize[0]
 690         return ''.join(self._read_parts(until_size))
 691
 692     def has_input(self):
 693         return self._load_buf(0)
 694
 695
 696 def linereader(f):
 697     """Generate a list of input lines from 'f' without terminating newlines."""
 698     while 1:
 699         line = f.readline()
 700         if not line:
 701             break
 702         yield line[:-1]
 703
 704
 705 def chunkyreader(f, count = None):
 706     """Generate a list of chunks of data read from 'f'.
 707
 708     If count is None, read until EOF is reached.
 709
 710     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 711     reached while reading, raise IOError.
 712     """
 713     if count != None:
 714         while count > 0:
 715             b = f.read(min(count, 65536))
 716             if not b:
 717                 raise IOError('EOF with %d bytes remaining' % count)
 718             yield b
 719             count -= len(b)
 720     else:
 721         while 1:
 722             b = f.read(65536)
 723             if not b: break
 724             yield b
 725
 726
 727 @contextmanager
 728 def atomically_replaced_file(name, mode='w', buffering=-1):
 729     """Yield a file that will be atomically renamed name when leaving the block.
 730
 731     This contextmanager yields an open file object that is backed by a
 732     temporary file which will be renamed (atomically) to the target
 733     name if everything succeeds.
 734
 735     The mode and buffering arguments are handled exactly as with open,
 736     and the yielded file will have very restrictive permissions, as
 737     per mkstemp.
 738
 739     E.g.::
 740
 741         with atomically_replaced_file('foo.txt', 'w') as f:
 742             f.write('hello jack.')
 743
 744     """
 745
 746     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 747                                        text=('b' not in mode))
 748     try:
 749         try:
 750             f = os.fdopen(ffd, mode, buffering)
 751         except:
 752             os.close(ffd)
 753             raise
 754         try:
 755             yield f
 756         finally:
 757             f.close()
 758         os.rename(tempname, name)
 759     finally:
 760         unlink(tempname)  # nonexistant file is ignored
 761
 762
 763 def slashappend(s):
 764     """Append "/" to 's' if it doesn't aleady end in "/"."""
 765     if s and not s.endswith('/'):
 766         return s + '/'
 767     else:
 768         return s
 769
 770
 771 def _mmap_do(f, sz, flags, prot, close):
 772     if not sz:
 773         st = os.fstat(f.fileno())
 774         sz = st.st_size
 775     if not sz:
 776         # trying to open a zero-length map gives an error, but an empty
 777         # string has all the same behaviour of a zero-length map, ie. it has
 778         # no elements :)
 779         return ''
 780     map = mmap.mmap(f.fileno(), sz, flags, prot)
 781     if close:
 782         f.close()  # map will persist beyond file close
 783     return map
 784
 785
 786 def mmap_read(f, sz = 0, close=True):
 787     """Create a read-only memory mapped region on file 'f'.
 788     If sz is 0, the region will cover the entire file.
 789     """
 790     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 791
 792
 793 def mmap_readwrite(f, sz = 0, close=True):
 794     """Create a read-write memory mapped region on file 'f'.
 795     If sz is 0, the region will cover the entire file.
 796     """
 797     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 798                     close)
 799
 800
 801 def mmap_readwrite_private(f, sz = 0, close=True):
 802     """Create a read-write memory mapped region on file 'f'.
 803     If sz is 0, the region will cover the entire file.
 804     The map is private, which means the changes are never flushed back to the
 805     file.
 806     """
 807     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 808                     close)
 809
 810
 811 _mincore = getattr(_helpers, 'mincore', None)
 812 if _mincore:
 813     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 814     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 815
 816     _fmincore_chunk_size = None
 817     def _set_fmincore_chunk_size():
 818         global _fmincore_chunk_size
 819         pref_chunk_size = 64 * 1024 * 1024
 820         chunk_size = sc_page_size
 821         if (sc_page_size < pref_chunk_size):
 822             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 823         _fmincore_chunk_size = chunk_size
 824
 825     def fmincore(fd):
 826         """Return the mincore() data for fd as a bytearray whose values can be
 827         tested via MINCORE_INCORE, or None if fd does not fully
 828         support the operation."""
 829         st = os.fstat(fd)
 830         if (st.st_size == 0):
 831             return bytearray(0)
 832         if not _fmincore_chunk_size:
 833             _set_fmincore_chunk_size()
 834         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 835         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 836         chunk_count = page_count / _fmincore_chunk_size
 837         if chunk_count < 1:
 838             chunk_count = 1
 839         result = bytearray(page_count)
 840         for ci in xrange(chunk_count):
 841             pos = _fmincore_chunk_size * ci;
 842             msize = min(_fmincore_chunk_size, st.st_size - pos)
 843             try:
 844                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 845             except mmap.error as ex:
 846                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 847                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 848                     return None
 849                 raise ex
 850             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 851         return result
 852
 853
 854 def parse_timestamp(epoch_str):
 855     """Return the number of nanoseconds since the epoch that are described
 856 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 857 throw a ValueError that may contain additional information."""
 858     ns_per = {'s' :  1000000000,
 859               'ms' : 1000000,
 860               'us' : 1000,
 861               'ns' : 1}
 862     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 863     if not match:
 864         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 865             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 866         raise ValueError()
 867     (n, units) = match.group(1, 2)
 868     if not n:
 869         n = 1
 870     n = int(n)
 871     return n * ns_per[units]
 872
 873
 874 def parse_num(s):
 875     """Parse data size information into a float number.
 876
 877     Here are some examples of conversions:
 878         199.2k means 203981 bytes
 879         1GB means 1073741824 bytes
 880         2.1 tb means 2199023255552 bytes
 881     """
 882     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 883     if not g:
 884         raise ValueError("can't parse %r as a number" % s)
 885     (val, unit) = g.groups()
 886     num = float(val)
 887     unit = unit.lower()
 888     if unit in ['t', 'tb']:
 889         mult = 1024*1024*1024*1024
 890     elif unit in ['g', 'gb']:
 891         mult = 1024*1024*1024
 892     elif unit in ['m', 'mb']:
 893         mult = 1024*1024
 894     elif unit in ['k', 'kb']:
 895         mult = 1024
 896     elif unit in ['', 'b']:
 897         mult = 1
 898     else:
 899         raise ValueError("invalid unit %r in number %r" % (unit, s))
 900     return int(num*mult)
 901
 902
 903 def count(l):
 904     """Count the number of elements in an iterator. (consumes the iterator)"""
 905     return reduce(lambda x,y: x+1, l)
 906
 907
 908 saved_errors = []
 909 def add_error(e):
 910     """Append an error message to the list of saved errors.
 911
 912     Once processing is able to stop and output the errors, the saved errors are
 913     accessible in the module variable helpers.saved_errors.
 914     """
 915     saved_errors.append(e)
 916     log('%-70s\n' % e)
 917
 918
 919 def clear_errors():
 920     global saved_errors
 921     saved_errors = []
 922
 923
 924 def die_if_errors(msg=None, status=1):
 925     global saved_errors
 926     if saved_errors:
 927         if not msg:
 928             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 929         log(msg)
 930         sys.exit(status)
 931
 932
 933 def handle_ctrl_c():
 934     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 935
 936     The new exception handler will make sure that bup will exit without an ugly
 937     stacktrace when Ctrl-C is hit.
 938     """
 939     oldhook = sys.excepthook
 940     def newhook(exctype, value, traceback):
 941         if exctype == KeyboardInterrupt:
 942             log('\nInterrupted.\n')
 943         else:
 944             return oldhook(exctype, value, traceback)
 945     sys.excepthook = newhook
 946
 947
 948 def columnate(l, prefix):
 949     """Format elements of 'l' in columns with 'prefix' leading each line.
 950
 951     The number of columns is determined automatically based on the string
 952     lengths.
 953     """
 954     if not l:
 955         return ""
 956     l = l[:]
 957     clen = max(len(s) for s in l)
 958     ncols = (tty_width() - len(prefix)) / (clen + 2)
 959     if ncols <= 1:
 960         ncols = 1
 961         clen = 0
 962     cols = []
 963     while len(l) % ncols:
 964         l.append('')
 965     rows = len(l)/ncols
 966     for s in range(0, len(l), rows):
 967         cols.append(l[s:s+rows])
 968     out = ''
 969     for row in zip(*cols):
 970         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 971     return out
 972
 973
 974 def parse_date_or_fatal(str, fatal):
 975     """Parses the given date or calls Option.fatal().
 976     For now we expect a string that contains a float."""
 977     try:
 978         date = float(str)
 979     except ValueError as e:
 980         raise fatal('invalid date format (should be a float): %r' % e)
 981     else:
 982         return date
 983
 984
 985 def parse_excludes(options, fatal):
 986     """Traverse the options and extract all excludes, or call Option.fatal()."""
 987     excluded_paths = []
 988
 989     for flag in options:
 990         (option, parameter) = flag
 991         if option == '--exclude':
 992             excluded_paths.append(resolve_parent(parameter))
 993         elif option == '--exclude-from':
 994             try:
 995                 f = open(resolve_parent(parameter))
 996             except IOError as e:
 997                 raise fatal("couldn't read %s" % parameter)
 998             for exclude_path in f.readlines():
 999                 # FIXME: perhaps this should be rstrip('\n')
1000                 exclude_path = resolve_parent(exclude_path.strip())
1001                 if exclude_path:
1002                     excluded_paths.append(exclude_path)
1003     return sorted(frozenset(excluded_paths))
1004
1005
1006 def parse_rx_excludes(options, fatal):
1007     """Traverse the options and extract all rx excludes, or call
1008     Option.fatal()."""
1009     excluded_patterns = []
1010
1011     for flag in options:
1012         (option, parameter) = flag
1013         if option == '--exclude-rx':
1014             try:
1015                 excluded_patterns.append(re.compile(parameter))
1016             except re.error as ex:
1017                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
1018         elif option == '--exclude-rx-from':
1019             try:
1020                 f = open(resolve_parent(parameter))
1021             except IOError as e:
1022                 raise fatal("couldn't read %s" % parameter)
1023             for pattern in f.readlines():
1024                 spattern = pattern.rstrip('\n')
1025                 if not spattern:
1026                     continue
1027                 try:
1028                     excluded_patterns.append(re.compile(spattern))
1029                 except re.error as ex:
1030                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
1031     return excluded_patterns
1032
1033
1034 def should_rx_exclude_path(path, exclude_rxs):
1035     """Return True if path matches a regular expression in exclude_rxs."""
1036     for rx in exclude_rxs:
1037         if rx.search(path):
1038             debug1('Skipping %r: excluded by rx pattern %r.\n'
1039                    % (path, rx.pattern))
1040             return True
1041     return False
1042
1043
1044 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1045 # that resolve against the current filesystem in the strip/graft
1046 # functions for example, but elsewhere as well.  I suspect bup's not
1047 # always being careful about that.  For some cases, the contents of
1048 # the current filesystem should be irrelevant, and consulting it might
1049 # produce the wrong result, perhaps via unintended symlink resolution,
1050 # for example.
1051
1052 def path_components(path):
1053     """Break path into a list of pairs of the form (name,
1054     full_path_to_name).  Path must start with '/'.
1055     Example:
1056       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
1057     if not path.startswith('/'):
1058         raise Exception, 'path must start with "/": %s' % path
1059     # Since we assume path startswith('/'), we can skip the first element.
1060     result = [('', '/')]
1061     norm_path = os.path.abspath(path)
1062     if norm_path == '/':
1063         return result
1064     full_path = ''
1065     for p in norm_path.split('/')[1:]:
1066         full_path += '/' + p
1067         result.append((p, full_path))
1068     return result
1069
1070
1071 def stripped_path_components(path, strip_prefixes):
1072     """Strip any prefix in strip_prefixes from path and return a list
1073     of path components where each component is (name,
1074     none_or_full_fs_path_to_name).  Assume path startswith('/').
1075     See thelpers.py for examples."""
1076     normalized_path = os.path.abspath(path)
1077     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
1078     for bp in sorted_strip_prefixes:
1079         normalized_bp = os.path.abspath(bp)
1080         if normalized_bp == '/':
1081             continue
1082         if normalized_path.startswith(normalized_bp):
1083             prefix = normalized_path[:len(normalized_bp)]
1084             result = []
1085             for p in normalized_path[len(normalized_bp):].split('/'):
1086                 if p: # not root
1087                     prefix += '/'
1088                 prefix += p
1089                 result.append((p, prefix))
1090             return result
1091     # Nothing to strip.
1092     return path_components(path)
1093
1094
1095 def grafted_path_components(graft_points, path):
1096     # Create a result that consists of some number of faked graft
1097     # directories before the graft point, followed by all of the real
1098     # directories from path that are after the graft point.  Arrange
1099     # for the directory at the graft point in the result to correspond
1100     # to the "orig" directory in --graft orig=new.  See t/thelpers.py
1101     # for some examples.
1102
1103     # Note that given --graft orig=new, orig and new have *nothing* to
1104     # do with each other, even if some of their component names
1105     # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
1106     # equivalent to --graft /foo/bar/baz=/x/y/z, or even
1107     # /foo/bar/baz=/x.
1108
1109     # FIXME: This can't be the best solution...
1110     clean_path = os.path.abspath(path)
1111     for graft_point in graft_points:
1112         old_prefix, new_prefix = graft_point
1113         # Expand prefixes iff not absolute paths.
1114         old_prefix = os.path.normpath(old_prefix)
1115         new_prefix = os.path.normpath(new_prefix)
1116         if clean_path.startswith(old_prefix):
1117             escaped_prefix = re.escape(old_prefix)
1118             grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
1119             # Handle /foo=/ (at least) -- which produces //whatever.
1120             grafted_path = '/' + grafted_path.lstrip('/')
1121             clean_path_components = path_components(clean_path)
1122             # Count the components that were stripped.
1123             strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
1124             new_prefix_parts = new_prefix.split('/')
1125             result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
1126             result = [(p, None) for p in result_prefix] \
1127                 + clean_path_components[strip_count:]
1128             # Now set the graft point name to match the end of new_prefix.
1129             graft_point = len(result_prefix)
1130             result[graft_point] = \
1131                 (new_prefix_parts[-1], clean_path_components[strip_count][1])
1132             if new_prefix == '/': # --graft ...=/ is a special case.
1133                 return result[1:]
1134             return result
1135     return path_components(clean_path)
1136
1137
1138 Sha1 = hashlib.sha1
1139
1140
1141 _localtime = getattr(_helpers, 'localtime', None)
1142
1143 if _localtime:
1144     bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
1145                                        'tm_hour', 'tm_min', 'tm_sec',
1146                                        'tm_wday', 'tm_yday',
1147                                        'tm_isdst', 'tm_gmtoff', 'tm_zone'])
1148
1149 # Define a localtime() that returns bup_time when possible.  Note:
1150 # this means that any helpers.localtime() results may need to be
1151 # passed through to_py_time() before being passed to python's time
1152 # module, which doesn't appear willing to ignore the extra items.
1153 if _localtime:
1154     def localtime(time):
1155         return bup_time(*_helpers.localtime(time))
1156     def utc_offset_str(t):
1157         """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
1158         If the current UTC offset does not represent an integer number
1159         of minutes, the fractional component will be truncated."""
1160         off = localtime(t).tm_gmtoff
1161         # Note: // doesn't truncate like C for negative values, it rounds down.
1162         offmin = abs(off) // 60
1163         m = offmin % 60
1164         h = (offmin - m) // 60
1165         return "%+03d%02d" % (-h if off < 0 else h, m)
1166     def to_py_time(x):
1167         if isinstance(x, time.struct_time):
1168             return x
1169         return time.struct_time(x[:9])
1170 else:
1171     localtime = time.localtime
1172     def utc_offset_str(t):
1173         return time.strftime('%z', localtime(t))
1174     def to_py_time(x):
1175         return x
1176
1177
1178 _some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{')
1179
1180 def valid_save_name(name):
1181     # Enforce a superset of the restrictions in git-check-ref-format(1)
1182     if name == '@' \
1183        or name.startswith('/') or name.endswith('/') \
1184        or name.endswith('.'):
1185         return False
1186     if _some_invalid_save_parts_rx.search(name):
1187         return False
1188     for c in name:
1189         if ord(c) < 0x20 or ord(c) == 0x7f:
1190             return False
1191     for part in name.split('/'):
1192         if part.startswith('.') or part.endswith('.lock'):
1193             return False
1194     return True
1195
1196
1197 _period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')
1198
1199 def period_as_secs(s):
1200     if s == 'forever':
1201         return float('inf')
1202     match = _period_rx.match(s)
1203     if not match:
1204         return None
1205     mag = int(match.group(1))
1206     scale = match.group(2)
1207     return mag * {'s': 1,
1208                   'min': 60,
1209                   'h': 60 * 60,
1210                   'd': 60 * 60 * 24,
1211                   'w': 60 * 60 * 24 * 7,
1212                   'm': 60 * 60 * 24 * 31,
1213                   'y': 60 * 60 * 24 * 366}[scale]