lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from ctypes import sizeof, c_void_p
   5 from os import environ
   6 from contextlib import contextmanager
   7 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   8 import hashlib, heapq, math, operator, time, grp, tempfile
   9
  10 from bup import _helpers
  11
  12
  13 class Nonlocal:
  14     """Helper to deal with Python scoping issues"""
  15     pass
  16
  17
  18 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  19 assert(sc_page_size > 0)
  20
  21 sc_arg_max = os.sysconf('SC_ARG_MAX')
  22 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  23     sc_arg_max = 2 * 1024 * 1024
  24
  25 # This function should really be in helpers, not in bup.options.  But we
  26 # want options.py to be standalone so people can include it in other projects.
  27 from bup.options import _tty_width
  28 tty_width = _tty_width
  29
  30
  31 def atoi(s):
  32     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  33     try:
  34         return int(s or '0')
  35     except ValueError:
  36         return 0
  37
  38
  39 def atof(s):
  40     """Convert the string 's' to a float. Return 0 if s is not a number."""
  41     try:
  42         return float(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
# Debug verbosity taken from the BUP_DEBUG environment variable; 0 when
# the variable is unset or is not an integer.  debug1() logs when this
# is >= 1 and debug2() when it is >= 2.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  48
  49
  50 try:
  51     _fdatasync = os.fdatasync
  52 except AttributeError:
  53     _fdatasync = os.fsync
  54
  55 if sys.platform.startswith('darwin'):
  56     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  57     import fcntl
  58     def fdatasync(fd):
  59         try:
  60             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  61         except IOError as e:
  62             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  63             if e.errno == errno.ENOTSUP:
  64                 return _fdatasync(fd)
  65             else:
  66                 raise
  67 else:
  68     fdatasync = _fdatasync
  69
  70
  71 def partition(predicate, stream):
  72     """Returns (leading_matches_it, rest_it), where leading_matches_it
  73     must be completely exhausted before traversing rest_it.
  74
  75     """
  76     stream = iter(stream)
  77     ns = Nonlocal()
  78     ns.first_nonmatch = None
  79     def leading_matches():
  80         for x in stream:
  81             if predicate(x):
  82                 yield x
  83             else:
  84                 ns.first_nonmatch = (x,)
  85                 break
  86     def rest():
  87         if ns.first_nonmatch:
  88             yield ns.first_nonmatch[0]
  89             for x in stream:
  90                 yield x
  91     return (leading_matches(), rest())
  92
  93
  94 def stat_if_exists(path):
  95     try:
  96         return os.stat(path)
  97     except OSError as e:
  98         if e.errno != errno.ENOENT:
  99             raise
 100     return None
 101
 102
 103 # Write (blockingly) to sockets that may or may not be in blocking mode.
 104 # We need this because our stderr is sometimes eaten by subprocesses
 105 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 106 # leading to race conditions.  Ick.  We'll do it the hard way.
 107 def _hard_write(fd, buf):
 108     while buf:
 109         (r,w,x) = select.select([], [fd], [], None)
 110         if not w:
 111             raise IOError('select(fd) returned without being writable')
 112         try:
 113             sz = os.write(fd, buf)
 114         except OSError as e:
 115             if e.errno != errno.EAGAIN:
 116                 raise
 117         assert(sz >= 0)
 118         buf = buf[sz:]
 119
 120
 121 _last_prog = 0
 122 def log(s):
 123     """Print a log message to stderr."""
 124     global _last_prog
 125     sys.stdout.flush()
 126     _hard_write(sys.stderr.fileno(), s)
 127     _last_prog = 0
 128
 129
 130 def debug1(s):
 131     if buglvl >= 1:
 132         log(s)
 133
 134
 135 def debug2(s):
 136     if buglvl >= 2:
 137         log(s)
 138
 139
 140 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 141 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 142 _last_progress = ''
 143 def progress(s):
 144     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 145     global _last_progress
 146     if istty2:
 147         log(s)
 148         _last_progress = s
 149
 150
 151 def qprogress(s):
 152     """Calls progress() only if we haven't printed progress in a while.
 153
 154     This avoids overloading the stderr buffer with excess junk.
 155     """
 156     global _last_prog
 157     now = time.time()
 158     if now - _last_prog > 0.1:
 159         progress(s)
 160         _last_prog = now
 161
 162
 163 def reprogress():
 164     """Calls progress() to redisplay the most recent progress message.
 165
 166     Useful after you've printed some other message that wipes out the
 167     progress line.
 168     """
 169     if _last_progress and _last_progress.endswith('\r'):
 170         progress(_last_progress)
 171
 172
 173 def mkdirp(d, mode=None):
 174     """Recursively create directories on path 'd'.
 175
 176     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 177     the path already exists.
 178     """
 179     try:
 180         if mode:
 181             os.makedirs(d, mode)
 182         else:
 183             os.makedirs(d)
 184     except OSError as e:
 185         if e.errno == errno.EEXIST:
 186             pass
 187         else:
 188             raise
 189
 190
 191 _unspecified_next_default = object()
 192
 193 def _fallback_next(it, default=_unspecified_next_default):
 194     """Retrieve the next item from the iterator by calling its
 195     next() method. If default is given, it is returned if the
 196     iterator is exhausted, otherwise StopIteration is raised."""
 197
 198     if default is _unspecified_next_default:
 199         return it.next()
 200     else:
 201         try:
 202             return it.next()
 203         except StopIteration:
 204             return default
 205
 206 if sys.version_info < (2, 6):
 207     next =  _fallback_next
 208
 209
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge several iterables through a heap, skipping repeated items.

    iters  -- sequence of iterables; each must support len(), since the
              lengths are summed into 'total' before iteration starts.
    pfreq  -- pfunc(count, total) is called whenever count is a
              multiple of pfreq (including count == 0).
    pfunc  -- progress callback, called as pfunc(count, total).
    pfinal -- called once as pfinal(count, total) after the heap is empty.
    key    -- optional attribute name; when given, an item is treated as
              a repeat of the previously yielded one if that attribute
              compares equal, otherwise the items themselves are compared.

    Yields each item popped from the heap unless it matches the
    previously yielded item.  'count' advances for every item consumed,
    including the skipped ones.

    NOTE(review): presumably every input is already sorted, otherwise
    the output is not globally ordered -- confirm with callers.
    """
    if key:
        # pe starts out as None, hence the getattr default.
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    heap = ((next(it, None),it) for it in iters)
    # NOTE(review): this drops any iterator whose first item is false
    # (not only exhausted ones, which produced the None default).
    heap = [(e,it) for e,it in heap if e]

    heapq.heapify(heap)
    pe = None
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        # Peek at the smallest entry; it is replaced or popped below.
        e, it = heap[0]
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = it.next() # Don't use next() function, it's too expensive
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            heapq.heapreplace(heap, (e, it)) # shift current to new location
    pfinal(count, total)
 238
 239
 240 def unlink(f):
 241     """Delete a file at path 'f' if it currently exists.
 242
 243     Unlike os.unlink(), does not throw an exception if the file didn't already
 244     exist.
 245     """
 246     try:
 247         os.unlink(f)
 248     except OSError as e:
 249         if e.errno != errno.ENOENT:
 250             raise
 251
 252
 253 def readpipe(argv, preexec_fn=None, shell=False):
 254     """Run a subprocess and return its output."""
 255     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 256                          shell=shell)
 257     out, err = p.communicate()
 258     if p.returncode != 0:
 259         raise Exception('subprocess %r failed with status %d'
 260                         % (' '.join(argv), p.returncode))
 261     return out
 262
 263
 264 def _argmax_base(command):
 265     base_size = 2048
 266     for c in command:
 267         base_size += len(command) + 1
 268     for k, v in environ.iteritems():
 269         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 270     return base_size
 271
 272
 273 def _argmax_args_size(args):
 274     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 275
 276
 277 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 278     """If args is not empty, yield the output produced by calling the
 279 command list with args as a sequence of strings (It may be necessary
 280 to return multiple strings in order to respect ARG_MAX)."""
 281     # The optional arg_max arg is a workaround for an issue with the
 282     # current wvtest behavior.
 283     base_size = _argmax_base(command)
 284     while args:
 285         room = arg_max - base_size
 286         i = 0
 287         while i < len(args):
 288             next_size = _argmax_args_size(args[i:i+1])
 289             if room - next_size < 0:
 290                 break
 291             room -= next_size
 292             i += 1
 293         sub_args = args[:i]
 294         args = args[i:]
 295         assert(len(sub_args))
 296         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 297
 298
 299 def resolve_parent(p):
 300     """Return the absolute path of a file without following any final symlink.
 301
 302     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 303     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 304     will follow symlinks in p's directory)
 305     """
 306     try:
 307         st = os.lstat(p)
 308     except OSError:
 309         st = None
 310     if st and stat.S_ISLNK(st.st_mode):
 311         (dir, name) = os.path.split(p)
 312         dir = os.path.realpath(dir)
 313         out = os.path.join(dir, name)
 314     else:
 315         out = os.path.realpath(p)
 316     #log('realpathing:%r,%r\n' % (p, out))
 317     return out
 318
 319
 320 def detect_fakeroot():
 321     "Return True if we appear to be running under fakeroot."
 322     return os.getenv("FAKEROOTKEY") != None
 323
 324
 325 _warned_about_superuser_detection = None
 326 def is_superuser():
 327     if sys.platform.startswith('cygwin'):
 328         if sys.getwindowsversion()[0] > 5:
 329             # Sounds like situation is much more complicated here
 330             global _warned_about_superuser_detection
 331             if not _warned_about_superuser_detection:
 332                 log("can't detect root status for OS version > 5; assuming not root")
 333                 _warned_about_superuser_detection = True
 334             return False
 335         import ctypes
 336         return ctypes.cdll.shell32.IsUserAnAdmin()
 337     else:
 338         return os.geteuid() == 0
 339
 340
 341 def _cache_key_value(get_value, key, cache):
 342     """Return (value, was_cached).  If there is a value in the cache
 343     for key, use that, otherwise, call get_value(key) which should
 344     throw a KeyError if there is no value -- in which case the cached
 345     and returned value will be None.
 346     """
 347     try: # Do we already have it (or know there wasn't one)?
 348         value = cache[key]
 349         return value, True
 350     except KeyError:
 351         pass
 352     value = None
 353     try:
 354         cache[key] = value = get_value(key)
 355     except KeyError:
 356         cache[key] = None
 357     return value, False
 358
 359
 360 _uid_to_pwd_cache = {}
 361 _name_to_pwd_cache = {}
 362
 363 def pwd_from_uid(uid):
 364     """Return password database entry for uid (may be a cached value).
 365     Return None if no entry is found.
 366     """
 367     global _uid_to_pwd_cache, _name_to_pwd_cache
 368     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 369     if entry and not cached:
 370         _name_to_pwd_cache[entry.pw_name] = entry
 371     return entry
 372
 373
 374 def pwd_from_name(name):
 375     """Return password database entry for name (may be a cached value).
 376     Return None if no entry is found.
 377     """
 378     global _uid_to_pwd_cache, _name_to_pwd_cache
 379     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 380     if entry and not cached:
 381         _uid_to_pwd_cache[entry.pw_uid] = entry
 382     return entry
 383
 384
 385 _gid_to_grp_cache = {}
 386 _name_to_grp_cache = {}
 387
 388 def grp_from_gid(gid):
 389     """Return password database entry for gid (may be a cached value).
 390     Return None if no entry is found.
 391     """
 392     global _gid_to_grp_cache, _name_to_grp_cache
 393     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 394     if entry and not cached:
 395         _name_to_grp_cache[entry.gr_name] = entry
 396     return entry
 397
 398
 399 def grp_from_name(name):
 400     """Return password database entry for name (may be a cached value).
 401     Return None if no entry is found.
 402     """
 403     global _gid_to_grp_cache, _name_to_grp_cache
 404     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 405     if entry and not cached:
 406         _gid_to_grp_cache[entry.gr_gid] = entry
 407     return entry
 408
 409
 410 _username = None
 411 def username():
 412     """Get the user's login name."""
 413     global _username
 414     if not _username:
 415         uid = os.getuid()
 416         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 417     return _username
 418
 419
 420 _userfullname = None
 421 def userfullname():
 422     """Get the user's full name."""
 423     global _userfullname
 424     if not _userfullname:
 425         uid = os.getuid()
 426         entry = pwd_from_uid(uid)
 427         if entry:
 428             _userfullname = entry[4].split(',')[0] or entry[0]
 429         if not _userfullname:
 430             _userfullname = 'user%d' % uid
 431     return _userfullname
 432
 433
 434 _hostname = None
 435 def hostname():
 436     """Get the FQDN of this machine."""
 437     global _hostname
 438     if not _hostname:
 439         _hostname = socket.getfqdn()
 440     return _hostname
 441
 442
 443 _resource_path = None
 444 def resource_path(subdir=''):
 445     global _resource_path
 446     if not _resource_path:
 447         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 448     return os.path.join(_resource_path, subdir)
 449
 450 def format_filesize(size):
 451     unit = 1024.0
 452     size = float(size)
 453     if size < unit:
 454         return "%d" % (size)
 455     exponent = int(math.log(size) / math.log(unit))
 456     size_prefix = "KMGTPE"[exponent - 1]
 457     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 458
 459
 460 class NotOk(Exception):
 461     pass
 462
 463
 464 class BaseConn:
 465     def __init__(self, outp):
 466         self.outp = outp
 467
 468     def close(self):
 469         while self._read(65536): pass
 470
 471     def read(self, size):
 472         """Read 'size' bytes from input stream."""
 473         self.outp.flush()
 474         return self._read(size)
 475
 476     def readline(self):
 477         """Read from input stream until a newline is found."""
 478         self.outp.flush()
 479         return self._readline()
 480
 481     def write(self, data):
 482         """Write 'data' to output stream."""
 483         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 484         self.outp.write(data)
 485
 486     def has_input(self):
 487         """Return true if input stream is readable."""
 488         raise NotImplemented("Subclasses must implement has_input")
 489
 490     def ok(self):
 491         """Indicate end of output from last sent command."""
 492         self.write('\nok\n')
 493
 494     def error(self, s):
 495         """Indicate server error to the client."""
 496         s = re.sub(r'\s+', ' ', str(s))
 497         self.write('\nerror %s\n' % s)
 498
 499     def _check_ok(self, onempty):
 500         self.outp.flush()
 501         rl = ''
 502         for rl in linereader(self):
 503             #log('%d got line: %r\n' % (os.getpid(), rl))
 504             if not rl:  # empty line
 505                 continue
 506             elif rl == 'ok':
 507                 return None
 508             elif rl.startswith('error '):
 509                 #log('client: error: %s\n' % rl[6:])
 510                 return NotOk(rl[6:])
 511             else:
 512                 onempty(rl)
 513         raise Exception('server exited unexpectedly; see errors above')
 514
 515     def drain_and_check_ok(self):
 516         """Remove all data for the current command from input stream."""
 517         def onempty(rl):
 518             pass
 519         return self._check_ok(onempty)
 520
 521     def check_ok(self):
 522         """Verify that server action completed successfully."""
 523         def onempty(rl):
 524             raise Exception('expected "ok", got %r' % rl)
 525         return self._check_ok(onempty)
 526
 527
 528 class Conn(BaseConn):
 529     def __init__(self, inp, outp):
 530         BaseConn.__init__(self, outp)
 531         self.inp = inp
 532
 533     def _read(self, size):
 534         return self.inp.read(size)
 535
 536     def _readline(self):
 537         return self.inp.readline()
 538
 539     def has_input(self):
 540         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 541         if rl:
 542             assert(rl[0] == self.inp.fileno())
 543             return True
 544         else:
 545             return None
 546
 547
 548 def checked_reader(fd, n):
 549     while n > 0:
 550         rl, _, _ = select.select([fd], [], [])
 551         assert(rl[0] == fd)
 552         buf = os.read(fd, n)
 553         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 554         yield buf
 555         n -= len(buf)
 556
 557
 558 MAX_PACKET = 128 * 1024
 559 def mux(p, outfd, outr, errr):
 560     try:
 561         fds = [outr, errr]
 562         while p.poll() is None:
 563             rl, _, _ = select.select(fds, [], [])
 564             for fd in rl:
 565                 if fd == outr:
 566                     buf = os.read(outr, MAX_PACKET)
 567                     if not buf: break
 568                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 569                 elif fd == errr:
 570                     buf = os.read(errr, 1024)
 571                     if not buf: break
 572                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 573     finally:
 574         os.write(outfd, struct.pack('!IB', 0, 3))
 575
 576
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the packet stream from infd: each packet has a 5 byte '!IB'
    header (payload length, channel).  Channel 1 payloads are served to
    readers of this connection, channel 2 payloads are copied to
    stderr, and a channel 3 packet marks the stream as closed.
    """
    def __init__(self, infd, outp):
        """Consume infd up to and including the 'BUPMUX' sync string.

        Raises IOError if EOF is reached before the sync string.
        """
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Top up to 6 buffered bytes, then advance one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # checked_reader for the current data packet
        self.buf = None      # data chunk not yet handed to a caller
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        """Write 'data' to the output stream.

        Input is polled first with a zero select timeout.
        NOTE(review): presumably so that pending stderr packets get
        relayed before we write -- confirm.
        """
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and act on its channel.

        Returns False if the connection is already closed or nothing
        becomes readable within 'timeout'; otherwise returns True.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily through self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: forward the payload immediately.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return True if it does.

        Returns False if no packet arrives within 'timeout' or the
        connection is closed.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = self.reader.next()
                return True
            except StopIteration:
                # Current data packet fully consumed; fetch another.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of buffered data as directed by ix_fn.

        ix_fn(buf) returns an index at which to split buf, or None to
        take all of buf and continue with the next one.  Iteration ends
        after the first piece for which ix_fn returned an index, or
        when no more data can be loaded.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        """Return data up to and including the next newline."""
        def find_eol(buf):
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        """Return up to 'size' bytes, fewer if the data runs out."""
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if data is buffered or arrives without blocking."""
        return self._load_buf(0)
 666
 667
 668 def linereader(f):
 669     """Generate a list of input lines from 'f' without terminating newlines."""
 670     while 1:
 671         line = f.readline()
 672         if not line:
 673             break
 674         yield line[:-1]
 675
 676
 677 def chunkyreader(f, count = None):
 678     """Generate a list of chunks of data read from 'f'.
 679
 680     If count is None, read until EOF is reached.
 681
 682     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 683     reached while reading, raise IOError.
 684     """
 685     if count != None:
 686         while count > 0:
 687             b = f.read(min(count, 65536))
 688             if not b:
 689                 raise IOError('EOF with %d bytes remaining' % count)
 690             yield b
 691             count -= len(b)
 692     else:
 693         while 1:
 694             b = f.read(65536)
 695             if not b: break
 696             yield b
 697
 698
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed to name when leaving
    the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.  The temporary file is created in the
    same directory as name.  If the block raises, the temporary file is
    closed and removed, and name is left untouched.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so nothing owns the descriptor yet.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Only reached when the with-block finished without raising.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 733
 734
 735 def slashappend(s):
 736     """Append "/" to 's' if it doesn't aleady end in "/"."""
 737     if s and not s.endswith('/'):
 738         return s + '/'
 739     else:
 740         return s
 741
 742
 743 def _mmap_do(f, sz, flags, prot, close):
 744     if not sz:
 745         st = os.fstat(f.fileno())
 746         sz = st.st_size
 747     if not sz:
 748         # trying to open a zero-length map gives an error, but an empty
 749         # string has all the same behaviour of a zero-length map, ie. it has
 750         # no elements :)
 751         return ''
 752     map = mmap.mmap(f.fileno(), sz, flags, prot)
 753     if close:
 754         f.close()  # map will persist beyond file close
 755     return map
 756
 757
 758 def mmap_read(f, sz = 0, close=True):
 759     """Create a read-only memory mapped region on file 'f'.
 760     If sz is 0, the region will cover the entire file.
 761     """
 762     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 763
 764
 765 def mmap_readwrite(f, sz = 0, close=True):
 766     """Create a read-write memory mapped region on file 'f'.
 767     If sz is 0, the region will cover the entire file.
 768     """
 769     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 770                     close)
 771
 772
 773 def mmap_readwrite_private(f, sz = 0, close=True):
 774     """Create a read-write memory mapped region on file 'f'.
 775     If sz is 0, the region will cover the entire file.
 776     The map is private, which means the changes are never flushed back to the
 777     file.
 778     """
 779     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 780                     close)
 781
 782
 783 _mincore = getattr(_helpers, 'mincore', None)
 784 if _mincore:
 785     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 786     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 787
 788     _fmincore_chunk_size = None
 789     def _set_fmincore_chunk_size():
 790         global _fmincore_chunk_size
 791         pref_chunk_size = 64 * 1024 * 1024
 792         chunk_size = sc_page_size
 793         if (sc_page_size < pref_chunk_size):
 794             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 795         _fmincore_chunk_size = chunk_size
 796
 797     def fmincore(fd):
 798         """Return the mincore() data for fd as a bytearray whose values can be
 799         tested via MINCORE_INCORE, or None if fd does not fully
 800         support the operation."""
 801         st = os.fstat(fd)
 802         if (st.st_size == 0):
 803             return bytearray(0)
 804         if not _fmincore_chunk_size:
 805             _set_fmincore_chunk_size()
 806         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 807         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 808         chunk_count = page_count / _fmincore_chunk_size
 809         if chunk_count < 1:
 810             chunk_count = 1
 811         result = bytearray(page_count)
 812         for ci in xrange(chunk_count):
 813             pos = _fmincore_chunk_size * ci;
 814             msize = min(_fmincore_chunk_size, st.st_size - pos)
 815             try:
 816                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 817             except mmap.error as ex:
 818                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 819                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 820                     return None
 821                 raise ex
 822             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 823         return result
 824
 825
 826 def parse_timestamp(epoch_str):
 827     """Return the number of nanoseconds since the epoch that are described
 828 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 829 throw a ValueError that may contain additional information."""
 830     ns_per = {'s' :  1000000000,
 831               'ms' : 1000000,
 832               'us' : 1000,
 833               'ns' : 1}
 834     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 835     if not match:
 836         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 837             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 838         raise ValueError()
 839     (n, units) = match.group(1, 2)
 840     if not n:
 841         n = 1
 842     n = int(n)
 843     return n * ns_per[units]
 844
 845
 846 def parse_num(s):
 847     """Parse data size information into a float number.
 848
 849     Here are some examples of conversions:
 850         199.2k means 203981 bytes
 851         1GB means 1073741824 bytes
 852         2.1 tb means 2199023255552 bytes
 853     """
 854     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 855     if not g:
 856         raise ValueError("can't parse %r as a number" % s)
 857     (val, unit) = g.groups()
 858     num = float(val)
 859     unit = unit.lower()
 860     if unit in ['t', 'tb']:
 861         mult = 1024*1024*1024*1024
 862     elif unit in ['g', 'gb']:
 863         mult = 1024*1024*1024
 864     elif unit in ['m', 'mb']:
 865         mult = 1024*1024
 866     elif unit in ['k', 'kb']:
 867         mult = 1024
 868     elif unit in ['', 'b']:
 869         mult = 1
 870     else:
 871         raise ValueError("invalid unit %r in number %r" % (unit, s))
 872     return int(num*mult)
 873
 874
 875 def count(l):
 876     """Count the number of elements in an iterator. (consumes the iterator)"""
 877     return reduce(lambda x,y: x+1, l)
 878
 879
 880 saved_errors = []
 881 def add_error(e):
 882     """Append an error message to the list of saved errors.
 883
 884     Once processing is able to stop and output the errors, the saved errors are
 885     accessible in the module variable helpers.saved_errors.
 886     """
 887     saved_errors.append(e)
 888     log('%-70s\n' % e)
 889
 890
 891 def clear_errors():
 892     global saved_errors
 893     saved_errors = []
 894
 895
 896 def die_if_errors(msg=None, status=1):
 897     global saved_errors
 898     if saved_errors:
 899         if not msg:
 900             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 901         log(msg)
 902         sys.exit(status)
 903
 904
 905 def handle_ctrl_c():
 906     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 907
 908     The new exception handler will make sure that bup will exit without an ugly
 909     stacktrace when Ctrl-C is hit.
 910     """
 911     oldhook = sys.excepthook
 912     def newhook(exctype, value, traceback):
 913         if exctype == KeyboardInterrupt:
 914             log('\nInterrupted.\n')
 915         else:
 916             return oldhook(exctype, value, traceback)
 917     sys.excepthook = newhook
 918
 919
 920 def columnate(l, prefix):
 921     """Format elements of 'l' in columns with 'prefix' leading each line.
 922
 923     The number of columns is determined automatically based on the string
 924     lengths.
 925     """
 926     if not l:
 927         return ""
 928     l = l[:]
 929     clen = max(len(s) for s in l)
 930     ncols = (tty_width() - len(prefix)) / (clen + 2)
 931     if ncols <= 1:
 932         ncols = 1
 933         clen = 0
 934     cols = []
 935     while len(l) % ncols:
 936         l.append('')
 937     rows = len(l)/ncols
 938     for s in range(0, len(l), rows):
 939         cols.append(l[s:s+rows])
 940     out = ''
 941     for row in zip(*cols):
 942         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 943     return out
 944
 945
 946 def parse_date_or_fatal(str, fatal):
 947     """Parses the given date or calls Option.fatal().
 948     For now we expect a string that contains a float."""
 949     try:
 950         date = float(str)
 951     except ValueError as e:
 952         raise fatal('invalid date format (should be a float): %r' % e)
 953     else:
 954         return date
 955
 956
 957 def parse_excludes(options, fatal):
 958     """Traverse the options and extract all excludes, or call Option.fatal()."""
 959     excluded_paths = []
 960
 961     for flag in options:
 962         (option, parameter) = flag
 963         if option == '--exclude':
 964             excluded_paths.append(resolve_parent(parameter))
 965         elif option == '--exclude-from':
 966             try:
 967                 f = open(resolve_parent(parameter))
 968             except IOError as e:
 969                 raise fatal("couldn't read %s" % parameter)
 970             for exclude_path in f.readlines():
 971                 # FIXME: perhaps this should be rstrip('\n')
 972                 exclude_path = resolve_parent(exclude_path.strip())
 973                 if exclude_path:
 974                     excluded_paths.append(exclude_path)
 975     return sorted(frozenset(excluded_paths))
 976
 977
 978 def parse_rx_excludes(options, fatal):
 979     """Traverse the options and extract all rx excludes, or call
 980     Option.fatal()."""
 981     excluded_patterns = []
 982
 983     for flag in options:
 984         (option, parameter) = flag
 985         if option == '--exclude-rx':
 986             try:
 987                 excluded_patterns.append(re.compile(parameter))
 988             except re.error as ex:
 989                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 990         elif option == '--exclude-rx-from':
 991             try:
 992                 f = open(resolve_parent(parameter))
 993             except IOError as e:
 994                 raise fatal("couldn't read %s" % parameter)
 995             for pattern in f.readlines():
 996                 spattern = pattern.rstrip('\n')
 997                 if not spattern:
 998                     continue
 999                 try:
1000                     excluded_patterns.append(re.compile(spattern))
1001                 except re.error as ex:
1002                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
1003     return excluded_patterns
1004
1005
def should_rx_exclude_path(path, exclude_rxs):
    """Return True if path matches a regular expression in exclude_rxs.

    The patterns are tried in order with search(); the first one that
    matches is reported via debug1().  Returns False if none matches.
    """
    for pattern in exclude_rxs:
        if not pattern.search(path):
            continue
        debug1('Skipping %r: excluded by rx pattern %r.\n'
               % (path, pattern.pattern))
        return True
    return False
1014
1015
1016 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1017 # that resolve against the current filesystem in the strip/graft
1018 # functions for example, but elsewhere as well.  I suspect bup's not
1019 # always being careful about that.  For some cases, the contents of
1020 # the current filesystem should be irrelevant, and consulting it might
1021 # produce the wrong result, perhaps via unintended symlink resolution,
1022 # for example.
1023
def path_components(path):
    """Break path into a list of pairs of the form (name,
    full_path_to_name).  Path must start with '/'.

    The path is normalized with os.path.abspath() first, so redundant
    separators, '.' and '..' do not show up in the result.  Raises
    Exception if path does not start with '/'.

    Example:
      '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
    if not path.startswith('/'):
        # The call form is valid on both Python 2 and Python 3.
        raise Exception('path must start with "/": %s' % path)
    # Since we assume path startswith('/'), we can skip the first element.
    result = [('', '/')]
    norm_path = os.path.abspath(path)
    if norm_path == '/':
        return result
    full_path = ''
    for p in norm_path.split('/')[1:]:
        full_path += '/' + p
        result.append((p, full_path))
    return result
1041
1042
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    The prefixes are tried longest first, and the first one that matches
    wins.  A prefix only matches at a path component boundary, i.e. when
    path is the prefix itself or lies below it ('/foo' matches '/foo/bar'
    but not '/foobar').  A prefix that normalizes to '/' is ignored.  The
    first returned component stands for the stripped prefix: its name is
    '' and its path is the normalized prefix.  If no prefix matches, the
    result is that of path_components(path).

    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == '/':
            continue
        # A plain startswith() would also accept '/foo' as a prefix of
        # '/foobar' and then fabricate the nonexistent path '/foo/bar'.
        if normalized_path == normalized_bp \
           or normalized_path.startswith(normalized_bp + '/'):
            prefix = normalized_bp
            result = []
            for p in normalized_path[len(normalized_bp):].split('/'):
                if p: # not root
                    prefix += '/'
                prefix += p
                result.append((p, prefix))
            return result
    # Nothing to strip.
    return path_components(path)
1065
1066
def grafted_path_components(graft_points, path):
    """Return the components of path after applying the first matching
    graft point.

    graft_points is an iterable of (old_prefix, new_prefix) pairs, as in
    --graft old=new.  The result is a list of (name, fs_path) pairs like
    the one path_components() returns, except that the faked directories
    leading up to the graft point have None as their fs_path, and the
    entry at the graft point is named after the last component of
    new_prefix while keeping the original filesystem path.  If no graft
    point matches, the result is path_components() of the absolute path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize both prefixes.  Note that normpath() only cleans up
        # the path text; it does not make a relative prefix absolute.
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # NOTE(review): this is a plain string prefix test, so it also
        # matches when old_prefix ends in the middle of a component
        # name -- confirm whether that is intended.
        if clean_path.startswith(old_prefix):
            escaped_prefix = re.escape(old_prefix)
            # NOTE(review): new_prefix is used as an re.sub() replacement
            # template, so backslashes in it would be interpreted.
            grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = '/' + grafted_path.lstrip('/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
            new_prefix_parts = new_prefix.split('/')
            # The leading components of the grafted path, one for each
            # '/' in new_prefix; these become the faked directories.
            result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            # (From here on graft_point is an index into result, no longer
            # the (old, new) pair of the loop.)
            graft_point = len(result_prefix)
            result[graft_point] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == '/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
1108
1109
# Alias for the hashlib SHA-1 constructor.
Sha1 = hashlib.sha1
1111
1112
# The localtime() provided by bup's _helpers module, or None if it
# doesn't have one.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # The nine fields of time.struct_time, followed by tm_gmtoff and
    # tm_zone.
    bup_time = namedtuple('bup_time',
                          'tm_year tm_mon tm_mday tm_hour tm_min tm_sec '
                          'tm_wday tm_yday tm_isdst tm_gmtoff tm_zone')
1120
# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the result of _helpers.localtime(time) as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        # Format the sign separately: "%+03d" % -0 gives "+00", which
        # would turn an offset like -00:30 into "+0030".  An offset that
        # truncates to zero minutes is still reported as "+0000".
        sign = '-' if off < 0 and offmin else '+'
        return "%s%02d%02d" % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any fields beyond the
        first nine (x is returned as is if it already is a struct_time)."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted by
        strftime('%z')."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged; localtime() is time.localtime here."""
        return x
1148
1149
# Matches what must not appear anywhere in a save name: one of the
# characters '[', ' ', '~', '^', ':', '?', '*', '\', or one of the
# sequences '..', '//', '@{'.  The '[' is escaped inside the character
# class so that newer Python versions don't warn about a possible nested
# set.
_some_invalid_save_parts_rx = re.compile(r'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a save (branch) name, and
    False otherwise.

    A name is rejected if it is empty or '@', starts or ends with '/',
    ends with '.', contains any of the characters or sequences matched
    by _some_invalid_save_parts_rx, contains a control character (below
    0x20, or 0x7f), or has a '/'-separated component that starts with
    '.' or ends with '.lock'.
    """
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if not name:
        # An empty name can't be a ref name either.
        return False
    if name == '@' \
       or name.startswith('/') or name.endswith('/') \
       or name.endswith('.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    for part in name.split('/'):
        if part.startswith('.') or part.endswith('.lock'):
            return False
    return True
1167
1168
# A period is a decimal count followed by a unit: s, min, h, d, w, m or y.
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Return the number of seconds in the period described by s.

    s is either 'forever', for which float('inf') is returned, or a
    count followed by a unit (s, min, h, d, w, m, y).  A month ('m')
    counts as 31 days and a year ('y') as 366 days.  Returns None if s
    has any other form.
    """
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    return int(count) * secs_per_unit[unit]