lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from contextlib import contextmanager
   5 from ctypes import sizeof, c_void_p
   6 from os import environ
   7 from pipes import quote
   8 from subprocess import PIPE, Popen
   9 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  10 import hashlib, heapq, math, operator, time, grp, tempfile
  11
  12 from bup import _helpers
  13 from bup import compat
  14 # This function should really be in helpers, not in bup.options.  But we
  15 # want options.py to be standalone so people can include it in other projects.
  16 from bup.options import _tty_width as tty_width
  17
  18
  19 class Nonlocal:
  20     """Helper to deal with Python scoping issues"""
  21     pass
  22
  23
  24 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  25 assert(sc_page_size > 0)
  26
  27 sc_arg_max = os.sysconf('SC_ARG_MAX')
  28 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  29     sc_arg_max = 2 * 1024 * 1024
  30
  31 def last(iterable):
  32     result = None
  33     for result in iterable:
  34         pass
  35     return result
  36
  37
  38 def atoi(s):
  39     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  40     try:
  41         return int(s or '0')
  42     except ValueError:
  43         return 0
  44
  45
  46 def atof(s):
  47     """Convert the string 's' to a float. Return 0 if s is not a number."""
  48     try:
  49         return float(s or '0')
  50     except ValueError:
  51         return 0
  52
  53
  54 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  55
  56
  57 try:
  58     _fdatasync = os.fdatasync
  59 except AttributeError:
  60     _fdatasync = os.fsync
  61
  62 if sys.platform.startswith('darwin'):
  63     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  64     import fcntl
  65     def fdatasync(fd):
  66         try:
  67             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  68         except IOError as e:
  69             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  70             if e.errno == errno.ENOTSUP:
  71                 return _fdatasync(fd)
  72             else:
  73                 raise
  74 else:
  75     fdatasync = _fdatasync
  76
  77
  78 def partition(predicate, stream):
  79     """Returns (leading_matches_it, rest_it), where leading_matches_it
  80     must be completely exhausted before traversing rest_it.
  81
  82     """
  83     stream = iter(stream)
  84     ns = Nonlocal()
  85     ns.first_nonmatch = None
  86     def leading_matches():
  87         for x in stream:
  88             if predicate(x):
  89                 yield x
  90             else:
  91                 ns.first_nonmatch = (x,)
  92                 break
  93     def rest():
  94         if ns.first_nonmatch:
  95             yield ns.first_nonmatch[0]
  96             for x in stream:
  97                 yield x
  98     return (leading_matches(), rest())
  99
 100
 101 def lines_until_sentinel(f, sentinel, ex_type):
 102     # sentinel must end with \n and must contain only one \n
 103     while True:
 104         line = f.readline()
 105         if not (line and line.endswith('\n')):
 106             raise ex_type('Hit EOF while reading line')
 107         if line == sentinel:
 108             return
 109         yield line
 110
 111
 112 def stat_if_exists(path):
 113     try:
 114         return os.stat(path)
 115     except OSError as e:
 116         if e.errno != errno.ENOENT:
 117             raise
 118     return None
 119
 120
 121 # Write (blockingly) to sockets that may or may not be in blocking mode.
 122 # We need this because our stderr is sometimes eaten by subprocesses
 123 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 124 # leading to race conditions.  Ick.  We'll do it the hard way.
 125 def _hard_write(fd, buf):
 126     while buf:
 127         (r,w,x) = select.select([], [fd], [], None)
 128         if not w:
 129             raise IOError('select(fd) returned without being writable')
 130         try:
 131             sz = os.write(fd, buf)
 132         except OSError as e:
 133             if e.errno != errno.EAGAIN:
 134                 raise
 135         assert(sz >= 0)
 136         buf = buf[sz:]
 137
 138
 139 _last_prog = 0
 140 def log(s):
 141     """Print a log message to stderr."""
 142     global _last_prog
 143     sys.stdout.flush()
 144     _hard_write(sys.stderr.fileno(), s)
 145     _last_prog = 0
 146
 147
 148 def debug1(s):
 149     if buglvl >= 1:
 150         log(s)
 151
 152
 153 def debug2(s):
 154     if buglvl >= 2:
 155         log(s)
 156
 157
 158 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 159 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 160 _last_progress = ''
 161 def progress(s):
 162     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 163     global _last_progress
 164     if istty2:
 165         log(s)
 166         _last_progress = s
 167
 168
 169 def qprogress(s):
 170     """Calls progress() only if we haven't printed progress in a while.
 171
 172     This avoids overloading the stderr buffer with excess junk.
 173     """
 174     global _last_prog
 175     now = time.time()
 176     if now - _last_prog > 0.1:
 177         progress(s)
 178         _last_prog = now
 179
 180
 181 def reprogress():
 182     """Calls progress() to redisplay the most recent progress message.
 183
 184     Useful after you've printed some other message that wipes out the
 185     progress line.
 186     """
 187     if _last_progress and _last_progress.endswith('\r'):
 188         progress(_last_progress)
 189
 190
 191 def mkdirp(d, mode=None):
 192     """Recursively create directories on path 'd'.
 193
 194     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 195     the path already exists.
 196     """
 197     try:
 198         if mode:
 199             os.makedirs(d, mode)
 200         else:
 201             os.makedirs(d)
 202     except OSError as e:
 203         if e.errno == errno.EEXIST:
 204             pass
 205         else:
 206             raise
 207
 208
 209 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 210     if key:
 211         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 212     else:
 213         samekey = operator.eq
 214     count = 0
 215     total = sum(len(it) for it in iters)
 216     iters = (iter(it) for it in iters)
 217     heap = ((next(it, None),it) for it in iters)
 218     heap = [(e,it) for e,it in heap if e]
 219
 220     heapq.heapify(heap)
 221     pe = None
 222     while heap:
 223         if not count % pfreq:
 224             pfunc(count, total)
 225         e, it = heap[0]
 226         if not samekey(e, pe):
 227             pe = e
 228             yield e
 229         count += 1
 230         try:
 231             e = next(it)
 232         except StopIteration:
 233             heapq.heappop(heap) # remove current
 234         else:
 235             heapq.heapreplace(heap, (e, it)) # shift current to new location
 236     pfinal(count, total)
 237
 238
 239 def unlink(f):
 240     """Delete a file at path 'f' if it currently exists.
 241
 242     Unlike os.unlink(), does not throw an exception if the file didn't already
 243     exist.
 244     """
 245     try:
 246         os.unlink(f)
 247     except OSError as e:
 248         if e.errno != errno.ENOENT:
 249             raise
 250
 251
 252 def shstr(cmd):
 253     if isinstance(cmd, compat.str_type):
 254         return cmd
 255     else:
 256         return ' '.join(map(quote, cmd))
 257
 258 exc = subprocess.check_call
 259
 260 def exo(cmd,
 261         input=None,
 262         stdin=None,
 263         stderr=None,
 264         shell=False,
 265         check=True,
 266         preexec_fn=None):
 267     if input:
 268         assert stdin in (None, PIPE)
 269         stdin = PIPE
 270     p = Popen(cmd,
 271               stdin=stdin, stdout=PIPE, stderr=stderr,
 272               shell=shell,
 273               preexec_fn=preexec_fn)
 274     out, err = p.communicate(input)
 275     if check and p.returncode != 0:
 276         raise Exception('subprocess %r failed with status %d, stderr: %r'
 277                         % (' '.join(map(quote, cmd)), p.returncode, err))
 278     return out, err, p
 279
 280 def readpipe(argv, preexec_fn=None, shell=False):
 281     """Run a subprocess and return its output."""
 282     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 283                          shell=shell)
 284     out, err = p.communicate()
 285     if p.returncode != 0:
 286         raise Exception('subprocess %r failed with status %d'
 287                         % (' '.join(argv), p.returncode))
 288     return out
 289
 290
 291 def _argmax_base(command):
 292     base_size = 2048
 293     for c in command:
 294         base_size += len(command) + 1
 295     for k, v in compat.items(environ):
 296         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 297     return base_size
 298
 299
 300 def _argmax_args_size(args):
 301     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 302
 303
 304 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 305     """If args is not empty, yield the output produced by calling the
 306 command list with args as a sequence of strings (It may be necessary
 307 to return multiple strings in order to respect ARG_MAX)."""
 308     # The optional arg_max arg is a workaround for an issue with the
 309     # current wvtest behavior.
 310     base_size = _argmax_base(command)
 311     while args:
 312         room = arg_max - base_size
 313         i = 0
 314         while i < len(args):
 315             next_size = _argmax_args_size(args[i:i+1])
 316             if room - next_size < 0:
 317                 break
 318             room -= next_size
 319             i += 1
 320         sub_args = args[:i]
 321         args = args[i:]
 322         assert(len(sub_args))
 323         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 324
 325
 326 def resolve_parent(p):
 327     """Return the absolute path of a file without following any final symlink.
 328
 329     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 330     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 331     will follow symlinks in p's directory)
 332     """
 333     try:
 334         st = os.lstat(p)
 335     except OSError:
 336         st = None
 337     if st and stat.S_ISLNK(st.st_mode):
 338         (dir, name) = os.path.split(p)
 339         dir = os.path.realpath(dir)
 340         out = os.path.join(dir, name)
 341     else:
 342         out = os.path.realpath(p)
 343     #log('realpathing:%r,%r\n' % (p, out))
 344     return out
 345
 346
 347 def detect_fakeroot():
 348     "Return True if we appear to be running under fakeroot."
 349     return os.getenv("FAKEROOTKEY") != None
 350
 351
 352 _warned_about_superuser_detection = None
 353 def is_superuser():
 354     if sys.platform.startswith('cygwin'):
 355         if sys.getwindowsversion()[0] > 5:
 356             # Sounds like situation is much more complicated here
 357             global _warned_about_superuser_detection
 358             if not _warned_about_superuser_detection:
 359                 log("can't detect root status for OS version > 5; assuming not root")
 360                 _warned_about_superuser_detection = True
 361             return False
 362         import ctypes
 363         return ctypes.cdll.shell32.IsUserAnAdmin()
 364     else:
 365         return os.geteuid() == 0
 366
 367
 368 def _cache_key_value(get_value, key, cache):
 369     """Return (value, was_cached).  If there is a value in the cache
 370     for key, use that, otherwise, call get_value(key) which should
 371     throw a KeyError if there is no value -- in which case the cached
 372     and returned value will be None.
 373     """
 374     try: # Do we already have it (or know there wasn't one)?
 375         value = cache[key]
 376         return value, True
 377     except KeyError:
 378         pass
 379     value = None
 380     try:
 381         cache[key] = value = get_value(key)
 382     except KeyError:
 383         cache[key] = None
 384     return value, False
 385
 386
 387 _uid_to_pwd_cache = {}
 388 _name_to_pwd_cache = {}
 389
 390 def pwd_from_uid(uid):
 391     """Return password database entry for uid (may be a cached value).
 392     Return None if no entry is found.
 393     """
 394     global _uid_to_pwd_cache, _name_to_pwd_cache
 395     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 396     if entry and not cached:
 397         _name_to_pwd_cache[entry.pw_name] = entry
 398     return entry
 399
 400
 401 def pwd_from_name(name):
 402     """Return password database entry for name (may be a cached value).
 403     Return None if no entry is found.
 404     """
 405     global _uid_to_pwd_cache, _name_to_pwd_cache
 406     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 407     if entry and not cached:
 408         _uid_to_pwd_cache[entry.pw_uid] = entry
 409     return entry
 410
 411
 412 _gid_to_grp_cache = {}
 413 _name_to_grp_cache = {}
 414
 415 def grp_from_gid(gid):
 416     """Return password database entry for gid (may be a cached value).
 417     Return None if no entry is found.
 418     """
 419     global _gid_to_grp_cache, _name_to_grp_cache
 420     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 421     if entry and not cached:
 422         _name_to_grp_cache[entry.gr_name] = entry
 423     return entry
 424
 425
 426 def grp_from_name(name):
 427     """Return password database entry for name (may be a cached value).
 428     Return None if no entry is found.
 429     """
 430     global _gid_to_grp_cache, _name_to_grp_cache
 431     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 432     if entry and not cached:
 433         _gid_to_grp_cache[entry.gr_gid] = entry
 434     return entry
 435
 436
 437 _username = None
 438 def username():
 439     """Get the user's login name."""
 440     global _username
 441     if not _username:
 442         uid = os.getuid()
 443         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 444     return _username
 445
 446
 447 _userfullname = None
 448 def userfullname():
 449     """Get the user's full name."""
 450     global _userfullname
 451     if not _userfullname:
 452         uid = os.getuid()
 453         entry = pwd_from_uid(uid)
 454         if entry:
 455             _userfullname = entry[4].split(',')[0] or entry[0]
 456         if not _userfullname:
 457             _userfullname = 'user%d' % uid
 458     return _userfullname
 459
 460
 461 _hostname = None
 462 def hostname():
 463     """Get the FQDN of this machine."""
 464     global _hostname
 465     if not _hostname:
 466         _hostname = socket.getfqdn()
 467     return _hostname
 468
 469
 470 _resource_path = None
 471 def resource_path(subdir=''):
 472     global _resource_path
 473     if not _resource_path:
 474         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 475     return os.path.join(_resource_path, subdir)
 476
 477 def format_filesize(size):
 478     unit = 1024.0
 479     size = float(size)
 480     if size < unit:
 481         return "%d" % (size)
 482     exponent = int(math.log(size) / math.log(unit))
 483     size_prefix = "KMGTPE"[exponent - 1]
 484     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 485
 486
 487 class NotOk(Exception):
 488     pass
 489
 490
 491 class BaseConn:
 492     def __init__(self, outp):
 493         self.outp = outp
 494
 495     def close(self):
 496         while self._read(65536): pass
 497
 498     def read(self, size):
 499         """Read 'size' bytes from input stream."""
 500         self.outp.flush()
 501         return self._read(size)
 502
 503     def readline(self):
 504         """Read from input stream until a newline is found."""
 505         self.outp.flush()
 506         return self._readline()
 507
 508     def write(self, data):
 509         """Write 'data' to output stream."""
 510         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 511         self.outp.write(data)
 512
 513     def has_input(self):
 514         """Return true if input stream is readable."""
 515         raise NotImplemented("Subclasses must implement has_input")
 516
 517     def ok(self):
 518         """Indicate end of output from last sent command."""
 519         self.write('\nok\n')
 520
 521     def error(self, s):
 522         """Indicate server error to the client."""
 523         s = re.sub(r'\s+', ' ', str(s))
 524         self.write('\nerror %s\n' % s)
 525
 526     def _check_ok(self, onempty):
 527         self.outp.flush()
 528         rl = ''
 529         for rl in linereader(self):
 530             #log('%d got line: %r\n' % (os.getpid(), rl))
 531             if not rl:  # empty line
 532                 continue
 533             elif rl == 'ok':
 534                 return None
 535             elif rl.startswith('error '):
 536                 #log('client: error: %s\n' % rl[6:])
 537                 return NotOk(rl[6:])
 538             else:
 539                 onempty(rl)
 540         raise Exception('server exited unexpectedly; see errors above')
 541
 542     def drain_and_check_ok(self):
 543         """Remove all data for the current command from input stream."""
 544         def onempty(rl):
 545             pass
 546         return self._check_ok(onempty)
 547
 548     def check_ok(self):
 549         """Verify that server action completed successfully."""
 550         def onempty(rl):
 551             raise Exception('expected "ok", got %r' % rl)
 552         return self._check_ok(onempty)
 553
 554
 555 class Conn(BaseConn):
 556     def __init__(self, inp, outp):
 557         BaseConn.__init__(self, outp)
 558         self.inp = inp
 559
 560     def _read(self, size):
 561         return self.inp.read(size)
 562
 563     def _readline(self):
 564         return self.inp.readline()
 565
 566     def has_input(self):
 567         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 568         if rl:
 569             assert(rl[0] == self.inp.fileno())
 570             return True
 571         else:
 572             return None
 573
 574
 575 def checked_reader(fd, n):
 576     while n > 0:
 577         rl, _, _ = select.select([fd], [], [])
 578         assert(rl[0] == fd)
 579         buf = os.read(fd, n)
 580         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 581         yield buf
 582         n -= len(buf)
 583
 584
 585 MAX_PACKET = 128 * 1024
 586 def mux(p, outfd, outr, errr):
 587     try:
 588         fds = [outr, errr]
 589         while p.poll() is None:
 590             rl, _, _ = select.select(fds, [], [])
 591             for fd in rl:
 592                 if fd == outr:
 593                     buf = os.read(outr, MAX_PACKET)
 594                     if not buf: break
 595                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 596                 elif fd == errr:
 597                     buf = os.read(errr, 1024)
 598                     if not buf: break
 599                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 600     finally:
 601         os.write(outfd, struct.pack('!IB', 0, 3))
 602
 603
 604 class DemuxConn(BaseConn):
 605     """A helper class for bup's client-server protocol."""
 606     def __init__(self, infd, outp):
 607         BaseConn.__init__(self, outp)
 608         # Anything that comes through before the sync string was not
 609         # multiplexed and can be assumed to be debug/log before mux init.
 610         tail = ''
 611         while tail != 'BUPMUX':
 612             b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
 613             if not b:
 614                 raise IOError('demux: unexpected EOF during initialization')
 615             tail += b
 616             sys.stderr.write(tail[:-6])  # pre-mux log messages
 617             tail = tail[-6:]
 618         self.infd = infd
 619         self.reader = None
 620         self.buf = None
 621         self.closed = False
 622
 623     def write(self, data):
 624         self._load_buf(0)
 625         BaseConn.write(self, data)
 626
 627     def _next_packet(self, timeout):
 628         if self.closed: return False
 629         rl, wl, xl = select.select([self.infd], [], [], timeout)
 630         if not rl: return False
 631         assert(rl[0] == self.infd)
 632         ns = ''.join(checked_reader(self.infd, 5))
 633         n, fdw = struct.unpack('!IB', ns)
 634         assert(n <= MAX_PACKET)
 635         if fdw == 1:
 636             self.reader = checked_reader(self.infd, n)
 637         elif fdw == 2:
 638             for buf in checked_reader(self.infd, n):
 639                 sys.stderr.write(buf)
 640         elif fdw == 3:
 641             self.closed = True
 642             debug2("DemuxConn: marked closed\n")
 643         return True
 644
 645     def _load_buf(self, timeout):
 646         if self.buf is not None:
 647             return True
 648         while not self.closed:
 649             while not self.reader:
 650                 if not self._next_packet(timeout):
 651                     return False
 652             try:
 653                 self.buf = next(self.reader)
 654                 return True
 655             except StopIteration:
 656                 self.reader = None
 657         return False
 658
 659     def _read_parts(self, ix_fn):
 660         while self._load_buf(None):
 661             assert(self.buf is not None)
 662             i = ix_fn(self.buf)
 663             if i is None or i == len(self.buf):
 664                 yv = self.buf
 665                 self.buf = None
 666             else:
 667                 yv = self.buf[:i]
 668                 self.buf = self.buf[i:]
 669             yield yv
 670             if i is not None:
 671                 break
 672
 673     def _readline(self):
 674         def find_eol(buf):
 675             try:
 676                 return buf.index('\n')+1
 677             except ValueError:
 678                 return None
 679         return ''.join(self._read_parts(find_eol))
 680
 681     def _read(self, size):
 682         csize = [size]
 683         def until_size(buf): # Closes on csize
 684             if len(buf) < csize[0]:
 685                 csize[0] -= len(buf)
 686                 return None
 687             else:
 688                 return csize[0]
 689         return ''.join(self._read_parts(until_size))
 690
 691     def has_input(self):
 692         return self._load_buf(0)
 693
 694
 695 def linereader(f):
 696     """Generate a list of input lines from 'f' without terminating newlines."""
 697     while 1:
 698         line = f.readline()
 699         if not line:
 700             break
 701         yield line[:-1]
 702
 703
 704 def chunkyreader(f, count = None):
 705     """Generate a list of chunks of data read from 'f'.
 706
 707     If count is None, read until EOF is reached.
 708
 709     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 710     reached while reading, raise IOError.
 711     """
 712     if count != None:
 713         while count > 0:
 714             b = f.read(min(count, 65536))
 715             if not b:
 716                 raise IOError('EOF with %d bytes remaining' % count)
 717             yield b
 718             count -= len(b)
 719     else:
 720         while 1:
 721             b = f.read(65536)
 722             if not b: break
 723             yield b
 724
 725
 726 @contextmanager
 727 def atomically_replaced_file(name, mode='w', buffering=-1):
 728     """Yield a file that will be atomically renamed name when leaving the block.
 729
 730     This contextmanager yields an open file object that is backed by a
 731     temporary file which will be renamed (atomically) to the target
 732     name if everything succeeds.
 733
 734     The mode and buffering arguments are handled exactly as with open,
 735     and the yielded file will have very restrictive permissions, as
 736     per mkstemp.
 737
 738     E.g.::
 739
 740         with atomically_replaced_file('foo.txt', 'w') as f:
 741             f.write('hello jack.')
 742
 743     """
 744
 745     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 746                                        text=('b' not in mode))
 747     try:
 748         try:
 749             f = os.fdopen(ffd, mode, buffering)
 750         except:
 751             os.close(ffd)
 752             raise
 753         try:
 754             yield f
 755         finally:
 756             f.close()
 757         os.rename(tempname, name)
 758     finally:
 759         unlink(tempname)  # nonexistant file is ignored
 760
 761
 762 def slashappend(s):
 763     """Append "/" to 's' if it doesn't aleady end in "/"."""
 764     if s and not s.endswith('/'):
 765         return s + '/'
 766     else:
 767         return s
 768
 769
 770 def _mmap_do(f, sz, flags, prot, close):
 771     if not sz:
 772         st = os.fstat(f.fileno())
 773         sz = st.st_size
 774     if not sz:
 775         # trying to open a zero-length map gives an error, but an empty
 776         # string has all the same behaviour of a zero-length map, ie. it has
 777         # no elements :)
 778         return ''
 779     map = mmap.mmap(f.fileno(), sz, flags, prot)
 780     if close:
 781         f.close()  # map will persist beyond file close
 782     return map
 783
 784
 785 def mmap_read(f, sz = 0, close=True):
 786     """Create a read-only memory mapped region on file 'f'.
 787     If sz is 0, the region will cover the entire file.
 788     """
 789     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 790
 791
 792 def mmap_readwrite(f, sz = 0, close=True):
 793     """Create a read-write memory mapped region on file 'f'.
 794     If sz is 0, the region will cover the entire file.
 795     """
 796     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 797                     close)
 798
 799
 800 def mmap_readwrite_private(f, sz = 0, close=True):
 801     """Create a read-write memory mapped region on file 'f'.
 802     If sz is 0, the region will cover the entire file.
 803     The map is private, which means the changes are never flushed back to the
 804     file.
 805     """
 806     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 807                     close)
 808
 809
 810 _mincore = getattr(_helpers, 'mincore', None)
 811 if _mincore:
 812     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 813     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 814
 815     _fmincore_chunk_size = None
 816     def _set_fmincore_chunk_size():
 817         global _fmincore_chunk_size
 818         pref_chunk_size = 64 * 1024 * 1024
 819         chunk_size = sc_page_size
 820         if (sc_page_size < pref_chunk_size):
 821             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 822         _fmincore_chunk_size = chunk_size
 823
 824     def fmincore(fd):
 825         """Return the mincore() data for fd as a bytearray whose values can be
 826         tested via MINCORE_INCORE, or None if fd does not fully
 827         support the operation."""
 828         st = os.fstat(fd)
 829         if (st.st_size == 0):
 830             return bytearray(0)
 831         if not _fmincore_chunk_size:
 832             _set_fmincore_chunk_size()
 833         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 834         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 835         chunk_count = page_count / _fmincore_chunk_size
 836         if chunk_count < 1:
 837             chunk_count = 1
 838         result = bytearray(page_count)
 839         for ci in xrange(chunk_count):
 840             pos = _fmincore_chunk_size * ci;
 841             msize = min(_fmincore_chunk_size, st.st_size - pos)
 842             try:
 843                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 844             except mmap.error as ex:
 845                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 846                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 847                     return None
 848                 raise ex
 849             try:
 850                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 851             except OSError as ex:
 852                 if ex.errno == errno.ENOSYS:
 853                     return None
 854                 raise
 855         return result
 856
 857
 858 def parse_timestamp(epoch_str):
 859     """Return the number of nanoseconds since the epoch that are described
 860 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 861 throw a ValueError that may contain additional information."""
 862     ns_per = {'s' :  1000000000,
 863               'ms' : 1000000,
 864               'us' : 1000,
 865               'ns' : 1}
 866     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 867     if not match:
 868         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 869             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 870         raise ValueError()
 871     (n, units) = match.group(1, 2)
 872     if not n:
 873         n = 1
 874     n = int(n)
 875     return n * ns_per[units]
 876
 877
 878 def parse_num(s):
 879     """Parse data size information into a float number.
 880
 881     Here are some examples of conversions:
 882         199.2k means 203981 bytes
 883         1GB means 1073741824 bytes
 884         2.1 tb means 2199023255552 bytes
 885     """
 886     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 887     if not g:
 888         raise ValueError("can't parse %r as a number" % s)
 889     (val, unit) = g.groups()
 890     num = float(val)
 891     unit = unit.lower()
 892     if unit in ['t', 'tb']:
 893         mult = 1024*1024*1024*1024
 894     elif unit in ['g', 'gb']:
 895         mult = 1024*1024*1024
 896     elif unit in ['m', 'mb']:
 897         mult = 1024*1024
 898     elif unit in ['k', 'kb']:
 899         mult = 1024
 900     elif unit in ['', 'b']:
 901         mult = 1
 902     else:
 903         raise ValueError("invalid unit %r in number %r" % (unit, s))
 904     return int(num*mult)
 905
 906
 907 def count(l):
 908     """Count the number of elements in an iterator. (consumes the iterator)"""
 909     return reduce(lambda x,y: x+1, l)
 910
 911
 912 saved_errors = []
 913 def add_error(e):
 914     """Append an error message to the list of saved errors.
 915
 916     Once processing is able to stop and output the errors, the saved errors are
 917     accessible in the module variable helpers.saved_errors.
 918     """
 919     saved_errors.append(e)
 920     log('%-70s\n' % e)
 921
 922
 923 def clear_errors():
 924     global saved_errors
 925     saved_errors = []
 926
 927
 928 def die_if_errors(msg=None, status=1):
 929     global saved_errors
 930     if saved_errors:
 931         if not msg:
 932             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 933         log(msg)
 934         sys.exit(status)
 935
 936
 937 def handle_ctrl_c():
 938     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 939
 940     The new exception handler will make sure that bup will exit without an ugly
 941     stacktrace when Ctrl-C is hit.
 942     """
 943     oldhook = sys.excepthook
 944     def newhook(exctype, value, traceback):
 945         if exctype == KeyboardInterrupt:
 946             log('\nInterrupted.\n')
 947         else:
 948             return oldhook(exctype, value, traceback)
 949     sys.excepthook = newhook
 950
 951
 952 def columnate(l, prefix):
 953     """Format elements of 'l' in columns with 'prefix' leading each line.
 954
 955     The number of columns is determined automatically based on the string
 956     lengths.
 957     """
 958     if not l:
 959         return ""
 960     l = l[:]
 961     clen = max(len(s) for s in l)
 962     ncols = (tty_width() - len(prefix)) // (clen + 2)
 963     if ncols <= 1:
 964         ncols = 1
 965         clen = 0
 966     cols = []
 967     while len(l) % ncols:
 968         l.append('')
 969     rows = len(l) // ncols
 970     for s in range(0, len(l), rows):
 971         cols.append(l[s:s+rows])
 972     out = ''
 973     for row in zip(*cols):
 974         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 975     return out
 976
 977
 978 def parse_date_or_fatal(str, fatal):
 979     """Parses the given date or calls Option.fatal().
 980     For now we expect a string that contains a float."""
 981     try:
 982         date = float(str)
 983     except ValueError as e:
 984         raise fatal('invalid date format (should be a float): %r' % e)
 985     else:
 986         return date
 987
 988
 989 def parse_excludes(options, fatal):
 990     """Traverse the options and extract all excludes, or call Option.fatal()."""
 991     excluded_paths = []
 992
 993     for flag in options:
 994         (option, parameter) = flag
 995         if option == '--exclude':
 996             excluded_paths.append(resolve_parent(parameter))
 997         elif option == '--exclude-from':
 998             try:
 999                 f = open(resolve_parent(parameter))
1000             except IOError as e:
1001                 raise fatal("couldn't read %s" % parameter)
1002             for exclude_path in f.readlines():
1003                 # FIXME: perhaps this should be rstrip('\n')
1004                 exclude_path = resolve_parent(exclude_path.strip())
1005                 if exclude_path:
1006                     excluded_paths.append(exclude_path)
1007     return sorted(frozenset(excluded_paths))
1008
1009
1010 def parse_rx_excludes(options, fatal):
1011     """Traverse the options and extract all rx excludes, or call
1012     Option.fatal()."""
1013     excluded_patterns = []
1014
1015     for flag in options:
1016         (option, parameter) = flag
1017         if option == '--exclude-rx':
1018             try:
1019                 excluded_patterns.append(re.compile(parameter))
1020             except re.error as ex:
1021                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
1022         elif option == '--exclude-rx-from':
1023             try:
1024                 f = open(resolve_parent(parameter))
1025             except IOError as e:
1026                 raise fatal("couldn't read %s" % parameter)
1027             for pattern in f.readlines():
1028                 spattern = pattern.rstrip('\n')
1029                 if not spattern:
1030                     continue
1031                 try:
1032                     excluded_patterns.append(re.compile(spattern))
1033                 except re.error as ex:
1034                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
1035     return excluded_patterns
1036
1037
1038 def should_rx_exclude_path(path, exclude_rxs):
1039     """Return True if path matches a regular expression in exclude_rxs."""
1040     for rx in exclude_rxs:
1041         if rx.search(path):
1042             debug1('Skipping %r: excluded by rx pattern %r.\n'
1043                    % (path, rx.pattern))
1044             return True
1045     return False
1046
1047
1048 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1049 # that resolve against the current filesystem in the strip/graft
1050 # functions for example, but elsewhere as well.  I suspect bup's not
1051 # always being careful about that.  For some cases, the contents of
1052 # the current filesystem should be irrelevant, and consulting it might
1053 # produce the wrong result, perhaps via unintended symlink resolution,
1054 # for example.
1055
1056 def path_components(path):
1057     """Break path into a list of pairs of the form (name,
1058     full_path_to_name).  Path must start with '/'.
1059     Example:
1060       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
1061     if not path.startswith('/'):
1062         raise Exception('path must start with "/": %s' % path)
1063     # Since we assume path startswith('/'), we can skip the first element.
1064     result = [('', '/')]
1065     norm_path = os.path.abspath(path)
1066     if norm_path == '/':
1067         return result
1068     full_path = ''
1069     for p in norm_path.split('/')[1:]:
1070         full_path += '/' + p
1071         result.append((p, full_path))
1072     return result
1073
1074
1075 def stripped_path_components(path, strip_prefixes):
1076     """Strip any prefix in strip_prefixes from path and return a list
1077     of path components where each component is (name,
1078     none_or_full_fs_path_to_name).  Assume path startswith('/').
1079     See thelpers.py for examples."""
1080     normalized_path = os.path.abspath(path)
1081     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
1082     for bp in sorted_strip_prefixes:
1083         normalized_bp = os.path.abspath(bp)
1084         if normalized_bp == '/':
1085             continue
1086         if normalized_path.startswith(normalized_bp):
1087             prefix = normalized_path[:len(normalized_bp)]
1088             result = []
1089             for p in normalized_path[len(normalized_bp):].split('/'):
1090                 if p: # not root
1091                     prefix += '/'
1092                 prefix += p
1093                 result.append((p, prefix))
1094             return result
1095     # Nothing to strip.
1096     return path_components(path)
1097
1098
1099 def grafted_path_components(graft_points, path):
1100     # Create a result that consists of some number of faked graft
1101     # directories before the graft point, followed by all of the real
1102     # directories from path that are after the graft point.  Arrange
1103     # for the directory at the graft point in the result to correspond
1104     # to the "orig" directory in --graft orig=new.  See t/thelpers.py
1105     # for some examples.
1106
1107     # Note that given --graft orig=new, orig and new have *nothing* to
1108     # do with each other, even if some of their component names
1109     # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
1110     # equivalent to --graft /foo/bar/baz=/x/y/z, or even
1111     # /foo/bar/baz=/x.
1112
1113     # FIXME: This can't be the best solution...
1114     clean_path = os.path.abspath(path)
1115     for graft_point in graft_points:
1116         old_prefix, new_prefix = graft_point
1117         # Expand prefixes iff not absolute paths.
1118         old_prefix = os.path.normpath(old_prefix)
1119         new_prefix = os.path.normpath(new_prefix)
1120         if clean_path.startswith(old_prefix):
1121             escaped_prefix = re.escape(old_prefix)
1122             grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
1123             # Handle /foo=/ (at least) -- which produces //whatever.
1124             grafted_path = '/' + grafted_path.lstrip('/')
1125             clean_path_components = path_components(clean_path)
1126             # Count the components that were stripped.
1127             strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
1128             new_prefix_parts = new_prefix.split('/')
1129             result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
1130             result = [(p, None) for p in result_prefix] \
1131                 + clean_path_components[strip_count:]
1132             # Now set the graft point name to match the end of new_prefix.
1133             graft_point = len(result_prefix)
1134             result[graft_point] = \
1135                 (new_prefix_parts[-1], clean_path_components[strip_count][1])
1136             if new_prefix == '/': # --graft ...=/ is a special case.
1137                 return result[1:]
1138             return result
1139     return path_components(clean_path)
1140
1141
1142 Sha1 = hashlib.sha1
1143
1144
1145 _localtime = getattr(_helpers, 'localtime', None)
1146
1147 if _localtime:
1148     bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
1149                                        'tm_hour', 'tm_min', 'tm_sec',
1150                                        'tm_wday', 'tm_yday',
1151                                        'tm_isdst', 'tm_gmtoff', 'tm_zone'])
1152
1153 # Define a localtime() that returns bup_time when possible.  Note:
1154 # this means that any helpers.localtime() results may need to be
1155 # passed through to_py_time() before being passed to python's time
1156 # module, which doesn't appear willing to ignore the extra items.
1157 if _localtime:
1158     def localtime(time):
1159         return bup_time(*_helpers.localtime(time))
1160     def utc_offset_str(t):
1161         """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
1162         If the current UTC offset does not represent an integer number
1163         of minutes, the fractional component will be truncated."""
1164         off = localtime(t).tm_gmtoff
1165         # Note: // doesn't truncate like C for negative values, it rounds down.
1166         offmin = abs(off) // 60
1167         m = offmin % 60
1168         h = (offmin - m) // 60
1169         return "%+03d%02d" % (-h if off < 0 else h, m)
1170     def to_py_time(x):
1171         if isinstance(x, time.struct_time):
1172             return x
1173         return time.struct_time(x[:9])
1174 else:
1175     localtime = time.localtime
1176     def utc_offset_str(t):
1177         return time.strftime('%z', localtime(t))
1178     def to_py_time(x):
1179         return x
1180
1181
1182 _some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{')
1183
1184 def valid_save_name(name):
1185     # Enforce a superset of the restrictions in git-check-ref-format(1)
1186     if name == '@' \
1187        or name.startswith('/') or name.endswith('/') \
1188        or name.endswith('.'):
1189         return False
1190     if _some_invalid_save_parts_rx.search(name):
1191         return False
1192     for c in name:
1193         if ord(c) < 0x20 or ord(c) == 0x7f:
1194             return False
1195     for part in name.split('/'):
1196         if part.startswith('.') or part.endswith('.lock'):
1197             return False
1198     return True
1199
1200
1201 _period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')
1202
1203 def period_as_secs(s):
1204     if s == 'forever':
1205         return float('inf')
1206     match = _period_rx.match(s)
1207     if not match:
1208         return None
1209     mag = int(match.group(1))
1210     scale = match.group(2)
1211     return mag * {'s': 1,
1212                   'min': 60,
1213                   'h': 60 * 60,
1214                   'd': 60 * 60 * 24,
1215                   'w': 60 * 60 * 24 * 7,
1216                   'm': 60 * 60 * 24 * 31,
1217                   'y': 60 * 60 * 24 * 366}[scale]