lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from os import environ
   8 from pipes import quote
   9 from subprocess import PIPE, Popen
  10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, grp, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 # This function should really be in helpers, not in bup.options.  But we
  16 # want options.py to be standalone so people can include it in other projects.
  17 from bup.options import _tty_width as tty_width
  18
  19
  20 class Nonlocal:
  21     """Helper to deal with Python scoping issues"""
  22     pass
  23
  24
# Value of sysconf(SC_PAGE_SIZE), looked up once at import time.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Value of sysconf(SC_ARG_MAX); used below as the default arg_max for
# batchpipe().
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  31
  32 def last(iterable):
  33     result = None
  34     for result in iterable:
  35         pass
  36     return result
  37
  38
  39 def atoi(s):
  40     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  41     try:
  42         return int(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 def atof(s):
  48     """Convert the string 's' to a float. Return 0 if s is not a number."""
  49     try:
  50         return float(s or '0')
  51     except ValueError:
  52         return 0
  53
  54
# Debug verbosity read from the BUP_DEBUG environment variable at import
# time; atoi() turns a missing or non-numeric value into 0.  Consulted by
# debug1() and debug2().
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  56
  57
  58 try:
  59     _fdatasync = os.fdatasync
  60 except AttributeError:
  61     _fdatasync = os.fsync
  62
  63 if sys.platform.startswith('darwin'):
  64     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  65     import fcntl
  66     def fdatasync(fd):
  67         try:
  68             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  69         except IOError as e:
  70             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  71             if e.errno == errno.ENOTSUP:
  72                 return _fdatasync(fd)
  73             else:
  74                 raise
  75 else:
  76     fdatasync = _fdatasync
  77
  78
  79 def partition(predicate, stream):
  80     """Returns (leading_matches_it, rest_it), where leading_matches_it
  81     must be completely exhausted before traversing rest_it.
  82
  83     """
  84     stream = iter(stream)
  85     ns = Nonlocal()
  86     ns.first_nonmatch = None
  87     def leading_matches():
  88         for x in stream:
  89             if predicate(x):
  90                 yield x
  91             else:
  92                 ns.first_nonmatch = (x,)
  93                 break
  94     def rest():
  95         if ns.first_nonmatch:
  96             yield ns.first_nonmatch[0]
  97             for x in stream:
  98                 yield x
  99     return (leading_matches(), rest())
 100
 101
 102 def merge_dict(*xs):
 103     result = {}
 104     for x in xs:
 105         result.update(x)
 106     return result
 107
 108
 109 def lines_until_sentinel(f, sentinel, ex_type):
 110     # sentinel must end with \n and must contain only one \n
 111     while True:
 112         line = f.readline()
 113         if not (line and line.endswith('\n')):
 114             raise ex_type('Hit EOF while reading line')
 115         if line == sentinel:
 116             return
 117         yield line
 118
 119
 120 def stat_if_exists(path):
 121     try:
 122         return os.stat(path)
 123     except OSError as e:
 124         if e.errno != errno.ENOENT:
 125             raise
 126     return None
 127
 128
 129 # Write (blockingly) to sockets that may or may not be in blocking mode.
 130 # We need this because our stderr is sometimes eaten by subprocesses
 131 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 132 # leading to race conditions.  Ick.  We'll do it the hard way.
 133 def _hard_write(fd, buf):
 134     while buf:
 135         (r,w,x) = select.select([], [fd], [], None)
 136         if not w:
 137             raise IOError('select(fd) returned without being writable')
 138         try:
 139             sz = os.write(fd, buf)
 140         except OSError as e:
 141             if e.errno != errno.EAGAIN:
 142                 raise
 143         assert(sz >= 0)
 144         buf = buf[sz:]
 145
 146
 147 _last_prog = 0
 148 def log(s):
 149     """Print a log message to stderr."""
 150     global _last_prog
 151     sys.stdout.flush()
 152     _hard_write(sys.stderr.fileno(), s)
 153     _last_prog = 0
 154
 155
 156 def debug1(s):
 157     if buglvl >= 1:
 158         log(s)
 159
 160
 161 def debug2(s):
 162     if buglvl >= 2:
 163         log(s)
 164
 165
 166 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 167 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 168 _last_progress = ''
 169 def progress(s):
 170     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 171     global _last_progress
 172     if istty2:
 173         log(s)
 174         _last_progress = s
 175
 176
 177 def qprogress(s):
 178     """Calls progress() only if we haven't printed progress in a while.
 179
 180     This avoids overloading the stderr buffer with excess junk.
 181     """
 182     global _last_prog
 183     now = time.time()
 184     if now - _last_prog > 0.1:
 185         progress(s)
 186         _last_prog = now
 187
 188
 189 def reprogress():
 190     """Calls progress() to redisplay the most recent progress message.
 191
 192     Useful after you've printed some other message that wipes out the
 193     progress line.
 194     """
 195     if _last_progress and _last_progress.endswith('\r'):
 196         progress(_last_progress)
 197
 198
 199 def mkdirp(d, mode=None):
 200     """Recursively create directories on path 'd'.
 201
 202     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 203     the path already exists.
 204     """
 205     try:
 206         if mode:
 207             os.makedirs(d, mode)
 208         else:
 209             os.makedirs(d)
 210     except OSError as e:
 211         if e.errno == errno.EEXIST:
 212             pass
 213         else:
 214             raise
 215
 216
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge the iterables in 'iters' through a heap, skipping repeats.

    Items are taken smallest-first from a heap of (item, iterator) pairs.
    An item is not yielded when it matches the most recently yielded one:
    by equality, or, when 'key' is given, by equality of the attribute
    named 'key'.

    pfunc(count, total) is called whenever the number of items consumed
    so far (count) is a multiple of pfreq, and pfinal(count, total) is
    called once at the end; total is the sum of len() over 'iters'.

    NOTE(review): 'iters' is traversed twice and len() is taken of every
    element, so it is presumably a list of sized, sorted sequences --
    confirm against callers.
    """
    if key:
        # pe is None before anything has been yielded, hence the default.
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    heap = ((next(it, None),it) for it in iters)
    # Inputs whose first item is false (which includes exhausted inputs,
    # where next() gave None) never enter the heap.
    heap = [(e,it) for e,it in heap if e]

    heapq.heapify(heap)
    pe = None  # the most recently yielded item
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0]
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = next(it)
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            heapq.heapreplace(heap, (e, it)) # shift current to new location
    pfinal(count, total)
 245
 246
 247 def unlink(f):
 248     """Delete a file at path 'f' if it currently exists.
 249
 250     Unlike os.unlink(), does not throw an exception if the file didn't already
 251     exist.
 252     """
 253     try:
 254         os.unlink(f)
 255     except OSError as e:
 256         if e.errno != errno.ENOENT:
 257             raise
 258
 259
 260 def shstr(cmd):
 261     if isinstance(cmd, compat.str_type):
 262         return cmd
 263     else:
 264         return ' '.join(map(quote, cmd))
 265
# exc(...) is an alias for subprocess.check_call.
exc = subprocess.check_call
 267
 268 def exo(cmd,
 269         input=None,
 270         stdin=None,
 271         stderr=None,
 272         shell=False,
 273         check=True,
 274         preexec_fn=None):
 275     if input:
 276         assert stdin in (None, PIPE)
 277         stdin = PIPE
 278     p = Popen(cmd,
 279               stdin=stdin, stdout=PIPE, stderr=stderr,
 280               shell=shell,
 281               preexec_fn=preexec_fn)
 282     out, err = p.communicate(input)
 283     if check and p.returncode != 0:
 284         raise Exception('subprocess %r failed with status %d, stderr: %r'
 285                         % (' '.join(map(quote, cmd)), p.returncode, err))
 286     return out, err, p
 287
 288 def readpipe(argv, preexec_fn=None, shell=False):
 289     """Run a subprocess and return its output."""
 290     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 291                          shell=shell)
 292     out, err = p.communicate()
 293     if p.returncode != 0:
 294         raise Exception('subprocess %r failed with status %d'
 295                         % (' '.join(argv), p.returncode))
 296     return out
 297
 298
 299 def _argmax_base(command):
 300     base_size = 2048
 301     for c in command:
 302         base_size += len(command) + 1
 303     for k, v in compat.items(environ):
 304         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 305     return base_size
 306
 307
 308 def _argmax_args_size(args):
 309     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 310
 311
 312 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 313     """If args is not empty, yield the output produced by calling the
 314 command list with args as a sequence of strings (It may be necessary
 315 to return multiple strings in order to respect ARG_MAX)."""
 316     # The optional arg_max arg is a workaround for an issue with the
 317     # current wvtest behavior.
 318     base_size = _argmax_base(command)
 319     while args:
 320         room = arg_max - base_size
 321         i = 0
 322         while i < len(args):
 323             next_size = _argmax_args_size(args[i:i+1])
 324             if room - next_size < 0:
 325                 break
 326             room -= next_size
 327             i += 1
 328         sub_args = args[:i]
 329         args = args[i:]
 330         assert(len(sub_args))
 331         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 332
 333
 334 def resolve_parent(p):
 335     """Return the absolute path of a file without following any final symlink.
 336
 337     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 338     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 339     will follow symlinks in p's directory)
 340     """
 341     try:
 342         st = os.lstat(p)
 343     except OSError:
 344         st = None
 345     if st and stat.S_ISLNK(st.st_mode):
 346         (dir, name) = os.path.split(p)
 347         dir = os.path.realpath(dir)
 348         out = os.path.join(dir, name)
 349     else:
 350         out = os.path.realpath(p)
 351     #log('realpathing:%r,%r\n' % (p, out))
 352     return out
 353
 354
 355 def detect_fakeroot():
 356     "Return True if we appear to be running under fakeroot."
 357     return os.getenv("FAKEROOTKEY") != None
 358
 359
 360 if sys.platform.startswith('cygwin'):
 361     def is_superuser():
 362         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 363         groups = os.getgroups()
 364         return 544 in groups or 0 in groups
 365 else:
 366     def is_superuser():
 367         return os.geteuid() == 0
 368
 369
 370 def cache_key_value(get_value, key, cache):
 371     """Return (value, was_cached).  If there is a value in the cache
 372     for key, use that, otherwise, call get_value(key) which should
 373     throw a KeyError if there is no value -- in which case the cached
 374     and returned value will be None.
 375     """
 376     try: # Do we already have it (or know there wasn't one)?
 377         value = cache[key]
 378         return value, True
 379     except KeyError:
 380         pass
 381     value = None
 382     try:
 383         cache[key] = value = get_value(key)
 384     except KeyError:
 385         cache[key] = None
 386     return value, False
 387
 388
 389 _hostname = None
 390 def hostname():
 391     """Get the FQDN of this machine."""
 392     global _hostname
 393     if not _hostname:
 394         _hostname = socket.getfqdn()
 395     return _hostname
 396
 397
 398 _resource_path = None
 399 def resource_path(subdir=''):
 400     global _resource_path
 401     if not _resource_path:
 402         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 403     return os.path.join(_resource_path, subdir)
 404
 405 def format_filesize(size):
 406     unit = 1024.0
 407     size = float(size)
 408     if size < unit:
 409         return "%d" % (size)
 410     exponent = int(math.log(size) // math.log(unit))
 411     size_prefix = "KMGTPE"[exponent - 1]
 412     return "%.1f%s" % (size // math.pow(unit, exponent), size_prefix)
 413
 414
 415 class NotOk(Exception):
 416     pass
 417
 418
 419 class BaseConn:
 420     def __init__(self, outp):
 421         self.outp = outp
 422
 423     def close(self):
 424         while self._read(65536): pass
 425
 426     def read(self, size):
 427         """Read 'size' bytes from input stream."""
 428         self.outp.flush()
 429         return self._read(size)
 430
 431     def readline(self):
 432         """Read from input stream until a newline is found."""
 433         self.outp.flush()
 434         return self._readline()
 435
 436     def write(self, data):
 437         """Write 'data' to output stream."""
 438         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 439         self.outp.write(data)
 440
 441     def has_input(self):
 442         """Return true if input stream is readable."""
 443         raise NotImplemented("Subclasses must implement has_input")
 444
 445     def ok(self):
 446         """Indicate end of output from last sent command."""
 447         self.write('\nok\n')
 448
 449     def error(self, s):
 450         """Indicate server error to the client."""
 451         s = re.sub(r'\s+', ' ', str(s))
 452         self.write('\nerror %s\n' % s)
 453
 454     def _check_ok(self, onempty):
 455         self.outp.flush()
 456         rl = ''
 457         for rl in linereader(self):
 458             #log('%d got line: %r\n' % (os.getpid(), rl))
 459             if not rl:  # empty line
 460                 continue
 461             elif rl == 'ok':
 462                 return None
 463             elif rl.startswith('error '):
 464                 #log('client: error: %s\n' % rl[6:])
 465                 return NotOk(rl[6:])
 466             else:
 467                 onempty(rl)
 468         raise Exception('server exited unexpectedly; see errors above')
 469
 470     def drain_and_check_ok(self):
 471         """Remove all data for the current command from input stream."""
 472         def onempty(rl):
 473             pass
 474         return self._check_ok(onempty)
 475
 476     def check_ok(self):
 477         """Verify that server action completed successfully."""
 478         def onempty(rl):
 479             raise Exception('expected "ok", got %r' % rl)
 480         return self._check_ok(onempty)
 481
 482
 483 class Conn(BaseConn):
 484     def __init__(self, inp, outp):
 485         BaseConn.__init__(self, outp)
 486         self.inp = inp
 487
 488     def _read(self, size):
 489         return self.inp.read(size)
 490
 491     def _readline(self):
 492         return self.inp.readline()
 493
 494     def has_input(self):
 495         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 496         if rl:
 497             assert(rl[0] == self.inp.fileno())
 498             return True
 499         else:
 500             return None
 501
 502
 503 def checked_reader(fd, n):
 504     while n > 0:
 505         rl, _, _ = select.select([fd], [], [])
 506         assert(rl[0] == fd)
 507         buf = os.read(fd, n)
 508         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 509         yield buf
 510         n -= len(buf)
 511
 512
 513 MAX_PACKET = 128 * 1024
 514 def mux(p, outfd, outr, errr):
 515     try:
 516         fds = [outr, errr]
 517         while p.poll() is None:
 518             rl, _, _ = select.select(fds, [], [])
 519             for fd in rl:
 520                 if fd == outr:
 521                     buf = os.read(outr, MAX_PACKET)
 522                     if not buf: break
 523                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 524                 elif fd == errr:
 525                     buf = os.read(errr, 1024)
 526                     if not buf: break
 527                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 528     finally:
 529         os.write(outfd, struct.pack('!IB', 0, 3))
 530
 531
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the packet stream produced by mux() from the descriptor 'infd':
    each packet is a 5-byte '!IB' header (payload length, channel)
    followed by the payload.  Channel 1 payloads become this connection's
    input, channel 2 payloads are copied to stderr, and a channel 3
    packet marks the stream as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to hold six characters, then one at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # checked_reader over the current data packet
        self.buf = None      # data read from a packet but not yet consumed
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        """Write 'data' to the output stream after a non-blocking check
        for pending input packets."""
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and act on its channel.

        Returns False if the stream is closed or nothing became readable
        within 'timeout', True otherwise.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily by _load_buf().
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data, reading packets as needed.

        Returns True when self.buf is set, False when no data could be
        loaded (nothing arrived within 'timeout', or the stream closed).
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current data packet is used up; look for the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of buffered input as directed by ix_fn(buf).

        ix_fn returns None to take the whole buffer and keep going, or an
        index: the buffer up to that index is yielded, the remainder is
        kept for later, and iteration stops.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        """Return input up to and including the next newline."""
        def find_eol(buf):
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        """Return up to 'size' bytes of input (fewer if the stream ends)."""
        csize = [size]  # remaining byte count, mutable for the closure
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if input data is available without blocking."""
        return self._load_buf(0)
 621
 622
 623 def linereader(f):
 624     """Generate a list of input lines from 'f' without terminating newlines."""
 625     while 1:
 626         line = f.readline()
 627         if not line:
 628             break
 629         yield line[:-1]
 630
 631
 632 def chunkyreader(f, count = None):
 633     """Generate a list of chunks of data read from 'f'.
 634
 635     If count is None, read until EOF is reached.
 636
 637     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 638     reached while reading, raise IOError.
 639     """
 640     if count != None:
 641         while count > 0:
 642             b = f.read(min(count, 65536))
 643             if not b:
 644                 raise IOError('EOF with %d bytes remaining' % count)
 645             yield b
 646             count -= len(b)
 647     else:
 648         while 1:
 649             b = f.read(65536)
 650             if not b: break
 651             yield b
 652
 653
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed to name when leaving the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    # The temporary file is created in the same directory as the target.
    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so nothing else owns the descriptor yet.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Only reached when the with-block finished without an exception.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 688
 689
 690 def slashappend(s):
 691     """Append "/" to 's' if it doesn't aleady end in "/"."""
 692     if s and not s.endswith('/'):
 693         return s + '/'
 694     else:
 695         return s
 696
 697
 698 def _mmap_do(f, sz, flags, prot, close):
 699     if not sz:
 700         st = os.fstat(f.fileno())
 701         sz = st.st_size
 702     if not sz:
 703         # trying to open a zero-length map gives an error, but an empty
 704         # string has all the same behaviour of a zero-length map, ie. it has
 705         # no elements :)
 706         return ''
 707     map = mmap.mmap(f.fileno(), sz, flags, prot)
 708     if close:
 709         f.close()  # map will persist beyond file close
 710     return map
 711
 712
 713 def mmap_read(f, sz = 0, close=True):
 714     """Create a read-only memory mapped region on file 'f'.
 715     If sz is 0, the region will cover the entire file.
 716     """
 717     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 718
 719
 720 def mmap_readwrite(f, sz = 0, close=True):
 721     """Create a read-write memory mapped region on file 'f'.
 722     If sz is 0, the region will cover the entire file.
 723     """
 724     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 725                     close)
 726
 727
 728 def mmap_readwrite_private(f, sz = 0, close=True):
 729     """Create a read-write memory mapped region on file 'f'.
 730     If sz is 0, the region will cover the entire file.
 731     The map is private, which means the changes are never flushed back to the
 732     file.
 733     """
 734     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 735                     close)
 736
 737
 738 _mincore = getattr(_helpers, 'mincore', None)
 739 if _mincore:
 740     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 741     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 742
 743     _fmincore_chunk_size = None
 744     def _set_fmincore_chunk_size():
 745         global _fmincore_chunk_size
 746         pref_chunk_size = 64 * 1024 * 1024
 747         chunk_size = sc_page_size
 748         if (sc_page_size < pref_chunk_size):
 749             chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
 750         _fmincore_chunk_size = chunk_size
 751
 752     def fmincore(fd):
 753         """Return the mincore() data for fd as a bytearray whose values can be
 754         tested via MINCORE_INCORE, or None if fd does not fully
 755         support the operation."""
 756         st = os.fstat(fd)
 757         if (st.st_size == 0):
 758             return bytearray(0)
 759         if not _fmincore_chunk_size:
 760             _set_fmincore_chunk_size()
 761         pages_per_chunk = _fmincore_chunk_size // sc_page_size;
 762         page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
 763         chunk_count = page_count // _fmincore_chunk_size
 764         if chunk_count < 1:
 765             chunk_count = 1
 766         result = bytearray(page_count)
 767         for ci in compat.range(chunk_count):
 768             pos = _fmincore_chunk_size * ci;
 769             msize = min(_fmincore_chunk_size, st.st_size - pos)
 770             try:
 771                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 772             except mmap.error as ex:
 773                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 774                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 775                     return None
 776                 raise ex
 777             try:
 778                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 779             except OSError as ex:
 780                 if ex.errno == errno.ENOSYS:
 781                     return None
 782                 raise
 783         return result
 784
 785
 786 def parse_timestamp(epoch_str):
 787     """Return the number of nanoseconds since the epoch that are described
 788 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 789 throw a ValueError that may contain additional information."""
 790     ns_per = {'s' :  1000000000,
 791               'ms' : 1000000,
 792               'us' : 1000,
 793               'ns' : 1}
 794     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 795     if not match:
 796         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 797             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 798         raise ValueError()
 799     (n, units) = match.group(1, 2)
 800     if not n:
 801         n = 1
 802     n = int(n)
 803     return n * ns_per[units]
 804
 805
 806 def parse_num(s):
 807     """Parse data size information into a float number.
 808
 809     Here are some examples of conversions:
 810         199.2k means 203981 bytes
 811         1GB means 1073741824 bytes
 812         2.1 tb means 2199023255552 bytes
 813     """
 814     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 815     if not g:
 816         raise ValueError("can't parse %r as a number" % s)
 817     (val, unit) = g.groups()
 818     num = float(val)
 819     unit = unit.lower()
 820     if unit in ['t', 'tb']:
 821         mult = 1024*1024*1024*1024
 822     elif unit in ['g', 'gb']:
 823         mult = 1024*1024*1024
 824     elif unit in ['m', 'mb']:
 825         mult = 1024*1024
 826     elif unit in ['k', 'kb']:
 827         mult = 1024
 828     elif unit in ['', 'b']:
 829         mult = 1
 830     else:
 831         raise ValueError("invalid unit %r in number %r" % (unit, s))
 832     return int(num*mult)
 833
 834
 835 def count(l):
 836     """Count the number of elements in an iterator. (consumes the iterator)"""
 837     return reduce(lambda x,y: x+1, l)
 838
 839
 840 saved_errors = []
 841 def add_error(e):
 842     """Append an error message to the list of saved errors.
 843
 844     Once processing is able to stop and output the errors, the saved errors are
 845     accessible in the module variable helpers.saved_errors.
 846     """
 847     saved_errors.append(e)
 848     log('%-70s\n' % e)
 849
 850
 851 def clear_errors():
 852     global saved_errors
 853     saved_errors = []
 854
 855
 856 def die_if_errors(msg=None, status=1):
 857     global saved_errors
 858     if saved_errors:
 859         if not msg:
 860             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 861         log(msg)
 862         sys.exit(status)
 863
 864
 865 def handle_ctrl_c():
 866     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 867
 868     The new exception handler will make sure that bup will exit without an ugly
 869     stacktrace when Ctrl-C is hit.
 870     """
 871     oldhook = sys.excepthook
 872     def newhook(exctype, value, traceback):
 873         if exctype == KeyboardInterrupt:
 874             log('\nInterrupted.\n')
 875         else:
 876             return oldhook(exctype, value, traceback)
 877     sys.excepthook = newhook
 878
 879
 880 def columnate(l, prefix):
 881     """Format elements of 'l' in columns with 'prefix' leading each line.
 882
 883     The number of columns is determined automatically based on the string
 884     lengths.
 885     """
 886     if not l:
 887         return ""
 888     l = l[:]
 889     clen = max(len(s) for s in l)
 890     ncols = (tty_width() - len(prefix)) // (clen + 2)
 891     if ncols <= 1:
 892         ncols = 1
 893         clen = 0
 894     cols = []
 895     while len(l) % ncols:
 896         l.append('')
 897     rows = len(l) // ncols
 898     for s in compat.range(0, len(l), rows):
 899         cols.append(l[s:s+rows])
 900     out = ''
 901     for row in zip(*cols):
 902         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 903     return out
 904
 905
 906 def parse_date_or_fatal(str, fatal):
 907     """Parses the given date or calls Option.fatal().
 908     For now we expect a string that contains a float."""
 909     try:
 910         date = float(str)
 911     except ValueError as e:
 912         raise fatal('invalid date format (should be a float): %r' % e)
 913     else:
 914         return date
 915
 916
 917 def parse_excludes(options, fatal):
 918     """Traverse the options and extract all excludes, or call Option.fatal()."""
 919     excluded_paths = []
 920
 921     for flag in options:
 922         (option, parameter) = flag
 923         if option == '--exclude':
 924             excluded_paths.append(resolve_parent(parameter))
 925         elif option == '--exclude-from':
 926             try:
 927                 f = open(resolve_parent(parameter))
 928             except IOError as e:
 929                 raise fatal("couldn't read %s" % parameter)
 930             for exclude_path in f.readlines():
 931                 # FIXME: perhaps this should be rstrip('\n')
 932                 exclude_path = resolve_parent(exclude_path.strip())
 933                 if exclude_path:
 934                     excluded_paths.append(exclude_path)
 935     return sorted(frozenset(excluded_paths))
 936
 937
 938 def parse_rx_excludes(options, fatal):
 939     """Traverse the options and extract all rx excludes, or call
 940     Option.fatal()."""
 941     excluded_patterns = []
 942
 943     for flag in options:
 944         (option, parameter) = flag
 945         if option == '--exclude-rx':
 946             try:
 947                 excluded_patterns.append(re.compile(parameter))
 948             except re.error as ex:
 949                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 950         elif option == '--exclude-rx-from':
 951             try:
 952                 f = open(resolve_parent(parameter))
 953             except IOError as e:
 954                 raise fatal("couldn't read %s" % parameter)
 955             for pattern in f.readlines():
 956                 spattern = pattern.rstrip('\n')
 957                 if not spattern:
 958                     continue
 959                 try:
 960                     excluded_patterns.append(re.compile(spattern))
 961                 except re.error as ex:
 962                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 963     return excluded_patterns
 964
 965
 966 def should_rx_exclude_path(path, exclude_rxs):
 967     """Return True if path matches a regular expression in exclude_rxs."""
 968     for rx in exclude_rxs:
 969         if rx.search(path):
 970             debug1('Skipping %r: excluded by rx pattern %r.\n'
 971                    % (path, rx.pattern))
 972             return True
 973     return False
 974
 975
 976 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 977 # that resolve against the current filesystem in the strip/graft
 978 # functions for example, but elsewhere as well.  I suspect bup's not
 979 # always being careful about that.  For some cases, the contents of
 980 # the current filesystem should be irrelevant, and consulting it might
 981 # produce the wrong result, perhaps via unintended symlink resolution,
 982 # for example.
 983
 984 def path_components(path):
 985     """Break path into a list of pairs of the form (name,
 986     full_path_to_name).  Path must start with '/'.
 987     Example:
 988       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 989     if not path.startswith('/'):
 990         raise Exception('path must start with "/": %s' % path)
 991     # Since we assume path startswith('/'), we can skip the first element.
 992     result = [('', '/')]
 993     norm_path = os.path.abspath(path)
 994     if norm_path == '/':
 995         return result
 996     full_path = ''
 997     for p in norm_path.split('/')[1:]:
 998         full_path += '/' + p
 999         result.append((p, full_path))
1000     return result
1001
1002
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').
    See thelpers.py for examples.

    Both path and the prefixes are normalized with os.path.abspath().
    Prefixes are tried longest first, a prefix that normalizes to '/'
    is ignored, and a prefix only applies when it covers whole leading
    components of path ('/foo' applies to '/foo' and '/foo/bar', but
    not to '/foobar').  The first entry of a stripped result is
    ('', prefix).  If no prefix applies, path_components(path) is
    returned."""
    normalized_path = os.path.abspath(path)
    # Try the longest prefixes first.
    for bp in sorted(strip_prefixes, key=len, reverse=True):
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == '/':
            continue
        # A plain startswith() would also accept a prefix that ends in
        # the middle of a component name, so require the match to end
        # at the end of the path or at a '/'.
        if normalized_path != normalized_bp \
           and not normalized_path.startswith(normalized_bp + '/'):
            continue
        prefix = normalized_bp
        result = []
        # The remainder is either empty or starts with '/', so the
        # first piece is always '' and stands for the stripped root.
        for p in normalized_path[len(normalized_bp):].split('/'):
            if p: # not root
                prefix += '/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1025
1026
def grafted_path_components(graft_points, path):
    """Return the components of path as (name, fs_path_or_None) pairs
    after applying the first matching graft point.

    graft_points is an iterable of (old_prefix, new_prefix) pairs, as
    in --graft old=new.  A graft point applies when old_prefix covers
    whole leading components of the normalized path.  If none applies,
    path_components() of the normalized path is returned."""
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        # Normalize the prefixes (normpath does not make them absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if not clean_path.startswith(old_prefix):
            continue
        remainder = clean_path[len(old_prefix):]
        # Only graft at a component boundary: --graft /foo=... must
        # not apply to /foobar.  A prefix that itself ends in '/'
        # (i.e. the root) always ends on a boundary.
        if remainder and not remainder.startswith('/') \
           and not old_prefix.endswith('/'):
            continue
        # Plain concatenation rather than re.sub(), so that any
        # backslashes in new_prefix are taken literally instead of
        # being interpreted as replacement-template escapes.
        grafted_path = new_prefix + remainder
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = '/' + grafted_path.lstrip('/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
        new_prefix_parts = new_prefix.split('/')
        result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
        # Faked directories carry None instead of a filesystem path.
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == '/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1068
1069
# Alias for hashlib's SHA-1 constructor.
Sha1 = hashlib.sha1
1071
1072
# _helpers.localtime when the extension module provides it, else None.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Field names for the values returned by _helpers.localtime():
    # the usual nine struct_time fields plus tm_gmtoff and tm_zone.
    bup_time = namedtuple('bup_time', (
        'tm_year',
        'tm_mon',
        'tm_mday',
        'tm_hour',
        'tm_min',
        'tm_sec',
        'tm_wday',
        'tm_yday',
        'tm_isdst',
        'tm_gmtoff',
        'tm_zone',
    ))
1080
# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the result of _helpers.localtime(time) as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Work on the magnitude.  Note: // doesn't truncate like C for
        # negative values, it rounds down.
        h, m = divmod(abs(off) // 60, 60)
        # Emit the sign separately: formatting a signed hour with
        # %+03d loses the sign for offsets between -1h and 0, because
        # the hour is then 0 (e.g. -1800 seconds must give "-0030").
        return '%s%02d%02d' % ('-' if off < 0 else '+', h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, built from its first nine
        items unless it already is one."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted
        by strftime's %z."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged; localtime() is time.localtime here."""
        return x
1108
1109
# Characters and sequences that may not appear anywhere in a save name.
_some_invalid_save_parts_rx = re.compile(r'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a save name, False otherwise.

    A name is rejected when it is empty or '@', starts or ends with
    '/', ends with '.', contains anything matched by
    _some_invalid_save_parts_rx, contains a character below 0x20 or
    0x7f, or has a '/'-separated part that starts with '.' or ends
    with '.lock'."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if not name or name == '@' \
       or name.startswith('/') or name.endswith('/') \
       or name.endswith('.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    # No ASCII control characters or DEL.
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    return not any(part.startswith('.') or part.endswith('.lock')
                   for part in name.split('/'))
1127
1128
# <count><unit>, where unit is one of s, min, h, d, w, m, y.
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period such as '30s', '2min' or '3w' to seconds.

    'forever' gives float('inf'); anything that does not match
    _period_rx gives None.  A month ('m') counts as 31 days and a
    year ('y') as 366 days."""
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    return int(count) * secs_per_unit[unit]