lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from os import environ
   8 from pipes import quote
   9 from subprocess import PIPE, Popen
  10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, grp, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 # This function should really be in helpers, not in bup.options.  But we
  16 # want options.py to be standalone so people can include it in other projects.
  17 from bup.options import _tty_width as tty_width
  18
  19
  20 class Nonlocal:
  21     """Helper to deal with Python scoping issues"""
  22     pass
  23
  24
  25 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  26 assert(sc_page_size > 0)
  27
  28 sc_arg_max = os.sysconf('SC_ARG_MAX')
  29 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  30     sc_arg_max = 2 * 1024 * 1024
  31
  32 def last(iterable):
  33     result = None
  34     for result in iterable:
  35         pass
  36     return result
  37
  38
  39 def atoi(s):
  40     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  41     try:
  42         return int(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 def atof(s):
  48     """Convert the string 's' to a float. Return 0 if s is not a number."""
  49     try:
  50         return float(s or '0')
  51     except ValueError:
  52         return 0
  53
  54
# Debug verbosity taken from the BUP_DEBUG environment variable; atoi()
# turns an unset or non-numeric value into 0.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  56
  57
  58 try:
  59     _fdatasync = os.fdatasync
  60 except AttributeError:
  61     _fdatasync = os.fsync
  62
  63 if sys.platform.startswith('darwin'):
  64     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  65     import fcntl
  66     def fdatasync(fd):
  67         try:
  68             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  69         except IOError as e:
  70             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  71             if e.errno == errno.ENOTSUP:
  72                 return _fdatasync(fd)
  73             else:
  74                 raise
  75 else:
  76     fdatasync = _fdatasync
  77
  78
  79 def partition(predicate, stream):
  80     """Returns (leading_matches_it, rest_it), where leading_matches_it
  81     must be completely exhausted before traversing rest_it.
  82
  83     """
  84     stream = iter(stream)
  85     ns = Nonlocal()
  86     ns.first_nonmatch = None
  87     def leading_matches():
  88         for x in stream:
  89             if predicate(x):
  90                 yield x
  91             else:
  92                 ns.first_nonmatch = (x,)
  93                 break
  94     def rest():
  95         if ns.first_nonmatch:
  96             yield ns.first_nonmatch[0]
  97             for x in stream:
  98                 yield x
  99     return (leading_matches(), rest())
 100
 101
 102 def merge_dict(*xs):
 103     result = {}
 104     for x in xs:
 105         result.update(x)
 106     return result
 107
 108
 109 def lines_until_sentinel(f, sentinel, ex_type):
 110     # sentinel must end with \n and must contain only one \n
 111     while True:
 112         line = f.readline()
 113         if not (line and line.endswith('\n')):
 114             raise ex_type('Hit EOF while reading line')
 115         if line == sentinel:
 116             return
 117         yield line
 118
 119
 120 def stat_if_exists(path):
 121     try:
 122         return os.stat(path)
 123     except OSError as e:
 124         if e.errno != errno.ENOENT:
 125             raise
 126     return None
 127
 128
 129 # Write (blockingly) to sockets that may or may not be in blocking mode.
 130 # We need this because our stderr is sometimes eaten by subprocesses
 131 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 132 # leading to race conditions.  Ick.  We'll do it the hard way.
 133 def _hard_write(fd, buf):
 134     while buf:
 135         (r,w,x) = select.select([], [fd], [], None)
 136         if not w:
 137             raise IOError('select(fd) returned without being writable')
 138         try:
 139             sz = os.write(fd, buf)
 140         except OSError as e:
 141             if e.errno != errno.EAGAIN:
 142                 raise
 143         assert(sz >= 0)
 144         buf = buf[sz:]
 145
 146
 147 _last_prog = 0
 148 def log(s):
 149     """Print a log message to stderr."""
 150     global _last_prog
 151     sys.stdout.flush()
 152     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 153     _last_prog = 0
 154
 155
 156 def debug1(s):
 157     if buglvl >= 1:
 158         log(s)
 159
 160
 161 def debug2(s):
 162     if buglvl >= 2:
 163         log(s)
 164
 165
# Whether fd 1 / fd 2 should be treated as a terminal.  BUP_FORCE_TTY is
# parsed as an integer bitmask: bit value 1 forces istty1 and bit value 2
# forces istty2, even when os.isatty() reports otherwise.
istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 168 _last_progress = ''
 169 def progress(s):
 170     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 171     global _last_progress
 172     if istty2:
 173         log(s)
 174         _last_progress = s
 175
 176
 177 def qprogress(s):
 178     """Calls progress() only if we haven't printed progress in a while.
 179
 180     This avoids overloading the stderr buffer with excess junk.
 181     """
 182     global _last_prog
 183     now = time.time()
 184     if now - _last_prog > 0.1:
 185         progress(s)
 186         _last_prog = now
 187
 188
 189 def reprogress():
 190     """Calls progress() to redisplay the most recent progress message.
 191
 192     Useful after you've printed some other message that wipes out the
 193     progress line.
 194     """
 195     if _last_progress and _last_progress.endswith('\r'):
 196         progress(_last_progress)
 197
 198
 199 def mkdirp(d, mode=None):
 200     """Recursively create directories on path 'd'.
 201
 202     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 203     the path already exists.
 204     """
 205     try:
 206         if mode:
 207             os.makedirs(d, mode)
 208         else:
 209             os.makedirs(d)
 210     except OSError as e:
 211         if e.errno == errno.EEXIST:
 212             pass
 213         else:
 214             raise
 215
 216
 217 class MergeIterItem:
 218     def __init__(self, entry, read_it):
 219         self.entry = entry
 220         self.read_it = read_it
 221     def __lt__(self, x):
 222         return self.entry < x.entry
 223
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge the entries of iters through a heap, skipping repeated entries.

    The smallest pending entry (per MergeIterItem ordering) is taken on
    each step and yielded unless it matches the previously yielded entry.
    Entries match when they compare equal, or, if key is given, when
    their 'key' attributes compare equal.

    iters must be re-iterable and each element must support len(), since
    the total is computed up front.  pfunc(count, total) is called every
    pfreq entries examined and pfinal(count, total) once at the end.

    NOTE(review): presumably each input is already sorted -- verify
    against callers.
    """
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    # Pair each iterator with its first entry (None when it is empty) ...
    heap = ((next(it, None),it) for it in iters)
    # ... and drop iterators whose first entry is falsy.
    heap = [MergeIterItem(e, it) for e, it in heap if e]

    heapq.heapify(heap)
    pe = None  # the entry most recently yielded
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0].entry, heap[0].read_it
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = next(it)
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            # shift current to new location
            heapq.heapreplace(heap, MergeIterItem(e, it))
    pfinal(count, total)
 253
 254
 255 def unlink(f):
 256     """Delete a file at path 'f' if it currently exists.
 257
 258     Unlike os.unlink(), does not throw an exception if the file didn't already
 259     exist.
 260     """
 261     try:
 262         os.unlink(f)
 263     except OSError as e:
 264         if e.errno != errno.ENOENT:
 265             raise
 266
 267
 268 def shstr(cmd):
 269     if isinstance(cmd, compat.str_type):
 270         return cmd
 271     else:
 272         return ' '.join(map(quote, cmd))
 273
# exc() is an alias for subprocess.check_call.
exc = subprocess.check_call
 275
 276 def exo(cmd,
 277         input=None,
 278         stdin=None,
 279         stderr=None,
 280         shell=False,
 281         check=True,
 282         preexec_fn=None):
 283     if input:
 284         assert stdin in (None, PIPE)
 285         stdin = PIPE
 286     p = Popen(cmd,
 287               stdin=stdin, stdout=PIPE, stderr=stderr,
 288               shell=shell,
 289               preexec_fn=preexec_fn)
 290     out, err = p.communicate(input)
 291     if check and p.returncode != 0:
 292         raise Exception('subprocess %r failed with status %d, stderr: %r'
 293                         % (' '.join(map(quote, cmd)), p.returncode, err))
 294     return out, err, p
 295
 296 def readpipe(argv, preexec_fn=None, shell=False):
 297     """Run a subprocess and return its output."""
 298     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 299                          shell=shell)
 300     out, err = p.communicate()
 301     if p.returncode != 0:
 302         raise Exception('subprocess %r failed with status %d'
 303                         % (' '.join(argv), p.returncode))
 304     return out
 305
 306
 307 def _argmax_base(command):
 308     base_size = 2048
 309     for c in command:
 310         base_size += len(command) + 1
 311     for k, v in compat.items(environ):
 312         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 313     return base_size
 314
 315
 316 def _argmax_args_size(args):
 317     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 318
 319
 320 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 321     """If args is not empty, yield the output produced by calling the
 322 command list with args as a sequence of strings (It may be necessary
 323 to return multiple strings in order to respect ARG_MAX)."""
 324     # The optional arg_max arg is a workaround for an issue with the
 325     # current wvtest behavior.
 326     base_size = _argmax_base(command)
 327     while args:
 328         room = arg_max - base_size
 329         i = 0
 330         while i < len(args):
 331             next_size = _argmax_args_size(args[i:i+1])
 332             if room - next_size < 0:
 333                 break
 334             room -= next_size
 335             i += 1
 336         sub_args = args[:i]
 337         args = args[i:]
 338         assert(len(sub_args))
 339         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 340
 341
 342 def resolve_parent(p):
 343     """Return the absolute path of a file without following any final symlink.
 344
 345     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 346     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 347     will follow symlinks in p's directory)
 348     """
 349     try:
 350         st = os.lstat(p)
 351     except OSError:
 352         st = None
 353     if st and stat.S_ISLNK(st.st_mode):
 354         (dir, name) = os.path.split(p)
 355         dir = os.path.realpath(dir)
 356         out = os.path.join(dir, name)
 357     else:
 358         out = os.path.realpath(p)
 359     #log('realpathing:%r,%r\n' % (p, out))
 360     return out
 361
 362
 363 def detect_fakeroot():
 364     "Return True if we appear to be running under fakeroot."
 365     return os.getenv("FAKEROOTKEY") != None
 366
 367
 368 if sys.platform.startswith('cygwin'):
 369     def is_superuser():
 370         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 371         groups = os.getgroups()
 372         return 544 in groups or 0 in groups
 373 else:
 374     def is_superuser():
 375         return os.geteuid() == 0
 376
 377
 378 def cache_key_value(get_value, key, cache):
 379     """Return (value, was_cached).  If there is a value in the cache
 380     for key, use that, otherwise, call get_value(key) which should
 381     throw a KeyError if there is no value -- in which case the cached
 382     and returned value will be None.
 383     """
 384     try: # Do we already have it (or know there wasn't one)?
 385         value = cache[key]
 386         return value, True
 387     except KeyError:
 388         pass
 389     value = None
 390     try:
 391         cache[key] = value = get_value(key)
 392     except KeyError:
 393         cache[key] = None
 394     return value, False
 395
 396
 397 _hostname = None
 398 def hostname():
 399     """Get the FQDN of this machine."""
 400     global _hostname
 401     if not _hostname:
 402         _hostname = socket.getfqdn()
 403     return _hostname
 404
 405
 406 def format_filesize(size):
 407     unit = 1024.0
 408     size = float(size)
 409     if size < unit:
 410         return "%d" % (size)
 411     exponent = int(math.log(size) // math.log(unit))
 412     size_prefix = "KMGTPE"[exponent - 1]
 413     return "%.1f%s" % (size // math.pow(unit, exponent), size_prefix)
 414
 415
 416 class NotOk(Exception):
 417     pass
 418
 419
 420 class BaseConn:
 421     def __init__(self, outp):
 422         self.outp = outp
 423
 424     def close(self):
 425         while self._read(65536): pass
 426
 427     def read(self, size):
 428         """Read 'size' bytes from input stream."""
 429         self.outp.flush()
 430         return self._read(size)
 431
 432     def readline(self):
 433         """Read from input stream until a newline is found."""
 434         self.outp.flush()
 435         return self._readline()
 436
 437     def write(self, data):
 438         """Write 'data' to output stream."""
 439         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 440         self.outp.write(data)
 441
 442     def has_input(self):
 443         """Return true if input stream is readable."""
 444         raise NotImplemented("Subclasses must implement has_input")
 445
 446     def ok(self):
 447         """Indicate end of output from last sent command."""
 448         self.write('\nok\n')
 449
 450     def error(self, s):
 451         """Indicate server error to the client."""
 452         s = re.sub(r'\s+', ' ', str(s))
 453         self.write('\nerror %s\n' % s)
 454
 455     def _check_ok(self, onempty):
 456         self.outp.flush()
 457         rl = ''
 458         for rl in linereader(self):
 459             #log('%d got line: %r\n' % (os.getpid(), rl))
 460             if not rl:  # empty line
 461                 continue
 462             elif rl == 'ok':
 463                 return None
 464             elif rl.startswith('error '):
 465                 #log('client: error: %s\n' % rl[6:])
 466                 return NotOk(rl[6:])
 467             else:
 468                 onempty(rl)
 469         raise Exception('server exited unexpectedly; see errors above')
 470
 471     def drain_and_check_ok(self):
 472         """Remove all data for the current command from input stream."""
 473         def onempty(rl):
 474             pass
 475         return self._check_ok(onempty)
 476
 477     def check_ok(self):
 478         """Verify that server action completed successfully."""
 479         def onempty(rl):
 480             raise Exception('expected "ok", got %r' % rl)
 481         return self._check_ok(onempty)
 482
 483
 484 class Conn(BaseConn):
 485     def __init__(self, inp, outp):
 486         BaseConn.__init__(self, outp)
 487         self.inp = inp
 488
 489     def _read(self, size):
 490         return self.inp.read(size)
 491
 492     def _readline(self):
 493         return self.inp.readline()
 494
 495     def has_input(self):
 496         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 497         if rl:
 498             assert(rl[0] == self.inp.fileno())
 499             return True
 500         else:
 501             return None
 502
 503
 504 def checked_reader(fd, n):
 505     while n > 0:
 506         rl, _, _ = select.select([fd], [], [])
 507         assert(rl[0] == fd)
 508         buf = os.read(fd, n)
 509         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 510         yield buf
 511         n -= len(buf)
 512
 513
 514 MAX_PACKET = 128 * 1024
 515 def mux(p, outfd, outr, errr):
 516     try:
 517         fds = [outr, errr]
 518         while p.poll() is None:
 519             rl, _, _ = select.select(fds, [], [])
 520             for fd in rl:
 521                 if fd == outr:
 522                     buf = os.read(outr, MAX_PACKET)
 523                     if not buf: break
 524                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 525                 elif fd == errr:
 526                     buf = os.read(errr, 1024)
 527                     if not buf: break
 528                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 529     finally:
 530         os.write(outfd, struct.pack('!IB', 0, 3))
 531
 532
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a packet stream from the file descriptor 'infd'.  Each packet
    has a 5-byte '!IB' header (payload length, channel): channel 1
    payloads are served through read()/readline(), channel 2 payloads
    are copied to stderr, and a channel 3 packet marks the stream as
    closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to bring tail up to 6 characters; once it is that
            # long, advance one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # generator over the current channel-1 payload
        self.buf = None      # data read from a packet but not yet consumed
        self.closed = False  # set once a channel-3 packet has been seen

    def write(self, data):
        # Poll (timeout 0) for an incoming packet before writing.
        # NOTE(review): presumably so pending stderr packets are shown
        # promptly -- confirm.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and act on its channel.

        Returns False if the connection is closed or nothing became
        readable within 'timeout', True otherwise.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily via self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: copy the payload straight to our stderr.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return whether it does."""
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current payload is exhausted; look for the next packet.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of buffered data as directed by ix_fn.

        ix_fn(buf) returns None to consume all of buf and keep going, or
        an index at which to split buf; the part before the index is
        yielded last and the rest stays buffered.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            # Index just past the first newline, or None if there is none.
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        # A one-element list lets the nested function update the count.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return whether data is available without blocking."""
        return self._load_buf(0)
 622
 623
 624 def linereader(f):
 625     """Generate a list of input lines from 'f' without terminating newlines."""
 626     while 1:
 627         line = f.readline()
 628         if not line:
 629             break
 630         yield line[:-1]
 631
 632
 633 def chunkyreader(f, count = None):
 634     """Generate a list of chunks of data read from 'f'.
 635
 636     If count is None, read until EOF is reached.
 637
 638     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 639     reached while reading, raise IOError.
 640     """
 641     if count != None:
 642         while count > 0:
 643             b = f.read(min(count, 65536))
 644             if not b:
 645                 raise IOError('EOF with %d bytes remaining' % count)
 646             yield b
 647             count -= len(b)
 648     else:
 649         while 1:
 650             b = f.read(65536)
 651             if not b: break
 652             yield b
 653
 654
 655 @contextmanager
 656 def atomically_replaced_file(name, mode='w', buffering=-1):
 657     """Yield a file that will be atomically renamed name when leaving the block.
 658
 659     This contextmanager yields an open file object that is backed by a
 660     temporary file which will be renamed (atomically) to the target
 661     name if everything succeeds.
 662
 663     The mode and buffering arguments are handled exactly as with open,
 664     and the yielded file will have very restrictive permissions, as
 665     per mkstemp.
 666
 667     E.g.::
 668
 669         with atomically_replaced_file('foo.txt', 'w') as f:
 670             f.write('hello jack.')
 671
 672     """
 673
 674     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 675                                        text=('b' not in mode))
 676     try:
 677         try:
 678             f = os.fdopen(ffd, mode, buffering)
 679         except:
 680             os.close(ffd)
 681             raise
 682         try:
 683             yield f
 684         finally:
 685             f.close()
 686         os.rename(tempname, name)
 687     finally:
 688         unlink(tempname)  # nonexistant file is ignored
 689
 690
 691 def slashappend(s):
 692     """Append "/" to 's' if it doesn't aleady end in "/"."""
 693     if s and not s.endswith('/'):
 694         return s + '/'
 695     else:
 696         return s
 697
 698
 699 def _mmap_do(f, sz, flags, prot, close):
 700     if not sz:
 701         st = os.fstat(f.fileno())
 702         sz = st.st_size
 703     if not sz:
 704         # trying to open a zero-length map gives an error, but an empty
 705         # string has all the same behaviour of a zero-length map, ie. it has
 706         # no elements :)
 707         return ''
 708     map = mmap.mmap(f.fileno(), sz, flags, prot)
 709     if close:
 710         f.close()  # map will persist beyond file close
 711     return map
 712
 713
 714 def mmap_read(f, sz = 0, close=True):
 715     """Create a read-only memory mapped region on file 'f'.
 716     If sz is 0, the region will cover the entire file.
 717     """
 718     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 719
 720
 721 def mmap_readwrite(f, sz = 0, close=True):
 722     """Create a read-write memory mapped region on file 'f'.
 723     If sz is 0, the region will cover the entire file.
 724     """
 725     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 726                     close)
 727
 728
 729 def mmap_readwrite_private(f, sz = 0, close=True):
 730     """Create a read-write memory mapped region on file 'f'.
 731     If sz is 0, the region will cover the entire file.
 732     The map is private, which means the changes are never flushed back to the
 733     file.
 734     """
 735     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 736                     close)
 737
 738
 739 _mincore = getattr(_helpers, 'mincore', None)
 740 if _mincore:
 741     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 742     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 743
 744     _fmincore_chunk_size = None
 745     def _set_fmincore_chunk_size():
 746         global _fmincore_chunk_size
 747         pref_chunk_size = 64 * 1024 * 1024
 748         chunk_size = sc_page_size
 749         if (sc_page_size < pref_chunk_size):
 750             chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
 751         _fmincore_chunk_size = chunk_size
 752
 753     def fmincore(fd):
 754         """Return the mincore() data for fd as a bytearray whose values can be
 755         tested via MINCORE_INCORE, or None if fd does not fully
 756         support the operation."""
 757         st = os.fstat(fd)
 758         if (st.st_size == 0):
 759             return bytearray(0)
 760         if not _fmincore_chunk_size:
 761             _set_fmincore_chunk_size()
 762         pages_per_chunk = _fmincore_chunk_size // sc_page_size;
 763         page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
 764         chunk_count = page_count // _fmincore_chunk_size
 765         if chunk_count < 1:
 766             chunk_count = 1
 767         result = bytearray(page_count)
 768         for ci in compat.range(chunk_count):
 769             pos = _fmincore_chunk_size * ci;
 770             msize = min(_fmincore_chunk_size, st.st_size - pos)
 771             try:
 772                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 773             except mmap.error as ex:
 774                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 775                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 776                     return None
 777                 raise ex
 778             try:
 779                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 780             except OSError as ex:
 781                 if ex.errno == errno.ENOSYS:
 782                     return None
 783                 raise
 784         return result
 785
 786
 787 def parse_timestamp(epoch_str):
 788     """Return the number of nanoseconds since the epoch that are described
 789 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 790 throw a ValueError that may contain additional information."""
 791     ns_per = {'s' :  1000000000,
 792               'ms' : 1000000,
 793               'us' : 1000,
 794               'ns' : 1}
 795     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 796     if not match:
 797         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 798             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 799         raise ValueError()
 800     (n, units) = match.group(1, 2)
 801     if not n:
 802         n = 1
 803     n = int(n)
 804     return n * ns_per[units]
 805
 806
 807 def parse_num(s):
 808     """Parse data size information into a float number.
 809
 810     Here are some examples of conversions:
 811         199.2k means 203981 bytes
 812         1GB means 1073741824 bytes
 813         2.1 tb means 2199023255552 bytes
 814     """
 815     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 816     if not g:
 817         raise ValueError("can't parse %r as a number" % s)
 818     (val, unit) = g.groups()
 819     num = float(val)
 820     unit = unit.lower()
 821     if unit in ['t', 'tb']:
 822         mult = 1024*1024*1024*1024
 823     elif unit in ['g', 'gb']:
 824         mult = 1024*1024*1024
 825     elif unit in ['m', 'mb']:
 826         mult = 1024*1024
 827     elif unit in ['k', 'kb']:
 828         mult = 1024
 829     elif unit in ['', 'b']:
 830         mult = 1
 831     else:
 832         raise ValueError("invalid unit %r in number %r" % (unit, s))
 833     return int(num*mult)
 834
 835
 836 def count(l):
 837     """Count the number of elements in an iterator. (consumes the iterator)"""
 838     return reduce(lambda x,y: x+1, l)
 839
 840
 841 saved_errors = []
 842 def add_error(e):
 843     """Append an error message to the list of saved errors.
 844
 845     Once processing is able to stop and output the errors, the saved errors are
 846     accessible in the module variable helpers.saved_errors.
 847     """
 848     saved_errors.append(e)
 849     log('%-70s\n' % e)
 850
 851
 852 def clear_errors():
 853     global saved_errors
 854     saved_errors = []
 855
 856
 857 def die_if_errors(msg=None, status=1):
 858     global saved_errors
 859     if saved_errors:
 860         if not msg:
 861             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 862         log(msg)
 863         sys.exit(status)
 864
 865
 866 def handle_ctrl_c():
 867     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 868
 869     The new exception handler will make sure that bup will exit without an ugly
 870     stacktrace when Ctrl-C is hit.
 871     """
 872     oldhook = sys.excepthook
 873     def newhook(exctype, value, traceback):
 874         if exctype == KeyboardInterrupt:
 875             log('\nInterrupted.\n')
 876         else:
 877             return oldhook(exctype, value, traceback)
 878     sys.excepthook = newhook
 879
 880
 881 def columnate(l, prefix):
 882     """Format elements of 'l' in columns with 'prefix' leading each line.
 883
 884     The number of columns is determined automatically based on the string
 885     lengths.
 886     """
 887     if not l:
 888         return ""
 889     l = l[:]
 890     clen = max(len(s) for s in l)
 891     ncols = (tty_width() - len(prefix)) // (clen + 2)
 892     if ncols <= 1:
 893         ncols = 1
 894         clen = 0
 895     cols = []
 896     while len(l) % ncols:
 897         l.append('')
 898     rows = len(l) // ncols
 899     for s in compat.range(0, len(l), rows):
 900         cols.append(l[s:s+rows])
 901     out = ''
 902     for row in zip(*cols):
 903         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 904     return out
 905
 906
 907 def parse_date_or_fatal(str, fatal):
 908     """Parses the given date or calls Option.fatal().
 909     For now we expect a string that contains a float."""
 910     try:
 911         date = float(str)
 912     except ValueError as e:
 913         raise fatal('invalid date format (should be a float): %r' % e)
 914     else:
 915         return date
 916
 917
 918 def parse_excludes(options, fatal):
 919     """Traverse the options and extract all excludes, or call Option.fatal()."""
 920     excluded_paths = []
 921
 922     for flag in options:
 923         (option, parameter) = flag
 924         if option == '--exclude':
 925             excluded_paths.append(resolve_parent(parameter))
 926         elif option == '--exclude-from':
 927             try:
 928                 f = open(resolve_parent(parameter))
 929             except IOError as e:
 930                 raise fatal("couldn't read %s" % parameter)
 931             for exclude_path in f.readlines():
 932                 # FIXME: perhaps this should be rstrip('\n')
 933                 exclude_path = resolve_parent(exclude_path.strip())
 934                 if exclude_path:
 935                     excluded_paths.append(exclude_path)
 936     return sorted(frozenset(excluded_paths))
 937
 938
 939 def parse_rx_excludes(options, fatal):
 940     """Traverse the options and extract all rx excludes, or call
 941     Option.fatal()."""
 942     excluded_patterns = []
 943
 944     for flag in options:
 945         (option, parameter) = flag
 946         if option == '--exclude-rx':
 947             try:
 948                 excluded_patterns.append(re.compile(parameter))
 949             except re.error as ex:
 950                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 951         elif option == '--exclude-rx-from':
 952             try:
 953                 f = open(resolve_parent(parameter))
 954             except IOError as e:
 955                 raise fatal("couldn't read %s" % parameter)
 956             for pattern in f.readlines():
 957                 spattern = pattern.rstrip('\n')
 958                 if not spattern:
 959                     continue
 960                 try:
 961                     excluded_patterns.append(re.compile(spattern))
 962                 except re.error as ex:
 963                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 964     return excluded_patterns
 965
 966
 967 def should_rx_exclude_path(path, exclude_rxs):
 968     """Return True if path matches a regular expression in exclude_rxs."""
 969     for rx in exclude_rxs:
 970         if rx.search(path):
 971             debug1('Skipping %r: excluded by rx pattern %r.\n'
 972                    % (path, rx.pattern))
 973             return True
 974     return False
 975
 976
 977 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 978 # that resolve against the current filesystem in the strip/graft
 979 # functions for example, but elsewhere as well.  I suspect bup's not
 980 # always being careful about that.  For some cases, the contents of
 981 # the current filesystem should be irrelevant, and consulting it might
 982 # produce the wrong result, perhaps via unintended symlink resolution,
 983 # for example.
 984
 985 def path_components(path):
 986     """Break path into a list of pairs of the form (name,
 987     full_path_to_name).  Path must start with '/'.
 988     Example:
 989       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 990     if not path.startswith('/'):
 991         raise Exception('path must start with "/": %s' % path)
 992     # Since we assume path startswith('/'), we can skip the first element.
 993     result = [('', '/')]
 994     norm_path = os.path.abspath(path)
 995     if norm_path == '/':
 996         return result
 997     full_path = ''
 998     for p in norm_path.split('/')[1:]:
 999         full_path += '/' + p
1000         result.append((p, full_path))
1001     return result
1002
1003
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Prefixes are tried from the longest string to the shortest, and the
    first one that matches wins.  A prefix only matches on a component
    boundary: '/foo' strips from '/foo' and '/foo/bar', but not from
    '/foobar'.  A prefix that normalizes to '/' is ignored.  When
    nothing matches, the result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    for bp in sorted(strip_prefixes, key=len, reverse=True):
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == '/':
            continue
        if not normalized_path.startswith(normalized_bp):
            continue
        remainder = normalized_path[len(normalized_bp):]
        # A purely textual prefix match isn't enough; the prefix has to
        # end where a path component ends.
        if remainder and not remainder.startswith('/'):
            continue
        prefix = normalized_bp
        result = []
        # The first piece is always '' and stands for the stripped
        # prefix itself; the remaining pieces are the real components.
        for p in remainder.split('/'):
            if p: # not root
                prefix += '/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1026
1027
def grafted_path_components(graft_points, path):
    """Return the (name, none_or_full_fs_path) components for path
    after applying the first matching (old_prefix, new_prefix) pair in
    graft_points, or path_components() of the normalized path when no
    graft point matches.  A graft point only matches on a component
    boundary of path."""
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        # Normalize both sides (normpath does not make them absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if not clean_path.startswith(old_prefix):
            continue
        remainder = clean_path[len(old_prefix):]
        # Only graft at a component boundary: /foo must not capture
        # /foobar.  The root prefix matches every absolute path.
        if old_prefix != '/' and remainder and not remainder.startswith('/'):
            continue
        # Splice by slicing rather than re.sub(), which would interpret
        # backslashes in new_prefix as replacement-template escapes.
        # The lstrip handles /foo=/ (at least) -- which produces
        # //whatever.
        grafted_path = '/' + (new_prefix + remainder).lstrip('/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
        new_prefix_parts = new_prefix.split('/')
        result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
        # Faked directories (no filesystem path) followed by the real
        # components from the graft point onward.
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == '/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1069
1070
# Module-level alias for hashlib's SHA-1 constructor.
Sha1 = hashlib.sha1
1072
1073
# _helpers.localtime is optional; it is None when _helpers doesn't
# provide it, in which case the time module is used instead (below).
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # The nine fields of time.struct_time plus tm_gmtoff and tm_zone.
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the result of _helpers.localtime(time) as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds
        # down, so split the magnitude and handle the sign separately.
        h, m = divmod(abs(off) // 60, 60)
        # Formatting a signed hour would lose the sign whenever the hour
        # part is zero (offsets between -1h and 0), since -0 == 0.
        sign = '-' if off < 0 else '+'
        return '%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, built from the first nine
        items of x unless x already is one."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted by
        strftime's %z."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged; localtime() here is time.localtime."""
        return x
1109
1110
# Matches any of the characters '[', ' ', '~', '^', ':', '?', '*', '\',
# or any of the sequences '..', '//', '@{'.
_some_invalid_save_parts_rx = re.compile(r'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a save name, False otherwise.

    A name is rejected when it is empty or '@', starts or ends with
    '/', ends with '.', contains anything matched by
    _some_invalid_save_parts_rx, contains a character below 0x20 or
    0x7f, or has a '/'-separated part that starts with '.' or ends
    with '.lock'."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if not name:
        # An empty name would otherwise pass every check below.
        return False
    if name == '@' \
       or name.startswith('/') or name.endswith('/') \
       or name.endswith('.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    return not any(part.startswith('.') or part.endswith('.lock')
                   for part in name.split('/'))
1128
1129
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period like '30s', '2min', '12h', '3d', '1w', '6m' or
    '1y' to a number of seconds.  A month ('m') counts as 31 days and a
    year ('y') as 366 days.  'forever' yields float('inf'); a string
    that isn't a recognized period yields None."""
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    return int(count) * secs_per_unit[unit]