lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from math import floor
   8 from os import environ
   9 from subprocess import PIPE, Popen
  10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, grp, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 from bup.compat import argv_bytes, byte_int
  16 from bup.io import byte_stream, path_msg
  17 # This function should really be in helpers, not in bup.options.  But we
  18 # want options.py to be standalone so people can include it in other projects.
  19 from bup.options import _tty_width as tty_width
  20
  21
  22 class Nonlocal:
  23     """Helper to deal with Python scoping issues"""
  24     pass
  25
  26
# Page size as reported by sysconf(SC_PAGE_SIZE); must be positive.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Value of sysconf(SC_ARG_MAX); batchpipe() uses it as its default arg_max.
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  33
  34 def last(iterable):
  35     result = None
  36     for result in iterable:
  37         pass
  38     return result
  39
  40
  41 def atoi(s):
  42     """Convert s (ascii bytes) to an integer. Return 0 if s is not a number."""
  43     try:
  44         return int(s or b'0')
  45     except ValueError:
  46         return 0
  47
  48
  49 def atof(s):
  50     """Convert s (ascii bytes) to a float. Return 0 if s is not a number."""
  51     try:
  52         return float(s or b'0')
  53     except ValueError:
  54         return 0
  55
  56
# Debug verbosity read from the BUP_DEBUG environment variable; 0 when the
# variable is unset or not a number.  Consulted by debug1() and debug2().
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  58
  59
# Use os.fdatasync where it exists, otherwise fall back to os.fsync.
try:
    _fdatasync = os.fdatasync
except AttributeError:
    _fdatasync = os.fsync

if sys.platform.startswith('darwin'):
    # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
    import fcntl
    def fdatasync(fd):
        """Sync fd with F_FULLFSYNC, using _fdatasync when that is ENOTSUP."""
        try:
            return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
        except IOError as e:
            # Fallback for file systems (SMB) that do not support F_FULLFSYNC
            if e.errno == errno.ENOTSUP:
                return _fdatasync(fd)
            else:
                raise
else:
    # Everywhere else the plain sync function selected above is used as is.
    fdatasync = _fdatasync
  79
  80
  81 def partition(predicate, stream):
  82     """Returns (leading_matches_it, rest_it), where leading_matches_it
  83     must be completely exhausted before traversing rest_it.
  84
  85     """
  86     stream = iter(stream)
  87     ns = Nonlocal()
  88     ns.first_nonmatch = None
  89     def leading_matches():
  90         for x in stream:
  91             if predicate(x):
  92                 yield x
  93             else:
  94                 ns.first_nonmatch = (x,)
  95                 break
  96     def rest():
  97         if ns.first_nonmatch:
  98             yield ns.first_nonmatch[0]
  99             for x in stream:
 100                 yield x
 101     return (leading_matches(), rest())
 102
 103
 104 def merge_dict(*xs):
 105     result = {}
 106     for x in xs:
 107         result.update(x)
 108     return result
 109
 110
 111 def lines_until_sentinel(f, sentinel, ex_type):
 112     # sentinel must end with \n and must contain only one \n
 113     while True:
 114         line = f.readline()
 115         if not (line and line.endswith(b'\n')):
 116             raise ex_type('Hit EOF while reading line')
 117         if line == sentinel:
 118             return
 119         yield line
 120
 121
 122 def stat_if_exists(path):
 123     try:
 124         return os.stat(path)
 125     except OSError as e:
 126         if e.errno != errno.ENOENT:
 127             raise
 128     return None
 129
 130
 131 # Write (blockingly) to sockets that may or may not be in blocking mode.
 132 # We need this because our stderr is sometimes eaten by subprocesses
 133 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 134 # leading to race conditions.  Ick.  We'll do it the hard way.
 135 def _hard_write(fd, buf):
 136     while buf:
 137         (r,w,x) = select.select([], [fd], [], None)
 138         if not w:
 139             raise IOError('select(fd) returned without being writable')
 140         try:
 141             sz = os.write(fd, buf)
 142         except OSError as e:
 143             if e.errno != errno.EAGAIN:
 144                 raise
 145         assert(sz >= 0)
 146         buf = buf[sz:]
 147
 148
 149 _last_prog = 0
 150 def log(s):
 151     """Print a log message to stderr."""
 152     global _last_prog
 153     sys.stdout.flush()
 154     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 155     _last_prog = 0
 156
 157
 158 def debug1(s):
 159     if buglvl >= 1:
 160         log(s)
 161
 162
 163 def debug2(s):
 164     if buglvl >= 2:
 165         log(s)
 166
 167
 168 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 169 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 170 _last_progress = ''
 171 def progress(s):
 172     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 173     global _last_progress
 174     if istty2:
 175         log(s)
 176         _last_progress = s
 177
 178
 179 def qprogress(s):
 180     """Calls progress() only if we haven't printed progress in a while.
 181
 182     This avoids overloading the stderr buffer with excess junk.
 183     """
 184     global _last_prog
 185     now = time.time()
 186     if now - _last_prog > 0.1:
 187         progress(s)
 188         _last_prog = now
 189
 190
 191 def reprogress():
 192     """Calls progress() to redisplay the most recent progress message.
 193
 194     Useful after you've printed some other message that wipes out the
 195     progress line.
 196     """
 197     if _last_progress and _last_progress.endswith('\r'):
 198         progress(_last_progress)
 199
 200
 201 def mkdirp(d, mode=None):
 202     """Recursively create directories on path 'd'.
 203
 204     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 205     the path already exists.
 206     """
 207     try:
 208         if mode:
 209             os.makedirs(d, mode)
 210         else:
 211             os.makedirs(d)
 212     except OSError as e:
 213         if e.errno == errno.EEXIST:
 214             pass
 215         else:
 216             raise
 217
 218
 219 class MergeIterItem:
 220     def __init__(self, entry, read_it):
 221         self.entry = entry
 222         self.read_it = read_it
 223     def __lt__(self, x):
 224         return self.entry < x.entry
 225
 226 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 227     if key:
 228         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 229     else:
 230         samekey = operator.eq
 231     count = 0
 232     total = sum(len(it) for it in iters)
 233     iters = (iter(it) for it in iters)
 234     heap = ((next(it, None),it) for it in iters)
 235     heap = [MergeIterItem(e, it) for e, it in heap if e]
 236
 237     heapq.heapify(heap)
 238     pe = None
 239     while heap:
 240         if not count % pfreq:
 241             pfunc(count, total)
 242         e, it = heap[0].entry, heap[0].read_it
 243         if not samekey(e, pe):
 244             pe = e
 245             yield e
 246         count += 1
 247         try:
 248             e = next(it)
 249         except StopIteration:
 250             heapq.heappop(heap) # remove current
 251         else:
 252             # shift current to new location
 253             heapq.heapreplace(heap, MergeIterItem(e, it))
 254     pfinal(count, total)
 255
 256
 257 def unlink(f):
 258     """Delete a file at path 'f' if it currently exists.
 259
 260     Unlike os.unlink(), does not throw an exception if the file didn't already
 261     exist.
 262     """
 263     try:
 264         os.unlink(f)
 265     except OSError as e:
 266         if e.errno != errno.ENOENT:
 267             raise
 268
 269
 270 _bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
 271 _sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
 272
 273 def bquote(x):
 274     if x == b'':
 275         return b"''"
 276     if _bq_simple_id_rx.match(x):
 277         return x
 278     return b"'%s'" % x.replace(b"'", b"'\"'\"'")
 279
 280 def squote(x):
 281     if x == '':
 282         return "''"
 283     if _sq_simple_id_rx.match(x):
 284         return x
 285     return "'%s'" % x.replace("'", "'\"'\"'")
 286
 287 def quote(x):
 288     if isinstance(x, bytes):
 289         return bquote(x)
 290     if isinstance(x, compat.str_type):
 291         return squote(x)
 292     assert False
 293
 294 def shstr(cmd):
 295     """Return a shell quoted string for cmd if it's a sequence, else cmd.
 296
 297     cmd must be a string, bytes, or a sequence of one or the other,
 298     and the assumption is that if cmd is a string or bytes, then it's
 299     already quoted (because it's what's actually being passed to
 300     call() and friends.  e.g. log(shstr(cmd)); call(cmd)
 301
 302     """
 303     if isinstance(cmd, (bytes, compat.str_type)):
 304         return cmd
 305     elif all(isinstance(x, bytes) for x in cmd):
 306         return b' '.join(map(bquote, cmd))
 307     elif all(isinstance(x, compat.str_type) for x in cmd):
 308         return ' '.join(map(squote, cmd))
 309     raise TypeError('unsupported shstr argument: ' + repr(cmd))
 310
 311
# exc is an alias for subprocess.check_call.
exc = subprocess.check_call
 313
 314 def exo(cmd,
 315         input=None,
 316         stdin=None,
 317         stderr=None,
 318         shell=False,
 319         check=True,
 320         preexec_fn=None):
 321     if input:
 322         assert stdin in (None, PIPE)
 323         stdin = PIPE
 324     p = Popen(cmd,
 325               stdin=stdin, stdout=PIPE, stderr=stderr,
 326               shell=shell,
 327               preexec_fn=preexec_fn)
 328     out, err = p.communicate(input)
 329     if check and p.returncode != 0:
 330         raise Exception('subprocess %r failed with status %d, stderr: %r'
 331                         % (b' '.join(map(quote, cmd)), p.returncode, err))
 332     return out, err, p
 333
 334 def readpipe(argv, preexec_fn=None, shell=False):
 335     """Run a subprocess and return its output."""
 336     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 337                          shell=shell)
 338     out, err = p.communicate()
 339     if p.returncode != 0:
 340         raise Exception('subprocess %r failed with status %d'
 341                         % (b' '.join(argv), p.returncode))
 342     return out
 343
 344
 345 def _argmax_base(command):
 346     base_size = 2048
 347     for c in command:
 348         base_size += len(command) + 1
 349     for k, v in compat.items(environ):
 350         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 351     return base_size
 352
 353
 354 def _argmax_args_size(args):
 355     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 356
 357
 358 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 359     """If args is not empty, yield the output produced by calling the
 360 command list with args as a sequence of strings (It may be necessary
 361 to return multiple strings in order to respect ARG_MAX)."""
 362     # The optional arg_max arg is a workaround for an issue with the
 363     # current wvtest behavior.
 364     base_size = _argmax_base(command)
 365     while args:
 366         room = arg_max - base_size
 367         i = 0
 368         while i < len(args):
 369             next_size = _argmax_args_size(args[i:i+1])
 370             if room - next_size < 0:
 371                 break
 372             room -= next_size
 373             i += 1
 374         sub_args = args[:i]
 375         args = args[i:]
 376         assert(len(sub_args))
 377         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 378
 379
 380 def resolve_parent(p):
 381     """Return the absolute path of a file without following any final symlink.
 382
 383     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 384     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 385     will follow symlinks in p's directory)
 386     """
 387     try:
 388         st = os.lstat(p)
 389     except OSError:
 390         st = None
 391     if st and stat.S_ISLNK(st.st_mode):
 392         (dir, name) = os.path.split(p)
 393         dir = os.path.realpath(dir)
 394         out = os.path.join(dir, name)
 395     else:
 396         out = os.path.realpath(p)
 397     #log('realpathing:%r,%r\n' % (p, out))
 398     return out
 399
 400
 401 def detect_fakeroot():
 402     "Return True if we appear to be running under fakeroot."
 403     return os.getenv("FAKEROOTKEY") != None
 404
 405
 406 if sys.platform.startswith('cygwin'):
 407     def is_superuser():
 408         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 409         groups = os.getgroups()
 410         return 544 in groups or 0 in groups
 411 else:
 412     def is_superuser():
 413         return os.geteuid() == 0
 414
 415
 416 def cache_key_value(get_value, key, cache):
 417     """Return (value, was_cached).  If there is a value in the cache
 418     for key, use that, otherwise, call get_value(key) which should
 419     throw a KeyError if there is no value -- in which case the cached
 420     and returned value will be None.
 421     """
 422     try: # Do we already have it (or know there wasn't one)?
 423         value = cache[key]
 424         return value, True
 425     except KeyError:
 426         pass
 427     value = None
 428     try:
 429         cache[key] = value = get_value(key)
 430     except KeyError:
 431         cache[key] = None
 432     return value, False
 433
 434
 435 _hostname = None
 436 def hostname():
 437     """Get the FQDN of this machine."""
 438     global _hostname
 439     if not _hostname:
 440         _hostname = socket.getfqdn().encode('iso-8859-1')
 441     return _hostname
 442
 443
 444 def format_filesize(size):
 445     unit = 1024.0
 446     size = float(size)
 447     if size < unit:
 448         return "%d" % (size)
 449     exponent = int(math.log(size) // math.log(unit))
 450     size_prefix = "KMGTPE"[exponent - 1]
 451     return "%.1f%s" % (size // math.pow(unit, exponent), size_prefix)
 452
 453
 454 class NotOk(Exception):
 455     pass
 456
 457
 458 class BaseConn:
 459     def __init__(self, outp):
 460         self.outp = outp
 461
 462     def close(self):
 463         while self._read(65536): pass
 464
 465     def read(self, size):
 466         """Read 'size' bytes from input stream."""
 467         self.outp.flush()
 468         return self._read(size)
 469
 470     def readline(self):
 471         """Read from input stream until a newline is found."""
 472         self.outp.flush()
 473         return self._readline()
 474
 475     def write(self, data):
 476         """Write 'data' to output stream."""
 477         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 478         self.outp.write(data)
 479
 480     def has_input(self):
 481         """Return true if input stream is readable."""
 482         raise NotImplemented("Subclasses must implement has_input")
 483
 484     def ok(self):
 485         """Indicate end of output from last sent command."""
 486         self.write(b'\nok\n')
 487
 488     def error(self, s):
 489         """Indicate server error to the client."""
 490         s = re.sub(br'\s+', b' ', s)
 491         self.write(b'\nerror %s\n' % s)
 492
 493     def _check_ok(self, onempty):
 494         self.outp.flush()
 495         rl = b''
 496         for rl in linereader(self):
 497             #log('%d got line: %r\n' % (os.getpid(), rl))
 498             if not rl:  # empty line
 499                 continue
 500             elif rl == b'ok':
 501                 return None
 502             elif rl.startswith(b'error '):
 503                 #log('client: error: %s\n' % rl[6:])
 504                 return NotOk(rl[6:])
 505             else:
 506                 onempty(rl)
 507         raise Exception('server exited unexpectedly; see errors above')
 508
 509     def drain_and_check_ok(self):
 510         """Remove all data for the current command from input stream."""
 511         def onempty(rl):
 512             pass
 513         return self._check_ok(onempty)
 514
 515     def check_ok(self):
 516         """Verify that server action completed successfully."""
 517         def onempty(rl):
 518             raise Exception('expected "ok", got %r' % rl)
 519         return self._check_ok(onempty)
 520
 521
 522 class Conn(BaseConn):
 523     def __init__(self, inp, outp):
 524         BaseConn.__init__(self, outp)
 525         self.inp = inp
 526
 527     def _read(self, size):
 528         return self.inp.read(size)
 529
 530     def _readline(self):
 531         return self.inp.readline()
 532
 533     def has_input(self):
 534         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 535         if rl:
 536             assert(rl[0] == self.inp.fileno())
 537             return True
 538         else:
 539             return None
 540
 541
 542 def checked_reader(fd, n):
 543     while n > 0:
 544         rl, _, _ = select.select([fd], [], [])
 545         assert(rl[0] == fd)
 546         buf = os.read(fd, n)
 547         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 548         yield buf
 549         n -= len(buf)
 550
 551
 552 MAX_PACKET = 128 * 1024
 553 def mux(p, outfd, outr, errr):
 554     try:
 555         fds = [outr, errr]
 556         while p.poll() is None:
 557             rl, _, _ = select.select(fds, [], [])
 558             for fd in rl:
 559                 if fd == outr:
 560                     buf = os.read(outr, MAX_PACKET)
 561                     if not buf: break
 562                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 563                 elif fd == errr:
 564                     buf = os.read(errr, 1024)
 565                     if not buf: break
 566                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 567     finally:
 568         os.write(outfd, struct.pack('!IB', 0, 3))
 569
 570
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a multiplexed stream from the file descriptor 'infd'.  After
    the b'BUPMUX' sync string, the stream is a series of packets, each a
    '!IB' header (payload length, channel) followed by the payload.
    Channel 1 is connection data, channel 2 is copied to stderr, and
    channel 3 marks the connection closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        while tail != b'BUPMUX':
            # Read enough to fill a 6-byte window, then one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            byte_stream(sys.stderr).write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # generator over the current data packet
        self.buf = None      # data already read but not yet returned
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        """Write 'data' to the output stream after a non-blocking input poll."""
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and dispatch on its channel.

        Returns False if the connection is closed or nothing becomes
        readable within 'timeout', and True once a packet was handled.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily through self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # Remote stderr: copy the payload to our stderr right away.
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return True if it does.

        Returns False when no data packet arrives within 'timeout' or the
        connection has been closed.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current packet fully consumed; go fetch the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of buffered data until ix_fn reports a stop point.

        ix_fn(buf) returns None to take the whole buffer and continue, or
        an index at which to cut the buffer and stop.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                # Keep what follows the cut point for the next read.
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        """Return data up to and including the next newline."""
        def find_eol(buf):
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        """Return up to 'size' bytes of data."""
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if data is buffered or can be loaded without blocking."""
        return self._load_buf(0)
 660
 661
 662 def linereader(f):
 663     """Generate a list of input lines from 'f' without terminating newlines."""
 664     while 1:
 665         line = f.readline()
 666         if not line:
 667             break
 668         yield line[:-1]
 669
 670
 671 def chunkyreader(f, count = None):
 672     """Generate a list of chunks of data read from 'f'.
 673
 674     If count is None, read until EOF is reached.
 675
 676     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 677     reached while reading, raise IOError.
 678     """
 679     if count != None:
 680         while count > 0:
 681             b = f.read(min(count, 65536))
 682             if not b:
 683                 raise IOError('EOF with %d bytes remaining' % count)
 684             yield b
 685             count -= len(b)
 686     else:
 687         while 1:
 688             b = f.read(65536)
 689             if not b: break
 690             yield b
 691
 692
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed name when leaving the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    # Create the temp file in the target's directory.
    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so nothing owns the descriptor yet; close it.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Only reached when the with-block body did not raise.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 727
 728
 729 def slashappend(s):
 730     """Append "/" to 's' if it doesn't aleady end in "/"."""
 731     assert isinstance(s, bytes)
 732     if s and not s.endswith(b'/'):
 733         return s + b'/'
 734     else:
 735         return s
 736
 737
 738 def _mmap_do(f, sz, flags, prot, close):
 739     if not sz:
 740         st = os.fstat(f.fileno())
 741         sz = st.st_size
 742     if not sz:
 743         # trying to open a zero-length map gives an error, but an empty
 744         # string has all the same behaviour of a zero-length map, ie. it has
 745         # no elements :)
 746         return ''
 747     map = mmap.mmap(f.fileno(), sz, flags, prot)
 748     if close:
 749         f.close()  # map will persist beyond file close
 750     return map
 751
 752
 753 def mmap_read(f, sz = 0, close=True):
 754     """Create a read-only memory mapped region on file 'f'.
 755     If sz is 0, the region will cover the entire file.
 756     """
 757     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 758
 759
 760 def mmap_readwrite(f, sz = 0, close=True):
 761     """Create a read-write memory mapped region on file 'f'.
 762     If sz is 0, the region will cover the entire file.
 763     """
 764     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 765                     close)
 766
 767
 768 def mmap_readwrite_private(f, sz = 0, close=True):
 769     """Create a read-write memory mapped region on file 'f'.
 770     If sz is 0, the region will cover the entire file.
 771     The map is private, which means the changes are never flushed back to the
 772     file.
 773     """
 774     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 775                     close)
 776
 777
 778 _mincore = getattr(_helpers, 'mincore', None)
 779 if _mincore:
 780     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 781     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 782
 783     _fmincore_chunk_size = None
 784     def _set_fmincore_chunk_size():
 785         global _fmincore_chunk_size
 786         pref_chunk_size = 64 * 1024 * 1024
 787         chunk_size = sc_page_size
 788         if (sc_page_size < pref_chunk_size):
 789             chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
 790         _fmincore_chunk_size = chunk_size
 791
 792     def fmincore(fd):
 793         """Return the mincore() data for fd as a bytearray whose values can be
 794         tested via MINCORE_INCORE, or None if fd does not fully
 795         support the operation."""
 796         st = os.fstat(fd)
 797         if (st.st_size == 0):
 798             return bytearray(0)
 799         if not _fmincore_chunk_size:
 800             _set_fmincore_chunk_size()
 801         pages_per_chunk = _fmincore_chunk_size // sc_page_size;
 802         page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
 803         chunk_count = page_count // _fmincore_chunk_size
 804         if chunk_count < 1:
 805             chunk_count = 1
 806         result = bytearray(page_count)
 807         for ci in compat.range(chunk_count):
 808             pos = _fmincore_chunk_size * ci;
 809             msize = min(_fmincore_chunk_size, st.st_size - pos)
 810             try:
 811                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 812             except mmap.error as ex:
 813                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 814                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 815                     return None
 816                 raise ex
 817             try:
 818                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 819             except OSError as ex:
 820                 if ex.errno == errno.ENOSYS:
 821                     return None
 822                 raise
 823         return result
 824
 825
 826 def parse_timestamp(epoch_str):
 827     """Return the number of nanoseconds since the epoch that are described
 828 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 829 throw a ValueError that may contain additional information."""
 830     ns_per = {'s' :  1000000000,
 831               'ms' : 1000000,
 832               'us' : 1000,
 833               'ns' : 1}
 834     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 835     if not match:
 836         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 837             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 838         raise ValueError()
 839     (n, units) = match.group(1, 2)
 840     if not n:
 841         n = 1
 842     n = int(n)
 843     return n * ns_per[units]
 844
 845
 846 def parse_num(s):
 847     """Parse string or bytes as a possibly unit suffixed number.
 848
 849     For example:
 850         199.2k means 203981 bytes
 851         1GB means 1073741824 bytes
 852         2.1 tb means 2199023255552 bytes
 853     """
 854     if isinstance(s, bytes):
 855         # FIXME: should this raise a ValueError for UnicodeDecodeError
 856         # (perhaps with the latter as the context).
 857         s = s.decode('ascii')
 858     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 859     if not g:
 860         raise ValueError("can't parse %r as a number" % s)
 861     (val, unit) = g.groups()
 862     num = float(val)
 863     unit = unit.lower()
 864     if unit in ['t', 'tb']:
 865         mult = 1024*1024*1024*1024
 866     elif unit in ['g', 'gb']:
 867         mult = 1024*1024*1024
 868     elif unit in ['m', 'mb']:
 869         mult = 1024*1024
 870     elif unit in ['k', 'kb']:
 871         mult = 1024
 872     elif unit in ['', 'b']:
 873         mult = 1
 874     else:
 875         raise ValueError("invalid unit %r in number %r" % (unit, s))
 876     return int(num*mult)
 877
 878
 879 saved_errors = []
 880 def add_error(e):
 881     """Append an error message to the list of saved errors.
 882
 883     Once processing is able to stop and output the errors, the saved errors are
 884     accessible in the module variable helpers.saved_errors.
 885     """
 886     saved_errors.append(e)
 887     log('%-70s\n' % e)
 888
 889
def clear_errors():
    """Reset the module-level saved_errors to a new empty list."""
    global saved_errors
    saved_errors = []
 893
 894
 895 def die_if_errors(msg=None, status=1):
 896     global saved_errors
 897     if saved_errors:
 898         if not msg:
 899             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 900         log(msg)
 901         sys.exit(status)
 902
 903
 904 def handle_ctrl_c():
 905     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 906
 907     The new exception handler will make sure that bup will exit without an ugly
 908     stacktrace when Ctrl-C is hit.
 909     """
 910     oldhook = sys.excepthook
 911     def newhook(exctype, value, traceback):
 912         if exctype == KeyboardInterrupt:
 913             log('\nInterrupted.\n')
 914         else:
 915             return oldhook(exctype, value, traceback)
 916     sys.excepthook = newhook
 917
 918
 919 def columnate(l, prefix):
 920     """Format elements of 'l' in columns with 'prefix' leading each line.
 921
 922     The number of columns is determined automatically based on the string
 923     lengths.
 924     """
 925     binary = isinstance(prefix, bytes)
 926     nothing = b'' if binary else ''
 927     nl = b'\n' if binary else '\n'
 928     if not l:
 929         return nothing
 930     l = l[:]
 931     clen = max(len(s) for s in l)
 932     ncols = (tty_width() - len(prefix)) // (clen + 2)
 933     if ncols <= 1:
 934         ncols = 1
 935         clen = 0
 936     cols = []
 937     while len(l) % ncols:
 938         l.append(nothing)
 939     rows = len(l) // ncols
 940     for s in compat.range(0, len(l), rows):
 941         cols.append(l[s:s+rows])
 942     out = nothing
 943     fmt = b'%-*s' if binary else '%-*s'
 944     for row in zip(*cols):
 945         out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
 946     return out
 947
 948
 949 def parse_date_or_fatal(str, fatal):
 950     """Parses the given date or calls Option.fatal().
 951     For now we expect a string that contains a float."""
 952     try:
 953         date = float(str)
 954     except ValueError as e:
 955         raise fatal('invalid date format (should be a float): %r' % e)
 956     else:
 957         return date
 958
 959
 960 def parse_excludes(options, fatal):
 961     """Traverse the options and extract all excludes, or call Option.fatal()."""
 962     excluded_paths = []
 963
 964     for flag in options:
 965         (option, parameter) = flag
 966         if option == '--exclude':
 967             excluded_paths.append(resolve_parent(argv_bytes(parameter)))
 968         elif option == '--exclude-from':
 969             try:
 970                 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
 971             except IOError as e:
 972                 raise fatal("couldn't read %r" % parameter)
 973             for exclude_path in f.readlines():
 974                 # FIXME: perhaps this should be rstrip('\n')
 975                 exclude_path = resolve_parent(exclude_path.strip())
 976                 if exclude_path:
 977                     excluded_paths.append(exclude_path)
 978     return sorted(frozenset(excluded_paths))
 979
 980
 981 def parse_rx_excludes(options, fatal):
 982     """Traverse the options and extract all rx excludes, or call
 983     Option.fatal()."""
 984     excluded_patterns = []
 985
 986     for flag in options:
 987         (option, parameter) = flag
 988         if option == '--exclude-rx':
 989             try:
 990                 excluded_patterns.append(re.compile(argv_bytes(parameter)))
 991             except re.error as ex:
 992                 fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
 993         elif option == '--exclude-rx-from':
 994             try:
 995                 f = open(resolve_parent(parameter), 'rb')
 996             except IOError as e:
 997                 raise fatal("couldn't read %r" % parameter)
 998             for pattern in f.readlines():
 999                 spattern = pattern.rstrip(b'\n')
1000                 if not spattern:
1001                     continue
1002                 try:
1003                     excluded_patterns.append(re.compile(spattern))
1004                 except re.error as ex:
1005                     fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
1006     return excluded_patterns
1007
1008
def should_rx_exclude_path(path, exclude_rxs):
    """Tell whether any compiled pattern in exclude_rxs is found in path.

    The patterns are tried in order with search(); the first one that
    matches is reported via debug1() and True is returned.  False is
    returned when none of them match."""
    hit = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if hit is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n' % (path, hit.pattern))
    return True
1017
1018
1019 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1020 # that resolve against the current filesystem in the strip/graft
1021 # functions for example, but elsewhere as well.  I suspect bup's not
1022 # always being careful about that.  For some cases, the contents of
1023 # the current filesystem should be irrelevant, and consulting it might
1024 # produce the wrong result, perhaps via unintended symlink resolution,
1025 # for example.
1026
def path_components(path):
    """Split path (bytes) into a list of (name, full_path_to_name) pairs.

    The path must start with b'/'; otherwise an Exception is raised.
    The path is normalized with os.path.abspath() first, and the
    first pair always describes the root.
    Example:
      b'/home/foo' -> [(b'', b'/'), (b'home', b'/home'),
                       (b'foo', b'/home/foo')]"""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    components = [(b'', b'/')]
    normalized = os.path.abspath(path)
    if normalized != b'/':
        # The element before the leading slash is empty and already
        # represented by the root pair above.
        names = normalized.split(b'/')[1:]
        for depth, name in enumerate(names, 1):
            components.append((name, b'/' + b'/'.join(names[:depth])))
    return components
1044
1045
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Both the path and the prefixes are normalized with
    os.path.abspath().  Prefixes are tried longest first, the root
    prefix is ignored, and a prefix only applies when it covers whole
    path components: b'/foo' strips b'/foo' and b'/foo/bar', but not
    b'/foobar'.  The first pair of a stripped result has an empty name
    and the stripped prefix as its path.  When no prefix applies, the
    result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    # Longest prefixes first, so that the most specific one wins.
    for bp in sorted(strip_prefixes, key=len, reverse=True):
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == b'/':
            continue
        # Only match at a component boundary; a bare startswith() would
        # also accept a prefix that ends in the middle of a name.
        if normalized_path != normalized_bp \
           and not normalized_path.startswith(normalized_bp + b'/'):
            continue
        prefix = normalized_bp
        result = [(b'', prefix)]
        # What remains is either empty or starts with b'/', so the
        # first split element is always empty and is skipped.
        for p in normalized_path[len(normalized_bp):].split(b'/')[1:]:
            prefix += b'/' + p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1068
1069
def grafted_path_components(graft_points, path):
    """Return the path components of path after applying graft_points.

    graft_points is an iterable of (old_prefix, new_prefix) pairs
    (bytes).  The first pair whose normalized old_prefix covers path
    at a component boundary is applied; when none applies, the result
    is path_components() of the normalized path.  The result is a
    list of (name, fs_path_or_None) pairs, where the pairs created
    for the directories leading up to the graft point have None as
    their path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        # Normalize the prefixes (normpath does not make them absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # Only graft at a component boundary, so that /foo=/x does not
        # capture /foobar.  The root prefix covers every path.
        if old_prefix != b'/' and clean_path != old_prefix \
           and not clean_path.startswith(old_prefix + b'/'):
            continue
        # Swap the prefix by slicing rather than with re.sub(), which
        # would interpret backslashes in new_prefix as escapes.
        grafted_path = new_prefix + clean_path[len(old_prefix):]
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = b'/' + grafted_path.lstrip(b'/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
        new_prefix_parts = new_prefix.split(b'/')
        result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == b'/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1111
1112
# Alias for hashlib's SHA-1 constructor.
Sha1 = hashlib.sha1
1114
1115
# The localtime() provided by the _helpers module, or None when that
# module does not define one.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Record type for _helpers.localtime() results: nine fields named
    # like those of time.struct_time, followed by tm_gmtoff and
    # tm_zone.
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])
1123
# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return _helpers.localtime() of the floor of time as a bup_time."""
        return bup_time(*_helpers.localtime(floor(time)))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        # Format the sign separately: negating a zero hour count would
        # lose it for offsets between zero and minus one hour.  An
        # offset that truncates to zero minutes is reported as +0000.
        sign = b'-' if off < 0 and offmin else b'+'
        return b'%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any fields past the
        first nine."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return strftime's %z rendering of the local UTC offset for
        time t, as bytes."""
        # strftime() wants a native str format; convert the result so
        # that bytes are returned here just as in the branch above.
        off = time.strftime('%z', localtime(t))
        return off if isinstance(off, bytes) else off.encode('ascii')
    def to_py_time(x):
        """Return x unchanged; localtime() results need no conversion
        here."""
        return x
1151
1152
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')

# ASCII control bytes (0x00-0x1f) and DEL (0x7f).
_save_name_control_bytes_rx = re.compile(br'[\x00-\x1f\x7f]')

def valid_save_name(name):
    """Return True if name (bytes) is acceptable as a save name.

    A name is rejected when it is empty, is b'@', starts or ends with
    b'/', ends with b'.', contains a space, one of the characters
    [ ~ ^ : ? * or a backslash, contains b'..', b'//' or b'@{',
    contains a control byte or DEL, or has a slash-separated part
    that starts with b'.' or ends with b'.lock'.
    """
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if not name or name == b'@' \
       or name.startswith(b'/') or name.endswith(b'/') \
       or name.endswith(b'.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if _save_name_control_bytes_rx.search(name):
        return False
    for part in name.split(b'/'):
        if part.startswith(b'.') or part.endswith(b'.lock'):
            return False
    return True
1170
1171
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period such as '30s', '5min' or '2w' to seconds.

    'forever' gives float('inf').  Otherwise s must be a decimal count
    followed by one of the units s, min, h, d, w, m or y, where a
    month counts as 31 days and a year as 366 days.  None is returned
    for anything else.
    """
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    return int(count) * secs_per_unit[unit]