lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from math import floor
   8 from os import environ
   9 from subprocess import PIPE, Popen
  10 import sys, os, subprocess, errno, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 from bup.compat import argv_bytes, byte_int, pending_raise
  16 from bup.io import byte_stream, path_msg
  17 # This function should really be in helpers, not in bup.options.  But we
  18 # want options.py to be standalone so people can include it in other projects.
  19 from bup.options import _tty_width as tty_width
  20
  21
# Debug verbosity, read once at import time from the BUP_DEBUG environment
# variable (0 when unset).  debug1() logs when this is >= 1 and debug2()
# when it is >= 2.  A non-integer BUP_DEBUG value makes int() raise here.
buglvl = int(os.environ.get('BUP_DEBUG', 0))
  23
  24
  25 class Nonlocal:
  26     """Helper to deal with Python scoping issues"""
  27     pass
  28
  29
  30 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  31 assert(sc_page_size > 0)
  32
  33 sc_arg_max = os.sysconf('SC_ARG_MAX')
  34 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  35     sc_arg_max = 2 * 1024 * 1024
  36
  37 def last(iterable):
  38     result = None
  39     for result in iterable:
  40         pass
  41     return result
  42
  43 try:
  44     _fdatasync = os.fdatasync
  45 except AttributeError:
  46     _fdatasync = os.fsync
  47
  48 if sys.platform.startswith('darwin'):
  49     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  50     import fcntl
  51     def fdatasync(fd):
  52         try:
  53             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  54         except IOError as e:
  55             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  56             if e.errno == errno.ENOTSUP:
  57                 return _fdatasync(fd)
  58             else:
  59                 raise
  60 else:
  61     fdatasync = _fdatasync
  62
  63
  64 def partition(predicate, stream):
  65     """Returns (leading_matches_it, rest_it), where leading_matches_it
  66     must be completely exhausted before traversing rest_it.
  67
  68     """
  69     stream = iter(stream)
  70     ns = Nonlocal()
  71     ns.first_nonmatch = None
  72     def leading_matches():
  73         for x in stream:
  74             if predicate(x):
  75                 yield x
  76             else:
  77                 ns.first_nonmatch = (x,)
  78                 break
  79     def rest():
  80         if ns.first_nonmatch:
  81             yield ns.first_nonmatch[0]
  82             for x in stream:
  83                 yield x
  84     return (leading_matches(), rest())
  85
  86
  87 def merge_dict(*xs):
  88     result = {}
  89     for x in xs:
  90         result.update(x)
  91     return result
  92
  93
  94 def lines_until_sentinel(f, sentinel, ex_type):
  95     # sentinel must end with \n and must contain only one \n
  96     while True:
  97         line = f.readline()
  98         if not (line and line.endswith(b'\n')):
  99             raise ex_type('Hit EOF while reading line')
 100         if line == sentinel:
 101             return
 102         yield line
 103
 104
 105 def stat_if_exists(path):
 106     try:
 107         return os.stat(path)
 108     except OSError as e:
 109         if e.errno != errno.ENOENT:
 110             raise
 111     return None
 112
 113
 114 # Write (blockingly) to sockets that may or may not be in blocking mode.
 115 # We need this because our stderr is sometimes eaten by subprocesses
 116 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 117 # leading to race conditions.  Ick.  We'll do it the hard way.
 118 def _hard_write(fd, buf):
 119     while buf:
 120         (r,w,x) = select.select([], [fd], [], None)
 121         if not w:
 122             raise IOError('select(fd) returned without being writable')
 123         try:
 124             sz = os.write(fd, buf)
 125         except OSError as e:
 126             if e.errno != errno.EAGAIN:
 127                 raise
 128         assert(sz >= 0)
 129         buf = buf[sz:]
 130
 131
 132 _last_prog = 0
 133 def log(s):
 134     """Print a log message to stderr."""
 135     global _last_prog
 136     sys.stdout.flush()
 137     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 138     _last_prog = 0
 139
 140
 141 def debug1(s):
 142     if buglvl >= 1:
 143         log(s)
 144
 145
 146 def debug2(s):
 147     if buglvl >= 2:
 148         log(s)
 149
 150
 151 istty1 = os.isatty(1) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 1)
 152 istty2 = os.isatty(2) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 2)
 153 _last_progress = ''
 154 def progress(s):
 155     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 156     global _last_progress
 157     if istty2:
 158         log(s)
 159         _last_progress = s
 160
 161
 162 def qprogress(s):
 163     """Calls progress() only if we haven't printed progress in a while.
 164
 165     This avoids overloading the stderr buffer with excess junk.
 166     """
 167     global _last_prog
 168     now = time.time()
 169     if now - _last_prog > 0.1:
 170         progress(s)
 171         _last_prog = now
 172
 173
 174 def reprogress():
 175     """Calls progress() to redisplay the most recent progress message.
 176
 177     Useful after you've printed some other message that wipes out the
 178     progress line.
 179     """
 180     if _last_progress and _last_progress.endswith('\r'):
 181         progress(_last_progress)
 182
 183
 184 def mkdirp(d, mode=None):
 185     """Recursively create directories on path 'd'.
 186
 187     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 188     the path already exists.
 189     """
 190     try:
 191         if mode:
 192             os.makedirs(d, mode)
 193         else:
 194             os.makedirs(d)
 195     except OSError as e:
 196         if e.errno == errno.EEXIST:
 197             pass
 198         else:
 199             raise
 200
 201
 202 class MergeIterItem:
 203     def __init__(self, entry, read_it):
 204         self.entry = entry
 205         self.read_it = read_it
 206     def __lt__(self, x):
 207         return self.entry < x.entry
 208
 209 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 210     if key:
 211         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 212     else:
 213         samekey = operator.eq
 214     count = 0
 215     total = sum(len(it) for it in iters)
 216     iters = (iter(it) for it in iters)
 217     heap = ((next(it, None),it) for it in iters)
 218     heap = [MergeIterItem(e, it) for e, it in heap if e]
 219
 220     heapq.heapify(heap)
 221     pe = None
 222     while heap:
 223         if not count % pfreq:
 224             pfunc(count, total)
 225         e, it = heap[0].entry, heap[0].read_it
 226         if not samekey(e, pe):
 227             pe = e
 228             yield e
 229         count += 1
 230         try:
 231             e = next(it)
 232         except StopIteration:
 233             heapq.heappop(heap) # remove current
 234         else:
 235             # shift current to new location
 236             heapq.heapreplace(heap, MergeIterItem(e, it))
 237     pfinal(count, total)
 238
 239
 240 def unlink(f):
 241     """Delete a file at path 'f' if it currently exists.
 242
 243     Unlike os.unlink(), does not throw an exception if the file didn't already
 244     exist.
 245     """
 246     try:
 247         os.unlink(f)
 248     except OSError as e:
 249         if e.errno != errno.ENOENT:
 250             raise
 251
 252
 253 _bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
 254 _sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
 255
 256 def bquote(x):
 257     if x == b'':
 258         return b"''"
 259     if _bq_simple_id_rx.match(x):
 260         return x
 261     return b"'%s'" % x.replace(b"'", b"'\"'\"'")
 262
 263 def squote(x):
 264     if x == '':
 265         return "''"
 266     if _sq_simple_id_rx.match(x):
 267         return x
 268     return "'%s'" % x.replace("'", "'\"'\"'")
 269
 270 def quote(x):
 271     if isinstance(x, bytes):
 272         return bquote(x)
 273     if isinstance(x, compat.str_type):
 274         return squote(x)
 275     assert False
 276     # some versions of pylint get confused
 277     return None
 278
 279 def shstr(cmd):
 280     """Return a shell quoted string for cmd if it's a sequence, else cmd.
 281
 282     cmd must be a string, bytes, or a sequence of one or the other,
 283     and the assumption is that if cmd is a string or bytes, then it's
 284     already quoted (because it's what's actually being passed to
 285     call() and friends.  e.g. log(shstr(cmd)); call(cmd)
 286
 287     """
 288     if isinstance(cmd, (bytes, compat.str_type)):
 289         return cmd
 290     elif all(isinstance(x, bytes) for x in cmd):
 291         return b' '.join(map(bquote, cmd))
 292     elif all(isinstance(x, compat.str_type) for x in cmd):
 293         return ' '.join(map(squote, cmd))
 294     raise TypeError('unsupported shstr argument: ' + repr(cmd))
 295
 296
 297 exc = subprocess.check_call
 298
 299 def exo(cmd,
 300         input=None,
 301         stdin=None,
 302         stderr=None,
 303         shell=False,
 304         check=True,
 305         preexec_fn=None,
 306         close_fds=True):
 307     if input:
 308         assert stdin in (None, PIPE)
 309         stdin = PIPE
 310     p = Popen(cmd,
 311               stdin=stdin, stdout=PIPE, stderr=stderr,
 312               shell=shell,
 313               preexec_fn=preexec_fn,
 314               close_fds=close_fds)
 315     out, err = p.communicate(input)
 316     if check and p.returncode != 0:
 317         raise Exception('subprocess %r failed with status %d%s'
 318                         % (b' '.join(map(quote, cmd)), p.returncode,
 319                            ', stderr: %r' % err if err else ''))
 320     return out, err, p
 321
 322 def readpipe(argv, preexec_fn=None, shell=False):
 323     """Run a subprocess and return its output."""
 324     return exo(argv, preexec_fn=preexec_fn, shell=shell)[0]
 325
 326
 327 def _argmax_base(command):
 328     base_size = 2048
 329     for c in command:
 330         base_size += len(command) + 1
 331     for k, v in compat.items(environ):
 332         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 333     return base_size
 334
 335
 336 def _argmax_args_size(args):
 337     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 338
 339
 340 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 341     """If args is not empty, yield the output produced by calling the
 342 command list with args as a sequence of strings (It may be necessary
 343 to return multiple strings in order to respect ARG_MAX)."""
 344     # The optional arg_max arg is a workaround for an issue with the
 345     # current wvtest behavior.
 346     base_size = _argmax_base(command)
 347     while args:
 348         room = arg_max - base_size
 349         i = 0
 350         while i < len(args):
 351             next_size = _argmax_args_size(args[i:i+1])
 352             if room - next_size < 0:
 353                 break
 354             room -= next_size
 355             i += 1
 356         sub_args = args[:i]
 357         args = args[i:]
 358         assert(len(sub_args))
 359         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 360
 361
 362 def resolve_parent(p):
 363     """Return the absolute path of a file without following any final symlink.
 364
 365     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 366     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 367     will follow symlinks in p's directory)
 368     """
 369     try:
 370         st = os.lstat(p)
 371     except OSError:
 372         st = None
 373     if st and stat.S_ISLNK(st.st_mode):
 374         (dir, name) = os.path.split(p)
 375         dir = os.path.realpath(dir)
 376         out = os.path.join(dir, name)
 377     else:
 378         out = os.path.realpath(p)
 379     #log('realpathing:%r,%r\n' % (p, out))
 380     return out
 381
 382
 383 def detect_fakeroot():
 384     "Return True if we appear to be running under fakeroot."
 385     return os.getenv("FAKEROOTKEY") != None
 386
 387
 388 if sys.platform.startswith('cygwin'):
 389     def is_superuser():
 390         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 391         groups = os.getgroups()
 392         return 544 in groups or 0 in groups
 393 else:
 394     def is_superuser():
 395         return os.geteuid() == 0
 396
 397
 398 def cache_key_value(get_value, key, cache):
 399     """Return (value, was_cached).  If there is a value in the cache
 400     for key, use that, otherwise, call get_value(key) which should
 401     throw a KeyError if there is no value -- in which case the cached
 402     and returned value will be None.
 403     """
 404     try: # Do we already have it (or know there wasn't one)?
 405         value = cache[key]
 406         return value, True
 407     except KeyError:
 408         pass
 409     value = None
 410     try:
 411         cache[key] = value = get_value(key)
 412     except KeyError:
 413         cache[key] = None
 414     return value, False
 415
 416
 417 _hostname = None
 418 def hostname():
 419     """Get the FQDN of this machine."""
 420     global _hostname
 421     if not _hostname:
 422         _hostname = _helpers.gethostname()
 423     return _hostname
 424
 425
 426 def format_filesize(size):
 427     unit = 1024.0
 428     size = float(size)
 429     if size < unit:
 430         return "%d" % (size)
 431     exponent = int(math.log(size) // math.log(unit))
 432     size_prefix = "KMGTPE"[exponent - 1]
 433     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 434
 435
 436 class NotOk(Exception):
 437     pass
 438
 439
 440 class BaseConn:
 441     def __init__(self, outp):
 442         self.outp = outp
 443
 444     def close(self):
 445         while self._read(65536): pass
 446
 447     def _read(self, size):
 448         raise NotImplementedError("Subclasses must implement _read")
 449
 450     def read(self, size):
 451         """Read 'size' bytes from input stream."""
 452         self.outp.flush()
 453         return self._read(size)
 454
 455     def _readline(self, size):
 456         raise NotImplementedError("Subclasses must implement _readline")
 457
 458     def readline(self):
 459         """Read from input stream until a newline is found."""
 460         self.outp.flush()
 461         return self._readline()
 462
 463     def write(self, data):
 464         """Write 'data' to output stream."""
 465         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 466         self.outp.write(data)
 467
 468     def has_input(self):
 469         """Return true if input stream is readable."""
 470         raise NotImplementedError("Subclasses must implement has_input")
 471
 472     def ok(self):
 473         """Indicate end of output from last sent command."""
 474         self.write(b'\nok\n')
 475
 476     def error(self, s):
 477         """Indicate server error to the client."""
 478         s = re.sub(br'\s+', b' ', s)
 479         self.write(b'\nerror %s\n' % s)
 480
 481     def _check_ok(self, onempty):
 482         self.outp.flush()
 483         rl = b''
 484         for rl in linereader(self):
 485             #log('%d got line: %r\n' % (os.getpid(), rl))
 486             if not rl:  # empty line
 487                 continue
 488             elif rl == b'ok':
 489                 return None
 490             elif rl.startswith(b'error '):
 491                 #log('client: error: %s\n' % rl[6:])
 492                 return NotOk(rl[6:])
 493             else:
 494                 onempty(rl)
 495         raise Exception('server exited unexpectedly; see errors above')
 496
 497     def drain_and_check_ok(self):
 498         """Remove all data for the current command from input stream."""
 499         def onempty(rl):
 500             pass
 501         return self._check_ok(onempty)
 502
 503     def check_ok(self):
 504         """Verify that server action completed successfully."""
 505         def onempty(rl):
 506             raise Exception('expected "ok", got %r' % rl)
 507         return self._check_ok(onempty)
 508
 509
 510 class Conn(BaseConn):
 511     def __init__(self, inp, outp):
 512         BaseConn.__init__(self, outp)
 513         self.inp = inp
 514
 515     def _read(self, size):
 516         return self.inp.read(size)
 517
 518     def _readline(self):
 519         return self.inp.readline()
 520
 521     def has_input(self):
 522         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 523         if rl:
 524             assert(rl[0] == self.inp.fileno())
 525             return True
 526         else:
 527             return None
 528
 529
 530 def checked_reader(fd, n):
 531     while n > 0:
 532         rl, _, _ = select.select([fd], [], [])
 533         assert(rl[0] == fd)
 534         buf = os.read(fd, n)
 535         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 536         yield buf
 537         n -= len(buf)
 538
 539
# Largest payload read from outr for a single packet; DemuxConn treats a
# larger announced length as a broken connection.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex the descriptors outr and errr onto outfd as packets.

    While p.poll() reports that the process is still running, data
    readable on outr or errr is forwarded to outfd.  Every packet
    starts with a header packed as '!IB': the payload length followed
    by a channel byte (1 for outr data, 2 for errr data).  A final
    zero-length packet on channel 3 is always written, even when an
    exception escapes the loop.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # An empty read only leaves the inner for loop; the
                    # outer loop keeps running until p has exited.
                    # NOTE(review): looks like this could spin if one
                    # side hits EOF while p is still running - confirm.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
        # NOTE(review): data still buffered in outr/errr when p exits
        # does not appear to be forwarded - verify against callers.
    finally:
        # Channel 3 with no payload marks the end of the stream.
        os.write(outfd, struct.pack('!IB', 0, 3))
 557
 558
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the packet stream produced by mux() from the file descriptor
    infd: channel 1 payloads become this connection's input, channel 2
    payloads are copied to stderr, and a channel 3 packet marks the
    connection as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        stderr = byte_stream(sys.stderr)
        while tail != b'BUPMUX':
            # Make sure to write all pre-BUPMUX output to stderr
            # Read enough to fill tail up to 6 bytes, then one byte at a
            # time so we never read past the sync string.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                ex = IOError('demux: unexpected EOF during initialization')
                # presumably pending_raise raises ex once the body has
                # run; verify against bup.compat
                with pending_raise(ex):
                    stderr.write(tail)
                    stderr.flush()
            tail += b
            # Keep only the last 6 bytes as the sync candidate; everything
            # before them is passed through to stderr.
            stderr.write(tail[:-6])
            tail = tail[-6:]
        stderr.flush()
        self.infd = infd
        # Generator over the payload of the current channel 1 packet.
        self.reader = None
        # Data already read from the stream but not yet handed out.
        self.buf = None
        # Set once a channel 3 packet has been seen.
        self.closed = False

    def write(self, data):
        # Process any packets that are already available (zero timeout)
        # before writing.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read and dispatch one packet header.

        Returns False if the connection is closed or nothing became
        readable within timeout, True after handling a packet.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        # Header: payload length and channel byte, packed as '!IB'.
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        if n > MAX_PACKET:
            # assume that something went wrong and print stuff
            ns += os.read(self.infd, 1024)
            stderr = byte_stream(sys.stderr)
            stderr.write(ns)
            stderr.flush()
            raise Exception("Connection broken")
        if fdw == 1:
            # Payload is read lazily through self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return True if it does.

        Returns False if no data packet arrived within timeout or the
        connection has been closed.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current packet fully consumed; move on to the next.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of input until ix_fn reports a split point.

        ix_fn(buf) returns None to consume the whole buffer and keep
        going, or an index at which to cut the buffer and stop.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            # Index just past the first newline, or None if there is none.
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        # Remaining byte count, kept in a list so the closure can update it.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        return self._load_buf(0)
 660
 661
 662 def linereader(f):
 663     """Generate a list of input lines from 'f' without terminating newlines."""
 664     while 1:
 665         line = f.readline()
 666         if not line:
 667             break
 668         yield line[:-1]
 669
 670
 671 def chunkyreader(f, count = None):
 672     """Generate a list of chunks of data read from 'f'.
 673
 674     If count is None, read until EOF is reached.
 675
 676     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 677     reached while reading, raise IOError.
 678     """
 679     if count != None:
 680         while count > 0:
 681             b = f.read(min(count, 65536))
 682             if not b:
 683                 raise IOError('EOF with %d bytes remaining' % count)
 684             yield b
 685             count -= len(b)
 686     else:
 687         while 1:
 688             b = f.read(65536)
 689             if not b: break
 690             yield b
 691
 692
 693 @contextmanager
 694 def atomically_replaced_file(name, mode='w', buffering=-1):
 695     """Yield a file that will be atomically renamed name when leaving the block.
 696
 697     This contextmanager yields an open file object that is backed by a
 698     temporary file which will be renamed (atomically) to the target
 699     name if everything succeeds.
 700
 701     The mode and buffering arguments are handled exactly as with open,
 702     and the yielded file will have very restrictive permissions, as
 703     per mkstemp.
 704
 705     E.g.::
 706
 707         with atomically_replaced_file('foo.txt', 'w') as f:
 708             f.write('hello jack.')
 709
 710     """
 711
 712     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 713                                        text=('b' not in mode))
 714     try:
 715         try:
 716             f = os.fdopen(ffd, mode, buffering)
 717         except:
 718             os.close(ffd)
 719             raise
 720         try:
 721             yield f
 722         finally:
 723             f.close()
 724         os.rename(tempname, name)
 725     finally:
 726         unlink(tempname)  # nonexistant file is ignored
 727
 728
 729 def slashappend(s):
 730     """Append "/" to 's' if it doesn't aleady end in "/"."""
 731     assert isinstance(s, bytes)
 732     if s and not s.endswith(b'/'):
 733         return s + b'/'
 734     else:
 735         return s
 736
 737
 738 def _mmap_do(f, sz, flags, prot, close):
 739     if not sz:
 740         st = os.fstat(f.fileno())
 741         sz = st.st_size
 742     if not sz:
 743         # trying to open a zero-length map gives an error, but an empty
 744         # string has all the same behaviour of a zero-length map, ie. it has
 745         # no elements :)
 746         return ''
 747     map = mmap.mmap(f.fileno(), sz, flags, prot)
 748     if close:
 749         f.close()  # map will persist beyond file close
 750     return map
 751
 752
 753 def mmap_read(f, sz = 0, close=True):
 754     """Create a read-only memory mapped region on file 'f'.
 755     If sz is 0, the region will cover the entire file.
 756     """
 757     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 758
 759
 760 def mmap_readwrite(f, sz = 0, close=True):
 761     """Create a read-write memory mapped region on file 'f'.
 762     If sz is 0, the region will cover the entire file.
 763     """
 764     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 765                     close)
 766
 767
 768 def mmap_readwrite_private(f, sz = 0, close=True):
 769     """Create a read-write memory mapped region on file 'f'.
 770     If sz is 0, the region will cover the entire file.
 771     The map is private, which means the changes are never flushed back to the
 772     file.
 773     """
 774     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 775                     close)
 776
 777
 778 _mincore = getattr(_helpers, 'mincore', None)
 779 if _mincore:
 780     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 781     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 782
 783     _fmincore_chunk_size = None
 784     def _set_fmincore_chunk_size():
 785         global _fmincore_chunk_size
 786         pref_chunk_size = 64 * 1024 * 1024
 787         chunk_size = sc_page_size
 788         if (sc_page_size < pref_chunk_size):
 789             chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
 790         _fmincore_chunk_size = chunk_size
 791
 792     def fmincore(fd):
 793         """Return the mincore() data for fd as a bytearray whose values can be
 794         tested via MINCORE_INCORE, or None if fd does not fully
 795         support the operation."""
 796         st = os.fstat(fd)
 797         if (st.st_size == 0):
 798             return bytearray(0)
 799         if not _fmincore_chunk_size:
 800             _set_fmincore_chunk_size()
 801         pages_per_chunk = _fmincore_chunk_size // sc_page_size;
 802         page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
 803         chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size
 804         result = bytearray(page_count)
 805         for ci in compat.range(chunk_count):
 806             pos = _fmincore_chunk_size * ci;
 807             msize = min(_fmincore_chunk_size, st.st_size - pos)
 808             try:
 809                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 810             except mmap.error as ex:
 811                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 812                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 813                     return None
 814                 raise ex
 815             try:
 816                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 817             except OSError as ex:
 818                 if ex.errno == errno.ENOSYS:
 819                     return None
 820                 raise
 821         return result
 822
 823
 824 def parse_timestamp(epoch_str):
 825     """Return the number of nanoseconds since the epoch that are described
 826 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 827 throw a ValueError that may contain additional information."""
 828     ns_per = {'s' :  1000000000,
 829               'ms' : 1000000,
 830               'us' : 1000,
 831               'ns' : 1}
 832     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 833     if not match:
 834         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 835             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 836         raise ValueError()
 837     (n, units) = match.group(1, 2)
 838     if not n:
 839         n = 1
 840     n = int(n)
 841     return n * ns_per[units]
 842
 843
 844 def parse_num(s):
 845     """Parse string or bytes as a possibly unit suffixed number.
 846
 847     For example:
 848         199.2k means 203981 bytes
 849         1GB means 1073741824 bytes
 850         2.1 tb means 2199023255552 bytes
 851     """
 852     if isinstance(s, bytes):
 853         # FIXME: should this raise a ValueError for UnicodeDecodeError
 854         # (perhaps with the latter as the context).
 855         s = s.decode('ascii')
 856     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 857     if not g:
 858         raise ValueError("can't parse %r as a number" % s)
 859     (val, unit) = g.groups()
 860     num = float(val)
 861     unit = unit.lower()
 862     if unit in ['t', 'tb']:
 863         mult = 1024*1024*1024*1024
 864     elif unit in ['g', 'gb']:
 865         mult = 1024*1024*1024
 866     elif unit in ['m', 'mb']:
 867         mult = 1024*1024
 868     elif unit in ['k', 'kb']:
 869         mult = 1024
 870     elif unit in ['', 'b']:
 871         mult = 1
 872     else:
 873         raise ValueError("invalid unit %r in number %r" % (unit, s))
 874     return int(num*mult)
 875
 876
 877 saved_errors = []
 878 def add_error(e):
 879     """Append an error message to the list of saved errors.
 880
 881     Once processing is able to stop and output the errors, the saved errors are
 882     accessible in the module variable helpers.saved_errors.
 883     """
 884     saved_errors.append(e)
 885     log('%-70s\n' % e)
 886
 887
 888 def clear_errors():
 889     global saved_errors
 890     saved_errors = []
 891
 892
 893 def die_if_errors(msg=None, status=1):
 894     global saved_errors
 895     if saved_errors:
 896         if not msg:
 897             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 898         log(msg)
 899         sys.exit(status)
 900
 901
 902 def handle_ctrl_c():
 903     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 904
 905     The new exception handler will make sure that bup will exit without an ugly
 906     stacktrace when Ctrl-C is hit.
 907     """
 908     oldhook = sys.excepthook
 909     def newhook(exctype, value, traceback):
 910         if exctype == KeyboardInterrupt:
 911             log('\nInterrupted.\n')
 912         else:
 913             oldhook(exctype, value, traceback)
 914     sys.excepthook = newhook
 915
 916
 917 def columnate(l, prefix):
 918     """Format elements of 'l' in columns with 'prefix' leading each line.
 919
 920     The number of columns is determined automatically based on the string
 921     lengths.
 922     """
 923     binary = isinstance(prefix, bytes)
 924     nothing = b'' if binary else ''
 925     nl = b'\n' if binary else '\n'
 926     if not l:
 927         return nothing
 928     l = l[:]
 929     clen = max(len(s) for s in l)
 930     ncols = (tty_width() - len(prefix)) // (clen + 2)
 931     if ncols <= 1:
 932         ncols = 1
 933         clen = 0
 934     cols = []
 935     while len(l) % ncols:
 936         l.append(nothing)
 937     rows = len(l) // ncols
 938     for s in compat.range(0, len(l), rows):
 939         cols.append(l[s:s+rows])
 940     out = nothing
 941     fmt = b'%-*s' if binary else '%-*s'
 942     for row in zip(*cols):
 943         out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
 944     return out
 945
 946
 947 def parse_date_or_fatal(str, fatal):
 948     """Parses the given date or calls Option.fatal().
 949     For now we expect a string that contains a float."""
 950     try:
 951         date = float(str)
 952     except ValueError as e:
 953         raise fatal('invalid date format (should be a float): %r' % e)
 954     else:
 955         return date
 956
 957
 958 def parse_excludes(options, fatal):
 959     """Traverse the options and extract all excludes, or call Option.fatal()."""
 960     excluded_paths = []
 961
 962     for flag in options:
 963         (option, parameter) = flag
 964         if option == '--exclude':
 965             excluded_paths.append(resolve_parent(argv_bytes(parameter)))
 966         elif option == '--exclude-from':
 967             try:
 968                 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
 969             except IOError as e:
 970                 raise fatal("couldn't read %r" % parameter)
 971             for exclude_path in f.readlines():
 972                 # FIXME: perhaps this should be rstrip('\n')
 973                 exclude_path = resolve_parent(exclude_path.strip())
 974                 if exclude_path:
 975                     excluded_paths.append(exclude_path)
 976     return sorted(frozenset(excluded_paths))
 977
 978
 979 def parse_rx_excludes(options, fatal):
 980     """Traverse the options and extract all rx excludes, or call
 981     Option.fatal()."""
 982     excluded_patterns = []
 983
 984     for flag in options:
 985         (option, parameter) = flag
 986         if option == '--exclude-rx':
 987             try:
 988                 excluded_patterns.append(re.compile(argv_bytes(parameter)))
 989             except re.error as ex:
 990                 fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
 991         elif option == '--exclude-rx-from':
 992             try:
 993                 f = open(resolve_parent(parameter), 'rb')
 994             except IOError as e:
 995                 raise fatal("couldn't read %r" % parameter)
 996             for pattern in f.readlines():
 997                 spattern = pattern.rstrip(b'\n')
 998                 if not spattern:
 999                     continue
1000                 try:
1001                     excluded_patterns.append(re.compile(spattern))
1002                 except re.error as ex:
1003                     fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
1004     return excluded_patterns
1005
1006
def should_rx_exclude_path(path, exclude_rxs):
    """Tell whether any compiled pattern in exclude_rxs is found in path.

    The patterns are tried in order with search(); the first one that
    matches is reported via debug1() and True is returned.  Returns
    False when nothing matches.
    """
    hit = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if hit is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, hit.pattern))
    return True
1015
1016
1017 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1018 # that resolve against the current filesystem in the strip/graft
1019 # functions for example, but elsewhere as well.  I suspect bup's not
1020 # always being careful about that.  For some cases, the contents of
1021 # the current filesystem should be irrelevant, and consulting it might
1022 # produce the wrong result, perhaps via unintended symlink resolution,
1023 # for example.
1024
def path_components(path):
    """Split path into a list of (name, full_path_to_name) pairs.

    Path must be bytes starting with b'/', otherwise an Exception is
    raised.  The path is normalized with os.path.abspath() first, and
    the root is always the first pair.
    Example:
      b'/home/foo' -> [(b'', b'/'), (b'home', b'/home'),
                       (b'foo', b'/home/foo')]"""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    components = [(b'', b'/')]
    normalized = os.path.abspath(path)
    if normalized != b'/':
        so_far = b''
        # The piece before the leading '/' is the root, which is already
        # in the result.
        for name in normalized.split(b'/')[1:]:
            so_far = so_far + b'/' + name
            components.append((name, so_far))
    return components
1042
1043
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    The path and the prefixes are normalized with os.path.abspath().
    Prefixes are tried longest first, the root is never stripped, and a
    prefix only matches on a component boundary: b'/foo' is a prefix of
    b'/foo' and b'/foo/bar', but not of b'/foobar'.  The first pair of a
    stripped result is (b'', prefix).  When no prefix matches, the
    result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == b'/':
            continue
        # A plain startswith() would also accept b'/foo' for b'/foobar'
        # and then fabricate paths like b'/foo/bar'.
        if normalized_path != normalized_bp \
           and not normalized_path.startswith(normalized_bp + b'/'):
            continue
        prefix = normalized_bp
        result = []
        for p in normalized_path[len(normalized_bp):].split(b'/'):
            if p: # not root
                prefix += b'/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1066
1067
def grafted_path_components(graft_points, path):
    """Return the components of path with the first matching graft applied.

    graft_points is a sequence of (old_prefix, new_prefix) pairs, as
    given by --graft old=new.  The result is a list of (name,
    full_fs_path_or_None) pairs; when no graft point matches, it is
    path_components() of the normalized path.  A graft point matches
    only on a component boundary, so b'/foo' applies to b'/foo' and
    b'/foo/bar', but not to b'/foobar'.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize the prefixes; normpath() does not make them absolute.
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if old_prefix.endswith(b'/'):
            # The root: every absolute path is below it.
            matches = clean_path.startswith(old_prefix)
        else:
            matches = (clean_path == old_prefix
                       or clean_path.startswith(old_prefix + b'/'))
        if matches:
            # Plain concatenation, so that a backslash in new_prefix is
            # taken literally (re.sub() would treat it as an escape).
            grafted_path = new_prefix + clean_path[len(old_prefix):]
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = b'/' + grafted_path.lstrip(b'/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
            new_prefix_parts = new_prefix.split(b'/')
            result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            graft_index = len(result_prefix)
            result[graft_index] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == b'/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
1109
1110
# Module-level alias for the hashlib SHA-1 constructor.
Sha1 = hashlib.sha1
1112
1113
# The C helper, when the _helpers module provides one; None otherwise.
_localtime = getattr(_helpers, 'localtime', None)

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

    def localtime(time):
        """Return the local time for 'time' (rounded down to a whole
        number of seconds) as a bup_time."""
        return bup_time(*_helpers.localtime(int(floor(time))))

    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        h, m = divmod(offmin, 60)
        # The sign is formatted on its own: negating a zero hour would
        # turn an offset such as -00:30 into "+0030".  An offset that
        # truncates to zero minutes stays "+0000".
        sign = b'-' if (off < 0 and offmin) else b'+'
        return b'%s%02d%02d' % (sign, h, m)

    def to_py_time(x):
        """Return x as a time.struct_time, dropping the extra bup_time
        fields (anything after the first nine items)."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime

    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted by
        strftime's %z, as bytes."""
        # strftime() only accepts a str format on Python 3, so format
        # as str and convert, matching the bytes result of the other
        # implementation.
        return time.strftime('%z', localtime(t)).encode('ascii')

    def to_py_time(x):
        """Return x unchanged; localtime() is time.localtime() here."""
        return x
1149
1150
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')
# ASCII control characters: anything below space, and DEL.
_save_name_control_chars_rx = re.compile(br'[\x00-\x1f\x7f]')

def valid_save_name(name):
    """Return True if the bytes 'name' is acceptable as a save name.

    Rejected are: an empty name, b'@', names that start or end with
    '/' or end with '.', names containing a space, control character,
    or any of [ ~ ^ : ? * \\ as well as '..', '//' or '@{', and names
    with a '/'-separated part that starts with '.' or ends with '.lock'.
    """
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if not name:
        # None of the checks below would catch an empty name.
        return False
    if name == b'@' \
       or name.startswith(b'/') or name.endswith(b'/') \
       or name.endswith(b'.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if _save_name_control_chars_rx.search(name):
        return False
    for part in name.split(b'/'):
        if part.startswith(b'.') or part.endswith(b'.lock'):
            return False
    return True
1168
1169
_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period such as b'30s', b'2min' or b'1y' to seconds.

    The units are s, min, h, d, w, m (counted as 31 days) and y (counted
    as 366 days).  b'forever' gives float('inf'); anything that is not
    a decimal count directly followed by a unit gives None.
    """
    if s == b'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = hour * 24
    secs_per_unit = {
        b's': 1,
        b'min': minute,
        b'h': hour,
        b'd': day,
        b'w': day * 7,
        b'm': day * 31,
        b'y': day * 366,
    }
    return int(count) * secs_per_unit[unit]