lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from math import floor
   8 from os import environ
   9 from subprocess import PIPE, Popen
  10 import sys, os, subprocess, errno, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 from bup.compat import argv_bytes, byte_int, pending_raise
  16 from bup.io import byte_stream, path_msg
  17 # This function should really be in helpers, not in bup.options.  But we
  18 # want options.py to be standalone so people can include it in other projects.
  19 from bup.options import _tty_width as tty_width
  20
  21
# Debug verbosity, read once at import time from the BUP_DEBUG
# environment variable (0 when unset).  debug1() and debug2() compare
# against it before logging.
buglvl = int(os.environ.get('BUP_DEBUG', 0))
  23
  24
  25 class Nonlocal:
  26     """Helper to deal with Python scoping issues"""
  27     pass
  28
  29
# System page size as reported by sysconf(); used below to size
# page-granular work (see fmincore).
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Argument-size limit as reported by sysconf(); the default arg_max for
# batchpipe().
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  36
  37 def last(iterable):
  38     result = None
  39     for result in iterable:
  40         pass
  41     return result
  42
  43 try:
  44     _fdatasync = os.fdatasync
  45 except AttributeError:
  46     _fdatasync = os.fsync
  47
  48 if sys.platform.startswith('darwin'):
  49     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  50     import fcntl
  51     def fdatasync(fd):
  52         try:
  53             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  54         except IOError as e:
  55             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  56             if e.errno == errno.ENOTSUP:
  57                 return _fdatasync(fd)
  58             else:
  59                 raise
  60 else:
  61     fdatasync = _fdatasync
  62
  63
  64 def partition(predicate, stream):
  65     """Returns (leading_matches_it, rest_it), where leading_matches_it
  66     must be completely exhausted before traversing rest_it.
  67
  68     """
  69     stream = iter(stream)
  70     ns = Nonlocal()
  71     ns.first_nonmatch = None
  72     def leading_matches():
  73         for x in stream:
  74             if predicate(x):
  75                 yield x
  76             else:
  77                 ns.first_nonmatch = (x,)
  78                 break
  79     def rest():
  80         if ns.first_nonmatch:
  81             yield ns.first_nonmatch[0]
  82             for x in stream:
  83                 yield x
  84     return (leading_matches(), rest())
  85
  86
  87 def merge_dict(*xs):
  88     result = {}
  89     for x in xs:
  90         result.update(x)
  91     return result
  92
  93
  94 def lines_until_sentinel(f, sentinel, ex_type):
  95     # sentinel must end with \n and must contain only one \n
  96     while True:
  97         line = f.readline()
  98         if not (line and line.endswith(b'\n')):
  99             raise ex_type('Hit EOF while reading line')
 100         if line == sentinel:
 101             return
 102         yield line
 103
 104
 105 def stat_if_exists(path):
 106     try:
 107         return os.stat(path)
 108     except OSError as e:
 109         if e.errno != errno.ENOENT:
 110             raise
 111     return None
 112
 113
 114 # Write (blockingly) to sockets that may or may not be in blocking mode.
 115 # We need this because our stderr is sometimes eaten by subprocesses
 116 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 117 # leading to race conditions.  Ick.  We'll do it the hard way.
 118 def _hard_write(fd, buf):
 119     while buf:
 120         (r,w,x) = select.select([], [fd], [], None)
 121         if not w:
 122             raise IOError('select(fd) returned without being writable')
 123         try:
 124             sz = os.write(fd, buf)
 125         except OSError as e:
 126             if e.errno != errno.EAGAIN:
 127                 raise
 128         assert(sz >= 0)
 129         buf = buf[sz:]
 130
 131
 132 _last_prog = 0
 133 def log(s):
 134     """Print a log message to stderr."""
 135     global _last_prog
 136     sys.stdout.flush()
 137     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 138     _last_prog = 0
 139
 140
 141 def debug1(s):
 142     if buglvl >= 1:
 143         log(s)
 144
 145
 146 def debug2(s):
 147     if buglvl >= 2:
 148         log(s)
 149
 150
# Truthy when fd 1 / fd 2 is a terminal, or when the matching bit of the
# BUP_FORCE_TTY environment variable is set (bit 1 for fd 1, bit 2 for
# fd 2).  Evaluated once at import time.
istty1 = os.isatty(1) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 1)
istty2 = os.isatty(2) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 2)
 153 _last_progress = ''
 154 def progress(s):
 155     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 156     global _last_progress
 157     if istty2:
 158         log(s)
 159         _last_progress = s
 160
 161
 162 def qprogress(s):
 163     """Calls progress() only if we haven't printed progress in a while.
 164
 165     This avoids overloading the stderr buffer with excess junk.
 166     """
 167     global _last_prog
 168     now = time.time()
 169     if now - _last_prog > 0.1:
 170         progress(s)
 171         _last_prog = now
 172
 173
 174 def reprogress():
 175     """Calls progress() to redisplay the most recent progress message.
 176
 177     Useful after you've printed some other message that wipes out the
 178     progress line.
 179     """
 180     if _last_progress and _last_progress.endswith('\r'):
 181         progress(_last_progress)
 182
 183
 184 def mkdirp(d, mode=None):
 185     """Recursively create directories on path 'd'.
 186
 187     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 188     the path already exists.
 189     """
 190     try:
 191         if mode:
 192             os.makedirs(d, mode)
 193         else:
 194             os.makedirs(d)
 195     except OSError as e:
 196         if e.errno == errno.EEXIST:
 197             pass
 198         else:
 199             raise
 200
 201
 202 class MergeIterItem:
 203     def __init__(self, entry, read_it):
 204         self.entry = entry
 205         self.read_it = read_it
 206     def __lt__(self, x):
 207         return self.entry < x.entry
 208
 209 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 210     if key:
 211         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 212     else:
 213         samekey = operator.eq
 214     count = 0
 215     total = sum(len(it) for it in iters)
 216     iters = (iter(it) for it in iters)
 217     heap = ((next(it, None),it) for it in iters)
 218     heap = [MergeIterItem(e, it) for e, it in heap if e]
 219
 220     heapq.heapify(heap)
 221     pe = None
 222     while heap:
 223         if not count % pfreq:
 224             pfunc(count, total)
 225         e, it = heap[0].entry, heap[0].read_it
 226         if not samekey(e, pe):
 227             pe = e
 228             yield e
 229         count += 1
 230         try:
 231             e = next(it)
 232         except StopIteration:
 233             heapq.heappop(heap) # remove current
 234         else:
 235             # shift current to new location
 236             heapq.heapreplace(heap, MergeIterItem(e, it))
 237     pfinal(count, total)
 238
 239
 240 def unlink(f):
 241     """Delete a file at path 'f' if it currently exists.
 242
 243     Unlike os.unlink(), does not throw an exception if the file didn't already
 244     exist.
 245     """
 246     try:
 247         os.unlink(f)
 248     except OSError as e:
 249         if e.errno != errno.ENOENT:
 250             raise
 251
 252
 253 _bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
 254 _sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
 255
 256 def bquote(x):
 257     if x == b'':
 258         return b"''"
 259     if _bq_simple_id_rx.match(x):
 260         return x
 261     return b"'%s'" % x.replace(b"'", b"'\"'\"'")
 262
 263 def squote(x):
 264     if x == '':
 265         return "''"
 266     if _sq_simple_id_rx.match(x):
 267         return x
 268     return "'%s'" % x.replace("'", "'\"'\"'")
 269
 270 def quote(x):
 271     if isinstance(x, bytes):
 272         return bquote(x)
 273     if isinstance(x, compat.str_type):
 274         return squote(x)
 275     assert False
 276
 277 def shstr(cmd):
 278     """Return a shell quoted string for cmd if it's a sequence, else cmd.
 279
 280     cmd must be a string, bytes, or a sequence of one or the other,
 281     and the assumption is that if cmd is a string or bytes, then it's
 282     already quoted (because it's what's actually being passed to
 283     call() and friends.  e.g. log(shstr(cmd)); call(cmd)
 284
 285     """
 286     if isinstance(cmd, (bytes, compat.str_type)):
 287         return cmd
 288     elif all(isinstance(x, bytes) for x in cmd):
 289         return b' '.join(map(bquote, cmd))
 290     elif all(isinstance(x, compat.str_type) for x in cmd):
 291         return ' '.join(map(squote, cmd))
 292     raise TypeError('unsupported shstr argument: ' + repr(cmd))
 293
 294
# Alias for subprocess.check_call: run a command and raise
# subprocess.CalledProcessError if it exits with a non-zero status.
exc = subprocess.check_call
 296
 297 def exo(cmd,
 298         input=None,
 299         stdin=None,
 300         stderr=None,
 301         shell=False,
 302         check=True,
 303         preexec_fn=None,
 304         close_fds=True):
 305     if input:
 306         assert stdin in (None, PIPE)
 307         stdin = PIPE
 308     p = Popen(cmd,
 309               stdin=stdin, stdout=PIPE, stderr=stderr,
 310               shell=shell,
 311               preexec_fn=preexec_fn,
 312               close_fds=close_fds)
 313     out, err = p.communicate(input)
 314     if check and p.returncode != 0:
 315         raise Exception('subprocess %r failed with status %d%s'
 316                         % (b' '.join(map(quote, cmd)), p.returncode,
 317                            ', stderr: %r' % err if err else ''))
 318     return out, err, p
 319
 320 def readpipe(argv, preexec_fn=None, shell=False):
 321     """Run a subprocess and return its output."""
 322     return exo(argv, preexec_fn=preexec_fn, shell=shell)[0]
 323
 324
 325 def _argmax_base(command):
 326     base_size = 2048
 327     for c in command:
 328         base_size += len(command) + 1
 329     for k, v in compat.items(environ):
 330         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 331     return base_size
 332
 333
 334 def _argmax_args_size(args):
 335     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 336
 337
 338 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 339     """If args is not empty, yield the output produced by calling the
 340 command list with args as a sequence of strings (It may be necessary
 341 to return multiple strings in order to respect ARG_MAX)."""
 342     # The optional arg_max arg is a workaround for an issue with the
 343     # current wvtest behavior.
 344     base_size = _argmax_base(command)
 345     while args:
 346         room = arg_max - base_size
 347         i = 0
 348         while i < len(args):
 349             next_size = _argmax_args_size(args[i:i+1])
 350             if room - next_size < 0:
 351                 break
 352             room -= next_size
 353             i += 1
 354         sub_args = args[:i]
 355         args = args[i:]
 356         assert(len(sub_args))
 357         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 358
 359
 360 def resolve_parent(p):
 361     """Return the absolute path of a file without following any final symlink.
 362
 363     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 364     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 365     will follow symlinks in p's directory)
 366     """
 367     try:
 368         st = os.lstat(p)
 369     except OSError:
 370         st = None
 371     if st and stat.S_ISLNK(st.st_mode):
 372         (dir, name) = os.path.split(p)
 373         dir = os.path.realpath(dir)
 374         out = os.path.join(dir, name)
 375     else:
 376         out = os.path.realpath(p)
 377     #log('realpathing:%r,%r\n' % (p, out))
 378     return out
 379
 380
 381 def detect_fakeroot():
 382     "Return True if we appear to be running under fakeroot."
 383     return os.getenv("FAKEROOTKEY") != None
 384
 385
 386 if sys.platform.startswith('cygwin'):
 387     def is_superuser():
 388         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 389         groups = os.getgroups()
 390         return 544 in groups or 0 in groups
 391 else:
 392     def is_superuser():
 393         return os.geteuid() == 0
 394
 395
 396 def cache_key_value(get_value, key, cache):
 397     """Return (value, was_cached).  If there is a value in the cache
 398     for key, use that, otherwise, call get_value(key) which should
 399     throw a KeyError if there is no value -- in which case the cached
 400     and returned value will be None.
 401     """
 402     try: # Do we already have it (or know there wasn't one)?
 403         value = cache[key]
 404         return value, True
 405     except KeyError:
 406         pass
 407     value = None
 408     try:
 409         cache[key] = value = get_value(key)
 410     except KeyError:
 411         cache[key] = None
 412     return value, False
 413
 414
 415 _hostname = None
 416 def hostname():
 417     """Get the FQDN of this machine."""
 418     global _hostname
 419     if not _hostname:
 420         _hostname = _helpers.gethostname()
 421     return _hostname
 422
 423
 424 def format_filesize(size):
 425     unit = 1024.0
 426     size = float(size)
 427     if size < unit:
 428         return "%d" % (size)
 429     exponent = int(math.log(size) // math.log(unit))
 430     size_prefix = "KMGTPE"[exponent - 1]
 431     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 432
 433
 434 class NotOk(Exception):
 435     pass
 436
 437
 438 class BaseConn:
 439     def __init__(self, outp):
 440         self.outp = outp
 441
 442     def close(self):
 443         while self._read(65536): pass
 444
 445     def _read(self, size):
 446         raise NotImplementedError("Subclasses must implement _read")
 447
 448     def read(self, size):
 449         """Read 'size' bytes from input stream."""
 450         self.outp.flush()
 451         return self._read(size)
 452
 453     def _readline(self, size):
 454         raise NotImplementedError("Subclasses must implement _readline")
 455
 456     def readline(self):
 457         """Read from input stream until a newline is found."""
 458         self.outp.flush()
 459         return self._readline()
 460
 461     def write(self, data):
 462         """Write 'data' to output stream."""
 463         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 464         self.outp.write(data)
 465
 466     def has_input(self):
 467         """Return true if input stream is readable."""
 468         raise NotImplementedError("Subclasses must implement has_input")
 469
 470     def ok(self):
 471         """Indicate end of output from last sent command."""
 472         self.write(b'\nok\n')
 473
 474     def error(self, s):
 475         """Indicate server error to the client."""
 476         s = re.sub(br'\s+', b' ', s)
 477         self.write(b'\nerror %s\n' % s)
 478
 479     def _check_ok(self, onempty):
 480         self.outp.flush()
 481         rl = b''
 482         for rl in linereader(self):
 483             #log('%d got line: %r\n' % (os.getpid(), rl))
 484             if not rl:  # empty line
 485                 continue
 486             elif rl == b'ok':
 487                 return None
 488             elif rl.startswith(b'error '):
 489                 #log('client: error: %s\n' % rl[6:])
 490                 return NotOk(rl[6:])
 491             else:
 492                 onempty(rl)
 493         raise Exception('server exited unexpectedly; see errors above')
 494
 495     def drain_and_check_ok(self):
 496         """Remove all data for the current command from input stream."""
 497         def onempty(rl):
 498             pass
 499         return self._check_ok(onempty)
 500
 501     def check_ok(self):
 502         """Verify that server action completed successfully."""
 503         def onempty(rl):
 504             raise Exception('expected "ok", got %r' % rl)
 505         return self._check_ok(onempty)
 506
 507
 508 class Conn(BaseConn):
 509     def __init__(self, inp, outp):
 510         BaseConn.__init__(self, outp)
 511         self.inp = inp
 512
 513     def _read(self, size):
 514         return self.inp.read(size)
 515
 516     def _readline(self):
 517         return self.inp.readline()
 518
 519     def has_input(self):
 520         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 521         if rl:
 522             assert(rl[0] == self.inp.fileno())
 523             return True
 524         else:
 525             return None
 526
 527
 528 def checked_reader(fd, n):
 529     while n > 0:
 530         rl, _, _ = select.select([fd], [], [])
 531         assert(rl[0] == fd)
 532         buf = os.read(fd, n)
 533         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 534         yield buf
 535         n -= len(buf)
 536
 537
# Largest chunk mux() reads from outr in one call.  DemuxConn treats a
# packet announcing a longer payload as a broken connection.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex two file descriptors onto outfd while process p runs.

    Each packet written to outfd is a header packed as '!IB' (payload
    length, channel) followed by the payload.  Data read from outr goes
    out on channel 1 and data read from errr on channel 2.  A final
    zero-length packet on channel 3 is always written, even if an
    exception ends the loop.

    p only needs a poll() method that returns None while the process is
    still running.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # NOTE(review): on EOF this leaves only the inner
                    # for loop; the outer loop keeps running until
                    # p.poll() reports that the process has exited.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        # Channel 3 with no payload marks the end of the stream.
        os.write(outfd, struct.pack('!IB', 0, 3))
 555
 556
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a packet stream from the file descriptor 'infd'.  Every
    packet starts with a 5-byte header unpacked as '!IB' (payload
    length, channel): channel 1 payloads become the connection's input
    data, channel 2 payloads are copied to stderr, and channel 3 marks
    the stream as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        stderr = byte_stream(sys.stderr)
        while tail != b'BUPMUX':
            # Make sure to write all pre-BUPMUX output to stderr
            # Read enough to fill the 6-byte window first, then advance
            # one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                ex = IOError('demux: unexpected EOF during initialization')
                # NOTE(review): presumably pending_raise() raises ex once
                # the block finishes, which is what ends this loop on
                # EOF -- confirm against bup.compat.
                with pending_raise(ex):
                    stderr.write(tail)
                    stderr.flush()
            tail += b
            # Everything that has slid out of the 6-byte window is
            # ordinary output; pass it on.
            stderr.write(tail[:-6])
            tail = tail[-6:]
        stderr.flush()
        self.infd = infd
        self.reader = None   # checked_reader over the current data packet
        self.buf = None      # data read from the packet but not yet consumed
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        # Poll (timeout 0) for an incoming packet before writing.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and dispatch on its channel.

        Returns False if the stream is closed or nothing becomes
        readable within 'timeout', and True once a packet header has
        been handled.  Raises Exception if the announced length exceeds
        MAX_PACKET.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        if n > MAX_PACKET:
            # assume that something went wrong and print stuff
            ns += os.read(self.infd, 1024)
            stderr = byte_stream(sys.stderr)
            stderr.write(ns)
            stderr.flush()
            raise Exception("Connection broken")
        if fdw == 1:
            # Data packet: the payload is read lazily by _load_buf().
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # Error-channel packet: copy the payload straight to stderr.
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return whether it does.

        Returns False when the stream is closed or when no packet
        arrives within 'timeout'.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current data packet is used up; go on to the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of input until ix_fn says where to stop.

        ix_fn(buf) returns None to take the whole buffer and carry on,
        or the number of leading bytes to take, after which reading
        stops and the rest stays buffered.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            # Index just past the first newline, or None if there is none.
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        # One-element list so that the nested function can update the
        # number of bytes still wanted.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        """Return whether data is available without waiting."""
        return self._load_buf(0)
 658
 659
 660 def linereader(f):
 661     """Generate a list of input lines from 'f' without terminating newlines."""
 662     while 1:
 663         line = f.readline()
 664         if not line:
 665             break
 666         yield line[:-1]
 667
 668
 669 def chunkyreader(f, count = None):
 670     """Generate a list of chunks of data read from 'f'.
 671
 672     If count is None, read until EOF is reached.
 673
 674     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 675     reached while reading, raise IOError.
 676     """
 677     if count != None:
 678         while count > 0:
 679             b = f.read(min(count, 65536))
 680             if not b:
 681                 raise IOError('EOF with %d bytes remaining' % count)
 682             yield b
 683             count -= len(b)
 684     else:
 685         while 1:
 686             b = f.read(65536)
 687             if not b: break
 688             yield b
 689
 690
 691 @contextmanager
 692 def atomically_replaced_file(name, mode='w', buffering=-1):
 693     """Yield a file that will be atomically renamed name when leaving the block.
 694
 695     This contextmanager yields an open file object that is backed by a
 696     temporary file which will be renamed (atomically) to the target
 697     name if everything succeeds.
 698
 699     The mode and buffering arguments are handled exactly as with open,
 700     and the yielded file will have very restrictive permissions, as
 701     per mkstemp.
 702
 703     E.g.::
 704
 705         with atomically_replaced_file('foo.txt', 'w') as f:
 706             f.write('hello jack.')
 707
 708     """
 709
 710     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 711                                        text=('b' not in mode))
 712     try:
 713         try:
 714             f = os.fdopen(ffd, mode, buffering)
 715         except:
 716             os.close(ffd)
 717             raise
 718         try:
 719             yield f
 720         finally:
 721             f.close()
 722         os.rename(tempname, name)
 723     finally:
 724         unlink(tempname)  # nonexistant file is ignored
 725
 726
 727 def slashappend(s):
 728     """Append "/" to 's' if it doesn't aleady end in "/"."""
 729     assert isinstance(s, bytes)
 730     if s and not s.endswith(b'/'):
 731         return s + b'/'
 732     else:
 733         return s
 734
 735
 736 def _mmap_do(f, sz, flags, prot, close):
 737     if not sz:
 738         st = os.fstat(f.fileno())
 739         sz = st.st_size
 740     if not sz:
 741         # trying to open a zero-length map gives an error, but an empty
 742         # string has all the same behaviour of a zero-length map, ie. it has
 743         # no elements :)
 744         return ''
 745     map = mmap.mmap(f.fileno(), sz, flags, prot)
 746     if close:
 747         f.close()  # map will persist beyond file close
 748     return map
 749
 750
 751 def mmap_read(f, sz = 0, close=True):
 752     """Create a read-only memory mapped region on file 'f'.
 753     If sz is 0, the region will cover the entire file.
 754     """
 755     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 756
 757
 758 def mmap_readwrite(f, sz = 0, close=True):
 759     """Create a read-write memory mapped region on file 'f'.
 760     If sz is 0, the region will cover the entire file.
 761     """
 762     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 763                     close)
 764
 765
 766 def mmap_readwrite_private(f, sz = 0, close=True):
 767     """Create a read-write memory mapped region on file 'f'.
 768     If sz is 0, the region will cover the entire file.
 769     The map is private, which means the changes are never flushed back to the
 770     file.
 771     """
 772     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 773                     close)
 774
 775
# fmincore() is only defined when the _helpers extension module provides
# mincore.
_mincore = getattr(_helpers, 'mincore', None)
if _mincore:
    # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
    MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)

    # Size of each mapping examined by fmincore(); computed on first use.
    _fmincore_chunk_size = None
    def _set_fmincore_chunk_size():
        """Set _fmincore_chunk_size to a whole number of pages.

        The result is the largest multiple of the page size that does
        not exceed 64 MiB, or a single page if pages are at least that
        large.
        """
        global _fmincore_chunk_size
        pref_chunk_size = 64 * 1024 * 1024
        chunk_size = sc_page_size
        if (sc_page_size < pref_chunk_size):
            chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
        _fmincore_chunk_size = chunk_size

    def fmincore(fd):
        """Return the mincore() data for fd as a bytearray whose values can be
        tested via MINCORE_INCORE, or None if fd does not fully
        support the operation."""
        st = os.fstat(fd)
        if (st.st_size == 0):
            return bytearray(0)
        if not _fmincore_chunk_size:
            _set_fmincore_chunk_size()
        pages_per_chunk = _fmincore_chunk_size // sc_page_size;
        # Both counts round up so that a partial trailing page or chunk
        # is still covered.
        page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
        chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size
        result = bytearray(page_count)
        for ci in compat.range(chunk_count):
            pos = _fmincore_chunk_size * ci;
            msize = min(_fmincore_chunk_size, st.st_size - pos)
            try:
                # Positional arguments after the length are flags, prot,
                # access and offset: the chunk is mapped at offset pos.
                m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
            except mmap.error as ex:
                if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
                    # Perhaps the file was a pipe, i.e. "... | bup split ..."
                    return None
                raise ex
            try:
                # NOTE(review): presumably fills result starting at page
                # index ci * pages_per_chunk -- confirm against the
                # _helpers.mincore implementation.
                _mincore(m, msize, 0, result, ci * pages_per_chunk)
            except OSError as ex:
                if ex.errno == errno.ENOSYS:
                    return None
                raise
        return result
 820
 821
 822 def parse_timestamp(epoch_str):
 823     """Return the number of nanoseconds since the epoch that are described
 824 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 825 throw a ValueError that may contain additional information."""
 826     ns_per = {'s' :  1000000000,
 827               'ms' : 1000000,
 828               'us' : 1000,
 829               'ns' : 1}
 830     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 831     if not match:
 832         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 833             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 834         raise ValueError()
 835     (n, units) = match.group(1, 2)
 836     if not n:
 837         n = 1
 838     n = int(n)
 839     return n * ns_per[units]
 840
 841
 842 def parse_num(s):
 843     """Parse string or bytes as a possibly unit suffixed number.
 844
 845     For example:
 846         199.2k means 203981 bytes
 847         1GB means 1073741824 bytes
 848         2.1 tb means 2199023255552 bytes
 849     """
 850     if isinstance(s, bytes):
 851         # FIXME: should this raise a ValueError for UnicodeDecodeError
 852         # (perhaps with the latter as the context).
 853         s = s.decode('ascii')
 854     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 855     if not g:
 856         raise ValueError("can't parse %r as a number" % s)
 857     (val, unit) = g.groups()
 858     num = float(val)
 859     unit = unit.lower()
 860     if unit in ['t', 'tb']:
 861         mult = 1024*1024*1024*1024
 862     elif unit in ['g', 'gb']:
 863         mult = 1024*1024*1024
 864     elif unit in ['m', 'mb']:
 865         mult = 1024*1024
 866     elif unit in ['k', 'kb']:
 867         mult = 1024
 868     elif unit in ['', 'b']:
 869         mult = 1
 870     else:
 871         raise ValueError("invalid unit %r in number %r" % (unit, s))
 872     return int(num*mult)
 873
 874
 875 saved_errors = []
 876 def add_error(e):
 877     """Append an error message to the list of saved errors.
 878
 879     Once processing is able to stop and output the errors, the saved errors are
 880     accessible in the module variable helpers.saved_errors.
 881     """
 882     saved_errors.append(e)
 883     log('%-70s\n' % e)
 884
 885
 886 def clear_errors():
 887     global saved_errors
 888     saved_errors = []
 889
 890
 891 def die_if_errors(msg=None, status=1):
 892     global saved_errors
 893     if saved_errors:
 894         if not msg:
 895             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 896         log(msg)
 897         sys.exit(status)
 898
 899
 900 def handle_ctrl_c():
 901     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 902
 903     The new exception handler will make sure that bup will exit without an ugly
 904     stacktrace when Ctrl-C is hit.
 905     """
 906     oldhook = sys.excepthook
 907     def newhook(exctype, value, traceback):
 908         if exctype == KeyboardInterrupt:
 909             log('\nInterrupted.\n')
 910         else:
 911             return oldhook(exctype, value, traceback)
 912     sys.excepthook = newhook
 913
 914
 915 def columnate(l, prefix):
 916     """Format elements of 'l' in columns with 'prefix' leading each line.
 917
 918     The number of columns is determined automatically based on the string
 919     lengths.
 920     """
 921     binary = isinstance(prefix, bytes)
 922     nothing = b'' if binary else ''
 923     nl = b'\n' if binary else '\n'
 924     if not l:
 925         return nothing
 926     l = l[:]
 927     clen = max(len(s) for s in l)
 928     ncols = (tty_width() - len(prefix)) // (clen + 2)
 929     if ncols <= 1:
 930         ncols = 1
 931         clen = 0
 932     cols = []
 933     while len(l) % ncols:
 934         l.append(nothing)
 935     rows = len(l) // ncols
 936     for s in compat.range(0, len(l), rows):
 937         cols.append(l[s:s+rows])
 938     out = nothing
 939     fmt = b'%-*s' if binary else '%-*s'
 940     for row in zip(*cols):
 941         out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
 942     return out
 943
 944
 945 def parse_date_or_fatal(str, fatal):
 946     """Parses the given date or calls Option.fatal().
 947     For now we expect a string that contains a float."""
 948     try:
 949         date = float(str)
 950     except ValueError as e:
 951         raise fatal('invalid date format (should be a float): %r' % e)
 952     else:
 953         return date
 954
 955
 956 def parse_excludes(options, fatal):
 957     """Traverse the options and extract all excludes, or call Option.fatal()."""
 958     excluded_paths = []
 959
 960     for flag in options:
 961         (option, parameter) = flag
 962         if option == '--exclude':
 963             excluded_paths.append(resolve_parent(argv_bytes(parameter)))
 964         elif option == '--exclude-from':
 965             try:
 966                 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
 967             except IOError as e:
 968                 raise fatal("couldn't read %r" % parameter)
 969             for exclude_path in f.readlines():
 970                 # FIXME: perhaps this should be rstrip('\n')
 971                 exclude_path = resolve_parent(exclude_path.strip())
 972                 if exclude_path:
 973                     excluded_paths.append(exclude_path)
 974     return sorted(frozenset(excluded_paths))
 975
 976
 977 def parse_rx_excludes(options, fatal):
 978     """Traverse the options and extract all rx excludes, or call
 979     Option.fatal()."""
 980     excluded_patterns = []
 981
 982     for flag in options:
 983         (option, parameter) = flag
 984         if option == '--exclude-rx':
 985             try:
 986                 excluded_patterns.append(re.compile(argv_bytes(parameter)))
 987             except re.error as ex:
 988                 fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
 989         elif option == '--exclude-rx-from':
 990             try:
 991                 f = open(resolve_parent(parameter), 'rb')
 992             except IOError as e:
 993                 raise fatal("couldn't read %r" % parameter)
 994             for pattern in f.readlines():
 995                 spattern = pattern.rstrip(b'\n')
 996                 if not spattern:
 997                     continue
 998                 try:
 999                     excluded_patterns.append(re.compile(spattern))
1000                 except re.error as ex:
1001                     fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
1002     return excluded_patterns
1003
1004
def should_rx_exclude_path(path, exclude_rxs):
    """Tell whether any regular expression in exclude_rxs matches path.

    The first matching pattern is reported via debug1() and True is
    returned; False is returned when nothing matches."""
    hit = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if hit is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n' % (path, hit.pattern))
    return True
1013
1014
1015 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1016 # that resolve against the current filesystem in the strip/graft
1017 # functions for example, but elsewhere as well.  I suspect bup's not
1018 # always being careful about that.  For some cases, the contents of
1019 # the current filesystem should be irrelevant, and consulting it might
1020 # produce the wrong result, perhaps via unintended symlink resolution,
1021 # for example.
1022
def path_components(path):
    """Split path into a list of (name, full_path_to_name) pairs.

    path must start with b'/'; it is normalized with os.path.abspath()
    before being split, and the root always comes first.
    Example:
      b'/home/foo' -> [(b'', b'/'), (b'home', b'/home'),
                       (b'foo', b'/home/foo')]
    Raises Exception when path does not start with b'/'."""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    normalized = os.path.abspath(path)
    root = [(b'', b'/')]
    if normalized == b'/':
        return root
    # The element before the leading '/' stands for the root, which is
    # already accounted for above.
    names = normalized.split(b'/')[1:]
    return root + [(name, b'/' + b'/'.join(names[:depth]))
                   for depth, name in enumerate(names, 1)]
1040
1041
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Longer prefixes are tried first, and a prefix only applies when it
    ends on a component boundary of path (b'/foo' applies to
    b'/foo/bar' but not to b'/foobar').  When no prefix applies, the
    result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == b'/':
            continue
        # A bare startswith() would also accept a prefix that ends in
        # the middle of a component name and produce bogus paths.
        if normalized_path != normalized_bp \
           and not normalized_path.startswith(normalized_bp + b'/'):
            continue
        prefix = normalized_bp
        result = []
        for p in normalized_path[len(normalized_bp):].split(b'/'):
            if p: # not root
                prefix += b'/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1064
1065
def grafted_path_components(graft_points, path):
    """Return the (name, fs_path_or_None) components of path after
    applying the first matching graft in graft_points.

    graft_points is a sequence of (old_prefix, new_prefix) pairs.  A
    graft applies when old_prefix is the normalized path itself or one
    of its leading directories.  When no graft applies, the result is
    path_components() of the normalized path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize both prefixes.  Note that normpath() does not turn
        # a relative prefix into an absolute one.
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if not clean_path.startswith(old_prefix):
            continue
        remainder = clean_path[len(old_prefix):]
        # Only graft at a component boundary, so that /foo=/x does not
        # capture /foobar.
        if old_prefix != b'/' and remainder \
           and not remainder.startswith(b'/'):
            continue
        # Plain concatenation rather than re.sub(): new_prefix is a
        # path, not a replacement template, so any backslash in it
        # must be kept literally.
        grafted_path = new_prefix + remainder
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = b'/' + grafted_path.lstrip(b'/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
        new_prefix_parts = new_prefix.split(b'/')
        result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == b'/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1107
1108
# Module-level alias for the hashlib SHA-1 constructor.
Sha1 = hashlib.sha1
1110
1111
# None when the _helpers extension module provides no localtime().
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Broken-down time record that also carries the UTC offset
    # (tm_gmtoff) and the zone name (tm_zone).
    bup_time = namedtuple('bup_time',
                          'tm_year tm_mon tm_mday tm_hour tm_min tm_sec '
                          'tm_wday tm_yday tm_isdst tm_gmtoff tm_zone')
1119
# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the local time for time (seconds since the epoch,
        rounded down to a whole second) as a bup_time."""
        return bup_time(*_helpers.localtime(int(floor(time))))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it
        # rounds down, so work on the magnitude.
        offmin = abs(off) // 60
        h, m = divmod(offmin, 60)
        # Format the sign on its own: attaching it to the hour loses it
        # whenever the hour is zero (-0030 would come out as +0030).
        # An offset that truncates to zero minutes is reported as +0000.
        sign = b'-' if off < 0 and offmin else b'+'
        return b'%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any extra fields."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t as bytes, as
        formatted by strftime's %z."""
        # strftime() needs a native str format (a bytes format is
        # rejected on Python 3); convert the result so that this
        # variant returns bytes like the one above.
        offset = time.strftime('%z', localtime(t))
        if not isinstance(offset, bytes):
            offset = offset.encode('ascii')
        return offset
    def to_py_time(x):
        """Return x unchanged; it is already usable by the time module."""
        return x
1147
1148
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name (bytes) is acceptable as a save name.

    Enforces a superset of the restrictions in git-check-ref-format(1).
    """
    if name == b'@':
        return False
    if name.startswith(b'/') or name.endswith((b'/', b'.')):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    # No ASCII control characters, including DEL.
    if any(byte_int(c) < 0x20 or byte_int(c) == 0x7f for c in name):
        return False
    # No component may start with a dot or end with ".lock".
    return not any(part.startswith(b'.') or part.endswith(b'.lock')
                   for part in name.split(b'/'))
1166
1167
_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period such as b'30s', b'2min' or b'1y' to seconds.

    b'forever' yields float('inf'); anything that is not a decimal
    count followed by one of the units s, min, h, d, w, m or y yields
    None.  A month counts as 31 days and a year as 366 days.
    """
    if s == b'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    seconds_per_unit = {
        b's': 1,
        b'min': minute,
        b'h': hour,
        b'd': day,
        b'w': 7 * day,
        b'm': 31 * day,
        b'y': 366 * day,
    }
    return int(count) * seconds_per_unit[unit]