lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from math import floor
   8 from os import environ
   9 from subprocess import PIPE, Popen
  10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, grp, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 from bup.compat import argv_bytes, byte_int
  16 from bup.io import byte_stream, path_msg
  17 # This function should really be in helpers, not in bup.options.  But we
  18 # want options.py to be standalone so people can include it in other projects.
  19 from bup.options import _tty_width as tty_width
  20
  21
  22 class Nonlocal:
  23     """Helper to deal with Python scoping issues"""
  24     pass
  25
  26
  27 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  28 assert(sc_page_size > 0)
  29
  30 sc_arg_max = os.sysconf('SC_ARG_MAX')
  31 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  32     sc_arg_max = 2 * 1024 * 1024
  33
  34 def last(iterable):
  35     result = None
  36     for result in iterable:
  37         pass
  38     return result
  39
  40
  41 def atoi(s):
  42     """Convert s (ascii bytes) to an integer. Return 0 if s is not a number."""
  43     try:
  44         return int(s or b'0')
  45     except ValueError:
  46         return 0
  47
  48
  49 def atof(s):
  50     """Convert s (ascii bytes) to a float. Return 0 if s is not a number."""
  51     try:
  52         return float(s or b'0')
  53     except ValueError:
  54         return 0
  55
  56
# Debug verbosity, read once from the BUP_DEBUG environment variable at
# import time; atoi() maps a missing or non-numeric value to 0.
# debug1() and debug2() compare against this level.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  58
  59
  60 try:
  61     _fdatasync = os.fdatasync
  62 except AttributeError:
  63     _fdatasync = os.fsync
  64
  65 if sys.platform.startswith('darwin'):
  66     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  67     import fcntl
  68     def fdatasync(fd):
  69         try:
  70             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  71         except IOError as e:
  72             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  73             if e.errno == errno.ENOTSUP:
  74                 return _fdatasync(fd)
  75             else:
  76                 raise
  77 else:
  78     fdatasync = _fdatasync
  79
  80
  81 def partition(predicate, stream):
  82     """Returns (leading_matches_it, rest_it), where leading_matches_it
  83     must be completely exhausted before traversing rest_it.
  84
  85     """
  86     stream = iter(stream)
  87     ns = Nonlocal()
  88     ns.first_nonmatch = None
  89     def leading_matches():
  90         for x in stream:
  91             if predicate(x):
  92                 yield x
  93             else:
  94                 ns.first_nonmatch = (x,)
  95                 break
  96     def rest():
  97         if ns.first_nonmatch:
  98             yield ns.first_nonmatch[0]
  99             for x in stream:
 100                 yield x
 101     return (leading_matches(), rest())
 102
 103
 104 def merge_dict(*xs):
 105     result = {}
 106     for x in xs:
 107         result.update(x)
 108     return result
 109
 110
 111 def lines_until_sentinel(f, sentinel, ex_type):
 112     # sentinel must end with \n and must contain only one \n
 113     while True:
 114         line = f.readline()
 115         if not (line and line.endswith(b'\n')):
 116             raise ex_type('Hit EOF while reading line')
 117         if line == sentinel:
 118             return
 119         yield line
 120
 121
 122 def stat_if_exists(path):
 123     try:
 124         return os.stat(path)
 125     except OSError as e:
 126         if e.errno != errno.ENOENT:
 127             raise
 128     return None
 129
 130
 131 # Write (blockingly) to sockets that may or may not be in blocking mode.
 132 # We need this because our stderr is sometimes eaten by subprocesses
 133 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 134 # leading to race conditions.  Ick.  We'll do it the hard way.
 135 def _hard_write(fd, buf):
 136     while buf:
 137         (r,w,x) = select.select([], [fd], [], None)
 138         if not w:
 139             raise IOError('select(fd) returned without being writable')
 140         try:
 141             sz = os.write(fd, buf)
 142         except OSError as e:
 143             if e.errno != errno.EAGAIN:
 144                 raise
 145         assert(sz >= 0)
 146         buf = buf[sz:]
 147
 148
 149 _last_prog = 0
 150 def log(s):
 151     """Print a log message to stderr."""
 152     global _last_prog
 153     sys.stdout.flush()
 154     _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
 155     _last_prog = 0
 156
 157
 158 def debug1(s):
 159     if buglvl >= 1:
 160         log(s)
 161
 162
 163 def debug2(s):
 164     if buglvl >= 2:
 165         log(s)
 166
 167
 168 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 169 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 170 _last_progress = ''
 171 def progress(s):
 172     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 173     global _last_progress
 174     if istty2:
 175         log(s)
 176         _last_progress = s
 177
 178
 179 def qprogress(s):
 180     """Calls progress() only if we haven't printed progress in a while.
 181
 182     This avoids overloading the stderr buffer with excess junk.
 183     """
 184     global _last_prog
 185     now = time.time()
 186     if now - _last_prog > 0.1:
 187         progress(s)
 188         _last_prog = now
 189
 190
 191 def reprogress():
 192     """Calls progress() to redisplay the most recent progress message.
 193
 194     Useful after you've printed some other message that wipes out the
 195     progress line.
 196     """
 197     if _last_progress and _last_progress.endswith('\r'):
 198         progress(_last_progress)
 199
 200
 201 def mkdirp(d, mode=None):
 202     """Recursively create directories on path 'd'.
 203
 204     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 205     the path already exists.
 206     """
 207     try:
 208         if mode:
 209             os.makedirs(d, mode)
 210         else:
 211             os.makedirs(d)
 212     except OSError as e:
 213         if e.errno == errno.EEXIST:
 214             pass
 215         else:
 216             raise
 217
 218
 219 class MergeIterItem:
 220     def __init__(self, entry, read_it):
 221         self.entry = entry
 222         self.read_it = read_it
 223     def __lt__(self, x):
 224         return self.entry < x.entry
 225
 226 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 227     if key:
 228         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 229     else:
 230         samekey = operator.eq
 231     count = 0
 232     total = sum(len(it) for it in iters)
 233     iters = (iter(it) for it in iters)
 234     heap = ((next(it, None),it) for it in iters)
 235     heap = [MergeIterItem(e, it) for e, it in heap if e]
 236
 237     heapq.heapify(heap)
 238     pe = None
 239     while heap:
 240         if not count % pfreq:
 241             pfunc(count, total)
 242         e, it = heap[0].entry, heap[0].read_it
 243         if not samekey(e, pe):
 244             pe = e
 245             yield e
 246         count += 1
 247         try:
 248             e = next(it)
 249         except StopIteration:
 250             heapq.heappop(heap) # remove current
 251         else:
 252             # shift current to new location
 253             heapq.heapreplace(heap, MergeIterItem(e, it))
 254     pfinal(count, total)
 255
 256
 257 def unlink(f):
 258     """Delete a file at path 'f' if it currently exists.
 259
 260     Unlike os.unlink(), does not throw an exception if the file didn't already
 261     exist.
 262     """
 263     try:
 264         os.unlink(f)
 265     except OSError as e:
 266         if e.errno != errno.ENOENT:
 267             raise
 268
 269
 270 _bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
 271 _sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
 272
 273 def bquote(x):
 274     if x == b'':
 275         return b"''"
 276     if _bq_simple_id_rx.match(x):
 277         return x
 278     return b"'%s'" % x.replace(b"'", b"'\"'\"'")
 279
 280 def squote(x):
 281     if x == '':
 282         return "''"
 283     if _sq_simple_id_rx.match(x):
 284         return x
 285     return "'%s'" % x.replace("'", "'\"'\"'")
 286
 287 def quote(x):
 288     if isinstance(x, bytes):
 289         return bquote(x)
 290     if isinstance(x, compat.str_type):
 291         return squote(x)
 292     assert False
 293
 294 def shstr(cmd):
 295     """Return a shell quoted string for cmd if it's a sequence, else cmd.
 296
 297     cmd must be a string, bytes, or a sequence of one or the other,
 298     and the assumption is that if cmd is a string or bytes, then it's
 299     already quoted (because it's what's actually being passed to
 300     call() and friends.  e.g. log(shstr(cmd)); call(cmd)
 301
 302     """
 303     if isinstance(cmd, (bytes, compat.str_type)):
 304         return cmd
 305     elif all(isinstance(x, bytes) for x in cmd):
 306         return b' '.join(map(bquote, cmd))
 307     elif all(isinstance(x, compat.str_type) for x in cmd):
 308         return ' '.join(map(squote, cmd))
 309     raise TypeError('unsupported shstr argument: ' + repr(cmd))
 310
 311
# Alias for subprocess.check_call.
exc = subprocess.check_call
 313
 314 def exo(cmd,
 315         input=None,
 316         stdin=None,
 317         stderr=None,
 318         shell=False,
 319         check=True,
 320         preexec_fn=None,
 321         close_fds=True):
 322     if input:
 323         assert stdin in (None, PIPE)
 324         stdin = PIPE
 325     p = Popen(cmd,
 326               stdin=stdin, stdout=PIPE, stderr=stderr,
 327               shell=shell,
 328               preexec_fn=preexec_fn,
 329               close_fds=close_fds)
 330     out, err = p.communicate(input)
 331     if check and p.returncode != 0:
 332         raise Exception('subprocess %r failed with status %d%s'
 333                         % (b' '.join(map(quote, cmd)), p.returncode,
 334                            ', stderr: %r' % err if err else ''))
 335     return out, err, p
 336
 337 def readpipe(argv, preexec_fn=None, shell=False):
 338     """Run a subprocess and return its output."""
 339     return exo(argv, preexec_fn=preexec_fn, shell=shell)[0]
 340
 341
 342 def _argmax_base(command):
 343     base_size = 2048
 344     for c in command:
 345         base_size += len(command) + 1
 346     for k, v in compat.items(environ):
 347         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 348     return base_size
 349
 350
 351 def _argmax_args_size(args):
 352     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 353
 354
 355 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 356     """If args is not empty, yield the output produced by calling the
 357 command list with args as a sequence of strings (It may be necessary
 358 to return multiple strings in order to respect ARG_MAX)."""
 359     # The optional arg_max arg is a workaround for an issue with the
 360     # current wvtest behavior.
 361     base_size = _argmax_base(command)
 362     while args:
 363         room = arg_max - base_size
 364         i = 0
 365         while i < len(args):
 366             next_size = _argmax_args_size(args[i:i+1])
 367             if room - next_size < 0:
 368                 break
 369             room -= next_size
 370             i += 1
 371         sub_args = args[:i]
 372         args = args[i:]
 373         assert(len(sub_args))
 374         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 375
 376
 377 def resolve_parent(p):
 378     """Return the absolute path of a file without following any final symlink.
 379
 380     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 381     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 382     will follow symlinks in p's directory)
 383     """
 384     try:
 385         st = os.lstat(p)
 386     except OSError:
 387         st = None
 388     if st and stat.S_ISLNK(st.st_mode):
 389         (dir, name) = os.path.split(p)
 390         dir = os.path.realpath(dir)
 391         out = os.path.join(dir, name)
 392     else:
 393         out = os.path.realpath(p)
 394     #log('realpathing:%r,%r\n' % (p, out))
 395     return out
 396
 397
 398 def detect_fakeroot():
 399     "Return True if we appear to be running under fakeroot."
 400     return os.getenv("FAKEROOTKEY") != None
 401
 402
 403 if sys.platform.startswith('cygwin'):
 404     def is_superuser():
 405         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 406         groups = os.getgroups()
 407         return 544 in groups or 0 in groups
 408 else:
 409     def is_superuser():
 410         return os.geteuid() == 0
 411
 412
 413 def cache_key_value(get_value, key, cache):
 414     """Return (value, was_cached).  If there is a value in the cache
 415     for key, use that, otherwise, call get_value(key) which should
 416     throw a KeyError if there is no value -- in which case the cached
 417     and returned value will be None.
 418     """
 419     try: # Do we already have it (or know there wasn't one)?
 420         value = cache[key]
 421         return value, True
 422     except KeyError:
 423         pass
 424     value = None
 425     try:
 426         cache[key] = value = get_value(key)
 427     except KeyError:
 428         cache[key] = None
 429     return value, False
 430
 431
 432 _hostname = None
 433 def hostname():
 434     """Get the FQDN of this machine."""
 435     global _hostname
 436     if not _hostname:
 437         _hostname = socket.getfqdn().encode('iso-8859-1')
 438     return _hostname
 439
 440
 441 def format_filesize(size):
 442     unit = 1024.0
 443     size = float(size)
 444     if size < unit:
 445         return "%d" % (size)
 446     exponent = int(math.log(size) // math.log(unit))
 447     size_prefix = "KMGTPE"[exponent - 1]
 448     return "%.1f%s" % (size // math.pow(unit, exponent), size_prefix)
 449
 450
 451 class NotOk(Exception):
 452     pass
 453
 454
 455 class BaseConn:
 456     def __init__(self, outp):
 457         self.outp = outp
 458
 459     def close(self):
 460         while self._read(65536): pass
 461
 462     def read(self, size):
 463         """Read 'size' bytes from input stream."""
 464         self.outp.flush()
 465         return self._read(size)
 466
 467     def readline(self):
 468         """Read from input stream until a newline is found."""
 469         self.outp.flush()
 470         return self._readline()
 471
 472     def write(self, data):
 473         """Write 'data' to output stream."""
 474         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 475         self.outp.write(data)
 476
 477     def has_input(self):
 478         """Return true if input stream is readable."""
 479         raise NotImplemented("Subclasses must implement has_input")
 480
 481     def ok(self):
 482         """Indicate end of output from last sent command."""
 483         self.write(b'\nok\n')
 484
 485     def error(self, s):
 486         """Indicate server error to the client."""
 487         s = re.sub(br'\s+', b' ', s)
 488         self.write(b'\nerror %s\n' % s)
 489
 490     def _check_ok(self, onempty):
 491         self.outp.flush()
 492         rl = b''
 493         for rl in linereader(self):
 494             #log('%d got line: %r\n' % (os.getpid(), rl))
 495             if not rl:  # empty line
 496                 continue
 497             elif rl == b'ok':
 498                 return None
 499             elif rl.startswith(b'error '):
 500                 #log('client: error: %s\n' % rl[6:])
 501                 return NotOk(rl[6:])
 502             else:
 503                 onempty(rl)
 504         raise Exception('server exited unexpectedly; see errors above')
 505
 506     def drain_and_check_ok(self):
 507         """Remove all data for the current command from input stream."""
 508         def onempty(rl):
 509             pass
 510         return self._check_ok(onempty)
 511
 512     def check_ok(self):
 513         """Verify that server action completed successfully."""
 514         def onempty(rl):
 515             raise Exception('expected "ok", got %r' % rl)
 516         return self._check_ok(onempty)
 517
 518
 519 class Conn(BaseConn):
 520     def __init__(self, inp, outp):
 521         BaseConn.__init__(self, outp)
 522         self.inp = inp
 523
 524     def _read(self, size):
 525         return self.inp.read(size)
 526
 527     def _readline(self):
 528         return self.inp.readline()
 529
 530     def has_input(self):
 531         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 532         if rl:
 533             assert(rl[0] == self.inp.fileno())
 534             return True
 535         else:
 536             return None
 537
 538
 539 def checked_reader(fd, n):
 540     while n > 0:
 541         rl, _, _ = select.select([fd], [], [])
 542         assert(rl[0] == fd)
 543         buf = os.read(fd, n)
 544         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 545         yield buf
 546         n -= len(buf)
 547
 548
 549 MAX_PACKET = 128 * 1024
 550 def mux(p, outfd, outr, errr):
 551     try:
 552         fds = [outr, errr]
 553         while p.poll() is None:
 554             rl, _, _ = select.select(fds, [], [])
 555             for fd in rl:
 556                 if fd == outr:
 557                     buf = os.read(outr, MAX_PACKET)
 558                     if not buf: break
 559                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 560                 elif fd == errr:
 561                     buf = os.read(errr, 1024)
 562                     if not buf: break
 563                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 564     finally:
 565         os.write(outfd, struct.pack('!IB', 0, 3))
 566
 567
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads from the file descriptor infd a stream of packets, each a
    '!IB' header (payload length, channel) followed by the payload,
    as written by mux().  Channel 1 payloads are the connection's
    input, channel 2 payloads are copied to sys.stderr, and a channel
    3 packet marks the connection as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        while tail != b'BUPMUX':
            # Read enough to fill a 6 byte window at first, then one
            # byte at a time so we cannot read past the sync string.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            byte_stream(sys.stderr).write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # checked_reader() for the current data packet
        self.buf = None      # data read from a packet but not yet consumed
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        """Write 'data' to output stream after polling the input once."""
        # NOTE(review): presumably this poll is here so that pending
        # stderr packets get relayed before more output is sent -- confirm.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header, waiting up to timeout for it.

        Returns False if the connection is closed or nothing arrived
        in time, True once a header has been handled.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        # The header is 5 bytes: 4 byte length plus 1 byte channel.
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily via self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: relay the payload right away.
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return True if it does.

        Returns False when no data packet arrived within timeout or
        the connection is closed.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current packet fully consumed; look for the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield successive pieces of input as directed by ix_fn.

        ix_fn(buf) returns how many bytes of buf to take, or None to
        take all of buf and continue with the next buffer.  Iteration
        ends after the first piece for which ix_fn returned an index,
        or when no more data can be loaded.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                # Keep the unread remainder for the next read.
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        """Return input up to and including the next newline."""
        def find_eol(buf):
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        """Return up to size bytes of input; fewer only if input ends."""
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        """Return true if data is available without waiting."""
        return self._load_buf(0)
 657
 658
 659 def linereader(f):
 660     """Generate a list of input lines from 'f' without terminating newlines."""
 661     while 1:
 662         line = f.readline()
 663         if not line:
 664             break
 665         yield line[:-1]
 666
 667
 668 def chunkyreader(f, count = None):
 669     """Generate a list of chunks of data read from 'f'.
 670
 671     If count is None, read until EOF is reached.
 672
 673     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 674     reached while reading, raise IOError.
 675     """
 676     if count != None:
 677         while count > 0:
 678             b = f.read(min(count, 65536))
 679             if not b:
 680                 raise IOError('EOF with %d bytes remaining' % count)
 681             yield b
 682             count -= len(b)
 683     else:
 684         while 1:
 685             b = f.read(65536)
 686             if not b: break
 687             yield b
 688
 689
 690 @contextmanager
 691 def atomically_replaced_file(name, mode='w', buffering=-1):
 692     """Yield a file that will be atomically renamed name when leaving the block.
 693
 694     This contextmanager yields an open file object that is backed by a
 695     temporary file which will be renamed (atomically) to the target
 696     name if everything succeeds.
 697
 698     The mode and buffering arguments are handled exactly as with open,
 699     and the yielded file will have very restrictive permissions, as
 700     per mkstemp.
 701
 702     E.g.::
 703
 704         with atomically_replaced_file('foo.txt', 'w') as f:
 705             f.write('hello jack.')
 706
 707     """
 708
 709     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 710                                        text=('b' not in mode))
 711     try:
 712         try:
 713             f = os.fdopen(ffd, mode, buffering)
 714         except:
 715             os.close(ffd)
 716             raise
 717         try:
 718             yield f
 719         finally:
 720             f.close()
 721         os.rename(tempname, name)
 722     finally:
 723         unlink(tempname)  # nonexistant file is ignored
 724
 725
 726 def slashappend(s):
 727     """Append "/" to 's' if it doesn't aleady end in "/"."""
 728     assert isinstance(s, bytes)
 729     if s and not s.endswith(b'/'):
 730         return s + b'/'
 731     else:
 732         return s
 733
 734
 735 def _mmap_do(f, sz, flags, prot, close):
 736     if not sz:
 737         st = os.fstat(f.fileno())
 738         sz = st.st_size
 739     if not sz:
 740         # trying to open a zero-length map gives an error, but an empty
 741         # string has all the same behaviour of a zero-length map, ie. it has
 742         # no elements :)
 743         return ''
 744     map = mmap.mmap(f.fileno(), sz, flags, prot)
 745     if close:
 746         f.close()  # map will persist beyond file close
 747     return map
 748
 749
 750 def mmap_read(f, sz = 0, close=True):
 751     """Create a read-only memory mapped region on file 'f'.
 752     If sz is 0, the region will cover the entire file.
 753     """
 754     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 755
 756
 757 def mmap_readwrite(f, sz = 0, close=True):
 758     """Create a read-write memory mapped region on file 'f'.
 759     If sz is 0, the region will cover the entire file.
 760     """
 761     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 762                     close)
 763
 764
 765 def mmap_readwrite_private(f, sz = 0, close=True):
 766     """Create a read-write memory mapped region on file 'f'.
 767     If sz is 0, the region will cover the entire file.
 768     The map is private, which means the changes are never flushed back to the
 769     file.
 770     """
 771     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 772                     close)
 773
 774
 775 _mincore = getattr(_helpers, 'mincore', None)
 776 if _mincore:
 777     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 778     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 779
 780     _fmincore_chunk_size = None
 781     def _set_fmincore_chunk_size():
 782         global _fmincore_chunk_size
 783         pref_chunk_size = 64 * 1024 * 1024
 784         chunk_size = sc_page_size
 785         if (sc_page_size < pref_chunk_size):
 786             chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
 787         _fmincore_chunk_size = chunk_size
 788
 789     def fmincore(fd):
 790         """Return the mincore() data for fd as a bytearray whose values can be
 791         tested via MINCORE_INCORE, or None if fd does not fully
 792         support the operation."""
 793         st = os.fstat(fd)
 794         if (st.st_size == 0):
 795             return bytearray(0)
 796         if not _fmincore_chunk_size:
 797             _set_fmincore_chunk_size()
 798         pages_per_chunk = _fmincore_chunk_size // sc_page_size;
 799         page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
 800         chunk_count = page_count // _fmincore_chunk_size
 801         if chunk_count < 1:
 802             chunk_count = 1
 803         result = bytearray(page_count)
 804         for ci in compat.range(chunk_count):
 805             pos = _fmincore_chunk_size * ci;
 806             msize = min(_fmincore_chunk_size, st.st_size - pos)
 807             try:
 808                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 809             except mmap.error as ex:
 810                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 811                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 812                     return None
 813                 raise ex
 814             try:
 815                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 816             except OSError as ex:
 817                 if ex.errno == errno.ENOSYS:
 818                     return None
 819                 raise
 820         return result
 821
 822
 823 def parse_timestamp(epoch_str):
 824     """Return the number of nanoseconds since the epoch that are described
 825 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 826 throw a ValueError that may contain additional information."""
 827     ns_per = {'s' :  1000000000,
 828               'ms' : 1000000,
 829               'us' : 1000,
 830               'ns' : 1}
 831     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 832     if not match:
 833         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 834             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 835         raise ValueError()
 836     (n, units) = match.group(1, 2)
 837     if not n:
 838         n = 1
 839     n = int(n)
 840     return n * ns_per[units]
 841
 842
 843 def parse_num(s):
 844     """Parse string or bytes as a possibly unit suffixed number.
 845
 846     For example:
 847         199.2k means 203981 bytes
 848         1GB means 1073741824 bytes
 849         2.1 tb means 2199023255552 bytes
 850     """
 851     if isinstance(s, bytes):
 852         # FIXME: should this raise a ValueError for UnicodeDecodeError
 853         # (perhaps with the latter as the context).
 854         s = s.decode('ascii')
 855     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 856     if not g:
 857         raise ValueError("can't parse %r as a number" % s)
 858     (val, unit) = g.groups()
 859     num = float(val)
 860     unit = unit.lower()
 861     if unit in ['t', 'tb']:
 862         mult = 1024*1024*1024*1024
 863     elif unit in ['g', 'gb']:
 864         mult = 1024*1024*1024
 865     elif unit in ['m', 'mb']:
 866         mult = 1024*1024
 867     elif unit in ['k', 'kb']:
 868         mult = 1024
 869     elif unit in ['', 'b']:
 870         mult = 1
 871     else:
 872         raise ValueError("invalid unit %r in number %r" % (unit, s))
 873     return int(num*mult)
 874
 875
 876 saved_errors = []
 877 def add_error(e):
 878     """Append an error message to the list of saved errors.
 879
 880     Once processing is able to stop and output the errors, the saved errors are
 881     accessible in the module variable helpers.saved_errors.
 882     """
 883     saved_errors.append(e)
 884     log('%-70s\n' % e)
 885
 886
 887 def clear_errors():
 888     global saved_errors
 889     saved_errors = []
 890
 891
 892 def die_if_errors(msg=None, status=1):
 893     global saved_errors
 894     if saved_errors:
 895         if not msg:
 896             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 897         log(msg)
 898         sys.exit(status)
 899
 900
 901 def handle_ctrl_c():
 902     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 903
 904     The new exception handler will make sure that bup will exit without an ugly
 905     stacktrace when Ctrl-C is hit.
 906     """
 907     oldhook = sys.excepthook
 908     def newhook(exctype, value, traceback):
 909         if exctype == KeyboardInterrupt:
 910             log('\nInterrupted.\n')
 911         else:
 912             return oldhook(exctype, value, traceback)
 913     sys.excepthook = newhook
 914
 915
 916 def columnate(l, prefix):
 917     """Format elements of 'l' in columns with 'prefix' leading each line.
 918
 919     The number of columns is determined automatically based on the string
 920     lengths.
 921     """
 922     binary = isinstance(prefix, bytes)
 923     nothing = b'' if binary else ''
 924     nl = b'\n' if binary else '\n'
 925     if not l:
 926         return nothing
 927     l = l[:]
 928     clen = max(len(s) for s in l)
 929     ncols = (tty_width() - len(prefix)) // (clen + 2)
 930     if ncols <= 1:
 931         ncols = 1
 932         clen = 0
 933     cols = []
 934     while len(l) % ncols:
 935         l.append(nothing)
 936     rows = len(l) // ncols
 937     for s in compat.range(0, len(l), rows):
 938         cols.append(l[s:s+rows])
 939     out = nothing
 940     fmt = b'%-*s' if binary else '%-*s'
 941     for row in zip(*cols):
 942         out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl
 943     return out
 944
 945
 946 def parse_date_or_fatal(str, fatal):
 947     """Parses the given date or calls Option.fatal().
 948     For now we expect a string that contains a float."""
 949     try:
 950         date = float(str)
 951     except ValueError as e:
 952         raise fatal('invalid date format (should be a float): %r' % e)
 953     else:
 954         return date
 955
 956
 957 def parse_excludes(options, fatal):
 958     """Traverse the options and extract all excludes, or call Option.fatal()."""
 959     excluded_paths = []
 960
 961     for flag in options:
 962         (option, parameter) = flag
 963         if option == '--exclude':
 964             excluded_paths.append(resolve_parent(argv_bytes(parameter)))
 965         elif option == '--exclude-from':
 966             try:
 967                 f = open(resolve_parent(argv_bytes(parameter)), 'rb')
 968             except IOError as e:
 969                 raise fatal("couldn't read %r" % parameter)
 970             for exclude_path in f.readlines():
 971                 # FIXME: perhaps this should be rstrip('\n')
 972                 exclude_path = resolve_parent(exclude_path.strip())
 973                 if exclude_path:
 974                     excluded_paths.append(exclude_path)
 975     return sorted(frozenset(excluded_paths))
 976
 977
 978 def parse_rx_excludes(options, fatal):
 979     """Traverse the options and extract all rx excludes, or call
 980     Option.fatal()."""
 981     excluded_patterns = []
 982
 983     for flag in options:
 984         (option, parameter) = flag
 985         if option == '--exclude-rx':
 986             try:
 987                 excluded_patterns.append(re.compile(argv_bytes(parameter)))
 988             except re.error as ex:
 989                 fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
 990         elif option == '--exclude-rx-from':
 991             try:
 992                 f = open(resolve_parent(parameter), 'rb')
 993             except IOError as e:
 994                 raise fatal("couldn't read %r" % parameter)
 995             for pattern in f.readlines():
 996                 spattern = pattern.rstrip(b'\n')
 997                 if not spattern:
 998                     continue
 999                 try:
1000                     excluded_patterns.append(re.compile(spattern))
1001                 except re.error as ex:
1002                     fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
1003     return excluded_patterns
1004
1005
def should_rx_exclude_path(path, exclude_rxs):
    """Tell whether any regular expression in exclude_rxs matches path.

    The patterns are tried in order with search(); the first one that
    matches is reported via debug1() and True is returned.  Returns
    False when nothing matches.
    """
    matching_rx = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if matching_rx is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, matching_rx.pattern))
    return True
1014
1015
1016 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1017 # that resolve against the current filesystem in the strip/graft
1018 # functions for example, but elsewhere as well.  I suspect bup's not
1019 # always being careful about that.  For some cases, the contents of
1020 # the current filesystem should be irrelevant, and consulting it might
1021 # produce the wrong result, perhaps via unintended symlink resolution,
1022 # for example.
1023
def path_components(path):
    """Split an absolute bytes path into (name, full_path_to_name) pairs.

    The path is normalized with os.path.abspath() first, and the
    result always begins with the root entry (b'', b'/').  Raises
    Exception if path does not start with b'/'.
    Example:
      '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    root_entry = [(b'', b'/')]
    norm_path = os.path.abspath(path)
    if norm_path == b'/':
        return root_entry
    # The text before the leading '/' is always empty, so drop it.
    names = norm_path.split(b'/')[1:]
    return root_entry + [(name, b'/' + b'/'.join(names[:depth + 1]))
                         for depth, name in enumerate(names)]
1041
1042
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Prefixes are tried longest first, and a prefix that normalizes to
    b'/' is ignored.  A prefix only applies when it ends on a
    component boundary of path, i.e. path equals the prefix or
    continues with b'/' right after it; so b'/foo/bar' is not stripped
    from b'/foo/barbaz'.  The first entry of a stripped result is
    (b'', prefix).  If no prefix applies, the result is
    path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    for bp in sorted(strip_prefixes, key=len, reverse=True):
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == b'/':
            continue
        if not normalized_path.startswith(normalized_bp):
            continue
        remainder = normalized_path[len(normalized_bp):]
        if remainder and not remainder.startswith(b'/'):
            # The match ended in the middle of a component name, so
            # this prefix is not actually a parent of path.
            continue
        prefix = normalized_bp
        result = []
        for p in remainder.split(b'/'):
            if p: # not root
                prefix += b'/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1065
1066
def grafted_path_components(graft_points, path):
    """Return the components of path after applying the first matching graft.

    graft_points is an iterable of (old_prefix, new_prefix) bytes
    pairs, as in --graft old=new.  The result is a list of (name,
    fs_path_or_None) pairs: the faked directories leading up to the
    graft point carry None, and the entry at the graft point takes the
    last name of new_prefix together with the filesystem path of
    old_prefix.  A graft point only matches when old_prefix ends on a
    component boundary of the normalized path.  If no graft point
    matches, the result is path_components() of the normalized path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        # Normalize the prefixes (this does not make them absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if not clean_path.startswith(old_prefix):
            continue
        remainder = clean_path[len(old_prefix):]
        if old_prefix != b'/' and remainder \
           and not remainder.startswith(b'/'):
            # old_prefix ended in the middle of a component name
            # (e.g. /foo vs. /foobar), so it is not a parent of path.
            continue
        # Swap the prefix by slicing rather than re.sub(), which would
        # interpret backslashes in new_prefix as escapes.
        grafted_path = new_prefix + remainder
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = b'/' + grafted_path.lstrip(b'/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
        new_prefix_parts = new_prefix.split(b'/')
        result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == b'/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1108
1109
# Module-level alias for hashlib's SHA-1 constructor.
Sha1 = hashlib.sha1
1111
1112
# _helpers.localtime is used when the C helper module provides it;
# otherwise fall back to python's time module below.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return a bup_time for time, which is floored before conversion."""
        return bup_time(*_helpers.localtime(floor(time)))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        h, m = divmod(offmin, 60)
        # Emit the sign separately: formatting -h with %+03d loses it
        # whenever h is 0 (e.g. an offset of -00:30 came out as +0030).
        # A negative offset that truncates to zero is still "+0000".
        sign = b'-' if off < 0 and offmin else b'+'
        return b'%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, keeping only its first nine items."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as bytes."""
        # time.strftime() wants a native str format (a bytes format is
        # a TypeError on Python 3); encode the result so that both
        # definitions of utc_offset_str return bytes.
        return time.strftime('%z', localtime(t)).encode('ascii')
    def to_py_time(x):
        """Return x unchanged; time.localtime() results need no conversion."""
        return x
1148
1149
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Tell whether name (bytes) is acceptable as a save name.

    Rejected are: the name b'@'; names that start or end with b'/' or
    end with b'.'; names containing anything matched by
    _some_invalid_save_parts_rx; names containing a byte below 0x20 or
    the byte 0x7f; and names with a b'/'-separated part that starts
    with b'.' or ends with b'.lock'.
    """
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if name == b'@':
        return False
    if name.startswith(b'/') or name.endswith((b'/', b'.')):
        return False
    if _some_invalid_save_parts_rx.search(name) is not None:
        return False
    # No control characters or DEL anywhere in the name.
    if any(byte_int(c) < 0x20 or byte_int(c) == 0x7f for c in name):
        return False
    return not any(part.startswith(b'.') or part.endswith(b'.lock')
                   for part in name.split(b'/'))
1167
1168
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period such as '10s', '5min' or '2w' to seconds.

    'forever' gives float('inf').  Otherwise s must be a decimal count
    followed by one of the units s, min, h, d, w, m (counted as 31
    days) or y (counted as 366 days); anything else gives None.
    """
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    return int(count) * secs_per_unit[unit]