lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from os import environ
   8 from pipes import quote
   9 from subprocess import PIPE, Popen
  10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, grp, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 # This function should really be in helpers, not in bup.options.  But we
  16 # want options.py to be standalone so people can include it in other projects.
  17 from bup.options import _tty_width as tty_width
  18
  19
  20 class Nonlocal:
  21     """Helper to deal with Python scoping issues"""
  22     pass
  23
  24
# Memory page size reported by the OS; used for the page-granular
# arithmetic in fmincore().
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Upper bound on the size of an exec() argument list; this is the
# default arg_max used by batchpipe().
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
  31
  32 def last(iterable):
  33     result = None
  34     for result in iterable:
  35         pass
  36     return result
  37
  38
  39 def atoi(s):
  40     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  41     try:
  42         return int(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 def atof(s):
  48     """Convert the string 's' to a float. Return 0 if s is not a number."""
  49     try:
  50         return float(s or '0')
  51     except ValueError:
  52         return 0
  53
  54
# Debug verbosity taken from the BUP_DEBUG environment variable (0 when
# unset or not a number); consulted by debug1() and debug2().
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  56
  57
  58 try:
  59     _fdatasync = os.fdatasync
  60 except AttributeError:
  61     _fdatasync = os.fsync
  62
  63 if sys.platform.startswith('darwin'):
  64     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  65     import fcntl
  66     def fdatasync(fd):
  67         try:
  68             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  69         except IOError as e:
  70             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  71             if e.errno == errno.ENOTSUP:
  72                 return _fdatasync(fd)
  73             else:
  74                 raise
  75 else:
  76     fdatasync = _fdatasync
  77
  78
  79 def partition(predicate, stream):
  80     """Returns (leading_matches_it, rest_it), where leading_matches_it
  81     must be completely exhausted before traversing rest_it.
  82
  83     """
  84     stream = iter(stream)
  85     ns = Nonlocal()
  86     ns.first_nonmatch = None
  87     def leading_matches():
  88         for x in stream:
  89             if predicate(x):
  90                 yield x
  91             else:
  92                 ns.first_nonmatch = (x,)
  93                 break
  94     def rest():
  95         if ns.first_nonmatch:
  96             yield ns.first_nonmatch[0]
  97             for x in stream:
  98                 yield x
  99     return (leading_matches(), rest())
 100
 101
 102 def lines_until_sentinel(f, sentinel, ex_type):
 103     # sentinel must end with \n and must contain only one \n
 104     while True:
 105         line = f.readline()
 106         if not (line and line.endswith('\n')):
 107             raise ex_type('Hit EOF while reading line')
 108         if line == sentinel:
 109             return
 110         yield line
 111
 112
 113 def stat_if_exists(path):
 114     try:
 115         return os.stat(path)
 116     except OSError as e:
 117         if e.errno != errno.ENOENT:
 118             raise
 119     return None
 120
 121
 122 # Write (blockingly) to sockets that may or may not be in blocking mode.
 123 # We need this because our stderr is sometimes eaten by subprocesses
 124 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 125 # leading to race conditions.  Ick.  We'll do it the hard way.
 126 def _hard_write(fd, buf):
 127     while buf:
 128         (r,w,x) = select.select([], [fd], [], None)
 129         if not w:
 130             raise IOError('select(fd) returned without being writable')
 131         try:
 132             sz = os.write(fd, buf)
 133         except OSError as e:
 134             if e.errno != errno.EAGAIN:
 135                 raise
 136         assert(sz >= 0)
 137         buf = buf[sz:]
 138
 139
 140 _last_prog = 0
 141 def log(s):
 142     """Print a log message to stderr."""
 143     global _last_prog
 144     sys.stdout.flush()
 145     _hard_write(sys.stderr.fileno(), s)
 146     _last_prog = 0
 147
 148
 149 def debug1(s):
 150     if buglvl >= 1:
 151         log(s)
 152
 153
 154 def debug2(s):
 155     if buglvl >= 2:
 156         log(s)
 157
 158
 159 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 160 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 161 _last_progress = ''
 162 def progress(s):
 163     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 164     global _last_progress
 165     if istty2:
 166         log(s)
 167         _last_progress = s
 168
 169
 170 def qprogress(s):
 171     """Calls progress() only if we haven't printed progress in a while.
 172
 173     This avoids overloading the stderr buffer with excess junk.
 174     """
 175     global _last_prog
 176     now = time.time()
 177     if now - _last_prog > 0.1:
 178         progress(s)
 179         _last_prog = now
 180
 181
 182 def reprogress():
 183     """Calls progress() to redisplay the most recent progress message.
 184
 185     Useful after you've printed some other message that wipes out the
 186     progress line.
 187     """
 188     if _last_progress and _last_progress.endswith('\r'):
 189         progress(_last_progress)
 190
 191
 192 def mkdirp(d, mode=None):
 193     """Recursively create directories on path 'd'.
 194
 195     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 196     the path already exists.
 197     """
 198     try:
 199         if mode:
 200             os.makedirs(d, mode)
 201         else:
 202             os.makedirs(d)
 203     except OSError as e:
 204         if e.errno == errno.EEXIST:
 205             pass
 206         else:
 207             raise
 208
 209
 210 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 211     if key:
 212         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 213     else:
 214         samekey = operator.eq
 215     count = 0
 216     total = sum(len(it) for it in iters)
 217     iters = (iter(it) for it in iters)
 218     heap = ((next(it, None),it) for it in iters)
 219     heap = [(e,it) for e,it in heap if e]
 220
 221     heapq.heapify(heap)
 222     pe = None
 223     while heap:
 224         if not count % pfreq:
 225             pfunc(count, total)
 226         e, it = heap[0]
 227         if not samekey(e, pe):
 228             pe = e
 229             yield e
 230         count += 1
 231         try:
 232             e = next(it)
 233         except StopIteration:
 234             heapq.heappop(heap) # remove current
 235         else:
 236             heapq.heapreplace(heap, (e, it)) # shift current to new location
 237     pfinal(count, total)
 238
 239
 240 def unlink(f):
 241     """Delete a file at path 'f' if it currently exists.
 242
 243     Unlike os.unlink(), does not throw an exception if the file didn't already
 244     exist.
 245     """
 246     try:
 247         os.unlink(f)
 248     except OSError as e:
 249         if e.errno != errno.ENOENT:
 250             raise
 251
 252
 253 def shstr(cmd):
 254     if isinstance(cmd, compat.str_type):
 255         return cmd
 256     else:
 257         return ' '.join(map(quote, cmd))
 258
# Alias for subprocess.check_call: run a command and raise
# CalledProcessError if it exits with a non-zero status.
exc = subprocess.check_call
 260
 261 def exo(cmd,
 262         input=None,
 263         stdin=None,
 264         stderr=None,
 265         shell=False,
 266         check=True,
 267         preexec_fn=None):
 268     if input:
 269         assert stdin in (None, PIPE)
 270         stdin = PIPE
 271     p = Popen(cmd,
 272               stdin=stdin, stdout=PIPE, stderr=stderr,
 273               shell=shell,
 274               preexec_fn=preexec_fn)
 275     out, err = p.communicate(input)
 276     if check and p.returncode != 0:
 277         raise Exception('subprocess %r failed with status %d, stderr: %r'
 278                         % (' '.join(map(quote, cmd)), p.returncode, err))
 279     return out, err, p
 280
 281 def readpipe(argv, preexec_fn=None, shell=False):
 282     """Run a subprocess and return its output."""
 283     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 284                          shell=shell)
 285     out, err = p.communicate()
 286     if p.returncode != 0:
 287         raise Exception('subprocess %r failed with status %d'
 288                         % (' '.join(argv), p.returncode))
 289     return out
 290
 291
 292 def _argmax_base(command):
 293     base_size = 2048
 294     for c in command:
 295         base_size += len(command) + 1
 296     for k, v in compat.items(environ):
 297         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 298     return base_size
 299
 300
 301 def _argmax_args_size(args):
 302     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 303
 304
 305 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 306     """If args is not empty, yield the output produced by calling the
 307 command list with args as a sequence of strings (It may be necessary
 308 to return multiple strings in order to respect ARG_MAX)."""
 309     # The optional arg_max arg is a workaround for an issue with the
 310     # current wvtest behavior.
 311     base_size = _argmax_base(command)
 312     while args:
 313         room = arg_max - base_size
 314         i = 0
 315         while i < len(args):
 316             next_size = _argmax_args_size(args[i:i+1])
 317             if room - next_size < 0:
 318                 break
 319             room -= next_size
 320             i += 1
 321         sub_args = args[:i]
 322         args = args[i:]
 323         assert(len(sub_args))
 324         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 325
 326
 327 def resolve_parent(p):
 328     """Return the absolute path of a file without following any final symlink.
 329
 330     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 331     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 332     will follow symlinks in p's directory)
 333     """
 334     try:
 335         st = os.lstat(p)
 336     except OSError:
 337         st = None
 338     if st and stat.S_ISLNK(st.st_mode):
 339         (dir, name) = os.path.split(p)
 340         dir = os.path.realpath(dir)
 341         out = os.path.join(dir, name)
 342     else:
 343         out = os.path.realpath(p)
 344     #log('realpathing:%r,%r\n' % (p, out))
 345     return out
 346
 347
 348 def detect_fakeroot():
 349     "Return True if we appear to be running under fakeroot."
 350     return os.getenv("FAKEROOTKEY") != None
 351
 352
 353 if sys.platform.startswith('cygwin'):
 354     def is_superuser():
 355         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 356         groups = os.getgroups()
 357         return 544 in groups or 0 in groups
 358 else:
 359     def is_superuser():
 360         return os.geteuid() == 0
 361
 362
 363 def _cache_key_value(get_value, key, cache):
 364     """Return (value, was_cached).  If there is a value in the cache
 365     for key, use that, otherwise, call get_value(key) which should
 366     throw a KeyError if there is no value -- in which case the cached
 367     and returned value will be None.
 368     """
 369     try: # Do we already have it (or know there wasn't one)?
 370         value = cache[key]
 371         return value, True
 372     except KeyError:
 373         pass
 374     value = None
 375     try:
 376         cache[key] = value = get_value(key)
 377     except KeyError:
 378         cache[key] = None
 379     return value, False
 380
 381
 382 _uid_to_pwd_cache = {}
 383 _name_to_pwd_cache = {}
 384
 385 def pwd_from_uid(uid):
 386     """Return password database entry for uid (may be a cached value).
 387     Return None if no entry is found.
 388     """
 389     global _uid_to_pwd_cache, _name_to_pwd_cache
 390     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 391     if entry and not cached:
 392         _name_to_pwd_cache[entry.pw_name] = entry
 393     return entry
 394
 395
 396 def pwd_from_name(name):
 397     """Return password database entry for name (may be a cached value).
 398     Return None if no entry is found.
 399     """
 400     global _uid_to_pwd_cache, _name_to_pwd_cache
 401     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 402     if entry and not cached:
 403         _uid_to_pwd_cache[entry.pw_uid] = entry
 404     return entry
 405
 406
 407 _gid_to_grp_cache = {}
 408 _name_to_grp_cache = {}
 409
 410 def grp_from_gid(gid):
 411     """Return password database entry for gid (may be a cached value).
 412     Return None if no entry is found.
 413     """
 414     global _gid_to_grp_cache, _name_to_grp_cache
 415     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 416     if entry and not cached:
 417         _name_to_grp_cache[entry.gr_name] = entry
 418     return entry
 419
 420
 421 def grp_from_name(name):
 422     """Return password database entry for name (may be a cached value).
 423     Return None if no entry is found.
 424     """
 425     global _gid_to_grp_cache, _name_to_grp_cache
 426     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 427     if entry and not cached:
 428         _gid_to_grp_cache[entry.gr_gid] = entry
 429     return entry
 430
 431
 432 _username = None
 433 def username():
 434     """Get the user's login name."""
 435     global _username
 436     if not _username:
 437         uid = os.getuid()
 438         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 439     return _username
 440
 441
 442 _userfullname = None
 443 def userfullname():
 444     """Get the user's full name."""
 445     global _userfullname
 446     if not _userfullname:
 447         uid = os.getuid()
 448         entry = pwd_from_uid(uid)
 449         if entry:
 450             _userfullname = entry[4].split(',')[0] or entry[0]
 451         if not _userfullname:
 452             _userfullname = 'user%d' % uid
 453     return _userfullname
 454
 455
 456 _hostname = None
 457 def hostname():
 458     """Get the FQDN of this machine."""
 459     global _hostname
 460     if not _hostname:
 461         _hostname = socket.getfqdn()
 462     return _hostname
 463
 464
 465 _resource_path = None
 466 def resource_path(subdir=''):
 467     global _resource_path
 468     if not _resource_path:
 469         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 470     return os.path.join(_resource_path, subdir)
 471
 472 def format_filesize(size):
 473     unit = 1024.0
 474     size = float(size)
 475     if size < unit:
 476         return "%d" % (size)
 477     exponent = int(math.log(size) / math.log(unit))
 478     size_prefix = "KMGTPE"[exponent - 1]
 479     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 480
 481
 482 class NotOk(Exception):
 483     pass
 484
 485
 486 class BaseConn:
 487     def __init__(self, outp):
 488         self.outp = outp
 489
 490     def close(self):
 491         while self._read(65536): pass
 492
 493     def read(self, size):
 494         """Read 'size' bytes from input stream."""
 495         self.outp.flush()
 496         return self._read(size)
 497
 498     def readline(self):
 499         """Read from input stream until a newline is found."""
 500         self.outp.flush()
 501         return self._readline()
 502
 503     def write(self, data):
 504         """Write 'data' to output stream."""
 505         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 506         self.outp.write(data)
 507
 508     def has_input(self):
 509         """Return true if input stream is readable."""
 510         raise NotImplemented("Subclasses must implement has_input")
 511
 512     def ok(self):
 513         """Indicate end of output from last sent command."""
 514         self.write('\nok\n')
 515
 516     def error(self, s):
 517         """Indicate server error to the client."""
 518         s = re.sub(r'\s+', ' ', str(s))
 519         self.write('\nerror %s\n' % s)
 520
 521     def _check_ok(self, onempty):
 522         self.outp.flush()
 523         rl = ''
 524         for rl in linereader(self):
 525             #log('%d got line: %r\n' % (os.getpid(), rl))
 526             if not rl:  # empty line
 527                 continue
 528             elif rl == 'ok':
 529                 return None
 530             elif rl.startswith('error '):
 531                 #log('client: error: %s\n' % rl[6:])
 532                 return NotOk(rl[6:])
 533             else:
 534                 onempty(rl)
 535         raise Exception('server exited unexpectedly; see errors above')
 536
 537     def drain_and_check_ok(self):
 538         """Remove all data for the current command from input stream."""
 539         def onempty(rl):
 540             pass
 541         return self._check_ok(onempty)
 542
 543     def check_ok(self):
 544         """Verify that server action completed successfully."""
 545         def onempty(rl):
 546             raise Exception('expected "ok", got %r' % rl)
 547         return self._check_ok(onempty)
 548
 549
 550 class Conn(BaseConn):
 551     def __init__(self, inp, outp):
 552         BaseConn.__init__(self, outp)
 553         self.inp = inp
 554
 555     def _read(self, size):
 556         return self.inp.read(size)
 557
 558     def _readline(self):
 559         return self.inp.readline()
 560
 561     def has_input(self):
 562         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 563         if rl:
 564             assert(rl[0] == self.inp.fileno())
 565             return True
 566         else:
 567             return None
 568
 569
 570 def checked_reader(fd, n):
 571     while n > 0:
 572         rl, _, _ = select.select([fd], [], [])
 573         assert(rl[0] == fd)
 574         buf = os.read(fd, n)
 575         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 576         yield buf
 577         n -= len(buf)
 578
 579
 580 MAX_PACKET = 128 * 1024
 581 def mux(p, outfd, outr, errr):
 582     try:
 583         fds = [outr, errr]
 584         while p.poll() is None:
 585             rl, _, _ = select.select(fds, [], [])
 586             for fd in rl:
 587                 if fd == outr:
 588                     buf = os.read(outr, MAX_PACKET)
 589                     if not buf: break
 590                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 591                 elif fd == errr:
 592                     buf = os.read(errr, 1024)
 593                     if not buf: break
 594                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 595     finally:
 596         os.write(outfd, struct.pack('!IB', 0, 3))
 597
 598
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the packet stream produced by mux() from the file descriptor
    'infd': channel 1 payloads become this connection's input, channel 2
    payloads are copied to our stderr, and a channel 3 packet marks the
    stream as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to fill a 6-character window; once the window
            # is full, advance one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        # Generator over the payload of the channel 1 packet currently
        # being consumed, or None between packets.
        self.reader = None
        # Data already read from the current packet but not yet handed
        # to the caller, or None.
        self.buf = None
        # Set once the channel 3 (close) packet has been seen.
        self.closed = False

    def write(self, data):
        """Write 'data' to the output stream.

        Pending input is polled (without blocking) first.
        NOTE(review): presumably so that stderr packets already sent by
        the peer are relayed before more output goes out - confirm.
        """
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and dispatch on its channel.

        Returns False if the stream is closed or nothing arrived within
        'timeout' (as passed to select()), otherwise True.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        # Header: 4-byte big-endian payload length, 1-byte channel.
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily by _load_buf().
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # Remote stderr: relay to ours immediately.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return True if it does.

        Returns False when the stream is closed or when no packet
        arrived within 'timeout'.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                # Current packet is used up; go fetch the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of input as directed by ix_fn.

        ix_fn(buf) returns the number of leading bytes of buf to take,
        which ends the read, or None to take all of buf and continue
        with the next buffer.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            # Index just past the first newline, or None if there is none.
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        csize = [size]
        def until_size(buf): # Closes on csize
            # csize[0] tracks how many bytes are still wanted.
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if input data is available without blocking."""
        return self._load_buf(0)
 688
 689
 690 def linereader(f):
 691     """Generate a list of input lines from 'f' without terminating newlines."""
 692     while 1:
 693         line = f.readline()
 694         if not line:
 695             break
 696         yield line[:-1]
 697
 698
 699 def chunkyreader(f, count = None):
 700     """Generate a list of chunks of data read from 'f'.
 701
 702     If count is None, read until EOF is reached.
 703
 704     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 705     reached while reading, raise IOError.
 706     """
 707     if count != None:
 708         while count > 0:
 709             b = f.read(min(count, 65536))
 710             if not b:
 711                 raise IOError('EOF with %d bytes remaining' % count)
 712             yield b
 713             count -= len(b)
 714     else:
 715         while 1:
 716             b = f.read(65536)
 717             if not b: break
 718             yield b
 719
 720
 721 @contextmanager
 722 def atomically_replaced_file(name, mode='w', buffering=-1):
 723     """Yield a file that will be atomically renamed name when leaving the block.
 724
 725     This contextmanager yields an open file object that is backed by a
 726     temporary file which will be renamed (atomically) to the target
 727     name if everything succeeds.
 728
 729     The mode and buffering arguments are handled exactly as with open,
 730     and the yielded file will have very restrictive permissions, as
 731     per mkstemp.
 732
 733     E.g.::
 734
 735         with atomically_replaced_file('foo.txt', 'w') as f:
 736             f.write('hello jack.')
 737
 738     """
 739
 740     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 741                                        text=('b' not in mode))
 742     try:
 743         try:
 744             f = os.fdopen(ffd, mode, buffering)
 745         except:
 746             os.close(ffd)
 747             raise
 748         try:
 749             yield f
 750         finally:
 751             f.close()
 752         os.rename(tempname, name)
 753     finally:
 754         unlink(tempname)  # nonexistant file is ignored
 755
 756
 757 def slashappend(s):
 758     """Append "/" to 's' if it doesn't aleady end in "/"."""
 759     if s and not s.endswith('/'):
 760         return s + '/'
 761     else:
 762         return s
 763
 764
 765 def _mmap_do(f, sz, flags, prot, close):
 766     if not sz:
 767         st = os.fstat(f.fileno())
 768         sz = st.st_size
 769     if not sz:
 770         # trying to open a zero-length map gives an error, but an empty
 771         # string has all the same behaviour of a zero-length map, ie. it has
 772         # no elements :)
 773         return ''
 774     map = mmap.mmap(f.fileno(), sz, flags, prot)
 775     if close:
 776         f.close()  # map will persist beyond file close
 777     return map
 778
 779
 780 def mmap_read(f, sz = 0, close=True):
 781     """Create a read-only memory mapped region on file 'f'.
 782     If sz is 0, the region will cover the entire file.
 783     """
 784     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 785
 786
 787 def mmap_readwrite(f, sz = 0, close=True):
 788     """Create a read-write memory mapped region on file 'f'.
 789     If sz is 0, the region will cover the entire file.
 790     """
 791     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 792                     close)
 793
 794
 795 def mmap_readwrite_private(f, sz = 0, close=True):
 796     """Create a read-write memory mapped region on file 'f'.
 797     If sz is 0, the region will cover the entire file.
 798     The map is private, which means the changes are never flushed back to the
 799     file.
 800     """
 801     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 802                     close)
 803
 804
 805 _mincore = getattr(_helpers, 'mincore', None)
 806 if _mincore:
 807     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 808     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 809
 810     _fmincore_chunk_size = None
 811     def _set_fmincore_chunk_size():
 812         global _fmincore_chunk_size
 813         pref_chunk_size = 64 * 1024 * 1024
 814         chunk_size = sc_page_size
 815         if (sc_page_size < pref_chunk_size):
 816             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 817         _fmincore_chunk_size = chunk_size
 818
 819     def fmincore(fd):
 820         """Return the mincore() data for fd as a bytearray whose values can be
 821         tested via MINCORE_INCORE, or None if fd does not fully
 822         support the operation."""
 823         st = os.fstat(fd)
 824         if (st.st_size == 0):
 825             return bytearray(0)
 826         if not _fmincore_chunk_size:
 827             _set_fmincore_chunk_size()
 828         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 829         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 830         chunk_count = page_count / _fmincore_chunk_size
 831         if chunk_count < 1:
 832             chunk_count = 1
 833         result = bytearray(page_count)
 834         for ci in xrange(chunk_count):
 835             pos = _fmincore_chunk_size * ci;
 836             msize = min(_fmincore_chunk_size, st.st_size - pos)
 837             try:
 838                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 839             except mmap.error as ex:
 840                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 841                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 842                     return None
 843                 raise ex
 844             try:
 845                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 846             except OSError as ex:
 847                 if ex.errno == errno.ENOSYS:
 848                     return None
 849                 raise
 850         return result
 851
 852
 853 def parse_timestamp(epoch_str):
 854     """Return the number of nanoseconds since the epoch that are described
 855 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 856 throw a ValueError that may contain additional information."""
 857     ns_per = {'s' :  1000000000,
 858               'ms' : 1000000,
 859               'us' : 1000,
 860               'ns' : 1}
 861     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 862     if not match:
 863         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 864             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 865         raise ValueError()
 866     (n, units) = match.group(1, 2)
 867     if not n:
 868         n = 1
 869     n = int(n)
 870     return n * ns_per[units]
 871
 872
 873 def parse_num(s):
 874     """Parse data size information into a float number.
 875
 876     Here are some examples of conversions:
 877         199.2k means 203981 bytes
 878         1GB means 1073741824 bytes
 879         2.1 tb means 2199023255552 bytes
 880     """
 881     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 882     if not g:
 883         raise ValueError("can't parse %r as a number" % s)
 884     (val, unit) = g.groups()
 885     num = float(val)
 886     unit = unit.lower()
 887     if unit in ['t', 'tb']:
 888         mult = 1024*1024*1024*1024
 889     elif unit in ['g', 'gb']:
 890         mult = 1024*1024*1024
 891     elif unit in ['m', 'mb']:
 892         mult = 1024*1024
 893     elif unit in ['k', 'kb']:
 894         mult = 1024
 895     elif unit in ['', 'b']:
 896         mult = 1
 897     else:
 898         raise ValueError("invalid unit %r in number %r" % (unit, s))
 899     return int(num*mult)
 900
 901
 902 def count(l):
 903     """Count the number of elements in an iterator. (consumes the iterator)"""
 904     return reduce(lambda x,y: x+1, l)
 905
 906
 907 saved_errors = []
 908 def add_error(e):
 909     """Append an error message to the list of saved errors.
 910
 911     Once processing is able to stop and output the errors, the saved errors are
 912     accessible in the module variable helpers.saved_errors.
 913     """
 914     saved_errors.append(e)
 915     log('%-70s\n' % e)
 916
 917
 918 def clear_errors():
 919     global saved_errors
 920     saved_errors = []
 921
 922
 923 def die_if_errors(msg=None, status=1):
 924     global saved_errors
 925     if saved_errors:
 926         if not msg:
 927             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 928         log(msg)
 929         sys.exit(status)
 930
 931
 932 def handle_ctrl_c():
 933     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 934
 935     The new exception handler will make sure that bup will exit without an ugly
 936     stacktrace when Ctrl-C is hit.
 937     """
 938     oldhook = sys.excepthook
 939     def newhook(exctype, value, traceback):
 940         if exctype == KeyboardInterrupt:
 941             log('\nInterrupted.\n')
 942         else:
 943             return oldhook(exctype, value, traceback)
 944     sys.excepthook = newhook
 945
 946
 947 def columnate(l, prefix):
 948     """Format elements of 'l' in columns with 'prefix' leading each line.
 949
 950     The number of columns is determined automatically based on the string
 951     lengths.
 952     """
 953     if not l:
 954         return ""
 955     l = l[:]
 956     clen = max(len(s) for s in l)
 957     ncols = (tty_width() - len(prefix)) // (clen + 2)
 958     if ncols <= 1:
 959         ncols = 1
 960         clen = 0
 961     cols = []
 962     while len(l) % ncols:
 963         l.append('')
 964     rows = len(l) // ncols
 965     for s in range(0, len(l), rows):
 966         cols.append(l[s:s+rows])
 967     out = ''
 968     for row in zip(*cols):
 969         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 970     return out
 971
 972
 973 def parse_date_or_fatal(str, fatal):
 974     """Parses the given date or calls Option.fatal().
 975     For now we expect a string that contains a float."""
 976     try:
 977         date = float(str)
 978     except ValueError as e:
 979         raise fatal('invalid date format (should be a float): %r' % e)
 980     else:
 981         return date
 982
 983
 984 def parse_excludes(options, fatal):
 985     """Traverse the options and extract all excludes, or call Option.fatal()."""
 986     excluded_paths = []
 987
 988     for flag in options:
 989         (option, parameter) = flag
 990         if option == '--exclude':
 991             excluded_paths.append(resolve_parent(parameter))
 992         elif option == '--exclude-from':
 993             try:
 994                 f = open(resolve_parent(parameter))
 995             except IOError as e:
 996                 raise fatal("couldn't read %s" % parameter)
 997             for exclude_path in f.readlines():
 998                 # FIXME: perhaps this should be rstrip('\n')
 999                 exclude_path = resolve_parent(exclude_path.strip())
1000                 if exclude_path:
1001                     excluded_paths.append(exclude_path)
1002     return sorted(frozenset(excluded_paths))
1003
1004
def parse_rx_excludes(options, fatal):
    """Traverse the options and extract all rx excludes, or call
    Option.fatal().

    'options' is an iterable of (option, parameter) pairs.  Each
    --exclude-rx parameter is compiled as a regular expression; each
    --exclude-rx-from parameter names a file holding one pattern per line
    (empty lines are ignored).  fatal() is called with a message for any
    pattern that does not compile and for any file that cannot be opened.

    Returns the list of compiled patterns, in the order encountered.
    """
    excluded_patterns = []

    for flag in options:
        (option, parameter) = flag
        if option == '--exclude-rx':
            try:
                excluded_patterns.append(re.compile(parameter))
            except re.error as ex:
                fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
        elif option == '--exclude-rx-from':
            try:
                f = open(resolve_parent(parameter))
            except IOError:
                raise fatal("couldn't read %s" % parameter)
            # Make sure the file is closed again, even if fatal() raises
            # while the patterns are being processed.
            with f:
                for pattern in f:
                    # Only the newline is removed; other whitespace is
                    # significant inside a regular expression.
                    spattern = pattern.rstrip('\n')
                    if not spattern:
                        continue
                    try:
                        excluded_patterns.append(re.compile(spattern))
                    except re.error as ex:
                        fatal('invalid --exclude-rx pattern (%s): %s'
                              % (spattern, ex))
    return excluded_patterns
1031
1032
def should_rx_exclude_path(path, exclude_rxs):
    """Tell whether any regular expression in exclude_rxs matches path.

    The patterns are tried in order with search(); the first one that
    matches is reported via debug1() and True is returned.  Returns False
    when no pattern matches.
    """
    for pattern in exclude_rxs:
        if not pattern.search(path):
            continue
        debug1('Skipping %r: excluded by rx pattern %r.\n'
               % (path, pattern.pattern))
        return True
    return False
1041
1042
1043 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1044 # that resolve against the current filesystem in the strip/graft
1045 # functions for example, but elsewhere as well.  I suspect bup's not
1046 # always being careful about that.  For some cases, the contents of
1047 # the current filesystem should be irrelevant, and consulting it might
1048 # produce the wrong result, perhaps via unintended symlink resolution,
1049 # for example.
1050
def path_components(path):
    """Split path into a list of (name, full_path_to_name) pairs.

    Path must start with '/', otherwise an Exception is raised.  The path
    is normalized with os.path.abspath() first, and the first pair always
    describes the root.
    Example:
      '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
    if not path.startswith('/'):
        raise Exception('path must start with "/": %s' % path)
    normalized = os.path.abspath(path)
    components = [('', '/')]
    if normalized != '/':
        so_far = ''
        # The text before the leading '/' is empty and already covered by
        # the root entry, so skip the first piece of the split.
        for name in normalized.split('/')[1:]:
            so_far = so_far + '/' + name
            components.append((name, so_far))
    return components
1068
1069
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    The prefixes are tried longest first, and a prefix only applies when
    it covers whole path components: '/foo' strips '/foo' and '/foo/bar',
    but not '/foobar'.  A prefix that normalizes to '/' is ignored.  When
    no prefix applies the result is path_components(path).
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == '/':
            continue
        if not normalized_path.startswith(normalized_bp):
            continue
        remainder = normalized_path[len(normalized_bp):]
        # Require the match to end on a component boundary; otherwise
        # '/a/b' would wrongly be stripped from '/a/bcd'.
        if remainder and not remainder.startswith('/'):
            continue
        prefix = normalized_bp
        result = []
        # The first piece of the split is '' and stands for the stripped
        # prefix itself, which becomes the new root.
        for p in remainder.split('/'):
            if p: # not root
                prefix += '/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1092
1093
def grafted_path_components(graft_points, path):
    """Return the components of path after applying the first matching graft.

    'graft_points' is an iterable of (old_prefix, new_prefix) pairs, as
    given by --graft old=new.  The result is a list of (name, fs_path)
    pairs in the style of path_components(); the faked directories that
    lead up to the graft point have None as their fs_path.  A graft only
    applies when old_prefix covers whole components of path ('/foo'
    matches '/foo' and '/foo/bar', but not '/foobar').  When no graft
    applies the result is path_components() of the normalized path.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize the prefixes (note: this does not make them absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if not clean_path.startswith(old_prefix):
            continue
        remainder = clean_path[len(old_prefix):]
        # Only graft at a component boundary, so that /foo=/x leaves
        # /foobar alone.  A prefix ending in '/' (i.e. the root) always
        # ends on a boundary.
        if remainder and not old_prefix.endswith('/') \
           and not remainder.startswith('/'):
            continue
        # Swap the prefixes by plain concatenation; a regex substitution
        # would interpret backslashes in new_prefix as escapes.
        grafted_path = new_prefix + remainder
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = '/' + grafted_path.lstrip('/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
        new_prefix_parts = new_prefix.split('/')
        result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == '/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1135
1136
# Alias for hashlib's SHA-1 hash constructor.
Sha1 = hashlib.sha1
1138
1139
# The localtime() provided by _helpers, or None when it has none.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Result type for the localtime() wrapper: the nine fields of a
    # time.struct_time followed by tm_gmtoff and tm_zone.
    bup_time = namedtuple('bup_time',
                          ('tm_year', 'tm_mon', 'tm_mday',
                           'tm_hour', 'tm_min', 'tm_sec',
                           'tm_wday', 'tm_yday',
                           'tm_isdst', 'tm_gmtoff', 'tm_zone'))
1147
# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the result of _helpers.localtime(time) as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        h, m = divmod(offmin, 60)
        # Format the sign separately: negating a zero hour count loses the
        # sign, which would turn an offset like -00:30 into "+0030".  An
        # offset that truncates to zero minutes is reported as "+0000".
        sign = '-' if (off < 0 and offmin) else '+'
        return "%s%02d%02d" % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any items past the
        first nine."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted by
        strftime('%z')."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged; localtime() results are already usable with
        the time module here."""
        return x
1175
1176
_some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a save name, False otherwise.

    Enforces a superset of the restrictions in git-check-ref-format(1).
    """
    # Whole-name restrictions.
    if name == '@':
        return False
    if name.startswith('/') or name.endswith('/') or name.endswith('.'):
        return False
    # Forbidden characters and sequences anywhere in the name.
    if _some_invalid_save_parts_rx.search(name):
        return False
    # Control characters and DEL.
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    # Per-component restrictions.
    return not any(part.startswith('.') or part.endswith('.lock')
                   for part in name.split('/'))
1194
1195
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period like '30d' or '2min' to a number of seconds.

    A period is a decimal count followed by one of the units s, min, h,
    d, w, m (counted as 31 days) or y (counted as 366 days).  The string
    'forever' yields float('inf').  Returns None when s is not a valid
    period.
    """
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if not parsed:
        return None
    magnitude, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    seconds_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    return int(magnitude) * seconds_per_unit[unit]