lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from __future__ import absolute_import, division
   4 from collections import namedtuple
   5 from contextlib import contextmanager
   6 from ctypes import sizeof, c_void_p
   7 from os import environ
   8 from pipes import quote
   9 from subprocess import PIPE, Popen
  10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
  11 import hashlib, heapq, math, operator, time, grp, tempfile
  12
  13 from bup import _helpers
  14 from bup import compat
  15 # This function should really be in helpers, not in bup.options.  But we
  16 # want options.py to be standalone so people can include it in other projects.
  17 from bup.options import _tty_width as tty_width
  18
  19
  20 class Nonlocal:
  21     """Helper to deal with Python scoping issues"""
  22     pass
  23
  24
  25 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  26 assert(sc_page_size > 0)
  27
  28 sc_arg_max = os.sysconf('SC_ARG_MAX')
  29 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  30     sc_arg_max = 2 * 1024 * 1024
  31
  32 def last(iterable):
  33     result = None
  34     for result in iterable:
  35         pass
  36     return result
  37
  38
  39 def atoi(s):
  40     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  41     try:
  42         return int(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 def atof(s):
  48     """Convert the string 's' to a float. Return 0 if s is not a number."""
  49     try:
  50         return float(s or '0')
  51     except ValueError:
  52         return 0
  53
  54
  55 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  56
  57
  58 try:
  59     _fdatasync = os.fdatasync
  60 except AttributeError:
  61     _fdatasync = os.fsync
  62
  63 if sys.platform.startswith('darwin'):
  64     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  65     import fcntl
  66     def fdatasync(fd):
  67         try:
  68             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  69         except IOError as e:
  70             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  71             if e.errno == errno.ENOTSUP:
  72                 return _fdatasync(fd)
  73             else:
  74                 raise
  75 else:
  76     fdatasync = _fdatasync
  77
  78
  79 def partition(predicate, stream):
  80     """Returns (leading_matches_it, rest_it), where leading_matches_it
  81     must be completely exhausted before traversing rest_it.
  82
  83     """
  84     stream = iter(stream)
  85     ns = Nonlocal()
  86     ns.first_nonmatch = None
  87     def leading_matches():
  88         for x in stream:
  89             if predicate(x):
  90                 yield x
  91             else:
  92                 ns.first_nonmatch = (x,)
  93                 break
  94     def rest():
  95         if ns.first_nonmatch:
  96             yield ns.first_nonmatch[0]
  97             for x in stream:
  98                 yield x
  99     return (leading_matches(), rest())
 100
 101
 102 def merge_dict(*xs):
 103     result = {}
 104     for x in xs:
 105         result.update(x)
 106     return result
 107
 108
 109 def lines_until_sentinel(f, sentinel, ex_type):
 110     # sentinel must end with \n and must contain only one \n
 111     while True:
 112         line = f.readline()
 113         if not (line and line.endswith('\n')):
 114             raise ex_type('Hit EOF while reading line')
 115         if line == sentinel:
 116             return
 117         yield line
 118
 119
 120 def stat_if_exists(path):
 121     try:
 122         return os.stat(path)
 123     except OSError as e:
 124         if e.errno != errno.ENOENT:
 125             raise
 126     return None
 127
 128
 129 # Write (blockingly) to sockets that may or may not be in blocking mode.
 130 # We need this because our stderr is sometimes eaten by subprocesses
 131 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 132 # leading to race conditions.  Ick.  We'll do it the hard way.
 133 def _hard_write(fd, buf):
 134     while buf:
 135         (r,w,x) = select.select([], [fd], [], None)
 136         if not w:
 137             raise IOError('select(fd) returned without being writable')
 138         try:
 139             sz = os.write(fd, buf)
 140         except OSError as e:
 141             if e.errno != errno.EAGAIN:
 142                 raise
 143         assert(sz >= 0)
 144         buf = buf[sz:]
 145
 146
 147 _last_prog = 0
 148 def log(s):
 149     """Print a log message to stderr."""
 150     global _last_prog
 151     sys.stdout.flush()
 152     _hard_write(sys.stderr.fileno(), s)
 153     _last_prog = 0
 154
 155
 156 def debug1(s):
 157     if buglvl >= 1:
 158         log(s)
 159
 160
 161 def debug2(s):
 162     if buglvl >= 2:
 163         log(s)
 164
 165
 166 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 167 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 168 _last_progress = ''
 169 def progress(s):
 170     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 171     global _last_progress
 172     if istty2:
 173         log(s)
 174         _last_progress = s
 175
 176
 177 def qprogress(s):
 178     """Calls progress() only if we haven't printed progress in a while.
 179
 180     This avoids overloading the stderr buffer with excess junk.
 181     """
 182     global _last_prog
 183     now = time.time()
 184     if now - _last_prog > 0.1:
 185         progress(s)
 186         _last_prog = now
 187
 188
 189 def reprogress():
 190     """Calls progress() to redisplay the most recent progress message.
 191
 192     Useful after you've printed some other message that wipes out the
 193     progress line.
 194     """
 195     if _last_progress and _last_progress.endswith('\r'):
 196         progress(_last_progress)
 197
 198
 199 def mkdirp(d, mode=None):
 200     """Recursively create directories on path 'd'.
 201
 202     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 203     the path already exists.
 204     """
 205     try:
 206         if mode:
 207             os.makedirs(d, mode)
 208         else:
 209             os.makedirs(d)
 210     except OSError as e:
 211         if e.errno == errno.EEXIST:
 212             pass
 213         else:
 214             raise
 215
 216
 217 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 218     if key:
 219         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 220     else:
 221         samekey = operator.eq
 222     count = 0
 223     total = sum(len(it) for it in iters)
 224     iters = (iter(it) for it in iters)
 225     heap = ((next(it, None),it) for it in iters)
 226     heap = [(e,it) for e,it in heap if e]
 227
 228     heapq.heapify(heap)
 229     pe = None
 230     while heap:
 231         if not count % pfreq:
 232             pfunc(count, total)
 233         e, it = heap[0]
 234         if not samekey(e, pe):
 235             pe = e
 236             yield e
 237         count += 1
 238         try:
 239             e = next(it)
 240         except StopIteration:
 241             heapq.heappop(heap) # remove current
 242         else:
 243             heapq.heapreplace(heap, (e, it)) # shift current to new location
 244     pfinal(count, total)
 245
 246
 247 def unlink(f):
 248     """Delete a file at path 'f' if it currently exists.
 249
 250     Unlike os.unlink(), does not throw an exception if the file didn't already
 251     exist.
 252     """
 253     try:
 254         os.unlink(f)
 255     except OSError as e:
 256         if e.errno != errno.ENOENT:
 257             raise
 258
 259
 260 def shstr(cmd):
 261     if isinstance(cmd, compat.str_type):
 262         return cmd
 263     else:
 264         return ' '.join(map(quote, cmd))
 265
 266 exc = subprocess.check_call
 267
 268 def exo(cmd,
 269         input=None,
 270         stdin=None,
 271         stderr=None,
 272         shell=False,
 273         check=True,
 274         preexec_fn=None):
 275     if input:
 276         assert stdin in (None, PIPE)
 277         stdin = PIPE
 278     p = Popen(cmd,
 279               stdin=stdin, stdout=PIPE, stderr=stderr,
 280               shell=shell,
 281               preexec_fn=preexec_fn)
 282     out, err = p.communicate(input)
 283     if check and p.returncode != 0:
 284         raise Exception('subprocess %r failed with status %d, stderr: %r'
 285                         % (' '.join(map(quote, cmd)), p.returncode, err))
 286     return out, err, p
 287
 288 def readpipe(argv, preexec_fn=None, shell=False):
 289     """Run a subprocess and return its output."""
 290     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 291                          shell=shell)
 292     out, err = p.communicate()
 293     if p.returncode != 0:
 294         raise Exception('subprocess %r failed with status %d'
 295                         % (' '.join(argv), p.returncode))
 296     return out
 297
 298
 299 def _argmax_base(command):
 300     base_size = 2048
 301     for c in command:
 302         base_size += len(command) + 1
 303     for k, v in compat.items(environ):
 304         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 305     return base_size
 306
 307
 308 def _argmax_args_size(args):
 309     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 310
 311
 312 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 313     """If args is not empty, yield the output produced by calling the
 314 command list with args as a sequence of strings (It may be necessary
 315 to return multiple strings in order to respect ARG_MAX)."""
 316     # The optional arg_max arg is a workaround for an issue with the
 317     # current wvtest behavior.
 318     base_size = _argmax_base(command)
 319     while args:
 320         room = arg_max - base_size
 321         i = 0
 322         while i < len(args):
 323             next_size = _argmax_args_size(args[i:i+1])
 324             if room - next_size < 0:
 325                 break
 326             room -= next_size
 327             i += 1
 328         sub_args = args[:i]
 329         args = args[i:]
 330         assert(len(sub_args))
 331         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 332
 333
 334 def resolve_parent(p):
 335     """Return the absolute path of a file without following any final symlink.
 336
 337     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 338     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 339     will follow symlinks in p's directory)
 340     """
 341     try:
 342         st = os.lstat(p)
 343     except OSError:
 344         st = None
 345     if st and stat.S_ISLNK(st.st_mode):
 346         (dir, name) = os.path.split(p)
 347         dir = os.path.realpath(dir)
 348         out = os.path.join(dir, name)
 349     else:
 350         out = os.path.realpath(p)
 351     #log('realpathing:%r,%r\n' % (p, out))
 352     return out
 353
 354
 355 def detect_fakeroot():
 356     "Return True if we appear to be running under fakeroot."
 357     return os.getenv("FAKEROOTKEY") != None
 358
 359
 360 if sys.platform.startswith('cygwin'):
 361     def is_superuser():
 362         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 363         groups = os.getgroups()
 364         return 544 in groups or 0 in groups
 365 else:
 366     def is_superuser():
 367         return os.geteuid() == 0
 368
 369
 370 def cache_key_value(get_value, key, cache):
 371     """Return (value, was_cached).  If there is a value in the cache
 372     for key, use that, otherwise, call get_value(key) which should
 373     throw a KeyError if there is no value -- in which case the cached
 374     and returned value will be None.
 375     """
 376     try: # Do we already have it (or know there wasn't one)?
 377         value = cache[key]
 378         return value, True
 379     except KeyError:
 380         pass
 381     value = None
 382     try:
 383         cache[key] = value = get_value(key)
 384     except KeyError:
 385         cache[key] = None
 386     return value, False
 387
 388
 389 _hostname = None
 390 def hostname():
 391     """Get the FQDN of this machine."""
 392     global _hostname
 393     if not _hostname:
 394         _hostname = socket.getfqdn()
 395     return _hostname
 396
 397
 398 def format_filesize(size):
 399     unit = 1024.0
 400     size = float(size)
 401     if size < unit:
 402         return "%d" % (size)
 403     exponent = int(math.log(size) // math.log(unit))
 404     size_prefix = "KMGTPE"[exponent - 1]
 405     return "%.1f%s" % (size // math.pow(unit, exponent), size_prefix)
 406
 407
 408 class NotOk(Exception):
 409     pass
 410
 411
 412 class BaseConn:
 413     def __init__(self, outp):
 414         self.outp = outp
 415
 416     def close(self):
 417         while self._read(65536): pass
 418
 419     def read(self, size):
 420         """Read 'size' bytes from input stream."""
 421         self.outp.flush()
 422         return self._read(size)
 423
 424     def readline(self):
 425         """Read from input stream until a newline is found."""
 426         self.outp.flush()
 427         return self._readline()
 428
 429     def write(self, data):
 430         """Write 'data' to output stream."""
 431         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 432         self.outp.write(data)
 433
 434     def has_input(self):
 435         """Return true if input stream is readable."""
 436         raise NotImplemented("Subclasses must implement has_input")
 437
 438     def ok(self):
 439         """Indicate end of output from last sent command."""
 440         self.write('\nok\n')
 441
 442     def error(self, s):
 443         """Indicate server error to the client."""
 444         s = re.sub(r'\s+', ' ', str(s))
 445         self.write('\nerror %s\n' % s)
 446
 447     def _check_ok(self, onempty):
 448         self.outp.flush()
 449         rl = ''
 450         for rl in linereader(self):
 451             #log('%d got line: %r\n' % (os.getpid(), rl))
 452             if not rl:  # empty line
 453                 continue
 454             elif rl == 'ok':
 455                 return None
 456             elif rl.startswith('error '):
 457                 #log('client: error: %s\n' % rl[6:])
 458                 return NotOk(rl[6:])
 459             else:
 460                 onempty(rl)
 461         raise Exception('server exited unexpectedly; see errors above')
 462
 463     def drain_and_check_ok(self):
 464         """Remove all data for the current command from input stream."""
 465         def onempty(rl):
 466             pass
 467         return self._check_ok(onempty)
 468
 469     def check_ok(self):
 470         """Verify that server action completed successfully."""
 471         def onempty(rl):
 472             raise Exception('expected "ok", got %r' % rl)
 473         return self._check_ok(onempty)
 474
 475
 476 class Conn(BaseConn):
 477     def __init__(self, inp, outp):
 478         BaseConn.__init__(self, outp)
 479         self.inp = inp
 480
 481     def _read(self, size):
 482         return self.inp.read(size)
 483
 484     def _readline(self):
 485         return self.inp.readline()
 486
 487     def has_input(self):
 488         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 489         if rl:
 490             assert(rl[0] == self.inp.fileno())
 491             return True
 492         else:
 493             return None
 494
 495
 496 def checked_reader(fd, n):
 497     while n > 0:
 498         rl, _, _ = select.select([fd], [], [])
 499         assert(rl[0] == fd)
 500         buf = os.read(fd, n)
 501         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 502         yield buf
 503         n -= len(buf)
 504
 505
 506 MAX_PACKET = 128 * 1024
 507 def mux(p, outfd, outr, errr):
 508     try:
 509         fds = [outr, errr]
 510         while p.poll() is None:
 511             rl, _, _ = select.select(fds, [], [])
 512             for fd in rl:
 513                 if fd == outr:
 514                     buf = os.read(outr, MAX_PACKET)
 515                     if not buf: break
 516                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 517                 elif fd == errr:
 518                     buf = os.read(errr, 1024)
 519                     if not buf: break
 520                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 521     finally:
 522         os.write(outfd, struct.pack('!IB', 0, 3))
 523
 524
 525 class DemuxConn(BaseConn):
 526     """A helper class for bup's client-server protocol."""
 527     def __init__(self, infd, outp):
 528         BaseConn.__init__(self, outp)
 529         # Anything that comes through before the sync string was not
 530         # multiplexed and can be assumed to be debug/log before mux init.
 531         tail = ''
 532         while tail != 'BUPMUX':
 533             b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
 534             if not b:
 535                 raise IOError('demux: unexpected EOF during initialization')
 536             tail += b
 537             sys.stderr.write(tail[:-6])  # pre-mux log messages
 538             tail = tail[-6:]
 539         self.infd = infd
 540         self.reader = None
 541         self.buf = None
 542         self.closed = False
 543
 544     def write(self, data):
 545         self._load_buf(0)
 546         BaseConn.write(self, data)
 547
 548     def _next_packet(self, timeout):
 549         if self.closed: return False
 550         rl, wl, xl = select.select([self.infd], [], [], timeout)
 551         if not rl: return False
 552         assert(rl[0] == self.infd)
 553         ns = ''.join(checked_reader(self.infd, 5))
 554         n, fdw = struct.unpack('!IB', ns)
 555         assert(n <= MAX_PACKET)
 556         if fdw == 1:
 557             self.reader = checked_reader(self.infd, n)
 558         elif fdw == 2:
 559             for buf in checked_reader(self.infd, n):
 560                 sys.stderr.write(buf)
 561         elif fdw == 3:
 562             self.closed = True
 563             debug2("DemuxConn: marked closed\n")
 564         return True
 565
 566     def _load_buf(self, timeout):
 567         if self.buf is not None:
 568             return True
 569         while not self.closed:
 570             while not self.reader:
 571                 if not self._next_packet(timeout):
 572                     return False
 573             try:
 574                 self.buf = next(self.reader)
 575                 return True
 576             except StopIteration:
 577                 self.reader = None
 578         return False
 579
 580     def _read_parts(self, ix_fn):
 581         while self._load_buf(None):
 582             assert(self.buf is not None)
 583             i = ix_fn(self.buf)
 584             if i is None or i == len(self.buf):
 585                 yv = self.buf
 586                 self.buf = None
 587             else:
 588                 yv = self.buf[:i]
 589                 self.buf = self.buf[i:]
 590             yield yv
 591             if i is not None:
 592                 break
 593
 594     def _readline(self):
 595         def find_eol(buf):
 596             try:
 597                 return buf.index('\n')+1
 598             except ValueError:
 599                 return None
 600         return ''.join(self._read_parts(find_eol))
 601
 602     def _read(self, size):
 603         csize = [size]
 604         def until_size(buf): # Closes on csize
 605             if len(buf) < csize[0]:
 606                 csize[0] -= len(buf)
 607                 return None
 608             else:
 609                 return csize[0]
 610         return ''.join(self._read_parts(until_size))
 611
 612     def has_input(self):
 613         return self._load_buf(0)
 614
 615
 616 def linereader(f):
 617     """Generate a list of input lines from 'f' without terminating newlines."""
 618     while 1:
 619         line = f.readline()
 620         if not line:
 621             break
 622         yield line[:-1]
 623
 624
 625 def chunkyreader(f, count = None):
 626     """Generate a list of chunks of data read from 'f'.
 627
 628     If count is None, read until EOF is reached.
 629
 630     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 631     reached while reading, raise IOError.
 632     """
 633     if count != None:
 634         while count > 0:
 635             b = f.read(min(count, 65536))
 636             if not b:
 637                 raise IOError('EOF with %d bytes remaining' % count)
 638             yield b
 639             count -= len(b)
 640     else:
 641         while 1:
 642             b = f.read(65536)
 643             if not b: break
 644             yield b
 645
 646
 647 @contextmanager
 648 def atomically_replaced_file(name, mode='w', buffering=-1):
 649     """Yield a file that will be atomically renamed name when leaving the block.
 650
 651     This contextmanager yields an open file object that is backed by a
 652     temporary file which will be renamed (atomically) to the target
 653     name if everything succeeds.
 654
 655     The mode and buffering arguments are handled exactly as with open,
 656     and the yielded file will have very restrictive permissions, as
 657     per mkstemp.
 658
 659     E.g.::
 660
 661         with atomically_replaced_file('foo.txt', 'w') as f:
 662             f.write('hello jack.')
 663
 664     """
 665
 666     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 667                                        text=('b' not in mode))
 668     try:
 669         try:
 670             f = os.fdopen(ffd, mode, buffering)
 671         except:
 672             os.close(ffd)
 673             raise
 674         try:
 675             yield f
 676         finally:
 677             f.close()
 678         os.rename(tempname, name)
 679     finally:
 680         unlink(tempname)  # nonexistant file is ignored
 681
 682
 683 def slashappend(s):
 684     """Append "/" to 's' if it doesn't aleady end in "/"."""
 685     if s and not s.endswith('/'):
 686         return s + '/'
 687     else:
 688         return s
 689
 690
 691 def _mmap_do(f, sz, flags, prot, close):
 692     if not sz:
 693         st = os.fstat(f.fileno())
 694         sz = st.st_size
 695     if not sz:
 696         # trying to open a zero-length map gives an error, but an empty
 697         # string has all the same behaviour of a zero-length map, ie. it has
 698         # no elements :)
 699         return ''
 700     map = mmap.mmap(f.fileno(), sz, flags, prot)
 701     if close:
 702         f.close()  # map will persist beyond file close
 703     return map
 704
 705
 706 def mmap_read(f, sz = 0, close=True):
 707     """Create a read-only memory mapped region on file 'f'.
 708     If sz is 0, the region will cover the entire file.
 709     """
 710     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 711
 712
 713 def mmap_readwrite(f, sz = 0, close=True):
 714     """Create a read-write memory mapped region on file 'f'.
 715     If sz is 0, the region will cover the entire file.
 716     """
 717     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 718                     close)
 719
 720
 721 def mmap_readwrite_private(f, sz = 0, close=True):
 722     """Create a read-write memory mapped region on file 'f'.
 723     If sz is 0, the region will cover the entire file.
 724     The map is private, which means the changes are never flushed back to the
 725     file.
 726     """
 727     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 728                     close)
 729
 730
 731 _mincore = getattr(_helpers, 'mincore', None)
 732 if _mincore:
 733     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 734     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 735
 736     _fmincore_chunk_size = None
 737     def _set_fmincore_chunk_size():
 738         global _fmincore_chunk_size
 739         pref_chunk_size = 64 * 1024 * 1024
 740         chunk_size = sc_page_size
 741         if (sc_page_size < pref_chunk_size):
 742             chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
 743         _fmincore_chunk_size = chunk_size
 744
 745     def fmincore(fd):
 746         """Return the mincore() data for fd as a bytearray whose values can be
 747         tested via MINCORE_INCORE, or None if fd does not fully
 748         support the operation."""
 749         st = os.fstat(fd)
 750         if (st.st_size == 0):
 751             return bytearray(0)
 752         if not _fmincore_chunk_size:
 753             _set_fmincore_chunk_size()
 754         pages_per_chunk = _fmincore_chunk_size // sc_page_size;
 755         page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
 756         chunk_count = page_count // _fmincore_chunk_size
 757         if chunk_count < 1:
 758             chunk_count = 1
 759         result = bytearray(page_count)
 760         for ci in compat.range(chunk_count):
 761             pos = _fmincore_chunk_size * ci;
 762             msize = min(_fmincore_chunk_size, st.st_size - pos)
 763             try:
 764                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 765             except mmap.error as ex:
 766                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 767                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 768                     return None
 769                 raise ex
 770             try:
 771                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
 772             except OSError as ex:
 773                 if ex.errno == errno.ENOSYS:
 774                     return None
 775                 raise
 776         return result
 777
 778
 779 def parse_timestamp(epoch_str):
 780     """Return the number of nanoseconds since the epoch that are described
 781 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 782 throw a ValueError that may contain additional information."""
 783     ns_per = {'s' :  1000000000,
 784               'ms' : 1000000,
 785               'us' : 1000,
 786               'ns' : 1}
 787     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 788     if not match:
 789         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 790             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 791         raise ValueError()
 792     (n, units) = match.group(1, 2)
 793     if not n:
 794         n = 1
 795     n = int(n)
 796     return n * ns_per[units]
 797
 798
 799 def parse_num(s):
 800     """Parse data size information into a float number.
 801
 802     Here are some examples of conversions:
 803         199.2k means 203981 bytes
 804         1GB means 1073741824 bytes
 805         2.1 tb means 2199023255552 bytes
 806     """
 807     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 808     if not g:
 809         raise ValueError("can't parse %r as a number" % s)
 810     (val, unit) = g.groups()
 811     num = float(val)
 812     unit = unit.lower()
 813     if unit in ['t', 'tb']:
 814         mult = 1024*1024*1024*1024
 815     elif unit in ['g', 'gb']:
 816         mult = 1024*1024*1024
 817     elif unit in ['m', 'mb']:
 818         mult = 1024*1024
 819     elif unit in ['k', 'kb']:
 820         mult = 1024
 821     elif unit in ['', 'b']:
 822         mult = 1
 823     else:
 824         raise ValueError("invalid unit %r in number %r" % (unit, s))
 825     return int(num*mult)
 826
 827
 828 def count(l):
 829     """Count the number of elements in an iterator. (consumes the iterator)"""
 830     return reduce(lambda x,y: x+1, l)
 831
 832
 833 saved_errors = []
 834 def add_error(e):
 835     """Append an error message to the list of saved errors.
 836
 837     Once processing is able to stop and output the errors, the saved errors are
 838     accessible in the module variable helpers.saved_errors.
 839     """
 840     saved_errors.append(e)
 841     log('%-70s\n' % e)
 842
 843
 844 def clear_errors():
 845     global saved_errors
 846     saved_errors = []
 847
 848
 849 def die_if_errors(msg=None, status=1):
 850     global saved_errors
 851     if saved_errors:
 852         if not msg:
 853             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 854         log(msg)
 855         sys.exit(status)
 856
 857
 858 def handle_ctrl_c():
 859     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 860
 861     The new exception handler will make sure that bup will exit without an ugly
 862     stacktrace when Ctrl-C is hit.
 863     """
 864     oldhook = sys.excepthook
 865     def newhook(exctype, value, traceback):
 866         if exctype == KeyboardInterrupt:
 867             log('\nInterrupted.\n')
 868         else:
 869             return oldhook(exctype, value, traceback)
 870     sys.excepthook = newhook
 871
 872
 873 def columnate(l, prefix):
 874     """Format elements of 'l' in columns with 'prefix' leading each line.
 875
 876     The number of columns is determined automatically based on the string
 877     lengths.
 878     """
 879     if not l:
 880         return ""
 881     l = l[:]
 882     clen = max(len(s) for s in l)
 883     ncols = (tty_width() - len(prefix)) // (clen + 2)
 884     if ncols <= 1:
 885         ncols = 1
 886         clen = 0
 887     cols = []
 888     while len(l) % ncols:
 889         l.append('')
 890     rows = len(l) // ncols
 891     for s in compat.range(0, len(l), rows):
 892         cols.append(l[s:s+rows])
 893     out = ''
 894     for row in zip(*cols):
 895         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 896     return out
 897
 898
 899 def parse_date_or_fatal(str, fatal):
 900     """Parses the given date or calls Option.fatal().
 901     For now we expect a string that contains a float."""
 902     try:
 903         date = float(str)
 904     except ValueError as e:
 905         raise fatal('invalid date format (should be a float): %r' % e)
 906     else:
 907         return date
 908
 909
 910 def parse_excludes(options, fatal):
 911     """Traverse the options and extract all excludes, or call Option.fatal()."""
 912     excluded_paths = []
 913
 914     for flag in options:
 915         (option, parameter) = flag
 916         if option == '--exclude':
 917             excluded_paths.append(resolve_parent(parameter))
 918         elif option == '--exclude-from':
 919             try:
 920                 f = open(resolve_parent(parameter))
 921             except IOError as e:
 922                 raise fatal("couldn't read %s" % parameter)
 923             for exclude_path in f.readlines():
 924                 # FIXME: perhaps this should be rstrip('\n')
 925                 exclude_path = resolve_parent(exclude_path.strip())
 926                 if exclude_path:
 927                     excluded_paths.append(exclude_path)
 928     return sorted(frozenset(excluded_paths))
 929
 930
 931 def parse_rx_excludes(options, fatal):
 932     """Traverse the options and extract all rx excludes, or call
 933     Option.fatal()."""
 934     excluded_patterns = []
 935
 936     for flag in options:
 937         (option, parameter) = flag
 938         if option == '--exclude-rx':
 939             try:
 940                 excluded_patterns.append(re.compile(parameter))
 941             except re.error as ex:
 942                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 943         elif option == '--exclude-rx-from':
 944             try:
 945                 f = open(resolve_parent(parameter))
 946             except IOError as e:
 947                 raise fatal("couldn't read %s" % parameter)
 948             for pattern in f.readlines():
 949                 spattern = pattern.rstrip('\n')
 950                 if not spattern:
 951                     continue
 952                 try:
 953                     excluded_patterns.append(re.compile(spattern))
 954                 except re.error as ex:
 955                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 956     return excluded_patterns
 957
 958
 959 def should_rx_exclude_path(path, exclude_rxs):
 960     """Return True if path matches a regular expression in exclude_rxs."""
 961     for rx in exclude_rxs:
 962         if rx.search(path):
 963             debug1('Skipping %r: excluded by rx pattern %r.\n'
 964                    % (path, rx.pattern))
 965             return True
 966     return False
 967
 968
 969 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 970 # that resolve against the current filesystem in the strip/graft
 971 # functions for example, but elsewhere as well.  I suspect bup's not
 972 # always being careful about that.  For some cases, the contents of
 973 # the current filesystem should be irrelevant, and consulting it might
 974 # produce the wrong result, perhaps via unintended symlink resolution,
 975 # for example.
 976
 977 def path_components(path):
 978     """Break path into a list of pairs of the form (name,
 979     full_path_to_name).  Path must start with '/'.
 980     Example:
 981       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 982     if not path.startswith('/'):
 983         raise Exception('path must start with "/": %s' % path)
 984     # Since we assume path startswith('/'), we can skip the first element.
 985     result = [('', '/')]
 986     norm_path = os.path.abspath(path)
 987     if norm_path == '/':
 988         return result
 989     full_path = ''
 990     for p in norm_path.split('/')[1:]:
 991         full_path += '/' + p
 992         result.append((p, full_path))
 993     return result
 994
 995
 996 def stripped_path_components(path, strip_prefixes):
 997     """Strip any prefix in strip_prefixes from path and return a list
 998     of path components where each component is (name,
 999     none_or_full_fs_path_to_name).  Assume path startswith('/').
1000     See thelpers.py for examples."""
1001     normalized_path = os.path.abspath(path)
1002     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
1003     for bp in sorted_strip_prefixes:
1004         normalized_bp = os.path.abspath(bp)
1005         if normalized_bp == '/':
1006             continue
1007         if normalized_path.startswith(normalized_bp):
1008             prefix = normalized_path[:len(normalized_bp)]
1009             result = []
1010             for p in normalized_path[len(normalized_bp):].split('/'):
1011                 if p: # not root
1012                     prefix += '/'
1013                 prefix += p
1014                 result.append((p, prefix))
1015             return result
1016     # Nothing to strip.
1017     return path_components(path)
1018
1019
def grafted_path_components(graft_points, path):
    """Return the components of path as a list of (name,
    none_or_full_fs_path_to_name) pairs, rewritten according to the
    first graft point that path falls under.

    graft_points is an iterable of (old_prefix, new_prefix) pairs, as
    given by --graft old_prefix=new_prefix.  A graft point applies
    when old_prefix (after os.path.normpath()) is the absolute path
    itself or one of its parent directories.  If no graft point
    applies, the result is path_components() of the absolute path."""
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        # The prefixes are only normalized, not made absolute.
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # Match whole components only; a bare startswith() would let
        # --graft /foo=... capture /foobar as well.
        if old_prefix == '/':
            under_graft = clean_path.startswith('/')
        else:
            under_graft = clean_path == old_prefix \
                          or clean_path.startswith(old_prefix + '/')
        if not under_graft:
            continue
        # Swap the prefix by slicing rather than with re.sub(), which
        # would interpret backslashes in new_prefix as escapes.
        grafted_path = new_prefix + clean_path[len(old_prefix):]
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = '/' + grafted_path.lstrip('/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
        new_prefix_parts = new_prefix.split('/')
        result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
        # The faked directories have no corresponding filesystem path.
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == '/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1061
1062
# Alias for hashlib's SHA-1 constructor.
Sha1 = hashlib.sha1
1064
1065
# _helpers.localtime, or None when _helpers doesn't provide it.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Eleven fields, named after the attributes of time.struct_time.
    bup_time = namedtuple('bup_time',
                          'tm_year tm_mon tm_mday tm_hour tm_min tm_sec '
                          'tm_wday tm_yday tm_isdst tm_gmtoff tm_zone')
1073
# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the result of _helpers.localtime(time) as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        # Format the sign separately: a signed hour formatted with
        # %+03d loses the minus sign whenever the hour is 0, so an
        # offset of -00:30 would come out as "+0030".  An offset that
        # truncates to zero minutes is still reported as "+0000".
        sign = '-' if off < 0 and offmin else '+'
        return "%s%02d%02d" % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any items beyond
        the first nine."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted
        by strftime's %z."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged; localtime() is time.localtime here."""
        return x
1101
1102
_some_invalid_save_parts_rx = re.compile(r'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a save name, False otherwise.

    A name is rejected when it is empty, is exactly '@', starts or
    ends with '/', ends with '.', contains a space or any of
    '[~^:?*\\', contains '..', '//' or '@{', contains a control
    character (below 0x20, or 0x7f), or has a '/'-separated part that
    starts with '.' or ends with '.lock'."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if not name:
        # An empty name can't be a valid ref name either.
        return False
    if name == '@' \
       or name.startswith('/') or name.endswith('/') \
       or name.endswith('.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    for part in name.split('/'):
        if part.startswith('.') or part.endswith('.lock'):
            return False
    return True
1120
1121
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period such as '30s', '5min', '2h', '7d', '4w', '6m'
    or '1y' to a number of seconds.  A month counts as 31 days and a
    year as 366 days.  'forever' yields float('inf'); anything that
    isn't a valid period yields None."""
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    return int(count) * secs_per_unit[unit]