lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from ctypes import sizeof, c_void_p
   5 from os import environ
   6 from contextlib import contextmanager
   7 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   8 import hashlib, heapq, math, operator, time, grp, tempfile
   9
  10 from bup import _helpers
  11
  12
  13 class Nonlocal:
  14     """Helper to deal with Python scoping issues"""
  15     pass
  16
  17
  18 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  19 assert(sc_page_size > 0)
  20
  21 sc_arg_max = os.sysconf('SC_ARG_MAX')
  22 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  23     sc_arg_max = 2 * 1024 * 1024
  24
# This function should really be in helpers, not in bup.options.  But we
# want options.py to be standalone so people can include it in other projects.
# It is therefore imported from there and re-exported here as tty_width.
from bup.options import _tty_width
tty_width = _tty_width
  29
  30
  31 def atoi(s):
  32     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  33     try:
  34         return int(s or '0')
  35     except ValueError:
  36         return 0
  37
  38
  39 def atof(s):
  40     """Convert the string 's' to a float. Return 0 if s is not a number."""
  41     try:
  42         return float(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  48
  49
  50 try:
  51     _fdatasync = os.fdatasync
  52 except AttributeError:
  53     _fdatasync = os.fsync
  54
  55 if sys.platform.startswith('darwin'):
  56     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  57     import fcntl
  58     def fdatasync(fd):
  59         try:
  60             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  61         except IOError as e:
  62             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  63             if e.errno == errno.ENOTSUP:
  64                 return _fdatasync(fd)
  65             else:
  66                 raise
  67 else:
  68     fdatasync = _fdatasync
  69
  70
  71 def partition(predicate, stream):
  72     """Returns (leading_matches_it, rest_it), where leading_matches_it
  73     must be completely exhausted before traversing rest_it.
  74
  75     """
  76     stream = iter(stream)
  77     ns = Nonlocal()
  78     ns.first_nonmatch = None
  79     def leading_matches():
  80         for x in stream:
  81             if predicate(x):
  82                 yield x
  83             else:
  84                 ns.first_nonmatch = (x,)
  85                 break
  86     def rest():
  87         if ns.first_nonmatch:
  88             yield ns.first_nonmatch[0]
  89             for x in stream:
  90                 yield x
  91     return (leading_matches(), rest())
  92
  93
  94 def stat_if_exists(path):
  95     try:
  96         return os.stat(path)
  97     except OSError as e:
  98         if e.errno != errno.ENOENT:
  99             raise
 100     return None
 101
 102
 103 # Write (blockingly) to sockets that may or may not be in blocking mode.
 104 # We need this because our stderr is sometimes eaten by subprocesses
 105 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 106 # leading to race conditions.  Ick.  We'll do it the hard way.
 107 def _hard_write(fd, buf):
 108     while buf:
 109         (r,w,x) = select.select([], [fd], [], None)
 110         if not w:
 111             raise IOError('select(fd) returned without being writable')
 112         try:
 113             sz = os.write(fd, buf)
 114         except OSError as e:
 115             if e.errno != errno.EAGAIN:
 116                 raise
 117         assert(sz >= 0)
 118         buf = buf[sz:]
 119
 120
 121 _last_prog = 0
 122 def log(s):
 123     """Print a log message to stderr."""
 124     global _last_prog
 125     sys.stdout.flush()
 126     _hard_write(sys.stderr.fileno(), s)
 127     _last_prog = 0
 128
 129
 130 def debug1(s):
 131     if buglvl >= 1:
 132         log(s)
 133
 134
 135 def debug2(s):
 136     if buglvl >= 2:
 137         log(s)
 138
 139
 140 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 141 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 142 _last_progress = ''
 143 def progress(s):
 144     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 145     global _last_progress
 146     if istty2:
 147         log(s)
 148         _last_progress = s
 149
 150
 151 def qprogress(s):
 152     """Calls progress() only if we haven't printed progress in a while.
 153
 154     This avoids overloading the stderr buffer with excess junk.
 155     """
 156     global _last_prog
 157     now = time.time()
 158     if now - _last_prog > 0.1:
 159         progress(s)
 160         _last_prog = now
 161
 162
 163 def reprogress():
 164     """Calls progress() to redisplay the most recent progress message.
 165
 166     Useful after you've printed some other message that wipes out the
 167     progress line.
 168     """
 169     if _last_progress and _last_progress.endswith('\r'):
 170         progress(_last_progress)
 171
 172
 173 def mkdirp(d, mode=None):
 174     """Recursively create directories on path 'd'.
 175
 176     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 177     the path already exists.
 178     """
 179     try:
 180         if mode:
 181             os.makedirs(d, mode)
 182         else:
 183             os.makedirs(d)
 184     except OSError as e:
 185         if e.errno == errno.EEXIST:
 186             pass
 187         else:
 188             raise
 189
 190
 191 _unspecified_next_default = object()
 192
 193 def _fallback_next(it, default=_unspecified_next_default):
 194     """Retrieve the next item from the iterator by calling its
 195     next() method. If default is given, it is returned if the
 196     iterator is exhausted, otherwise StopIteration is raised."""
 197
 198     if default is _unspecified_next_default:
 199         return it.next()
 200     else:
 201         try:
 202             return it.next()
 203         except StopIteration:
 204             return default
 205
 206 if sys.version_info < (2, 6):
 207     next =  _fallback_next
 208
 209
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge several iterables into one stream, skipping repeated elements.

    iters:  a sequence of iterables.  It is traversed twice, and each
            member must support len() (used for the progress total).
    pfreq:  pfunc is called whenever the running count is a multiple of it.
    pfunc:  progress callback, called as pfunc(count, total).
    pfinal: called once as pfinal(count, total) after the merge is done.
    key:    optional attribute name.  When given, an element counts as a
            repeat of the previously yielded one if that attribute is
            equal; otherwise the elements themselves are compared.

    Elements are drawn from a heap of (element, iterator) pairs, so the
    output is ordered only if each input is -- presumably callers pass
    already-sorted inputs (TODO confirm).
    """
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    # Pair each iterator with its first element; iterators whose first
    # element is false (including None for an empty one) are left out.
    heap = ((next(it, None),it) for it in iters)
    heap = [(e,it) for e,it in heap if e]

    heapq.heapify(heap)
    pe = None  # the element most recently yielded
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0]
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = it.next() # Don't use next() function, it's too expensive
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            heapq.heapreplace(heap, (e, it)) # shift current to new location
    pfinal(count, total)
 238
 239
 240 def unlink(f):
 241     """Delete a file at path 'f' if it currently exists.
 242
 243     Unlike os.unlink(), does not throw an exception if the file didn't already
 244     exist.
 245     """
 246     try:
 247         os.unlink(f)
 248     except OSError as e:
 249         if e.errno != errno.ENOENT:
 250             raise
 251
 252
 253 def readpipe(argv, preexec_fn=None, shell=False):
 254     """Run a subprocess and return its output."""
 255     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 256                          shell=shell)
 257     out, err = p.communicate()
 258     if p.returncode != 0:
 259         raise Exception('subprocess %r failed with status %d'
 260                         % (' '.join(argv), p.returncode))
 261     return out
 262
 263
 264 def _argmax_base(command):
 265     base_size = 2048
 266     for c in command:
 267         base_size += len(command) + 1
 268     for k, v in environ.iteritems():
 269         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 270     return base_size
 271
 272
 273 def _argmax_args_size(args):
 274     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 275
 276
 277 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 278     """If args is not empty, yield the output produced by calling the
 279 command list with args as a sequence of strings (It may be necessary
 280 to return multiple strings in order to respect ARG_MAX)."""
 281     # The optional arg_max arg is a workaround for an issue with the
 282     # current wvtest behavior.
 283     base_size = _argmax_base(command)
 284     while args:
 285         room = arg_max - base_size
 286         i = 0
 287         while i < len(args):
 288             next_size = _argmax_args_size(args[i:i+1])
 289             if room - next_size < 0:
 290                 break
 291             room -= next_size
 292             i += 1
 293         sub_args = args[:i]
 294         args = args[i:]
 295         assert(len(sub_args))
 296         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 297
 298
 299 def resolve_parent(p):
 300     """Return the absolute path of a file without following any final symlink.
 301
 302     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 303     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 304     will follow symlinks in p's directory)
 305     """
 306     try:
 307         st = os.lstat(p)
 308     except OSError:
 309         st = None
 310     if st and stat.S_ISLNK(st.st_mode):
 311         (dir, name) = os.path.split(p)
 312         dir = os.path.realpath(dir)
 313         out = os.path.join(dir, name)
 314     else:
 315         out = os.path.realpath(p)
 316     #log('realpathing:%r,%r\n' % (p, out))
 317     return out
 318
 319
 320 def detect_fakeroot():
 321     "Return True if we appear to be running under fakeroot."
 322     return os.getenv("FAKEROOTKEY") != None
 323
 324
 325 if sys.platform.startswith('cygwin'):
 326     def is_superuser():
 327         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
 328         groups = os.getgroups()
 329         return 544 in groups or 0 in groups
 330 else:
 331     def is_superuser():
 332         return os.geteuid() == 0
 333
 334
 335 def _cache_key_value(get_value, key, cache):
 336     """Return (value, was_cached).  If there is a value in the cache
 337     for key, use that, otherwise, call get_value(key) which should
 338     throw a KeyError if there is no value -- in which case the cached
 339     and returned value will be None.
 340     """
 341     try: # Do we already have it (or know there wasn't one)?
 342         value = cache[key]
 343         return value, True
 344     except KeyError:
 345         pass
 346     value = None
 347     try:
 348         cache[key] = value = get_value(key)
 349     except KeyError:
 350         cache[key] = None
 351     return value, False
 352
 353
 354 _uid_to_pwd_cache = {}
 355 _name_to_pwd_cache = {}
 356
 357 def pwd_from_uid(uid):
 358     """Return password database entry for uid (may be a cached value).
 359     Return None if no entry is found.
 360     """
 361     global _uid_to_pwd_cache, _name_to_pwd_cache
 362     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 363     if entry and not cached:
 364         _name_to_pwd_cache[entry.pw_name] = entry
 365     return entry
 366
 367
 368 def pwd_from_name(name):
 369     """Return password database entry for name (may be a cached value).
 370     Return None if no entry is found.
 371     """
 372     global _uid_to_pwd_cache, _name_to_pwd_cache
 373     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 374     if entry and not cached:
 375         _uid_to_pwd_cache[entry.pw_uid] = entry
 376     return entry
 377
 378
 379 _gid_to_grp_cache = {}
 380 _name_to_grp_cache = {}
 381
 382 def grp_from_gid(gid):
 383     """Return password database entry for gid (may be a cached value).
 384     Return None if no entry is found.
 385     """
 386     global _gid_to_grp_cache, _name_to_grp_cache
 387     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 388     if entry and not cached:
 389         _name_to_grp_cache[entry.gr_name] = entry
 390     return entry
 391
 392
 393 def grp_from_name(name):
 394     """Return password database entry for name (may be a cached value).
 395     Return None if no entry is found.
 396     """
 397     global _gid_to_grp_cache, _name_to_grp_cache
 398     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 399     if entry and not cached:
 400         _gid_to_grp_cache[entry.gr_gid] = entry
 401     return entry
 402
 403
 404 _username = None
 405 def username():
 406     """Get the user's login name."""
 407     global _username
 408     if not _username:
 409         uid = os.getuid()
 410         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 411     return _username
 412
 413
 414 _userfullname = None
 415 def userfullname():
 416     """Get the user's full name."""
 417     global _userfullname
 418     if not _userfullname:
 419         uid = os.getuid()
 420         entry = pwd_from_uid(uid)
 421         if entry:
 422             _userfullname = entry[4].split(',')[0] or entry[0]
 423         if not _userfullname:
 424             _userfullname = 'user%d' % uid
 425     return _userfullname
 426
 427
 428 _hostname = None
 429 def hostname():
 430     """Get the FQDN of this machine."""
 431     global _hostname
 432     if not _hostname:
 433         _hostname = socket.getfqdn()
 434     return _hostname
 435
 436
 437 _resource_path = None
 438 def resource_path(subdir=''):
 439     global _resource_path
 440     if not _resource_path:
 441         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 442     return os.path.join(_resource_path, subdir)
 443
 444 def format_filesize(size):
 445     unit = 1024.0
 446     size = float(size)
 447     if size < unit:
 448         return "%d" % (size)
 449     exponent = int(math.log(size) / math.log(unit))
 450     size_prefix = "KMGTPE"[exponent - 1]
 451     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 452
 453
 454 class NotOk(Exception):
 455     pass
 456
 457
 458 class BaseConn:
 459     def __init__(self, outp):
 460         self.outp = outp
 461
 462     def close(self):
 463         while self._read(65536): pass
 464
 465     def read(self, size):
 466         """Read 'size' bytes from input stream."""
 467         self.outp.flush()
 468         return self._read(size)
 469
 470     def readline(self):
 471         """Read from input stream until a newline is found."""
 472         self.outp.flush()
 473         return self._readline()
 474
 475     def write(self, data):
 476         """Write 'data' to output stream."""
 477         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 478         self.outp.write(data)
 479
 480     def has_input(self):
 481         """Return true if input stream is readable."""
 482         raise NotImplemented("Subclasses must implement has_input")
 483
 484     def ok(self):
 485         """Indicate end of output from last sent command."""
 486         self.write('\nok\n')
 487
 488     def error(self, s):
 489         """Indicate server error to the client."""
 490         s = re.sub(r'\s+', ' ', str(s))
 491         self.write('\nerror %s\n' % s)
 492
 493     def _check_ok(self, onempty):
 494         self.outp.flush()
 495         rl = ''
 496         for rl in linereader(self):
 497             #log('%d got line: %r\n' % (os.getpid(), rl))
 498             if not rl:  # empty line
 499                 continue
 500             elif rl == 'ok':
 501                 return None
 502             elif rl.startswith('error '):
 503                 #log('client: error: %s\n' % rl[6:])
 504                 return NotOk(rl[6:])
 505             else:
 506                 onempty(rl)
 507         raise Exception('server exited unexpectedly; see errors above')
 508
 509     def drain_and_check_ok(self):
 510         """Remove all data for the current command from input stream."""
 511         def onempty(rl):
 512             pass
 513         return self._check_ok(onempty)
 514
 515     def check_ok(self):
 516         """Verify that server action completed successfully."""
 517         def onempty(rl):
 518             raise Exception('expected "ok", got %r' % rl)
 519         return self._check_ok(onempty)
 520
 521
 522 class Conn(BaseConn):
 523     def __init__(self, inp, outp):
 524         BaseConn.__init__(self, outp)
 525         self.inp = inp
 526
 527     def _read(self, size):
 528         return self.inp.read(size)
 529
 530     def _readline(self):
 531         return self.inp.readline()
 532
 533     def has_input(self):
 534         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 535         if rl:
 536             assert(rl[0] == self.inp.fileno())
 537             return True
 538         else:
 539             return None
 540
 541
 542 def checked_reader(fd, n):
 543     while n > 0:
 544         rl, _, _ = select.select([fd], [], [])
 545         assert(rl[0] == fd)
 546         buf = os.read(fd, n)
 547         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 548         yield buf
 549         n -= len(buf)
 550
 551
 552 MAX_PACKET = 128 * 1024
 553 def mux(p, outfd, outr, errr):
 554     try:
 555         fds = [outr, errr]
 556         while p.poll() is None:
 557             rl, _, _ = select.select(fds, [], [])
 558             for fd in rl:
 559                 if fd == outr:
 560                     buf = os.read(outr, MAX_PACKET)
 561                     if not buf: break
 562                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 563                 elif fd == errr:
 564                     buf = os.read(errr, 1024)
 565                     if not buf: break
 566                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 567     finally:
 568         os.write(outfd, struct.pack('!IB', 0, 3))
 569
 570
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a packet stream from the file descriptor 'infd'.  Each packet
    starts with a 5-byte '!IB' header (payload length, channel): channel 1
    payloads are handed to the reader, channel 2 payloads are copied to
    sys.stderr, and channel 3 marks the stream as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read whatever is needed to fill a 6-character window, then a
            # single byte at a time once the window is full.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # generator over the current channel 1 payload
        self.buf = None      # data read from the payload but not consumed
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        """Write 'data' to the output stream, after one non-blocking
        attempt to load pending input."""
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Wait up to 'timeout' for a packet and process its header.

        Returns False if the stream is closed or nothing arrived in time,
        True once a packet header has been handled.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Payload is read lazily, as the reader generator is consumed.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data, fetching packets as needed.

        Returns True when self.buf is available, False if the stream is
        closed or no packet arrived within 'timeout'.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = self.reader.next()
                return True
            except StopIteration:
                # Current payload is used up; go on to the next packet.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of input until ix_fn reports a stopping point.

        ix_fn(buf) returns None to take the whole buffer and keep going,
        or an index at which to cut the buffer and stop.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                # Keep the part after the cut for the next read.
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        """Return input up to and including the next newline."""
        def find_eol(buf):
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        """Return up to 'size' bytes of input (fewer if the stream ends)."""
        csize = [size]  # one-element list so the closure can update it
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if data is available without blocking."""
        return self._load_buf(0)
 660
 661
 662 def linereader(f):
 663     """Generate a list of input lines from 'f' without terminating newlines."""
 664     while 1:
 665         line = f.readline()
 666         if not line:
 667             break
 668         yield line[:-1]
 669
 670
 671 def chunkyreader(f, count = None):
 672     """Generate a list of chunks of data read from 'f'.
 673
 674     If count is None, read until EOF is reached.
 675
 676     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 677     reached while reading, raise IOError.
 678     """
 679     if count != None:
 680         while count > 0:
 681             b = f.read(min(count, 65536))
 682             if not b:
 683                 raise IOError('EOF with %d bytes remaining' % count)
 684             yield b
 685             count -= len(b)
 686     else:
 687         while 1:
 688             b = f.read(65536)
 689             if not b: break
 690             yield b
 691
 692
 693 @contextmanager
 694 def atomically_replaced_file(name, mode='w', buffering=-1):
 695     """Yield a file that will be atomically renamed name when leaving the block.
 696
 697     This contextmanager yields an open file object that is backed by a
 698     temporary file which will be renamed (atomically) to the target
 699     name if everything succeeds.
 700
 701     The mode and buffering arguments are handled exactly as with open,
 702     and the yielded file will have very restrictive permissions, as
 703     per mkstemp.
 704
 705     E.g.::
 706
 707         with atomically_replaced_file('foo.txt', 'w') as f:
 708             f.write('hello jack.')
 709
 710     """
 711
 712     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 713                                        text=('b' not in mode))
 714     try:
 715         try:
 716             f = os.fdopen(ffd, mode, buffering)
 717         except:
 718             os.close(ffd)
 719             raise
 720         try:
 721             yield f
 722         finally:
 723             f.close()
 724         os.rename(tempname, name)
 725     finally:
 726         unlink(tempname)  # nonexistant file is ignored
 727
 728
 729 def slashappend(s):
 730     """Append "/" to 's' if it doesn't aleady end in "/"."""
 731     if s and not s.endswith('/'):
 732         return s + '/'
 733     else:
 734         return s
 735
 736
 737 def _mmap_do(f, sz, flags, prot, close):
 738     if not sz:
 739         st = os.fstat(f.fileno())
 740         sz = st.st_size
 741     if not sz:
 742         # trying to open a zero-length map gives an error, but an empty
 743         # string has all the same behaviour of a zero-length map, ie. it has
 744         # no elements :)
 745         return ''
 746     map = mmap.mmap(f.fileno(), sz, flags, prot)
 747     if close:
 748         f.close()  # map will persist beyond file close
 749     return map
 750
 751
 752 def mmap_read(f, sz = 0, close=True):
 753     """Create a read-only memory mapped region on file 'f'.
 754     If sz is 0, the region will cover the entire file.
 755     """
 756     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 757
 758
 759 def mmap_readwrite(f, sz = 0, close=True):
 760     """Create a read-write memory mapped region on file 'f'.
 761     If sz is 0, the region will cover the entire file.
 762     """
 763     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 764                     close)
 765
 766
 767 def mmap_readwrite_private(f, sz = 0, close=True):
 768     """Create a read-write memory mapped region on file 'f'.
 769     If sz is 0, the region will cover the entire file.
 770     The map is private, which means the changes are never flushed back to the
 771     file.
 772     """
 773     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 774                     close)
 775
 776
 777 _mincore = getattr(_helpers, 'mincore', None)
 778 if _mincore:
 779     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 780     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 781
 782     _fmincore_chunk_size = None
 783     def _set_fmincore_chunk_size():
 784         global _fmincore_chunk_size
 785         pref_chunk_size = 64 * 1024 * 1024
 786         chunk_size = sc_page_size
 787         if (sc_page_size < pref_chunk_size):
 788             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 789         _fmincore_chunk_size = chunk_size
 790
 791     def fmincore(fd):
 792         """Return the mincore() data for fd as a bytearray whose values can be
 793         tested via MINCORE_INCORE, or None if fd does not fully
 794         support the operation."""
 795         st = os.fstat(fd)
 796         if (st.st_size == 0):
 797             return bytearray(0)
 798         if not _fmincore_chunk_size:
 799             _set_fmincore_chunk_size()
 800         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 801         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 802         chunk_count = page_count / _fmincore_chunk_size
 803         if chunk_count < 1:
 804             chunk_count = 1
 805         result = bytearray(page_count)
 806         for ci in xrange(chunk_count):
 807             pos = _fmincore_chunk_size * ci;
 808             msize = min(_fmincore_chunk_size, st.st_size - pos)
 809             try:
 810                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 811             except mmap.error as ex:
 812                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 813                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 814                     return None
 815                 raise ex
 816             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 817         return result
 818
 819
 820 def parse_timestamp(epoch_str):
 821     """Return the number of nanoseconds since the epoch that are described
 822 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 823 throw a ValueError that may contain additional information."""
 824     ns_per = {'s' :  1000000000,
 825               'ms' : 1000000,
 826               'us' : 1000,
 827               'ns' : 1}
 828     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 829     if not match:
 830         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 831             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 832         raise ValueError()
 833     (n, units) = match.group(1, 2)
 834     if not n:
 835         n = 1
 836     n = int(n)
 837     return n * ns_per[units]
 838
 839
 840 def parse_num(s):
 841     """Parse data size information into a float number.
 842
 843     Here are some examples of conversions:
 844         199.2k means 203981 bytes
 845         1GB means 1073741824 bytes
 846         2.1 tb means 2199023255552 bytes
 847     """
 848     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 849     if not g:
 850         raise ValueError("can't parse %r as a number" % s)
 851     (val, unit) = g.groups()
 852     num = float(val)
 853     unit = unit.lower()
 854     if unit in ['t', 'tb']:
 855         mult = 1024*1024*1024*1024
 856     elif unit in ['g', 'gb']:
 857         mult = 1024*1024*1024
 858     elif unit in ['m', 'mb']:
 859         mult = 1024*1024
 860     elif unit in ['k', 'kb']:
 861         mult = 1024
 862     elif unit in ['', 'b']:
 863         mult = 1
 864     else:
 865         raise ValueError("invalid unit %r in number %r" % (unit, s))
 866     return int(num*mult)
 867
 868
 869 def count(l):
 870     """Count the number of elements in an iterator. (consumes the iterator)"""
 871     return reduce(lambda x,y: x+1, l)
 872
 873
 874 saved_errors = []
 875 def add_error(e):
 876     """Append an error message to the list of saved errors.
 877
 878     Once processing is able to stop and output the errors, the saved errors are
 879     accessible in the module variable helpers.saved_errors.
 880     """
 881     saved_errors.append(e)
 882     log('%-70s\n' % e)
 883
 884
 885 def clear_errors():
 886     global saved_errors
 887     saved_errors = []
 888
 889
 890 def die_if_errors(msg=None, status=1):
 891     global saved_errors
 892     if saved_errors:
 893         if not msg:
 894             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 895         log(msg)
 896         sys.exit(status)
 897
 898
 899 def handle_ctrl_c():
 900     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 901
 902     The new exception handler will make sure that bup will exit without an ugly
 903     stacktrace when Ctrl-C is hit.
 904     """
 905     oldhook = sys.excepthook
 906     def newhook(exctype, value, traceback):
 907         if exctype == KeyboardInterrupt:
 908             log('\nInterrupted.\n')
 909         else:
 910             return oldhook(exctype, value, traceback)
 911     sys.excepthook = newhook
 912
 913
 914 def columnate(l, prefix):
 915     """Format elements of 'l' in columns with 'prefix' leading each line.
 916
 917     The number of columns is determined automatically based on the string
 918     lengths.
 919     """
 920     if not l:
 921         return ""
 922     l = l[:]
 923     clen = max(len(s) for s in l)
 924     ncols = (tty_width() - len(prefix)) / (clen + 2)
 925     if ncols <= 1:
 926         ncols = 1
 927         clen = 0
 928     cols = []
 929     while len(l) % ncols:
 930         l.append('')
 931     rows = len(l)/ncols
 932     for s in range(0, len(l), rows):
 933         cols.append(l[s:s+rows])
 934     out = ''
 935     for row in zip(*cols):
 936         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 937     return out
 938
 939
 940 def parse_date_or_fatal(str, fatal):
 941     """Parses the given date or calls Option.fatal().
 942     For now we expect a string that contains a float."""
 943     try:
 944         date = float(str)
 945     except ValueError as e:
 946         raise fatal('invalid date format (should be a float): %r' % e)
 947     else:
 948         return date
 949
 950
 951 def parse_excludes(options, fatal):
 952     """Traverse the options and extract all excludes, or call Option.fatal()."""
 953     excluded_paths = []
 954
 955     for flag in options:
 956         (option, parameter) = flag
 957         if option == '--exclude':
 958             excluded_paths.append(resolve_parent(parameter))
 959         elif option == '--exclude-from':
 960             try:
 961                 f = open(resolve_parent(parameter))
 962             except IOError as e:
 963                 raise fatal("couldn't read %s" % parameter)
 964             for exclude_path in f.readlines():
 965                 # FIXME: perhaps this should be rstrip('\n')
 966                 exclude_path = resolve_parent(exclude_path.strip())
 967                 if exclude_path:
 968                     excluded_paths.append(exclude_path)
 969     return sorted(frozenset(excluded_paths))
 970
 971
 972 def parse_rx_excludes(options, fatal):
 973     """Traverse the options and extract all rx excludes, or call
 974     Option.fatal()."""
 975     excluded_patterns = []
 976
 977     for flag in options:
 978         (option, parameter) = flag
 979         if option == '--exclude-rx':
 980             try:
 981                 excluded_patterns.append(re.compile(parameter))
 982             except re.error as ex:
 983                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 984         elif option == '--exclude-rx-from':
 985             try:
 986                 f = open(resolve_parent(parameter))
 987             except IOError as e:
 988                 raise fatal("couldn't read %s" % parameter)
 989             for pattern in f.readlines():
 990                 spattern = pattern.rstrip('\n')
 991                 if not spattern:
 992                     continue
 993                 try:
 994                     excluded_patterns.append(re.compile(spattern))
 995                 except re.error as ex:
 996                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 997     return excluded_patterns
 998
 999
def should_rx_exclude_path(path, exclude_rxs):
    """Tell whether any compiled pattern in exclude_rxs is found in path.

    Patterns are tried in order with search(); the first one that hits is
    reported via debug1() and True is returned.  Returns False when no
    pattern hits.
    """
    first_hit = next((rx for rx in exclude_rxs if rx.search(path)), None)
    if first_hit is None:
        return False
    debug1('Skipping %r: excluded by rx pattern %r.\n'
           % (path, first_hit.pattern))
    return True
1008
1009
1010 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1011 # that resolve against the current filesystem in the strip/graft
1012 # functions for example, but elsewhere as well.  I suspect bup's not
1013 # always being careful about that.  For some cases, the contents of
1014 # the current filesystem should be irrelevant, and consulting it might
1015 # produce the wrong result, perhaps via unintended symlink resolution,
1016 # for example.
1017
def path_components(path):
    """Break path into a list of pairs of the form (name,
    full_path_to_name).  Path must start with '/'.

    The path is normalized with os.path.abspath() first, so trailing
    slashes and '.' / '..' segments do not produce components of their
    own.  Raises Exception if path does not start with '/'.

    Example:
      '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
    if not path.startswith('/'):
        # Call syntax rather than "raise Exception, msg", which is a
        # syntax error on Python 3.
        raise Exception('path must start with "/": %s' % path)
    # Since we assume path startswith('/'), we can skip the first element.
    result = [('', '/')]
    norm_path = os.path.abspath(path)
    if norm_path == '/':
        return result
    full_path = ''
    for p in norm_path.split('/')[1:]:
        full_path += '/' + p
        result.append((p, full_path))
    return result
1035
1036
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').

    Prefixes are tried longest first (by the length of the string as
    given), and both the path and each prefix are normalized with
    os.path.abspath().  A prefix only applies when it is the path itself
    or one of its ancestor directories; '/foo/bar' does not strip
    anything from '/foo/barbaz'.  A prefix that normalizes to '/' is
    ignored.  When no prefix applies, the result is
    path_components(path).

    Example:
      '/foo/bar' with ['/foo'] -> [('', '/foo'), ('bar', '/foo/bar')]
    See thelpers.py for more examples."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == '/':
            continue
        if not normalized_path.startswith(normalized_bp):
            continue
        remainder = normalized_path[len(normalized_bp):]
        if remainder and not remainder.startswith('/'):
            # The textual match ended in the middle of a component name,
            # so bp is not actually an ancestor of path.
            continue
        prefix = normalized_bp
        result = []
        # remainder is '' or '/a/b...', so the first piece is always '',
        # which becomes the entry for the stripped prefix itself.
        for p in remainder.split('/'):
            if p: # not root
                prefix += '/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1059
1060
def grafted_path_components(graft_points, path):
    """Return the components of path after applying the first matching graft.

    'graft_points' is a sequence of (old_prefix, new_prefix) pairs, as
    given by --graft old=new.  The result is a list of (name,
    fs_path_or_None) pairs like the one path_components() produces; the
    faked directories leading up to the graft point carry None instead of
    a filesystem path.  When no graft point applies, the result is
    path_components() of the normalized path.

    A graft point only applies when old_prefix is the path itself or one
    of its ancestor directories, i.e. /foo/bar does not graft
    /foo/barbaz.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if not clean_path.startswith(old_prefix):
            continue
        remainder = clean_path[len(old_prefix):]
        if remainder and not remainder.startswith('/') \
           and not old_prefix.endswith('/'):
            # The textual match ended in the middle of a component name,
            # so old_prefix is not actually an ancestor of path.
            continue
        # Plain concatenation, so that new_prefix is taken literally
        # (backslashes in it must not be treated as regex-replacement
        # escapes).
        grafted_path = new_prefix + remainder
        # Handle /foo=/ (at least) -- which produces //whatever.
        grafted_path = '/' + grafted_path.lstrip('/')
        clean_path_components = path_components(clean_path)
        # Count the components that were stripped.
        strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
        new_prefix_parts = new_prefix.split('/')
        result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
        result = [(p, None) for p in result_prefix] \
            + clean_path_components[strip_count:]
        # Now set the graft point name to match the end of new_prefix.
        graft_index = len(result_prefix)
        result[graft_index] = \
            (new_prefix_parts[-1], clean_path_components[strip_count][1])
        if new_prefix == '/': # --graft ...=/ is a special case.
            return result[1:]
        return result
    return path_components(clean_path)
1102
1103
# Module-level alias for the hashlib.sha1 constructor.
Sha1 = hashlib.sha1
1105
1106
# The C helper module may or may not provide localtime(); everything
# below picks an implementation accordingly.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the _helpers.localtime() result for 'time' as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        # Take the sign from off itself rather than from the hour count:
        # for offsets between -1h and 0 the hour is 0, and formatting -0
        # with '%+03d' would yield "+00".
        sign = '-' if off < 0 else '+'
        return "%s%02d%02d" % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, built from its first nine
        items unless it already is one."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return strftime's '%z' rendering of the local time for t."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged; localtime() is time.localtime here."""
        return x
1142
1143
_some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name passes all of the save-name checks below.

    The checks enforce a superset of the restrictions in
    git-check-ref-format(1): the name must not be '@', start or end with
    '/', end with '.', contain anything matched by
    _some_invalid_save_parts_rx, or contain a character below 0x20 or
    equal to 0x7f; and no '/'-separated part may start with '.' or end
    with '.lock'.
    """
    if name == '@' or name.endswith('.'):
        return False
    if name.startswith('/') or name.endswith('/'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    return not any(part.startswith('.') or part.endswith('.lock')
                   for part in name.split('/'))
1161
1162
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert a period such as '30s', '5min' or '2w' to seconds.

    'forever' yields float('inf').  Otherwise s must match _period_rx: a
    decimal count followed by one of the units s, min, h, d, w, m
    (counted as 31 days) or y (counted as 366 days).  Returns None when
    s does not match.
    """
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    count, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    return int(count) * secs_per_unit[unit]