lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from ctypes import sizeof, c_void_p
   5 from os import environ
   6 from contextlib import contextmanager
   7 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   8 import hashlib, heapq, math, operator, time, grp, tempfile
   9
  10 from bup import _helpers
  11
  12
  13 class Nonlocal:
  14     """Helper to deal with Python scoping issues"""
  15     pass
  16
  17
  18 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  19 assert(sc_page_size > 0)
  20
  21 sc_arg_max = os.sysconf('SC_ARG_MAX')
  22 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  23     sc_arg_max = 2 * 1024 * 1024
  24
  25 # This function should really be in helpers, not in bup.options.  But we
  26 # want options.py to be standalone so people can include it in other projects.
  27 from bup.options import _tty_width
  28 tty_width = _tty_width
  29
  30
  31 def atoi(s):
  32     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  33     try:
  34         return int(s or '0')
  35     except ValueError:
  36         return 0
  37
  38
  39 def atof(s):
  40     """Convert the string 's' to a float. Return 0 if s is not a number."""
  41     try:
  42         return float(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  48
  49
  50 try:
  51     _fdatasync = os.fdatasync
  52 except AttributeError:
  53     _fdatasync = os.fsync
  54
  55 if sys.platform.startswith('darwin'):
  56     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  57     import fcntl
  58     def fdatasync(fd):
  59         try:
  60             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  61         except IOError as e:
  62             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  63             if e.errno == errno.ENOTSUP:
  64                 return _fdatasync(fd)
  65             else:
  66                 raise
  67 else:
  68     fdatasync = _fdatasync
  69
  70
  71 def partition(predicate, stream):
  72     """Returns (leading_matches_it, rest_it), where leading_matches_it
  73     must be completely exhausted before traversing rest_it.
  74
  75     """
  76     stream = iter(stream)
  77     ns = Nonlocal()
  78     ns.first_nonmatch = None
  79     def leading_matches():
  80         for x in stream:
  81             if predicate(x):
  82                 yield x
  83             else:
  84                 ns.first_nonmatch = (x,)
  85                 break
  86     def rest():
  87         if ns.first_nonmatch:
  88             yield ns.first_nonmatch[0]
  89             for x in stream:
  90                 yield x
  91     return (leading_matches(), rest())
  92
  93
  94 def stat_if_exists(path):
  95     try:
  96         return os.stat(path)
  97     except OSError as e:
  98         if e.errno != errno.ENOENT:
  99             raise
 100     return None
 101
 102
 103 # Write (blockingly) to sockets that may or may not be in blocking mode.
 104 # We need this because our stderr is sometimes eaten by subprocesses
 105 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
 106 # leading to race conditions.  Ick.  We'll do it the hard way.
 107 def _hard_write(fd, buf):
 108     while buf:
 109         (r,w,x) = select.select([], [fd], [], None)
 110         if not w:
 111             raise IOError('select(fd) returned without being writable')
 112         try:
 113             sz = os.write(fd, buf)
 114         except OSError as e:
 115             if e.errno != errno.EAGAIN:
 116                 raise
 117         assert(sz >= 0)
 118         buf = buf[sz:]
 119
 120
 121 _last_prog = 0
 122 def log(s):
 123     """Print a log message to stderr."""
 124     global _last_prog
 125     sys.stdout.flush()
 126     _hard_write(sys.stderr.fileno(), s)
 127     _last_prog = 0
 128
 129
 130 def debug1(s):
 131     if buglvl >= 1:
 132         log(s)
 133
 134
 135 def debug2(s):
 136     if buglvl >= 2:
 137         log(s)
 138
 139
 140 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 141 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 142 _last_progress = ''
 143 def progress(s):
 144     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 145     global _last_progress
 146     if istty2:
 147         log(s)
 148         _last_progress = s
 149
 150
 151 def qprogress(s):
 152     """Calls progress() only if we haven't printed progress in a while.
 153
 154     This avoids overloading the stderr buffer with excess junk.
 155     """
 156     global _last_prog
 157     now = time.time()
 158     if now - _last_prog > 0.1:
 159         progress(s)
 160         _last_prog = now
 161
 162
 163 def reprogress():
 164     """Calls progress() to redisplay the most recent progress message.
 165
 166     Useful after you've printed some other message that wipes out the
 167     progress line.
 168     """
 169     if _last_progress and _last_progress.endswith('\r'):
 170         progress(_last_progress)
 171
 172
 173 def mkdirp(d, mode=None):
 174     """Recursively create directories on path 'd'.
 175
 176     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 177     the path already exists.
 178     """
 179     try:
 180         if mode:
 181             os.makedirs(d, mode)
 182         else:
 183             os.makedirs(d)
 184     except OSError as e:
 185         if e.errno == errno.EEXIST:
 186             pass
 187         else:
 188             raise
 189
 190
 191 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 192     if key:
 193         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 194     else:
 195         samekey = operator.eq
 196     count = 0
 197     total = sum(len(it) for it in iters)
 198     iters = (iter(it) for it in iters)
 199     heap = ((next(it, None),it) for it in iters)
 200     heap = [(e,it) for e,it in heap if e]
 201
 202     heapq.heapify(heap)
 203     pe = None
 204     while heap:
 205         if not count % pfreq:
 206             pfunc(count, total)
 207         e, it = heap[0]
 208         if not samekey(e, pe):
 209             pe = e
 210             yield e
 211         count += 1
 212         try:
 213             e = next(it)
 214         except StopIteration:
 215             heapq.heappop(heap) # remove current
 216         else:
 217             heapq.heapreplace(heap, (e, it)) # shift current to new location
 218     pfinal(count, total)
 219
 220
 221 def unlink(f):
 222     """Delete a file at path 'f' if it currently exists.
 223
 224     Unlike os.unlink(), does not throw an exception if the file didn't already
 225     exist.
 226     """
 227     try:
 228         os.unlink(f)
 229     except OSError as e:
 230         if e.errno != errno.ENOENT:
 231             raise
 232
 233
 234 def readpipe(argv, preexec_fn=None, shell=False):
 235     """Run a subprocess and return its output."""
 236     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 237                          shell=shell)
 238     out, err = p.communicate()
 239     if p.returncode != 0:
 240         raise Exception('subprocess %r failed with status %d'
 241                         % (' '.join(argv), p.returncode))
 242     return out
 243
 244
 245 def _argmax_base(command):
 246     base_size = 2048
 247     for c in command:
 248         base_size += len(command) + 1
 249     for k, v in environ.iteritems():
 250         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 251     return base_size
 252
 253
 254 def _argmax_args_size(args):
 255     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 256
 257
 258 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 259     """If args is not empty, yield the output produced by calling the
 260 command list with args as a sequence of strings (It may be necessary
 261 to return multiple strings in order to respect ARG_MAX)."""
 262     # The optional arg_max arg is a workaround for an issue with the
 263     # current wvtest behavior.
 264     base_size = _argmax_base(command)
 265     while args:
 266         room = arg_max - base_size
 267         i = 0
 268         while i < len(args):
 269             next_size = _argmax_args_size(args[i:i+1])
 270             if room - next_size < 0:
 271                 break
 272             room -= next_size
 273             i += 1
 274         sub_args = args[:i]
 275         args = args[i:]
 276         assert(len(sub_args))
 277         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 278
 279
 280 def resolve_parent(p):
 281     """Return the absolute path of a file without following any final symlink.
 282
 283     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 284     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 285     will follow symlinks in p's directory)
 286     """
 287     try:
 288         st = os.lstat(p)
 289     except OSError:
 290         st = None
 291     if st and stat.S_ISLNK(st.st_mode):
 292         (dir, name) = os.path.split(p)
 293         dir = os.path.realpath(dir)
 294         out = os.path.join(dir, name)
 295     else:
 296         out = os.path.realpath(p)
 297     #log('realpathing:%r,%r\n' % (p, out))
 298     return out
 299
 300
 301 def detect_fakeroot():
 302     "Return True if we appear to be running under fakeroot."
 303     return os.getenv("FAKEROOTKEY") != None
 304
 305
 306 _warned_about_superuser_detection = None
 307 def is_superuser():
 308     if sys.platform.startswith('cygwin'):
 309         if sys.getwindowsversion()[0] > 5:
 310             # Sounds like situation is much more complicated here
 311             global _warned_about_superuser_detection
 312             if not _warned_about_superuser_detection:
 313                 log("can't detect root status for OS version > 5; assuming not root")
 314                 _warned_about_superuser_detection = True
 315             return False
 316         import ctypes
 317         return ctypes.cdll.shell32.IsUserAnAdmin()
 318     else:
 319         return os.geteuid() == 0
 320
 321
 322 def _cache_key_value(get_value, key, cache):
 323     """Return (value, was_cached).  If there is a value in the cache
 324     for key, use that, otherwise, call get_value(key) which should
 325     throw a KeyError if there is no value -- in which case the cached
 326     and returned value will be None.
 327     """
 328     try: # Do we already have it (or know there wasn't one)?
 329         value = cache[key]
 330         return value, True
 331     except KeyError:
 332         pass
 333     value = None
 334     try:
 335         cache[key] = value = get_value(key)
 336     except KeyError:
 337         cache[key] = None
 338     return value, False
 339
 340
 341 _uid_to_pwd_cache = {}
 342 _name_to_pwd_cache = {}
 343
 344 def pwd_from_uid(uid):
 345     """Return password database entry for uid (may be a cached value).
 346     Return None if no entry is found.
 347     """
 348     global _uid_to_pwd_cache, _name_to_pwd_cache
 349     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 350     if entry and not cached:
 351         _name_to_pwd_cache[entry.pw_name] = entry
 352     return entry
 353
 354
 355 def pwd_from_name(name):
 356     """Return password database entry for name (may be a cached value).
 357     Return None if no entry is found.
 358     """
 359     global _uid_to_pwd_cache, _name_to_pwd_cache
 360     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 361     if entry and not cached:
 362         _uid_to_pwd_cache[entry.pw_uid] = entry
 363     return entry
 364
 365
 366 _gid_to_grp_cache = {}
 367 _name_to_grp_cache = {}
 368
 369 def grp_from_gid(gid):
 370     """Return password database entry for gid (may be a cached value).
 371     Return None if no entry is found.
 372     """
 373     global _gid_to_grp_cache, _name_to_grp_cache
 374     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 375     if entry and not cached:
 376         _name_to_grp_cache[entry.gr_name] = entry
 377     return entry
 378
 379
 380 def grp_from_name(name):
 381     """Return password database entry for name (may be a cached value).
 382     Return None if no entry is found.
 383     """
 384     global _gid_to_grp_cache, _name_to_grp_cache
 385     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 386     if entry and not cached:
 387         _gid_to_grp_cache[entry.gr_gid] = entry
 388     return entry
 389
 390
 391 _username = None
 392 def username():
 393     """Get the user's login name."""
 394     global _username
 395     if not _username:
 396         uid = os.getuid()
 397         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 398     return _username
 399
 400
 401 _userfullname = None
 402 def userfullname():
 403     """Get the user's full name."""
 404     global _userfullname
 405     if not _userfullname:
 406         uid = os.getuid()
 407         entry = pwd_from_uid(uid)
 408         if entry:
 409             _userfullname = entry[4].split(',')[0] or entry[0]
 410         if not _userfullname:
 411             _userfullname = 'user%d' % uid
 412     return _userfullname
 413
 414
 415 _hostname = None
 416 def hostname():
 417     """Get the FQDN of this machine."""
 418     global _hostname
 419     if not _hostname:
 420         _hostname = socket.getfqdn()
 421     return _hostname
 422
 423
 424 _resource_path = None
 425 def resource_path(subdir=''):
 426     global _resource_path
 427     if not _resource_path:
 428         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 429     return os.path.join(_resource_path, subdir)
 430
 431 def format_filesize(size):
 432     unit = 1024.0
 433     size = float(size)
 434     if size < unit:
 435         return "%d" % (size)
 436     exponent = int(math.log(size) / math.log(unit))
 437     size_prefix = "KMGTPE"[exponent - 1]
 438     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 439
 440
 441 class NotOk(Exception):
 442     pass
 443
 444
 445 class BaseConn:
 446     def __init__(self, outp):
 447         self.outp = outp
 448
 449     def close(self):
 450         while self._read(65536): pass
 451
 452     def read(self, size):
 453         """Read 'size' bytes from input stream."""
 454         self.outp.flush()
 455         return self._read(size)
 456
 457     def readline(self):
 458         """Read from input stream until a newline is found."""
 459         self.outp.flush()
 460         return self._readline()
 461
 462     def write(self, data):
 463         """Write 'data' to output stream."""
 464         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 465         self.outp.write(data)
 466
 467     def has_input(self):
 468         """Return true if input stream is readable."""
 469         raise NotImplemented("Subclasses must implement has_input")
 470
 471     def ok(self):
 472         """Indicate end of output from last sent command."""
 473         self.write('\nok\n')
 474
 475     def error(self, s):
 476         """Indicate server error to the client."""
 477         s = re.sub(r'\s+', ' ', str(s))
 478         self.write('\nerror %s\n' % s)
 479
 480     def _check_ok(self, onempty):
 481         self.outp.flush()
 482         rl = ''
 483         for rl in linereader(self):
 484             #log('%d got line: %r\n' % (os.getpid(), rl))
 485             if not rl:  # empty line
 486                 continue
 487             elif rl == 'ok':
 488                 return None
 489             elif rl.startswith('error '):
 490                 #log('client: error: %s\n' % rl[6:])
 491                 return NotOk(rl[6:])
 492             else:
 493                 onempty(rl)
 494         raise Exception('server exited unexpectedly; see errors above')
 495
 496     def drain_and_check_ok(self):
 497         """Remove all data for the current command from input stream."""
 498         def onempty(rl):
 499             pass
 500         return self._check_ok(onempty)
 501
 502     def check_ok(self):
 503         """Verify that server action completed successfully."""
 504         def onempty(rl):
 505             raise Exception('expected "ok", got %r' % rl)
 506         return self._check_ok(onempty)
 507
 508
 509 class Conn(BaseConn):
 510     def __init__(self, inp, outp):
 511         BaseConn.__init__(self, outp)
 512         self.inp = inp
 513
 514     def _read(self, size):
 515         return self.inp.read(size)
 516
 517     def _readline(self):
 518         return self.inp.readline()
 519
 520     def has_input(self):
 521         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 522         if rl:
 523             assert(rl[0] == self.inp.fileno())
 524             return True
 525         else:
 526             return None
 527
 528
 529 def checked_reader(fd, n):
 530     while n > 0:
 531         rl, _, _ = select.select([fd], [], [])
 532         assert(rl[0] == fd)
 533         buf = os.read(fd, n)
 534         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 535         yield buf
 536         n -= len(buf)
 537
 538
 539 MAX_PACKET = 128 * 1024
 540 def mux(p, outfd, outr, errr):
 541     try:
 542         fds = [outr, errr]
 543         while p.poll() is None:
 544             rl, _, _ = select.select(fds, [], [])
 545             for fd in rl:
 546                 if fd == outr:
 547                     buf = os.read(outr, MAX_PACKET)
 548                     if not buf: break
 549                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 550                 elif fd == errr:
 551                     buf = os.read(errr, 1024)
 552                     if not buf: break
 553                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 554     finally:
 555         os.write(outfd, struct.pack('!IB', 0, 3))
 556
 557
 558 class DemuxConn(BaseConn):
 559     """A helper class for bup's client-server protocol."""
 560     def __init__(self, infd, outp):
 561         BaseConn.__init__(self, outp)
 562         # Anything that comes through before the sync string was not
 563         # multiplexed and can be assumed to be debug/log before mux init.
 564         tail = ''
 565         while tail != 'BUPMUX':
 566             b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
 567             if not b:
 568                 raise IOError('demux: unexpected EOF during initialization')
 569             tail += b
 570             sys.stderr.write(tail[:-6])  # pre-mux log messages
 571             tail = tail[-6:]
 572         self.infd = infd
 573         self.reader = None
 574         self.buf = None
 575         self.closed = False
 576
 577     def write(self, data):
 578         self._load_buf(0)
 579         BaseConn.write(self, data)
 580
 581     def _next_packet(self, timeout):
 582         if self.closed: return False
 583         rl, wl, xl = select.select([self.infd], [], [], timeout)
 584         if not rl: return False
 585         assert(rl[0] == self.infd)
 586         ns = ''.join(checked_reader(self.infd, 5))
 587         n, fdw = struct.unpack('!IB', ns)
 588         assert(n <= MAX_PACKET)
 589         if fdw == 1:
 590             self.reader = checked_reader(self.infd, n)
 591         elif fdw == 2:
 592             for buf in checked_reader(self.infd, n):
 593                 sys.stderr.write(buf)
 594         elif fdw == 3:
 595             self.closed = True
 596             debug2("DemuxConn: marked closed\n")
 597         return True
 598
 599     def _load_buf(self, timeout):
 600         if self.buf is not None:
 601             return True
 602         while not self.closed:
 603             while not self.reader:
 604                 if not self._next_packet(timeout):
 605                     return False
 606             try:
 607                 self.buf = next(self.reader)
 608                 return True
 609             except StopIteration:
 610                 self.reader = None
 611         return False
 612
 613     def _read_parts(self, ix_fn):
 614         while self._load_buf(None):
 615             assert(self.buf is not None)
 616             i = ix_fn(self.buf)
 617             if i is None or i == len(self.buf):
 618                 yv = self.buf
 619                 self.buf = None
 620             else:
 621                 yv = self.buf[:i]
 622                 self.buf = self.buf[i:]
 623             yield yv
 624             if i is not None:
 625                 break
 626
 627     def _readline(self):
 628         def find_eol(buf):
 629             try:
 630                 return buf.index('\n')+1
 631             except ValueError:
 632                 return None
 633         return ''.join(self._read_parts(find_eol))
 634
 635     def _read(self, size):
 636         csize = [size]
 637         def until_size(buf): # Closes on csize
 638             if len(buf) < csize[0]:
 639                 csize[0] -= len(buf)
 640                 return None
 641             else:
 642                 return csize[0]
 643         return ''.join(self._read_parts(until_size))
 644
 645     def has_input(self):
 646         return self._load_buf(0)
 647
 648
 649 def linereader(f):
 650     """Generate a list of input lines from 'f' without terminating newlines."""
 651     while 1:
 652         line = f.readline()
 653         if not line:
 654             break
 655         yield line[:-1]
 656
 657
 658 def chunkyreader(f, count = None):
 659     """Generate a list of chunks of data read from 'f'.
 660
 661     If count is None, read until EOF is reached.
 662
 663     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 664     reached while reading, raise IOError.
 665     """
 666     if count != None:
 667         while count > 0:
 668             b = f.read(min(count, 65536))
 669             if not b:
 670                 raise IOError('EOF with %d bytes remaining' % count)
 671             yield b
 672             count -= len(b)
 673     else:
 674         while 1:
 675             b = f.read(65536)
 676             if not b: break
 677             yield b
 678
 679
 680 @contextmanager
 681 def atomically_replaced_file(name, mode='w', buffering=-1):
 682     """Yield a file that will be atomically renamed name when leaving the block.
 683
 684     This contextmanager yields an open file object that is backed by a
 685     temporary file which will be renamed (atomically) to the target
 686     name if everything succeeds.
 687
 688     The mode and buffering arguments are handled exactly as with open,
 689     and the yielded file will have very restrictive permissions, as
 690     per mkstemp.
 691
 692     E.g.::
 693
 694         with atomically_replaced_file('foo.txt', 'w') as f:
 695             f.write('hello jack.')
 696
 697     """
 698
 699     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 700                                        text=('b' not in mode))
 701     try:
 702         try:
 703             f = os.fdopen(ffd, mode, buffering)
 704         except:
 705             os.close(ffd)
 706             raise
 707         try:
 708             yield f
 709         finally:
 710             f.close()
 711         os.rename(tempname, name)
 712     finally:
 713         unlink(tempname)  # nonexistant file is ignored
 714
 715
 716 def slashappend(s):
 717     """Append "/" to 's' if it doesn't aleady end in "/"."""
 718     if s and not s.endswith('/'):
 719         return s + '/'
 720     else:
 721         return s
 722
 723
 724 def _mmap_do(f, sz, flags, prot, close):
 725     if not sz:
 726         st = os.fstat(f.fileno())
 727         sz = st.st_size
 728     if not sz:
 729         # trying to open a zero-length map gives an error, but an empty
 730         # string has all the same behaviour of a zero-length map, ie. it has
 731         # no elements :)
 732         return ''
 733     map = mmap.mmap(f.fileno(), sz, flags, prot)
 734     if close:
 735         f.close()  # map will persist beyond file close
 736     return map
 737
 738
 739 def mmap_read(f, sz = 0, close=True):
 740     """Create a read-only memory mapped region on file 'f'.
 741     If sz is 0, the region will cover the entire file.
 742     """
 743     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 744
 745
 746 def mmap_readwrite(f, sz = 0, close=True):
 747     """Create a read-write memory mapped region on file 'f'.
 748     If sz is 0, the region will cover the entire file.
 749     """
 750     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 751                     close)
 752
 753
 754 def mmap_readwrite_private(f, sz = 0, close=True):
 755     """Create a read-write memory mapped region on file 'f'.
 756     If sz is 0, the region will cover the entire file.
 757     The map is private, which means the changes are never flushed back to the
 758     file.
 759     """
 760     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 761                     close)
 762
 763
 764 _mincore = getattr(_helpers, 'mincore', None)
 765 if _mincore:
 766     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 767     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 768
 769     _fmincore_chunk_size = None
 770     def _set_fmincore_chunk_size():
 771         global _fmincore_chunk_size
 772         pref_chunk_size = 64 * 1024 * 1024
 773         chunk_size = sc_page_size
 774         if (sc_page_size < pref_chunk_size):
 775             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 776         _fmincore_chunk_size = chunk_size
 777
 778     def fmincore(fd):
 779         """Return the mincore() data for fd as a bytearray whose values can be
 780         tested via MINCORE_INCORE, or None if fd does not fully
 781         support the operation."""
 782         st = os.fstat(fd)
 783         if (st.st_size == 0):
 784             return bytearray(0)
 785         if not _fmincore_chunk_size:
 786             _set_fmincore_chunk_size()
 787         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 788         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 789         chunk_count = page_count / _fmincore_chunk_size
 790         if chunk_count < 1:
 791             chunk_count = 1
 792         result = bytearray(page_count)
 793         for ci in xrange(chunk_count):
 794             pos = _fmincore_chunk_size * ci;
 795             msize = min(_fmincore_chunk_size, st.st_size - pos)
 796             try:
 797                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 798             except mmap.error as ex:
 799                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 800                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 801                     return None
 802                 raise ex
 803             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 804         return result
 805
 806
 807 def parse_timestamp(epoch_str):
 808     """Return the number of nanoseconds since the epoch that are described
 809 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 810 throw a ValueError that may contain additional information."""
 811     ns_per = {'s' :  1000000000,
 812               'ms' : 1000000,
 813               'us' : 1000,
 814               'ns' : 1}
 815     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 816     if not match:
 817         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 818             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 819         raise ValueError()
 820     (n, units) = match.group(1, 2)
 821     if not n:
 822         n = 1
 823     n = int(n)
 824     return n * ns_per[units]
 825
 826
 827 def parse_num(s):
 828     """Parse data size information into a float number.
 829
 830     Here are some examples of conversions:
 831         199.2k means 203981 bytes
 832         1GB means 1073741824 bytes
 833         2.1 tb means 2199023255552 bytes
 834     """
 835     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 836     if not g:
 837         raise ValueError("can't parse %r as a number" % s)
 838     (val, unit) = g.groups()
 839     num = float(val)
 840     unit = unit.lower()
 841     if unit in ['t', 'tb']:
 842         mult = 1024*1024*1024*1024
 843     elif unit in ['g', 'gb']:
 844         mult = 1024*1024*1024
 845     elif unit in ['m', 'mb']:
 846         mult = 1024*1024
 847     elif unit in ['k', 'kb']:
 848         mult = 1024
 849     elif unit in ['', 'b']:
 850         mult = 1
 851     else:
 852         raise ValueError("invalid unit %r in number %r" % (unit, s))
 853     return int(num*mult)
 854
 855
 856 def count(l):
 857     """Count the number of elements in an iterator. (consumes the iterator)"""
 858     return reduce(lambda x,y: x+1, l)
 859
 860
 861 saved_errors = []
 862 def add_error(e):
 863     """Append an error message to the list of saved errors.
 864
 865     Once processing is able to stop and output the errors, the saved errors are
 866     accessible in the module variable helpers.saved_errors.
 867     """
 868     saved_errors.append(e)
 869     log('%-70s\n' % e)
 870
 871
 872 def clear_errors():
 873     global saved_errors
 874     saved_errors = []
 875
 876
 877 def die_if_errors(msg=None, status=1):
 878     global saved_errors
 879     if saved_errors:
 880         if not msg:
 881             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 882         log(msg)
 883         sys.exit(status)
 884
 885
 886 def handle_ctrl_c():
 887     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 888
 889     The new exception handler will make sure that bup will exit without an ugly
 890     stacktrace when Ctrl-C is hit.
 891     """
 892     oldhook = sys.excepthook
 893     def newhook(exctype, value, traceback):
 894         if exctype == KeyboardInterrupt:
 895             log('\nInterrupted.\n')
 896         else:
 897             return oldhook(exctype, value, traceback)
 898     sys.excepthook = newhook
 899
 900
 901 def columnate(l, prefix):
 902     """Format elements of 'l' in columns with 'prefix' leading each line.
 903
 904     The number of columns is determined automatically based on the string
 905     lengths.
 906     """
 907     if not l:
 908         return ""
 909     l = l[:]
 910     clen = max(len(s) for s in l)
 911     ncols = (tty_width() - len(prefix)) / (clen + 2)
 912     if ncols <= 1:
 913         ncols = 1
 914         clen = 0
 915     cols = []
 916     while len(l) % ncols:
 917         l.append('')
 918     rows = len(l)/ncols
 919     for s in range(0, len(l), rows):
 920         cols.append(l[s:s+rows])
 921     out = ''
 922     for row in zip(*cols):
 923         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 924     return out
 925
 926
 927 def parse_date_or_fatal(str, fatal):
 928     """Parses the given date or calls Option.fatal().
 929     For now we expect a string that contains a float."""
 930     try:
 931         date = float(str)
 932     except ValueError as e:
 933         raise fatal('invalid date format (should be a float): %r' % e)
 934     else:
 935         return date
 936
 937
 938 def parse_excludes(options, fatal):
 939     """Traverse the options and extract all excludes, or call Option.fatal()."""
 940     excluded_paths = []
 941
 942     for flag in options:
 943         (option, parameter) = flag
 944         if option == '--exclude':
 945             excluded_paths.append(resolve_parent(parameter))
 946         elif option == '--exclude-from':
 947             try:
 948                 f = open(resolve_parent(parameter))
 949             except IOError as e:
 950                 raise fatal("couldn't read %s" % parameter)
 951             for exclude_path in f.readlines():
 952                 # FIXME: perhaps this should be rstrip('\n')
 953                 exclude_path = resolve_parent(exclude_path.strip())
 954                 if exclude_path:
 955                     excluded_paths.append(exclude_path)
 956     return sorted(frozenset(excluded_paths))
 957
 958
 959 def parse_rx_excludes(options, fatal):
 960     """Traverse the options and extract all rx excludes, or call
 961     Option.fatal()."""
 962     excluded_patterns = []
 963
 964     for flag in options:
 965         (option, parameter) = flag
 966         if option == '--exclude-rx':
 967             try:
 968                 excluded_patterns.append(re.compile(parameter))
 969             except re.error as ex:
 970                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 971         elif option == '--exclude-rx-from':
 972             try:
 973                 f = open(resolve_parent(parameter))
 974             except IOError as e:
 975                 raise fatal("couldn't read %s" % parameter)
 976             for pattern in f.readlines():
 977                 spattern = pattern.rstrip('\n')
 978                 if not spattern:
 979                     continue
 980                 try:
 981                     excluded_patterns.append(re.compile(spattern))
 982                 except re.error as ex:
 983                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 984     return excluded_patterns
 985
 986
 987 def should_rx_exclude_path(path, exclude_rxs):
 988     """Return True if path matches a regular expression in exclude_rxs."""
 989     for rx in exclude_rxs:
 990         if rx.search(path):
 991             debug1('Skipping %r: excluded by rx pattern %r.\n'
 992                    % (path, rx.pattern))
 993             return True
 994     return False
 995
 996
 997 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 998 # that resolve against the current filesystem in the strip/graft
 999 # functions for example, but elsewhere as well.  I suspect bup's not
1000 # always being careful about that.  For some cases, the contents of
1001 # the current filesystem should be irrelevant, and consulting it might
1002 # produce the wrong result, perhaps via unintended symlink resolution,
1003 # for example.
1004
1005 def path_components(path):
1006     """Break path into a list of pairs of the form (name,
1007     full_path_to_name).  Path must start with '/'.
1008     Example:
1009       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
1010     if not path.startswith('/'):
1011         raise Exception, 'path must start with "/": %s' % path
1012     # Since we assume path startswith('/'), we can skip the first element.
1013     result = [('', '/')]
1014     norm_path = os.path.abspath(path)
1015     if norm_path == '/':
1016         return result
1017     full_path = ''
1018     for p in norm_path.split('/')[1:]:
1019         full_path += '/' + p
1020         result.append((p, full_path))
1021     return result
1022
1023
1024 def stripped_path_components(path, strip_prefixes):
1025     """Strip any prefix in strip_prefixes from path and return a list
1026     of path components where each component is (name,
1027     none_or_full_fs_path_to_name).  Assume path startswith('/').
1028     See thelpers.py for examples."""
1029     normalized_path = os.path.abspath(path)
1030     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
1031     for bp in sorted_strip_prefixes:
1032         normalized_bp = os.path.abspath(bp)
1033         if normalized_bp == '/':
1034             continue
1035         if normalized_path.startswith(normalized_bp):
1036             prefix = normalized_path[:len(normalized_bp)]
1037             result = []
1038             for p in normalized_path[len(normalized_bp):].split('/'):
1039                 if p: # not root
1040                     prefix += '/'
1041                 prefix += p
1042                 result.append((p, prefix))
1043             return result
1044     # Nothing to strip.
1045     return path_components(path)
1046
1047
1048 def grafted_path_components(graft_points, path):
1049     # Create a result that consists of some number of faked graft
1050     # directories before the graft point, followed by all of the real
1051     # directories from path that are after the graft point.  Arrange
1052     # for the directory at the graft point in the result to correspond
1053     # to the "orig" directory in --graft orig=new.  See t/thelpers.py
1054     # for some examples.
1055
1056     # Note that given --graft orig=new, orig and new have *nothing* to
1057     # do with each other, even if some of their component names
1058     # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
1059     # equivalent to --graft /foo/bar/baz=/x/y/z, or even
1060     # /foo/bar/baz=/x.
1061
1062     # FIXME: This can't be the best solution...
1063     clean_path = os.path.abspath(path)
1064     for graft_point in graft_points:
1065         old_prefix, new_prefix = graft_point
1066         # Expand prefixes iff not absolute paths.
1067         old_prefix = os.path.normpath(old_prefix)
1068         new_prefix = os.path.normpath(new_prefix)
1069         if clean_path.startswith(old_prefix):
1070             escaped_prefix = re.escape(old_prefix)
1071             grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
1072             # Handle /foo=/ (at least) -- which produces //whatever.
1073             grafted_path = '/' + grafted_path.lstrip('/')
1074             clean_path_components = path_components(clean_path)
1075             # Count the components that were stripped.
1076             strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
1077             new_prefix_parts = new_prefix.split('/')
1078             result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
1079             result = [(p, None) for p in result_prefix] \
1080                 + clean_path_components[strip_count:]
1081             # Now set the graft point name to match the end of new_prefix.
1082             graft_point = len(result_prefix)
1083             result[graft_point] = \
1084                 (new_prefix_parts[-1], clean_path_components[strip_count][1])
1085             if new_prefix == '/': # --graft ...=/ is a special case.
1086                 return result[1:]
1087             return result
1088     return path_components(clean_path)
1089
1090
1091 Sha1 = hashlib.sha1
1092
1093
1094 _localtime = getattr(_helpers, 'localtime', None)
1095
1096 if _localtime:
1097     bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
1098                                        'tm_hour', 'tm_min', 'tm_sec',
1099                                        'tm_wday', 'tm_yday',
1100                                        'tm_isdst', 'tm_gmtoff', 'tm_zone'])
1101
1102 # Define a localtime() that returns bup_time when possible.  Note:
1103 # this means that any helpers.localtime() results may need to be
1104 # passed through to_py_time() before being passed to python's time
1105 # module, which doesn't appear willing to ignore the extra items.
1106 if _localtime:
1107     def localtime(time):
1108         return bup_time(*_helpers.localtime(time))
1109     def utc_offset_str(t):
1110         """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
1111         If the current UTC offset does not represent an integer number
1112         of minutes, the fractional component will be truncated."""
1113         off = localtime(t).tm_gmtoff
1114         # Note: // doesn't truncate like C for negative values, it rounds down.
1115         offmin = abs(off) // 60
1116         m = offmin % 60
1117         h = (offmin - m) // 60
1118         return "%+03d%02d" % (-h if off < 0 else h, m)
1119     def to_py_time(x):
1120         if isinstance(x, time.struct_time):
1121             return x
1122         return time.struct_time(x[:9])
1123 else:
1124     localtime = time.localtime
1125     def utc_offset_str(t):
1126         return time.strftime('%z', localtime(t))
1127     def to_py_time(x):
1128         return x
1129
1130
1131 _some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{')
1132
1133 def valid_save_name(name):
1134     # Enforce a superset of the restrictions in git-check-ref-format(1)
1135     if name == '@' \
1136        or name.startswith('/') or name.endswith('/') \
1137        or name.endswith('.'):
1138         return False
1139     if _some_invalid_save_parts_rx.search(name):
1140         return False
1141     for c in name:
1142         if ord(c) < 0x20 or ord(c) == 0x7f:
1143             return False
1144     for part in name.split('/'):
1145         if part.startswith('.') or part.endswith('.lock'):
1146             return False
1147     return True
1148
1149
1150 _period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')
1151
1152 def period_as_secs(s):
1153     if s == 'forever':
1154         return float('inf')
1155     match = _period_rx.match(s)
1156     if not match:
1157         return None
1158     mag = int(match.group(1))
1159     scale = match.group(2)
1160     return mag * {'s': 1,
1161                   'min': 60,
1162                   'h': 60 * 60,
1163                   'd': 60 * 60 * 24,
1164                   'w': 60 * 60 * 24 * 7,
1165                   'm': 60 * 60 * 24 * 31,
1166                   'y': 60 * 60 * 24 * 366}[scale]