lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from ctypes import sizeof, c_void_p
   5 from os import environ
   6 from contextlib import contextmanager
   7 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   8 import hashlib, heapq, math, operator, time, grp, tempfile
   9
  10 from bup import _helpers
  11
  12
  13 class Nonlocal:
  14     """Helper to deal with Python scoping issues"""
  15     pass
  16
  17
  18 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  19 assert(sc_page_size > 0)
  20
  21 sc_arg_max = os.sysconf('SC_ARG_MAX')
  22 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  23     sc_arg_max = 2 * 1024 * 1024
  24
# This function should really live in helpers, not in bup.options.  But we
# want options.py to be standalone so people can include it in other
# projects, so it is defined there and re-exported here as tty_width.
from bup.options import _tty_width
tty_width = _tty_width
  29
  30
  31 def atoi(s):
  32     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  33     try:
  34         return int(s or '0')
  35     except ValueError:
  36         return 0
  37
  38
  39 def atof(s):
  40     """Convert the string 's' to a float. Return 0 if s is not a number."""
  41     try:
  42         return float(s or '0')
  43     except ValueError:
  44         return 0
  45
  46
  47 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  48
  49
  50 try:
  51     _fdatasync = os.fdatasync
  52 except AttributeError:
  53     _fdatasync = os.fsync
  54
  55 if sys.platform.startswith('darwin'):
  56     # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
  57     import fcntl
  58     def fdatasync(fd):
  59         try:
  60             return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  61         except IOError as e:
  62             # Fallback for file systems (SMB) that do not support F_FULLFSYNC
  63             if e.errno == errno.ENOTSUP:
  64                 return _fdatasync(fd)
  65             else:
  66                 raise
  67 else:
  68     fdatasync = _fdatasync
  69
  70
  71 def partition(predicate, stream):
  72     """Returns (leading_matches_it, rest_it), where leading_matches_it
  73     must be completely exhausted before traversing rest_it.
  74
  75     """
  76     stream = iter(stream)
  77     ns = Nonlocal()
  78     ns.first_nonmatch = None
  79     def leading_matches():
  80         for x in stream:
  81             if predicate(x):
  82                 yield x
  83             else:
  84                 ns.first_nonmatch = (x,)
  85                 break
  86     def rest():
  87         if ns.first_nonmatch:
  88             yield ns.first_nonmatch[0]
  89             for x in stream:
  90                 yield x
  91     return (leading_matches(), rest())
  92
  93
  94 # Write (blockingly) to sockets that may or may not be in blocking mode.
  95 # We need this because our stderr is sometimes eaten by subprocesses
  96 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
  97 # leading to race conditions.  Ick.  We'll do it the hard way.
  98 def _hard_write(fd, buf):
  99     while buf:
 100         (r,w,x) = select.select([], [fd], [], None)
 101         if not w:
 102             raise IOError('select(fd) returned without being writable')
 103         try:
 104             sz = os.write(fd, buf)
 105         except OSError as e:
 106             if e.errno != errno.EAGAIN:
 107                 raise
 108         assert(sz >= 0)
 109         buf = buf[sz:]
 110
 111
 112 _last_prog = 0
 113 def log(s):
 114     """Print a log message to stderr."""
 115     global _last_prog
 116     sys.stdout.flush()
 117     _hard_write(sys.stderr.fileno(), s)
 118     _last_prog = 0
 119
 120
 121 def debug1(s):
 122     if buglvl >= 1:
 123         log(s)
 124
 125
 126 def debug2(s):
 127     if buglvl >= 2:
 128         log(s)
 129
 130
 131 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 132 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 133 _last_progress = ''
 134 def progress(s):
 135     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 136     global _last_progress
 137     if istty2:
 138         log(s)
 139         _last_progress = s
 140
 141
 142 def qprogress(s):
 143     """Calls progress() only if we haven't printed progress in a while.
 144
 145     This avoids overloading the stderr buffer with excess junk.
 146     """
 147     global _last_prog
 148     now = time.time()
 149     if now - _last_prog > 0.1:
 150         progress(s)
 151         _last_prog = now
 152
 153
 154 def reprogress():
 155     """Calls progress() to redisplay the most recent progress message.
 156
 157     Useful after you've printed some other message that wipes out the
 158     progress line.
 159     """
 160     if _last_progress and _last_progress.endswith('\r'):
 161         progress(_last_progress)
 162
 163
 164 def mkdirp(d, mode=None):
 165     """Recursively create directories on path 'd'.
 166
 167     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 168     the path already exists.
 169     """
 170     try:
 171         if mode:
 172             os.makedirs(d, mode)
 173         else:
 174             os.makedirs(d)
 175     except OSError as e:
 176         if e.errno == errno.EEXIST:
 177             pass
 178         else:
 179             raise
 180
 181
 182 _unspecified_next_default = object()
 183
 184 def _fallback_next(it, default=_unspecified_next_default):
 185     """Retrieve the next item from the iterator by calling its
 186     next() method. If default is given, it is returned if the
 187     iterator is exhausted, otherwise StopIteration is raised."""
 188
 189     if default is _unspecified_next_default:
 190         return it.next()
 191     else:
 192         try:
 193             return it.next()
 194         except StopIteration:
 195             return default
 196
 197 if sys.version_info < (2, 6):
 198     next =  _fallback_next
 199
 200
 201 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 202     if key:
 203         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 204     else:
 205         samekey = operator.eq
 206     count = 0
 207     total = sum(len(it) for it in iters)
 208     iters = (iter(it) for it in iters)
 209     heap = ((next(it, None),it) for it in iters)
 210     heap = [(e,it) for e,it in heap if e]
 211
 212     heapq.heapify(heap)
 213     pe = None
 214     while heap:
 215         if not count % pfreq:
 216             pfunc(count, total)
 217         e, it = heap[0]
 218         if not samekey(e, pe):
 219             pe = e
 220             yield e
 221         count += 1
 222         try:
 223             e = it.next() # Don't use next() function, it's too expensive
 224         except StopIteration:
 225             heapq.heappop(heap) # remove current
 226         else:
 227             heapq.heapreplace(heap, (e, it)) # shift current to new location
 228     pfinal(count, total)
 229
 230
 231 def unlink(f):
 232     """Delete a file at path 'f' if it currently exists.
 233
 234     Unlike os.unlink(), does not throw an exception if the file didn't already
 235     exist.
 236     """
 237     try:
 238         os.unlink(f)
 239     except OSError as e:
 240         if e.errno != errno.ENOENT:
 241             raise
 242
 243
 244 def readpipe(argv, preexec_fn=None, shell=False):
 245     """Run a subprocess and return its output."""
 246     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 247                          shell=shell)
 248     out, err = p.communicate()
 249     if p.returncode != 0:
 250         raise Exception('subprocess %r failed with status %d'
 251                         % (' '.join(argv), p.returncode))
 252     return out
 253
 254
 255 def _argmax_base(command):
 256     base_size = 2048
 257     for c in command:
 258         base_size += len(command) + 1
 259     for k, v in environ.iteritems():
 260         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 261     return base_size
 262
 263
 264 def _argmax_args_size(args):
 265     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 266
 267
 268 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 269     """If args is not empty, yield the output produced by calling the
 270 command list with args as a sequence of strings (It may be necessary
 271 to return multiple strings in order to respect ARG_MAX)."""
 272     # The optional arg_max arg is a workaround for an issue with the
 273     # current wvtest behavior.
 274     base_size = _argmax_base(command)
 275     while args:
 276         room = arg_max - base_size
 277         i = 0
 278         while i < len(args):
 279             next_size = _argmax_args_size(args[i:i+1])
 280             if room - next_size < 0:
 281                 break
 282             room -= next_size
 283             i += 1
 284         sub_args = args[:i]
 285         args = args[i:]
 286         assert(len(sub_args))
 287         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 288
 289
 290 def resolve_parent(p):
 291     """Return the absolute path of a file without following any final symlink.
 292
 293     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 294     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 295     will follow symlinks in p's directory)
 296     """
 297     try:
 298         st = os.lstat(p)
 299     except OSError:
 300         st = None
 301     if st and stat.S_ISLNK(st.st_mode):
 302         (dir, name) = os.path.split(p)
 303         dir = os.path.realpath(dir)
 304         out = os.path.join(dir, name)
 305     else:
 306         out = os.path.realpath(p)
 307     #log('realpathing:%r,%r\n' % (p, out))
 308     return out
 309
 310
 311 def detect_fakeroot():
 312     "Return True if we appear to be running under fakeroot."
 313     return os.getenv("FAKEROOTKEY") != None
 314
 315
 316 _warned_about_superuser_detection = None
 317 def is_superuser():
 318     if sys.platform.startswith('cygwin'):
 319         if sys.getwindowsversion()[0] > 5:
 320             # Sounds like situation is much more complicated here
 321             global _warned_about_superuser_detection
 322             if not _warned_about_superuser_detection:
 323                 log("can't detect root status for OS version > 5; assuming not root")
 324                 _warned_about_superuser_detection = True
 325             return False
 326         import ctypes
 327         return ctypes.cdll.shell32.IsUserAnAdmin()
 328     else:
 329         return os.geteuid() == 0
 330
 331
 332 def _cache_key_value(get_value, key, cache):
 333     """Return (value, was_cached).  If there is a value in the cache
 334     for key, use that, otherwise, call get_value(key) which should
 335     throw a KeyError if there is no value -- in which case the cached
 336     and returned value will be None.
 337     """
 338     try: # Do we already have it (or know there wasn't one)?
 339         value = cache[key]
 340         return value, True
 341     except KeyError:
 342         pass
 343     value = None
 344     try:
 345         cache[key] = value = get_value(key)
 346     except KeyError:
 347         cache[key] = None
 348     return value, False
 349
 350
 351 _uid_to_pwd_cache = {}
 352 _name_to_pwd_cache = {}
 353
 354 def pwd_from_uid(uid):
 355     """Return password database entry for uid (may be a cached value).
 356     Return None if no entry is found.
 357     """
 358     global _uid_to_pwd_cache, _name_to_pwd_cache
 359     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 360     if entry and not cached:
 361         _name_to_pwd_cache[entry.pw_name] = entry
 362     return entry
 363
 364
 365 def pwd_from_name(name):
 366     """Return password database entry for name (may be a cached value).
 367     Return None if no entry is found.
 368     """
 369     global _uid_to_pwd_cache, _name_to_pwd_cache
 370     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 371     if entry and not cached:
 372         _uid_to_pwd_cache[entry.pw_uid] = entry
 373     return entry
 374
 375
 376 _gid_to_grp_cache = {}
 377 _name_to_grp_cache = {}
 378
 379 def grp_from_gid(gid):
 380     """Return password database entry for gid (may be a cached value).
 381     Return None if no entry is found.
 382     """
 383     global _gid_to_grp_cache, _name_to_grp_cache
 384     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 385     if entry and not cached:
 386         _name_to_grp_cache[entry.gr_name] = entry
 387     return entry
 388
 389
 390 def grp_from_name(name):
 391     """Return password database entry for name (may be a cached value).
 392     Return None if no entry is found.
 393     """
 394     global _gid_to_grp_cache, _name_to_grp_cache
 395     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 396     if entry and not cached:
 397         _gid_to_grp_cache[entry.gr_gid] = entry
 398     return entry
 399
 400
 401 _username = None
 402 def username():
 403     """Get the user's login name."""
 404     global _username
 405     if not _username:
 406         uid = os.getuid()
 407         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 408     return _username
 409
 410
 411 _userfullname = None
 412 def userfullname():
 413     """Get the user's full name."""
 414     global _userfullname
 415     if not _userfullname:
 416         uid = os.getuid()
 417         entry = pwd_from_uid(uid)
 418         if entry:
 419             _userfullname = entry[4].split(',')[0] or entry[0]
 420         if not _userfullname:
 421             _userfullname = 'user%d' % uid
 422     return _userfullname
 423
 424
 425 _hostname = None
 426 def hostname():
 427     """Get the FQDN of this machine."""
 428     global _hostname
 429     if not _hostname:
 430         _hostname = socket.getfqdn()
 431     return _hostname
 432
 433
 434 _resource_path = None
 435 def resource_path(subdir=''):
 436     global _resource_path
 437     if not _resource_path:
 438         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 439     return os.path.join(_resource_path, subdir)
 440
 441 def format_filesize(size):
 442     unit = 1024.0
 443     size = float(size)
 444     if size < unit:
 445         return "%d" % (size)
 446     exponent = int(math.log(size) / math.log(unit))
 447     size_prefix = "KMGTPE"[exponent - 1]
 448     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 449
 450
 451 class NotOk(Exception):
 452     pass
 453
 454
 455 class BaseConn:
 456     def __init__(self, outp):
 457         self.outp = outp
 458
 459     def close(self):
 460         while self._read(65536): pass
 461
 462     def read(self, size):
 463         """Read 'size' bytes from input stream."""
 464         self.outp.flush()
 465         return self._read(size)
 466
 467     def readline(self):
 468         """Read from input stream until a newline is found."""
 469         self.outp.flush()
 470         return self._readline()
 471
 472     def write(self, data):
 473         """Write 'data' to output stream."""
 474         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 475         self.outp.write(data)
 476
 477     def has_input(self):
 478         """Return true if input stream is readable."""
 479         raise NotImplemented("Subclasses must implement has_input")
 480
 481     def ok(self):
 482         """Indicate end of output from last sent command."""
 483         self.write('\nok\n')
 484
 485     def error(self, s):
 486         """Indicate server error to the client."""
 487         s = re.sub(r'\s+', ' ', str(s))
 488         self.write('\nerror %s\n' % s)
 489
 490     def _check_ok(self, onempty):
 491         self.outp.flush()
 492         rl = ''
 493         for rl in linereader(self):
 494             #log('%d got line: %r\n' % (os.getpid(), rl))
 495             if not rl:  # empty line
 496                 continue
 497             elif rl == 'ok':
 498                 return None
 499             elif rl.startswith('error '):
 500                 #log('client: error: %s\n' % rl[6:])
 501                 return NotOk(rl[6:])
 502             else:
 503                 onempty(rl)
 504         raise Exception('server exited unexpectedly; see errors above')
 505
 506     def drain_and_check_ok(self):
 507         """Remove all data for the current command from input stream."""
 508         def onempty(rl):
 509             pass
 510         return self._check_ok(onempty)
 511
 512     def check_ok(self):
 513         """Verify that server action completed successfully."""
 514         def onempty(rl):
 515             raise Exception('expected "ok", got %r' % rl)
 516         return self._check_ok(onempty)
 517
 518
 519 class Conn(BaseConn):
 520     def __init__(self, inp, outp):
 521         BaseConn.__init__(self, outp)
 522         self.inp = inp
 523
 524     def _read(self, size):
 525         return self.inp.read(size)
 526
 527     def _readline(self):
 528         return self.inp.readline()
 529
 530     def has_input(self):
 531         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 532         if rl:
 533             assert(rl[0] == self.inp.fileno())
 534             return True
 535         else:
 536             return None
 537
 538
 539 def checked_reader(fd, n):
 540     while n > 0:
 541         rl, _, _ = select.select([fd], [], [])
 542         assert(rl[0] == fd)
 543         buf = os.read(fd, n)
 544         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 545         yield buf
 546         n -= len(buf)
 547
 548
 549 MAX_PACKET = 128 * 1024
 550 def mux(p, outfd, outr, errr):
 551     try:
 552         fds = [outr, errr]
 553         while p.poll() is None:
 554             rl, _, _ = select.select(fds, [], [])
 555             for fd in rl:
 556                 if fd == outr:
 557                     buf = os.read(outr, MAX_PACKET)
 558                     if not buf: break
 559                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 560                 elif fd == errr:
 561                     buf = os.read(errr, 1024)
 562                     if not buf: break
 563                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 564     finally:
 565         os.write(outfd, struct.pack('!IB', 0, 3))
 566
 567
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a multiplexed stream from the descriptor 'infd'.  Each packet
    starts with a 5-byte '!IB' header (payload length, channel): channel 1
    carries connection data, channel 2 is copied to stderr, and channel 3
    marks the stream as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to fill a 6-byte window, then one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None    # chunk generator for the current data packet
        self.buf = None       # data received but not yet handed to a caller
        self.closed = False   # set once a channel-3 packet has been seen

    def write(self, data):
        """Write 'data' to the output stream.

        A non-blocking _load_buf(0) runs first; presumably this lets
        pending packets (such as stderr output) be handled before more is
        sent -- TODO confirm intent.
        """
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Wait up to 'timeout' for a packet header and dispatch on it.

        Returns False if the connection is closed or nothing became
        readable in time, True once a packet header has been processed.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily through self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr packet: copy the payload straight to our stderr.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data, waiting up to 'timeout' per packet.

        Returns True when self.buf is available, False if the connection
        is closed or no packet arrived in time.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = self.reader.next()
                return True
            except StopIteration:
                # Current data packet is used up; go get another one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of buffered input as directed by ix_fn.

        ix_fn(buf) returns None to take all of buf and carry on, or an
        index at which to cut buf; the part before the index is yielded,
        the rest stays buffered, and the generator stops.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        """Return input up to and including the next newline."""
        def find_eol(buf):
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        """Return up to 'size' bytes of input, joined from the chunks read."""
        csize = [size]  # one-element list so the closure can update it
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if input is available without blocking."""
        return self._load_buf(0)
 657
 658
 659 def linereader(f):
 660     """Generate a list of input lines from 'f' without terminating newlines."""
 661     while 1:
 662         line = f.readline()
 663         if not line:
 664             break
 665         yield line[:-1]
 666
 667
 668 def chunkyreader(f, count = None):
 669     """Generate a list of chunks of data read from 'f'.
 670
 671     If count is None, read until EOF is reached.
 672
 673     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 674     reached while reading, raise IOError.
 675     """
 676     if count != None:
 677         while count > 0:
 678             b = f.read(min(count, 65536))
 679             if not b:
 680                 raise IOError('EOF with %d bytes remaining' % count)
 681             yield b
 682             count -= len(b)
 683     else:
 684         while 1:
 685             b = f.read(65536)
 686             if not b: break
 687             yield b
 688
 689
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed to name when leaving the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.  If the block raises, the target is
    left untouched and the temporary file is removed.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    # Create the temporary file in the target's directory.
    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so nothing owns the descriptor yet; close it.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Reached only if the with-block finished without raising.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 724
 725
 726 def slashappend(s):
 727     """Append "/" to 's' if it doesn't aleady end in "/"."""
 728     if s and not s.endswith('/'):
 729         return s + '/'
 730     else:
 731         return s
 732
 733
 734 def _mmap_do(f, sz, flags, prot, close):
 735     if not sz:
 736         st = os.fstat(f.fileno())
 737         sz = st.st_size
 738     if not sz:
 739         # trying to open a zero-length map gives an error, but an empty
 740         # string has all the same behaviour of a zero-length map, ie. it has
 741         # no elements :)
 742         return ''
 743     map = mmap.mmap(f.fileno(), sz, flags, prot)
 744     if close:
 745         f.close()  # map will persist beyond file close
 746     return map
 747
 748
 749 def mmap_read(f, sz = 0, close=True):
 750     """Create a read-only memory mapped region on file 'f'.
 751     If sz is 0, the region will cover the entire file.
 752     """
 753     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 754
 755
 756 def mmap_readwrite(f, sz = 0, close=True):
 757     """Create a read-write memory mapped region on file 'f'.
 758     If sz is 0, the region will cover the entire file.
 759     """
 760     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 761                     close)
 762
 763
 764 def mmap_readwrite_private(f, sz = 0, close=True):
 765     """Create a read-write memory mapped region on file 'f'.
 766     If sz is 0, the region will cover the entire file.
 767     The map is private, which means the changes are never flushed back to the
 768     file.
 769     """
 770     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 771                     close)
 772
 773
 774 _mincore = getattr(_helpers, 'mincore', None)
 775 if _mincore:
 776     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 777     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 778
 779     _fmincore_chunk_size = None
 780     def _set_fmincore_chunk_size():
 781         global _fmincore_chunk_size
 782         pref_chunk_size = 64 * 1024 * 1024
 783         chunk_size = sc_page_size
 784         if (sc_page_size < pref_chunk_size):
 785             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 786         _fmincore_chunk_size = chunk_size
 787
 788     def fmincore(fd):
 789         """Return the mincore() data for fd as a bytearray whose values can be
 790         tested via MINCORE_INCORE, or None if fd does not fully
 791         support the operation."""
 792         st = os.fstat(fd)
 793         if (st.st_size == 0):
 794             return bytearray(0)
 795         if not _fmincore_chunk_size:
 796             _set_fmincore_chunk_size()
 797         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 798         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 799         chunk_count = page_count / _fmincore_chunk_size
 800         if chunk_count < 1:
 801             chunk_count = 1
 802         result = bytearray(page_count)
 803         for ci in xrange(chunk_count):
 804             pos = _fmincore_chunk_size * ci;
 805             msize = min(_fmincore_chunk_size, st.st_size - pos)
 806             try:
 807                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 808             except mmap.error as ex:
 809                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 810                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 811                     return None
 812                 raise ex
 813             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 814         return result
 815
 816
 817 def parse_timestamp(epoch_str):
 818     """Return the number of nanoseconds since the epoch that are described
 819 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 820 throw a ValueError that may contain additional information."""
 821     ns_per = {'s' :  1000000000,
 822               'ms' : 1000000,
 823               'us' : 1000,
 824               'ns' : 1}
 825     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 826     if not match:
 827         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 828             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 829         raise ValueError()
 830     (n, units) = match.group(1, 2)
 831     if not n:
 832         n = 1
 833     n = int(n)
 834     return n * ns_per[units]
 835
 836
 837 def parse_num(s):
 838     """Parse data size information into a float number.
 839
 840     Here are some examples of conversions:
 841         199.2k means 203981 bytes
 842         1GB means 1073741824 bytes
 843         2.1 tb means 2199023255552 bytes
 844     """
 845     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 846     if not g:
 847         raise ValueError("can't parse %r as a number" % s)
 848     (val, unit) = g.groups()
 849     num = float(val)
 850     unit = unit.lower()
 851     if unit in ['t', 'tb']:
 852         mult = 1024*1024*1024*1024
 853     elif unit in ['g', 'gb']:
 854         mult = 1024*1024*1024
 855     elif unit in ['m', 'mb']:
 856         mult = 1024*1024
 857     elif unit in ['k', 'kb']:
 858         mult = 1024
 859     elif unit in ['', 'b']:
 860         mult = 1
 861     else:
 862         raise ValueError("invalid unit %r in number %r" % (unit, s))
 863     return int(num*mult)
 864
 865
 866 def count(l):
 867     """Count the number of elements in an iterator. (consumes the iterator)"""
 868     return reduce(lambda x,y: x+1, l)
 869
 870
 871 saved_errors = []
 872 def add_error(e):
 873     """Append an error message to the list of saved errors.
 874
 875     Once processing is able to stop and output the errors, the saved errors are
 876     accessible in the module variable helpers.saved_errors.
 877     """
 878     saved_errors.append(e)
 879     log('%-70s\n' % e)
 880
 881
 882 def clear_errors():
 883     global saved_errors
 884     saved_errors = []
 885
 886
 887 def die_if_errors(msg=None, status=1):
 888     global saved_errors
 889     if saved_errors:
 890         if not msg:
 891             msg = 'warning: %d errors encountered\n' % len(saved_errors)
 892         log(msg)
 893         sys.exit(status)
 894
 895
 896 def handle_ctrl_c():
 897     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 898
 899     The new exception handler will make sure that bup will exit without an ugly
 900     stacktrace when Ctrl-C is hit.
 901     """
 902     oldhook = sys.excepthook
 903     def newhook(exctype, value, traceback):
 904         if exctype == KeyboardInterrupt:
 905             log('\nInterrupted.\n')
 906         else:
 907             return oldhook(exctype, value, traceback)
 908     sys.excepthook = newhook
 909
 910
 911 def columnate(l, prefix):
 912     """Format elements of 'l' in columns with 'prefix' leading each line.
 913
 914     The number of columns is determined automatically based on the string
 915     lengths.
 916     """
 917     if not l:
 918         return ""
 919     l = l[:]
 920     clen = max(len(s) for s in l)
 921     ncols = (tty_width() - len(prefix)) / (clen + 2)
 922     if ncols <= 1:
 923         ncols = 1
 924         clen = 0
 925     cols = []
 926     while len(l) % ncols:
 927         l.append('')
 928     rows = len(l)/ncols
 929     for s in range(0, len(l), rows):
 930         cols.append(l[s:s+rows])
 931     out = ''
 932     for row in zip(*cols):
 933         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 934     return out
 935
 936
 937 def parse_date_or_fatal(str, fatal):
 938     """Parses the given date or calls Option.fatal().
 939     For now we expect a string that contains a float."""
 940     try:
 941         date = float(str)
 942     except ValueError as e:
 943         raise fatal('invalid date format (should be a float): %r' % e)
 944     else:
 945         return date
 946
 947
 948 def parse_excludes(options, fatal):
 949     """Traverse the options and extract all excludes, or call Option.fatal()."""
 950     excluded_paths = []
 951
 952     for flag in options:
 953         (option, parameter) = flag
 954         if option == '--exclude':
 955             excluded_paths.append(resolve_parent(parameter))
 956         elif option == '--exclude-from':
 957             try:
 958                 f = open(resolve_parent(parameter))
 959             except IOError as e:
 960                 raise fatal("couldn't read %s" % parameter)
 961             for exclude_path in f.readlines():
 962                 # FIXME: perhaps this should be rstrip('\n')
 963                 exclude_path = resolve_parent(exclude_path.strip())
 964                 if exclude_path:
 965                     excluded_paths.append(exclude_path)
 966     return sorted(frozenset(excluded_paths))
 967
 968
 969 def parse_rx_excludes(options, fatal):
 970     """Traverse the options and extract all rx excludes, or call
 971     Option.fatal()."""
 972     excluded_patterns = []
 973
 974     for flag in options:
 975         (option, parameter) = flag
 976         if option == '--exclude-rx':
 977             try:
 978                 excluded_patterns.append(re.compile(parameter))
 979             except re.error as ex:
 980                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 981         elif option == '--exclude-rx-from':
 982             try:
 983                 f = open(resolve_parent(parameter))
 984             except IOError as e:
 985                 raise fatal("couldn't read %s" % parameter)
 986             for pattern in f.readlines():
 987                 spattern = pattern.rstrip('\n')
 988                 if not spattern:
 989                     continue
 990                 try:
 991                     excluded_patterns.append(re.compile(spattern))
 992                 except re.error as ex:
 993                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 994     return excluded_patterns
 995
 996
 997 def should_rx_exclude_path(path, exclude_rxs):
 998     """Return True if path matches a regular expression in exclude_rxs."""
 999     for rx in exclude_rxs:
1000         if rx.search(path):
1001             debug1('Skipping %r: excluded by rx pattern %r.\n'
1002                    % (path, rx.pattern))
1003             return True
1004     return False
1005
1006
1007 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
1008 # that resolve against the current filesystem in the strip/graft
1009 # functions for example, but elsewhere as well.  I suspect bup's not
1010 # always being careful about that.  For some cases, the contents of
1011 # the current filesystem should be irrelevant, and consulting it might
1012 # produce the wrong result, perhaps via unintended symlink resolution,
1013 # for example.
1014
def path_components(path):
    """Break path into a list of pairs of the form (name,
    full_path_to_name).  Path must start with '/'.
    Example:
      '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]

    The path is normalized with os.path.abspath() before it is split.
    Raises Exception if path does not start with '/'."""
    if not path.startswith('/'):
        # Call-form raise: 'raise Exception, msg' is python 2 only syntax.
        raise Exception('path must start with "/": %s' % path)
    # Since we assume path startswith('/'), we can skip the first element.
    result = [('', '/')]
    norm_path = os.path.abspath(path)
    if norm_path == '/':
        return result
    full_path = ''
    for p in norm_path.split('/')[1:]:
        full_path += '/' + p
        result.append((p, full_path))
    return result
1032
1033
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').
    See thelpers.py for examples.

    Prefixes are tried longest first, and a prefix only applies when it
    ends on a component boundary of path (so '/foo' strips '/foo' and
    '/foo/bar', but not '/foobar').  The first component of a stripped
    result is ('', prefix).  A prefix that normalizes to '/' is ignored,
    and when no prefix applies the result is path_components(path)."""
    normalized_path = os.path.abspath(path)
    sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
    for bp in sorted_strip_prefixes:
        normalized_bp = os.path.abspath(bp)
        if normalized_bp == '/':
            continue
        # Require a whole-component match; a bare startswith() would
        # treat '/foo' as a prefix of '/foobar' and then fabricate the
        # nonexistent path '/foo/bar'.
        if normalized_path != normalized_bp \
           and not normalized_path.startswith(normalized_bp + '/'):
            continue
        prefix = normalized_bp
        result = []
        for p in normalized_path[len(normalized_bp):].split('/'):
            if p: # not root
                prefix += '/'
            prefix += p
            result.append((p, prefix))
        return result
    # Nothing to strip.
    return path_components(path)
1056
1057
def grafted_path_components(graft_points, path):
    """Return the components of path as (name, fs_path_or_None) pairs
    after applying the first matching graft point.

    graft_points is an iterable of (old_prefix, new_prefix) pairs.  The
    first pair whose normalized old_prefix is a string prefix of the
    absolute form of path is applied: the result starts with one
    (name, None) pair for each component of new_prefix before its last,
    followed by the entries of path_components() for path from the
    graft point onward, with the graft point entry renamed to the last
    component of new_prefix while keeping its original filesystem path.
    If no graft point matches, path_components() of the absolute path
    is returned unchanged."""
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize both prefixes.  Note that normpath() only cleans up
        # the path text; it does not make a relative prefix absolute.
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # NOTE(review): this is a plain string-prefix test, so an
        # old_prefix of '/foo' also matches '/foobar' -- confirm
        # whether that is intended.
        if clean_path.startswith(old_prefix):
            escaped_prefix = re.escape(old_prefix)
            # NOTE(review): new_prefix is used as a re.sub() replacement
            # template, so any backslashes in it would be interpreted
            # as escapes -- confirm that cannot matter here.
            grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = '/' + grafted_path.lstrip('/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
            new_prefix_parts = new_prefix.split('/')
            # The faked leading directories: every component of the
            # grafted path that comes before the last one of new_prefix.
            result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            # (graft_point is reused here as an index into result; the
            # loop variable is no longer needed since we return below.)
            graft_point = len(result_prefix)
            result[graft_point] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == '/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
1099
1100
# Module-level alias for hashlib.sha1.
Sha1 = hashlib.sha1


# The localtime() provided by the _helpers extension module, or None
# when that module has no such attribute.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # Field names for the values returned by _helpers.localtime(): the
    # first nine match time.struct_time, followed by tm_gmtoff and
    # tm_zone.
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])
1111
# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the result of _helpers.localtime(time) as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        # Format the sign separately from the hours: '%+03d' % -0
        # yields '+00', which would turn an offset between -1h and 0
        # (e.g. -0030) into a positive one.  An offset that truncates
        # to zero minutes is still reported as '+0000'.
        sign = '-' if off < 0 and offmin else '+'
        return "%s%02d%02d" % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time: x itself if it already is
        one, otherwise a struct_time built from its first nine items."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted
        by time.strftime('%z')."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged; localtime() results are already usable
        with python's time module here."""
        return x
1139
1140
# Character sequences that may not appear anywhere in a save name.  The
# '[' inside the character class is escaped so that newer Pythons don't
# warn about (or eventually reinterpret) a "possible nested set".
_some_invalid_save_parts_rx = re.compile(r'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a save name, False otherwise.

    A name is rejected if it is empty or '@', starts or ends with '/',
    ends with '.', contains a space, any of [ ~ ^ : ? * or backslash,
    '..', '//', '@{', an ASCII control character or DEL, or if any of
    its '/'-separated parts starts with '.' or ends with '.lock'."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    if not name or name == '@' \
       or name.startswith('/') or name.endswith('/') \
       or name.endswith('.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    for part in name.split('/'):
        if part.startswith('.') or part.endswith('.lock'):
            return False
    return True
1158
1159
_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Convert the period string s to a number of seconds.

    s is either 'forever', which yields float('inf'), or a decimal
    count followed by one of the units s, min, h, d, w, m (counted as
    31 days) or y (counted as 366 days).  Returns None when s has any
    other form."""
    if s == 'forever':
        return float('inf')
    parsed = _period_rx.match(s)
    if parsed is None:
        return None
    magnitude, unit = parsed.groups()
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    secs_per_unit = {
        's': 1,
        'min': minute,
        'h': hour,
        'd': day,
        'w': 7 * day,
        'm': 31 * day,
        'y': 366 * day,
    }
    return int(magnitude) * secs_per_unit[unit]