lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from ctypes import sizeof, c_void_p
   5 from os import environ
   6 from contextlib import contextmanager
   7 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   8 import hashlib, heapq, math, operator, time, grp, tempfile
   9
  10 from bup import _helpers
  11
  12 sc_page_size = os.sysconf('SC_PAGE_SIZE')
  13 assert(sc_page_size > 0)
  14
  15 sc_arg_max = os.sysconf('SC_ARG_MAX')
  16 if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
  17     sc_arg_max = 2 * 1024 * 1024
  18
  19 # This function should really be in helpers, not in bup.options.  But we
  20 # want options.py to be standalone so people can include it in other projects.
  21 from bup.options import _tty_width
  22 tty_width = _tty_width
  23
  24
  25 def atoi(s):
  26     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  27     try:
  28         return int(s or '0')
  29     except ValueError:
  30         return 0
  31
  32
  33 def atof(s):
  34     """Convert the string 's' to a float. Return 0 if s is not a number."""
  35     try:
  36         return float(s or '0')
  37     except ValueError:
  38         return 0
  39
  40
# Debug verbosity read from the BUP_DEBUG environment variable; atoi() turns
# a missing or non-numeric value into 0.  debug1() and debug2() only log
# when this is at least 1 or 2 respectively.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  42
  43
  44 if sys.platform.startswith('darwin'):
  45     # Apparently fsync on OS X doesn't guarantee to sync all the way down
  46     import fcntl
  47     fdatasync = lambda fd : fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  48 else: # If the platform doesn't have fdatasync, fall back to fsync
  49     try:
  50         fdatasync = os.fdatasync
  51     except AttributeError:
  52         fdatasync = os.fsync
  53
  54
  55 # Write (blockingly) to sockets that may or may not be in blocking mode.
  56 # We need this because our stderr is sometimes eaten by subprocesses
  57 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
  58 # leading to race conditions.  Ick.  We'll do it the hard way.
  59 def _hard_write(fd, buf):
  60     while buf:
  61         (r,w,x) = select.select([], [fd], [], None)
  62         if not w:
  63             raise IOError('select(fd) returned without being writable')
  64         try:
  65             sz = os.write(fd, buf)
  66         except OSError as e:
  67             if e.errno != errno.EAGAIN:
  68                 raise
  69         assert(sz >= 0)
  70         buf = buf[sz:]
  71
  72
  73 _last_prog = 0
  74 def log(s):
  75     """Print a log message to stderr."""
  76     global _last_prog
  77     sys.stdout.flush()
  78     _hard_write(sys.stderr.fileno(), s)
  79     _last_prog = 0
  80
  81
  82 def debug1(s):
  83     if buglvl >= 1:
  84         log(s)
  85
  86
  87 def debug2(s):
  88     if buglvl >= 2:
  89         log(s)
  90
  91
  92 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
  93 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
  94 _last_progress = ''
  95 def progress(s):
  96     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
  97     global _last_progress
  98     if istty2:
  99         log(s)
 100         _last_progress = s
 101
 102
 103 def qprogress(s):
 104     """Calls progress() only if we haven't printed progress in a while.
 105
 106     This avoids overloading the stderr buffer with excess junk.
 107     """
 108     global _last_prog
 109     now = time.time()
 110     if now - _last_prog > 0.1:
 111         progress(s)
 112         _last_prog = now
 113
 114
 115 def reprogress():
 116     """Calls progress() to redisplay the most recent progress message.
 117
 118     Useful after you've printed some other message that wipes out the
 119     progress line.
 120     """
 121     if _last_progress and _last_progress.endswith('\r'):
 122         progress(_last_progress)
 123
 124
 125 def mkdirp(d, mode=None):
 126     """Recursively create directories on path 'd'.
 127
 128     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 129     the path already exists.
 130     """
 131     try:
 132         if mode:
 133             os.makedirs(d, mode)
 134         else:
 135             os.makedirs(d)
 136     except OSError as e:
 137         if e.errno == errno.EEXIST:
 138             pass
 139         else:
 140             raise
 141
 142
 143 _unspecified_next_default = object()
 144
 145 def _fallback_next(it, default=_unspecified_next_default):
 146     """Retrieve the next item from the iterator by calling its
 147     next() method. If default is given, it is returned if the
 148     iterator is exhausted, otherwise StopIteration is raised."""
 149
 150     if default is _unspecified_next_default:
 151         return it.next()
 152     else:
 153         try:
 154             return it.next()
 155         except StopIteration:
 156             return default
 157
 158 if sys.version_info < (2, 6):
 159     next =  _fallback_next
 160
 161
 162 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 163     if key:
 164         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 165     else:
 166         samekey = operator.eq
 167     count = 0
 168     total = sum(len(it) for it in iters)
 169     iters = (iter(it) for it in iters)
 170     heap = ((next(it, None),it) for it in iters)
 171     heap = [(e,it) for e,it in heap if e]
 172
 173     heapq.heapify(heap)
 174     pe = None
 175     while heap:
 176         if not count % pfreq:
 177             pfunc(count, total)
 178         e, it = heap[0]
 179         if not samekey(e, pe):
 180             pe = e
 181             yield e
 182         count += 1
 183         try:
 184             e = it.next() # Don't use next() function, it's too expensive
 185         except StopIteration:
 186             heapq.heappop(heap) # remove current
 187         else:
 188             heapq.heapreplace(heap, (e, it)) # shift current to new location
 189     pfinal(count, total)
 190
 191
 192 def unlink(f):
 193     """Delete a file at path 'f' if it currently exists.
 194
 195     Unlike os.unlink(), does not throw an exception if the file didn't already
 196     exist.
 197     """
 198     try:
 199         os.unlink(f)
 200     except OSError as e:
 201         if e.errno != errno.ENOENT:
 202             raise
 203
 204
 205 def readpipe(argv, preexec_fn=None):
 206     """Run a subprocess and return its output."""
 207     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn)
 208     out, err = p.communicate()
 209     if p.returncode != 0:
 210         raise Exception('subprocess %r failed with status %d'
 211                         % (' '.join(argv), p.returncode))
 212     return out
 213
 214
 215 def _argmax_base(command):
 216     base_size = 2048
 217     for c in command:
 218         base_size += len(command) + 1
 219     for k, v in environ.iteritems():
 220         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 221     return base_size
 222
 223
 224 def _argmax_args_size(args):
 225     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 226
 227
 228 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 229     """If args is not empty, yield the output produced by calling the
 230 command list with args as a sequence of strings (It may be necessary
 231 to return multiple strings in order to respect ARG_MAX)."""
 232     # The optional arg_max arg is a workaround for an issue with the
 233     # current wvtest behavior.
 234     base_size = _argmax_base(command)
 235     while args:
 236         room = arg_max - base_size
 237         i = 0
 238         while i < len(args):
 239             next_size = _argmax_args_size(args[i:i+1])
 240             if room - next_size < 0:
 241                 break
 242             room -= next_size
 243             i += 1
 244         sub_args = args[:i]
 245         args = args[i:]
 246         assert(len(sub_args))
 247         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 248
 249
 250 def resolve_parent(p):
 251     """Return the absolute path of a file without following any final symlink.
 252
 253     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 254     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 255     will follow symlinks in p's directory)
 256     """
 257     try:
 258         st = os.lstat(p)
 259     except OSError:
 260         st = None
 261     if st and stat.S_ISLNK(st.st_mode):
 262         (dir, name) = os.path.split(p)
 263         dir = os.path.realpath(dir)
 264         out = os.path.join(dir, name)
 265     else:
 266         out = os.path.realpath(p)
 267     #log('realpathing:%r,%r\n' % (p, out))
 268     return out
 269
 270
 271 def detect_fakeroot():
 272     "Return True if we appear to be running under fakeroot."
 273     return os.getenv("FAKEROOTKEY") != None
 274
 275
 276 def is_superuser():
 277     if sys.platform.startswith('cygwin'):
 278         import ctypes
 279         return ctypes.cdll.shell32.IsUserAnAdmin()
 280     else:
 281         return os.geteuid() == 0
 282
 283
 284 def _cache_key_value(get_value, key, cache):
 285     """Return (value, was_cached).  If there is a value in the cache
 286     for key, use that, otherwise, call get_value(key) which should
 287     throw a KeyError if there is no value -- in which case the cached
 288     and returned value will be None.
 289     """
 290     try: # Do we already have it (or know there wasn't one)?
 291         value = cache[key]
 292         return value, True
 293     except KeyError:
 294         pass
 295     value = None
 296     try:
 297         cache[key] = value = get_value(key)
 298     except KeyError:
 299         cache[key] = None
 300     return value, False
 301
 302
 303 _uid_to_pwd_cache = {}
 304 _name_to_pwd_cache = {}
 305
 306 def pwd_from_uid(uid):
 307     """Return password database entry for uid (may be a cached value).
 308     Return None if no entry is found.
 309     """
 310     global _uid_to_pwd_cache, _name_to_pwd_cache
 311     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 312     if entry and not cached:
 313         _name_to_pwd_cache[entry.pw_name] = entry
 314     return entry
 315
 316
 317 def pwd_from_name(name):
 318     """Return password database entry for name (may be a cached value).
 319     Return None if no entry is found.
 320     """
 321     global _uid_to_pwd_cache, _name_to_pwd_cache
 322     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 323     if entry and not cached:
 324         _uid_to_pwd_cache[entry.pw_uid] = entry
 325     return entry
 326
 327
 328 _gid_to_grp_cache = {}
 329 _name_to_grp_cache = {}
 330
 331 def grp_from_gid(gid):
 332     """Return password database entry for gid (may be a cached value).
 333     Return None if no entry is found.
 334     """
 335     global _gid_to_grp_cache, _name_to_grp_cache
 336     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 337     if entry and not cached:
 338         _name_to_grp_cache[entry.gr_name] = entry
 339     return entry
 340
 341
 342 def grp_from_name(name):
 343     """Return password database entry for name (may be a cached value).
 344     Return None if no entry is found.
 345     """
 346     global _gid_to_grp_cache, _name_to_grp_cache
 347     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 348     if entry and not cached:
 349         _gid_to_grp_cache[entry.gr_gid] = entry
 350     return entry
 351
 352
 353 _username = None
 354 def username():
 355     """Get the user's login name."""
 356     global _username
 357     if not _username:
 358         uid = os.getuid()
 359         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 360     return _username
 361
 362
 363 _userfullname = None
 364 def userfullname():
 365     """Get the user's full name."""
 366     global _userfullname
 367     if not _userfullname:
 368         uid = os.getuid()
 369         entry = pwd_from_uid(uid)
 370         if entry:
 371             _userfullname = entry[4].split(',')[0] or entry[0]
 372         if not _userfullname:
 373             _userfullname = 'user%d' % uid
 374     return _userfullname
 375
 376
 377 _hostname = None
 378 def hostname():
 379     """Get the FQDN of this machine."""
 380     global _hostname
 381     if not _hostname:
 382         _hostname = socket.getfqdn()
 383     return _hostname
 384
 385
 386 _resource_path = None
 387 def resource_path(subdir=''):
 388     global _resource_path
 389     if not _resource_path:
 390         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 391     return os.path.join(_resource_path, subdir)
 392
 393 def format_filesize(size):
 394     unit = 1024.0
 395     size = float(size)
 396     if size < unit:
 397         return "%d" % (size)
 398     exponent = int(math.log(size) / math.log(unit))
 399     size_prefix = "KMGTPE"[exponent - 1]
 400     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 401
 402
 403 class NotOk(Exception):
 404     pass
 405
 406
 407 class BaseConn:
 408     def __init__(self, outp):
 409         self.outp = outp
 410
 411     def close(self):
 412         while self._read(65536): pass
 413
 414     def read(self, size):
 415         """Read 'size' bytes from input stream."""
 416         self.outp.flush()
 417         return self._read(size)
 418
 419     def readline(self):
 420         """Read from input stream until a newline is found."""
 421         self.outp.flush()
 422         return self._readline()
 423
 424     def write(self, data):
 425         """Write 'data' to output stream."""
 426         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 427         self.outp.write(data)
 428
 429     def has_input(self):
 430         """Return true if input stream is readable."""
 431         raise NotImplemented("Subclasses must implement has_input")
 432
 433     def ok(self):
 434         """Indicate end of output from last sent command."""
 435         self.write('\nok\n')
 436
 437     def error(self, s):
 438         """Indicate server error to the client."""
 439         s = re.sub(r'\s+', ' ', str(s))
 440         self.write('\nerror %s\n' % s)
 441
 442     def _check_ok(self, onempty):
 443         self.outp.flush()
 444         rl = ''
 445         for rl in linereader(self):
 446             #log('%d got line: %r\n' % (os.getpid(), rl))
 447             if not rl:  # empty line
 448                 continue
 449             elif rl == 'ok':
 450                 return None
 451             elif rl.startswith('error '):
 452                 #log('client: error: %s\n' % rl[6:])
 453                 return NotOk(rl[6:])
 454             else:
 455                 onempty(rl)
 456         raise Exception('server exited unexpectedly; see errors above')
 457
 458     def drain_and_check_ok(self):
 459         """Remove all data for the current command from input stream."""
 460         def onempty(rl):
 461             pass
 462         return self._check_ok(onempty)
 463
 464     def check_ok(self):
 465         """Verify that server action completed successfully."""
 466         def onempty(rl):
 467             raise Exception('expected "ok", got %r' % rl)
 468         return self._check_ok(onempty)
 469
 470
 471 class Conn(BaseConn):
 472     def __init__(self, inp, outp):
 473         BaseConn.__init__(self, outp)
 474         self.inp = inp
 475
 476     def _read(self, size):
 477         return self.inp.read(size)
 478
 479     def _readline(self):
 480         return self.inp.readline()
 481
 482     def has_input(self):
 483         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 484         if rl:
 485             assert(rl[0] == self.inp.fileno())
 486             return True
 487         else:
 488             return None
 489
 490
 491 def checked_reader(fd, n):
 492     while n > 0:
 493         rl, _, _ = select.select([fd], [], [])
 494         assert(rl[0] == fd)
 495         buf = os.read(fd, n)
 496         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 497         yield buf
 498         n -= len(buf)
 499
 500
 501 MAX_PACKET = 128 * 1024
 502 def mux(p, outfd, outr, errr):
 503     try:
 504         fds = [outr, errr]
 505         while p.poll() is None:
 506             rl, _, _ = select.select(fds, [], [])
 507             for fd in rl:
 508                 if fd == outr:
 509                     buf = os.read(outr, MAX_PACKET)
 510                     if not buf: break
 511                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 512                 elif fd == errr:
 513                     buf = os.read(errr, 1024)
 514                     if not buf: break
 515                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 516     finally:
 517         os.write(outfd, struct.pack('!IB', 0, 3))
 518
 519
 520 class DemuxConn(BaseConn):
 521     """A helper class for bup's client-server protocol."""
 522     def __init__(self, infd, outp):
 523         BaseConn.__init__(self, outp)
 524         # Anything that comes through before the sync string was not
 525         # multiplexed and can be assumed to be debug/log before mux init.
 526         tail = ''
 527         while tail != 'BUPMUX':
 528             b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
 529             if not b:
 530                 raise IOError('demux: unexpected EOF during initialization')
 531             tail += b
 532             sys.stderr.write(tail[:-6])  # pre-mux log messages
 533             tail = tail[-6:]
 534         self.infd = infd
 535         self.reader = None
 536         self.buf = None
 537         self.closed = False
 538
 539     def write(self, data):
 540         self._load_buf(0)
 541         BaseConn.write(self, data)
 542
 543     def _next_packet(self, timeout):
 544         if self.closed: return False
 545         rl, wl, xl = select.select([self.infd], [], [], timeout)
 546         if not rl: return False
 547         assert(rl[0] == self.infd)
 548         ns = ''.join(checked_reader(self.infd, 5))
 549         n, fdw = struct.unpack('!IB', ns)
 550         assert(n <= MAX_PACKET)
 551         if fdw == 1:
 552             self.reader = checked_reader(self.infd, n)
 553         elif fdw == 2:
 554             for buf in checked_reader(self.infd, n):
 555                 sys.stderr.write(buf)
 556         elif fdw == 3:
 557             self.closed = True
 558             debug2("DemuxConn: marked closed\n")
 559         return True
 560
 561     def _load_buf(self, timeout):
 562         if self.buf is not None:
 563             return True
 564         while not self.closed:
 565             while not self.reader:
 566                 if not self._next_packet(timeout):
 567                     return False
 568             try:
 569                 self.buf = self.reader.next()
 570                 return True
 571             except StopIteration:
 572                 self.reader = None
 573         return False
 574
 575     def _read_parts(self, ix_fn):
 576         while self._load_buf(None):
 577             assert(self.buf is not None)
 578             i = ix_fn(self.buf)
 579             if i is None or i == len(self.buf):
 580                 yv = self.buf
 581                 self.buf = None
 582             else:
 583                 yv = self.buf[:i]
 584                 self.buf = self.buf[i:]
 585             yield yv
 586             if i is not None:
 587                 break
 588
 589     def _readline(self):
 590         def find_eol(buf):
 591             try:
 592                 return buf.index('\n')+1
 593             except ValueError:
 594                 return None
 595         return ''.join(self._read_parts(find_eol))
 596
 597     def _read(self, size):
 598         csize = [size]
 599         def until_size(buf): # Closes on csize
 600             if len(buf) < csize[0]:
 601                 csize[0] -= len(buf)
 602                 return None
 603             else:
 604                 return csize[0]
 605         return ''.join(self._read_parts(until_size))
 606
 607     def has_input(self):
 608         return self._load_buf(0)
 609
 610
 611 def linereader(f):
 612     """Generate a list of input lines from 'f' without terminating newlines."""
 613     while 1:
 614         line = f.readline()
 615         if not line:
 616             break
 617         yield line[:-1]
 618
 619
 620 def chunkyreader(f, count = None):
 621     """Generate a list of chunks of data read from 'f'.
 622
 623     If count is None, read until EOF is reached.
 624
 625     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 626     reached while reading, raise IOError.
 627     """
 628     if count != None:
 629         while count > 0:
 630             b = f.read(min(count, 65536))
 631             if not b:
 632                 raise IOError('EOF with %d bytes remaining' % count)
 633             yield b
 634             count -= len(b)
 635     else:
 636         while 1:
 637             b = f.read(65536)
 638             if not b: break
 639             yield b
 640
 641
 642 @contextmanager
 643 def atomically_replaced_file(name, mode='w', buffering=-1):
 644     """Yield a file that will be atomically renamed name when leaving the block.
 645
 646     This contextmanager yields an open file object that is backed by a
 647     temporary file which will be renamed (atomically) to the target
 648     name if everything succeeds.
 649
 650     The mode and buffering arguments are handled exactly as with open,
 651     and the yielded file will have very restrictive permissions, as
 652     per mkstemp.
 653
 654     E.g.::
 655
 656         with atomically_replaced_file('foo.txt', 'w') as f:
 657             f.write('hello jack.')
 658
 659     """
 660
 661     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
 662                                        text=('b' not in mode))
 663     try:
 664         try:
 665             f = os.fdopen(ffd, mode, buffering)
 666         except:
 667             os.close(ffd)
 668             raise
 669         try:
 670             yield f
 671         finally:
 672             f.close()
 673         os.rename(tempname, name)
 674     finally:
 675         unlink(tempname)  # nonexistant file is ignored
 676
 677
 678 def slashappend(s):
 679     """Append "/" to 's' if it doesn't aleady end in "/"."""
 680     if s and not s.endswith('/'):
 681         return s + '/'
 682     else:
 683         return s
 684
 685
 686 def _mmap_do(f, sz, flags, prot, close):
 687     if not sz:
 688         st = os.fstat(f.fileno())
 689         sz = st.st_size
 690     if not sz:
 691         # trying to open a zero-length map gives an error, but an empty
 692         # string has all the same behaviour of a zero-length map, ie. it has
 693         # no elements :)
 694         return ''
 695     map = mmap.mmap(f.fileno(), sz, flags, prot)
 696     if close:
 697         f.close()  # map will persist beyond file close
 698     return map
 699
 700
 701 def mmap_read(f, sz = 0, close=True):
 702     """Create a read-only memory mapped region on file 'f'.
 703     If sz is 0, the region will cover the entire file.
 704     """
 705     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 706
 707
 708 def mmap_readwrite(f, sz = 0, close=True):
 709     """Create a read-write memory mapped region on file 'f'.
 710     If sz is 0, the region will cover the entire file.
 711     """
 712     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 713                     close)
 714
 715
 716 def mmap_readwrite_private(f, sz = 0, close=True):
 717     """Create a read-write memory mapped region on file 'f'.
 718     If sz is 0, the region will cover the entire file.
 719     The map is private, which means the changes are never flushed back to the
 720     file.
 721     """
 722     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 723                     close)
 724
 725
 726 _mincore = getattr(_helpers, 'mincore', None)
 727 if _mincore:
 728     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 729     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 730
 731     _fmincore_chunk_size = None
 732     def _set_fmincore_chunk_size():
 733         global _fmincore_chunk_size
 734         pref_chunk_size = 64 * 1024 * 1024
 735         chunk_size = sc_page_size
 736         if (sc_page_size < pref_chunk_size):
 737             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 738         _fmincore_chunk_size = chunk_size
 739
 740     def fmincore(fd):
 741         """Return the mincore() data for fd as a bytearray whose values can be
 742         tested via MINCORE_INCORE, or None if fd does not fully
 743         support the operation."""
 744         st = os.fstat(fd)
 745         if (st.st_size == 0):
 746             return bytearray(0)
 747         if not _fmincore_chunk_size:
 748             _set_fmincore_chunk_size()
 749         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 750         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 751         chunk_count = page_count / _fmincore_chunk_size
 752         if chunk_count < 1:
 753             chunk_count = 1
 754         result = bytearray(page_count)
 755         for ci in xrange(chunk_count):
 756             pos = _fmincore_chunk_size * ci;
 757             msize = min(_fmincore_chunk_size, st.st_size - pos)
 758             try:
 759                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 760             except mmap.error as ex:
 761                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 762                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 763                     return None
 764                 raise ex
 765             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 766         return result
 767
 768
 769 def parse_timestamp(epoch_str):
 770     """Return the number of nanoseconds since the epoch that are described
 771 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 772 throw a ValueError that may contain additional information."""
 773     ns_per = {'s' :  1000000000,
 774               'ms' : 1000000,
 775               'us' : 1000,
 776               'ns' : 1}
 777     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 778     if not match:
 779         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 780             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 781         raise ValueError()
 782     (n, units) = match.group(1, 2)
 783     if not n:
 784         n = 1
 785     n = int(n)
 786     return n * ns_per[units]
 787
 788
 789 def parse_num(s):
 790     """Parse data size information into a float number.
 791
 792     Here are some examples of conversions:
 793         199.2k means 203981 bytes
 794         1GB means 1073741824 bytes
 795         2.1 tb means 2199023255552 bytes
 796     """
 797     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 798     if not g:
 799         raise ValueError("can't parse %r as a number" % s)
 800     (val, unit) = g.groups()
 801     num = float(val)
 802     unit = unit.lower()
 803     if unit in ['t', 'tb']:
 804         mult = 1024*1024*1024*1024
 805     elif unit in ['g', 'gb']:
 806         mult = 1024*1024*1024
 807     elif unit in ['m', 'mb']:
 808         mult = 1024*1024
 809     elif unit in ['k', 'kb']:
 810         mult = 1024
 811     elif unit in ['', 'b']:
 812         mult = 1
 813     else:
 814         raise ValueError("invalid unit %r in number %r" % (unit, s))
 815     return int(num*mult)
 816
 817
 818 def count(l):
 819     """Count the number of elements in an iterator. (consumes the iterator)"""
 820     return reduce(lambda x,y: x+1, l)
 821
 822
 823 saved_errors = []
 824 def add_error(e):
 825     """Append an error message to the list of saved errors.
 826
 827     Once processing is able to stop and output the errors, the saved errors are
 828     accessible in the module variable helpers.saved_errors.
 829     """
 830     saved_errors.append(e)
 831     log('%-70s\n' % e)
 832
 833
 834 def clear_errors():
 835     global saved_errors
 836     saved_errors = []
 837
 838
 839 def handle_ctrl_c():
 840     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 841
 842     The new exception handler will make sure that bup will exit without an ugly
 843     stacktrace when Ctrl-C is hit.
 844     """
 845     oldhook = sys.excepthook
 846     def newhook(exctype, value, traceback):
 847         if exctype == KeyboardInterrupt:
 848             log('\nInterrupted.\n')
 849         else:
 850             return oldhook(exctype, value, traceback)
 851     sys.excepthook = newhook
 852
 853
 854 def columnate(l, prefix):
 855     """Format elements of 'l' in columns with 'prefix' leading each line.
 856
 857     The number of columns is determined automatically based on the string
 858     lengths.
 859     """
 860     if not l:
 861         return ""
 862     l = l[:]
 863     clen = max(len(s) for s in l)
 864     ncols = (tty_width() - len(prefix)) / (clen + 2)
 865     if ncols <= 1:
 866         ncols = 1
 867         clen = 0
 868     cols = []
 869     while len(l) % ncols:
 870         l.append('')
 871     rows = len(l)/ncols
 872     for s in range(0, len(l), rows):
 873         cols.append(l[s:s+rows])
 874     out = ''
 875     for row in zip(*cols):
 876         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 877     return out
 878
 879
 880 def parse_date_or_fatal(str, fatal):
 881     """Parses the given date or calls Option.fatal().
 882     For now we expect a string that contains a float."""
 883     try:
 884         date = float(str)
 885     except ValueError as e:
 886         raise fatal('invalid date format (should be a float): %r' % e)
 887     else:
 888         return date
 889
 890
 891 def parse_excludes(options, fatal):
 892     """Traverse the options and extract all excludes, or call Option.fatal()."""
 893     excluded_paths = []
 894
 895     for flag in options:
 896         (option, parameter) = flag
 897         if option == '--exclude':
 898             excluded_paths.append(resolve_parent(parameter))
 899         elif option == '--exclude-from':
 900             try:
 901                 f = open(resolve_parent(parameter))
 902             except IOError as e:
 903                 raise fatal("couldn't read %s" % parameter)
 904             for exclude_path in f.readlines():
 905                 # FIXME: perhaps this should be rstrip('\n')
 906                 exclude_path = resolve_parent(exclude_path.strip())
 907                 if exclude_path:
 908                     excluded_paths.append(exclude_path)
 909     return sorted(frozenset(excluded_paths))
 910
 911
 912 def parse_rx_excludes(options, fatal):
 913     """Traverse the options and extract all rx excludes, or call
 914     Option.fatal()."""
 915     excluded_patterns = []
 916
 917     for flag in options:
 918         (option, parameter) = flag
 919         if option == '--exclude-rx':
 920             try:
 921                 excluded_patterns.append(re.compile(parameter))
 922             except re.error as ex:
 923                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 924         elif option == '--exclude-rx-from':
 925             try:
 926                 f = open(resolve_parent(parameter))
 927             except IOError as e:
 928                 raise fatal("couldn't read %s" % parameter)
 929             for pattern in f.readlines():
 930                 spattern = pattern.rstrip('\n')
 931                 if not spattern:
 932                     continue
 933                 try:
 934                     excluded_patterns.append(re.compile(spattern))
 935                 except re.error as ex:
 936                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 937     return excluded_patterns
 938
 939
 940 def should_rx_exclude_path(path, exclude_rxs):
 941     """Return True if path matches a regular expression in exclude_rxs."""
 942     for rx in exclude_rxs:
 943         if rx.search(path):
 944             debug1('Skipping %r: excluded by rx pattern %r.\n'
 945                    % (path, rx.pattern))
 946             return True
 947     return False
 948
 949
 950 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 951 # that resolve against the current filesystem in the strip/graft
 952 # functions for example, but elsewhere as well.  I suspect bup's not
 953 # always being careful about that.  For some cases, the contents of
 954 # the current filesystem should be irrelevant, and consulting it might
 955 # produce the wrong result, perhaps via unintended symlink resolution,
 956 # for example.
 957
 958 def path_components(path):
 959     """Break path into a list of pairs of the form (name,
 960     full_path_to_name).  Path must start with '/'.
 961     Example:
 962       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 963     if not path.startswith('/'):
 964         raise Exception, 'path must start with "/": %s' % path
 965     # Since we assume path startswith('/'), we can skip the first element.
 966     result = [('', '/')]
 967     norm_path = os.path.abspath(path)
 968     if norm_path == '/':
 969         return result
 970     full_path = ''
 971     for p in norm_path.split('/')[1:]:
 972         full_path += '/' + p
 973         result.append((p, full_path))
 974     return result
 975
 976
 977 def stripped_path_components(path, strip_prefixes):
 978     """Strip any prefix in strip_prefixes from path and return a list
 979     of path components where each component is (name,
 980     none_or_full_fs_path_to_name).  Assume path startswith('/').
 981     See thelpers.py for examples."""
 982     normalized_path = os.path.abspath(path)
 983     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
 984     for bp in sorted_strip_prefixes:
 985         normalized_bp = os.path.abspath(bp)
 986         if normalized_bp == '/':
 987             continue
 988         if normalized_path.startswith(normalized_bp):
 989             prefix = normalized_path[:len(normalized_bp)]
 990             result = []
 991             for p in normalized_path[len(normalized_bp):].split('/'):
 992                 if p: # not root
 993                     prefix += '/'
 994                 prefix += p
 995                 result.append((p, prefix))
 996             return result
 997     # Nothing to strip.
 998     return path_components(path)
 999
1000
def grafted_path_components(graft_points, path):
    """Return the components of path after applying the first graft
    in graft_points whose old prefix covers path.

    graft_points is an iterable of (old_prefix, new_prefix) pairs, as
    in --graft old_prefix=new_prefix.  The result is a list of (name,
    fs_path) pairs like the one path_components() returns, except
    that the faked directories ahead of the graft point have None as
    their fs_path.  If no graft applies, the result is
    path_components() of the normalized path."""
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize the prefixes (they are not resolved against the
        # current directory).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # Only graft when old_prefix covers whole components of the
        # path, so that /foo=... doesn't capture /foobar.
        if old_prefix.endswith('/'):
            boundary = old_prefix
        else:
            boundary = old_prefix + '/'
        if clean_path == old_prefix or clean_path.startswith(boundary):
            # Plain slicing rather than re.sub(), which would treat
            # backslashes in new_prefix as replacement escapes.
            grafted_path = new_prefix + clean_path[len(old_prefix):]
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = '/' + grafted_path.lstrip('/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
            new_prefix_parts = new_prefix.split('/')
            result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            graft_index = len(result_prefix)
            result[graft_index] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == '/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
1042
1043
# Module-level alias for hashlib's SHA-1 hash constructor.
Sha1 = hashlib.sha1
1045
1046
# _helpers.localtime is optional; when it's missing we fall back to
# python's time module below.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return _helpers.localtime(time) wrapped in a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds
        # down, so work with the magnitude and add the sign afterwards.
        offmin = abs(off) // 60
        h, m = divmod(offmin, 60)
        # Build the sign by hand: "%+03d" % -0 would print "+00", which
        # flips the sign of offsets between -00:01 and -00:59.
        sign = '-' if off < 0 and offmin else '+'
        return '%s%02d%02d' % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping the extra bup_time
        fields if necessary."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted by
        strftime's %z."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged."""
        return x
1082
1083
# Character sequences that may not appear anywhere in a save name.
# The '[' is escaped inside the character class so that newer python
# versions don't warn about a possible nested set.
_some_invalid_save_parts_rx = re.compile(r'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a save name, False otherwise.

    A name is rejected when it is empty or '@', starts or ends with
    '/', ends with '.', contains a space, one of ~^:?*[\\, '..', '//',
    '@{', or a control character, or has a '/'-separated part that
    starts with '.' or ends with '.lock'."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    # (an empty name can't be a ref either).
    if not name or name == '@' \
       or name.startswith('/') or name.endswith('/') \
       or name.endswith('.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    # No ASCII control characters or DEL.
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    for part in name.split('/'):
        if part.startswith('.') or part.endswith('.lock'):
            return False
    return True