lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 from collections import namedtuple
   4 from ctypes import sizeof, c_void_p
   5 from os import environ
   6 from contextlib import contextmanager
   7 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   8 import hashlib, heapq, math, operator, time, grp, tempfile
   9
  10 from bup import _helpers
  11
# System memory page size as reported by sysconf(); must be positive.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# System limit reported by sysconf('SC_ARG_MAX'); used as the default
# arg_max for batchpipe().
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024

# This function should really be in helpers, not in bup.options.  But we
# want options.py to be standalone so people can include it in other projects.
from bup.options import _tty_width
tty_width = _tty_width
  23
  24
  25 def atoi(s):
  26     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  27     try:
  28         return int(s or '0')
  29     except ValueError:
  30         return 0
  31
  32
  33 def atof(s):
  34     """Convert the string 's' to a float. Return 0 if s is not a number."""
  35     try:
  36         return float(s or '0')
  37     except ValueError:
  38         return 0
  39
  40
# Debug verbosity read from the BUP_DEBUG environment variable; atoi() maps
# an unset or non-numeric value to 0.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  42
  43
# Use os.fdatasync when this Python/platform provides it, else os.fsync.
try:
    _fdatasync = os.fdatasync
except AttributeError:
    _fdatasync = os.fsync

if sys.platform.startswith('darwin'):
    # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
    import fcntl
    def fdatasync(fd):
        """Sync 'fd' via fcntl(F_FULLFSYNC), falling back to _fdatasync()
        when the file system reports ENOTSUP."""
        try:
            return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
        except IOError as e:
            # Fallback for file systems (SMB) that do not support F_FULLFSYNC
            if e.errno == errno.ENOTSUP:
                return _fdatasync(fd)
            else:
                raise
else:
    # Everywhere else the plain implementation chosen above is used as-is.
    fdatasync = _fdatasync
  63
  64
  65 # Write (blockingly) to sockets that may or may not be in blocking mode.
  66 # We need this because our stderr is sometimes eaten by subprocesses
  67 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
  68 # leading to race conditions.  Ick.  We'll do it the hard way.
  69 def _hard_write(fd, buf):
  70     while buf:
  71         (r,w,x) = select.select([], [fd], [], None)
  72         if not w:
  73             raise IOError('select(fd) returned without being writable')
  74         try:
  75             sz = os.write(fd, buf)
  76         except OSError as e:
  77             if e.errno != errno.EAGAIN:
  78                 raise
  79         assert(sz >= 0)
  80         buf = buf[sz:]
  81
  82
  83 _last_prog = 0
  84 def log(s):
  85     """Print a log message to stderr."""
  86     global _last_prog
  87     sys.stdout.flush()
  88     _hard_write(sys.stderr.fileno(), s)
  89     _last_prog = 0
  90
  91
  92 def debug1(s):
  93     if buglvl >= 1:
  94         log(s)
  95
  96
  97 def debug2(s):
  98     if buglvl >= 2:
  99         log(s)
 100
 101
 102 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
 103 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
 104 _last_progress = ''
 105 def progress(s):
 106     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
 107     global _last_progress
 108     if istty2:
 109         log(s)
 110         _last_progress = s
 111
 112
 113 def qprogress(s):
 114     """Calls progress() only if we haven't printed progress in a while.
 115
 116     This avoids overloading the stderr buffer with excess junk.
 117     """
 118     global _last_prog
 119     now = time.time()
 120     if now - _last_prog > 0.1:
 121         progress(s)
 122         _last_prog = now
 123
 124
 125 def reprogress():
 126     """Calls progress() to redisplay the most recent progress message.
 127
 128     Useful after you've printed some other message that wipes out the
 129     progress line.
 130     """
 131     if _last_progress and _last_progress.endswith('\r'):
 132         progress(_last_progress)
 133
 134
 135 def mkdirp(d, mode=None):
 136     """Recursively create directories on path 'd'.
 137
 138     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 139     the path already exists.
 140     """
 141     try:
 142         if mode:
 143             os.makedirs(d, mode)
 144         else:
 145             os.makedirs(d)
 146     except OSError as e:
 147         if e.errno == errno.EEXIST:
 148             pass
 149         else:
 150             raise
 151
 152
 153 _unspecified_next_default = object()
 154
 155 def _fallback_next(it, default=_unspecified_next_default):
 156     """Retrieve the next item from the iterator by calling its
 157     next() method. If default is given, it is returned if the
 158     iterator is exhausted, otherwise StopIteration is raised."""
 159
 160     if default is _unspecified_next_default:
 161         return it.next()
 162     else:
 163         try:
 164             return it.next()
 165         except StopIteration:
 166             return default
 167
 168 if sys.version_info < (2, 6):
 169     next =  _fallback_next
 170
 171
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Yield the items of several iterables in heap-merged order, skipping
    any item equal to the one most recently yielded.

    iters:  a sequence of iterables; it is traversed twice and len() is
            taken of each member to compute the progress total.
    pfreq:  pfunc(count, total) is called whenever count is a multiple of
            pfreq.
    pfinal: called once as pfinal(count, total) after the merge finishes.
    key:    if given, two items are duplicates when this attribute compares
            equal on both; otherwise the items themselves are compared.

    NOTE(review): the output is globally ordered only if each input is
    already sorted -- presumably guaranteed by the callers; confirm.
    """
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    # Prime the heap with the first item of each iterator; iterators whose
    # first item is false (including exhausted ones, which give None) are
    # dropped here.
    heap = ((next(it, None),it) for it in iters)
    heap = [(e,it) for e,it in heap if e]

    heapq.heapify(heap)
    pe = None  # previously yielded item
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0]
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = it.next() # Don't use next() function, it's too expensive
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            heapq.heapreplace(heap, (e, it)) # shift current to new location
    pfinal(count, total)
 200
 201
 202 def unlink(f):
 203     """Delete a file at path 'f' if it currently exists.
 204
 205     Unlike os.unlink(), does not throw an exception if the file didn't already
 206     exist.
 207     """
 208     try:
 209         os.unlink(f)
 210     except OSError as e:
 211         if e.errno != errno.ENOENT:
 212             raise
 213
 214
 215 def readpipe(argv, preexec_fn=None, shell=False):
 216     """Run a subprocess and return its output."""
 217     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
 218                          shell=shell)
 219     out, err = p.communicate()
 220     if p.returncode != 0:
 221         raise Exception('subprocess %r failed with status %d'
 222                         % (' '.join(argv), p.returncode))
 223     return out
 224
 225
 226 def _argmax_base(command):
 227     base_size = 2048
 228     for c in command:
 229         base_size += len(command) + 1
 230     for k, v in environ.iteritems():
 231         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
 232     return base_size
 233
 234
 235 def _argmax_args_size(args):
 236     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
 237
 238
 239 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
 240     """If args is not empty, yield the output produced by calling the
 241 command list with args as a sequence of strings (It may be necessary
 242 to return multiple strings in order to respect ARG_MAX)."""
 243     # The optional arg_max arg is a workaround for an issue with the
 244     # current wvtest behavior.
 245     base_size = _argmax_base(command)
 246     while args:
 247         room = arg_max - base_size
 248         i = 0
 249         while i < len(args):
 250             next_size = _argmax_args_size(args[i:i+1])
 251             if room - next_size < 0:
 252                 break
 253             room -= next_size
 254             i += 1
 255         sub_args = args[:i]
 256         args = args[i:]
 257         assert(len(sub_args))
 258         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
 259
 260
 261 def resolve_parent(p):
 262     """Return the absolute path of a file without following any final symlink.
 263
 264     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 265     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 266     will follow symlinks in p's directory)
 267     """
 268     try:
 269         st = os.lstat(p)
 270     except OSError:
 271         st = None
 272     if st and stat.S_ISLNK(st.st_mode):
 273         (dir, name) = os.path.split(p)
 274         dir = os.path.realpath(dir)
 275         out = os.path.join(dir, name)
 276     else:
 277         out = os.path.realpath(p)
 278     #log('realpathing:%r,%r\n' % (p, out))
 279     return out
 280
 281
 282 def detect_fakeroot():
 283     "Return True if we appear to be running under fakeroot."
 284     return os.getenv("FAKEROOTKEY") != None
 285
 286
 287 def is_superuser():
 288     if sys.platform.startswith('cygwin'):
 289         import ctypes
 290         return ctypes.cdll.shell32.IsUserAnAdmin()
 291     else:
 292         return os.geteuid() == 0
 293
 294
 295 def _cache_key_value(get_value, key, cache):
 296     """Return (value, was_cached).  If there is a value in the cache
 297     for key, use that, otherwise, call get_value(key) which should
 298     throw a KeyError if there is no value -- in which case the cached
 299     and returned value will be None.
 300     """
 301     try: # Do we already have it (or know there wasn't one)?
 302         value = cache[key]
 303         return value, True
 304     except KeyError:
 305         pass
 306     value = None
 307     try:
 308         cache[key] = value = get_value(key)
 309     except KeyError:
 310         cache[key] = None
 311     return value, False
 312
 313
 314 _uid_to_pwd_cache = {}
 315 _name_to_pwd_cache = {}
 316
 317 def pwd_from_uid(uid):
 318     """Return password database entry for uid (may be a cached value).
 319     Return None if no entry is found.
 320     """
 321     global _uid_to_pwd_cache, _name_to_pwd_cache
 322     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 323     if entry and not cached:
 324         _name_to_pwd_cache[entry.pw_name] = entry
 325     return entry
 326
 327
 328 def pwd_from_name(name):
 329     """Return password database entry for name (may be a cached value).
 330     Return None if no entry is found.
 331     """
 332     global _uid_to_pwd_cache, _name_to_pwd_cache
 333     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 334     if entry and not cached:
 335         _uid_to_pwd_cache[entry.pw_uid] = entry
 336     return entry
 337
 338
 339 _gid_to_grp_cache = {}
 340 _name_to_grp_cache = {}
 341
 342 def grp_from_gid(gid):
 343     """Return password database entry for gid (may be a cached value).
 344     Return None if no entry is found.
 345     """
 346     global _gid_to_grp_cache, _name_to_grp_cache
 347     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 348     if entry and not cached:
 349         _name_to_grp_cache[entry.gr_name] = entry
 350     return entry
 351
 352
 353 def grp_from_name(name):
 354     """Return password database entry for name (may be a cached value).
 355     Return None if no entry is found.
 356     """
 357     global _gid_to_grp_cache, _name_to_grp_cache
 358     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 359     if entry and not cached:
 360         _gid_to_grp_cache[entry.gr_gid] = entry
 361     return entry
 362
 363
 364 _username = None
 365 def username():
 366     """Get the user's login name."""
 367     global _username
 368     if not _username:
 369         uid = os.getuid()
 370         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 371     return _username
 372
 373
 374 _userfullname = None
 375 def userfullname():
 376     """Get the user's full name."""
 377     global _userfullname
 378     if not _userfullname:
 379         uid = os.getuid()
 380         entry = pwd_from_uid(uid)
 381         if entry:
 382             _userfullname = entry[4].split(',')[0] or entry[0]
 383         if not _userfullname:
 384             _userfullname = 'user%d' % uid
 385     return _userfullname
 386
 387
 388 _hostname = None
 389 def hostname():
 390     """Get the FQDN of this machine."""
 391     global _hostname
 392     if not _hostname:
 393         _hostname = socket.getfqdn()
 394     return _hostname
 395
 396
 397 _resource_path = None
 398 def resource_path(subdir=''):
 399     global _resource_path
 400     if not _resource_path:
 401         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 402     return os.path.join(_resource_path, subdir)
 403
 404 def format_filesize(size):
 405     unit = 1024.0
 406     size = float(size)
 407     if size < unit:
 408         return "%d" % (size)
 409     exponent = int(math.log(size) / math.log(unit))
 410     size_prefix = "KMGTPE"[exponent - 1]
 411     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 412
 413
 414 class NotOk(Exception):
 415     pass
 416
 417
 418 class BaseConn:
 419     def __init__(self, outp):
 420         self.outp = outp
 421
 422     def close(self):
 423         while self._read(65536): pass
 424
 425     def read(self, size):
 426         """Read 'size' bytes from input stream."""
 427         self.outp.flush()
 428         return self._read(size)
 429
 430     def readline(self):
 431         """Read from input stream until a newline is found."""
 432         self.outp.flush()
 433         return self._readline()
 434
 435     def write(self, data):
 436         """Write 'data' to output stream."""
 437         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 438         self.outp.write(data)
 439
 440     def has_input(self):
 441         """Return true if input stream is readable."""
 442         raise NotImplemented("Subclasses must implement has_input")
 443
 444     def ok(self):
 445         """Indicate end of output from last sent command."""
 446         self.write('\nok\n')
 447
 448     def error(self, s):
 449         """Indicate server error to the client."""
 450         s = re.sub(r'\s+', ' ', str(s))
 451         self.write('\nerror %s\n' % s)
 452
 453     def _check_ok(self, onempty):
 454         self.outp.flush()
 455         rl = ''
 456         for rl in linereader(self):
 457             #log('%d got line: %r\n' % (os.getpid(), rl))
 458             if not rl:  # empty line
 459                 continue
 460             elif rl == 'ok':
 461                 return None
 462             elif rl.startswith('error '):
 463                 #log('client: error: %s\n' % rl[6:])
 464                 return NotOk(rl[6:])
 465             else:
 466                 onempty(rl)
 467         raise Exception('server exited unexpectedly; see errors above')
 468
 469     def drain_and_check_ok(self):
 470         """Remove all data for the current command from input stream."""
 471         def onempty(rl):
 472             pass
 473         return self._check_ok(onempty)
 474
 475     def check_ok(self):
 476         """Verify that server action completed successfully."""
 477         def onempty(rl):
 478             raise Exception('expected "ok", got %r' % rl)
 479         return self._check_ok(onempty)
 480
 481
 482 class Conn(BaseConn):
 483     def __init__(self, inp, outp):
 484         BaseConn.__init__(self, outp)
 485         self.inp = inp
 486
 487     def _read(self, size):
 488         return self.inp.read(size)
 489
 490     def _readline(self):
 491         return self.inp.readline()
 492
 493     def has_input(self):
 494         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 495         if rl:
 496             assert(rl[0] == self.inp.fileno())
 497             return True
 498         else:
 499             return None
 500
 501
 502 def checked_reader(fd, n):
 503     while n > 0:
 504         rl, _, _ = select.select([fd], [], [])
 505         assert(rl[0] == fd)
 506         buf = os.read(fd, n)
 507         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 508         yield buf
 509         n -= len(buf)
 510
 511
# Largest payload carried by a single stdout packet.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex the output of process 'p' onto the descriptor 'outfd'.

    While p.poll() reports the process as still running, data read from
    'outr' and 'errr' is forwarded as packets: a '!IB' header (4-byte
    payload length, 1-byte channel) followed by the payload.  Channel 1
    carries data from 'outr', channel 2 data from 'errr'.  A zero-length
    packet on channel 3 is always written at the end, even on error.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # NOTE(review): an empty read only leaves this for
                    # loop; the while loop keeps polling until the process
                    # exits -- confirm this is intended.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
        # NOTE(review): nothing here reads data still sitting in the pipes
        # once p.poll() reports exit -- confirm whether callers rely on it.
    finally:
        os.write(outfd, struct.pack('!IB', 0, 3))
 529
 530
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a packet stream from the descriptor 'infd'.  Each packet has a
    5-byte '!IB' header (payload length, channel): channel 1 payloads are
    returned to the reader, channel 2 payloads are copied to stderr, and
    channel 3 marks the stream as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to fill a 6-byte window, then one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # checked_reader over the current data packet
        self.buf = None      # data read from a packet but not yet consumed
        self.closed = False  # set once a channel-3 packet has been seen

    def write(self, data):
        # Poll (zero timeout) for pending input before writing --
        # presumably so queued stderr packets get relayed; TODO confirm.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header, waiting up to 'timeout' for input.

        Returns False if the connection is closed or nothing arrived in
        time, True once a header has been processed.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: payload is read lazily through self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # Error-channel packet: copy straight to our stderr.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Ensure self.buf holds data; return True if it does.

        Returns False if no data packet became available within 'timeout'
        or the connection has been closed.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = self.reader.next()
                return True
            except StopIteration:
                # Current packet is used up; go on to the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of buffered data until ix_fn says to stop.

        ix_fn(buf) returns None to take the whole buffer and continue, or
        an index: the data before it is yielded, the remainder stays
        buffered, and the iteration ends.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            # Index just past the first newline, or None if there is none.
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        csize = [size]  # bytes still wanted; a list so the closure can update it
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if data is buffered or can be loaded without waiting."""
        return self._load_buf(0)
 620
 621
 622 def linereader(f):
 623     """Generate a list of input lines from 'f' without terminating newlines."""
 624     while 1:
 625         line = f.readline()
 626         if not line:
 627             break
 628         yield line[:-1]
 629
 630
 631 def chunkyreader(f, count = None):
 632     """Generate a list of chunks of data read from 'f'.
 633
 634     If count is None, read until EOF is reached.
 635
 636     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 637     reached while reading, raise IOError.
 638     """
 639     if count != None:
 640         while count > 0:
 641             b = f.read(min(count, 65536))
 642             if not b:
 643                 raise IOError('EOF with %d bytes remaining' % count)
 644             yield b
 645             count -= len(b)
 646     else:
 647         while 1:
 648             b = f.read(65536)
 649             if not b: break
 650             yield b
 651
 652
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed to name when leaving the
    block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.

    The temporary file is created in the same directory as name.  If the
    block raises, the rename is skipped and the temporary file is removed.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so nothing owns the descriptor yet; close it
            # ourselves before propagating.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Only reached when the with-block completed without raising.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
 687
 688
 689 def slashappend(s):
 690     """Append "/" to 's' if it doesn't aleady end in "/"."""
 691     if s and not s.endswith('/'):
 692         return s + '/'
 693     else:
 694         return s
 695
 696
 697 def _mmap_do(f, sz, flags, prot, close):
 698     if not sz:
 699         st = os.fstat(f.fileno())
 700         sz = st.st_size
 701     if not sz:
 702         # trying to open a zero-length map gives an error, but an empty
 703         # string has all the same behaviour of a zero-length map, ie. it has
 704         # no elements :)
 705         return ''
 706     map = mmap.mmap(f.fileno(), sz, flags, prot)
 707     if close:
 708         f.close()  # map will persist beyond file close
 709     return map
 710
 711
 712 def mmap_read(f, sz = 0, close=True):
 713     """Create a read-only memory mapped region on file 'f'.
 714     If sz is 0, the region will cover the entire file.
 715     """
 716     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 717
 718
 719 def mmap_readwrite(f, sz = 0, close=True):
 720     """Create a read-write memory mapped region on file 'f'.
 721     If sz is 0, the region will cover the entire file.
 722     """
 723     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 724                     close)
 725
 726
 727 def mmap_readwrite_private(f, sz = 0, close=True):
 728     """Create a read-write memory mapped region on file 'f'.
 729     If sz is 0, the region will cover the entire file.
 730     The map is private, which means the changes are never flushed back to the
 731     file.
 732     """
 733     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 734                     close)
 735
 736
 737 _mincore = getattr(_helpers, 'mincore', None)
 738 if _mincore:
 739     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
 740     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
 741
 742     _fmincore_chunk_size = None
 743     def _set_fmincore_chunk_size():
 744         global _fmincore_chunk_size
 745         pref_chunk_size = 64 * 1024 * 1024
 746         chunk_size = sc_page_size
 747         if (sc_page_size < pref_chunk_size):
 748             chunk_size = sc_page_size * (pref_chunk_size / sc_page_size)
 749         _fmincore_chunk_size = chunk_size
 750
 751     def fmincore(fd):
 752         """Return the mincore() data for fd as a bytearray whose values can be
 753         tested via MINCORE_INCORE, or None if fd does not fully
 754         support the operation."""
 755         st = os.fstat(fd)
 756         if (st.st_size == 0):
 757             return bytearray(0)
 758         if not _fmincore_chunk_size:
 759             _set_fmincore_chunk_size()
 760         pages_per_chunk = _fmincore_chunk_size / sc_page_size;
 761         page_count = (st.st_size + sc_page_size - 1) / sc_page_size;
 762         chunk_count = page_count / _fmincore_chunk_size
 763         if chunk_count < 1:
 764             chunk_count = 1
 765         result = bytearray(page_count)
 766         for ci in xrange(chunk_count):
 767             pos = _fmincore_chunk_size * ci;
 768             msize = min(_fmincore_chunk_size, st.st_size - pos)
 769             try:
 770                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
 771             except mmap.error as ex:
 772                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
 773                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
 774                     return None
 775                 raise ex
 776             _mincore(m, msize, 0, result, ci * pages_per_chunk);
 777         return result
 778
 779
 780 def parse_timestamp(epoch_str):
 781     """Return the number of nanoseconds since the epoch that are described
 782 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 783 throw a ValueError that may contain additional information."""
 784     ns_per = {'s' :  1000000000,
 785               'ms' : 1000000,
 786               'us' : 1000,
 787               'ns' : 1}
 788     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 789     if not match:
 790         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 791             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 792         raise ValueError()
 793     (n, units) = match.group(1, 2)
 794     if not n:
 795         n = 1
 796     n = int(n)
 797     return n * ns_per[units]
 798
 799
 800 def parse_num(s):
 801     """Parse data size information into a float number.
 802
 803     Here are some examples of conversions:
 804         199.2k means 203981 bytes
 805         1GB means 1073741824 bytes
 806         2.1 tb means 2199023255552 bytes
 807     """
 808     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 809     if not g:
 810         raise ValueError("can't parse %r as a number" % s)
 811     (val, unit) = g.groups()
 812     num = float(val)
 813     unit = unit.lower()
 814     if unit in ['t', 'tb']:
 815         mult = 1024*1024*1024*1024
 816     elif unit in ['g', 'gb']:
 817         mult = 1024*1024*1024
 818     elif unit in ['m', 'mb']:
 819         mult = 1024*1024
 820     elif unit in ['k', 'kb']:
 821         mult = 1024
 822     elif unit in ['', 'b']:
 823         mult = 1
 824     else:
 825         raise ValueError("invalid unit %r in number %r" % (unit, s))
 826     return int(num*mult)
 827
 828
 829 def count(l):
 830     """Count the number of elements in an iterator. (consumes the iterator)"""
 831     return reduce(lambda x,y: x+1, l)
 832
 833
 834 saved_errors = []
 835 def add_error(e):
 836     """Append an error message to the list of saved errors.
 837
 838     Once processing is able to stop and output the errors, the saved errors are
 839     accessible in the module variable helpers.saved_errors.
 840     """
 841     saved_errors.append(e)
 842     log('%-70s\n' % e)
 843
 844
def clear_errors():
    """Reset saved_errors to a new, empty list.

    The module-level name is rebound rather than emptied in place, so a
    reference to the previous list obtained earlier still sees the old
    errors.
    """
    global saved_errors
    saved_errors = []
 848
 849
 850 def handle_ctrl_c():
 851     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 852
 853     The new exception handler will make sure that bup will exit without an ugly
 854     stacktrace when Ctrl-C is hit.
 855     """
 856     oldhook = sys.excepthook
 857     def newhook(exctype, value, traceback):
 858         if exctype == KeyboardInterrupt:
 859             log('\nInterrupted.\n')
 860         else:
 861             return oldhook(exctype, value, traceback)
 862     sys.excepthook = newhook
 863
 864
 865 def columnate(l, prefix):
 866     """Format elements of 'l' in columns with 'prefix' leading each line.
 867
 868     The number of columns is determined automatically based on the string
 869     lengths.
 870     """
 871     if not l:
 872         return ""
 873     l = l[:]
 874     clen = max(len(s) for s in l)
 875     ncols = (tty_width() - len(prefix)) / (clen + 2)
 876     if ncols <= 1:
 877         ncols = 1
 878         clen = 0
 879     cols = []
 880     while len(l) % ncols:
 881         l.append('')
 882     rows = len(l)/ncols
 883     for s in range(0, len(l), rows):
 884         cols.append(l[s:s+rows])
 885     out = ''
 886     for row in zip(*cols):
 887         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 888     return out
 889
 890
 891 def parse_date_or_fatal(str, fatal):
 892     """Parses the given date or calls Option.fatal().
 893     For now we expect a string that contains a float."""
 894     try:
 895         date = float(str)
 896     except ValueError as e:
 897         raise fatal('invalid date format (should be a float): %r' % e)
 898     else:
 899         return date
 900
 901
 902 def parse_excludes(options, fatal):
 903     """Traverse the options and extract all excludes, or call Option.fatal()."""
 904     excluded_paths = []
 905
 906     for flag in options:
 907         (option, parameter) = flag
 908         if option == '--exclude':
 909             excluded_paths.append(resolve_parent(parameter))
 910         elif option == '--exclude-from':
 911             try:
 912                 f = open(resolve_parent(parameter))
 913             except IOError as e:
 914                 raise fatal("couldn't read %s" % parameter)
 915             for exclude_path in f.readlines():
 916                 # FIXME: perhaps this should be rstrip('\n')
 917                 exclude_path = resolve_parent(exclude_path.strip())
 918                 if exclude_path:
 919                     excluded_paths.append(exclude_path)
 920     return sorted(frozenset(excluded_paths))
 921
 922
 923 def parse_rx_excludes(options, fatal):
 924     """Traverse the options and extract all rx excludes, or call
 925     Option.fatal()."""
 926     excluded_patterns = []
 927
 928     for flag in options:
 929         (option, parameter) = flag
 930         if option == '--exclude-rx':
 931             try:
 932                 excluded_patterns.append(re.compile(parameter))
 933             except re.error as ex:
 934                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 935         elif option == '--exclude-rx-from':
 936             try:
 937                 f = open(resolve_parent(parameter))
 938             except IOError as e:
 939                 raise fatal("couldn't read %s" % parameter)
 940             for pattern in f.readlines():
 941                 spattern = pattern.rstrip('\n')
 942                 if not spattern:
 943                     continue
 944                 try:
 945                     excluded_patterns.append(re.compile(spattern))
 946                 except re.error as ex:
 947                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 948     return excluded_patterns
 949
 950
 951 def should_rx_exclude_path(path, exclude_rxs):
 952     """Return True if path matches a regular expression in exclude_rxs."""
 953     for rx in exclude_rxs:
 954         if rx.search(path):
 955             debug1('Skipping %r: excluded by rx pattern %r.\n'
 956                    % (path, rx.pattern))
 957             return True
 958     return False
 959
 960
 961 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 962 # that resolve against the current filesystem in the strip/graft
 963 # functions for example, but elsewhere as well.  I suspect bup's not
 964 # always being careful about that.  For some cases, the contents of
 965 # the current filesystem should be irrelevant, and consulting it might
 966 # produce the wrong result, perhaps via unintended symlink resolution,
 967 # for example.
 968
 969 def path_components(path):
 970     """Break path into a list of pairs of the form (name,
 971     full_path_to_name).  Path must start with '/'.
 972     Example:
 973       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 974     if not path.startswith('/'):
 975         raise Exception, 'path must start with "/": %s' % path
 976     # Since we assume path startswith('/'), we can skip the first element.
 977     result = [('', '/')]
 978     norm_path = os.path.abspath(path)
 979     if norm_path == '/':
 980         return result
 981     full_path = ''
 982     for p in norm_path.split('/')[1:]:
 983         full_path += '/' + p
 984         result.append((p, full_path))
 985     return result
 986
 987
 988 def stripped_path_components(path, strip_prefixes):
 989     """Strip any prefix in strip_prefixes from path and return a list
 990     of path components where each component is (name,
 991     none_or_full_fs_path_to_name).  Assume path startswith('/').
 992     See thelpers.py for examples."""
 993     normalized_path = os.path.abspath(path)
 994     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
 995     for bp in sorted_strip_prefixes:
 996         normalized_bp = os.path.abspath(bp)
 997         if normalized_bp == '/':
 998             continue
 999         if normalized_path.startswith(normalized_bp):
1000             prefix = normalized_path[:len(normalized_bp)]
1001             result = []
1002             for p in normalized_path[len(normalized_bp):].split('/'):
1003                 if p: # not root
1004                     prefix += '/'
1005                 prefix += p
1006                 result.append((p, prefix))
1007             return result
1008     # Nothing to strip.
1009     return path_components(path)
1010
1011
def grafted_path_components(graft_points, path):
    """Return the (name, fs_path) components of path after applying
    the first graft in graft_points whose old prefix covers it.

    graft_points is an iterable of (old_prefix, new_prefix) pairs.
    Components that exist only because of new_prefix have None as
    their fs_path.  If no graft applies, the result is that of
    path_components() for the normalized path."""
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for old_prefix, new_prefix in graft_points:
        # Normalize the prefixes (note: this doesn't make them absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # Only accept a match that ends on a component boundary; a
        # bare startswith() would also accept '/foo' for '/foobar'.
        if old_prefix.endswith('/'):
            matched = clean_path.startswith(old_prefix)
        else:
            matched = clean_path == old_prefix \
                or clean_path.startswith(old_prefix + '/')
        if matched:
            # Swap the prefixes by slicing.  Going through re.sub()
            # would treat any backslash in new_prefix as an escape.
            grafted_path = new_prefix + clean_path[len(old_prefix):]
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = '/' + grafted_path.lstrip('/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
            new_prefix_parts = new_prefix.split('/')
            result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            graft_index = len(result_prefix)
            result[graft_index] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == '/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
1053
1054
# Shorthand for the SHA-1 hash constructor.
Sha1 = hashlib.sha1


# The localtime() provided by _helpers, or None if it doesn't have one.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # The nine time.struct_time fields, followed by tm_gmtoff and tm_zone.
    bup_time = namedtuple('bup_time',
                          'tm_year tm_mon tm_mday tm_hour tm_min tm_sec'
                          ' tm_wday tm_yday tm_isdst tm_gmtoff tm_zone')
1065
# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the result of _helpers.localtime(time) as a bup_time."""
        return bup_time(*_helpers.localtime(time))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        h, m = divmod(offmin, 60)
        # Format the sign separately from the hour: negating an hour
        # of 0 loses the sign, which would turn -00:30 into "+0030".
        # An offset that truncates to zero minutes stays "+0000".
        sign = '-' if off < 0 and offmin else '+'
        return "%s%02d%02d" % (sign, h, m)
    def to_py_time(x):
        """Return x as a time.struct_time, dropping any fields after
        the first nine."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC for time t, as formatted
        by strftime's %z."""
        return time.strftime('%z', localtime(t))
    def to_py_time(x):
        """Return x unchanged; localtime() is time.localtime here."""
        return x
1093
1094
_some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a save name, False
    otherwise."""
    # Enforce a superset of the restrictions in git-check-ref-format(1)
    # NOTE: the empty string passes every check below.
    if name == '@' or name.endswith('.'):
        return False
    if name.startswith('/') or name.endswith('/'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    # No control characters (including DEL) anywhere in the name.
    if any(ord(c) < 0x20 or ord(c) == 0x7f for c in name):
        return False
    # No '/'-separated part may start with '.' or end with '.lock'.
    return not any(part.startswith('.') or part.endswith('.lock')
                   for part in name.split('/'))
1110             return False
1111     return True