lib/bup/helpers.py

   1 """Helper functions and classes for bup."""
   2
   3 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   4 import hashlib, heapq, operator, time, grp
   5 from bup import _version, _helpers
   6 import bup._helpers as _helpers
   7 import math
   8
   9 # This function should really be in helpers, not in bup.options.  But we
  10 # want options.py to be standalone so people can include it in other projects.
  11 from bup.options import _tty_width
  12 tty_width = _tty_width
  13
  14
  15 def atoi(s):
  16     """Convert the string 's' to an integer. Return 0 if s is not a number."""
  17     try:
  18         return int(s or '0')
  19     except ValueError:
  20         return 0
  21
  22
  23 def atof(s):
  24     """Convert the string 's' to a float. Return 0 if s is not a number."""
  25     try:
  26         return float(s or '0')
  27     except ValueError:
  28         return 0
  29
  30
  31 buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
  32
  33
  34 # If the platform doesn't have fdatasync (OS X), fall back to fsync.
  35 try:
  36     fdatasync = os.fdatasync
  37 except AttributeError:
  38     fdatasync = os.fsync
  39
  40
  41 # Write (blockingly) to sockets that may or may not be in blocking mode.
  42 # We need this because our stderr is sometimes eaten by subprocesses
  43 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
  44 # leading to race conditions.  Ick.  We'll do it the hard way.
  45 def _hard_write(fd, buf):
  46     while buf:
  47         (r,w,x) = select.select([], [fd], [], None)
  48         if not w:
  49             raise IOError('select(fd) returned without being writable')
  50         try:
  51             sz = os.write(fd, buf)
  52         except OSError, e:
  53             if e.errno != errno.EAGAIN:
  54                 raise
  55         assert(sz >= 0)
  56         buf = buf[sz:]
  57
  58
  59 _last_prog = 0
  60 def log(s):
  61     """Print a log message to stderr."""
  62     global _last_prog
  63     sys.stdout.flush()
  64     _hard_write(sys.stderr.fileno(), s)
  65     _last_prog = 0
  66
  67
  68 def debug1(s):
  69     if buglvl >= 1:
  70         log(s)
  71
  72
  73 def debug2(s):
  74     if buglvl >= 2:
  75         log(s)
  76
  77
  78 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
  79 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
  80 _last_progress = ''
  81 def progress(s):
  82     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
  83     global _last_progress
  84     if istty2:
  85         log(s)
  86         _last_progress = s
  87
  88
  89 def qprogress(s):
  90     """Calls progress() only if we haven't printed progress in a while.
  91
  92     This avoids overloading the stderr buffer with excess junk.
  93     """
  94     global _last_prog
  95     now = time.time()
  96     if now - _last_prog > 0.1:
  97         progress(s)
  98         _last_prog = now
  99
 100
 101 def reprogress():
 102     """Calls progress() to redisplay the most recent progress message.
 103
 104     Useful after you've printed some other message that wipes out the
 105     progress line.
 106     """
 107     if _last_progress and _last_progress.endswith('\r'):
 108         progress(_last_progress)
 109
 110
 111 def mkdirp(d, mode=None):
 112     """Recursively create directories on path 'd'.
 113
 114     Unlike os.makedirs(), it doesn't raise an exception if the last element of
 115     the path already exists.
 116     """
 117     try:
 118         if mode:
 119             os.makedirs(d, mode)
 120         else:
 121             os.makedirs(d)
 122     except OSError, e:
 123         if e.errno == errno.EEXIST:
 124             pass
 125         else:
 126             raise
 127
 128
 129 def next(it):
 130     """Get the next item from an iterator, None if we reached the end."""
 131     try:
 132         return it.next()
 133     except StopIteration:
 134         return None
 135
 136
 137 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
 138     if key:
 139         samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
 140     else:
 141         samekey = operator.eq
 142     count = 0
 143     total = sum(len(it) for it in iters)
 144     iters = (iter(it) for it in iters)
 145     heap = ((next(it),it) for it in iters)
 146     heap = [(e,it) for e,it in heap if e]
 147
 148     heapq.heapify(heap)
 149     pe = None
 150     while heap:
 151         if not count % pfreq:
 152             pfunc(count, total)
 153         e, it = heap[0]
 154         if not samekey(e, pe):
 155             pe = e
 156             yield e
 157         count += 1
 158         try:
 159             e = it.next() # Don't use next() function, it's too expensive
 160         except StopIteration:
 161             heapq.heappop(heap) # remove current
 162         else:
 163             heapq.heapreplace(heap, (e, it)) # shift current to new location
 164     pfinal(count, total)
 165
 166
 167 def unlink(f):
 168     """Delete a file at path 'f' if it currently exists.
 169
 170     Unlike os.unlink(), does not throw an exception if the file didn't already
 171     exist.
 172     """
 173     try:
 174         os.unlink(f)
 175     except OSError, e:
 176         if e.errno == errno.ENOENT:
 177             pass  # it doesn't exist, that's what you asked for
 178
 179
 180 def readpipe(argv):
 181     """Run a subprocess and return its output."""
 182     p = subprocess.Popen(argv, stdout=subprocess.PIPE)
 183     out, err = p.communicate()
 184     if p.returncode != 0:
 185         raise Exception('subprocess %r failed with status %d'
 186                         % (' '.join(argv), p.retcode))
 187     return out
 188
 189
 190 def realpath(p):
 191     """Get the absolute path of a file.
 192
 193     Behaves like os.path.realpath, but doesn't follow a symlink for the last
 194     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
 195     will follow symlinks in p's directory)
 196     """
 197     try:
 198         st = os.lstat(p)
 199     except OSError:
 200         st = None
 201     if st and stat.S_ISLNK(st.st_mode):
 202         (dir, name) = os.path.split(p)
 203         dir = os.path.realpath(dir)
 204         out = os.path.join(dir, name)
 205     else:
 206         out = os.path.realpath(p)
 207     #log('realpathing:%r,%r\n' % (p, out))
 208     return out
 209
 210
 211 def detect_fakeroot():
 212     "Return True if we appear to be running under fakeroot."
 213     return os.getenv("FAKEROOTKEY") != None
 214
 215
 216 def is_superuser():
 217     if sys.platform.startswith('cygwin'):
 218         import ctypes
 219         return ctypes.cdll.shell32.IsUserAnAdmin()
 220     else:
 221         return os.geteuid() == 0
 222
 223
 224 def _cache_key_value(get_value, key, cache):
 225     """Return (value, was_cached).  If there is a value in the cache
 226     for key, use that, otherwise, call get_value(key) which should
 227     throw a KeyError if there is no value -- in which case the cached
 228     and returned value will be None.
 229     """
 230     try: # Do we already have it (or know there wasn't one)?
 231         value = cache[key]
 232         return value, True
 233     except KeyError:
 234         pass
 235     value = None
 236     try:
 237         cache[key] = value = get_value(key)
 238     except KeyError:
 239         cache[key] = None
 240     return value, False
 241
 242
 243 _uid_to_pwd_cache = {}
 244 _name_to_pwd_cache = {}
 245
 246 def pwd_from_uid(uid):
 247     """Return password database entry for uid (may be a cached value).
 248     Return None if no entry is found.
 249     """
 250     global _uid_to_pwd_cache, _name_to_pwd_cache
 251     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
 252     if entry and not cached:
 253         _name_to_pwd_cache[entry.pw_name] = entry
 254     return entry
 255
 256
 257 def pwd_from_name(name):
 258     """Return password database entry for name (may be a cached value).
 259     Return None if no entry is found.
 260     """
 261     global _uid_to_pwd_cache, _name_to_pwd_cache
 262     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
 263     if entry and not cached:
 264         _uid_to_pwd_cache[entry.pw_uid] = entry
 265     return entry
 266
 267
 268 _gid_to_grp_cache = {}
 269 _name_to_grp_cache = {}
 270
 271 def grp_from_gid(gid):
 272     """Return password database entry for gid (may be a cached value).
 273     Return None if no entry is found.
 274     """
 275     global _gid_to_grp_cache, _name_to_grp_cache
 276     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
 277     if entry and not cached:
 278         _name_to_grp_cache[entry.gr_name] = entry
 279     return entry
 280
 281
 282 def grp_from_name(name):
 283     """Return password database entry for name (may be a cached value).
 284     Return None if no entry is found.
 285     """
 286     global _gid_to_grp_cache, _name_to_grp_cache
 287     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
 288     if entry and not cached:
 289         _gid_to_grp_cache[entry.gr_gid] = entry
 290     return entry
 291
 292
 293 _username = None
 294 def username():
 295     """Get the user's login name."""
 296     global _username
 297     if not _username:
 298         uid = os.getuid()
 299         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
 300     return _username
 301
 302
 303 _userfullname = None
 304 def userfullname():
 305     """Get the user's full name."""
 306     global _userfullname
 307     if not _userfullname:
 308         uid = os.getuid()
 309         entry = pwd_from_uid(uid)
 310         if entry:
 311             _userfullname = entry[4].split(',')[0] or entry[0]
 312         if not _userfullname:
 313             _userfullname = 'user%d' % uid
 314     return _userfullname
 315
 316
 317 _hostname = None
 318 def hostname():
 319     """Get the FQDN of this machine."""
 320     global _hostname
 321     if not _hostname:
 322         _hostname = socket.getfqdn()
 323     return _hostname
 324
 325
 326 _resource_path = None
 327 def resource_path(subdir=''):
 328     global _resource_path
 329     if not _resource_path:
 330         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
 331     return os.path.join(_resource_path, subdir)
 332
 333 def format_filesize(size):
 334     unit = 1024.0
 335     size = float(size)
 336     if size < unit:
 337         return "%d" % (size)
 338     exponent = int(math.log(size) / math.log(unit))
 339     size_prefix = "KMGTPE"[exponent - 1]
 340     return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix)
 341
 342
 343 class NotOk(Exception):
 344     pass
 345
 346
 347 class BaseConn:
 348     def __init__(self, outp):
 349         self.outp = outp
 350
 351     def close(self):
 352         while self._read(65536): pass
 353
 354     def read(self, size):
 355         """Read 'size' bytes from input stream."""
 356         self.outp.flush()
 357         return self._read(size)
 358
 359     def readline(self):
 360         """Read from input stream until a newline is found."""
 361         self.outp.flush()
 362         return self._readline()
 363
 364     def write(self, data):
 365         """Write 'data' to output stream."""
 366         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
 367         self.outp.write(data)
 368
 369     def has_input(self):
 370         """Return true if input stream is readable."""
 371         raise NotImplemented("Subclasses must implement has_input")
 372
 373     def ok(self):
 374         """Indicate end of output from last sent command."""
 375         self.write('\nok\n')
 376
 377     def error(self, s):
 378         """Indicate server error to the client."""
 379         s = re.sub(r'\s+', ' ', str(s))
 380         self.write('\nerror %s\n' % s)
 381
 382     def _check_ok(self, onempty):
 383         self.outp.flush()
 384         rl = ''
 385         for rl in linereader(self):
 386             #log('%d got line: %r\n' % (os.getpid(), rl))
 387             if not rl:  # empty line
 388                 continue
 389             elif rl == 'ok':
 390                 return None
 391             elif rl.startswith('error '):
 392                 #log('client: error: %s\n' % rl[6:])
 393                 return NotOk(rl[6:])
 394             else:
 395                 onempty(rl)
 396         raise Exception('server exited unexpectedly; see errors above')
 397
 398     def drain_and_check_ok(self):
 399         """Remove all data for the current command from input stream."""
 400         def onempty(rl):
 401             pass
 402         return self._check_ok(onempty)
 403
 404     def check_ok(self):
 405         """Verify that server action completed successfully."""
 406         def onempty(rl):
 407             raise Exception('expected "ok", got %r' % rl)
 408         return self._check_ok(onempty)
 409
 410
 411 class Conn(BaseConn):
 412     def __init__(self, inp, outp):
 413         BaseConn.__init__(self, outp)
 414         self.inp = inp
 415
 416     def _read(self, size):
 417         return self.inp.read(size)
 418
 419     def _readline(self):
 420         return self.inp.readline()
 421
 422     def has_input(self):
 423         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
 424         if rl:
 425             assert(rl[0] == self.inp.fileno())
 426             return True
 427         else:
 428             return None
 429
 430
 431 def checked_reader(fd, n):
 432     while n > 0:
 433         rl, _, _ = select.select([fd], [], [])
 434         assert(rl[0] == fd)
 435         buf = os.read(fd, n)
 436         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
 437         yield buf
 438         n -= len(buf)
 439
 440
 441 MAX_PACKET = 128 * 1024
 442 def mux(p, outfd, outr, errr):
 443     try:
 444         fds = [outr, errr]
 445         while p.poll() is None:
 446             rl, _, _ = select.select(fds, [], [])
 447             for fd in rl:
 448                 if fd == outr:
 449                     buf = os.read(outr, MAX_PACKET)
 450                     if not buf: break
 451                     os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
 452                 elif fd == errr:
 453                     buf = os.read(errr, 1024)
 454                     if not buf: break
 455                     os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
 456     finally:
 457         os.write(outfd, struct.pack('!IB', 0, 3))
 458
 459
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads a multiplexed stream from the file descriptor 'infd'.  Each packet
    has a 5-byte '!IB' header (payload length, channel): channel 1 payloads
    are returned to the reader, channel 2 payloads are copied to stderr, and
    channel 3 marks the stream as closed.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read enough to fill a 6-byte window, then one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # checked_reader over the current data packet
        self.buf = None      # data read but not yet handed to the caller
        self.closed = False  # set once a channel 3 packet has been seen

    def write(self, data):
        # Poll the input once (zero timeout) before writing.
        # NOTE(review): presumably so pending stderr/close packets are
        # handled before more output is sent -- confirm.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Read one packet header and dispatch on its channel.

        Returns False if the connection is closed or nothing is readable
        within 'timeout'; True once a packet header has been processed.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # Data packet: the payload is read lazily through self.reader.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # Remote stderr: relay it immediately.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Make sure self.buf holds data; return True if it does.

        Returns False when no packet arrived within 'timeout' or the
        connection has been closed.
        """
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = self.reader.next()
                return True
            except StopIteration:
                # Current data packet is exhausted; look for the next one.
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield pieces of buffered input as directed by 'ix_fn'.

        ix_fn(buf) returns None to consume the whole buffer and keep going,
        or an index: the buffer is split there, the first part is yielded
        and the generator stops.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            # Index just past the first newline, or None if there is none.
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        # One-element list so the nested function can update the count.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        """Return True if data is buffered or arrives without waiting."""
        return self._load_buf(0)
 549
 550
 551 def linereader(f):
 552     """Generate a list of input lines from 'f' without terminating newlines."""
 553     while 1:
 554         line = f.readline()
 555         if not line:
 556             break
 557         yield line[:-1]
 558
 559
 560 def chunkyreader(f, count = None):
 561     """Generate a list of chunks of data read from 'f'.
 562
 563     If count is None, read until EOF is reached.
 564
 565     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
 566     reached while reading, raise IOError.
 567     """
 568     if count != None:
 569         while count > 0:
 570             b = f.read(min(count, 65536))
 571             if not b:
 572                 raise IOError('EOF with %d bytes remaining' % count)
 573             yield b
 574             count -= len(b)
 575     else:
 576         while 1:
 577             b = f.read(65536)
 578             if not b: break
 579             yield b
 580
 581
 582 def slashappend(s):
 583     """Append "/" to 's' if it doesn't aleady end in "/"."""
 584     if s and not s.endswith('/'):
 585         return s + '/'
 586     else:
 587         return s
 588
 589
 590 def _mmap_do(f, sz, flags, prot, close):
 591     if not sz:
 592         st = os.fstat(f.fileno())
 593         sz = st.st_size
 594     if not sz:
 595         # trying to open a zero-length map gives an error, but an empty
 596         # string has all the same behaviour of a zero-length map, ie. it has
 597         # no elements :)
 598         return ''
 599     map = mmap.mmap(f.fileno(), sz, flags, prot)
 600     if close:
 601         f.close()  # map will persist beyond file close
 602     return map
 603
 604
 605 def mmap_read(f, sz = 0, close=True):
 606     """Create a read-only memory mapped region on file 'f'.
 607     If sz is 0, the region will cover the entire file.
 608     """
 609     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
 610
 611
 612 def mmap_readwrite(f, sz = 0, close=True):
 613     """Create a read-write memory mapped region on file 'f'.
 614     If sz is 0, the region will cover the entire file.
 615     """
 616     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
 617                     close)
 618
 619
 620 def mmap_readwrite_private(f, sz = 0, close=True):
 621     """Create a read-write memory mapped region on file 'f'.
 622     If sz is 0, the region will cover the entire file.
 623     The map is private, which means the changes are never flushed back to the
 624     file.
 625     """
 626     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
 627                     close)
 628
 629
 630 def parse_timestamp(epoch_str):
 631     """Return the number of nanoseconds since the epoch that are described
 632 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
 633 throw a ValueError that may contain additional information."""
 634     ns_per = {'s' :  1000000000,
 635               'ms' : 1000000,
 636               'us' : 1000,
 637               'ns' : 1}
 638     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
 639     if not match:
 640         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
 641             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
 642         raise ValueError()
 643     (n, units) = match.group(1, 2)
 644     if not n:
 645         n = 1
 646     n = int(n)
 647     return n * ns_per[units]
 648
 649
 650 def parse_num(s):
 651     """Parse data size information into a float number.
 652
 653     Here are some examples of conversions:
 654         199.2k means 203981 bytes
 655         1GB means 1073741824 bytes
 656         2.1 tb means 2199023255552 bytes
 657     """
 658     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
 659     if not g:
 660         raise ValueError("can't parse %r as a number" % s)
 661     (val, unit) = g.groups()
 662     num = float(val)
 663     unit = unit.lower()
 664     if unit in ['t', 'tb']:
 665         mult = 1024*1024*1024*1024
 666     elif unit in ['g', 'gb']:
 667         mult = 1024*1024*1024
 668     elif unit in ['m', 'mb']:
 669         mult = 1024*1024
 670     elif unit in ['k', 'kb']:
 671         mult = 1024
 672     elif unit in ['', 'b']:
 673         mult = 1
 674     else:
 675         raise ValueError("invalid unit %r in number %r" % (unit, s))
 676     return int(num*mult)
 677
 678
 679 def count(l):
 680     """Count the number of elements in an iterator. (consumes the iterator)"""
 681     return reduce(lambda x,y: x+1, l)
 682
 683
 684 saved_errors = []
 685 def add_error(e):
 686     """Append an error message to the list of saved errors.
 687
 688     Once processing is able to stop and output the errors, the saved errors are
 689     accessible in the module variable helpers.saved_errors.
 690     """
 691     saved_errors.append(e)
 692     log('%-70s\n' % e)
 693
 694
 695 def clear_errors():
 696     global saved_errors
 697     saved_errors = []
 698
 699
 700 def handle_ctrl_c():
 701     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
 702
 703     The new exception handler will make sure that bup will exit without an ugly
 704     stacktrace when Ctrl-C is hit.
 705     """
 706     oldhook = sys.excepthook
 707     def newhook(exctype, value, traceback):
 708         if exctype == KeyboardInterrupt:
 709             log('\nInterrupted.\n')
 710         else:
 711             return oldhook(exctype, value, traceback)
 712     sys.excepthook = newhook
 713
 714
 715 def columnate(l, prefix):
 716     """Format elements of 'l' in columns with 'prefix' leading each line.
 717
 718     The number of columns is determined automatically based on the string
 719     lengths.
 720     """
 721     if not l:
 722         return ""
 723     l = l[:]
 724     clen = max(len(s) for s in l)
 725     ncols = (tty_width() - len(prefix)) / (clen + 2)
 726     if ncols <= 1:
 727         ncols = 1
 728         clen = 0
 729     cols = []
 730     while len(l) % ncols:
 731         l.append('')
 732     rows = len(l)/ncols
 733     for s in range(0, len(l), rows):
 734         cols.append(l[s:s+rows])
 735     out = ''
 736     for row in zip(*cols):
 737         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
 738     return out
 739
 740
 741 def parse_date_or_fatal(str, fatal):
 742     """Parses the given date or calls Option.fatal().
 743     For now we expect a string that contains a float."""
 744     try:
 745         date = atof(str)
 746     except ValueError, e:
 747         raise fatal('invalid date format (should be a float): %r' % e)
 748     else:
 749         return date
 750
 751
 752 def parse_excludes(options, fatal):
 753     """Traverse the options and extract all excludes, or call Option.fatal()."""
 754     excluded_paths = []
 755
 756     for flag in options:
 757         (option, parameter) = flag
 758         if option == '--exclude':
 759             excluded_paths.append(realpath(parameter))
 760         elif option == '--exclude-from':
 761             try:
 762                 f = open(realpath(parameter))
 763             except IOError, e:
 764                 raise fatal("couldn't read %s" % parameter)
 765             for exclude_path in f.readlines():
 766                 excluded_paths.append(realpath(exclude_path.strip()))
 767     return sorted(frozenset(excluded_paths))
 768
 769
 770 def parse_rx_excludes(options, fatal):
 771     """Traverse the options and extract all rx excludes, or call
 772     Option.fatal()."""
 773     excluded_patterns = []
 774
 775     for flag in options:
 776         (option, parameter) = flag
 777         if option == '--exclude-rx':
 778             try:
 779                 excluded_patterns.append(re.compile(parameter))
 780             except re.error, ex:
 781                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 782         elif option == '--exclude-rx-from':
 783             try:
 784                 f = open(realpath(parameter))
 785             except IOError, e:
 786                 raise fatal("couldn't read %s" % parameter)
 787             for pattern in f.readlines():
 788                 spattern = pattern.rstrip('\n')
 789                 try:
 790                     excluded_patterns.append(re.compile(spattern))
 791                 except re.error, ex:
 792                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 793     return excluded_patterns
 794
 795
 796 def should_rx_exclude_path(path, exclude_rxs):
 797     """Return True if path matches a regular expression in exclude_rxs."""
 798     for rx in exclude_rxs:
 799         if rx.search(path):
 800             debug1('Skipping %r: excluded by rx pattern %r.\n'
 801                    % (path, rx.pattern))
 802             return True
 803     return False
 804
 805
 806 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 807 # that resolve against the current filesystem in the strip/graft
 808 # functions for example, but elsewhere as well.  I suspect bup's not
 809 # always being careful about that.  For some cases, the contents of
 810 # the current filesystem should be irrelevant, and consulting it might
 811 # produce the wrong result, perhaps via unintended symlink resolution,
 812 # for example.
 813
 814 def path_components(path):
 815     """Break path into a list of pairs of the form (name,
 816     full_path_to_name).  Path must start with '/'.
 817     Example:
 818       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 819     if not path.startswith('/'):
 820         raise Exception, 'path must start with "/": %s' % path
 821     # Since we assume path startswith('/'), we can skip the first element.
 822     result = [('', '/')]
 823     norm_path = os.path.abspath(path)
 824     if norm_path == '/':
 825         return result
 826     full_path = ''
 827     for p in norm_path.split('/')[1:]:
 828         full_path += '/' + p
 829         result.append((p, full_path))
 830     return result
 831
 832
 833 def stripped_path_components(path, strip_prefixes):
 834     """Strip any prefix in strip_prefixes from path and return a list
 835     of path components where each component is (name,
 836     none_or_full_fs_path_to_name).  Assume path startswith('/').
 837     See thelpers.py for examples."""
 838     normalized_path = os.path.abspath(path)
 839     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
 840     for bp in sorted_strip_prefixes:
 841         normalized_bp = os.path.abspath(bp)
 842         if normalized_path.startswith(normalized_bp):
 843             prefix = normalized_path[:len(normalized_bp)]
 844             result = []
 845             for p in normalized_path[len(normalized_bp):].split('/'):
 846                 if p: # not root
 847                     prefix += '/'
 848                 prefix += p
 849                 result.append((p, prefix))
 850             return result
 851     # Nothing to strip.
 852     return path_components(path)
 853
 854
def grafted_path_components(graft_points, path):
    """Return the components of 'path' after applying the first matching
    graft point.

    'graft_points' is a sequence of (old_prefix, new_prefix) pairs.  The
    result is a list of (name, full_path_or_None) pairs: components that
    exist only because of new_prefix carry None, the rest come from
    path_components().  When no graft point matches, the plain
    path_components() of the normalized path is returned.
    """
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Normalize both prefixes (normpath does not make a relative
        # prefix absolute).
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        # NOTE(review): this is a plain string-prefix test, so '/foo' also
        # matches '/foobar' -- confirm whether that is intended.
        if clean_path.startswith(old_prefix):
            escaped_prefix = re.escape(old_prefix)
            grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = '/' + grafted_path.lstrip('/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
            new_prefix_parts = new_prefix.split('/')
            # Leading components of the grafted path that come from
            # new_prefix; they get None as their path below.
            result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            graft_point = len(result_prefix)
            result[graft_point] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == '/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
 895     return path_components(clean_path)
 896
# Module-level alias for hashlib.sha1.
Sha1 = hashlib.sha1
 898
 899 def version_date():
 900     """Format bup's version date string for output."""
 901     return _version.DATE.split(' ')[0]
 902
 903
 904 def version_commit():
 905     """Get the commit hash of bup's current version."""
 906     return _version.COMMIT
 907
 908
 909 def version_tag():
 910     """Format bup's version tag (the official version number).
 911
 912     When generated from a commit other than one pointed to with a tag, the
 913     returned string will be "unknown-" followed by the first seven positions of
 914     the commit hash.
 915     """
 916     names = _version.NAMES.strip()
 917     assert(names[0] == '(')
 918     assert(names[-1] == ')')
 919     names = names[1:-1]
 920     l = [n.strip() for n in names.split(',')]
 921     for n in l:
 922         if n.startswith('tag: bup-'):
 923             return n[9:]
 924     return 'unknown-%s' % _version.COMMIT[:7]