1 """Helper functions and classes for bup."""
3 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
4 import heapq, operator, time, platform
5 from bup import _version, _helpers
6 import bup._helpers as _helpers
8 # This function should really be in helpers, not in bup.options. But we
9 # want options.py to be standalone so people can include it in other projects.
10 from bup.options import _tty_width
11 tty_width = _tty_width
15 """Convert the string 's' to an integer. Return 0 if s is not a number."""
23 """Convert the string 's' to a float. Return 0 if s is not a number."""
25 return float(s or '0')
# Debug verbosity level, read from $BUP_DEBUG; atoi() yields 0 when the
# variable is unset or not a number.
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
33 # Write (blockingly) to sockets that may or may not be in blocking mode.
34 # We need this because our stderr is sometimes eaten by subprocesses
35 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
36 # leading to race conditions. Ick. We'll do it the hard way.
37 def _hard_write(fd, buf):
39 (r,w,x) = select.select([], [fd], [], None)
41 raise IOError('select(fd) returned without being writable')
43 sz = os.write(fd, buf)
45 if e.errno != errno.EAGAIN:
53 """Print a log message to stderr."""
56 _hard_write(sys.stderr.fileno(), s)
# Whether stdout (fd 1) / stderr (fd 2) is a terminal.  $BUP_FORCE_TTY can
# override the check: bit 1 forces stdout, bit 2 forces stderr.
istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
74 """Calls log() if stderr is a TTY. Does nothing otherwise."""
82 """Calls progress() only if we haven't printed progress in a while.
84 This avoids overloading the stderr buffer with excess junk.
88 if now - _last_prog > 0.1:
94 """Calls progress() to redisplay the most recent progress message.
96 Useful after you've printed some other message that wipes out the
99 if _last_progress and _last_progress.endswith('\r'):
100 progress(_last_progress)
103 def mkdirp(d, mode=None):
104 """Recursively create directories on path 'd'.
106 Unlike os.makedirs(), it doesn't raise an exception if the last element of
107 the path already exists.
115 if e.errno == errno.EEXIST:
122 """Get the next item from an iterator, None if we reached the end."""
125 except StopIteration:
129 def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
131 samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
133 samekey = operator.eq
135 total = sum(len(it) for it in iters)
136 iters = (iter(it) for it in iters)
137 heap = ((next(it),it) for it in iters)
138 heap = [(e,it) for e,it in heap if e]
143 if not count % pfreq:
146 if not samekey(e, pe):
151 e = it.next() # Don't use next() function, it's too expensive
152 except StopIteration:
153 heapq.heappop(heap) # remove current
155 heapq.heapreplace(heap, (e, it)) # shift current to new location
160 """Delete a file at path 'f' if it currently exists.
162 Unlike os.unlink(), does not throw an exception if the file didn't already
168 if e.errno == errno.ENOENT:
169 pass # it doesn't exist, that's what you asked for
173 """Run a subprocess and return its output."""
174 p = subprocess.Popen(argv, stdout=subprocess.PIPE)
181 """Get the absolute path of a file.
183 Behaves like os.path.realpath, but doesn't follow a symlink for the last
184 element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
185 will follow symlinks in p's directory)
191 if st and stat.S_ISLNK(st.st_mode):
192 (dir, name) = os.path.split(p)
193 dir = os.path.realpath(dir)
194 out = os.path.join(dir, name)
196 out = os.path.realpath(p)
197 #log('realpathing:%r,%r\n' % (p, out))
def detect_fakeroot():
    "Return True if we appear to be running under fakeroot."
    # fakeroot advertises itself by exporting FAKEROOTKEY into the
    # environment; its value doesn't matter, only its presence.
    # Use "is not None" rather than "!= None" for the None check (PEP 8).
    return os.getenv("FAKEROOTKEY") is not None
207 if platform.system().startswith('CYGWIN'):
209 return ctypes.cdll.shell32.IsUserAnAdmin()
211 return os.geteuid() == 0
216 """Get the user's login name."""
221 _username = pwd.getpwuid(uid)[0]
223 _username = 'user%d' % uid
229 """Get the user's full name."""
231 if not _userfullname:
234 entry = pwd.getpwuid(uid)
235 _userfullname = entry[4].split(',')[0] or entry[0]
239 if not _userfullname:
240 _userfullname = 'user%d' % uid
246 """Get the FQDN of this machine."""
249 _hostname = socket.getfqdn()
# Cached base directory for bup's bundled resources (lazily initialized).
_resource_path = None
def resource_path(subdir=''):
    """Return the path of bup's resource directory, joined with 'subdir'.

    The base directory comes from $BUP_RESOURCE_PATH, falling back to the
    current directory, and is cached after the first call.
    """
    global _resource_path
    if not _resource_path:
        env_path = os.environ.get('BUP_RESOURCE_PATH')
        _resource_path = env_path if env_path else '.'
    return os.path.join(_resource_path, subdir)
261 class NotOk(Exception):
266 def __init__(self, outp):
270 while self._read(65536): pass
272 def read(self, size):
273 """Read 'size' bytes from input stream."""
275 return self._read(size)
278 """Read from input stream until a newline is found."""
280 return self._readline()
282 def write(self, data):
283 """Write 'data' to output stream."""
284 #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
285 self.outp.write(data)
288 """Return true if input stream is readable."""
289 raise NotImplemented("Subclasses must implement has_input")
292 """Indicate end of output from last sent command."""
296 """Indicate server error to the client."""
297 s = re.sub(r'\s+', ' ', str(s))
298 self.write('\nerror %s\n' % s)
300 def _check_ok(self, onempty):
303 for rl in linereader(self):
304 #log('%d got line: %r\n' % (os.getpid(), rl))
305 if not rl: # empty line
309 elif rl.startswith('error '):
310 #log('client: error: %s\n' % rl[6:])
314 raise Exception('server exited unexpectedly; see errors above')
316 def drain_and_check_ok(self):
317 """Remove all data for the current command from input stream."""
320 return self._check_ok(onempty)
323 """Verify that server action completed successfully."""
325 raise Exception('expected "ok", got %r' % rl)
326 return self._check_ok(onempty)
329 class Conn(BaseConn):
330 def __init__(self, inp, outp):
331 BaseConn.__init__(self, outp)
334 def _read(self, size):
335 return self.inp.read(size)
338 return self.inp.readline()
341 [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
343 assert(rl[0] == self.inp.fileno())
349 def checked_reader(fd, n):
351 rl, _, _ = select.select([fd], [], [])
354 if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
# Maximum payload size of a single multiplexed packet; mux() never writes
# more than this per packet and DemuxConn asserts it on read.
MAX_PACKET = 128 * 1024
360 def mux(p, outfd, outr, errr):
363 while p.poll() is None:
364 rl, _, _ = select.select(fds, [], [])
367 buf = os.read(outr, MAX_PACKET)
369 os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
371 buf = os.read(errr, 1024)
373 os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
375 os.write(outfd, struct.pack('!IB', 0, 3))
378 class DemuxConn(BaseConn):
379 """A helper class for bup's client-server protocol."""
380 def __init__(self, infd, outp):
381 BaseConn.__init__(self, outp)
382 # Anything that comes through before the sync string was not
383 # multiplexed and can be assumed to be debug/log before mux init.
385 while tail != 'BUPMUX':
386 b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
388 raise IOError('demux: unexpected EOF during initialization')
390 sys.stderr.write(tail[:-6]) # pre-mux log messages
397 def write(self, data):
399 BaseConn.write(self, data)
401 def _next_packet(self, timeout):
402 if self.closed: return False
403 rl, wl, xl = select.select([self.infd], [], [], timeout)
404 if not rl: return False
405 assert(rl[0] == self.infd)
406 ns = ''.join(checked_reader(self.infd, 5))
407 n, fdw = struct.unpack('!IB', ns)
408 assert(n <= MAX_PACKET)
410 self.reader = checked_reader(self.infd, n)
412 for buf in checked_reader(self.infd, n):
413 sys.stderr.write(buf)
416 debug2("DemuxConn: marked closed\n")
419 def _load_buf(self, timeout):
420 if self.buf is not None:
422 while not self.closed:
423 while not self.reader:
424 if not self._next_packet(timeout):
427 self.buf = self.reader.next()
429 except StopIteration:
433 def _read_parts(self, ix_fn):
434 while self._load_buf(None):
435 assert(self.buf is not None)
437 if i is None or i == len(self.buf):
442 self.buf = self.buf[i:]
450 return buf.index('\n')+1
453 return ''.join(self._read_parts(find_eol))
455 def _read(self, size):
457 def until_size(buf): # Closes on csize
458 if len(buf) < csize[0]:
463 return ''.join(self._read_parts(until_size))
466 return self._load_buf(0)
470 """Generate a list of input lines from 'f' without terminating newlines."""
478 def chunkyreader(f, count = None):
479 """Generate a list of chunks of data read from 'f'.
481 If count is None, read until EOF is reached.
483 If count is a positive integer, read 'count' bytes from 'f'. If EOF is
484 reached while reading, raise IOError.
488 b = f.read(min(count, 65536))
490 raise IOError('EOF with %d bytes remaining' % count)
501 """Append "/" to 's' if it doesn't aleady end in "/"."""
502 if s and not s.endswith('/'):
508 def _mmap_do(f, sz, flags, prot, close):
510 st = os.fstat(f.fileno())
513 # trying to open a zero-length map gives an error, but an empty
514 # string has all the same behaviour of a zero-length map, ie. it has
517 map = mmap.mmap(f.fileno(), sz, flags, prot)
519 f.close() # map will persist beyond file close
523 def mmap_read(f, sz = 0, close=True):
524 """Create a read-only memory mapped region on file 'f'.
525 If sz is 0, the region will cover the entire file.
527 return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
530 def mmap_readwrite(f, sz = 0, close=True):
531 """Create a read-write memory mapped region on file 'f'.
532 If sz is 0, the region will cover the entire file.
534 return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
538 def mmap_readwrite_private(f, sz = 0, close=True):
539 """Create a read-write memory mapped region on file 'f'.
540 If sz is 0, the region will cover the entire file.
541 The map is private, which means the changes are never flushed back to the
544 return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
549 """Parse data size information into a float number.
551 Here are some examples of conversions:
552 199.2k means 203981 bytes
553 1GB means 1073741824 bytes
554 2.1 tb means 2199023255552 bytes
556 g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
558 raise ValueError("can't parse %r as a number" % s)
559 (val, unit) = g.groups()
562 if unit in ['t', 'tb']:
563 mult = 1024*1024*1024*1024
564 elif unit in ['g', 'gb']:
565 mult = 1024*1024*1024
566 elif unit in ['m', 'mb']:
568 elif unit in ['k', 'kb']:
570 elif unit in ['', 'b']:
573 raise ValueError("invalid unit %r in number %r" % (unit, s))
578 """Count the number of elements in an iterator. (consumes the iterator)"""
579 return reduce(lambda x,y: x+1, l)
584 """Append an error message to the list of saved errors.
586 Once processing is able to stop and output the errors, the saved errors are
587 accessible in the module variable helpers.saved_errors.
589 saved_errors.append(e)
599 """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
601 The new exception handler will make sure that bup will exit without an ugly
602 stacktrace when Ctrl-C is hit.
604 oldhook = sys.excepthook
605 def newhook(exctype, value, traceback):
606 if exctype == KeyboardInterrupt:
607 log('Interrupted.\n')
609 return oldhook(exctype, value, traceback)
610 sys.excepthook = newhook
613 def columnate(l, prefix):
614 """Format elements of 'l' in columns with 'prefix' leading each line.
616 The number of columns is determined automatically based on the string
622 clen = max(len(s) for s in l)
623 ncols = (tty_width() - len(prefix)) / (clen + 2)
628 while len(l) % ncols:
631 for s in range(0, len(l), rows):
632 cols.append(l[s:s+rows])
634 for row in zip(*cols):
635 out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
639 def parse_date_or_fatal(str, fatal):
640 """Parses the given date or calls Option.fatal().
641 For now we expect a string that contains a float."""
644 except ValueError, e:
645 raise fatal('invalid date format (should be a float): %r' % e)
650 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
651 # that resolve against the current filesystem in the strip/graft
652 # functions for example, but elsewhere as well. I suspect bup's not
653 # always being careful about that. For some cases, the contents of
654 # the current filesystem should be irrelevant, and consulting it might
655 # produce the wrong result, perhaps via unintended symlink resolution,
658 def path_components(path):
659 """Break path into a list of pairs of the form (name,
660 full_path_to_name). Path must start with '/'.
662 '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
663 assert(path.startswith('/'))
664 # Since we assume path startswith('/'), we can skip the first element.
666 norm_path = os.path.abspath(path)
670 for p in norm_path.split('/')[1:]:
672 result.append((p, full_path))
676 def stripped_path_components(path, strip_prefixes):
677 """Strip any prefix in strip_prefixes from path and return a list
678 of path components where each component is (name,
679 none_or_full_fs_path_to_name). Assume path startswith('/').
680 See thelpers.py for examples."""
681 normalized_path = os.path.abspath(path)
682 sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
683 for bp in sorted_strip_prefixes:
684 normalized_bp = os.path.abspath(bp)
685 if normalized_path.startswith(normalized_bp):
686 prefix = normalized_path[:len(normalized_bp)]
688 for p in normalized_path[len(normalized_bp):].split('/'):
692 result.append((p, prefix))
695 return path_components(path)
698 def grafted_path_components(graft_points, path):
699 # Create a result that consists of some number of faked graft
700 # directories before the graft point, followed by all of the real
701 # directories from path that are after the graft point. Arrange
702 # for the directory at the graft point in the result to correspond
703 # to the "orig" directory in --graft orig=new. See t/thelpers.py
706 # Note that given --graft orig=new, orig and new have *nothing* to
707 # do with each other, even if some of their component names
708 # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
709 # equivalent to --graft /foo/bar/baz=/x/y/z, or even
712 # FIXME: This can't be the best solution...
713 clean_path = os.path.abspath(path)
714 for graft_point in graft_points:
715 old_prefix, new_prefix = graft_point
716 # Expand prefixes iff not absolute paths.
717 old_prefix = os.path.normpath(old_prefix)
718 new_prefix = os.path.normpath(new_prefix)
719 if clean_path.startswith(old_prefix):
720 escaped_prefix = re.escape(old_prefix)
721 grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
722 # Handle /foo=/ (at least) -- which produces //whatever.
723 grafted_path = '/' + grafted_path.lstrip('/')
724 clean_path_components = path_components(clean_path)
725 # Count the components that were stripped.
726 strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
727 new_prefix_parts = new_prefix.split('/')
728 result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
729 result = [(p, None) for p in result_prefix] \
730 + clean_path_components[strip_count:]
731 # Now set the graft point name to match the end of new_prefix.
732 graft_point = len(result_prefix)
733 result[graft_point] = \
734 (new_prefix_parts[-1], clean_path_components[strip_count][1])
735 if new_prefix == '/': # --graft ...=/ is a special case.
738 return path_components(clean_path)
740 # hashlib is only available in python 2.5 or higher, but the 'sha' module
741 # produces a DeprecationWarning in python 2.6 or higher. We want to support
742 # python 2.4 and above without any stupid warnings, so let's try using hashlib
743 # first, and downgrade if it fails.
754 """Format bup's version date string for output."""
755 return _version.DATE.split(' ')[0]
def version_commit():
    """Get the commit hash of bup's current version."""
    # The hash is baked into the generated _version module at build time.
    commit_hash = _version.COMMIT
    return commit_hash
764 """Format bup's version tag (the official version number).
766 When generated from a commit other than one pointed to with a tag, the
767 returned string will be "unknown-" followed by the first seven positions of
770 names = _version.NAMES.strip()
771 assert(names[0] == '(')
772 assert(names[-1] == ')')
774 l = [n.strip() for n in names.split(',')]
776 if n.startswith('tag: bup-'):
778 return 'unknown-%s' % _version.COMMIT[:7]