X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2Fhelpers.py;h=d05226ebc28207dd94435a6c920ef99c4933b721;hb=00fb1f1b2a53935ca7d5ce95ea4cf56b7f9bcc3d;hp=a11e6d602659df5c01c1a2bea1cbb424ad61f847;hpb=556a604de073e64557143d44afee07da1e443641;p=bup.git diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py index a11e6d6..d05226e 100644 --- a/lib/bup/helpers.py +++ b/lib/bup/helpers.py @@ -1,18 +1,39 @@ """Helper functions and classes for bup.""" +from collections import namedtuple +from contextlib import contextmanager from ctypes import sizeof, c_void_p from os import environ +from pipes import quote +from subprocess import PIPE, Popen import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct -import hashlib, heapq, operator, time, grp - -from bup import _version, _helpers -import bup._helpers as _helpers -import math +import hashlib, heapq, math, operator, time, grp, tempfile +from bup import _helpers +from bup import compat # This function should really be in helpers, not in bup.options. But we # want options.py to be standalone so people can include it in other projects. -from bup.options import _tty_width -tty_width = _tty_width +from bup.options import _tty_width as tty_width + + +class Nonlocal: + """Helper to deal with Python scoping issues""" + pass + + +sc_page_size = os.sysconf('SC_PAGE_SIZE') +assert(sc_page_size > 0) + +sc_arg_max = os.sysconf('SC_ARG_MAX') +if sc_arg_max == -1: # "no definite limit" - let's choose 2M + sc_arg_max = 2 * 1024 * 1024 + + +def last(iterable): + result = None + for result in iterable: + pass + return result def atoi(s): @@ -34,11 +55,68 @@ def atof(s): buglvl = atoi(os.environ.get('BUP_DEBUG', 0)) -# If the platform doesn't have fdatasync (OS X), fall back to fsync. try: - fdatasync = os.fdatasync + _fdatasync = os.fdatasync except AttributeError: - fdatasync = os.fsync + _fdatasync = os.fsync + +if sys.platform.startswith('darwin'): + # Apparently os.fsync on OS X doesn't guarantee to sync all the way down + import fcntl + def fdatasync(fd): + try: + return fcntl.fcntl(fd, fcntl.F_FULLFSYNC) + except IOError as e: + # Fallback for file systems (SMB) that do not support F_FULLFSYNC + if e.errno == errno.ENOTSUP: + return _fdatasync(fd) + else: + raise +else: + fdatasync = _fdatasync + + +def partition(predicate, stream): + """Returns (leading_matches_it, rest_it), where leading_matches_it + must be completely exhausted before traversing rest_it. + + """ + stream = iter(stream) + ns = Nonlocal() + ns.first_nonmatch = None + def leading_matches(): + for x in stream: + if predicate(x): + yield x + else: + ns.first_nonmatch = (x,) + break + def rest(): + if ns.first_nonmatch: + yield ns.first_nonmatch[0] + for x in stream: + yield x + return (leading_matches(), rest()) + + +def lines_until_sentinel(f, sentinel, ex_type): + # sentinel must end with \n and must contain only one \n + while True: + line = f.readline() + if not (line and line.endswith('\n')): + raise ex_type('Hit EOF while reading line') + if line == sentinel: + return + yield line + + +def stat_if_exists(path): + try: + return os.stat(path) + except OSError as e: + if e.errno != errno.ENOENT: + raise + return None # Write (blockingly) to sockets that may or may not be in blocking mode. @@ -52,7 +130,7 @@ def _hard_write(fd, buf): raise IOError('select(fd) returned without being writable') try: sz = os.write(fd, buf) - except OSError, e: + except OSError as e: if e.errno != errno.EAGAIN: raise assert(sz >= 0) @@ -122,32 +200,13 @@ def mkdirp(d, mode=None): os.makedirs(d, mode) else: os.makedirs(d) - except OSError, e: + except OSError as e: if e.errno == errno.EEXIST: pass else: raise -_unspecified_next_default = object() - -def _fallback_next(it, default=_unspecified_next_default): - """Retrieve the next item from the iterator by calling its - next() method. If default is given, it is returned if the - iterator is exhausted, otherwise StopIteration is raised.""" - - if default is _unspecified_next_default: - return it.next() - else: - try: - return it.next() - except StopIteration: - return default - -if sys.version_info < (2, 6): - next = _fallback_next - - def merge_iter(iters, pfreq, pfunc, pfinal, key=None): if key: samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None) @@ -170,7 +229,7 @@ def merge_iter(iters, pfreq, pfunc, pfinal, key=None): yield e count += 1 try: - e = it.next() # Don't use next() function, it's too expensive + e = next(it) except StopIteration: heapq.heappop(heap) # remove current else: @@ -186,14 +245,43 @@ def unlink(f): """ try: os.unlink(f) - except OSError, e: - if e.errno == errno.ENOENT: - pass # it doesn't exist, that's what you asked for + except OSError as e: + if e.errno != errno.ENOENT: + raise -def readpipe(argv, preexec_fn=None): +def shstr(cmd): + if isinstance(cmd, compat.str_type): + return cmd + else: + return ' '.join(map(quote, cmd)) + +exc = subprocess.check_call + +def exo(cmd, + input=None, + stdin=None, + stderr=None, + shell=False, + check=True, + preexec_fn=None): + if input: + assert stdin in (None, PIPE) + stdin = PIPE + p = Popen(cmd, + stdin=stdin, stdout=PIPE, stderr=stderr, + shell=shell, + preexec_fn=preexec_fn) + out, err = p.communicate(input) + if check and p.returncode != 0: + raise Exception('subprocess %r failed with status %d, stderr: %r' + % (' '.join(map(quote, cmd)), p.returncode, err)) + return out, err, p + +def readpipe(argv, preexec_fn=None, shell=False): """Run a subprocess and return its output.""" - p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn) + p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn, + shell=shell) out, err = p.communicate() if p.returncode != 0: raise Exception('subprocess %r failed with status %d' @@ -214,7 +302,7 @@ def _argmax_args_size(args): return sum(len(x) + 1 + sizeof(c_void_p) for x in args) -def batchpipe(command, args, preexec_fn=None, arg_max=_helpers.SC_ARG_MAX): +def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max): """If args is not empty, yield the output produced by calling the command list with args as a sequence of strings (It may be necessary to return multiple strings in order to respect ARG_MAX).""" @@ -236,8 +324,8 @@ to return multiple strings in order to respect ARG_MAX).""" yield readpipe(command + sub_args, preexec_fn=preexec_fn) -def realpath(p): - """Get the absolute path of a file. +def resolve_parent(p): + """Return the absolute path of a file without following any final symlink. Behaves like os.path.realpath, but doesn't follow a symlink for the last element. (ie. if 'p' itself is a symlink, this one won't follow it, but it @@ -262,8 +350,16 @@ def detect_fakeroot(): return os.getenv("FAKEROOTKEY") != None +_warned_about_superuser_detection = None def is_superuser(): if sys.platform.startswith('cygwin'): + if sys.getwindowsversion()[0] > 5: + # Sounds like situation is much more complicated here + global _warned_about_superuser_detection + if not _warned_about_superuser_detection: + log("can't detect root status for OS version > 5; assuming not root") + _warned_about_superuser_detection = True + return False import ctypes return ctypes.cdll.shell32.IsUserAnAdmin() else: @@ -555,7 +651,7 @@ class DemuxConn(BaseConn): if not self._next_packet(timeout): return False try: - self.buf = self.reader.next() + self.buf = next(self.reader) return True except StopIteration: self.reader = None @@ -628,6 +724,42 @@ def chunkyreader(f, count = None): yield b +@contextmanager +def atomically_replaced_file(name, mode='w', buffering=-1): + """Yield a file that will be atomically renamed name when leaving the block. + + This contextmanager yields an open file object that is backed by a + temporary file which will be renamed (atomically) to the target + name if everything succeeds. + + The mode and buffering arguments are handled exactly as with open, + and the yielded file will have very restrictive permissions, as + per mkstemp. + + E.g.:: + + with atomically_replaced_file('foo.txt', 'w') as f: + f.write('hello jack.') + + """ + + (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name), + text=('b' not in mode)) + try: + try: + f = os.fdopen(ffd, mode, buffering) + except: + os.close(ffd) + raise + try: + yield f + finally: + f.close() + os.rename(tempname, name) + finally: + unlink(tempname) # nonexistant file is ignored + + def slashappend(s): """Append "/" to 's' if it doesn't aleady end in "/".""" if s and not s.endswith('/'): @@ -676,6 +808,49 @@ def mmap_readwrite_private(f, sz = 0, close=True): close) +_mincore = getattr(_helpers, 'mincore', None) +if _mincore: + # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined. + MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1) + + _fmincore_chunk_size = None + def _set_fmincore_chunk_size(): + global _fmincore_chunk_size + pref_chunk_size = 64 * 1024 * 1024 + chunk_size = sc_page_size + if (sc_page_size < pref_chunk_size): + chunk_size = sc_page_size * (pref_chunk_size / sc_page_size) + _fmincore_chunk_size = chunk_size + + def fmincore(fd): + """Return the mincore() data for fd as a bytearray whose values can be + tested via MINCORE_INCORE, or None if fd does not fully + support the operation.""" + st = os.fstat(fd) + if (st.st_size == 0): + return bytearray(0) + if not _fmincore_chunk_size: + _set_fmincore_chunk_size() + pages_per_chunk = _fmincore_chunk_size / sc_page_size; + page_count = (st.st_size + sc_page_size - 1) / sc_page_size; + chunk_count = page_count / _fmincore_chunk_size + if chunk_count < 1: + chunk_count = 1 + result = bytearray(page_count) + for ci in xrange(chunk_count): + pos = _fmincore_chunk_size * ci; + msize = min(_fmincore_chunk_size, st.st_size - pos) + try: + m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos) + except mmap.error as ex: + if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV: + # Perhaps the file was a pipe, i.e. "... | bup split ..." + return None + raise ex + _mincore(m, msize, 0, result, ci * pages_per_chunk); + return result + + def parse_timestamp(epoch_str): """Return the number of nanoseconds since the epoch that are described by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed, @@ -746,6 +921,15 @@ def clear_errors(): saved_errors = [] +def die_if_errors(msg=None, status=1): + global saved_errors + if saved_errors: + if not msg: + msg = 'warning: %d errors encountered\n' % len(saved_errors) + log(msg) + sys.exit(status) + + def handle_ctrl_c(): """Replace the default exception handler for KeyboardInterrupt (Ctrl-C). @@ -791,8 +975,8 @@ def parse_date_or_fatal(str, fatal): """Parses the given date or calls Option.fatal(). For now we expect a string that contains a float.""" try: - date = atof(str) - except ValueError, e: + date = float(str) + except ValueError as e: raise fatal('invalid date format (should be a float): %r' % e) else: return date @@ -805,14 +989,17 @@ def parse_excludes(options, fatal): for flag in options: (option, parameter) = flag if option == '--exclude': - excluded_paths.append(realpath(parameter)) + excluded_paths.append(resolve_parent(parameter)) elif option == '--exclude-from': try: - f = open(realpath(parameter)) - except IOError, e: + f = open(resolve_parent(parameter)) + except IOError as e: raise fatal("couldn't read %s" % parameter) for exclude_path in f.readlines(): - excluded_paths.append(realpath(exclude_path.strip())) + # FIXME: perhaps this should be rstrip('\n') + exclude_path = resolve_parent(exclude_path.strip()) + if exclude_path: + excluded_paths.append(exclude_path) return sorted(frozenset(excluded_paths)) @@ -826,18 +1013,20 @@ def parse_rx_excludes(options, fatal): if option == '--exclude-rx': try: excluded_patterns.append(re.compile(parameter)) - except re.error, ex: + except re.error as ex: fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex)) elif option == '--exclude-rx-from': try: - f = open(realpath(parameter)) - except IOError, e: + f = open(resolve_parent(parameter)) + except IOError as e: raise fatal("couldn't read %s" % parameter) for pattern in f.readlines(): spattern = pattern.rstrip('\n') + if not spattern: + continue try: excluded_patterns.append(re.compile(spattern)) - except re.error, ex: + except re.error as ex: fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex)) return excluded_patterns @@ -888,6 +1077,8 @@ def stripped_path_components(path, strip_prefixes): sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True) for bp in sorted_strip_prefixes: normalized_bp = os.path.abspath(bp) + if normalized_bp == '/': + continue if normalized_path.startswith(normalized_bp): prefix = normalized_path[:len(normalized_bp)] result = [] @@ -943,31 +1134,80 @@ def grafted_path_components(graft_points, path): return result return path_components(clean_path) -Sha1 = hashlib.sha1 -def version_date(): - """Format bup's version date string for output.""" - return _version.DATE.split(' ')[0] +Sha1 = hashlib.sha1 -def version_commit(): - """Get the commit hash of bup's current version.""" - return _version.COMMIT +_localtime = getattr(_helpers, 'localtime', None) + +if _localtime: + bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday', + 'tm_hour', 'tm_min', 'tm_sec', + 'tm_wday', 'tm_yday', + 'tm_isdst', 'tm_gmtoff', 'tm_zone']) + +# Define a localtime() that returns bup_time when possible. Note: +# this means that any helpers.localtime() results may need to be +# passed through to_py_time() before being passed to python's time +# module, which doesn't appear willing to ignore the extra items. +if _localtime: + def localtime(time): + return bup_time(*_helpers.localtime(time)) + def utc_offset_str(t): + """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t. + If the current UTC offset does not represent an integer number + of minutes, the fractional component will be truncated.""" + off = localtime(t).tm_gmtoff + # Note: // doesn't truncate like C for negative values, it rounds down. + offmin = abs(off) // 60 + m = offmin % 60 + h = (offmin - m) // 60 + return "%+03d%02d" % (-h if off < 0 else h, m) + def to_py_time(x): + if isinstance(x, time.struct_time): + return x + return time.struct_time(x[:9]) +else: + localtime = time.localtime + def utc_offset_str(t): + return time.strftime('%z', localtime(t)) + def to_py_time(x): + return x + + +_some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{') + +def valid_save_name(name): + # Enforce a superset of the restrictions in git-check-ref-format(1) + if name == '@' \ + or name.startswith('/') or name.endswith('/') \ + or name.endswith('.'): + return False + if _some_invalid_save_parts_rx.search(name): + return False + for c in name: + if ord(c) < 0x20 or ord(c) == 0x7f: + return False + for part in name.split('/'): + if part.startswith('.') or part.endswith('.lock'): + return False + return True -def version_tag(): - """Format bup's version tag (the official version number). +_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$') - When generated from a commit other than one pointed to with a tag, the - returned string will be "unknown-" followed by the first seven positions of - the commit hash. - """ - names = _version.NAMES.strip() - assert(names[0] == '(') - assert(names[-1] == ')') - names = names[1:-1] - l = [n.strip() for n in names.split(',')] - for n in l: - if n.startswith('tag: bup-'): - return n[9:] - return 'unknown-%s' % _version.COMMIT[:7] +def period_as_secs(s): + if s == 'forever': + return float('inf') + match = _period_rx.match(s) + if not match: + return None + mag = int(match.group(1)) + scale = match.group(2) + return mag * {'s': 1, + 'min': 60, + 'h': 60 * 60, + 'd': 60 * 60 * 24, + 'w': 60 * 60 * 24 * 7, + 'm': 60 * 60 * 24 * 31, + 'y': 60 * 60 * 24 * 366}[scale]