X-Git-Url: https://arthur.barton.de/gitweb/?p=bup.git;a=blobdiff_plain;f=lib%2Fbup%2Fhelpers.py;h=1bae59d22e99da707daac8198bcba5c0275882b7;hp=e7fbc5b8992a18433d3f54cc5241c8adac5de18c;hb=41c3f3d78d79531863110f5ffd9ae8ee5a2b3986;hpb=aa29a8483010e4c63f3a3d9234080f9b7fcd8a35 diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py index e7fbc5b..1bae59d 100644 --- a/lib/bup/helpers.py +++ b/lib/bup/helpers.py @@ -1,13 +1,26 @@ """Helper functions and classes for bup.""" +from __future__ import absolute_import from collections import namedtuple +from contextlib import contextmanager from ctypes import sizeof, c_void_p from os import environ -from contextlib import contextmanager +from pipes import quote +from subprocess import PIPE, Popen import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct import hashlib, heapq, math, operator, time, grp, tempfile from bup import _helpers +from bup import compat +# This function should really be in helpers, not in bup.options. But we +# want options.py to be standalone so people can include it in other projects. +from bup.options import _tty_width as tty_width + + +class Nonlocal: + """Helper to deal with Python scoping issues""" + pass + sc_page_size = os.sysconf('SC_PAGE_SIZE') assert(sc_page_size > 0) @@ -16,10 +29,11 @@ sc_arg_max = os.sysconf('SC_ARG_MAX') if sc_arg_max == -1: # "no definite limit" - let's choose 2M sc_arg_max = 2 * 1024 * 1024 -# This function should really be in helpers, not in bup.options. But we -# want options.py to be standalone so people can include it in other projects. -from bup.options import _tty_width -tty_width = _tty_width +def last(iterable): + result = None + for result in iterable: + pass + return result def atoi(s): @@ -41,11 +55,68 @@ def atof(s): buglvl = atoi(os.environ.get('BUP_DEBUG', 0)) -# If the platform doesn't have fdatasync (OS X), fall back to fsync. try: - fdatasync = os.fdatasync + _fdatasync = os.fdatasync except AttributeError: - fdatasync = os.fsync + _fdatasync = os.fsync + +if sys.platform.startswith('darwin'): + # Apparently os.fsync on OS X doesn't guarantee to sync all the way down + import fcntl + def fdatasync(fd): + try: + return fcntl.fcntl(fd, fcntl.F_FULLFSYNC) + except IOError as e: + # Fallback for file systems (SMB) that do not support F_FULLFSYNC + if e.errno == errno.ENOTSUP: + return _fdatasync(fd) + else: + raise +else: + fdatasync = _fdatasync + + +def partition(predicate, stream): + """Returns (leading_matches_it, rest_it), where leading_matches_it + must be completely exhausted before traversing rest_it. + + """ + stream = iter(stream) + ns = Nonlocal() + ns.first_nonmatch = None + def leading_matches(): + for x in stream: + if predicate(x): + yield x + else: + ns.first_nonmatch = (x,) + break + def rest(): + if ns.first_nonmatch: + yield ns.first_nonmatch[0] + for x in stream: + yield x + return (leading_matches(), rest()) + + +def lines_until_sentinel(f, sentinel, ex_type): + # sentinel must end with \n and must contain only one \n + while True: + line = f.readline() + if not (line and line.endswith('\n')): + raise ex_type('Hit EOF while reading line') + if line == sentinel: + return + yield line + + +def stat_if_exists(path): + try: + return os.stat(path) + except OSError as e: + if e.errno != errno.ENOENT: + raise + return None # Write (blockingly) to sockets that may or may not be in blocking mode. @@ -59,7 +130,7 @@ def _hard_write(fd, buf): raise IOError('select(fd) returned without being writable') try: sz = os.write(fd, buf) - except OSError, e: + except OSError as e: if e.errno != errno.EAGAIN: raise assert(sz >= 0) @@ -129,32 +200,13 @@ def mkdirp(d, mode=None): os.makedirs(d, mode) else: os.makedirs(d) - except OSError, e: + except OSError as e: if e.errno == errno.EEXIST: pass else: raise -_unspecified_next_default = object() - -def _fallback_next(it, default=_unspecified_next_default): - """Retrieve the next item from the iterator by calling its - next() method. If default is given, it is returned if the - iterator is exhausted, otherwise StopIteration is raised.""" - - if default is _unspecified_next_default: - return it.next() - else: - try: - return it.next() - except StopIteration: - return default - -if sys.version_info < (2, 6): - next = _fallback_next - - def merge_iter(iters, pfreq, pfunc, pfinal, key=None): if key: samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None) @@ -177,7 +229,7 @@ def merge_iter(iters, pfreq, pfunc, pfinal, key=None): yield e count += 1 try: - e = it.next() # Don't use next() function, it's too expensive + e = next(it) except StopIteration: heapq.heappop(heap) # remove current else: @@ -193,14 +245,43 @@ def unlink(f): """ try: os.unlink(f) - except OSError, e: + except OSError as e: if e.errno != errno.ENOENT: raise -def readpipe(argv, preexec_fn=None): +def shstr(cmd): + if isinstance(cmd, compat.str_type): + return cmd + else: + return ' '.join(map(quote, cmd)) + +exc = subprocess.check_call + +def exo(cmd, + input=None, + stdin=None, + stderr=None, + shell=False, + check=True, + preexec_fn=None): + if input: + assert stdin in (None, PIPE) + stdin = PIPE + p = Popen(cmd, + stdin=stdin, stdout=PIPE, stderr=stderr, + shell=shell, + preexec_fn=preexec_fn) + out, err = p.communicate(input) + if check and p.returncode != 0: + raise Exception('subprocess %r failed with status %d, stderr: %r' + % (' '.join(map(quote, cmd)), p.returncode, err)) + return out, err, p + +def readpipe(argv, preexec_fn=None, shell=False): """Run a subprocess and return its output.""" - p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn) + p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn, + shell=shell) out, err = p.communicate() if p.returncode != 0: raise Exception('subprocess %r failed with status %d' @@ -212,7 +293,7 @@ def _argmax_base(command): base_size = 2048 for c in command: base_size += len(command) + 1 - for k, v in environ.iteritems(): + for k, v in compat.items(environ): base_size += len(k) + len(v) + 2 + sizeof(c_void_p) return base_size @@ -243,8 +324,8 @@ to return multiple strings in order to respect ARG_MAX).""" yield readpipe(command + sub_args, preexec_fn=preexec_fn) -def realpath(p): - """Get the absolute path of a file. +def resolve_parent(p): + """Return the absolute path of a file without following any final symlink. Behaves like os.path.realpath, but doesn't follow a symlink for the last element. (ie. if 'p' itself is a symlink, this one won't follow it, but it @@ -269,11 +350,13 @@ def detect_fakeroot(): return os.getenv("FAKEROOTKEY") != None -def is_superuser(): - if sys.platform.startswith('cygwin'): - import ctypes - return ctypes.cdll.shell32.IsUserAnAdmin() - else: +if sys.platform.startswith('cygwin'): + def is_superuser(): + # https://cygwin.com/ml/cygwin/2015-02/msg00057.html + groups = os.getgroups() + return 544 in groups or 0 in groups +else: + def is_superuser(): return os.geteuid() == 0 @@ -562,7 +645,7 @@ class DemuxConn(BaseConn): if not self._next_packet(timeout): return False try: - self.buf = self.reader.next() + self.buf = next(self.reader) return True except StopIteration: self.reader = None @@ -735,7 +818,8 @@ if _mincore: def fmincore(fd): """Return the mincore() data for fd as a bytearray whose values can be - tested via MINCORE_INCORE""" + tested via MINCORE_INCORE, or None if fd does not fully + support the operation.""" st = os.fstat(fd) if (st.st_size == 0): return bytearray(0) @@ -750,8 +834,19 @@ if _mincore: for ci in xrange(chunk_count): pos = _fmincore_chunk_size * ci; msize = min(_fmincore_chunk_size, st.st_size - pos) - m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos) - _mincore(m, msize, 0, result, ci * pages_per_chunk); + try: + m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos) + except mmap.error as ex: + if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV: + # Perhaps the file was a pipe, i.e. "... | bup split ..." + return None + raise ex + try: + _mincore(m, msize, 0, result, ci * pages_per_chunk) + except OSError as ex: + if ex.errno == errno.ENOSYS: + return None + raise return result @@ -825,6 +920,15 @@ def clear_errors(): saved_errors = [] +def die_if_errors(msg=None, status=1): + global saved_errors + if saved_errors: + if not msg: + msg = 'warning: %d errors encountered\n' % len(saved_errors) + log(msg) + sys.exit(status) + + def handle_ctrl_c(): """Replace the default exception handler for KeyboardInterrupt (Ctrl-C). @@ -850,14 +954,14 @@ def columnate(l, prefix): return "" l = l[:] clen = max(len(s) for s in l) - ncols = (tty_width() - len(prefix)) / (clen + 2) + ncols = (tty_width() - len(prefix)) // (clen + 2) if ncols <= 1: ncols = 1 clen = 0 cols = [] while len(l) % ncols: l.append('') - rows = len(l)/ncols + rows = len(l) // ncols for s in range(0, len(l), rows): cols.append(l[s:s+rows]) out = '' @@ -871,7 +975,7 @@ def parse_date_or_fatal(str, fatal): For now we expect a string that contains a float.""" try: date = float(str) - except ValueError, e: + except ValueError as e: raise fatal('invalid date format (should be a float): %r' % e) else: return date @@ -884,15 +988,15 @@ def parse_excludes(options, fatal): for flag in options: (option, parameter) = flag if option == '--exclude': - excluded_paths.append(realpath(parameter)) + excluded_paths.append(resolve_parent(parameter)) elif option == '--exclude-from': try: - f = open(realpath(parameter)) - except IOError, e: + f = open(resolve_parent(parameter)) + except IOError as e: raise fatal("couldn't read %s" % parameter) for exclude_path in f.readlines(): # FIXME: perhaps this should be rstrip('\n') - exclude_path = realpath(exclude_path.strip()) + exclude_path = resolve_parent(exclude_path.strip()) if exclude_path: excluded_paths.append(exclude_path) return sorted(frozenset(excluded_paths)) @@ -908,12 +1012,12 @@ def parse_rx_excludes(options, fatal): if option == '--exclude-rx': try: excluded_patterns.append(re.compile(parameter)) - except re.error, ex: + except re.error as ex: fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex)) elif option == '--exclude-rx-from': try: - f = open(realpath(parameter)) - except IOError, e: + f = open(resolve_parent(parameter)) + except IOError as e: raise fatal("couldn't read %s" % parameter) for pattern in f.readlines(): spattern = pattern.rstrip('\n') @@ -921,7 +1025,7 @@ def parse_rx_excludes(options, fatal): continue try: excluded_patterns.append(re.compile(spattern)) - except re.error, ex: + except re.error as ex: fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex)) return excluded_patterns @@ -950,7 +1054,7 @@ def path_components(path): Example: '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]""" if not path.startswith('/'): - raise Exception, 'path must start with "/": %s' % path + raise Exception('path must start with "/": %s' % path) # Since we assume path startswith('/'), we can skip the first element. result = [('', '/')] norm_path = os.path.abspath(path) @@ -1049,10 +1153,15 @@ if _localtime: def localtime(time): return bup_time(*_helpers.localtime(time)) def utc_offset_str(t): - 'Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.' + """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t. + If the current UTC offset does not represent an integer number + of minutes, the fractional component will be truncated.""" off = localtime(t).tm_gmtoff - hrs = off / 60 / 60 - return "%+03d%02d" % (hrs, abs(off - (hrs * 60 * 60))) + # Note: // doesn't truncate like C for negative values, it rounds down. + offmin = abs(off) // 60 + m = offmin % 60 + h = (offmin - m) // 60 + return "%+03d%02d" % (-h if off < 0 else h, m) def to_py_time(x): if isinstance(x, time.struct_time): return x @@ -1063,3 +1172,41 @@ else: return time.strftime('%z', localtime(t)) def to_py_time(x): return x + + +_some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{') + +def valid_save_name(name): + # Enforce a superset of the restrictions in git-check-ref-format(1) + if name == '@' \ + or name.startswith('/') or name.endswith('/') \ + or name.endswith('.'): + return False + if _some_invalid_save_parts_rx.search(name): + return False + for c in name: + if ord(c) < 0x20 or ord(c) == 0x7f: + return False + for part in name.split('/'): + if part.startswith('.') or part.endswith('.lock'): + return False + return True + + +_period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$') + +def period_as_secs(s): + if s == 'forever': + return float('inf') + match = _period_rx.match(s) + if not match: + return None + mag = int(match.group(1)) + scale = match.group(2) + return mag * {'s': 1, + 'min': 60, + 'h': 60 * 60, + 'd': 60 * 60 * 24, + 'w': 60 * 60 * 24 * 7, + 'm': 60 * 60 * 24 * 31, + 'y': 60 * 60 * 24 * 366}[scale]