X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2Fhelpers.py;h=b052b63021511b035a94ac9945e01dce1099637d;hb=dd2bf330103347208f63198c7e8a1490bddc0c6e;hp=300e67eee25dd9123537b73a1b4fc1a3fc72075c;hpb=a01949cd7bef41c7f43e6334e6989878d47b29b6;p=bup.git diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py index 300e67e..b052b63 100644 --- a/lib/bup/helpers.py +++ b/lib/bup/helpers.py @@ -1,14 +1,20 @@ """Helper functions and classes for bup.""" +from collections import namedtuple from ctypes import sizeof, c_void_p from os import environ from contextlib import contextmanager import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct -import hashlib, heapq, operator, time, grp, tempfile +import hashlib, heapq, math, operator, time, grp, tempfile from bup import _helpers -import bup._helpers as _helpers -import math + +sc_page_size = os.sysconf('SC_PAGE_SIZE') +assert(sc_page_size > 0) + +sc_arg_max = os.sysconf('SC_ARG_MAX') +if sc_arg_max == -1: # "no definite limit" - let's choose 2M + sc_arg_max = 2 * 1024 * 1024 # This function should really be in helpers, not in bup.options. But we # want options.py to be standalone so people can include it in other projects. @@ -35,11 +41,15 @@ def atof(s): buglvl = atoi(os.environ.get('BUP_DEBUG', 0)) -# If the platform doesn't have fdatasync (OS X), fall back to fsync. -try: - fdatasync = os.fdatasync -except AttributeError: - fdatasync = os.fsync +if sys.platform.startswith('darwin'): + # Apparently fsync on OS X doesn't guarantee to sync all the way down + import fcntl + fdatasync = lambda fd : fcntl.fcntl(fd, fcntl.F_FULLFSYNC) +else: # If the platform doesn't have fdatasync, fall back to fsync + try: + fdatasync = os.fdatasync + except AttributeError: + fdatasync = os.fsync # Write (blockingly) to sockets that may or may not be in blocking mode. @@ -53,7 +63,7 @@ def _hard_write(fd, buf): raise IOError('select(fd) returned without being writable') try: sz = os.write(fd, buf) - except OSError, e: + except OSError as e: if e.errno != errno.EAGAIN: raise assert(sz >= 0) @@ -123,7 +133,7 @@ def mkdirp(d, mode=None): os.makedirs(d, mode) else: os.makedirs(d) - except OSError, e: + except OSError as e: if e.errno == errno.EEXIST: pass else: @@ -187,9 +197,9 @@ def unlink(f): """ try: os.unlink(f) - except OSError, e: - if e.errno == errno.ENOENT: - pass # it doesn't exist, that's what you asked for + except OSError as e: + if e.errno != errno.ENOENT: + raise def readpipe(argv, preexec_fn=None): @@ -215,7 +225,7 @@ def _argmax_args_size(args): return sum(len(x) + 1 + sizeof(c_void_p) for x in args) -def batchpipe(command, args, preexec_fn=None, arg_max=_helpers.SC_ARG_MAX): +def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max): """If args is not empty, yield the output produced by calling the command list with args as a sequence of strings (It may be necessary to return multiple strings in order to respect ARG_MAX).""" @@ -237,8 +247,8 @@ to return multiple strings in order to respect ARG_MAX).""" yield readpipe(command + sub_args, preexec_fn=preexec_fn) -def realpath(p): - """Get the absolute path of a file. +def resolve_parent(p): + """Return the absolute path of a file without following any final symlink. Behaves like os.path.realpath, but doesn't follow a symlink for the last element. (ie. if 'p' itself is a symlink, this one won't follow it, but it @@ -638,8 +648,8 @@ def atomically_replaced_file(name, mode='w', buffering=-1): name if everything succeeds. The mode and buffering arguments are handled exactly as with open, - and the yielded file will have have very restrictive permissions, - as per mkstemp. + and the yielded file will have very restrictive permissions, as + per mkstemp. E.g.:: @@ -713,6 +723,49 @@ def mmap_readwrite_private(f, sz = 0, close=True): close) +_mincore = getattr(_helpers, 'mincore', None) +if _mincore: + # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined. + MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1) + + _fmincore_chunk_size = None + def _set_fmincore_chunk_size(): + global _fmincore_chunk_size + pref_chunk_size = 64 * 1024 * 1024 + chunk_size = sc_page_size + if (sc_page_size < pref_chunk_size): + chunk_size = sc_page_size * (pref_chunk_size / sc_page_size) + _fmincore_chunk_size = chunk_size + + def fmincore(fd): + """Return the mincore() data for fd as a bytearray whose values can be + tested via MINCORE_INCORE, or None if fd does not fully + support the operation.""" + st = os.fstat(fd) + if (st.st_size == 0): + return bytearray(0) + if not _fmincore_chunk_size: + _set_fmincore_chunk_size() + pages_per_chunk = _fmincore_chunk_size / sc_page_size; + page_count = (st.st_size + sc_page_size - 1) / sc_page_size; + chunk_count = page_count / _fmincore_chunk_size + if chunk_count < 1: + chunk_count = 1 + result = bytearray(page_count) + for ci in xrange(chunk_count): + pos = _fmincore_chunk_size * ci; + msize = min(_fmincore_chunk_size, st.st_size - pos) + try: + m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos) + except mmap.error as ex: + if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV: + # Perhaps the file was a pipe, i.e. "... | bup split ..." + return None + raise ex + _mincore(m, msize, 0, result, ci * pages_per_chunk); + return result + + def parse_timestamp(epoch_str): """Return the number of nanoseconds since the epoch that are described by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed, @@ -828,8 +881,8 @@ def parse_date_or_fatal(str, fatal): """Parses the given date or calls Option.fatal(). For now we expect a string that contains a float.""" try: - date = atof(str) - except ValueError, e: + date = float(str) + except ValueError as e: raise fatal('invalid date format (should be a float): %r' % e) else: return date @@ -842,15 +895,15 @@ def parse_excludes(options, fatal): for flag in options: (option, parameter) = flag if option == '--exclude': - excluded_paths.append(realpath(parameter)) + excluded_paths.append(resolve_parent(parameter)) elif option == '--exclude-from': try: - f = open(realpath(parameter)) - except IOError, e: + f = open(resolve_parent(parameter)) + except IOError as e: raise fatal("couldn't read %s" % parameter) for exclude_path in f.readlines(): # FIXME: perhaps this should be rstrip('\n') - exclude_path = realpath(exclude_path.strip()) + exclude_path = resolve_parent(exclude_path.strip()) if exclude_path: excluded_paths.append(exclude_path) return sorted(frozenset(excluded_paths)) @@ -866,12 +919,12 @@ def parse_rx_excludes(options, fatal): if option == '--exclude-rx': try: excluded_patterns.append(re.compile(parameter)) - except re.error, ex: + except re.error as ex: fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex)) elif option == '--exclude-rx-from': try: - f = open(realpath(parameter)) - except IOError, e: + f = open(resolve_parent(parameter)) + except IOError as e: raise fatal("couldn't read %s" % parameter) for pattern in f.readlines(): spattern = pattern.rstrip('\n') @@ -879,7 +932,7 @@ def parse_rx_excludes(options, fatal): continue try: excluded_patterns.append(re.compile(spattern)) - except re.error, ex: + except re.error as ex: fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex)) return excluded_patterns @@ -930,6 +983,8 @@ def stripped_path_components(path, strip_prefixes): sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True) for bp in sorted_strip_prefixes: normalized_bp = os.path.abspath(bp) + if normalized_bp == '/': + continue if normalized_path.startswith(normalized_bp): prefix = normalized_path[:len(normalized_bp)] result = [] @@ -985,4 +1040,42 @@ def grafted_path_components(graft_points, path): return result return path_components(clean_path) + Sha1 = hashlib.sha1 + + +_localtime = getattr(_helpers, 'localtime', None) + +if _localtime: + bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday', + 'tm_hour', 'tm_min', 'tm_sec', + 'tm_wday', 'tm_yday', + 'tm_isdst', 'tm_gmtoff', 'tm_zone']) + +# Define a localtime() that returns bup_time when possible. Note: +# this means that any helpers.localtime() results may need to be +# passed through to_py_time() before being passed to python's time +# module, which doesn't appear willing to ignore the extra items. +if _localtime: + def localtime(time): + return bup_time(*_helpers.localtime(time)) + def utc_offset_str(t): + """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t. + If the current UTC offset does not represent an integer number + of minutes, the fractional component will be truncated.""" + off = localtime(t).tm_gmtoff + # Note: // doesn't truncate like C for negative values, it rounds down. + offmin = abs(off) // 60 + m = offmin % 60 + h = (offmin - m) // 60 + return "%+03d%02d" % (-h if off < 0 else h, m) + def to_py_time(x): + if isinstance(x, time.struct_time): + return x + return time.struct_time(x[:9]) +else: + localtime = time.localtime + def utc_offset_str(t): + return time.strftime('%z', localtime(t)) + def to_py_time(x): + return x