X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fbup%2Fhelpers.py;h=b052b63021511b035a94ac9945e01dce1099637d;hb=dd2bf330103347208f63198c7e8a1490bddc0c6e;hp=0923761fb4f5fdb4be224f2c7d7ea9abd94d7cf0;hpb=b81d49d349c279399f81a02448b2185b6a1cbcd9;p=bup.git diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py index 0923761..b052b63 100644 --- a/lib/bup/helpers.py +++ b/lib/bup/helpers.py @@ -1,13 +1,20 @@ """Helper functions and classes for bup.""" +from collections import namedtuple from ctypes import sizeof, c_void_p from os import environ +from contextlib import contextmanager import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct -import config, hashlib, heapq, operator, time, grp +import hashlib, heapq, math, operator, time, grp, tempfile -from bup import _version, _helpers -import bup._helpers as _helpers -import math +from bup import _helpers + +sc_page_size = os.sysconf('SC_PAGE_SIZE') +assert(sc_page_size > 0) + +sc_arg_max = os.sysconf('SC_ARG_MAX') +if sc_arg_max == -1: # "no definite limit" - let's choose 2M + sc_arg_max = 2 * 1024 * 1024 # This function should really be in helpers, not in bup.options. But we # want options.py to be standalone so people can include it in other projects. @@ -34,11 +41,15 @@ def atof(s): buglvl = atoi(os.environ.get('BUP_DEBUG', 0)) -# If the platform doesn't have fdatasync (OS X), fall back to fsync. -try: - fdatasync = os.fdatasync -except AttributeError: - fdatasync = os.fsync +if sys.platform.startswith('darwin'): + # Apparently fsync on OS X doesn't guarantee to sync all the way down + import fcntl + fdatasync = lambda fd : fcntl.fcntl(fd, fcntl.F_FULLFSYNC) +else: # If the platform doesn't have fdatasync, fall back to fsync + try: + fdatasync = os.fdatasync + except AttributeError: + fdatasync = os.fsync # Write (blockingly) to sockets that may or may not be in blocking mode. @@ -52,7 +63,7 @@ def _hard_write(fd, buf): raise IOError('select(fd) returned without being writable') try: sz = os.write(fd, buf) - except OSError, e: + except OSError as e: if e.errno != errno.EAGAIN: raise assert(sz >= 0) @@ -122,7 +133,7 @@ def mkdirp(d, mode=None): os.makedirs(d, mode) else: os.makedirs(d) - except OSError, e: + except OSError as e: if e.errno == errno.EEXIST: pass else: @@ -186,9 +197,9 @@ def unlink(f): """ try: os.unlink(f) - except OSError, e: - if e.errno == errno.ENOENT: - pass # it doesn't exist, that's what you asked for + except OSError as e: + if e.errno != errno.ENOENT: + raise def readpipe(argv, preexec_fn=None): @@ -214,13 +225,15 @@ def _argmax_args_size(args): return sum(len(x) + 1 + sizeof(c_void_p) for x in args) -def batchpipe(command, args, preexec_fn=None): +def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max): """If args is not empty, yield the output produced by calling the command list with args as a sequence of strings (It may be necessary to return multiple strings in order to respect ARG_MAX).""" + # The optional arg_max arg is a workaround for an issue with the + # current wvtest behavior. base_size = _argmax_base(command) while args: - room = config.arg_max - base_size + room = arg_max - base_size i = 0 while i < len(args): next_size = _argmax_args_size(args[i:i+1]) @@ -234,8 +247,8 @@ to return multiple strings in order to respect ARG_MAX).""" yield readpipe(command + sub_args, preexec_fn=preexec_fn) -def realpath(p): - """Get the absolute path of a file. +def resolve_parent(p): + """Return the absolute path of a file without following any final symlink. Behaves like os.path.realpath, but doesn't follow a symlink for the last element. (ie. if 'p' itself is a symlink, this one won't follow it, but it @@ -626,6 +639,42 @@ def chunkyreader(f, count = None): yield b +@contextmanager +def atomically_replaced_file(name, mode='w', buffering=-1): + """Yield a file that will be atomically renamed name when leaving the block. + + This contextmanager yields an open file object that is backed by a + temporary file which will be renamed (atomically) to the target + name if everything succeeds. + + The mode and buffering arguments are handled exactly as with open, + and the yielded file will have very restrictive permissions, as + per mkstemp. + + E.g.:: + + with atomically_replaced_file('foo.txt', 'w') as f: + f.write('hello jack.') + + """ + + (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name), + text=('b' not in mode)) + try: + try: + f = os.fdopen(ffd, mode, buffering) + except: + os.close(ffd) + raise + try: + yield f + finally: + f.close() + os.rename(tempname, name) + finally: + unlink(tempname) # nonexistant file is ignored + + def slashappend(s): """Append "/" to 's' if it doesn't aleady end in "/".""" if s and not s.endswith('/'): @@ -674,6 +723,49 @@ def mmap_readwrite_private(f, sz = 0, close=True): close) +_mincore = getattr(_helpers, 'mincore', None) +if _mincore: + # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined. + MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1) + + _fmincore_chunk_size = None + def _set_fmincore_chunk_size(): + global _fmincore_chunk_size + pref_chunk_size = 64 * 1024 * 1024 + chunk_size = sc_page_size + if (sc_page_size < pref_chunk_size): + chunk_size = sc_page_size * (pref_chunk_size / sc_page_size) + _fmincore_chunk_size = chunk_size + + def fmincore(fd): + """Return the mincore() data for fd as a bytearray whose values can be + tested via MINCORE_INCORE, or None if fd does not fully + support the operation.""" + st = os.fstat(fd) + if (st.st_size == 0): + return bytearray(0) + if not _fmincore_chunk_size: + _set_fmincore_chunk_size() + pages_per_chunk = _fmincore_chunk_size / sc_page_size; + page_count = (st.st_size + sc_page_size - 1) / sc_page_size; + chunk_count = page_count / _fmincore_chunk_size + if chunk_count < 1: + chunk_count = 1 + result = bytearray(page_count) + for ci in xrange(chunk_count): + pos = _fmincore_chunk_size * ci; + msize = min(_fmincore_chunk_size, st.st_size - pos) + try: + m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos) + except mmap.error as ex: + if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV: + # Perhaps the file was a pipe, i.e. "... | bup split ..." + return None + raise ex + _mincore(m, msize, 0, result, ci * pages_per_chunk); + return result + + def parse_timestamp(epoch_str): """Return the number of nanoseconds since the epoch that are described by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed, @@ -789,8 +881,8 @@ def parse_date_or_fatal(str, fatal): """Parses the given date or calls Option.fatal(). For now we expect a string that contains a float.""" try: - date = atof(str) - except ValueError, e: + date = float(str) + except ValueError as e: raise fatal('invalid date format (should be a float): %r' % e) else: return date @@ -803,14 +895,17 @@ def parse_excludes(options, fatal): for flag in options: (option, parameter) = flag if option == '--exclude': - excluded_paths.append(realpath(parameter)) + excluded_paths.append(resolve_parent(parameter)) elif option == '--exclude-from': try: - f = open(realpath(parameter)) - except IOError, e: + f = open(resolve_parent(parameter)) + except IOError as e: raise fatal("couldn't read %s" % parameter) for exclude_path in f.readlines(): - excluded_paths.append(realpath(exclude_path.strip())) + # FIXME: perhaps this should be rstrip('\n') + exclude_path = resolve_parent(exclude_path.strip()) + if exclude_path: + excluded_paths.append(exclude_path) return sorted(frozenset(excluded_paths)) @@ -824,18 +919,20 @@ def parse_rx_excludes(options, fatal): if option == '--exclude-rx': try: excluded_patterns.append(re.compile(parameter)) - except re.error, ex: + except re.error as ex: fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex)) elif option == '--exclude-rx-from': try: - f = open(realpath(parameter)) - except IOError, e: + f = open(resolve_parent(parameter)) + except IOError as e: raise fatal("couldn't read %s" % parameter) for pattern in f.readlines(): spattern = pattern.rstrip('\n') + if not spattern: + continue try: excluded_patterns.append(re.compile(spattern)) - except re.error, ex: + except re.error as ex: fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex)) return excluded_patterns @@ -886,6 +983,8 @@ def stripped_path_components(path, strip_prefixes): sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True) for bp in sorted_strip_prefixes: normalized_bp = os.path.abspath(bp) + if normalized_bp == '/': + continue if normalized_path.startswith(normalized_bp): prefix = normalized_path[:len(normalized_bp)] result = [] @@ -941,31 +1040,42 @@ def grafted_path_components(graft_points, path): return result return path_components(clean_path) -Sha1 = hashlib.sha1 - -def version_date(): - """Format bup's version date string for output.""" - return _version.DATE.split(' ')[0] - - -def version_commit(): - """Get the commit hash of bup's current version.""" - return _version.COMMIT +Sha1 = hashlib.sha1 -def version_tag(): - """Format bup's version tag (the official version number). - When generated from a commit other than one pointed to with a tag, the - returned string will be "unknown-" followed by the first seven positions of - the commit hash. - """ - names = _version.NAMES.strip() - assert(names[0] == '(') - assert(names[-1] == ')') - names = names[1:-1] - l = [n.strip() for n in names.split(',')] - for n in l: - if n.startswith('tag: bup-'): - return n[9:] - return 'unknown-%s' % _version.COMMIT[:7] +_localtime = getattr(_helpers, 'localtime', None) + +if _localtime: + bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday', + 'tm_hour', 'tm_min', 'tm_sec', + 'tm_wday', 'tm_yday', + 'tm_isdst', 'tm_gmtoff', 'tm_zone']) + +# Define a localtime() that returns bup_time when possible. Note: +# this means that any helpers.localtime() results may need to be +# passed through to_py_time() before being passed to python's time +# module, which doesn't appear willing to ignore the extra items. +if _localtime: + def localtime(time): + return bup_time(*_helpers.localtime(time)) + def utc_offset_str(t): + """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t. + If the current UTC offset does not represent an integer number + of minutes, the fractional component will be truncated.""" + off = localtime(t).tm_gmtoff + # Note: // doesn't truncate like C for negative values, it rounds down. + offmin = abs(off) // 60 + m = offmin % 60 + h = (offmin - m) // 60 + return "%+03d%02d" % (-h if off < 0 else h, m) + def to_py_time(x): + if isinstance(x, time.struct_time): + return x + return time.struct_time(x[:9]) +else: + localtime = time.localtime + def utc_offset_str(t): + return time.strftime('%z', localtime(t)) + def to_py_time(x): + return x