may be either a Metadata object, or an integer mode. Functions like
item_mode() and item_size() will return the mode and size in either
case. Any item.meta Metadata instances must not be modified directly.
-Make a copy to modify via item.meta.copy() if needed.
+Make a copy to modify via item.meta.copy() if needed, or call
+copy_item().
The want_meta argument is advisory for calls that accept it, and it
may not be honored. Callers must be able to handle an item.meta value
from __future__ import absolute_import, print_function
from collections import namedtuple
-from errno import ELOOP, ENOENT, ENOTDIR
-from itertools import chain, dropwhile, groupby, izip, tee
+from errno import EINVAL, ELOOP, ENOENT, ENOTDIR
+from itertools import chain, dropwhile, groupby, tee
+from random import randrange
from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
from time import localtime, strftime
-import exceptions, re, sys
+import re, sys
-from bup import client, git, metadata
+from bup import git, metadata, vint
+from bup.compat import range
from bup.git import BUP_CHUNKED, cp, get_commit_items, parse_commit, tree_decode
from bup.helpers import debug2, last
from bup.metadata import Metadata
-from bup.repo import LocalRepo, RemoteRepo
+from bup.vint import read_bvec, write_bvec
+from bup.vint import read_vint, write_vint
+from bup.vint import read_vuint, write_vuint
+if sys.version_info[0] < 3:
+ from exceptions import IOError as py_IOError
+else:
+ py_IOError = IOError
-class IOError(exceptions.IOError):
+# We currently assume that it's always appropriate to just forward IOErrors
+# to a remote client.
+
+class IOError(py_IOError):
def __init__(self, errno, message, terminus=None):
- exceptions.IOError.__init__(self, errno, message)
+ py_IOError.__init__(self, errno, message)
self.terminus = terminus
+def write_ioerror(port, ex):
+    """Serialize the vfs IOError ex to port.
+
+    The encoding is a vuint bitmask announcing which fields follow
+    (1: errno, 2: message, 4: terminus), and then each field that is
+    present, in that order: the errno as a vint, the message as a
+    UTF-8 encoded bvec, and the terminus via write_resolution().
+
+    """
+    assert isinstance(ex, IOError)
+    # The constructor hands (errno, message) to the built-in IOError,
+    # which stores the text as ex.strerror.  ex.message can't be used
+    # here: with two constructor arguments Python 2 leaves it as '',
+    # and Python 3 exceptions have no such attribute.
+    msg = ex.strerror
+    write_vuint(port,
+                (1 if ex.errno is not None else 0)
+                | (2 if msg is not None else 0)
+                | (4 if ex.terminus is not None else 0))
+    if ex.errno is not None:
+        write_vint(port, ex.errno)
+    if msg is not None:
+        write_bvec(port, msg.encode('utf-8'))
+    if ex.terminus is not None:
+        write_resolution(port, ex.terminus)
+
+def read_ioerror(port):
+    """Read an IOError in the format produced by write_ioerror().
+
+    A leading vuint bitmask says which fields follow (1: errno as a
+    vint, 2: message as a UTF-8 bvec, 4: terminus as a resolution);
+    any field that is absent comes back as None.
+
+    """
+    flags = read_vuint(port)
+    errno = message = terminus = None
+    # The fields must be consumed in the same order they were written.
+    if flags & 1:
+        errno = read_vint(port)
+    if flags & 2:
+        message = read_bvec(port).decode('utf-8')
+    if flags & 4:
+        terminus = read_resolution(port)
+    return IOError(errno=errno, message=message, terminus=terminus)
+
+
default_file_mode = S_IFREG | 0o644
default_dir_mode = S_IFDIR | 0o755
default_symlink_mode = S_IFLNK | 0o755
_, obj_t, size = next(it)
return ofs + sum(len(b) for b in it)
+def _skip_chunks_before_offset(tree, offset):
+    """Return an iterable of the tree entries needed to read from offset.
+
+    Each entry is indexed as ent[1] to get its name, which is the
+    chunk's hex offset in the original file.  The result starts with
+    the entry whose offset equals offset, or else with the entry just
+    before the first one whose offset is greater, and continues with
+    everything after it.  If every entry starts before offset, only
+    the final entry is returned.  The first entry's own offset is
+    never examined.
+
+    tree may be any iterable; it is consumed lazily.
+    NOTE(review): this presumes entries are ordered by ascending
+    offset -- confirm against tree_decode().
+
+    """
+    # Accept sequences as well as iterators; next() needs an iterator.
+    tree = iter(tree)
+    prev_ent = next(tree, None)
+    if prev_ent is None:
+        return tree
+    for ent in tree:
+        ent_ofs = int(ent[1], 16)
+        if ent_ofs > offset:
+            # offset falls inside the previous chunk
+            return chain([prev_ent, ent], tree)
+        if ent_ofs == offset:
+            return chain([ent], tree)
+        prev_ent = ent
+    return [prev_ent]
+
def _tree_chunks(repo, tree, startofs):
"Tree should be a sequence of (name, mode, hash) as per tree_decode()."
assert(startofs >= 0)
# name is the chunk's hex offset in the original file
- tree = dropwhile(lambda (_1, name, _2): int(name, 16) < startofs, tree)
- for mode, name, oid in tree:
+ for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
ofs = int(name, 16)
skipmore = startofs - ofs
if skipmore < 0:
return self._size
def seek(self, ofs):
- if ofs < 0:
- raise IOError(errno.EINVAL, 'Invalid argument')
- if ofs > self._compute_size():
- raise IOError(errno.EINVAL, 'Invalid argument')
+ if ofs < 0 or ofs > self._compute_size():
+ raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)
self.ofs = ofs
def tell(self):
return self.ofs
def read(self, count=-1):
+ size = self._compute_size()
+ if self.ofs >= size:
+ return ''
if count < 0:
- count = self._compute_size() - self.ofs
+ count = size - self.ofs
if not self.reader or self.reader.ofs != self.ofs:
self.reader = _ChunkReader(self._repo, self.oid, self.ofs)
try:
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
+FakeLink = namedtuple('FakeLink', ('meta', 'target'))
Root = namedtuple('Root', ('meta'))
Tags = namedtuple('Tags', ('meta'))
RevList = namedtuple('RevList', ('meta', 'oid'))
item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit))
real_tree_types = frozenset((Item, Commit))
+def write_item(port, item):
+    """Serialize the vfs item to port in the format read_item() expects.
+
+    The record is the item's type name, a has_meta flag, and the
+    type's own fields: oid for Item, Chunky and RevList; nothing for
+    Root and Tags; oid and coid for Commit; target for FakeLink.  If
+    item.meta is a Metadata instance it is then written (without its
+    path) via Metadata.write(); otherwise item.meta is sent as a
+    trailing 'V' field.
+
+    Raises an Exception for any other type of item.
+
+    """
+    kind = type(item)
+    # encode() rather than bytes(): bytes(str) is a TypeError on
+    # Python 3, while encode() yields a byte string on both 2 and 3.
+    name = kind.__name__.encode('ascii')
+    meta = item.meta
+    has_meta = 1 if isinstance(meta, Metadata) else 0
+    if kind in (Item, Chunky, RevList):
+        assert len(item.oid) == 20
+        fields = (item.oid,)
+    elif kind in (Root, Tags):
+        fields = ()
+    elif kind == Commit:
+        assert len(item.oid) == 20
+        assert len(item.coid) == 20
+        fields = (item.oid, item.coid)
+    elif kind == FakeLink:
+        fields = (item.target,)
+    else:
+        # Raise instead of "assert False" so that running with -O
+        # can't silently skip writing an unsupported item.
+        raise Exception('unexpected vfs item: ' + repr(item))
+    spec = 'sV' + 's' * len(fields)
+    if has_meta:
+        vint.send(port, spec, name, has_meta, *fields)
+        Metadata.write(meta, port, include_path=False)
+    else:
+        vint.send(port, spec + 'V', name, has_meta, *(fields + (meta,)))
+
+def read_item(port):
+    """Read and return one vfs item as serialized by write_item().
+
+    The record starts with the item's type name and a has_meta flag,
+    followed by the type's fields (oid; oid and coid; or target) and
+    finally either a Metadata record (has_meta true) or a vuint.
+
+    Raises an Exception if the type name is not recognized.
+
+    """
+    def read_m(port, has_meta):
+        if has_meta:
+            return Metadata.read(port)
+        return read_vuint(port)
+    kind, has_meta = vint.recv(port, 'sV')
+    # Types whose only field besides meta is an oid
+    oid_kinds = {b'Item': Item, b'Chunky': Chunky, b'RevList': RevList}
+    if kind in oid_kinds:
+        oid = read_bvec(port)
+        meta = read_m(port, has_meta)
+        return oid_kinds[kind](oid=oid, meta=meta)
+    if kind == b'Root':
+        return Root(meta=read_m(port, has_meta))
+    if kind == b'Tags':
+        return Tags(meta=read_m(port, has_meta))
+    if kind == b'Commit':
+        oid, coid = vint.recv(port, 'ss')
+        meta = read_m(port, has_meta)
+        return Commit(oid=oid, coid=coid, meta=meta)
+    if kind == b'FakeLink':
+        target = read_bvec(port)
+        meta = read_m(port, has_meta)
+        return FakeLink(target=target, meta=meta)
+    # Raise instead of "assert False" so that running with -O can't
+    # return None and leave the stream out of sync.
+    raise Exception('unexpected vfs item kind: ' + repr(kind))
+
+def write_resolution(port, resolution):
+    """Serialize resolution, a sequence of (name, item) pairs, to port.
+
+    The element count is written as a vuint.  Each element is then
+    its name as a bvec followed by a marker byte: b'\1' and the item
+    itself (via write_item()) when the item is logically true, or
+    just b'\0' when it is not.
+
+    """
+    write_vuint(port, len(resolution))
+    for segment, entry in resolution:
+        write_bvec(port, segment)
+        if not entry:
+            port.write(b'\0')
+            continue
+        port.write(b'\1')
+        write_item(port, entry)
+
+def read_resolution(port):
+    """Read a resolution as serialized by write_resolution().
+
+    Returns a tuple of (name, item) pairs, where item is None for any
+    element that was written without an item.
+
+    Raises an Exception if an element's marker byte is missing (end
+    of stream) or is anything other than b'\0' or b'\1'.
+
+    """
+    n = read_vuint(port)
+    result = []
+    for _ in range(n):
+        name = read_bvec(port)
+        marker = port.read(1)
+        # Check explicitly rather than via assert, so that the
+        # validation still happens when running with -O, and so that
+        # a truncated stream is reported clearly.
+        if marker not in (b'\0', b'\1'):
+            raise Exception('invalid resolution item marker: '
+                            + repr(marker))
+        item = read_item(port) if marker == b'\1' else None
+        result.append((name, item))
+    return tuple(result)
+
+
_root = Root(meta=default_dir_mode)
_tags = Tags(meta=default_dir_mode)
### vfs cache
### A general purpose shared cache with (currently) cheap random
-### eviction. There is currently no weighting so a single commit item
-### is just as likely to be evicted as an entire "rev-list". See
+### eviction. At the moment there is no weighting so a single commit
+### item is just as likely to be evicted as an entire "rev-list". See
### is_valid_cache_key for a description of the expected content.
_cache = {}
def is_valid_cache_key(x):
"""Return logically true if x looks like it could be a valid cache key
(with respect to structure). Current valid cache entries:
- commit_oid -> commit
- commit_oid + ':r' -> rev-list
- i.e. rev-list -> {'.', commit, '2012...', next_commit, ...}
+ res:... -> resolution
+ itm:OID -> Commit
+ rvl:OID -> {'.', commit, '2012...', next_commit, ...}
"""
# Suspect we may eventually add "(container_oid, name) -> ...", and others.
x_t = type(x)
if x_t is bytes:
- if len(x) == 20:
+ tag = x[:4]
+ if tag in ('itm:', 'rvl:') and len(x) == 24:
return True
- if len(x) == 22 and x.endswith(b':r'):
+ if tag == 'res:':
return True
def cache_get(key):
global _cache
- assert is_valid_cache_key(key)
+ if not is_valid_cache_key(key):
+ raise Exception('invalid cache key: ' + repr(key))
return _cache.get(key)
def cache_notice(key, value):
global _cache, _cache_keys, _cache_max_items
- assert is_valid_cache_key(key)
+ if not is_valid_cache_key(key):
+ raise Exception('invalid cache key: ' + repr(key))
if key in _cache:
return
- _cache[key] = value
if len(_cache) < _cache_max_items:
+ _cache_keys.append(key)
+ _cache[key] = value
return
- victim_i = random.randrange(0, len(_cache_keys))
+ victim_i = randrange(0, len(_cache_keys))
victim = _cache_keys[victim_i]
+ del _cache[victim]
_cache_keys[victim_i] = key
- _cache.pop(victim)
-
+ _cache[key] = value
def cache_get_commit_item(oid, need_meta=True):
"""Return the requested tree item if it can be found in the cache.
When need_meta is true don't return a cached item that only has a
mode."""
# tree might be stored independently, or as '.' with its entries.
- item = cache_get(oid)
+ commit_key = b'itm:' + oid
+ item = cache_get(commit_key)
if item:
if not need_meta:
return item
if isinstance(item.meta, Metadata):
return item
- entries = cache_get(oid + b':r')
+ entries = cache_get(b'rvl:' + oid)
if entries:
return entries['.']
if commit:
return RevList(oid=oid, meta=commit.meta)
-
def copy_item(item):
"""Return a completely independent copy of item, such that
modifications will not affect the original.
"""
meta = getattr(item, 'meta', None)
- if not meta:
- return item
- return(item._replace(meta=meta.copy()))
+ if isinstance(meta, Metadata):
+ return(item._replace(meta=meta.copy()))
+ return item
def item_mode(item):
"""Return the integer mode (stat st_mode) for item."""
target = item.meta.symlink_target
if target:
return target
+ elif isinstance(item, FakeLink):
+ return item.target
return _readlink(repo, item.oid)
def _compute_item_size(repo, item):
meta = _find_treeish_oid_metadata(repo, commit.oid)
if meta:
commit = commit._replace(meta=meta)
- cache_notice(oid, commit)
+ commit_key = b'itm:' + oid
+ cache_notice(commit_key, commit)
return commit
def _revlist_item_from_oid(repo, oid, require_meta):
else:
ndig = len(str(ndup - 1))
fmt = '%s-' + '%0' + str(ndig) + 'd'
- for i in xrange(ndup - 1, -1, -1):
+ for i in range(ndup - 1, -1, -1):
yield fmt % (name, i)
def parse_rev(f):
if item:
return item
item = Commit(meta=default_dir_mode, oid=tree_oid, coid=coid)
- cache_notice(item.coid, item)
+ commit_key = b'itm:' + coid
+ cache_notice(commit_key, item)
return item
def cache_commit(repo, oid):
revs = None # Don't disturb the tees
rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
rev_items = (_item_for_rev(x) for x in rev_items)
- latest = None
+ tip = None
for item in rev_items:
- latest = latest or item
name = next(rev_names)
+ tip = tip or (name, item)
entries[name] = item
- entries['latest'] = latest
- cache_notice(latest.coid + b':r', entries)
+ entries['latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
+ revlist_key = b'rvl:' + tip[1].coid
+ cache_notice(revlist_key, entries)
return entries
def revlist_items(repo, oid, names):
# For now, don't worry about the possibility of the contents being
# "too big" for the cache.
- entries = cache_get(oid + b':r')
+ revlist_key = b'rvl:' + oid
+ entries = cache_get(revlist_key)
if not entries:
entries = cache_commit(repo, oid)
assert repo
assert S_ISDIR(item_mode(item))
item_t = type(item)
-
if item_t in real_tree_types:
it = repo.cat(item.oid.encode('hex'))
- _, obj_type, size = next(it)
+ _, obj_t, size = next(it)
data = ''.join(it)
- if obj_type == 'tree':
- if want_meta:
- item_gen = tree_items_with_meta(repo, item.oid, data, names)
- else:
- item_gen = tree_items(item.oid, data, names)
- elif obj_type == 'commit':
- if want_meta:
- item_gen = tree_items_with_meta(repo, item.oid, tree_data, names)
- else:
- item_gen = tree_items(item.oid, tree_data, names)
- else:
+ if obj_t != 'tree':
for _ in it: pass
- raise Exception('unexpected git ' + obj_type)
+ # Note: it shouldn't be possible to see an Item with type
+ # 'commit' since a 'commit' should always produce a Commit.
+ raise Exception('unexpected git ' + obj_t)
+ if want_meta:
+ item_gen = tree_items_with_meta(repo, item.oid, data, names)
+ else:
+ item_gen = tree_items(item.oid, data, names)
elif item_t == RevList:
item_gen = revlist_items(repo, item.oid, names)
elif item_t == Root:
for x in item_gen:
yield x
-def _resolve_path(repo, path, parent=None, want_meta=True, deref=False):
+def _resolve_path(repo, path, parent=None, want_meta=True, follow=True):
+ cache_key = b'res:%d%d%d:%s\0%s' \
+ % (bool(want_meta), bool(follow), repo.id(),
+ ('/'.join(x[0] for x in parent) if parent else ''),
+ '/'.join(path))
+ resolution = cache_get(cache_key)
+ if resolution:
+ return resolution
+
+ def notice_resolution(r):
+ cache_notice(cache_key, r)
+ return r
+
def raise_dir_required_but_not_dir(path, parent, past):
raise IOError(ENOTDIR,
"path %r%s resolves to non-directory %r"
% (parent,))
is_absolute, must_be_dir, future = _decompose_path(path)
if must_be_dir:
- deref = True
+ follow = True
if not future: # path was effectively '.' or '/'
if is_absolute:
- return (('', _root),)
+ return notice_resolution((('', _root),))
if parent:
- return tuple(parent)
- return [('', _root)]
+ return notice_resolution(tuple(parent))
+ return notice_resolution((('', _root),))
if is_absolute:
past = [('', _root)]
else:
if not future:
if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
raise_dir_required_but_not_dir(path, parent, past)
- return tuple(past)
+ return notice_resolution(tuple(past))
segment = future.pop()
if segment == '..':
assert len(past) > 0
past[-1] = parent_name, parent_item
if not item:
past.append((segment, None),)
- return tuple(past)
+ return notice_resolution(tuple(past))
mode = item_mode(item)
if not S_ISLNK(mode):
if not S_ISDIR(mode):
terminus=past)
if must_be_dir:
raise_dir_required_but_not_dir(path, parent, past)
- return tuple(past)
+ return notice_resolution(tuple(past))
# It's treeish
if want_meta and type(item) in real_tree_types:
dir_meta = _find_treeish_oid_metadata(repo, item.oid)
item = item._replace(meta=dir_meta)
past.append((segment, item))
else: # symlink
- if not future and not deref:
+ if not future and not follow:
past.append((segment, item),)
continue
if hops > 100:
is_absolute, _, target_future = _decompose_path(target)
if is_absolute:
if not target_future: # path was effectively '/'
- return (('', _root),)
+ return notice_resolution((('', _root),))
past = [('', _root)]
future = target_future
else:
future.extend(target_future)
hops += 1
-def lresolve(repo, path, parent=None, want_meta=True):
- """Perform exactly the same function as resolve(), except if the final
- path element is a symbolic link, don't follow it, just return it
- in the result.
-
- """
- return _resolve_path(repo, path, parent=parent, want_meta=want_meta,
- deref=False)
-
-def resolve(repo, path, parent=None, want_meta=True):
+def resolve(repo, path, parent=None, want_meta=True, follow=True):
"""Follow the path in the virtual filesystem and return a tuple
representing the location, if any, denoted by the path. Each
element in the result tuple will be (name, info), where info will
be a VFS item that can be passed to functions like item_mode().
+ If follow is false, and if the final path element is a symbolic
+ link, don't follow it, just return it in the result.
+
If a path segment that does not exist is encountered during
resolution, the result will represent the location of the missing
item, and that item in the result will be None.
needed, make a copy via item.meta.copy() and modify that instead.
"""
+ if repo.is_remote():
+ # Redirect to the more efficient remote version
+ return repo.resolve(path, parent=parent, want_meta=want_meta,
+ follow=follow)
result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
- deref=True)
+ follow=follow)
_, leaf_item = result[-1]
- if leaf_item:
+ if leaf_item and follow:
assert not S_ISLNK(item_mode(leaf_item))
return result
def try_resolve(repo, path, parent=None, want_meta=True):
"""If path does not refer to a symlink, does not exist, or refers to a
- valid symlink, behave exactly like resolve(). If path refers to
- an invalid symlink, behave like lresolve.
+ valid symlink, behave exactly like resolve(..., follow=True). If
+ path refers to an invalid symlink, behave like resolve(...,
+ follow=False).
"""
- res = lresolve(repo, path, parent=parent, want_meta=want_meta)
+ res = resolve(repo, path, parent=parent, want_meta=want_meta, follow=False)
leaf_name, leaf_item = res[-1]
if not leaf_item:
return res
if not S_ISLNK(item_mode(leaf_item)):
return res
- deref = resolve(repo, leaf_name, parent=res[:-1], want_meta=want_meta)
- deref_name, deref_item = deref[-1]
- if deref_item:
- return deref
+ follow = resolve(repo, leaf_name, parent=res[:-1], want_meta=want_meta)
+ follow_name, follow_item = follow[-1]
+ if follow_item:
+ return follow
return res
def augment_item_meta(repo, item, include_size=False):
meta.mode = m
meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0
if S_ISLNK(m):
- target = _readlink(repo, item.oid)
+ if isinstance(item, FakeLink):
+ target = item.target
+ else:
+ target = _readlink(repo, item.oid)
meta.symlink_target = target
meta.size = len(target)
elif include_size: