may be either a Metadata object, or an integer mode. Functions like
item_mode() and item_size() will return the mode and size in either
case. Any item.meta Metadata instances must not be modified directly.
-Make a copy to modify via item.meta.copy() if needed.
+Make a copy to modify via item.meta.copy() if needed, or call
+copy_item().
The want_meta argument is advisory for calls that accept it, and it
may not be honored. Callers must be able to handle an item.meta value
from __future__ import absolute_import, print_function
from collections import namedtuple
-from errno import ELOOP, ENOENT, ENOTDIR
-from itertools import chain, dropwhile, groupby, izip, tee
+from errno import EINVAL, ELOOP, ENOENT, ENOTDIR
+from itertools import chain, dropwhile, groupby, tee
+from random import randrange
from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
from time import localtime, strftime
import exceptions, re, sys
from bup import client, git, metadata
+from bup.compat import range
from bup.git import BUP_CHUNKED, cp, get_commit_items, parse_commit, tree_decode
from bup.helpers import debug2, last
from bup.metadata import Metadata
_, obj_t, size = next(it)
return ofs + sum(len(b) for b in it)
+def _skip_chunks_before_offset(tree, offset):
+ prev_ent = next(tree, None)
+ if not prev_ent:
+ return tree
+ ent = None
+ for ent in tree:
+ ent_ofs = int(ent[1], 16)
+ if ent_ofs > offset:
+ return chain([prev_ent, ent], tree)
+ if ent_ofs == offset:
+ return chain([ent], tree)
+ prev_ent = ent
+ return [prev_ent]
+
def _tree_chunks(repo, tree, startofs):
"Tree should be a sequence of (name, mode, hash) as per tree_decode()."
assert(startofs >= 0)
# name is the chunk's hex offset in the original file
- tree = dropwhile(lambda (_1, name, _2): int(name, 16) < startofs, tree)
- for mode, name, oid in tree:
+ for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
ofs = int(name, 16)
skipmore = startofs - ofs
if skipmore < 0:
return self._size
def seek(self, ofs):
- if ofs < 0:
- raise IOError(errno.EINVAL, 'Invalid argument')
- if ofs > self._compute_size():
- raise IOError(errno.EINVAL, 'Invalid argument')
+ if ofs < 0 or ofs > self._compute_size():
+ raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)
self.ofs = ofs
def tell(self):
return self.ofs
def read(self, count=-1):
+ size = self._compute_size()
+ if self.ofs >= size:
+ return ''
if count < 0:
- count = self._compute_size() - self.ofs
+ count = size - self.ofs
if not self.reader or self.reader.ofs != self.ofs:
self.reader = _ChunkReader(self._repo, self.oid, self.ofs)
try:
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
+FakeLink = namedtuple('FakeLink', ('meta', 'target'))
Root = namedtuple('Root', ('meta'))
Tags = namedtuple('Tags', ('meta'))
RevList = namedtuple('RevList', ('meta', 'oid'))
### vfs cache
### A general purpose shared cache with (currently) cheap random
-### eviction. There is currently no weighting so a single commit item
-### is just as likely to be evicted as an entire "rev-list". See
+### eviction. At the moment there is no weighting so a single commit
+### item is just as likely to be evicted as an entire "rev-list". See
### is_valid_cache_key for a description of the expected content.
_cache = {}
def is_valid_cache_key(x):
"""Return logically true if x looks like it could be a valid cache key
(with respect to structure). Current valid cache entries:
- commit_oid -> commit
- commit_oid + ':r' -> rev-list
- i.e. rev-list -> {'.', commit, '2012...', next_commit, ...}
+ res:... -> resolution
+ itm:OID -> Commit
+ rvl:OID -> {'.', commit, '2012...', next_commit, ...}
"""
# Suspect we may eventually add "(container_oid, name) -> ...", and others.
x_t = type(x)
if x_t is bytes:
- if len(x) == 20:
+ tag = x[:4]
+ if tag in ('itm:', 'rvl:') and len(x) == 24:
return True
- if len(x) == 22 and x.endswith(b':r'):
+ if tag == 'res:':
return True
def cache_get(key):
    """Return the value cached under key, or None if there isn't one.

    Raises Exception if key is rejected by is_valid_cache_key().  The
    check is an explicit raise rather than an assert so that it still
    runs when Python is started with -O.
    """
    global _cache
    if not is_valid_cache_key(key):
        raise Exception('invalid cache key: ' + repr(key))
    return _cache.get(key)
def cache_notice(key, value):
    """Record value under key in the shared cache, unless key is
    already present.

    While the cache holds fewer than _cache_max_items entries the key
    is simply added.  Once it is full, a randomly chosen existing key
    is evicted and its slot in _cache_keys is reused for the new key,
    so _cache and _cache_keys always describe the same set of keys.

    Raises Exception if key is rejected by is_valid_cache_key().
    """
    global _cache, _cache_keys, _cache_max_items
    if not is_valid_cache_key(key):
        raise Exception('invalid cache key: ' + repr(key))
    if key in _cache:
        return
    if len(_cache) < _cache_max_items:
        _cache_keys.append(key)
        _cache[key] = value
        return
    if not _cache_keys:
        # Zero (or negative) capacity: there is nothing to evict, and
        # randrange(0, 0) would raise ValueError, so just don't cache.
        return
    victim_i = randrange(0, len(_cache_keys))
    victim = _cache_keys[victim_i]
    # Drop the victim before inserting so the size never exceeds the
    # limit, then reuse its slot in the key list.
    del _cache[victim]
    _cache_keys[victim_i] = key
    _cache[key] = value
def cache_get_commit_item(oid, need_meta=True):
"""Return the requested tree item if it can be found in the cache.
When need_meta is true don't return a cached item that only has a
mode."""
# tree might be stored independently, or as '.' with its entries.
- item = cache_get(oid)
+ commit_key = b'itm:' + oid
+ item = cache_get(commit_key)
if item:
if not need_meta:
return item
if isinstance(item.meta, Metadata):
return item
- entries = cache_get(oid + b':r')
+ entries = cache_get(b'rvl:' + oid)
if entries:
return entries['.']
if commit:
return RevList(oid=oid, meta=commit.meta)
-
def copy_item(item):
    """Return a completely independent copy of item, such that
    modifications will not affect the original.

    Only a Metadata meta is duplicated (via meta.copy()).  An item
    whose meta is anything else (e.g. an integer mode), or that has
    no meta attribute at all, is returned as-is.
    """
    meta = getattr(item, 'meta', None)
    # Test the type rather than truthiness: a non-zero integer mode
    # is truthy but has no copy() method.
    if isinstance(meta, Metadata):
        return item._replace(meta=meta.copy())
    return item
def item_mode(item):
"""Return the integer mode (stat st_mode) for item."""
target = item.meta.symlink_target
if target:
return target
+ elif isinstance(item, FakeLink):
+ return item.target
return _readlink(repo, item.oid)
def _compute_item_size(repo, item):
meta = _find_treeish_oid_metadata(repo, commit.oid)
if meta:
commit = commit._replace(meta=meta)
- cache_notice(oid, commit)
+ commit_key = b'itm:' + oid
+ cache_notice(commit_key, commit)
return commit
def _revlist_item_from_oid(repo, oid, require_meta):
for ref in names:
if ref in ('.', '.tag'):
continue
- it = repo.cat(ref)
+ it = repo.cat('refs/heads/' + ref)
oidx, typ, size = next(it)
if not oidx:
for _ in it: pass
else:
ndig = len(str(ndup - 1))
fmt = '%s-' + '%0' + str(ndig) + 'd'
- for i in xrange(ndup - 1, -1, -1):
+ for i in range(ndup - 1, -1, -1):
yield fmt % (name, i)
def parse_rev(f):
if item:
return item
item = Commit(meta=default_dir_mode, oid=tree_oid, coid=coid)
- cache_notice(item.coid, item)
+ commit_key = b'itm:' + coid
+ cache_notice(commit_key, item)
return item
def cache_commit(repo, oid):
revs = None # Don't disturb the tees
rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
rev_items = (_item_for_rev(x) for x in rev_items)
- latest = None
+ tip = None
for item in rev_items:
- latest = latest or item
name = next(rev_names)
+ tip = tip or (name, item)
entries[name] = item
- entries['latest'] = latest
- cache_notice(latest.coid + b':r', entries)
+ entries['latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
+ revlist_key = b'rvl:' + tip[1].coid
+ cache_notice(revlist_key, entries)
return entries
def revlist_items(repo, oid, names):
# For now, don't worry about the possibility of the contents being
# "too big" for the cache.
- entries = cache_get(oid + b':r')
+ revlist_key = b'rvl:' + oid
+ entries = cache_get(revlist_key)
if not entries:
entries = cache_commit(repo, oid)
assert repo
assert S_ISDIR(item_mode(item))
item_t = type(item)
-
if item_t in real_tree_types:
it = repo.cat(item.oid.encode('hex'))
- _, obj_type, size = next(it)
+ _, obj_t, size = next(it)
data = ''.join(it)
- if obj_type == 'tree':
- if want_meta:
- item_gen = tree_items_with_meta(repo, item.oid, data, names)
- else:
- item_gen = tree_items(item.oid, data, names)
- elif obj_type == 'commit':
- if want_meta:
- item_gen = tree_items_with_meta(repo, item.oid, tree_data, names)
- else:
- item_gen = tree_items(item.oid, tree_data, names)
- else:
+ if obj_t != 'tree':
for _ in it: pass
- raise Exception('unexpected git ' + obj_type)
+ # Note: it shouldn't be possible to see an Item with type
+ # 'commit' since a 'commit' should always produce a Commit.
+ raise Exception('unexpected git ' + obj_t)
+ if want_meta:
+ item_gen = tree_items_with_meta(repo, item.oid, data, names)
+ else:
+ item_gen = tree_items(item.oid, data, names)
elif item_t == RevList:
item_gen = revlist_items(repo, item.oid, names)
elif item_t == Root:
yield x
def _resolve_path(repo, path, parent=None, want_meta=True, deref=False):
+ cache_key = b'res:%d%d%d:%s\0%s' \
+ % (bool(want_meta), bool(deref), repo.id(),
+ ('/'.join(x[0] for x in parent) if parent else ''),
+ '/'.join(path))
+ resolution = cache_get(cache_key)
+ if resolution:
+ return resolution
+
+ def notice_resolution(r):
+ cache_notice(cache_key, r)
+ return r
+
def raise_dir_required_but_not_dir(path, parent, past):
raise IOError(ENOTDIR,
"path %r%s resolves to non-directory %r"
deref = True
if not future: # path was effectively '.' or '/'
if is_absolute:
- return (('', _root),)
+ return notice_resolution((('', _root),))
if parent:
- return tuple(parent)
- return [('', _root)]
+ return notice_resolution(tuple(parent))
+ return notice_resolution((('', _root),))
if is_absolute:
past = [('', _root)]
else:
if not future:
if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
raise_dir_required_but_not_dir(path, parent, past)
- return tuple(past)
+ return notice_resolution(tuple(past))
segment = future.pop()
if segment == '..':
assert len(past) > 0
past[-1] = parent_name, parent_item
if not item:
past.append((segment, None),)
- return tuple(past)
+ return notice_resolution(tuple(past))
mode = item_mode(item)
if not S_ISLNK(mode):
if not S_ISDIR(mode):
terminus=past)
if must_be_dir:
raise_dir_required_but_not_dir(path, parent, past)
- return tuple(past)
+ return notice_resolution(tuple(past))
# It's treeish
if want_meta and type(item) in real_tree_types:
dir_meta = _find_treeish_oid_metadata(repo, item.oid)
is_absolute, _, target_future = _decompose_path(target)
if is_absolute:
if not target_future: # path was effectively '/'
- return (('', _root),)
+ return notice_resolution((('', _root),))
past = [('', _root)]
future = target_future
else:
meta.mode = m
meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0
if S_ISLNK(m):
- target = _readlink(repo, item.oid)
+ if isinstance(item, FakeLink):
+ target = item.target
+ else:
+ target = _readlink(repo, item.oid)
meta.symlink_target = target
meta.size = len(target)
elif include_size: