from __future__ import absolute_import, print_function
from binascii import hexlify, unhexlify
from collections import namedtuple
-from errno import EINVAL, ELOOP, ENOENT, ENOTDIR
-from itertools import chain, dropwhile, groupby, tee
+from errno import EINVAL, ELOOP, ENOTDIR
+from itertools import chain, groupby, tee
from random import randrange
from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
from time import localtime, strftime
import re, sys
-from bup import git, metadata, vint
-from bup.compat import hexstr, range
-from bup.git import BUP_CHUNKED, cp, get_commit_items, parse_commit, tree_decode
-from bup.helpers import debug2, last
+from bup import git, vint
+from bup.compat import hexstr, pending_raise
+from bup.git import BUP_CHUNKED, parse_commit, tree_decode
+from bup.helpers import debug2, last, nullcontext_if_not
from bup.io import path_msg
from bup.metadata import Metadata
from bup.vint import read_bvec, write_bvec
class _FileReader(object):
def __init__(self, repo, oid, known_size=None):
assert len(oid) == 20
+ self.closed = False
self.oid = oid
self.ofs = 0
self.reader = None
if not self._size:
self._size = _normal_or_chunked_file_size(self._repo, self.oid)
return self._size
-
+
def seek(self, ofs):
if ofs < 0 or ofs > self._compute_size():
raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)
return buf
def close(self):
- pass
+ self.closed = True
+
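+ # A _FileReader that is garbage collected without having been closed
+ # is a bug; __del__ makes that assumption explicit.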
+ def __del__(self):
+ assert self.closed
def __enter__(self):
return self
+
def __exit__(self, type, value, traceback):
- self.close()
- return False
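+ # Let any exception already in flight (value) take precedence over
+ # anything close() itself might raise.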
+ with pending_raise(value, rethrow=False):
+ self.close()
_multiple_slashes_rx = re.compile(br'//+')
if not parts:
must_be_dir = True # e.g. path was effectively '.' or '/', etc.
return is_absolute, must_be_dir, parts
-
+
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
RevList = namedtuple('RevList', ('meta', 'oid'))
Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))
-item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit))
-real_tree_types = frozenset((Item, Commit))
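+# Plain tuples rather than frozensets so they can be handed straight
+# to isinstance() below.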
+item_types = (Item, Chunky, Root, Tags, RevList, Commit)
+real_tree_types = (Item, Commit)
def write_item(port, item):
kind = type(item)
rvl:OID -> {'.', commit, '2012...', next_commit, ...}
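+ For example, b'itm:' + a 20-byte oid (24 bytes in all) is
+ structurally valid, while b'itm:abc' is not (wrong length).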
"""
# Suspect we may eventually add "(container_oid, name) -> ...", and others.
- x_t = type(x)
- if x_t is bytes:
+ if isinstance(x, bytes):
tag = x[:4]
if tag in (b'itm:', b'rvl:') and len(x) == 24:
return True
if tag == b'res:':
return True
+ return False
def cache_get(key):
global _cache
raise Exception('invalid cache key: ' + repr(key))
return _cache.get(key)
-def cache_notice(key, value):
+def cache_notice(key, value, overwrite=False):
global _cache, _cache_keys, _cache_max_items
if not is_valid_cache_key(key):
raise Exception('invalid cache key: ' + repr(key))
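+ # An existing entry wins unless the caller asks to overwrite it.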
if key in _cache:
+ if overwrite:
+ _cache[key] = value
return
if len(_cache) < _cache_max_items:
_cache_keys.append(key)
_cache_keys[victim_i] = key
_cache[key] = value
+def _has_metadata_if_needed(item, need_meta):
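+ """Return true unless need_meta is set and item lacks a Metadata instance."""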
+ if not need_meta:
+ return True
+ if isinstance(item.meta, Metadata):
+ return True
+ return False
+
def cache_get_commit_item(oid, need_meta=True):
"""Return the requested tree item if it can be found in the cache.
When need_meta is true don't return a cached item that only has a
commit_key = b'itm:' + oid
item = cache_get(commit_key)
if item:
- if not need_meta:
- return item
- if isinstance(item.meta, Metadata):
+ if _has_metadata_if_needed(item, need_meta):
return item
entries = cache_get(b'rvl:' + oid)
if entries:
- return entries[b'.']
-
-def cache_get_revlist_item(oid, need_meta=True):
- commit = cache_get_commit_item(oid, need_meta=need_meta)
- if commit:
- return RevList(oid=oid, meta=commit.meta)
+ item = entries[b'.']
+ if _has_metadata_if_needed(item, need_meta):
+ return item
+ return None
def copy_item(item):
"""Return a completely independent copy of item, such that
"""Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
tree has no metadata (i.e. older bup save, or non-bup tree).
- """
+ """
assert len(oid) == 20
it = repo.cat(hexlify(oid))
_, item_t, size = next(it)
if meta:
commit = commit._replace(meta=meta)
commit_key = b'itm:' + oid
- cache_notice(commit_key, commit)
+ cache_notice(commit_key, commit, overwrite=True)
return commit
def _revlist_item_from_oid(repo, oid, require_meta):
"""
# Sadly, the .bupm entries currently aren't in git tree order,
- # i.e. they don't account for the fact that git sorts trees
- # (including our chunked trees) as if their names ended with "/",
- # so "fo" sorts after "fo." iff fo is a directory. This makes
- # streaming impossible when we need the metadata.
+ # but in unmangled name order. They _do_ account for the fact
+ # that git sorts trees (including chunked trees) as if their
+ # names ended with "/" (so "fo" sorts after "fo." iff fo is a
+ # directory), but save applies that ordering to the unmangled
+ # names rather than to the mangled ones.
+ # This makes streaming impossible when we need the metadata.
def result_from_tree_entry(tree_entry):
gitmode, mangled_name, oid = tree_entry
name, kind = git.demangle_name(mangled_name, gitmode)
tree_ents = sorted(tree_ents, key=lambda x: x[0])
for ent in tree_ents:
yield ent
-
+
def tree_items(oid, tree_data, names=frozenset(), bupm=None):
def tree_item(ent_oid, kind, gitmode):
# Assumes the tree is properly formed, i.e. there are no
# duplicates, and entries will be in git tree order.
- if type(names) not in (frozenset, set):
+ if not isinstance(names, (frozenset, set)):
names = frozenset(names)
remaining = len(names)
break
if mangled_name > b'.bupm':
break
- for item in tree_items(oid, tree_data, names, bupm):
- yield item
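+ # Enter bupm as a context manager only when we actually have one,
+ # so it's closed once the iteration finishes.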
+ with nullcontext_if_not(bupm):
+ for item in tree_items(oid, tree_data, names, bupm):
+ yield item
_save_name_rx = re.compile(br'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
-
+
def _reverse_suffix_duplicates(strs):
"""Yields the elements of strs, with any runs of duplicate values
suffixed with -N suffixes, where the zero padded integer N
cache_notice(commit_key, item)
return item
-def cache_commit(repo, oid):
+# Non-bytes singleton used as a key in cached rvl: dicts to record
+# whether their entries were built with metadata.
+_HAS_META_ENTRY = object()
+
+def cache_commit(repo, oid, require_meta=True):
"""Build, cache, and return a "name -> commit_item" dict of the entire
commit rev-list.
"""
- # For now, always cache with full metadata
entries = {}
- entries[b'.'] = _revlist_item_from_oid(repo, oid, True)
+ entries[b'.'] = _revlist_item_from_oid(repo, oid, require_meta)
revs = repo.rev_list((hexlify(oid),), format=b'%T %at',
parse=parse_rev)
rev_items, rev_names = tee(revs)
entries[name] = item
entries[b'latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
revlist_key = b'rvl:' + tip[1].coid
- cache_notice(revlist_key, entries)
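+ # Record whether this rev-list was built with metadata, and
+ # overwrite so a with-metadata pass can replace a metadata-free one.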
+ entries[_HAS_META_ENTRY] = require_meta
+ cache_notice(revlist_key, entries, overwrite=True)
return entries
-def revlist_items(repo, oid, names):
+def revlist_items(repo, oid, names, require_meta=True):
assert len(oid) == 20
# Special case '.' instead of caching the whole history since it's
# the only way to get the metadata for the commit.
if names and all(x == b'.' for x in names):
- yield b'.', _revlist_item_from_oid(repo, oid, True)
+ yield b'.', _revlist_item_from_oid(repo, oid, require_meta)
return
# For now, don't worry about the possibility of the contents being
# "too big" for the cache.
revlist_key = b'rvl:' + oid
entries = cache_get(revlist_key)
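+ # A cached rev-list built without metadata can't satisfy
+ # require_meta; ignore it and rebuild below.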
+ if entries and require_meta and not entries[_HAS_META_ENTRY]:
+ entries = None
if not entries:
- entries = cache_commit(repo, oid)
+ entries = cache_commit(repo, oid, require_meta)
if not names:
- for name in sorted(entries.keys()):
+ for name in sorted((n for n in entries.keys() if n != _HAS_META_ENTRY)):
yield name, entries[name]
return
if b'.' in names:
yield b'.', entries[b'.']
for name in (n for n in names if n != b'.'):
+ if name == _HAS_META_ENTRY:
+ continue
commit = entries.get(name)
if commit:
yield name, commit
def tag_item(oid):
assert len(oid) == 20
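+ # Check the cache first, so a hit avoids starting a cat stream
+ # that would then have to be drained.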
+ cached = cache_get_commit_item(oid, need_meta=False)
+ if cached:
+ return cached
oidx = hexlify(oid)
it = repo.cat(oidx)
_, typ, size = next(it)
if typ == b'commit':
- return cache_get_commit_item(oid, need_meta=False) \
- or _commit_item_from_data(oid, b''.join(it))
+ return _commit_item_from_data(oid, b''.join(it))
for _ in it: pass
if typ == b'blob':
return Item(meta=default_file_mode, oid=oid)
return
# Assumes no duplicate refs
- if type(names) not in (frozenset, set):
+ if not isinstance(names, (frozenset, set)):
names = frozenset(names)
remaining = len(names)
last_name = max(names)
global _root, _tags
assert repo
assert S_ISDIR(item_mode(item))
- item_t = type(item)
- if item_t in real_tree_types:
+ if isinstance(item, real_tree_types):
it = repo.cat(hexlify(item.oid))
_, obj_t, size = next(it)
data = b''.join(it)
item_gen = tree_items_with_meta(repo, item.oid, data, names)
else:
item_gen = tree_items(item.oid, data, names)
- elif item_t == RevList:
- item_gen = revlist_items(repo, item.oid, names)
- elif item_t == Root:
+ elif isinstance(item, RevList):
+ item_gen = revlist_items(repo, item.oid, names,
+ require_meta=want_meta)
+ elif isinstance(item, Root):
item_gen = root_items(repo, names, want_meta)
- elif item_t == Tags:
+ elif isinstance(item, Tags):
item_gen = tags_items(repo, names)
else:
raise Exception('unexpected VFS item ' + str(item))
if parent:
for x in parent:
assert len(x) == 2
- assert type(x[0]) in (bytes, str)
- assert type(x[1]) in item_types
+ assert isinstance(x[0], (bytes, str))
+ assert isinstance(x[1], item_types)
assert parent[0][1] == _root
if not S_ISDIR(item_mode(parent[-1][1])):
raise IOError(ENOTDIR,
raise_dir_required_but_not_dir(path, parent, past)
return notice_resolution(tuple(past))
# It's treeish
- if want_meta and type(item) in real_tree_types:
+ if want_meta and isinstance(item, real_tree_types):
dir_meta = _find_treeish_oid_metadata(repo, item.oid)
if dir_meta:
item = item._replace(meta=dir_meta)
else:
future.extend(target_future)
hops += 1
-
+
def resolve(repo, path, parent=None, want_meta=True, follow=True):
"""Follow the path in the virtual filesystem and return a tuple
representing the location, if any, denoted by the path. Each
# m is mode
meta = Metadata()
meta.mode = m
- meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0
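+ # Assumption: None marks the ids as unknown, rather than implying
+ # ownership by uid/gid 0.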
+ meta.uid = meta.gid = None
+ meta.atime = meta.mtime = meta.ctime = 0
if S_ISLNK(m):
if isinstance(item, FakeLink):
target = item.target