When traversing a directory using functions like contents(), the meta
value for any directories other than '.' will be a default directory
mode, not a Metadata object. This is because the actual metadata for
-a directory is stored inside the directory.
+a directory is stored inside the directory (see
+fill_in_metadata_if_dir() or ensure_item_has_metadata()).
-At the moment tagged commits (e.g. /.tag/some-commit) are represented
-as an item that is indistinguishable from a normal directory, so you
-cannot assume that the oid of an item satisfying
-S_ISDIR(item_mode(item)) refers to a tree.
+Commit items represent commits (e.g. /.tag/some-commit or
+/foo/latest), and for most purposes, they appear as the underlying
+tree. S_ISDIR(item_mode(item)) will return true for both tree Items
+and Commits and the commit's oid is the tree hash; the commit hash is
+item.coid.
"""
from __future__ import print_function
from collections import namedtuple
from errno import ELOOP, ENOENT, ENOTDIR
-from itertools import chain, dropwhile, izip
+from itertools import chain, dropwhile, groupby, izip, tee
from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
from time import localtime, strftime
import exceptions, re, sys
class IOError(exceptions.IOError):
- def __init__(self, errno, message):
+ def __init__(self, errno, message, terminus=None):
exceptions.IOError.__init__(self, errno, message)
-
-class Loop(IOError):
- def __init__(self, message, terminus=None):
- IOError.__init__(self, ELOOP, message)
self.terminus = terminus
default_file_mode = S_IFREG | 0o644
_multiple_slashes_rx = re.compile(r'//+')
def _decompose_path(path):
- """Return a reversed list of path elements, omitting any occurrences
- of "." and ignoring any leading or trailing slash."""
+    """Return a tuple of (is_absolute, must_be_dir, parts), where
+    is_absolute indicates whether the path is absolute, must_be_dir
+    indicates that the path ended in a slash or '.' (and so must
+    resolve to a directory), and parts is a reversed list of path
+    elements, omitting any occurrences of "." and ignoring any
+    leading or trailing slash. If the path is effectively '/' or
+    '.', parts will be an empty list.
+
+    """
path = re.sub(_multiple_slashes_rx, '/', path)
+ if path == '/':
+ return True, True, []
+ is_absolute = must_be_dir = False
if path.startswith('/'):
+ is_absolute = True
path = path[1:]
- if path.endswith('/'):
- path = path[:-1]
- result = [x for x in path.split('/') if x != '.']
- result.reverse()
- return result
+ for suffix in ('/', '/.'):
+ if path.endswith(suffix):
+ must_be_dir = True
+ path = path[:-len(suffix)]
+ parts = [x for x in path.split('/') if x != '.']
+ parts.reverse()
+ if not parts:
+ must_be_dir = True # e.g. path was effectively '.' or '/', etc.
+ return is_absolute, must_be_dir, parts
Item = namedtuple('Item', ('meta', 'oid'))
Root = namedtuple('Root', ('meta'))
Tags = namedtuple('Tags', ('meta'))
RevList = namedtuple('RevList', ('meta', 'oid'))
+Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))
+
+item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit))
+real_tree_types = frozenset((Item, Commit))
_root = Root(meta=default_dir_mode)
_tags = Tags(meta=default_dir_mode)
+
def copy_item(item):
"""Return a completely independent copy of item, such that
modifications will not affect the original.
m.size = 0
return m
-def _tree_data_and_bupm(repo, oid):
+def tree_data_and_bupm(repo, oid):
"""Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
tree has no metadata (i.e. older bup save, or non-bup tree).
break
return data, None
-def _find_dir_item_metadata(repo, item):
- """Return the metadata for the tree or commit item, or None if the
- tree has no metadata (i.e. older bup save, or non-bup tree).
+def _find_treeish_oid_metadata(repo, oid):
+ """Return the metadata for the tree or commit oid, or None if the tree
+ has no metadata (i.e. older bup save, or non-bup tree).
"""
- tree_data, bupm_oid = _tree_data_and_bupm(repo, item.oid)
+ tree_data, bupm_oid = tree_data_and_bupm(repo, oid)
if bupm_oid:
with _FileReader(repo, bupm_oid) as meta_stream:
return _read_dir_meta(meta_stream)
assert S_ISREG(item_mode(item))
return _FileReader(repo, item.oid)
-def augment_item_meta(repo, item, include_size=False):
- """Ensure item has a Metadata instance for item.meta. If item.meta is
- currently a mode, replace it with a compatible "fake" Metadata
- instance. If include_size is true, ensure item.meta.size is
- correct, computing it if needed. If item.meta is a Metadata
- instance, this call may modify it in place or replace it.
+def _commit_item_from_data(oid, data):
+ info = parse_commit(data)
+ return Commit(meta=default_dir_mode,
+ oid=info.tree.decode('hex'),
+ coid=oid)
- """
- # If we actually had parallelism, we'd need locking...
- assert repo
- m = item.meta
- if isinstance(m, Metadata):
- if include_size and m.size is None:
- m.size = _compute_item_size(repo, item)
- return item._replace(meta=m)
- return item
- # m is mode
- meta = Metadata()
- meta.mode = m
- meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0
- if S_ISLNK(m):
- target = _readlink(repo, item.oid)
- meta.symlink_target = target
- meta.size = len(target)
- elif include_size:
- meta.size = _compute_item_size(repo, item)
- return item._replace(meta=meta)
-
-def _commit_meta_from_auth_sec(author_sec):
- m = Metadata()
- m.mode = default_dir_mode
- m.uid = m.gid = m.size = 0
- m.atime = m.mtime = m.ctime = author_sec * 10**9
- return m
-
-def _commit_meta_from_oidx(repo, oidx):
- it = repo.cat(oidx)
+def _commit_item_from_oid(repo, oid, require_meta):
+ it = repo.cat(oid.encode('hex'))
_, typ, size = next(it)
assert typ == 'commit'
- author_sec = parse_commit(''.join(it)).author_sec
- return _commit_meta_from_auth_sec(author_sec)
+ commit = _commit_item_from_data(oid, ''.join(it))
+ if require_meta:
+        meta = _find_treeish_oid_metadata(repo, commit.oid)
+ if meta:
+ commit = commit._replace(meta=meta)
+ return commit
+
+def _revlist_item_from_oid(repo, oid, require_meta):
+ if require_meta:
+ meta = _find_treeish_oid_metadata(repo, oid) or default_dir_mode
+ else:
+ meta = default_dir_mode
+ return RevList(oid=oid, meta=meta)
def parse_rev_auth_secs(f):
tree, author_secs = f.readline().split(None, 2)
# in parallel (i.e. meta vs refs).
for name, oid in tuple(repo.refs([], limit_to_heads=True)):
assert(name.startswith('refs/heads/'))
- name = name[11:]
- m = _commit_meta_from_oidx(repo, oid.encode('hex'))
- yield name, RevList(meta=m, oid=oid)
+ yield name[11:], _revlist_item_from_oid(repo, oid, False)
return
if '.' in names:
continue
assert typ == 'commit'
commit = parse_commit(''.join(it))
- yield ref, RevList(meta=_commit_meta_from_auth_sec(commit.author_sec),
- oid=oidx.decode('hex'))
+ yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'), False)
def ordered_tree_entries(tree_data, bupm=None):
"""Yields (name, mangled_name, kind, gitmode, oid) for each item in
continue
assert name != '.'
if name not in names:
- if bupm:
- if (name + '/') > last_name:
- break # given git sort order, we're finished
- else:
- if name > last_name:
- break # given bupm sort order, we're finished
+ if name > last_name:
+ break # given bupm sort order, we're finished
if (kind == BUP_CHUNKED or not S_ISDIR(gitmode)) and bupm:
Metadata.read(bupm)
continue
for item in tree_items(oid, tree_data, names, bupm):
yield item
-_save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}$')
+_save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
+def _reverse_suffix_duplicates(strs):
+ """Yields the elements of strs, with any runs of duplicate values
+ suffixed with -N suffixes, where the zero padded integer N
+ decreases to 0 by 1 (e.g. 10, 09, ..., 00).
+
+ """
+ for name, duplicates in groupby(strs):
+ ndup = len(tuple(duplicates))
+ if ndup == 1:
+ yield name
+ else:
+ ndig = len(str(ndup - 1))
+ fmt = '%s-' + '%0' + str(ndig) + 'd'
+ for i in xrange(ndup - 1, -1, -1):
+ yield fmt % (name, i)
+
+def _name_for_rev(rev):
+ commit, (tree_oidx, utc) = rev
+ assert len(commit) == 40
+ return strftime('%Y-%m-%d-%H%M%S', localtime(utc))
+
+def _item_for_rev(rev):
+ commit, (tree_oidx, utc) = rev
+ assert len(commit) == 40
+ assert len(tree_oidx) == 40
+ return Commit(meta=default_dir_mode,
+ oid=tree_oidx.decode('hex'),
+ coid=commit.decode('hex'))
+
def revlist_items(repo, oid, names):
assert len(oid) == 20
oidx = oid.encode('hex')
-
- # There might well be duplicate names in this dir (time resolution is secs)
names = frozenset(name for name in (names or tuple()) \
if _save_name_rx.match(name) or name in ('.', 'latest'))
-
# Do this before we open the rev_list iterator so we're not nesting
if (not names) or ('.' in names):
- yield '.', RevList(oid=oid, meta=_commit_meta_from_oidx(repo, oidx))
-
+ yield '.', _revlist_item_from_oid(repo, oid, True)
+
revs = repo.rev_list((oidx,), format='%T %at', parse=parse_rev_auth_secs)
- first_rev = next(revs, None)
- revs = chain((first_rev,), revs)
+ rev_items, rev_names = tee(revs)
+ revs = None # Don't disturb the tees
+ rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
+ rev_items = (_item_for_rev(x) for x in rev_items)
+ first_commit = None
if not names:
- for commit, (tree_oidx, utc) in revs:
- assert len(tree_oidx) == 40
- name = strftime('%Y-%m-%d-%H%M%S', localtime(utc))
- yield name, Item(meta=default_dir_mode, oid=tree_oidx.decode('hex'))
- if first_rev:
- commit, (tree_oidx, utc) = first_rev
- yield 'latest', Item(meta=default_dir_mode,
- oid=tree_oidx.decode('hex'))
+ for item in rev_items:
+ first_commit = first_commit or item
+ yield next(rev_names), item
+ yield 'latest', first_commit
return
# Revs are in reverse chronological order by default
last_name = min(names)
- for commit, (tree_oidx, utc) in revs:
- assert len(tree_oidx) == 40
- name = strftime('%Y-%m-%d-%H%M%S', localtime(utc))
+ for item in rev_items:
+ first_commit = first_commit or item
+ name = next(rev_names) # Might have -N dup suffix
if name < last_name:
break
if not name in names:
continue
- yield name, Item(meta=default_dir_mode, oid=tree_oidx.decode('hex'))
+ yield name, item
# FIXME: need real short circuit...
- for _ in revs:
- pass
+ for _ in rev_items: pass
+ for _ in rev_names: pass
- if first_rev and 'latest' in names:
- commit, (tree_oidx, utc) = first_rev
- yield 'latest', Item(meta=default_dir_mode, oid=tree_oidx.decode('hex'))
+ if 'latest' in names:
+ yield 'latest', first_commit
def tags_items(repo, names):
global _tags
it = repo.cat(oidx)
_, typ, size = next(it)
if typ == 'commit':
- tree_oid = parse_commit(''.join(it)).tree.decode('hex')
- assert len(tree_oid) == 20
- # FIXME: more efficient/bulk?
- return RevList(meta=_commit_meta_from_oidx(repo, oidx), oid=oid)
+ return _commit_item_from_data(oid, ''.join(it))
for _ in it: pass
if typ == 'blob':
return Item(meta=default_file_mode, oid=oid)
item) for all items, including, a first item named '.'
representing the container itself.
- Any given name might produce more than one result. For example,
- saves to a branch that happen within the same second currently end
- up with the same VFS timestmap, i.e. /foo/2017-09-10-150833/.
+ The meta value for any directories other than '.' will be a
+ default directory mode, not a Metadata object. This is because
+ the actual metadata for a directory is stored inside the directory
+ (see fill_in_metadata_if_dir() or ensure_item_has_metadata()).
Note that want_meta is advisory. For any given item, item.meta
might be a Metadata instance or a mode, and if the former,
"""
# Q: are we comfortable promising '.' first when no names?
+ global _root, _tags
assert repo
assert S_ISDIR(item_mode(item))
item_t = type(item)
- if item_t == Item:
+
+ if item_t in real_tree_types:
it = repo.cat(item.oid.encode('hex'))
_, obj_type, size = next(it)
data = ''.join(it)
else:
item_gen = tree_items(item.oid, data, names)
elif obj_type == 'commit':
- tree_oidx = parse_commit(data).tree
- it = repo.cat(tree_oidx)
- _, obj_type, size = next(it)
- assert obj_type == 'tree'
- tree_data = ''.join(it)
if want_meta:
- item_gen = tree_items_with_meta(repo, tree_oidx.decode('hex'),
- tree_data, names)
+ item_gen = tree_items_with_meta(repo, item.oid, tree_data, names)
else:
- item_gen = tree_items(tree_oidx.decode('hex'), tree_data, names)
+ item_gen = tree_items(item.oid, tree_data, names)
else:
for _ in it: pass
raise Exception('unexpected git ' + obj_type)
yield x
def _resolve_path(repo, path, parent=None, want_meta=True, deref=False):
+ def raise_dir_required_but_not_dir(path, parent, past):
+ raise IOError(ENOTDIR,
+ "path %r%s resolves to non-directory %r"
+ % (path,
+ ' (relative to %r)' % parent if parent else '',
+ past),
+ terminus=past)
+ global _root
assert repo
assert len(path)
- global _root
- future = _decompose_path(path)
- past = []
- if path.startswith('/'):
- assert(not parent)
- past = [('', _root)]
- if future == ['']: # path was effectively '/'
- return tuple(past)
- if not past and not parent:
- past = [('', _root)]
if parent:
- past = [parent]
+ for x in parent:
+ assert len(x) == 2
+ assert type(x[0]) in (bytes, str)
+ assert type(x[1]) in item_types
+ assert parent[0][1] == _root
+ if not S_ISDIR(item_mode(parent[-1][1])):
+ raise IOError(ENOTDIR,
+ 'path resolution parent %r is not a directory'
+ % (parent,))
+ is_absolute, must_be_dir, future = _decompose_path(path)
+ if must_be_dir:
+ deref = True
+ if not future: # path was effectively '.' or '/'
+ if is_absolute:
+ return (('', _root),)
+ if parent:
+ return tuple(parent)
+        return (('', _root),)
+ if is_absolute:
+ past = [('', _root)]
+ else:
+ past = list(parent) if parent else [('', _root)]
hops = 0
- result = None
while True:
+ if not future:
+ if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
+ raise_dir_required_but_not_dir(path, parent, past)
+ return tuple(past)
segment = future.pop()
if segment == '..':
+ assert len(past) > 0
if len(past) > 1: # .. from / is /
+ assert S_ISDIR(item_mode(past[-1][1]))
past.pop()
else:
parent_name, parent_item = past[-1]
assert dot == '.'
past[-1] = parent_name, parent_item
if not item:
- return tuple(past + [(segment, None)])
+ past.append((segment, None),)
+ return tuple(past)
mode = item_mode(item)
if not S_ISLNK(mode):
if not S_ISDIR(mode):
- assert(not future)
- return tuple(past + [(segment, item)])
+ past.append((segment, item),)
+ if future:
+ raise IOError(ENOTDIR,
+ 'path %r%s ends internally in non-directory here: %r'
+ % (path,
+ ' (relative to %r)' % parent if parent else '',
+ past),
+ terminus=past)
+ if must_be_dir:
+ raise_dir_required_but_not_dir(path, parent, past)
+ return tuple(past)
# It's treeish
- if want_meta and type(item) == Item:
- dir_meta = _find_dir_item_metadata(repo, item)
+ if want_meta and type(item) in real_tree_types:
+ dir_meta = _find_treeish_oid_metadata(repo, item.oid)
if dir_meta:
item = item._replace(meta=dir_meta)
- if not future:
- return tuple(past + [(segment, item)])
past.append((segment, item))
- else: # symlink
+ else: # symlink
if not future and not deref:
- return tuple(past + [(segment, item)])
+ past.append((segment, item),)
+ continue
+ if hops > 100:
+ raise IOError(ELOOP,
+ 'too many symlinks encountered while resolving %r%s'
+ % (path, ' relative to %r' % parent if parent else ''),
+ terminus=tuple(past + [(segment, item)]))
target = readlink(repo, item)
- target_future = _decompose_path(target)
- if target.startswith('/'):
- future = target_future
+ is_absolute, _, target_future = _decompose_path(target)
+ if is_absolute:
+ if not target_future: # path was effectively '/'
+ return (('', _root),)
past = [('', _root)]
- if target_future == ['']: # path was effectively '/'
- return tuple(past)
+ future = target_future
else:
- future = future + target_future
+ future.extend(target_future)
hops += 1
- if hops > 100:
- raise Loop('too many symlinks encountered while resolving %r%s'
- % (path,
- 'relative to %r' % parent if parent else ''))
def lresolve(repo, path, parent=None, want_meta=True):
- """Perform exactly the same function as resolve(), except if the
- final path element is a symbolic link, don't follow it, just
- return it in the result."""
+ """Perform exactly the same function as resolve(), except if the final
+ path element is a symbolic link, don't follow it, just return it
+ in the result.
+
+ """
return _resolve_path(repo, path, parent=parent, want_meta=want_meta,
deref=False)
-
def resolve(repo, path, parent=None, want_meta=True):
"""Follow the path in the virtual filesystem and return a tuple
resolution, the result will represent the location of the missing
item, and that item in the result will be None.
+ Any attempt to traverse a non-directory will raise a VFS ENOTDIR
+ IOError exception.
+
Any symlinks along the path, including at the end, will be
- resolved. A Loop exception will be raised if too many symlinks
- are traversed whiile following the path. raised if too many
- symlinks are traversed while following the path. That exception
- is effectively like a normal ELOOP IOError exception, but will
- include a terminus element describing the location of the failure,
- which will be a tuple of (name, info) elements.
-
- Currently, a path ending in '/' will still resolve if it exists,
- even if not a directory. The parent, if specified, must be a
- (name, item) tuple, and will provide the starting point for the
- resolution of the path. Currently, the path must be relative when
- a parent is provided. The result may include parent directly, so
- it must not be modified later. If this is a concern, pass in
- copy_item(parent) instead.
+ resolved. A VFS IOError with the errno attribute set to ELOOP
+ will be raised if too many symlinks are traversed while following
+ the path. That exception is effectively like a normal
+ ELOOP IOError exception, but will include a terminus element
+ describing the location of the failure, which will be a tuple of
+ (name, info) elements.
+
+ The parent, if specified, must be a sequence of (name, item)
+ tuples, and will provide the starting point for the resolution of
+ the path. If no parent is specified, resolution will start at
+ '/'.
+
+ The result may include elements of parent directly, so they must
+ not be modified later. If this is a concern, pass in "name,
+ copy_item(item) for name, item in parent" instead.
When want_meta is true, detailed metadata will be included in each
result item if it's avaiable, otherwise item.meta will be an
needed, make a copy via item.meta.copy() and modify that instead.
"""
- return _resolve_path(repo, path, parent=parent, want_meta=want_meta,
- deref=True)
+ result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
+ deref=True)
+ _, leaf_item = result[-1]
+ if leaf_item:
+ assert not S_ISLNK(item_mode(leaf_item))
+ return result
+
+def augment_item_meta(repo, item, include_size=False):
+ """Ensure item has a Metadata instance for item.meta. If item.meta is
+ currently a mode, replace it with a compatible "fake" Metadata
+ instance. If include_size is true, ensure item.meta.size is
+ correct, computing it if needed. If item.meta is a Metadata
+ instance, this call may modify it in place or replace it.
+
+ """
+ # If we actually had parallelism, we'd need locking...
+ assert repo
+ m = item.meta
+ if isinstance(m, Metadata):
+ if include_size and m.size is None:
+ m.size = _compute_item_size(repo, item)
+ return item._replace(meta=m)
+ return item
+ # m is mode
+ meta = Metadata()
+ meta.mode = m
+ meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0
+ if S_ISLNK(m):
+ target = _readlink(repo, item.oid)
+ meta.symlink_target = target
+ meta.size = len(target)
+ elif include_size:
+ meta.size = _compute_item_size(repo, item)
+ return item._replace(meta=meta)
+
+def fill_in_metadata_if_dir(repo, item):
+ """If item is a directory and item.meta is not a Metadata instance,
+ attempt to find the metadata for the directory. If found, return
+ a new item augmented to include that metadata. Otherwise, return
+ item. May be useful for the output of contents().
+
+ """
+ if S_ISDIR(item_mode(item)) and not isinstance(item.meta, Metadata):
+ items = tuple(contents(repo, item, ('.',), want_meta=True))
+ assert len(items) == 1
+ assert items[0][0] == '.'
+ item = items[0][1]
+ return item
+
+def ensure_item_has_metadata(repo, item, include_size=False):
+ """If item is a directory, attempt to find and add its metadata. If
+ the item still doesn't have a Metadata instance for item.meta,
+ give it one via augment_item_meta(). May be useful for the output
+ of contents().
+
+ """
+ return augment_item_meta(repo,
+ fill_in_metadata_if_dir(repo, item),
+ include_size=include_size)