1 """Virtual File System interface to bup repository content.
3 This module provides a path-based interface to the content of a bup
6 The VFS is structured like this:
9 /SAVE-NAME/SAVE-DATE/...
Each path is represented by an item that has at least an item.meta which
13 may be either a Metadata object, or an integer mode. Functions like
14 item_mode() and item_size() will return the mode and size in either
15 case. Any item.meta Metadata instances must not be modified directly.
16 Make a copy to modify via item.meta.copy() if needed, or call
19 The want_meta argument is advisory for calls that accept it, and it
20 may not be honored. Callers must be able to handle an item.meta value
21 that is either an instance of Metadata or an integer mode, perhaps
22 via item_mode() or augment_item_meta().
24 Setting want_meta=False is rarely desirable since it can limit the VFS
25 to only the metadata that git itself can represent, and so for
26 example, fifos and sockets will appear to be regular files
27 (e.g. S_ISREG(item_mode(item)) will be true). But the option is still
28 provided because it may be more efficient when just the path names or
29 the more limited metadata is sufficient.
31 Any given metadata object's size may be None, in which case the size
32 can be computed via item_size() or augment_item_meta(...,
35 When traversing a directory using functions like contents(), the meta
36 value for any directories other than '.' will be a default directory
37 mode, not a Metadata object. This is because the actual metadata for
38 a directory is stored inside the directory (see
39 fill_in_metadata_if_dir() or ensure_item_has_metadata()).
41 Commit items represent commits (e.g. /.tag/some-commit or
42 /foo/latest), and for most purposes, they appear as the underlying
43 tree. S_ISDIR(item_mode(item)) will return true for both tree Items
44 and Commits and the commit's oid is the tree hash; the commit hash is
49 from __future__ import absolute_import, print_function
50 from collections import namedtuple
51 from errno import EINVAL, ELOOP, ENOENT, ENOTDIR
52 from itertools import chain, dropwhile, groupby, tee
53 from random import randrange
54 from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
55 from time import localtime, strftime
56 import exceptions, re, sys
58 from bup import client, git, metadata
59 from bup.compat import range
60 from bup.git import BUP_CHUNKED, cp, get_commit_items, parse_commit, tree_decode
61 from bup.helpers import debug2, last
62 from bup.metadata import Metadata
63 from bup.repo import LocalRepo, RemoteRepo
class IOError(exceptions.IOError):
    """IOError that additionally records a resolution terminus: the
    (name, item) tuple sequence describing where the failure occurred
    (see resolve())."""
    def __init__(self, errno, message, terminus=None):
        super(IOError, self).__init__(errno, message)
        self.terminus = terminus
# Fallback st_mode values used when an item has no Metadata (e.g. plain
# git trees or older bup saves); see the module docstring.
default_file_mode = S_IFREG | 0o644
default_dir_mode = S_IFDIR | 0o755
default_symlink_mode = S_IFLNK | 0o755
def _default_mode_for_gitmode(gitmode):
    """Return the default st_mode for a git tree-entry mode: the regular
    file, directory, or symlink default depending on the entry's type.

    Raises Exception for any other git mode.
    """
    # As written, only the first return was reachable and the raise and
    # remaining defaults were dead code; dispatch on the entry type.
    if S_ISREG(gitmode):
        return default_file_mode
    if S_ISDIR(gitmode):
        return default_dir_mode
    if S_ISLNK(gitmode):
        return default_symlink_mode
    raise Exception('unexpected git mode ' + oct(gitmode))
def _normal_or_chunked_file_size(repo, oid):
    """Return the size of the normal or chunked file indicated by oid."""
    # FIXME: --batch-format CatPipe?
    it = repo.cat(oid.encode('hex'))
    _, obj_t, size = next(it)
    # Chunked files are stored as trees of blobs; descend via the last
    # entry at each level (entry names are hex offsets into the file).
    # NOTE(review): `ofs` is read below but never assigned in the code
    # visible here -- lines appear to be missing from this view.
    while obj_t == 'tree':
        mode, name, last_oid = last(tree_decode(''.join(it)))
        it = repo.cat(last_oid.encode('hex'))
        _, obj_t, size = next(it)
    # Final size is the last chunk's offset plus the last blob's length.
    return ofs + sum(len(b) for b in it)
def _skip_chunks_before_offset(tree, offset):
    # Given an iterator of chunked-file tree entries (whose names are
    # hex byte offsets), drop leading entries that end before offset,
    # returning an iterator over the remainder.
    # NOTE(review): the loop over `tree` and the guards that bind `ent`
    # are not visible here -- lines appear to be missing from this view.
    prev_ent = next(tree, None)
        ent_ofs = int(ent[1], 16)
            return chain([prev_ent, ent], tree)
        if ent_ofs == offset:
            return chain([ent], tree)
def _tree_chunks(repo, tree, startofs):
    "Tree should be a sequence of (name, mode, hash) as per tree_decode()."
    assert(startofs >= 0)
    # name is the chunk's hex offset in the original file
    # NOTE(review): the assignment of `ofs` and `data`, and the branch
    # selecting tree vs. blob handling, are not visible in this view.
    for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
        skipmore = startofs - ofs
        it = repo.cat(oid.encode('hex'))
        _, obj_t, size = next(it)
            # Subtree: recurse, still skipping up to the requested offset.
            assert obj_t == 'tree'
            for b in _tree_chunks(repo, tree_decode(data), skipmore):
            # Leaf blob: yield the part at/after the requested offset.
            assert obj_t == 'blob'
            yield data[skipmore:]
    def __init__(self, repo, oid, startofs):
        # Position a reader at startofs within the (possibly chunked)
        # file at oid: chunked files iterate chunk data via self.it,
        # plain blobs keep the remaining bytes in self.blob.
        # NOTE(review): the branch between the tree and blob cases is
        # not visible in this view.
        it = repo.cat(oid.encode('hex'))
        _, obj_t, size = next(it)
        isdir = obj_t == 'tree'
        self.it = _tree_chunks(repo, tree_decode(data), startofs)
        self.blob = data[startofs:]
    def next(self, size):
        # Return up to size bytes, pulling further chunks from self.it
        # into self.blob as needed.
        # NOTE(review): the initialization of `out` and the enclosing
        # try: for StopIteration are not visible in this view.
        while len(out) < size:
            if self.it and not self.blob:
                self.blob = self.it.next()
            except StopIteration:
            want = size - len(out)
            out += self.blob[:want]
            self.blob = self.blob[want:]
        debug2('next(%d) returned %d\n' % (size, len(out)))
class _FileReader(object):
    # File-like, seekable reader over a (possibly chunked) file stored
    # at oid; also usable as a context manager (see __exit__).
    # NOTE(review): substantial parts of this class (including the seek
    # method's def line and close/__enter__) are not visible in this view.
    def __init__(self, repo, oid, known_size=None):
        assert len(oid) == 20
        # Cache the size if the caller already knows it.
        self._size = known_size

    def _compute_size(self):
        self._size = _normal_or_chunked_file_size(self._repo, self.oid)

        # Part of seek(): reject offsets outside the file.
        if ofs < 0 or ofs > self._compute_size():
            raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)

    def read(self, count=-1):
        size = self._compute_size()
        count = size - self.ofs
        # (Re)create the chunk reader if it isn't aligned with our offset.
        if not self.reader or self.reader.ofs != self.ofs:
            self.reader = _ChunkReader(self._repo, self.oid, self.ofs)
        buf = self.reader.next(count)
        raise # our offsets will be all screwed up otherwise

    def __exit__(self, type, value, traceback):
# Collapse any run of slashes to a single '/' (used by _decompose_path).
_multiple_slashes_rx = re.compile(r'//+')
def _decompose_path(path):
    """Return (is_absolute, must_be_dir, parts): a boolean indicating
    whether the path is absolute, a boolean indicating whether the
    path must name a directory (e.g. it ended in '/' or '/.'), and a
    list of path elements, omitting any occurrences of "." and
    ignoring any leading or trailing slash. If the path is
    effectively '/' or '.', return an empty list.
    """
    # NOTE(review): some guards (and the documented reversal of parts)
    # are not visible in this view.
    path = re.sub(_multiple_slashes_rx, '/', path)
        return True, True, []
    is_absolute = must_be_dir = False
    if path.startswith('/'):
    # A trailing '/' or '/.' means the path must denote a directory.
    for suffix in ('/', '/.'):
        if path.endswith(suffix):
            path = path[:-len(suffix)]
    parts = [x for x in path.split('/') if x != '.']
        must_be_dir = True # e.g. path was effectively '.' or '/', etc.
    return is_absolute, must_be_dir, parts
# The VFS item types.  Every item carries a .meta that is either a
# Metadata instance or an integer st_mode (see the module docstring).
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
FakeLink = namedtuple('FakeLink', ('meta', 'target'))
# Use real 1-tuples here: ('meta') is just the string 'meta', which
# namedtuple accepts only because it also allows string field specs.
Root = namedtuple('Root', ('meta',))
Tags = namedtuple('Tags', ('meta',))
RevList = namedtuple('RevList', ('meta', 'oid'))
Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))

# NOTE(review): FakeLink is not in item_types -- confirm that's intended.
item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit))
real_tree_types = frozenset((Item, Commit))

# Singletons for the VFS root and the /.tag directory.
_root = Root(meta=default_dir_mode)
_tags = Tags(meta=default_dir_mode)
### A general purpose shared cache with (currently) cheap random
### eviction. At the moment there is no weighting so a single commit
### item is just as likely to be evicted as an entire "rev-list". See
### is_valid_cache_key for a description of the expected content.

_cache_max_items = 30000

# NOTE(review): this `global` statement appears to belong to a
# cache-reset helper whose def line is not visible in this view.
global _cache, _cache_keys
def is_valid_cache_key(x):
    """Return logically true if x looks like it could be a valid cache key
    (with respect to structure). Current valid cache entries:
    res:... -> resolution
    rvl:OID -> {'.', commit, '2012...', next_commit, ...}
    """
    # Suspect we may eventually add "(container_oid, name) -> ...", and others.
    # NOTE(review): the binding of `tag` (presumably x[:4]) is not
    # visible in this view.
    if tag in ('itm:', 'rvl:') and len(x) == 24:
# NOTE(review): the def line (presumably cache_get(key)) is not visible
# in this view; the body validates the key and returns the cached value.
    if not is_valid_cache_key(key):
        raise Exception('invalid cache key: ' + repr(key))
    return _cache.get(key)
def cache_notice(key, value):
    # Record key -> value in the shared cache, evicting a random
    # existing entry once the cache is full (cheap random eviction --
    # see the section comment above).
    # NOTE(review): the storing of `value` and early-exit lines are not
    # visible in this view.
    global _cache, _cache_keys, _cache_max_items
    if not is_valid_cache_key(key):
        raise Exception('invalid cache key: ' + repr(key))
    if len(_cache) < _cache_max_items:
        _cache_keys.append(key)
    victim_i = randrange(0, len(_cache_keys))
    victim = _cache_keys[victim_i]
    _cache_keys[victim_i] = key
def cache_get_commit_item(oid, need_meta=True):
    """Return the requested tree item if it can be found in the cache.
    When need_meta is true don't return a cached item that only has a
    default mode (i.e. whose meta is not a Metadata instance).
    """
    # tree might be stored independently, or as '.' with its entries.
    commit_key = b'itm:' + oid
    item = cache_get(commit_key)
        if isinstance(item.meta, Metadata):
    # Fall back to the commit's cached rev-list entries, if any.
    entries = cache_get(b'rvl:' + oid)
def cache_get_revlist_item(oid, need_meta=True):
    # Build a RevList from the cached commit item, if available.
    # NOTE(review): a guard (e.g. `if commit:`) appears to be missing
    # from this view.
    commit = cache_get_commit_item(oid, need_meta=need_meta)
        return RevList(oid=oid, meta=commit.meta)
# NOTE(review): the def line (presumably copy_item(item)) is not
# visible in this view.
    """Return a completely independent copy of item, such that
    modifications will not affect the original.
    """
    meta = getattr(item, 'meta', None)
    # Only a Metadata meta is mutable and needs a deep copy.
    if isinstance(meta, Metadata):
        return(item._replace(meta=meta.copy()))
# NOTE(review): the def line (presumably item_mode(item)) and the
# binding of `m` are not visible in this view.
    """Return the integer mode (stat st_mode) for item."""
    if isinstance(m, Metadata):
def _read_dir_meta(bupm):
    # Read one Metadata record from the .bupm stream, substituting the
    # default directory mode for empty/unmodified entries.
    # This is because save writes unmodified Metadata() entries for
    # fake parents -- test-save-strip-graft.sh demonstrates.
    m = Metadata.read(bupm)
        return default_dir_meta
    assert m.mode is not None
def tree_data_and_bupm(repo, oid):
    """Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
    tree has no metadata (i.e. older bup save, or non-bup tree).
    """
    # NOTE(review): the assembly of `data` from the cat iterator and the
    # return statements are not visible in this view.
    assert len(oid) == 20
    it = repo.cat(oid.encode('hex'))
    _, item_t, size = next(it)
    if item_t == 'commit':
        # For commits, operate on the commit's tree instead.
        commit = parse_commit(data)
        it = repo.cat(commit.tree)
        _, item_t, size = next(it)
        assert item_t == 'tree'
    elif item_t != 'tree':
        raise Exception('%r is not a tree or commit' % oid.encode('hex'))
    # Tree entries are sorted, so stop scanning once past '.bupm'.
    for _, mangled_name, sub_oid in tree_decode(data):
        if mangled_name == '.bupm':
        if mangled_name > '.bupm':
def _find_treeish_oid_metadata(repo, oid):
    """Return the metadata for the tree or commit oid, or None if the tree
    has no metadata (i.e. older bup save, or non-bup tree).
    """
    # NOTE(review): the guard on bupm_oid and the None return are not
    # visible in this view.
    tree_data, bupm_oid = tree_data_and_bupm(repo, oid)
    with _FileReader(repo, bupm_oid) as meta_stream:
        return _read_dir_meta(meta_stream)
def _readlink(repo, oid):
    """Return the symlink target stored in the repository at oid."""
    target_chunks = repo.join(oid.encode('hex'))
    return ''.join(target_chunks)
def readlink(repo, item):
    """Return the link target of item, which must be a symlink. Reads the
    target from the repository if necessary."""
    # NOTE(review): the returns for the Metadata and FakeLink branches
    # are not visible in this view.
    assert S_ISLNK(item_mode(item))
    if isinstance(item.meta, Metadata):
        # Prefer the target recorded in the metadata, if present.
        target = item.meta.symlink_target
    elif isinstance(item, FakeLink):
    # Otherwise read the target from the repository blob.
    return _readlink(repo, item.oid)
def _compute_item_size(repo, item):
    # Compute item's size from the repository based on its mode
    # (regular file vs. symlink).
    # NOTE(review): the mode dispatch conditionals are not visible in
    # this view.
    mode = item_mode(item)
        size = _normal_or_chunked_file_size(repo, item.oid)
        return len(_readlink(repo, item.oid))
def item_size(repo, item):
    """Return the size of item, computing it if necessary."""
    # NOTE(review): the binding of `m` (presumably item.meta) and the
    # cached-size return are not visible in this view.
    if isinstance(m, Metadata) and m.size is not None:
    return _compute_item_size(repo, item)
def tree_data_reader(repo, oid):
    """Return an open reader covering all of the data within oid; when
    oid refers to a tree, its contents are recursively concatenated."""
    reader = _FileReader(repo, oid)
    return reader
def fopen(repo, item):
    """Return an open reader for the given file item."""
    mode = item_mode(item)
    assert S_ISREG(mode)
    return tree_data_reader(repo, item.oid)
def _commit_item_from_data(oid, data):
    """Return a Commit for the commit whose hash is oid and whose raw
    object content is data. The result's oid is the commit's tree
    hash, and its coid is the commit hash itself (cf. the module
    docstring)."""
    info = parse_commit(data)
    # Commit requires all three fields; the previous call omitted the
    # mandatory coid and would have raised a TypeError.
    return Commit(meta=default_dir_mode,
                  oid=info.tree.decode('hex'),
                  coid=oid)
def _commit_item_from_oid(repo, oid, require_meta):
    # Return the Commit item for oid, consulting the cache first and
    # caching the (possibly metadata-augmented) result.
    # NOTE(review): the early cache-hit return and the guard around the
    # metadata replacement are not fully visible in this view.
    commit = cache_get_commit_item(oid, need_meta=require_meta)
    if commit and ((not require_meta) or isinstance(commit.meta, Metadata)):
    it = repo.cat(oid.encode('hex'))
    _, typ, size = next(it)
    assert typ == 'commit'
    commit = _commit_item_from_data(oid, ''.join(it))
        meta = _find_treeish_oid_metadata(repo, commit.oid)
            commit = commit._replace(meta=meta)
    commit_key = b'itm:' + oid
    cache_notice(commit_key, commit)
def _revlist_item_from_oid(repo, oid, require_meta):
    """Return a RevList for oid whose meta comes from the corresponding
    commit item (loaded via _commit_item_from_oid)."""
    commit_item = _commit_item_from_oid(repo, oid, require_meta)
    return RevList(meta=commit_item.meta, oid=oid)
def root_items(repo, names=None, want_meta=True):
    """Yield (name, item) for the items in '/' in the VFS. Return
    everything if names is logically false, otherwise return only
    items with a name in the collection.
    """
    # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?
    # NOTE(review): the branch yielding '.', '.tag', and the per-name
    # lookup loop are only partially visible in this view.

    # FIXME: maybe eventually support repo.clone() or something
    # and pass in two repos, so we can drop the tuple() and stream
    # in parallel (i.e. meta vs refs).
    for name, oid in tuple(repo.refs([], limit_to_heads=True)):
        assert(name.startswith('refs/heads/'))
        # Strip the 'refs/heads/' prefix for the VFS name.
        yield name[11:], _revlist_item_from_oid(repo, oid, want_meta)
    if ref in ('.', '.tag'):
    it = repo.cat('refs/heads/' + ref)
    oidx, typ, size = next(it)
    assert typ == 'commit'
    commit = parse_commit(''.join(it))
    yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'), want_meta)
def ordered_tree_entries(tree_data, bupm=None):
    """Yields (name, mangled_name, kind, gitmode, oid) for each item in
    tree, sorted by name.
    """
    # Sadly, the .bupm entries currently aren't in git tree order,
    # i.e. they don't account for the fact that git sorts trees
    # (including our chunked trees) as if their names ended with "/",
    # so "fo" sorts after "fo." iff fo is a directory. This makes
    # streaming impossible when we need the metadata.
    def result_from_tree_entry(tree_entry):
        # Demangle bup's name encoding to recover the original name
        # and item kind.
        gitmode, mangled_name, oid = tree_entry
        name, kind = git.demangle_name(mangled_name, gitmode)
        return name, mangled_name, kind, gitmode, oid

    # NOTE(review): the conditional guarding the sort (presumably only
    # when bupm is provided) and the final yield are not visible here.
    tree_ents = (result_from_tree_entry(x) for x in tree_decode(tree_data))
        tree_ents = sorted(tree_ents, key=lambda x: x[0])
    for ent in tree_ents:
def tree_items(oid, tree_data, names=frozenset(), bupm=None):
    # Yield (name, item) for the entries of the tree at oid, starting
    # with '.', optionally restricted to names, reading per-entry
    # metadata from bupm when provided.
    # NOTE(review): several guards and skip/continue lines are not
    # visible in this view; the two halves below are the "all entries"
    # and "only requested names" cases.
    def tree_item(ent_oid, kind, gitmode):
        if kind == BUP_CHUNKED:
            meta = Metadata.read(bupm) if bupm else default_file_mode
            return Chunky(oid=ent_oid, meta=meta)
        # No metadata here (accessible via '.' inside ent_oid).
        return Item(meta=default_dir_mode, oid=ent_oid)
        return Item(oid=ent_oid,
                    meta=(Metadata.read(bupm) if bupm \
                          else _default_mode_for_gitmode(gitmode)))

    assert len(oid) == 20
    # No names requested: yield '.' then every entry.
    dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
    yield '.', Item(oid=oid, meta=dot_meta)
    tree_entries = ordered_tree_entries(tree_data, bupm)
    for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
        if mangled_name == '.bupm':
        yield name, tree_item(ent_oid, kind, gitmode)

    # Assumes the tree is properly formed, i.e. there are no
    # duplicates, and entries will be in git tree order.
    if type(names) not in (frozenset, set):
        names = frozenset(names)
    remaining = len(names)

    # Account for the bupm sort order issue (cf. ordered_tree_entries above)
    last_name = max(names) if bupm else max(names) + '/'
    dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
    yield '.', Item(oid=oid, meta=dot_meta)
    tree_entries = ordered_tree_entries(tree_data, bupm)
    for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
        if mangled_name == '.bupm':
        if name not in names:
            break # given bupm sort order, we're finished
        # Keep the .bupm stream in sync for entries that carry metadata.
        if (kind == BUP_CHUNKED or not S_ISDIR(gitmode)) and bupm:
        yield name, tree_item(ent_oid, kind, gitmode)
def tree_items_with_meta(repo, oid, tree_data, names):
    # Like tree_items(), but locates the tree's .bupm stream first so
    # per-entry metadata can be supplied.
    # For now, the .bupm order doesn't quite match git's, and we don't
    # load the tree data incrementally anyway, so we just work in RAM
    # NOTE(review): the initialization of `bupm` and loop break are not
    # visible in this view.
    assert len(oid) == 20
    for _, mangled_name, sub_oid in tree_decode(tree_data):
        if mangled_name == '.bupm':
            bupm = _FileReader(repo, sub_oid)
        if mangled_name > '.bupm':
    for item in tree_items(oid, tree_data, names, bupm):
# Save names: YYYY-MM-DD-HHMMSS with an optional -N disambiguation
# suffix (cf. _reverse_suffix_duplicates and _name_for_rev).
_save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
591 def _reverse_suffix_duplicates(strs):
592 """Yields the elements of strs, with any runs of duplicate values
593 suffixed with -N suffixes, where the zero padded integer N
594 decreases to 0 by 1 (e.g. 10, 09, ..., 00).
597 for name, duplicates in groupby(strs):
598 ndup = len(tuple(duplicates))
602 ndig = len(str(ndup - 1))
603 fmt = '%s-' + '%0' + str(ndig) + 'd'
604 for i in range(ndup - 1, -1, -1):
605 yield fmt % (name, i)
# NOTE(review): the def line for this helper is not visible in this
# view; it parses one "TREE_HEX AUTHOR_SEC" line as produced by
# repo.rev_list(..., format='%T %at') (see cache_commit below).
    items = f.readline().split(None)
    assert len(items) == 2
    tree, auth_sec = items
    return tree.decode('hex'), int(auth_sec)
613 def _name_for_rev(rev):
614 commit_oidx, (tree_oid, utc) = rev
615 return strftime('%Y-%m-%d-%H%M%S', localtime(utc))
def _item_for_rev(rev):
    # Return (and cache) the Commit item for one rev_list result, as
    # parsed into (commit_oidx, (tree_oid, utc)).
    # NOTE(review): the cache-hit and final returns are not visible in
    # this view.
    commit_oidx, (tree_oid, utc) = rev
    coid = commit_oidx.decode('hex')
    item = cache_get_commit_item(coid, need_meta=False)
    item = Commit(meta=default_dir_mode, oid=tree_oid, coid=coid)
    commit_key = b'itm:' + coid
    cache_notice(commit_key, item)
def cache_commit(repo, oid):
    """Build, cache, and return a "name -> commit_item" dict of the entire
    commit rev-list rooted at oid, including '.' and a 'latest' FakeLink
    to the most recent save.
    """
    # For now, always cache with full metadata
    # NOTE(review): the dict construction, rev-list parsing, and return
    # are only partially visible in this view.
    entries['.'] = _revlist_item_from_oid(repo, oid, True)
    revs = repo.rev_list((oid.encode('hex'),), format='%T %at',
    rev_items, rev_names = tee(revs)
    revs = None # Don't disturb the tees
    rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
    rev_items = (_item_for_rev(x) for x in rev_items)
    for item in rev_items:
        name = next(rev_names)
        # Remember the first (most recent) save for the 'latest' link.
        tip = tip or (name, item)
    entries['latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
    revlist_key = b'rvl:' + tip[1].coid
    cache_notice(revlist_key, entries)
def revlist_items(repo, oid, names):
    # Yield (name, commit_item) for the saves reachable from commit
    # oid, optionally restricted to names, using the shared cache.
    # NOTE(review): several guards/returns are not visible in this view.
    assert len(oid) == 20

    # Special case '.' instead of caching the whole history since it's
    # the only way to get the metadata for the commit.
    if names and all(x == '.' for x in names):
        yield '.', _revlist_item_from_oid(repo, oid, True)

    # For now, don't worry about the possibility of the contents being
    # "too big" for the cache.
    revlist_key = b'rvl:' + oid
    entries = cache_get(revlist_key)
        entries = cache_commit(repo, oid)

    # No names: yield everything in sorted order.
    for name in sorted(entries.keys()):
        yield name, entries[name]

    # Otherwise, only yield the requested, valid save names.
    names = frozenset(name for name in names
                      if _save_name_rx.match(name) or name in ('.', 'latest'))

    yield '.', entries['.']
    for name in (n for n in names if n != '.'):
        commit = entries.get(name)
def tags_items(repo, names):
    # Yield (name, item) for the repository's tags (the /.tag dir),
    # optionally restricted to names.
    # NOTE(review): the nested tag_item def line and several branch
    # conditions are not visible in this view.

        assert len(oid) == 20
        oidx = oid.encode('hex')
        _, typ, size = next(it)
        # Tags pointing at commits become commit items...
        return cache_get_commit_item(oid, need_meta=False) \
            or _commit_item_from_data(oid, ''.join(it))
        # ...blobs become files, trees become directories.
        return Item(meta=default_file_mode, oid=oid)
        return Item(meta=default_dir_mode, oid=oid)
        raise Exception('unexpected tag type ' + typ + ' for tag ' + name)

    # We have to pull these all into ram because tag_item calls cat()
    for name, oid in tuple(repo.refs(names, limit_to_tags=True)):
        assert(name.startswith('refs/tags/'))
        yield name, tag_item(oid)

    # Assumes no duplicate refs
    if type(names) not in (frozenset, set):
        names = frozenset(names)
    remaining = len(names)
    last_name = max(names)
    for name, oid in repo.refs(names, limit_to_tags=True):
        assert(name.startswith('refs/tags/'))
        if name not in names:
        yield name, tag_item(oid)
def contents(repo, item, names=None, want_meta=True):
    """Yields information about the items contained in item. Yields
    (name, item) for each name in names, if the name exists, in an
    unspecified order. If there are no names, then yields (name,
    item) for all items, including, a first item named '.'
    representing the container itself.

    The meta value for any directories other than '.' will be a
    default directory mode, not a Metadata object. This is because
    the actual metadata for a directory is stored inside the directory
    (see fill_in_metadata_if_dir() or ensure_item_has_metadata()).

    Note that want_meta is advisory. For any given item, item.meta
    might be a Metadata instance or a mode, and if the former,
    meta.size might be None. Missing sizes can be computed via
    item_size() or augment_item_meta(..., include_size=True).

    Do not modify any item.meta Metadata instances directly. If
    needed, make a copy via item.meta.copy() and modify that instead.
    """
    # Q: are we comfortable promising '.' first when no names?
    # NOTE(review): the dispatch conditionals on item type are only
    # partially visible in this view.
    assert S_ISDIR(item_mode(item))
    if item_t in real_tree_types:
        it = repo.cat(item.oid.encode('hex'))
        _, obj_t, size = next(it)
        # Note: it shouldn't be possible to see an Item with type
        # 'commit' since a 'commit' should always produce a Commit.
        raise Exception('unexpected git ' + obj_t)
        item_gen = tree_items_with_meta(repo, item.oid, data, names)
        item_gen = tree_items(item.oid, data, names)
    elif item_t == RevList:
        item_gen = revlist_items(repo, item.oid, names)
        item_gen = root_items(repo, names, want_meta)
        item_gen = tags_items(repo, names)
        raise Exception('unexpected VFS item ' + str(item))
def _resolve_path(repo, path, parent=None, want_meta=True, deref=False):
    # Core path resolution: walk path segment by segment from parent
    # (or the root), consulting the shared cache, following symlinks
    # when deref is set. See resolve() for the full contract.
    # NOTE(review): a large number of lines are missing from this view;
    # the visible lines are annotated but otherwise left as-is.
    cache_key = b'res:%d%d%d:%s\0%s' \
                % (bool(want_meta), bool(deref), repo.id(),
                   ('/'.join(x[0] for x in parent) if parent else ''),
    resolution = cache_get(cache_key)

    def notice_resolution(r):
        # Record r in the cache before handing it back.
        cache_notice(cache_key, r)

    def raise_dir_required_but_not_dir(path, parent, past):
        raise IOError(ENOTDIR,
                      "path %r%s resolves to non-directory %r"
                      ' (relative to %r)' % parent if parent else '',

    # Sanity-check the provided parent: (name, item) pairs rooted at '/'.
    assert type(x[0]) in (bytes, str)
    assert type(x[1]) in item_types
    assert parent[0][1] == _root
    if not S_ISDIR(item_mode(parent[-1][1])):
        raise IOError(ENOTDIR,
                      'path resolution parent %r is not a directory'
    is_absolute, must_be_dir, future = _decompose_path(path)
    if not future: # path was effectively '.' or '/'
        return notice_resolution((('', _root),))
        return notice_resolution(tuple(parent))
        return notice_resolution((('', _root),))

    past = list(parent) if parent else [('', _root)]
        if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
            raise_dir_required_but_not_dir(path, parent, past)
        return notice_resolution(tuple(past))
        segment = future.pop()
            if len(past) > 1: # .. from / is /
                assert S_ISDIR(item_mode(past[-1][1]))
            parent_name, parent_item = past[-1]
            # Request '.' too when we want metadata, since the parent's
            # '.' entry is what carries it.
            wanted = (segment,) if not want_meta else ('.', segment)
            items = tuple(contents(repo, parent_item, names=wanted,
                                   want_meta=want_meta))
                item = items[0][1] if items else None
            else: # First item will be '.' and have the metadata
                item = items[1][1] if len(items) == 2 else None
                dot, dot_item = items[0]
            past[-1] = parent_name, parent_item
                # Missing segment: record it as (segment, None).
                past.append((segment, None),)
                return notice_resolution(tuple(past))
            mode = item_mode(item)
            if not S_ISLNK(mode):
                if not S_ISDIR(mode):
                    past.append((segment, item),)
                    raise IOError(ENOTDIR,
                                  'path %r%s ends internally in non-directory here: %r'
                                  ' (relative to %r)' % parent if parent else '',
                    raise_dir_required_but_not_dir(path, parent, past)
                    return notice_resolution(tuple(past))
                # Directories: optionally pull in their real metadata.
                if want_meta and type(item) in real_tree_types:
                    dir_meta = _find_treeish_oid_metadata(repo, item.oid)
                        item = item._replace(meta=dir_meta)
                    past.append((segment, item))
                # Symlink handling: stop here for a trailing link when
                # not dereferencing, otherwise splice in the target.
                if not future and not deref:
                    past.append((segment, item),)
                'too many symlinks encountered while resolving %r%s'
                % (path, ' relative to %r' % parent if parent else ''),
                terminus=tuple(past + [(segment, item)]))
            target = readlink(repo, item)
            is_absolute, _, target_future = _decompose_path(target)
                if not target_future: # path was effectively '/'
                    return notice_resolution((('', _root),))
                future = target_future
                future.extend(target_future)
def lresolve(repo, path, parent=None, want_meta=True):
    """Perform exactly the same function as resolve(), except if the final
    path element is a symbolic link, don't follow it, just return it
    in the result.
    """
    # deref=False is precisely what distinguishes lresolve from
    # resolve() (which passes deref=True).
    return _resolve_path(repo, path, parent=parent, want_meta=want_meta,
                         deref=False)
def resolve(repo, path, parent=None, want_meta=True):
    """Follow the path in the virtual filesystem and return a tuple
    representing the location, if any, denoted by the path. Each
    element in the result tuple will be (name, info), where info will
    be a VFS item that can be passed to functions like item_mode().

    If a path segment that does not exist is encountered during
    resolution, the result will represent the location of the missing
    item, and that item in the result will be None.

    Any attempt to traverse a non-directory will raise a VFS ENOTDIR

    Any symlinks along the path, including at the end, will be
    resolved. A VFS IOError with the errno attribute set to ELOOP
    will be raised if too many symlinks are traversed while following
    the path. That exception is effectively like a normal
    ELOOP IOError exception, but will include a terminus element
    describing the location of the failure, which will be a tuple of
    (name, info) elements.

    The parent, if specified, must be a sequence of (name, item)
    tuples, and will provide the starting point for the resolution of
    the path. If no parent is specified, resolution will start at

    The result may include elements of parent directly, so they must
    not be modified later. If this is a concern, pass in "name,
    copy_item(item) for name, item in parent" instead.

    When want_meta is true, detailed metadata will be included in each
    result item if it's available, otherwise item.meta will be an
    integer mode. The metadata size may or may not be provided, but
    can be computed by item_size() or augment_item_meta(...,
    include_size=True). Setting want_meta=False is rarely desirable
    since it can limit the VFS to just the metadata git itself can
    represent, and so, as an example, fifos and sockets will appear to
    be regular files (e.g. S_ISREG(item_mode(item)) will be true) .
    But the option is provided because it may be more efficient when
    only the path names or the more limited metadata is sufficient.

    Do not modify any item.meta Metadata instances directly. If
    needed, make a copy via item.meta.copy() and modify that instead.
    """
    # NOTE(review): the deref=True argument to _resolve_path and some
    # following lines are not visible in this view.
    result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
    _, leaf_item = result[-1]
        # A trailing symlink must have been dereferenced by now.
        assert not S_ISLNK(item_mode(leaf_item))
def try_resolve(repo, path, parent=None, want_meta=True):
    """If path does not refer to a symlink, does not exist, or refers to a
    valid symlink, behave exactly like resolve(). If path refers to
    an invalid symlink, behave like lresolve.
    """
    # NOTE(review): the return statements for each branch are not
    # visible in this view.
    res = lresolve(repo, path, parent=parent, want_meta=want_meta)
    leaf_name, leaf_item = res[-1]
    if not S_ISLNK(item_mode(leaf_item)):
    # Leaf is a symlink: try to dereference it relative to its parent.
    deref = resolve(repo, leaf_name, parent=res[:-1], want_meta=want_meta)
    deref_name, deref_item = deref[-1]
def augment_item_meta(repo, item, include_size=False):
    """Ensure item has a Metadata instance for item.meta. If item.meta is
    currently a mode, replace it with a compatible "fake" Metadata
    instance. If include_size is true, ensure item.meta.size is
    correct, computing it if needed. If item.meta is a Metadata
    instance, this call may modify it in place or replace it.
    """
    # If we actually had parallelism, we'd need locking...
    # NOTE(review): the construction of the fake Metadata (mode, etc.)
    # is only partially visible in this view.
    if isinstance(m, Metadata):
        if include_size and m.size is None:
            m.size = _compute_item_size(repo, item)
            return item._replace(meta=m)
    # Fake Metadata: zero out ownership and timestamps.
    meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0
    if isinstance(item, FakeLink):
        target = _readlink(repo, item.oid)
        meta.symlink_target = target
        meta.size = len(target)
        meta.size = _compute_item_size(repo, item)
    return item._replace(meta=meta)
def fill_in_metadata_if_dir(repo, item):
    """If item is a directory and item.meta is not a Metadata instance,
    attempt to find the metadata for the directory. If found, return
    a new item augmented to include that metadata. Otherwise, return
    item. May be useful for the output of contents().
    """
    # NOTE(review): the return statements are not visible in this view.
    if S_ISDIR(item_mode(item)) and not isinstance(item.meta, Metadata):
        # The directory's own metadata lives in its '.' entry.
        items = tuple(contents(repo, item, ('.',), want_meta=True))
        assert len(items) == 1
        assert items[0][0] == '.'
def ensure_item_has_metadata(repo, item, include_size=False):
    """If item is a directory, attempt to find and add its metadata. If
    the item still doesn't have a Metadata instance for item.meta,
    give it one via augment_item_meta(). May be useful for the output
    of contents().
    """
    return augment_item_meta(repo,
                             fill_in_metadata_if_dir(repo, item),
                             include_size=include_size)