1 """Virtual File System interface to bup repository content.
3 This module provides a path-based interface to the content of a bup
6 The VFS is structured like this:
9 /SAVE-NAME/SAVE-DATE/...
12 Each path is represented by an item that has at least an item.meta which
13 may be either a Metadata object, or an integer mode. Functions like
14 item_mode() and item_size() will return the mode and size in either
15 case. Any item.meta Metadata instances must not be modified directly.
16 Make a copy to modify via item.meta.copy() if needed, or call
19 The want_meta argument is advisory for calls that accept it, and it
20 may not be honored. Callers must be able to handle an item.meta value
21 that is either an instance of Metadata or an integer mode, perhaps
22 via item_mode() or augment_item_meta().
24 Setting want_meta=False is rarely desirable since it can limit the VFS
25 to only the metadata that git itself can represent, and so for
26 example, fifos and sockets will appear to be regular files
27 (e.g. S_ISREG(item_mode(item)) will be true). But the option is still
28 provided because it may be more efficient when just the path names or
29 the more limited metadata is sufficient.
31 Any given metadata object's size may be None, in which case the size
32 can be computed via item_size() or augment_item_meta(...,
35 When traversing a directory using functions like contents(), the meta
36 value for any directories other than '.' will be a default directory
37 mode, not a Metadata object. This is because the actual metadata for
38 a directory is stored inside the directory (see
39 fill_in_metadata_if_dir() or ensure_item_has_metadata()).
41 Commit items represent commits (e.g. /.tag/some-commit or
42 /foo/latest), and for most purposes, they appear as the underlying
43 tree. S_ISDIR(item_mode(item)) will return true for both tree Items
44 and Commits and the commit's oid is the tree hash; the commit hash is
49 from __future__ import absolute_import, print_function
50 from collections import namedtuple
51 from errno import EINVAL, ELOOP, ENOENT, ENOTDIR
52 from itertools import chain, dropwhile, groupby, tee
53 from random import randrange
54 from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
55 from time import localtime, strftime
56 import exceptions, re, sys
58 from bup import git, metadata, vint
59 from bup.compat import range
60 from bup.git import BUP_CHUNKED, cp, get_commit_items, parse_commit, tree_decode
61 from bup.helpers import debug2, last
62 from bup.metadata import Metadata
63 from bup.vint import read_bvec, write_bvec
64 from bup.vint import read_vint, write_vint
65 from bup.vint import read_vuint, write_vuint
67 # We currently assume that it's always appropriate to just forward IOErrors
class IOError(exceptions.IOError):
    """IOError that can also carry a resolution "terminus": the partial
    path resolution (a tuple of (name, item) pairs) at the point where
    the failure occurred, or None when not applicable."""
    def __init__(self, errno, message, terminus=None):
        # Record the terminus, then defer to the stock (Python 2)
        # IOError initialization for errno/strerror handling.
        self.terminus = terminus
        exceptions.IOError.__init__(self, errno, message)
# Serialize a vfs.IOError to port as a presence bitmask (1=errno,
# 2=message, 4=terminus) followed by each present field, in that order.
# NOTE(review): the line that actually emits the bitmask (presumably a
# write_vuint(port, ...) call, orig line 77) is missing from this
# listing -- confirm against upstream before relying on this text.
75 def write_ioerror(port, ex):
76     assert isinstance(ex, IOError)
78         (1 if ex.errno is not None else 0)
79         | (2 if ex.message is not None else 0)
80         | (4 if ex.terminus is not None else 0))
81     if ex.errno is not None:
82         write_vint(port, ex.errno)
83     if ex.message is not None:
84         write_bvec(port, ex.message.encode('utf-8'))
85     if ex.terminus is not None:
86         write_resolution(port, ex.terminus)
def read_ioerror(port):
    """Read and return a vfs.IOError from port, in the format produced
    by write_ioerror(): a presence bitmask, then each present field."""
    mask = read_vuint(port)
    # Fields must be consumed in the same order write_ioerror emits them.
    err = read_vint(port) if mask & 1 else None
    msg = read_bvec(port).decode('utf-8') if mask & 2 else None
    where = read_resolution(port) if mask & 4 else None
    return IOError(errno=err, message=msg, terminus=where)
# Fallback st_mode values used when an item has no Metadata instance
# (i.e. only what git itself can represent).
96 default_file_mode = S_IFREG | 0o644
97 default_dir_mode = S_IFDIR | 0o755
98 default_symlink_mode = S_IFLNK | 0o755
# Map a git tree-entry mode to one of the default_* modes above.
# NOTE(review): the S_ISREG/S_ISDIR/S_ISLNK condition lines (orig
# 101/103/105) are missing from this listing -- confirm upstream.
100 def _default_mode_for_gitmode(gitmode):
102     return default_file_mode
104     return default_dir_mode
106     return default_symlink_mode
107 raise Exception('unexpected git mode ' + oct(gitmode))
# For a chunked file, descend the tree-of-chunks via its last entry
# (whose name is the chunk's hex offset) until a blob is reached, then
# add the remaining blob bytes to that offset.
# NOTE(review): the ofs initialization/update lines (orig 114/117)
# are missing from this listing -- confirm against upstream.
109 def _normal_or_chunked_file_size(repo, oid):
110     """Return the size of the normal or chunked file indicated by oid."""
111     # FIXME: --batch-format CatPipe?
112     it = repo.cat(oid.encode('hex'))
113     _, obj_t, size = next(it)
115     while obj_t == 'tree':
116         mode, name, last_oid = last(tree_decode(''.join(it)))
118         it = repo.cat(last_oid.encode('hex'))
119         _, obj_t, size = next(it)
120     return ofs + sum(len(b) for b in it)
# Advance a chunked-file tree iterator past all chunks that end before
# offset; chunk names are the hex byte offset of the chunk in the file.
# NOTE(review): the loop header and several branch lines (orig
# 124-127, 129) are missing from this listing -- confirm upstream.
122 def _skip_chunks_before_offset(tree, offset):
123     prev_ent = next(tree, None)
128     ent_ofs = int(ent[1], 16)
130     return chain([prev_ent, ent], tree)
131     if ent_ofs == offset:
132     return chain([ent], tree)
# Recursively yield the data of a chunked file starting at startofs,
# descending sub-trees and slicing the first blob at the skip point.
# NOTE(review): several lines are missing from this listing (orig 141,
# 143-144, 147-148, 151-152, including the recursive yield) -- confirm
# against upstream before relying on this text.
136 def _tree_chunks(repo, tree, startofs):
137     "Tree should be a sequence of (name, mode, hash) as per tree_decode()."
138     assert(startofs >= 0)
139     # name is the chunk's hex offset in the original file
140     for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
142         skipmore = startofs - ofs
145         it = repo.cat(oid.encode('hex'))
146         _, obj_t, size = next(it)
149         assert obj_t == 'tree'
150         for b in _tree_chunks(repo, tree_decode(data), skipmore):
153         assert obj_t == 'blob'
154         yield data[skipmore:]
# Methods of the _ChunkReader class (class statement, orig ~156, is not
# visible in this listing). Streams the bytes of a normal or chunked
# file from startofs; self.it iterates chunk data for trees, self.blob
# holds leftover bytes for blobs.
157 def __init__(self, repo, oid, startofs):
158     it = repo.cat(oid.encode('hex'))
159     _, obj_t, size = next(it)
160     isdir = obj_t == 'tree'
163     self.it = _tree_chunks(repo, tree_decode(data), startofs)
167     self.blob = data[startofs:]
# next(size): accumulate up to size bytes, refilling self.blob from
# self.it until exhausted. NOTE(review): the out initialization and
# StopIteration handling lines are missing from this listing.
170 def next(self, size):
172     while len(out) < size:
173         if self.it and not self.blob:
175             self.blob = self.it.next()
176         except StopIteration:
179         want = size - len(out)
180         out += self.blob[:want]
181         self.blob = self.blob[want:]
184     debug2('next(%d) returned %d\n' % (size, len(out)))
# Seekable, context-manager file reader over a (possibly chunked) file
# identified by a 20-byte oid. Size is computed lazily; reads are
# delegated to a _ChunkReader positioned at the current offset.
# NOTE(review): many lines (orig 191-194, 196, 198, 200-202, 205-209,
# 212-214, 218, 220-230, 232) are missing from this listing.
188 class _FileReader(object):
189     def __init__(self, repo, oid, known_size=None):
190         assert len(oid) == 20
195         self._size = known_size
197     def _compute_size(self):
199         self._size = _normal_or_chunked_file_size(self._repo, self.oid)
203         if ofs < 0 or ofs > self._compute_size():
204         raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)
210     def read(self, count=-1):
211         size = self._compute_size()
215         count = size - self.ofs
216         if not self.reader or self.reader.ofs != self.ofs:
217         self.reader = _ChunkReader(self._repo, self.oid, self.ofs)
219         buf = self.reader.next(count)
222         raise # our offsets will be all screwed up otherwise
231     def __exit__(self, type, value, traceback):
# Collapses runs of slashes before decomposition.
235 _multiple_slashes_rx = re.compile(r'//+')
# NOTE(review): the docstring says "a boolean" but the function returns
# a triple (is_absolute, must_be_dir, parts); several lines (orig
# 242-243, 245, 249-250, 253, 256-257 -- likely including the
# parts.reverse() implied by "reversed list") are missing here.
237 def _decompose_path(path):
238     """Return a boolean indicating whether the path is absolute, and a
239     reversed list of path elements, omitting any occurrences of "."
240     and ignoring any leading or trailing slash. If the path is
241     effectively '/' or '.', return an empty list.
244     path = re.sub(_multiple_slashes_rx, '/', path)
246     return True, True, []
247     is_absolute = must_be_dir = False
248     if path.startswith('/'):
251     for suffix in ('/', '/.'):
252     if path.endswith(suffix):
254     path = path[:-len(suffix)]
255     parts = [x for x in path.split('/') if x != '.']
258     must_be_dir = True # e.g. path was effectively '.' or '/', etc.
259     return is_absolute, must_be_dir, parts
# Lightweight VFS item types. In every case, meta is either a Metadata
# instance or an integer st_mode-style value.
Item = namedtuple('Item', ('meta', 'oid'))        # plain blob/tree in the repo
Chunky = namedtuple('Chunky', ('meta', 'oid'))    # bup chunked file (tree of chunks)
FakeLink = namedtuple('FakeLink', ('meta', 'target'))  # synthetic symlink, e.g. "latest"
# Fix: the field specs for Root and Tags were bare strings -- ('meta')
# is just 'meta' -- which namedtuple happens to accept as a
# whitespace/comma-separated field string. Use real one-element tuples
# so the intent is explicit; the resulting classes are identical.
Root = namedtuple('Root', ('meta',))              # the VFS root directory
Tags = namedtuple('Tags', ('meta',))              # the /.tag directory
RevList = namedtuple('RevList', ('meta', 'oid'))  # a branch's list of saves
Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))  # oid is the tree; coid the commit

# All types that may appear in resolutions; FakeLink is deliberately
# handled separately in read/write_item.
item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit))
# Types whose oid refers directly to a real git tree (or its commit).
real_tree_types = frozenset((Item, Commit))
# Serialize a VFS item to port: type name, has_meta flag, then the
# type-specific payload; a full Metadata record follows when meta is a
# Metadata instance, otherwise the integer mode is sent inline.
# NOTE(review): several lines (orig 274, 276, 280, 283, 286, 289, 291,
# 294, 297, 299, 301, 304 -- the kind/meta setup and if/else branch
# lines) are missing from this listing.
273 def write_item(port, item):
275     name = bytes(kind.__name__)
277     has_meta = 1 if isinstance(meta, Metadata) else 0
278     if kind in (Item, Chunky, RevList):
279         assert len(item.oid) == 20
281         vint.send(port, 'sVs', name, has_meta, item.oid)
282         Metadata.write(meta, port, include_path=False)
284         vint.send(port, 'sVsV', name, has_meta, item.oid, item.meta)
285     elif kind in (Root, Tags):
287         vint.send(port, 'sV', name, has_meta)
288         Metadata.write(meta, port, include_path=False)
290         vint.send(port, 'sVV', name, has_meta, item.meta)
292         assert len(item.oid) == 20
293         assert len(item.coid) == 20
295         vint.send(port, 'sVss', name, has_meta, item.oid, item.coid)
296         Metadata.write(meta, port, include_path=False)
298         vint.send(port, 'sVssV', name, has_meta, item.oid, item.coid,
300     elif kind == FakeLink:
302         vint.send(port, 'sVs', name, has_meta, item.target)
303         Metadata.write(meta, port, include_path=False)
305         vint.send(port, 'sVsV', name, has_meta, item.target, item.meta)
# Inverse of write_item(): read a type name and has_meta flag, then the
# type-specific payload, dispatching on the type name. read_m is a
# nested helper returning a Metadata instance or an integer mode.
# NOTE(review): the enclosing "def read_item(port):" line (orig ~309)
# and several branch lines (orig 311, 313, 316, 325, 327, 336) are
# missing from this listing -- confirm against upstream.
310 def read_m(port, has_meta):
312     m = Metadata.read(port)
314     return read_vuint(port)
315 kind, has_meta = vint.recv(port, 'sV')
317     oid, meta = read_bvec(port), read_m(port, has_meta)
318     return Item(oid=oid, meta=meta)
319 if kind == b'Chunky':
320     oid, meta = read_bvec(port), read_m(port, has_meta)
321     return Chunky(oid=oid, meta=meta)
322 if kind == b'RevList':
323     oid, meta = read_bvec(port), read_m(port, has_meta)
324     return RevList(oid=oid, meta=meta)
326     return Root(meta=read_m(port, has_meta))
328     return Tags(meta=read_m(port, has_meta))
329 if kind == b'Commit':
330     oid, coid = vint.recv(port, 'ss')
331     meta = read_m(port, has_meta)
332     return Commit(oid=oid, coid=coid, meta=meta)
333 if kind == b'FakeLink':
334     target, meta = read_bvec(port), read_m(port, has_meta)
335     return FakeLink(target=target, meta=meta)
# Serialize a resolution (sequence of (name, item) pairs) to port:
# count, then for each entry its name and, when present, its item.
# NOTE(review): the item-presence byte lines (orig 342-343) are missing
# from this listing.
338 def write_resolution(port, resolution):
339     write_vuint(port, len(resolution))
340     for name, item in resolution:
341         write_bvec(port, name)
344         write_item(port, item)
# Inverse of write_resolution(); missing entries have item == None.
# NOTE(review): the count read, loop header, and final tuple(result)
# return (orig 349-351, 357) are missing from this listing.
348 def read_resolution(port):
352     name = read_bvec(port)
353     have_item = ord(port.read(1))
354     assert have_item in (0, 1)
355     item = read_item(port) if have_item else None
356     result.append((name, item))
# Singleton items representing the VFS root and the /.tag directory.
360 _root = Root(meta=default_dir_mode)
361 _tags = Tags(meta=default_dir_mode)
366 ### A general purpose shared cache with (currently) cheap random
367 ### eviction. At the moment there is no weighting so a single commit
368 ### item is just as likely to be evicted as an entire "rev-list". See
369 ### is_valid_cache_key for a description of the expected content.
373 _cache_max_items = 30000
# NOTE(review): the "global" line below belongs to a cache-reset
# function whose def line (and the _cache/_cache_keys initializations)
# are missing from this listing.
376     global _cache, _cache_keys
# Valid keys: b'res:...' (resolutions), b'itm:OID' (commit items),
# b'rvl:OID' (name -> commit dicts); OID is 20 bytes, so 'itm:'/'rvl:'
# keys are exactly 24 bytes long.
380 def is_valid_cache_key(x):
381     """Return logically true if x looks like it could be a valid cache key
382     (with respect to structure). Current valid cache entries:
383     res:... -> resolution
385     rvl:OID -> {'.', commit, '2012...', next_commit, ...}
387     # Suspect we may eventually add "(container_oid, name) -> ...", and others.
391     if tag in ('itm:', 'rvl:') and len(x) == 24:
# cache_get fragment: validate key shape, then plain dict lookup
# (returns None on miss). The def line (orig ~397) is missing here.
398     if not is_valid_cache_key(key):
399     raise Exception('invalid cache key: ' + repr(key))
400     return _cache.get(key)
# Insert key -> value, evicting a uniformly random existing entry once
# the cache is full (cheap random eviction, no weighting).
# NOTE(review): the _cache[key] = value assignments and the del of the
# victim (orig 406-407, 410-411, 414, 416) are missing from this
# listing -- confirm against upstream.
402 def cache_notice(key, value):
403     global _cache, _cache_keys, _cache_max_items
404     if not is_valid_cache_key(key):
405     raise Exception('invalid cache key: ' + repr(key))
408     if len(_cache) < _cache_max_items:
409     _cache_keys.append(key)
412     victim_i = randrange(0, len(_cache_keys))
413     victim = _cache_keys[victim_i]
415     _cache_keys[victim_i] = key
# Look for the commit under b'itm:OID' first, then fall back to the
# '.' entry of a cached rev-list under b'rvl:OID'.
# NOTE(review): the docstring close and the return/branch lines (orig
# 421, 425-427, 429, 431-432) are missing from this listing.
418 def cache_get_commit_item(oid, need_meta=True):
419     """Return the requested tree item if it can be found in the cache.
420     When need_meta is true don't return a cached item that only has a
422     # tree might be stored independently, or as '.' with its entries.
423     commit_key = b'itm:' + oid
424     item = cache_get(commit_key)
428     if isinstance(item.meta, Metadata):
430     entries = cache_get(b'rvl:' + oid)
# Return a RevList wrapping the cached commit item for oid, or
# (presumably) None on a cache miss -- the guard line (orig 436)
# is missing from this listing.
434 def cache_get_revlist_item(oid, need_meta=True):
435     commit = cache_get_commit_item(oid, need_meta=need_meta)
437     return RevList(oid=oid, meta=commit.meta)
# Fragments of copy_item() and item_mode(); both def lines (orig ~439
# and ~449) are missing from this listing, as are copy_item's fallback
# return and item_mode's return statements -- confirm upstream.
440     """Return a completely independent copy of item, such that
441     modifications will not affect the original.
444     meta = getattr(item, 'meta', None)
445     if isinstance(meta, Metadata):
446     return(item._replace(meta=meta.copy()))
450     """Return the integer mode (stat st_mode) for item."""
452     if isinstance(m, Metadata):
# Read one Metadata record from an open .bupm stream, substituting the
# default directory mode for the empty placeholder entries that save
# writes for fake parents.
# NOTE(review): the emptiness test (orig 460) and final "return m"
# (orig 463) are missing from this listing.
456 def _read_dir_meta(bupm):
457     # This is because save writes unmodified Metadata() entries for
458     # fake parents -- test-save-strip-graft.sh demonstrates.
459     m = Metadata.read(bupm)
461     return default_dir_mode
462     assert m.mode is not None
# Resolve a commit oid to its tree, then scan the tree entries (which
# are in git sort order) for '.bupm', stopping early once past it.
# NOTE(review): data assignments from the cat iterators and several
# return lines (orig 470-471, 475, 480, 486, 488-489) are missing from
# this listing -- confirm against upstream.
467 def tree_data_and_bupm(repo, oid):
468     """Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
469     tree has no metadata (i.e. older bup save, or non-bup tree).
472     assert len(oid) == 20
473     it = repo.cat(oid.encode('hex'))
474     _, item_t, size = next(it)
476     if item_t == 'commit':
477     commit = parse_commit(data)
478     it = repo.cat(commit.tree)
479     _, item_t, size = next(it)
481     assert item_t == 'tree'
482     elif item_t != 'tree':
483     raise Exception('%r is not a tree or commit' % oid.encode('hex'))
484     for _, mangled_name, sub_oid in tree_decode(data):
485     if mangled_name == '.bupm':
487     if mangled_name > '.bupm':
# Read the directory's own metadata from its .bupm stream, if any.
# NOTE(review): the docstring close and the bupm_oid None-guard (orig
# 494-495, 497, 500) are missing from this listing.
491 def _find_treeish_oid_metadata(repo, oid):
492     """Return the metadata for the tree or commit oid, or None if the tree
493     has no metadata (i.e. older bup save, or non-bup tree).
496     tree_data, bupm_oid = tree_data_and_bupm(repo, oid)
498     with _FileReader(repo, bupm_oid) as meta_stream:
499     return _read_dir_meta(meta_stream)
def _readlink(repo, oid):
    """Return the symlink target stored in the blob at oid."""
    # repo.join yields the blob's data in pieces; concatenate them.
    pieces = repo.join(oid.encode('hex'))
    return ''.join(pieces)
# Prefer an in-memory target (FakeLink.target or meta.symlink_target)
# and only fall back to reading the blob from the repository.
# NOTE(review): the FakeLink return and the target-None fallback lines
# (orig 508, 511, 514-515) are missing from this listing.
505 def readlink(repo, item):
506     """Return the link target of item, which must be a symlink. Reads the
507     target from the repository if necessary."""
509     assert S_ISLNK(item_mode(item))
510     if isinstance(item, FakeLink):
512     if isinstance(item.meta, Metadata):
513     target = item.meta.symlink_target
516     return _readlink(repo, item.oid)
# Compute an item's size from the repository: file sizes via the
# (possibly chunked) blob walk, symlink sizes as target length.
# NOTE(review): the S_ISREG/S_ISLNK branch lines and the unexpected-
# mode fallback (orig 520, 522-523, 527) are missing from this listing.
518 def _compute_item_size(repo, item):
519     mode = item_mode(item)
521     size = _normal_or_chunked_file_size(repo, item.oid)
524     if isinstance(item, FakeLink):
525     return len(item.target)
526     return len(_readlink(repo, item.oid))
# Use the size already present in a Metadata instance when available;
# otherwise compute it from the repository.
# NOTE(review): the "m = item.meta" and "return m.size" lines (orig
# 531, 533) are missing from this listing.
529 def item_size(repo, item):
530     """Return the size of item, computing it if necessary."""
532     if isinstance(m, Metadata) and m.size is not None:
534     return _compute_item_size(repo, item)
def tree_data_reader(repo, oid):
    """Return an open reader spanning every byte reachable from oid;
    when oid names a tree, its contents are concatenated recursively."""
    return _FileReader(repo, oid)
def fopen(repo, item):
    """Return an open reader for the given file item, which must be a
    regular file."""
    mode = item_mode(item)
    assert S_ISREG(mode)
    return tree_data_reader(repo, item.oid)
# Build a Commit item from raw commit object bytes; oid is the commit
# hash, and the item's oid is the commit's tree hash.
# NOTE(review): the closing argument of the Commit(...) call
# (presumably "coid=oid)", orig 550) is missing from this listing.
546 def _commit_item_from_data(oid, data):
547     info = parse_commit(data)
548     return Commit(meta=default_dir_mode,
549     oid=info.tree.decode('hex'),
# Return (and cache under b'itm:OID') the Commit item for oid, reading
# the commit from the repo and, when require_meta, attaching the
# tree's .bupm metadata.
# NOTE(review): the early cache-hit return, the meta None-guard, and
# the final return (orig 555, 560, 562, 566) are missing from this
# listing -- confirm against upstream.
552 def _commit_item_from_oid(repo, oid, require_meta):
553     commit = cache_get_commit_item(oid, need_meta=require_meta)
554     if commit and ((not require_meta) or isinstance(commit.meta, Metadata)):
556     it = repo.cat(oid.encode('hex'))
557     _, typ, size = next(it)
558     assert typ == 'commit'
559     commit = _commit_item_from_data(oid, ''.join(it))
561     meta = _find_treeish_oid_metadata(repo, commit.oid)
563     commit = commit._replace(meta=meta)
564     commit_key = b'itm:' + oid
565     cache_notice(commit_key, commit)
def _revlist_item_from_oid(repo, oid, require_meta):
    """Return a RevList for the commit at oid, borrowing its meta from
    the underlying commit item (full Metadata when require_meta is
    true and available)."""
    underlying = _commit_item_from_oid(repo, oid, require_meta)
    return RevList(oid=oid, meta=underlying.meta)
# Two paths: with no names, stream every refs/heads/* branch; with
# names, cat each requested ref individually (plus the fixed '.' and
# '.tag' entries handled by the missing lines).
# NOTE(review): several lines (orig 576-577, 579-583, 590-596, 598,
# 601-603 -- including the no-names early path and the '.'/'.tag'
# yields) are missing from this listing -- confirm upstream.
572 def root_items(repo, names=None, want_meta=True):
573     """Yield (name, item) for the items in '/' in the VFS. Return
574     everything if names is logically false, otherwise return only
575     items with a name in the collection.
578     # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?
584     # FIXME: maybe eventually support repo.clone() or something
585     # and pass in two repos, so we can drop the tuple() and stream
586     # in parallel (i.e. meta vs refs).
587     for name, oid in tuple(repo.refs([], limit_to_heads=True)):
588     assert(name.startswith('refs/heads/'))
589     yield name[11:], _revlist_item_from_oid(repo, oid, want_meta)
597     if ref in ('.', '.tag'):
599     it = repo.cat('refs/heads/' + ref)
600     oidx, typ, size = next(it)
604     assert typ == 'commit'
605     commit = parse_commit(''.join(it))
606     yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'), want_meta)
# Demangle each tree entry and, when bupm is in use, sort by plain
# name so entries line up with the .bupm metadata stream order.
# NOTE(review): the conditional wrapping the sort and the final
# "yield ent" (orig 622, 624, 627) are missing from this listing.
608 def ordered_tree_entries(tree_data, bupm=None):
609     """Yields (name, mangled_name, kind, gitmode, oid) for each item in
610     tree, sorted by name.
613     # Sadly, the .bupm entries currently aren't in git tree order,
614     # i.e. they don't account for the fact that git sorts trees
615     # (including our chunked trees) as if their names ended with "/",
616     # so "fo" sorts after "fo." iff fo is a directory. This makes
617     # streaming impossible when we need the metadata.
618     def result_from_tree_entry(tree_entry):
619     gitmode, mangled_name, oid = tree_entry
620     name, kind = git.demangle_name(mangled_name, gitmode)
621     return name, mangled_name, kind, gitmode, oid
623     tree_ents = (result_from_tree_entry(x) for x in tree_decode(tree_data))
625     tree_ents = sorted(tree_ents, key=lambda x: x[0])
626     for ent in tree_ents:
# Yield (name, item) for the entries of the tree at oid. The '.' entry
# carries the directory's own metadata from bupm; other entries get a
# Metadata record from bupm (files/chunked files) or a default mode.
# With a names filter, metadata records for skipped entries must still
# be consumed from bupm to keep the stream aligned.
# NOTE(review): many lines are missing from this listing (orig 630,
# 635-636, 639, 643, 645, 651-652, 654-655, 661, 664-665, 668-671,
# 675-676, 678, 681-682, 684-686 -- including the no-names/with-names
# split and the remaining-count bookkeeping) -- confirm upstream.
629 def tree_items(oid, tree_data, names=frozenset(), bupm=None):
631     def tree_item(ent_oid, kind, gitmode):
632     if kind == BUP_CHUNKED:
633     meta = Metadata.read(bupm) if bupm else default_file_mode
634     return Chunky(oid=ent_oid, meta=meta)
637     # No metadata here (accessible via '.' inside ent_oid).
638     return Item(meta=default_dir_mode, oid=ent_oid)
640     return Item(oid=ent_oid,
641     meta=(Metadata.read(bupm) if bupm \
642     else _default_mode_for_gitmode(gitmode)))
644     assert len(oid) == 20
646     dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
647     yield '.', Item(oid=oid, meta=dot_meta)
648     tree_entries = ordered_tree_entries(tree_data, bupm)
649     for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
650     if mangled_name == '.bupm':
653     yield name, tree_item(ent_oid, kind, gitmode)
656     # Assumes the tree is properly formed, i.e. there are no
657     # duplicates, and entries will be in git tree order.
658     if type(names) not in (frozenset, set):
659     names = frozenset(names)
660     remaining = len(names)
662     # Account for the bupm sort order issue (cf. ordered_tree_entries above)
663     last_name = max(names) if bupm else max(names) + '/'
666     dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
667     yield '.', Item(oid=oid, meta=dot_meta)
672     tree_entries = ordered_tree_entries(tree_data, bupm)
673     for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
674     if mangled_name == '.bupm':
677     if name not in names:
679     break # given bupm sort order, we're finished
680     if (kind == BUP_CHUNKED or not S_ISDIR(gitmode)) and bupm:
683     yield name, tree_item(ent_oid, kind, gitmode)
# Locate the tree's .bupm entry (entries are in git sort order, so
# scanning can stop once past '.bupm'), open it as a stream, and
# delegate to tree_items().
# NOTE(review): the bupm initialization, loop breaks, and the yield
# forwarding each item (orig 691, 693, 697, 699, 701) are missing from
# this listing.
688 def tree_items_with_meta(repo, oid, tree_data, names):
689     # For now, the .bupm order doesn't quite match git's, and we don't
690     # load the tree data incrementally anyway, so we just work in RAM
692     assert len(oid) == 20
694     for _, mangled_name, sub_oid in tree_decode(tree_data):
695     if mangled_name == '.bupm':
696     bupm = _FileReader(repo, sub_oid)
698     if mangled_name > '.bupm':
700     for item in tree_items(oid, tree_data, names, bupm):
# Matches save names like 2012-01-31-123456 with an optional -N
# duplicate suffix.
703 _save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
# NOTE(review): the docstring close and the ndup == 1 fast path (orig
# 709-710, 713-715) are missing from this listing -- confirm upstream.
705 def _reverse_suffix_duplicates(strs):
706     """Yields the elements of strs, with any runs of duplicate values
707     suffixed with -N suffixes, where the zero padded integer N
708     decreases to 0 by 1 (e.g. 10, 09, ..., 00).
711     for name, duplicates in groupby(strs):
712     ndup = len(tuple(duplicates))
716     ndig = len(str(ndup - 1))
717     fmt = '%s-' + '%0' + str(ndig) + 'd'
718     for i in range(ndup - 1, -1, -1):
719     yield fmt % (name, i)
# Body fragment of a rev parser (its def line, orig ~721, is missing
# from this listing): reads one "TREE_HEX AUTHOR_SECONDS" line -- the
# shape produced by rev-list format '%T %at' -- and returns the binary
# tree oid with the integer author time.
722     items = f.readline().split(None)
723     assert len(items) == 2
724     tree, auth_sec = items
725     return tree.decode('hex'), int(auth_sec)
727 def _name_for_rev(rev):
728 commit_oidx, (tree_oid, utc) = rev
729 return strftime('%Y-%m-%d-%H%M%S', localtime(utc))
# Return the Commit item for rev = (commit_oidx, (tree_oid, utc)),
# using the b'itm:OID' cache and populating it on a miss (with only a
# default mode for meta).
# NOTE(review): the cache-hit return and the final return of the new
# item (orig 735-736, 740) are missing from this listing.
731 def _item_for_rev(rev):
732     commit_oidx, (tree_oid, utc) = rev
733     coid = commit_oidx.decode('hex')
734     item = cache_get_commit_item(coid, need_meta=False)
737     item = Commit(meta=default_dir_mode, oid=tree_oid, coid=coid)
738     commit_key = b'itm:' + coid
739     cache_notice(commit_key, item)
# Walk the branch history at oid via rev-list, name each save with
# _name_for_rev (disambiguating duplicates), add '.' and a 'latest'
# FakeLink to the newest save, then cache the dict under b'rvl:OID'.
# NOTE(review): the entries/tip initialization, the per-item
# entries[name] assignment, and the final return (orig 744-746, 748,
# 751, 756, 760, 764) are missing from this listing -- confirm
# against upstream.
742 def cache_commit(repo, oid):
743     """Build, cache, and return a "name -> commit_item" dict of the entire
747     # For now, always cache with full metadata
749     entries['.'] = _revlist_item_from_oid(repo, oid, True)
750     revs = repo.rev_list((oid.encode('hex'),), format='%T %at',
752     rev_items, rev_names = tee(revs)
753     revs = None # Don't disturb the tees
754     rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
755     rev_items = (_item_for_rev(x) for x in rev_items)
757     for item in rev_items:
758     name = next(rev_names)
759     tip = tip or (name, item)
761     entries['latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
762     revlist_key = b'rvl:' + tip[1].coid
763     cache_notice(revlist_key, entries)
# Yield the saves of the branch at commit oid. A lone '.' request is
# special-cased (it needs the commit's real metadata); otherwise the
# whole cached name->item dict from cache_commit() is consulted, with
# names filtered to plausible save names plus '.'/'latest'.
# NOTE(review): the early return after the '.' special case, the
# cache-miss guard, the no-names return, and the per-name yield (orig
# 768, 773-774, 779, 781-782, 785-786, 789-790, 794-795) are missing
# from this listing -- confirm against upstream.
766 def revlist_items(repo, oid, names):
767     assert len(oid) == 20
769     # Special case '.' instead of caching the whole history since it's
770     # the only way to get the metadata for the commit.
771     if names and all(x == '.' for x in names):
772     yield '.', _revlist_item_from_oid(repo, oid, True)
775     # For now, don't worry about the possibility of the contents being
776     # "too big" for the cache.
777     revlist_key = b'rvl:' + oid
778     entries = cache_get(revlist_key)
780     entries = cache_commit(repo, oid)
783     for name in sorted(entries.keys()):
784     yield name, entries[name]
787     names = frozenset(name for name in names
788     if _save_name_rx.match(name) or name in ('.', 'latest'))
791     yield '.', entries['.']
792     for name in (n for n in names if n != '.'):
793     commit = entries.get(name)
# Yield (name, item) for /.tag: commit tags become Commit items, blob
# tags plain file Items, tree tags directory Items. With no names all
# refs/tags/* are produced; otherwise refs are filtered by name, again
# pulled into RAM first because tag_item itself calls repo.cat().
# NOTE(review): many lines are missing from this listing (orig
# 798-800, 803, 805, 808-809, 811, 814-816, 820, 822-823, 829-834,
# 837-839, 841, 843-844 -- including the nested "def tag_item(oid)"
# header, the '.' yield, and the remaining-count bookkeeping) --
# confirm against upstream.
797 def tags_items(repo, names):
801     assert len(oid) == 20
802     oidx = oid.encode('hex')
804     _, typ, size = next(it)
806     return cache_get_commit_item(oid, need_meta=False) \
807     or _commit_item_from_data(oid, ''.join(it))
810     return Item(meta=default_file_mode, oid=oid)
812     return Item(meta=default_dir_mode, oid=oid)
813     raise Exception('unexpected tag type ' + typ + ' for tag ' + name)
817     # We have to pull these all into ram because tag_item calls cat()
818     for name, oid in tuple(repo.refs(names, limit_to_tags=True)):
819     assert(name.startswith('refs/tags/'))
821     yield name, tag_item(oid)
824     # Assumes no duplicate refs
825     if type(names) not in (frozenset, set):
826     names = frozenset(names)
827     remaining = len(names)
828     last_name = max(names)
835     for name, oid in repo.refs(names, limit_to_tags=True):
836     assert(name.startswith('refs/tags/'))
840     if name not in names:
842     yield name, tag_item(oid)
# Dispatcher over the item's type: real trees/commits are cat'ed and
# handed to tree_items[_with_meta], RevList to revlist_items, Root to
# root_items, Tags to tags_items.
# NOTE(review): several lines are missing from this listing (orig 853,
# 858, 863, 866-867, 869-870, 872, 876-878, 882, 884, 888, 890, 892,
# 894-895 -- including the item_t assignment, the data join, the
# want_meta branch, and the final "for x in item_gen" loop) -- confirm
# against upstream.
847 def contents(repo, item, names=None, want_meta=True):
848     """Yields information about the items contained in item. Yields
849     (name, item) for each name in names, if the name exists, in an
850     unspecified order. If there are no names, then yields (name,
851     item) for all items, including a first item named '.'
852     representing the container itself.
854     The meta value for any directories other than '.' will be a
855     default directory mode, not a Metadata object. This is because
856     the actual metadata for a directory is stored inside the directory
857     (see fill_in_metadata_if_dir() or ensure_item_has_metadata()).
859     Note that want_meta is advisory. For any given item, item.meta
860     might be a Metadata instance or a mode, and if the former,
861     meta.size might be None. Missing sizes can be computed via
862     item_size() or augment_item_meta(..., include_size=True).
864     Do not modify any item.meta Metadata instances directly. If
865     needed, make a copy via item.meta.copy() and modify that instead.
868     # Q: are we comfortable promising '.' first when no names?
871     assert S_ISDIR(item_mode(item))
873     if item_t in real_tree_types:
874     it = repo.cat(item.oid.encode('hex'))
875     _, obj_t, size = next(it)
879     # Note: it shouldn't be possible to see an Item with type
880     # 'commit' since a 'commit' should always produce a Commit.
881     raise Exception('unexpected git ' + obj_t)
883     item_gen = tree_items_with_meta(repo, item.oid, data, names)
885     item_gen = tree_items(item.oid, data, names)
886     elif item_t == RevList:
887     item_gen = revlist_items(repo, item.oid, names)
889     item_gen = root_items(repo, names, want_meta)
891     item_gen = tags_items(repo, names)
893     raise Exception('unexpected VFS item ' + str(item))
# Core resolver: walks path segment by segment from parent (or the
# root), consulting the shared cache under a b'res:...' key, handling
# '..' by popping, descending directories via contents(), expanding
# symlinks (with an ELOOP guard), and recording each step in "past".
# Returns a tuple of (name, item) pairs; a missing final segment is
# recorded with item None.
# NOTE(review): a large number of lines are missing from this listing
# (orig 901, 903-905, 908-909, 913, 915-922, 929, 931-932, 934, 936,
# 939-941, 943-945, 950-951, 954-955, 960, 965, 967, 974, 977,
# 979-981, 984, 987, 990, 993-995, 1001, 1006 -- including the main
# while loop header, the hops counter feeding the ELOOP branch, and
# several returns) -- treat this text as an outline only and confirm
# against upstream.
897 def _resolve_path(repo, path, parent=None, want_meta=True, follow=True):
898     cache_key = b'res:%d%d%d:%s\0%s' \
899     % (bool(want_meta), bool(follow), repo.id(),
900     ('/'.join(x[0] for x in parent) if parent else ''),
902     resolution = cache_get(cache_key)
906     def notice_resolution(r):
907     cache_notice(cache_key, r)
910     def raise_dir_required_but_not_dir(path, parent, past):
911     raise IOError(ENOTDIR,
912     "path %r%s resolves to non-directory %r"
914     ' (relative to %r)' % parent if parent else '',
923     assert type(x[0]) in (bytes, str)
924     assert type(x[1]) in item_types
925     assert parent[0][1] == _root
926     if not S_ISDIR(item_mode(parent[-1][1])):
927     raise IOError(ENOTDIR,
928     'path resolution parent %r is not a directory'
930     is_absolute, must_be_dir, future = _decompose_path(path)
933     if not future: # path was effectively '.' or '/'
935     return notice_resolution((('', _root),))
937     return notice_resolution(tuple(parent))
938     return notice_resolution((('', _root),))
942     past = list(parent) if parent else [('', _root)]
946     if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
947     raise_dir_required_but_not_dir(path, parent, past)
948     return notice_resolution(tuple(past))
949     segment = future.pop()
952     if len(past) > 1: # .. from / is /
953     assert S_ISDIR(item_mode(past[-1][1]))
956     parent_name, parent_item = past[-1]
957     wanted = (segment,) if not want_meta else ('.', segment)
958     items = tuple(contents(repo, parent_item, names=wanted,
959     want_meta=want_meta))
961     item = items[0][1] if items else None
962     else: # First item will be '.' and have the metadata
963     item = items[1][1] if len(items) == 2 else None
964     dot, dot_item = items[0]
966     past[-1] = parent_name, parent_item
968     past.append((segment, None),)
969     return notice_resolution(tuple(past))
970     mode = item_mode(item)
971     if not S_ISLNK(mode):
972     if not S_ISDIR(mode):
973     past.append((segment, item),)
975     raise IOError(ENOTDIR,
976     'path %r%s ends internally in non-directory here: %r'
978     ' (relative to %r)' % parent if parent else '',
982     raise_dir_required_but_not_dir(path, parent, past)
983     return notice_resolution(tuple(past))
985     if want_meta and type(item) in real_tree_types:
986     dir_meta = _find_treeish_oid_metadata(repo, item.oid)
988     item = item._replace(meta=dir_meta)
989     past.append((segment, item))
991     if not future and not follow:
992     past.append((segment, item),)
996     'too many symlinks encountered while resolving %r%s'
997     % (path, ' relative to %r' % parent if parent else ''),
998     terminus=tuple(past + [(segment, item)]))
999     target = readlink(repo, item)
1000     is_absolute, _, target_future = _decompose_path(target)
1002     if not target_future: # path was effectively '/'
1003     return notice_resolution((('', _root),))
1004     past = [('', _root)]
1005     future = target_future
1007     future.extend(target_future)
# Public wrapper: forwards to repo.resolve() for remote repositories,
# otherwise delegates to _resolve_path() and sanity-checks that a
# followed result never ends in a symlink.
# NOTE(review): a few lines are missing from this listing (orig 1015,
# 1018, 1022, 1024-1025, 1033, 1037-1038, 1042, 1053, 1056-1057,
# 1061, 1063, 1067 -- including the final "return result") -- confirm
# against upstream.
1010 def resolve(repo, path, parent=None, want_meta=True, follow=True):
1011     """Follow the path in the virtual filesystem and return a tuple
1012     representing the location, if any, denoted by the path. Each
1013     element in the result tuple will be (name, info), where info will
1014     be a VFS item that can be passed to functions like item_mode().
1016     If follow is false, and if the final path element is a symbolic
1017     link, don't follow it, just return it in the result.
1019     If a path segment that does not exist is encountered during
1020     resolution, the result will represent the location of the missing
1021     item, and that item in the result will be None.
1023     Any attempt to traverse a non-directory will raise a VFS ENOTDIR
1026     Any symlinks along the path, including at the end, will be
1027     resolved. A VFS IOError with the errno attribute set to ELOOP
1028     will be raised if too many symlinks are traversed while following
1029     the path. That exception is effectively like a normal
1030     ELOOP IOError exception, but will include a terminus element
1031     describing the location of the failure, which will be a tuple of
1032     (name, info) elements.
1034     The parent, if specified, must be a sequence of (name, item)
1035     tuples, and will provide the starting point for the resolution of
1036     the path. If no parent is specified, resolution will start at
1039     The result may include elements of parent directly, so they must
1040     not be modified later. If this is a concern, pass in "name,
1041     copy_item(item) for name, item in parent" instead.
1043     When want_meta is true, detailed metadata will be included in each
1044     result item if it's available, otherwise item.meta will be an
1045     integer mode. The metadata size may or may not be provided, but
1046     can be computed by item_size() or augment_item_meta(...,
1047     include_size=True). Setting want_meta=False is rarely desirable
1048     since it can limit the VFS to just the metadata git itself can
1049     represent, and so, as an example, fifos and sockets will appear to
1050     be regular files (e.g. S_ISREG(item_mode(item)) will be true) .
1051     But the option is provided because it may be more efficient when
1052     only the path names or the more limited metadata is sufficient.
1054     Do not modify any item.meta Metadata instances directly. If
1055     needed, make a copy via item.meta.copy() and modify that instead.
1058     if repo.is_remote():
1059     # Redirect to the more efficient remote version
1060     return repo.resolve(path, parent=parent, want_meta=want_meta,
1062     result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
1064     _, leaf_item = result[-1]
1065     if leaf_item and follow:
1066     assert not S_ISLNK(item_mode(leaf_item))
# Resolve without following the leaf first; only re-resolve (following)
# when the leaf is a symlink, so a dangling symlink is returned as-is
# instead of raising.
# NOTE(review): the docstring close and the return lines (orig
# 1073-1075, 1078-1079, 1081, 1084-1086) are missing from this listing.
1069 def try_resolve(repo, path, parent=None, want_meta=True):
1070     """If path does not refer to a symlink, does not exist, or refers to a
1071     valid symlink, behave exactly like resolve(..., follow=True). If
1072     path refers to an invalid symlink, behave like resolve(...,
1076     res = resolve(repo, path, parent=parent, want_meta=want_meta, follow=False)
1077     leaf_name, leaf_item = res[-1]
1080     if not S_ISLNK(item_mode(leaf_item)):
1082     follow = resolve(repo, leaf_name, parent=res[:-1], want_meta=want_meta)
1083     follow_name, follow_item = follow[-1]
# When meta is already a Metadata instance, just fill in size on
# request; otherwise synthesize a minimal "fake" Metadata from the
# integer mode (zeroed ids/times, symlink target filled in for links).
# NOTE(review): several lines are missing from this listing (orig
# 1094-1095, 1097-1098, 1103-1106, 1108, 1111, 1115 -- including the
# "m = item.meta" setup, the fake Metadata construction, and the
# S_ISLNK/include_size branch lines) -- confirm against upstream.
1088 def augment_item_meta(repo, item, include_size=False):
1089     """Ensure item has a Metadata instance for item.meta. If item.meta is
1090     currently a mode, replace it with a compatible "fake" Metadata
1091     instance. If include_size is true, ensure item.meta.size is
1092     correct, computing it if needed. If item.meta is a Metadata
1093     instance, this call may modify it in place or replace it.
1096     # If we actually had parallelism, we'd need locking...
1099     if isinstance(m, Metadata):
1100     if include_size and m.size is None:
1101     m.size = _compute_item_size(repo, item)
1102     return item._replace(meta=m)
1107     meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0
1109     if isinstance(item, FakeLink):
1110     target = item.target
1112     target = _readlink(repo, item.oid)
1113     meta.symlink_target = target
1114     meta.size = len(target)
1116     meta.size = _compute_item_size(repo, item)
1117     return item._replace(meta=meta)
# For a directory item lacking Metadata, read the '.' entry of its
# contents (which carries the directory's own metadata) and graft that
# onto a copy of the item.
# NOTE(review): the docstring close and the return lines (orig
# 1124-1125, 1130-1132) are missing from this listing.
1119 def fill_in_metadata_if_dir(repo, item):
1120     """If item is a directory and item.meta is not a Metadata instance,
1121     attempt to find the metadata for the directory. If found, return
1122     a new item augmented to include that metadata. Otherwise, return
1123     item. May be useful for the output of contents().
1126     if S_ISDIR(item_mode(item)) and not isinstance(item.meta, Metadata):
1127     items = tuple(contents(repo, item, ('.',), want_meta=True))
1128     assert len(items) == 1
1129     assert items[0][0] == '.'
# Convenience composition: directory metadata lookup first, then a
# guaranteed Metadata instance (fake if necessary) via
# augment_item_meta().
# NOTE(review): the docstring close (orig 1137-1139) is missing from
# this listing.
1133 def ensure_item_has_metadata(repo, item, include_size=False):
1134     """If item is a directory, attempt to find and add its metadata. If
1135     the item still doesn't have a Metadata instance for item.meta,
1136     give it one via augment_item_meta(). May be useful for the output
1140     return augment_item_meta(repo,
1141     fill_in_metadata_if_dir(repo, item),
1142     include_size=include_size)