1 """Virtual File System interface to bup repository content.
3 This module provides a path-based interface to the content of a bup
6 The VFS is structured like this:
9 /SAVE-NAME/SAVE-DATE/...
Each path is represented by an item that has at least an item.meta which
13 may be either a Metadata object, or an integer mode. Functions like
14 item_mode() and item_size() will return the mode and size in either
15 case. Any item.meta Metadata instances must not be modified directly.
16 Make a copy to modify via item.meta.copy() if needed, or call
19 The want_meta argument is advisory for calls that accept it, and it
20 may not be honored. Callers must be able to handle an item.meta value
21 that is either an instance of Metadata or an integer mode, perhaps
22 via item_mode() or augment_item_meta().
24 Setting want_meta=False is rarely desirable since it can limit the VFS
25 to only the metadata that git itself can represent, and so for
26 example, fifos and sockets will appear to be regular files
27 (e.g. S_ISREG(item_mode(item)) will be true). But the option is still
28 provided because it may be more efficient when just the path names or
29 the more limited metadata is sufficient.
31 Any given metadata object's size may be None, in which case the size
32 can be computed via item_size() or augment_item_meta(...,
35 When traversing a directory using functions like contents(), the meta
36 value for any directories other than '.' will be a default directory
37 mode, not a Metadata object. This is because the actual metadata for
38 a directory is stored inside the directory (see
39 fill_in_metadata_if_dir() or ensure_item_has_metadata()).
41 Commit items represent commits (e.g. /.tag/some-commit or
42 /foo/latest), and for most purposes, they appear as the underlying
43 tree. S_ISDIR(item_mode(item)) will return true for both tree Items
44 and Commits and the commit's oid is the tree hash; the commit hash is
49 from __future__ import absolute_import, print_function
50 from collections import namedtuple
51 from errno import EINVAL, ELOOP, ENOENT, ENOTDIR
52 from itertools import chain, dropwhile, groupby, tee
53 from random import randrange
54 from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
55 from time import localtime, strftime
58 from bup import git, metadata, vint
59 from bup.compat import range
60 from bup.git import BUP_CHUNKED, cp, get_commit_items, parse_commit, tree_decode
61 from bup.helpers import debug2, last
62 from bup.metadata import Metadata
63 from bup.vint import read_bvec, write_bvec
64 from bup.vint import read_vint, write_vint
65 from bup.vint import read_vuint, write_vuint
67 if sys.version_info[0] < 3:
68 from exceptions import IOError as py_IOError
72 # We currently assume that it's always appropriate to just forward IOErrors
class IOError(py_IOError):
    """IOError that can carry a resolution "terminus".

    terminus, when not None, is a resolution, i.e. a tuple of (name,
    item) pairs (as serialized by write_resolution()) describing where
    along a path the failure occurred.
    """
    def __init__(self, errno, message, terminus=None):
        py_IOError.__init__(self, errno, message)
        self.terminus = terminus
80 def write_ioerror(port, ex):
81 assert isinstance(ex, IOError)
83 (1 if ex.errno is not None else 0)
84 | (2 if ex.message is not None else 0)
85 | (4 if ex.terminus is not None else 0))
86 if ex.errno is not None:
87 write_vint(port, ex.errno)
88 if ex.message is not None:
89 write_bvec(port, ex.message.encode('utf-8'))
90 if ex.terminus is not None:
91 write_resolution(port, ex.terminus)
def read_ioerror(port):
    """Reconstruct an IOError previously serialized via write_ioerror()."""
    mask = read_vuint(port)
    # Fields must be consumed in the same order write_ioerror() emits
    # them: errno, then message, then terminus.  Python evaluates call
    # arguments left to right, so the reads below happen in that order.
    return IOError(errno=read_vint(port) if mask & 1 else None,
                   message=read_bvec(port).decode('utf-8') if mask & 2 else None,
                   terminus=read_resolution(port) if mask & 4 else None)
# Fallback stat modes used when an item has no Metadata instance,
# e.g. items only described by their git mode (see
# _default_mode_for_gitmode() and tree_items()).
default_file_mode = S_IFREG | 0o644
default_dir_mode = S_IFDIR | 0o755
default_symlink_mode = S_IFLNK | 0o755
105 def _default_mode_for_gitmode(gitmode):
107 return default_file_mode
109 return default_dir_mode
111 return default_symlink_mode
112 raise Exception('unexpected git mode ' + oct(gitmode))
114 def _normal_or_chunked_file_size(repo, oid):
115 """Return the size of the normal or chunked file indicated by oid."""
116 # FIXME: --batch-format CatPipe?
117 it = repo.cat(oid.encode('hex'))
118 _, obj_t, size = next(it)
120 while obj_t == 'tree':
121 mode, name, last_oid = last(tree_decode(''.join(it)))
123 it = repo.cat(last_oid.encode('hex'))
124 _, obj_t, size = next(it)
125 return ofs + sum(len(b) for b in it)
127 def _skip_chunks_before_offset(tree, offset):
128 prev_ent = next(tree, None)
133 ent_ofs = int(ent[1], 16)
135 return chain([prev_ent, ent], tree)
136 if ent_ofs == offset:
137 return chain([ent], tree)
141 def _tree_chunks(repo, tree, startofs):
142 "Tree should be a sequence of (name, mode, hash) as per tree_decode()."
143 assert(startofs >= 0)
144 # name is the chunk's hex offset in the original file
145 for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
147 skipmore = startofs - ofs
150 it = repo.cat(oid.encode('hex'))
151 _, obj_t, size = next(it)
154 assert obj_t == 'tree'
155 for b in _tree_chunks(repo, tree_decode(data), skipmore):
158 assert obj_t == 'blob'
159 yield data[skipmore:]
162 def __init__(self, repo, oid, startofs):
163 it = repo.cat(oid.encode('hex'))
164 _, obj_t, size = next(it)
165 isdir = obj_t == 'tree'
168 self.it = _tree_chunks(repo, tree_decode(data), startofs)
172 self.blob = data[startofs:]
175 def next(self, size):
177 while len(out) < size:
178 if self.it and not self.blob:
180 self.blob = self.it.next()
181 except StopIteration:
184 want = size - len(out)
185 out += self.blob[:want]
186 self.blob = self.blob[want:]
189 debug2('next(%d) returned %d\n' % (size, len(out)))
193 class _FileReader(object):
194 def __init__(self, repo, oid, known_size=None):
195 assert len(oid) == 20
200 self._size = known_size
202 def _compute_size(self):
204 self._size = _normal_or_chunked_file_size(self._repo, self.oid)
208 if ofs < 0 or ofs > self._compute_size():
209 raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)
215 def read(self, count=-1):
216 size = self._compute_size()
220 count = size - self.ofs
221 if not self.reader or self.reader.ofs != self.ofs:
222 self.reader = _ChunkReader(self._repo, self.oid, self.ofs)
224 buf = self.reader.next(count)
227 raise # our offsets will be all screwed up otherwise
236 def __exit__(self, type, value, traceback):
240 _multiple_slashes_rx = re.compile(r'//+')
242 def _decompose_path(path):
243 """Return a boolean indicating whether the path is absolute, and a
244 reversed list of path elements, omitting any occurrences of "."
245 and ignoring any leading or trailing slash. If the path is
246 effectively '/' or '.', return an empty list.
249 path = re.sub(_multiple_slashes_rx, '/', path)
251 return True, True, []
252 is_absolute = must_be_dir = False
253 if path.startswith('/'):
256 for suffix in ('/', '/.'):
257 if path.endswith(suffix):
259 path = path[:-len(suffix)]
260 parts = [x for x in path.split('/') if x != '.']
263 must_be_dir = True # e.g. path was effectively '.' or '/', etc.
264 return is_absolute, must_be_dir, parts
# The VFS item types.  Every item has at least a .meta field, which may
# be either a Metadata instance or a plain integer mode; tree-like and
# file items also carry the relevant git oid, and Commit additionally
# records the commit oid (coid) alongside its tree oid.
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
FakeLink = namedtuple('FakeLink', ('meta', 'target'))
Root = namedtuple('Root', ('meta',))
Tags = namedtuple('Tags', ('meta',))
RevList = namedtuple('RevList', ('meta', 'oid'))
Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))

# All types that may appear in a resolution.  FakeLink is included
# because write_item()/read_item() serialize FakeLink items (e.g. the
# "latest" symlink in a rev-list), so a resolution passed back as a
# parent may legitimately contain one.
item_types = frozenset((Item, Chunky, Root, Tags, RevList, Commit, FakeLink))
real_tree_types = frozenset((Item, Commit))
278 def write_item(port, item):
280 name = bytes(kind.__name__)
282 has_meta = 1 if isinstance(meta, Metadata) else 0
283 if kind in (Item, Chunky, RevList):
284 assert len(item.oid) == 20
286 vint.send(port, 'sVs', name, has_meta, item.oid)
287 Metadata.write(meta, port, include_path=False)
289 vint.send(port, 'sVsV', name, has_meta, item.oid, item.meta)
290 elif kind in (Root, Tags):
292 vint.send(port, 'sV', name, has_meta)
293 Metadata.write(meta, port, include_path=False)
295 vint.send(port, 'sVV', name, has_meta, item.meta)
297 assert len(item.oid) == 20
298 assert len(item.coid) == 20
300 vint.send(port, 'sVss', name, has_meta, item.oid, item.coid)
301 Metadata.write(meta, port, include_path=False)
303 vint.send(port, 'sVssV', name, has_meta, item.oid, item.coid,
305 elif kind == FakeLink:
307 vint.send(port, 'sVs', name, has_meta, item.target)
308 Metadata.write(meta, port, include_path=False)
310 vint.send(port, 'sVsV', name, has_meta, item.target, item.meta)
315 def read_m(port, has_meta):
317 m = Metadata.read(port)
319 return read_vuint(port)
320 kind, has_meta = vint.recv(port, 'sV')
322 oid, meta = read_bvec(port), read_m(port, has_meta)
323 return Item(oid=oid, meta=meta)
324 if kind == b'Chunky':
325 oid, meta = read_bvec(port), read_m(port, has_meta)
326 return Chunky(oid=oid, meta=meta)
327 if kind == b'RevList':
328 oid, meta = read_bvec(port), read_m(port, has_meta)
329 return RevList(oid=oid, meta=meta)
331 return Root(meta=read_m(port, has_meta))
333 return Tags(meta=read_m(port, has_meta))
334 if kind == b'Commit':
335 oid, coid = vint.recv(port, 'ss')
336 meta = read_m(port, has_meta)
337 return Commit(oid=oid, coid=coid, meta=meta)
338 if kind == b'FakeLink':
339 target, meta = read_bvec(port), read_m(port, has_meta)
340 return FakeLink(target=target, meta=meta)
343 def write_resolution(port, resolution):
344 write_vuint(port, len(resolution))
345 for name, item in resolution:
346 write_bvec(port, name)
349 write_item(port, item)
353 def read_resolution(port):
357 name = read_bvec(port)
358 have_item = ord(port.read(1))
359 assert have_item in (0, 1)
360 item = read_item(port) if have_item else None
361 result.append((name, item))
# Shared singleton items for the VFS root ('/') and the fake '/.tag'
# directory (cf. the module docstring); both present a default
# directory mode rather than real Metadata.
_root = Root(meta=default_dir_mode)
_tags = Tags(meta=default_dir_mode)
371 ### A general purpose shared cache with (currently) cheap random
372 ### eviction. At the moment there is no weighting so a single commit
373 ### item is just as likely to be evicted as an entire "rev-list". See
374 ### is_valid_cache_key for a description of the expected content.
378 _cache_max_items = 30000
381 global _cache, _cache_keys
385 def is_valid_cache_key(x):
386 """Return logically true if x looks like it could be a valid cache key
387 (with respect to structure). Current valid cache entries:
388 res:... -> resolution
390 rvl:OID -> {'.', commit, '2012...', next_commit, ...}
392 # Suspect we may eventually add "(container_oid, name) -> ...", and others.
396 if tag in ('itm:', 'rvl:') and len(x) == 24:
403 if not is_valid_cache_key(key):
404 raise Exception('invalid cache key: ' + repr(key))
405 return _cache.get(key)
407 def cache_notice(key, value):
408 global _cache, _cache_keys, _cache_max_items
409 if not is_valid_cache_key(key):
410 raise Exception('invalid cache key: ' + repr(key))
413 if len(_cache) < _cache_max_items:
414 _cache_keys.append(key)
417 victim_i = randrange(0, len(_cache_keys))
418 victim = _cache_keys[victim_i]
420 _cache_keys[victim_i] = key
423 def cache_get_commit_item(oid, need_meta=True):
424 """Return the requested tree item if it can be found in the cache.
425 When need_meta is true don't return a cached item that only has a
427 # tree might be stored independently, or as '.' with its entries.
428 commit_key = b'itm:' + oid
429 item = cache_get(commit_key)
433 if isinstance(item.meta, Metadata):
435 entries = cache_get(b'rvl:' + oid)
439 def cache_get_revlist_item(oid, need_meta=True):
440 commit = cache_get_commit_item(oid, need_meta=need_meta)
442 return RevList(oid=oid, meta=commit.meta)
445 """Return a completely independent copy of item, such that
446 modifications will not affect the original.
449 meta = getattr(item, 'meta', None)
450 if isinstance(meta, Metadata):
451 return(item._replace(meta=meta.copy()))
455 """Return the integer mode (stat st_mode) for item."""
457 if isinstance(m, Metadata):
461 def _read_dir_meta(bupm):
462 # This is because save writes unmodified Metadata() entries for
463 # fake parents -- test-save-strip-graft.sh demonstrates.
464 m = Metadata.read(bupm)
466 return default_dir_mode
467 assert m.mode is not None
472 def tree_data_and_bupm(repo, oid):
473 """Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
474 tree has no metadata (i.e. older bup save, or non-bup tree).
477 assert len(oid) == 20
478 it = repo.cat(oid.encode('hex'))
479 _, item_t, size = next(it)
481 if item_t == 'commit':
482 commit = parse_commit(data)
483 it = repo.cat(commit.tree)
484 _, item_t, size = next(it)
486 assert item_t == 'tree'
487 elif item_t != 'tree':
488 raise Exception('%r is not a tree or commit' % oid.encode('hex'))
489 for _, mangled_name, sub_oid in tree_decode(data):
490 if mangled_name == '.bupm':
492 if mangled_name > '.bupm':
496 def _find_treeish_oid_metadata(repo, oid):
497 """Return the metadata for the tree or commit oid, or None if the tree
498 has no metadata (i.e. older bup save, or non-bup tree).
501 tree_data, bupm_oid = tree_data_and_bupm(repo, oid)
503 with _FileReader(repo, bupm_oid) as meta_stream:
504 return _read_dir_meta(meta_stream)
def _readlink(repo, oid):
    """Return the symlink target stored as the repository blob at oid."""
    chunks = repo.join(oid.encode('hex'))
    return ''.join(chunks)
510 def readlink(repo, item):
511 """Return the link target of item, which must be a symlink. Reads the
512 target from the repository if necessary."""
514 assert S_ISLNK(item_mode(item))
515 if isinstance(item.meta, Metadata):
516 target = item.meta.symlink_target
519 elif isinstance(item, FakeLink):
521 return _readlink(repo, item.oid)
523 def _compute_item_size(repo, item):
524 mode = item_mode(item)
526 size = _normal_or_chunked_file_size(repo, item.oid)
529 return len(_readlink(repo, item.oid))
532 def item_size(repo, item):
533 """Return the size of item, computing it if necessary."""
535 if isinstance(m, Metadata) and m.size is not None:
537 return _compute_item_size(repo, item)
def tree_data_reader(repo, oid):
    """Return an open reader covering every byte reachable from oid,
    recursively concatenating the leaves when oid names a tree."""
    return _FileReader(repo, oid)
def fopen(repo, item):
    """Return an open reader for item, which must be a regular file."""
    mode = item_mode(item)
    assert S_ISREG(mode)
    return tree_data_reader(repo, item.oid)
549 def _commit_item_from_data(oid, data):
550 info = parse_commit(data)
551 return Commit(meta=default_dir_mode,
552 oid=info.tree.decode('hex'),
555 def _commit_item_from_oid(repo, oid, require_meta):
556 commit = cache_get_commit_item(oid, need_meta=require_meta)
557 if commit and ((not require_meta) or isinstance(commit.meta, Metadata)):
559 it = repo.cat(oid.encode('hex'))
560 _, typ, size = next(it)
561 assert typ == 'commit'
562 commit = _commit_item_from_data(oid, ''.join(it))
564 meta = _find_treeish_oid_metadata(repo, commit.oid)
566 commit = commit._replace(meta=meta)
567 commit_key = b'itm:' + oid
568 cache_notice(commit_key, commit)
def _revlist_item_from_oid(repo, oid, require_meta):
    """Return a RevList item for the commit oid, borrowing the metadata
    from the corresponding (possibly cached) commit item."""
    commit_item = _commit_item_from_oid(repo, oid, require_meta)
    return RevList(meta=commit_item.meta, oid=oid)
575 def root_items(repo, names=None, want_meta=True):
576 """Yield (name, item) for the items in '/' in the VFS. Return
577 everything if names is logically false, otherwise return only
578 items with a name in the collection.
581 # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?
587 # FIXME: maybe eventually support repo.clone() or something
588 # and pass in two repos, so we can drop the tuple() and stream
589 # in parallel (i.e. meta vs refs).
590 for name, oid in tuple(repo.refs([], limit_to_heads=True)):
591 assert(name.startswith('refs/heads/'))
592 yield name[11:], _revlist_item_from_oid(repo, oid, want_meta)
600 if ref in ('.', '.tag'):
602 it = repo.cat('refs/heads/' + ref)
603 oidx, typ, size = next(it)
607 assert typ == 'commit'
608 commit = parse_commit(''.join(it))
609 yield ref, _revlist_item_from_oid(repo, oidx.decode('hex'), want_meta)
611 def ordered_tree_entries(tree_data, bupm=None):
612 """Yields (name, mangled_name, kind, gitmode, oid) for each item in
613 tree, sorted by name.
616 # Sadly, the .bupm entries currently aren't in git tree order,
617 # i.e. they don't account for the fact that git sorts trees
618 # (including our chunked trees) as if their names ended with "/",
619 # so "fo" sorts after "fo." iff fo is a directory. This makes
620 # streaming impossible when we need the metadata.
621 def result_from_tree_entry(tree_entry):
622 gitmode, mangled_name, oid = tree_entry
623 name, kind = git.demangle_name(mangled_name, gitmode)
624 return name, mangled_name, kind, gitmode, oid
626 tree_ents = (result_from_tree_entry(x) for x in tree_decode(tree_data))
628 tree_ents = sorted(tree_ents, key=lambda x: x[0])
629 for ent in tree_ents:
632 def tree_items(oid, tree_data, names=frozenset(), bupm=None):
634 def tree_item(ent_oid, kind, gitmode):
635 if kind == BUP_CHUNKED:
636 meta = Metadata.read(bupm) if bupm else default_file_mode
637 return Chunky(oid=ent_oid, meta=meta)
640 # No metadata here (accessable via '.' inside ent_oid).
641 return Item(meta=default_dir_mode, oid=ent_oid)
643 return Item(oid=ent_oid,
644 meta=(Metadata.read(bupm) if bupm \
645 else _default_mode_for_gitmode(gitmode)))
647 assert len(oid) == 20
649 dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
650 yield '.', Item(oid=oid, meta=dot_meta)
651 tree_entries = ordered_tree_entries(tree_data, bupm)
652 for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
653 if mangled_name == '.bupm':
656 yield name, tree_item(ent_oid, kind, gitmode)
659 # Assumes the tree is properly formed, i.e. there are no
660 # duplicates, and entries will be in git tree order.
661 if type(names) not in (frozenset, set):
662 names = frozenset(names)
663 remaining = len(names)
665 # Account for the bupm sort order issue (cf. ordered_tree_entries above)
666 last_name = max(names) if bupm else max(names) + '/'
669 dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
670 yield '.', Item(oid=oid, meta=dot_meta)
675 tree_entries = ordered_tree_entries(tree_data, bupm)
676 for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
677 if mangled_name == '.bupm':
680 if name not in names:
682 break # given bupm sort order, we're finished
683 if (kind == BUP_CHUNKED or not S_ISDIR(gitmode)) and bupm:
686 yield name, tree_item(ent_oid, kind, gitmode)
691 def tree_items_with_meta(repo, oid, tree_data, names):
692 # For now, the .bupm order doesn't quite match git's, and we don't
693 # load the tree data incrementally anyway, so we just work in RAM
695 assert len(oid) == 20
697 for _, mangled_name, sub_oid in tree_decode(tree_data):
698 if mangled_name == '.bupm':
699 bupm = _FileReader(repo, sub_oid)
701 if mangled_name > '.bupm':
703 for item in tree_items(oid, tree_data, names, bupm):
# Matches save names as produced by strftime('%Y-%m-%d-%H%M%S') in
# _name_for_rev(), optionally with the "-N" suffix appended by
# _reverse_suffix_duplicates() for runs of identical timestamps.
_save_name_rx = re.compile(r'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
708 def _reverse_suffix_duplicates(strs):
709 """Yields the elements of strs, with any runs of duplicate values
710 suffixed with -N suffixes, where the zero padded integer N
711 decreases to 0 by 1 (e.g. 10, 09, ..., 00).
714 for name, duplicates in groupby(strs):
715 ndup = len(tuple(duplicates))
719 ndig = len(str(ndup - 1))
720 fmt = '%s-' + '%0' + str(ndig) + 'd'
721 for i in range(ndup - 1, -1, -1):
722 yield fmt % (name, i)
725 items = f.readline().split(None)
726 assert len(items) == 2
727 tree, auth_sec = items
728 return tree.decode('hex'), int(auth_sec)
730 def _name_for_rev(rev):
731 commit_oidx, (tree_oid, utc) = rev
732 return strftime('%Y-%m-%d-%H%M%S', localtime(utc))
734 def _item_for_rev(rev):
735 commit_oidx, (tree_oid, utc) = rev
736 coid = commit_oidx.decode('hex')
737 item = cache_get_commit_item(coid, need_meta=False)
740 item = Commit(meta=default_dir_mode, oid=tree_oid, coid=coid)
741 commit_key = b'itm:' + coid
742 cache_notice(commit_key, item)
745 def cache_commit(repo, oid):
746 """Build, cache, and return a "name -> commit_item" dict of the entire
750 # For now, always cache with full metadata
752 entries['.'] = _revlist_item_from_oid(repo, oid, True)
753 revs = repo.rev_list((oid.encode('hex'),), format='%T %at',
755 rev_items, rev_names = tee(revs)
756 revs = None # Don't disturb the tees
757 rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
758 rev_items = (_item_for_rev(x) for x in rev_items)
760 for item in rev_items:
761 name = next(rev_names)
762 tip = tip or (name, item)
764 entries['latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
765 revlist_key = b'rvl:' + tip[1].coid
766 cache_notice(revlist_key, entries)
769 def revlist_items(repo, oid, names):
770 assert len(oid) == 20
772 # Special case '.' instead of caching the whole history since it's
773 # the only way to get the metadata for the commit.
774 if names and all(x == '.' for x in names):
775 yield '.', _revlist_item_from_oid(repo, oid, True)
778 # For now, don't worry about the possibility of the contents being
779 # "too big" for the cache.
780 revlist_key = b'rvl:' + oid
781 entries = cache_get(revlist_key)
783 entries = cache_commit(repo, oid)
786 for name in sorted(entries.keys()):
787 yield name, entries[name]
790 names = frozenset(name for name in names
791 if _save_name_rx.match(name) or name in ('.', 'latest'))
794 yield '.', entries['.']
795 for name in (n for n in names if n != '.'):
796 commit = entries.get(name)
800 def tags_items(repo, names):
804 assert len(oid) == 20
805 oidx = oid.encode('hex')
807 _, typ, size = next(it)
809 return cache_get_commit_item(oid, need_meta=False) \
810 or _commit_item_from_data(oid, ''.join(it))
813 return Item(meta=default_file_mode, oid=oid)
815 return Item(meta=default_dir_mode, oid=oid)
816 raise Exception('unexpected tag type ' + typ + ' for tag ' + name)
820 # We have to pull these all into ram because tag_item calls cat()
821 for name, oid in tuple(repo.refs(names, limit_to_tags=True)):
822 assert(name.startswith('refs/tags/'))
824 yield name, tag_item(oid)
827 # Assumes no duplicate refs
828 if type(names) not in (frozenset, set):
829 names = frozenset(names)
830 remaining = len(names)
831 last_name = max(names)
838 for name, oid in repo.refs(names, limit_to_tags=True):
839 assert(name.startswith('refs/tags/'))
843 if name not in names:
845 yield name, tag_item(oid)
850 def contents(repo, item, names=None, want_meta=True):
851 """Yields information about the items contained in item. Yields
852 (name, item) for each name in names, if the name exists, in an
853 unspecified order. If there are no names, then yields (name,
854 item) for all items, including, a first item named '.'
855 representing the container itself.
857 The meta value for any directories other than '.' will be a
858 default directory mode, not a Metadata object. This is because
859 the actual metadata for a directory is stored inside the directory
860 (see fill_in_metadata_if_dir() or ensure_item_has_metadata()).
862 Note that want_meta is advisory. For any given item, item.meta
863 might be a Metadata instance or a mode, and if the former,
864 meta.size might be None. Missing sizes can be computed via via
865 item_size() or augment_item_meta(..., include_size=True).
867 Do not modify any item.meta Metadata instances directly. If
868 needed, make a copy via item.meta.copy() and modify that instead.
871 # Q: are we comfortable promising '.' first when no names?
874 assert S_ISDIR(item_mode(item))
876 if item_t in real_tree_types:
877 it = repo.cat(item.oid.encode('hex'))
878 _, obj_t, size = next(it)
882 # Note: it shouldn't be possible to see an Item with type
883 # 'commit' since a 'commit' should always produce a Commit.
884 raise Exception('unexpected git ' + obj_t)
886 item_gen = tree_items_with_meta(repo, item.oid, data, names)
888 item_gen = tree_items(item.oid, data, names)
889 elif item_t == RevList:
890 item_gen = revlist_items(repo, item.oid, names)
892 item_gen = root_items(repo, names, want_meta)
894 item_gen = tags_items(repo, names)
896 raise Exception('unexpected VFS item ' + str(item))
900 def _resolve_path(repo, path, parent=None, want_meta=True, follow=True):
901 cache_key = b'res:%d%d%d:%s\0%s' \
902 % (bool(want_meta), bool(follow), repo.id(),
903 ('/'.join(x[0] for x in parent) if parent else ''),
905 resolution = cache_get(cache_key)
909 def notice_resolution(r):
910 cache_notice(cache_key, r)
913 def raise_dir_required_but_not_dir(path, parent, past):
914 raise IOError(ENOTDIR,
915 "path %r%s resolves to non-directory %r"
917 ' (relative to %r)' % parent if parent else '',
926 assert type(x[0]) in (bytes, str)
927 assert type(x[1]) in item_types
928 assert parent[0][1] == _root
929 if not S_ISDIR(item_mode(parent[-1][1])):
930 raise IOError(ENOTDIR,
931 'path resolution parent %r is not a directory'
933 is_absolute, must_be_dir, future = _decompose_path(path)
936 if not future: # path was effectively '.' or '/'
938 return notice_resolution((('', _root),))
940 return notice_resolution(tuple(parent))
941 return notice_resolution((('', _root),))
945 past = list(parent) if parent else [('', _root)]
949 if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
950 raise_dir_required_but_not_dir(path, parent, past)
951 return notice_resolution(tuple(past))
952 segment = future.pop()
955 if len(past) > 1: # .. from / is /
956 assert S_ISDIR(item_mode(past[-1][1]))
959 parent_name, parent_item = past[-1]
960 wanted = (segment,) if not want_meta else ('.', segment)
961 items = tuple(contents(repo, parent_item, names=wanted,
962 want_meta=want_meta))
964 item = items[0][1] if items else None
965 else: # First item will be '.' and have the metadata
966 item = items[1][1] if len(items) == 2 else None
967 dot, dot_item = items[0]
969 past[-1] = parent_name, parent_item
971 past.append((segment, None),)
972 return notice_resolution(tuple(past))
973 mode = item_mode(item)
974 if not S_ISLNK(mode):
975 if not S_ISDIR(mode):
976 past.append((segment, item),)
978 raise IOError(ENOTDIR,
979 'path %r%s ends internally in non-directory here: %r'
981 ' (relative to %r)' % parent if parent else '',
985 raise_dir_required_but_not_dir(path, parent, past)
986 return notice_resolution(tuple(past))
988 if want_meta and type(item) in real_tree_types:
989 dir_meta = _find_treeish_oid_metadata(repo, item.oid)
991 item = item._replace(meta=dir_meta)
992 past.append((segment, item))
994 if not future and not follow:
995 past.append((segment, item),)
999 'too many symlinks encountered while resolving %r%s'
1000 % (path, ' relative to %r' % parent if parent else ''),
1001 terminus=tuple(past + [(segment, item)]))
1002 target = readlink(repo, item)
1003 is_absolute, _, target_future = _decompose_path(target)
1005 if not target_future: # path was effectively '/'
1006 return notice_resolution((('', _root),))
1007 past = [('', _root)]
1008 future = target_future
1010 future.extend(target_future)
1013 def resolve(repo, path, parent=None, want_meta=True, follow=True):
1014 """Follow the path in the virtual filesystem and return a tuple
1015 representing the location, if any, denoted by the path. Each
1016 element in the result tuple will be (name, info), where info will
1017 be a VFS item that can be passed to functions like item_mode().
1019 If follow is false, and if the final path element is a symbolic
1020 link, don't follow it, just return it in the result.
1022 If a path segment that does not exist is encountered during
1023 resolution, the result will represent the location of the missing
1024 item, and that item in the result will be None.
1026 Any attempt to traverse a non-directory will raise a VFS ENOTDIR
1029 Any symlinks along the path, including at the end, will be
1030 resolved. A VFS IOError with the errno attribute set to ELOOP
1031 will be raised if too many symlinks are traversed while following
1032 the path. That exception is effectively like a normal
1033 ELOOP IOError exception, but will include a terminus element
1034 describing the location of the failure, which will be a tuple of
1035 (name, info) elements.
1037 The parent, if specified, must be a sequence of (name, item)
1038 tuples, and will provide the starting point for the resolution of
1039 the path. If no parent is specified, resolution will start at
1042 The result may include elements of parent directly, so they must
1043 not be modified later. If this is a concern, pass in "name,
1044 copy_item(item) for name, item in parent" instead.
1046 When want_meta is true, detailed metadata will be included in each
1047 result item if it's avaiable, otherwise item.meta will be an
1048 integer mode. The metadata size may or may not be provided, but
1049 can be computed by item_size() or augment_item_meta(...,
1050 include_size=True). Setting want_meta=False is rarely desirable
1051 since it can limit the VFS to just the metadata git itself can
1052 represent, and so, as an example, fifos and sockets will appear to
1053 be regular files (e.g. S_ISREG(item_mode(item)) will be true) .
1054 But the option is provided because it may be more efficient when
1055 only the path names or the more limited metadata is sufficient.
1057 Do not modify any item.meta Metadata instances directly. If
1058 needed, make a copy via item.meta.copy() and modify that instead.
1061 if repo.is_remote():
1062 # Redirect to the more efficient remote version
1063 return repo.resolve(path, parent=parent, want_meta=want_meta,
1065 result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
1067 _, leaf_item = result[-1]
1068 if leaf_item and follow:
1069 assert not S_ISLNK(item_mode(leaf_item))
1072 def try_resolve(repo, path, parent=None, want_meta=True):
1073 """If path does not refer to a symlink, does not exist, or refers to a
1074 valid symlink, behave exactly like resolve(..., follow=True). If
1075 path refers to an invalid symlink, behave like resolve(...,
1079 res = resolve(repo, path, parent=parent, want_meta=want_meta, follow=False)
1080 leaf_name, leaf_item = res[-1]
1083 if not S_ISLNK(item_mode(leaf_item)):
1085 follow = resolve(repo, leaf_name, parent=res[:-1], want_meta=want_meta)
1086 follow_name, follow_item = follow[-1]
1091 def augment_item_meta(repo, item, include_size=False):
1092 """Ensure item has a Metadata instance for item.meta. If item.meta is
1093 currently a mode, replace it with a compatible "fake" Metadata
1094 instance. If include_size is true, ensure item.meta.size is
1095 correct, computing it if needed. If item.meta is a Metadata
1096 instance, this call may modify it in place or replace it.
1099 # If we actually had parallelism, we'd need locking...
1102 if isinstance(m, Metadata):
1103 if include_size and m.size is None:
1104 m.size = _compute_item_size(repo, item)
1105 return item._replace(meta=m)
1110 meta.uid = meta.gid = meta.atime = meta.mtime = meta.ctime = 0
1112 if isinstance(item, FakeLink):
1113 target = item.target
1115 target = _readlink(repo, item.oid)
1116 meta.symlink_target = target
1117 meta.size = len(target)
1119 meta.size = _compute_item_size(repo, item)
1120 return item._replace(meta=meta)
1122 def fill_in_metadata_if_dir(repo, item):
1123 """If item is a directory and item.meta is not a Metadata instance,
1124 attempt to find the metadata for the directory. If found, return
1125 a new item augmented to include that metadata. Otherwise, return
1126 item. May be useful for the output of contents().
1129 if S_ISDIR(item_mode(item)) and not isinstance(item.meta, Metadata):
1130 items = tuple(contents(repo, item, ('.',), want_meta=True))
1131 assert len(items) == 1
1132 assert items[0][0] == '.'
def ensure_item_has_metadata(repo, item, include_size=False):
    """If item is a directory, attempt to find and add its metadata. If
    the item still doesn't have a Metadata instance for item.meta,
    give it one via augment_item_meta(). May be useful for the output
    of contents().

    """
    return augment_item_meta(repo,
                             fill_in_metadata_if_dir(repo, item),
                             include_size=include_size)