1 """Virtual File System interface to bup repository content.
This module provides a path-based interface to the content of a bup
repository.
6 The VFS is structured like this:
9 /SAVE-NAME/SAVE-DATE/...
12 Each path is represented by an item that has least an item.meta which
13 may be either a Metadata object, or an integer mode. Functions like
14 item_mode() and item_size() will return the mode and size in either
15 case. Any item.meta Metadata instances must not be modified directly.
Make a copy to modify via item.meta.copy() if needed, or call
augment_item_meta() to obtain an item with a full Metadata instance.
19 The want_meta argument is advisory for calls that accept it, and it
20 may not be honored. Callers must be able to handle an item.meta value
21 that is either an instance of Metadata or an integer mode, perhaps
22 via item_mode() or augment_item_meta().
24 Setting want_meta=False is rarely desirable since it can limit the VFS
25 to only the metadata that git itself can represent, and so for
26 example, fifos and sockets will appear to be regular files
27 (e.g. S_ISREG(item_mode(item)) will be true). But the option is still
28 provided because it may be more efficient when just the path names or
29 the more limited metadata is sufficient.
31 Any given metadata object's size may be None, in which case the size
32 can be computed via item_size() or augment_item_meta(...,
35 When traversing a directory using functions like contents(), the meta
36 value for any directories other than '.' will be a default directory
37 mode, not a Metadata object. This is because the actual metadata for
38 a directory is stored inside the directory (see
39 fill_in_metadata_if_dir() or ensure_item_has_metadata()).
41 Commit items represent commits (e.g. /.tag/some-commit or
42 /foo/latest), and for most purposes, they appear as the underlying
43 tree. S_ISDIR(item_mode(item)) will return true for both tree Items
and Commits, and the commit's oid is the tree hash; the commit hash
itself is available via item.coid.
49 from __future__ import absolute_import, print_function
50 from binascii import hexlify, unhexlify
51 from collections import namedtuple
52 from errno import EINVAL, ELOOP, ENOTDIR
53 from itertools import chain, groupby, tee
54 from random import randrange
55 from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
56 from time import localtime, strftime
59 from bup import git, vint
60 from bup.compat import hexstr, pending_raise
61 from bup.git import BUP_CHUNKED, parse_commit, tree_decode
62 from bup.helpers import debug2, last, nullcontext_if_not
63 from bup.io import path_msg
64 from bup.metadata import Metadata
65 from bup.vint import read_bvec, write_bvec
66 from bup.vint import read_vint, write_vint
67 from bup.vint import read_vuint, write_vuint
69 if sys.version_info[0] < 3:
70 from exceptions import IOError as py_IOError
74 # We currently assume that it's always appropriate to just forward IOErrors
class IOError(py_IOError):
    """IOError augmented with an optional resolution terminus.

    terminus, when provided, is a resolution tuple (as produced by
    resolve()) describing where path resolution failed.
    """
    def __init__(self, errno, message, terminus=None):
        # Record the terminus first; the parent init only sees the
        # standard (errno, message) pair.
        self.terminus = terminus
        py_IOError.__init__(self, errno, message)
def write_ioerror(port, ex):
    """Serialize the VFS IOError ex to port.

    A flag mask is written first, indicating which of the optional
    fields (errno, strerror, terminus) follow.  This dump was missing
    the write_vuint() call for the mask; restore it.
    """
    assert isinstance(ex, IOError)
    # Bit 1: errno present, bit 2: strerror present, bit 4: terminus present.
    mask = ((1 if ex.errno is not None else 0)
            | (2 if ex.strerror is not None else 0)
            | (4 if ex.terminus is not None else 0))
    write_vuint(port, mask)
    if ex.errno is not None:
        write_vint(port, ex.errno)
    if ex.strerror is not None:
        write_bvec(port, ex.strerror.encode('utf-8'))
    if ex.terminus is not None:
        write_resolution(port, ex.terminus)
def read_ioerror(port):
    """Read and return an IOError previously written by write_ioerror()."""
    mask = read_vuint(port)
    # Fields are present iff their bit is set in the mask (see
    # write_ioerror): 1 -> errno, 2 -> strerror, 4 -> terminus.
    errno_val = read_vint(port) if mask & 1 else None
    message = read_bvec(port).decode('utf-8') if mask & 2 else None
    terminus = read_resolution(port) if mask & 4 else None
    return IOError(errno=errno_val, message=message, terminus=terminus)
103 default_file_mode = S_IFREG | 0o644
104 default_dir_mode = S_IFDIR | 0o755
105 default_symlink_mode = S_IFLNK | 0o755
def _default_mode_for_gitmode(gitmode):
    """Return a default stat mode appropriate for the git mode gitmode.

    This dump was missing the S_ISREG/S_ISDIR/S_ISLNK guards implied by
    the interleaved returns; restore them.
    """
    if S_ISREG(gitmode):
        return default_file_mode
    if S_ISDIR(gitmode):
        return default_dir_mode
    if S_ISLNK(gitmode):
        return default_symlink_mode
    raise Exception('unexpected git mode ' + oct(gitmode))
116 def _normal_or_chunked_file_size(repo, oid):
117 """Return the size of the normal or chunked file indicated by oid."""
118 # FIXME: --batch-format CatPipe?
119 it = repo.cat(hexlify(oid))
120 _, obj_t, size = next(it)
122 while obj_t == b'tree':
123 mode, name, last_oid = last(tree_decode(b''.join(it)))
125 it = repo.cat(hexlify(last_oid))
126 _, obj_t, size = next(it)
127 return ofs + sum(len(b) for b in it)
129 def _skip_chunks_before_offset(tree, offset):
130 prev_ent = next(tree, None)
135 ent_ofs = int(ent[1], 16)
137 return chain([prev_ent, ent], tree)
138 if ent_ofs == offset:
139 return chain([ent], tree)
143 def _tree_chunks(repo, tree, startofs):
144 "Tree should be a sequence of (name, mode, hash) as per tree_decode()."
145 assert(startofs >= 0)
146 # name is the chunk's hex offset in the original file
147 for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
149 skipmore = startofs - ofs
152 it = repo.cat(hexlify(oid))
153 _, obj_t, size = next(it)
156 assert obj_t == b'tree'
157 for b in _tree_chunks(repo, tree_decode(data), skipmore):
160 assert obj_t == b'blob'
161 yield data[skipmore:]
164 def __init__(self, repo, oid, startofs):
165 it = repo.cat(hexlify(oid))
166 _, obj_t, size = next(it)
167 isdir = obj_t == b'tree'
170 self.it = _tree_chunks(repo, tree_decode(data), startofs)
174 self.blob = data[startofs:]
177 def next(self, size):
179 while len(out) < size:
180 if self.it and not self.blob:
182 self.blob = next(self.it)
183 except StopIteration:
186 want = size - len(out)
187 out += self.blob[:want]
188 self.blob = self.blob[want:]
191 debug2('next(%d) returned %d\n' % (size, len(out)))
195 class _FileReader(object):
196 def __init__(self, repo, oid, known_size=None):
197 assert len(oid) == 20
203 self._size = known_size
205 def _compute_size(self):
207 self._size = _normal_or_chunked_file_size(self._repo, self.oid)
211 if ofs < 0 or ofs > self._compute_size():
212 raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)
218 def read(self, count=-1):
219 size = self._compute_size()
223 count = size - self.ofs
224 if not self.reader or self.reader.ofs != self.ofs:
225 self.reader = _ChunkReader(self._repo, self.oid, self.ofs)
227 buf = self.reader.next(count)
230 raise # our offsets will be all screwed up otherwise
243 def __exit__(self, type, value, traceback):
244 with pending_raise(value, rethrow=False):
247 _multiple_slashes_rx = re.compile(br'//+')
249 def _decompose_path(path):
250 """Return a boolean indicating whether the path is absolute, and a
251 reversed list of path elements, omitting any occurrences of "."
252 and ignoring any leading or trailing slash. If the path is
253 effectively '/' or '.', return an empty list.
256 path = re.sub(_multiple_slashes_rx, b'/', path)
258 return True, True, []
259 is_absolute = must_be_dir = False
260 if path.startswith(b'/'):
263 for suffix in (b'/', b'/.'):
264 if path.endswith(suffix):
266 path = path[:-len(suffix)]
267 parts = [x for x in path.split(b'/') if x != b'.']
270 must_be_dir = True # e.g. path was effectively '.' or '/', etc.
271 return is_absolute, must_be_dir, parts
# VFS item types.  Every item has a meta field (Metadata instance or
# integer mode) as its first element; most also carry a git oid.
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
FakeLink = namedtuple('FakeLink', ('meta', 'target'))
# Use real one-element tuples for the field specs; ('meta') is just the
# string 'meta' (which namedtuple happens to accept), and the bare
# parens are misleading.
Root = namedtuple('Root', ('meta',))
Tags = namedtuple('Tags', ('meta',))
RevList = namedtuple('RevList', ('meta', 'oid'))
Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))

item_types = (Item, Chunky, Root, Tags, RevList, Commit)
# Types whose oid refers directly to a git tree.
real_tree_types = (Item, Commit)
285 def write_item(port, item):
287 name = bytes(kind.__name__.encode('ascii'))
289 has_meta = 1 if isinstance(meta, Metadata) else 0
290 if kind in (Item, Chunky, RevList):
291 assert len(item.oid) == 20
293 vint.send(port, 'sVs', name, has_meta, item.oid)
294 Metadata.write(meta, port, include_path=False)
296 vint.send(port, 'sVsV', name, has_meta, item.oid, item.meta)
297 elif kind in (Root, Tags):
299 vint.send(port, 'sV', name, has_meta)
300 Metadata.write(meta, port, include_path=False)
302 vint.send(port, 'sVV', name, has_meta, item.meta)
304 assert len(item.oid) == 20
305 assert len(item.coid) == 20
307 vint.send(port, 'sVss', name, has_meta, item.oid, item.coid)
308 Metadata.write(meta, port, include_path=False)
310 vint.send(port, 'sVssV', name, has_meta, item.oid, item.coid,
312 elif kind == FakeLink:
314 vint.send(port, 'sVs', name, has_meta, item.target)
315 Metadata.write(meta, port, include_path=False)
317 vint.send(port, 'sVsV', name, has_meta, item.target, item.meta)
322 def read_m(port, has_meta):
324 m = Metadata.read(port)
326 return read_vuint(port)
327 kind, has_meta = vint.recv(port, 'sV')
329 oid, meta = read_bvec(port), read_m(port, has_meta)
330 return Item(oid=oid, meta=meta)
331 if kind == b'Chunky':
332 oid, meta = read_bvec(port), read_m(port, has_meta)
333 return Chunky(oid=oid, meta=meta)
334 if kind == b'RevList':
335 oid, meta = read_bvec(port), read_m(port, has_meta)
336 return RevList(oid=oid, meta=meta)
338 return Root(meta=read_m(port, has_meta))
340 return Tags(meta=read_m(port, has_meta))
341 if kind == b'Commit':
342 oid, coid = vint.recv(port, 'ss')
343 meta = read_m(port, has_meta)
344 return Commit(oid=oid, coid=coid, meta=meta)
345 if kind == b'FakeLink':
346 target, meta = read_bvec(port), read_m(port, has_meta)
347 return FakeLink(target=target, meta=meta)
def write_resolution(port, resolution):
    """Serialize resolution, a sequence of (name, item-or-None) pairs,
    to port.  A one-byte presence flag precedes each item so that
    read_resolution() can restore missing items as None.  This dump was
    missing the presence-flag branch; restore it.
    """
    write_vuint(port, len(resolution))
    for name, item in resolution:
        write_bvec(port, name)
        if item:
            port.write(b'\x01')
            write_item(port, item)
        else:
            port.write(b'\x00')
def read_resolution(port):
    """Read and return a resolution tuple previously written by
    write_resolution().  This dump was missing the count read and loop
    header; restore them.
    """
    n = read_vuint(port)
    result = []
    for _ in range(n):
        name = read_bvec(port)
        have_item = ord(port.read(1))
        assert have_item in (0, 1)
        item = read_item(port) if have_item else None
        result.append((name, item))
    return tuple(result)
372 _root = Root(meta=default_dir_mode)
373 _tags = Tags(meta=default_dir_mode)
378 ### A general purpose shared cache with (currently) cheap random
379 ### eviction. At the moment there is no weighting so a single commit
380 ### item is just as likely to be evicted as an entire "rev-list". See
381 ### is_valid_cache_key for a description of the expected content.
385 _cache_max_items = 30000
388 global _cache, _cache_keys
392 def is_valid_cache_key(x):
393 """Return logically true if x looks like it could be a valid cache key
394 (with respect to structure). Current valid cache entries:
395 res:... -> resolution
397 rvl:OID -> {'.', commit, '2012...', next_commit, ...}
399 # Suspect we may eventually add "(container_oid, name) -> ...", and others.
400 if isinstance(x, bytes):
402 if tag in (b'itm:', b'rvl:') and len(x) == 24:
410 if not is_valid_cache_key(key):
411 raise Exception('invalid cache key: ' + repr(key))
412 return _cache.get(key)
414 def cache_notice(key, value, overwrite=False):
415 global _cache, _cache_keys, _cache_max_items
416 if not is_valid_cache_key(key):
417 raise Exception('invalid cache key: ' + repr(key))
422 if len(_cache) < _cache_max_items:
423 _cache_keys.append(key)
426 victim_i = randrange(0, len(_cache_keys))
427 victim = _cache_keys[victim_i]
429 _cache_keys[victim_i] = key
432 def _has_metadata_if_needed(item, need_meta):
435 if isinstance(item.meta, Metadata):
439 def cache_get_commit_item(oid, need_meta=True):
440 """Return the requested tree item if it can be found in the cache.
441 When need_meta is true don't return a cached item that only has a
443 # tree might be stored independently, or as '.' with its entries.
444 commit_key = b'itm:' + oid
445 item = cache_get(commit_key)
447 if _has_metadata_if_needed(item, need_meta):
449 entries = cache_get(b'rvl:' + oid)
452 if _has_metadata_if_needed(item, need_meta):
457 """Return a completely independent copy of item, such that
458 modifications will not affect the original.
461 meta = getattr(item, 'meta', None)
462 if isinstance(meta, Metadata):
463 return(item._replace(meta=meta.copy()))
467 """Return the integer mode (stat st_mode) for item."""
469 if isinstance(m, Metadata):
473 def _read_dir_meta(bupm):
474 # This is because save writes unmodified Metadata() entries for
475 # fake parents -- test-save-strip-graft.sh demonstrates.
476 m = Metadata.read(bupm)
478 return default_dir_mode
479 assert m.mode is not None
484 def tree_data_and_bupm(repo, oid):
485 """Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
486 tree has no metadata (i.e. older bup save, or non-bup tree).
489 assert len(oid) == 20
490 it = repo.cat(hexlify(oid))
491 _, item_t, size = next(it)
493 if item_t == b'commit':
494 commit = parse_commit(data)
495 it = repo.cat(commit.tree)
496 _, item_t, size = next(it)
498 assert item_t == b'tree'
499 elif item_t != b'tree':
500 raise Exception('%s is not a tree or commit' % hexstr(oid))
501 for _, mangled_name, sub_oid in tree_decode(data):
502 if mangled_name == b'.bupm':
504 if mangled_name > b'.bupm':
508 def _find_treeish_oid_metadata(repo, oid):
509 """Return the metadata for the tree or commit oid, or None if the tree
510 has no metadata (i.e. older bup save, or non-bup tree).
513 tree_data, bupm_oid = tree_data_and_bupm(repo, oid)
515 with _FileReader(repo, bupm_oid) as meta_stream:
516 return _read_dir_meta(meta_stream)
519 def _readlink(repo, oid):
520 return b''.join(repo.join(hexlify(oid)))
522 def readlink(repo, item):
523 """Return the link target of item, which must be a symlink. Reads the
524 target from the repository if necessary."""
526 assert S_ISLNK(item_mode(item))
527 if isinstance(item, FakeLink):
529 if isinstance(item.meta, Metadata):
530 target = item.meta.symlink_target
533 return _readlink(repo, item.oid)
535 def _compute_item_size(repo, item):
536 mode = item_mode(item)
538 size = _normal_or_chunked_file_size(repo, item.oid)
541 if isinstance(item, FakeLink):
542 return len(item.target)
543 return len(_readlink(repo, item.oid))
546 def item_size(repo, item):
547 """Return the size of item, computing it if necessary."""
549 if isinstance(m, Metadata) and m.size is not None:
551 return _compute_item_size(repo, item)
def tree_data_reader(repo, oid):
    """Return an open reader for all of the data contained within oid.

    If oid refers to a tree (i.e. a chunked file), all of its contents
    are concatenated recursively.
    """
    reader = _FileReader(repo, oid)
    return reader
def fopen(repo, item):
    """Return an open reader for the given file item (must be a regular
    file per its mode)."""
    mode = item_mode(item)
    assert S_ISREG(mode)
    return tree_data_reader(repo, item.oid)
def _commit_item_from_data(oid, data):
    """Return a Commit item for the commit with hash oid and raw commit
    object bytes data.  The item's oid is the commit's tree hash; the
    commit hash itself goes in coid.  This dump's Commit(...) call was
    truncated before coid; restore it.
    """
    info = parse_commit(data)
    return Commit(meta=default_dir_mode,
                  oid=unhexlify(info.tree),
                  coid=oid)
569 def _commit_item_from_oid(repo, oid, require_meta):
570 commit = cache_get_commit_item(oid, need_meta=require_meta)
571 if commit and ((not require_meta) or isinstance(commit.meta, Metadata)):
573 it = repo.cat(hexlify(oid))
574 _, typ, size = next(it)
575 assert typ == b'commit'
576 commit = _commit_item_from_data(oid, b''.join(it))
578 meta = _find_treeish_oid_metadata(repo, commit.oid)
580 commit = commit._replace(meta=meta)
581 commit_key = b'itm:' + oid
582 cache_notice(commit_key, commit, overwrite=True)
def _revlist_item_from_oid(repo, oid, require_meta):
    """Return a RevList item for commit oid, borrowing the commit item's
    metadata (a Metadata instance when require_meta is true and one is
    available)."""
    commit_meta = _commit_item_from_oid(repo, oid, require_meta).meta
    return RevList(oid=oid, meta=commit_meta)
589 def root_items(repo, names=None, want_meta=True):
590 """Yield (name, item) for the items in '/' in the VFS. Return
591 everything if names is logically false, otherwise return only
592 items with a name in the collection.
595 # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?
601 # FIXME: maybe eventually support repo.clone() or something
602 # and pass in two repos, so we can drop the tuple() and stream
603 # in parallel (i.e. meta vs refs).
604 for name, oid in tuple(repo.refs([], limit_to_heads=True)):
605 assert(name.startswith(b'refs/heads/'))
606 yield name[11:], _revlist_item_from_oid(repo, oid, want_meta)
614 if ref in (b'.', b'.tag'):
616 it = repo.cat(b'refs/heads/' + ref)
617 oidx, typ, size = next(it)
621 assert typ == b'commit'
622 commit = parse_commit(b''.join(it))
623 yield ref, _revlist_item_from_oid(repo, unhexlify(oidx), want_meta)
625 def ordered_tree_entries(tree_data, bupm=None):
626 """Yields (name, mangled_name, kind, gitmode, oid) for each item in
627 tree, sorted by name.
630 # Sadly, the .bupm entries currently aren't in git tree order,
631 # but in unmangled name order. They _do_ account for the fact
632 # that git sorts trees (including chunked trees) as if their
633 # names ended with "/" (so "fo" sorts after "fo." iff fo is a
634 # directory), but we apply this on the unmangled names in save
635 # rather than on the mangled names.
636 # This makes streaming impossible when we need the metadata.
637 def result_from_tree_entry(tree_entry):
638 gitmode, mangled_name, oid = tree_entry
639 name, kind = git.demangle_name(mangled_name, gitmode)
640 return name, mangled_name, kind, gitmode, oid
642 tree_ents = (result_from_tree_entry(x) for x in tree_decode(tree_data))
644 tree_ents = sorted(tree_ents, key=lambda x: x[0])
645 for ent in tree_ents:
648 def tree_items(oid, tree_data, names=frozenset(), bupm=None):
650 def tree_item(ent_oid, kind, gitmode):
651 if kind == BUP_CHUNKED:
652 meta = Metadata.read(bupm) if bupm else default_file_mode
653 return Chunky(oid=ent_oid, meta=meta)
656 # No metadata here (accessable via '.' inside ent_oid).
657 return Item(meta=default_dir_mode, oid=ent_oid)
659 meta = Metadata.read(bupm) if bupm else None
660 # handle the case of metadata being empty/missing in bupm
661 # (or there not being bupm at all)
663 meta = _default_mode_for_gitmode(gitmode)
664 return Item(oid=ent_oid, meta=meta)
666 assert len(oid) == 20
668 dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
669 yield b'.', Item(oid=oid, meta=dot_meta)
670 tree_entries = ordered_tree_entries(tree_data, bupm)
671 for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
672 if mangled_name == b'.bupm':
675 yield name, tree_item(ent_oid, kind, gitmode)
678 # Assumes the tree is properly formed, i.e. there are no
679 # duplicates, and entries will be in git tree order.
680 if isinstance(names, (frozenset, set)):
681 names = frozenset(names)
682 remaining = len(names)
684 # Account for the bupm sort order issue (cf. ordered_tree_entries above)
685 last_name = max(names) if bupm else max(names) + b'/'
688 dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
689 yield b'.', Item(oid=oid, meta=dot_meta)
694 tree_entries = ordered_tree_entries(tree_data, bupm)
695 for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
696 if mangled_name == b'.bupm':
699 if name not in names:
701 break # given bupm sort order, we're finished
702 if (kind == BUP_CHUNKED or not S_ISDIR(gitmode)) and bupm:
705 yield name, tree_item(ent_oid, kind, gitmode)
710 def tree_items_with_meta(repo, oid, tree_data, names):
711 # For now, the .bupm order doesn't quite match git's, and we don't
712 # load the tree data incrementally anyway, so we just work in RAM
714 assert len(oid) == 20
716 for _, mangled_name, sub_oid in tree_decode(tree_data):
717 if mangled_name == b'.bupm':
718 bupm = _FileReader(repo, sub_oid)
720 if mangled_name > b'.bupm':
722 with nullcontext_if_not(bupm):
723 for item in tree_items(oid, tree_data, names, bupm):
726 _save_name_rx = re.compile(br'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
728 def _reverse_suffix_duplicates(strs):
729 """Yields the elements of strs, with any runs of duplicate values
730 suffixed with -N suffixes, where the zero padded integer N
731 decreases to 0 by 1 (e.g. 10, 09, ..., 00).
734 for name, duplicates in groupby(strs):
735 ndup = len(tuple(duplicates))
739 ndig = len(str(ndup - 1))
740 fmt = b'%s-' + b'%0' + (b'%d' % ndig) + b'd'
741 for i in range(ndup - 1, -1, -1):
742 yield fmt % (name, i)
745 items = f.readline().split(None)
746 assert len(items) == 2
747 tree, auth_sec = items
748 return unhexlify(tree), int(auth_sec)
750 def _name_for_rev(rev):
751 commit_oidx, (tree_oid, utc) = rev
752 return strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
754 def _item_for_rev(rev):
755 commit_oidx, (tree_oid, utc) = rev
756 coid = unhexlify(commit_oidx)
757 item = cache_get_commit_item(coid, need_meta=False)
760 item = Commit(meta=default_dir_mode, oid=tree_oid, coid=coid)
761 commit_key = b'itm:' + coid
762 cache_notice(commit_key, item)
765 # non-string singleton
766 _HAS_META_ENTRY = object()
768 def cache_commit(repo, oid, require_meta=True):
769 """Build, cache, and return a "name -> commit_item" dict of the entire
774 entries[b'.'] = _revlist_item_from_oid(repo, oid, require_meta)
775 revs = repo.rev_list((hexlify(oid),), format=b'%T %at',
777 rev_items, rev_names = tee(revs)
778 revs = None # Don't disturb the tees
779 rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
780 rev_items = (_item_for_rev(x) for x in rev_items)
782 for item in rev_items:
783 name = next(rev_names)
784 tip = tip or (name, item)
786 entries[b'latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
787 revlist_key = b'rvl:' + tip[1].coid
788 entries[_HAS_META_ENTRY] = require_meta
789 cache_notice(revlist_key, entries, overwrite=True)
792 def revlist_items(repo, oid, names, require_meta=True):
793 assert len(oid) == 20
795 # Special case '.' instead of caching the whole history since it's
796 # the only way to get the metadata for the commit.
797 if names and all(x == b'.' for x in names):
798 yield b'.', _revlist_item_from_oid(repo, oid, require_meta)
801 # For now, don't worry about the possibility of the contents being
802 # "too big" for the cache.
803 revlist_key = b'rvl:' + oid
804 entries = cache_get(revlist_key)
805 if entries and require_meta and not entries[_HAS_META_ENTRY]:
808 entries = cache_commit(repo, oid, require_meta)
811 for name in sorted((n for n in entries.keys() if n != _HAS_META_ENTRY)):
812 yield name, entries[name]
815 names = frozenset(name for name in names
816 if _save_name_rx.match(name) or name in (b'.', b'latest'))
819 yield b'.', entries[b'.']
820 for name in (n for n in names if n != b'.'):
821 if name == _HAS_META_ENTRY:
823 commit = entries.get(name)
827 def tags_items(repo, names):
831 assert len(oid) == 20
832 cached = cache_get_commit_item(oid, need_meta=False)
837 _, typ, size = next(it)
839 return _commit_item_from_data(oid, b''.join(it))
842 return Item(meta=default_file_mode, oid=oid)
844 return Item(meta=default_dir_mode, oid=oid)
845 raise Exception('unexpected tag type ' + typ.decode('ascii')
846 + ' for tag ' + path_msg(name))
850 # We have to pull these all into ram because tag_item calls cat()
851 for name, oid in tuple(repo.refs(names, limit_to_tags=True)):
852 assert(name.startswith(b'refs/tags/'))
854 yield name, tag_item(oid)
857 # Assumes no duplicate refs
858 if isinstance(names, (frozenset, set)):
859 names = frozenset(names)
860 remaining = len(names)
861 last_name = max(names)
868 for name, oid in repo.refs(names, limit_to_tags=True):
869 assert(name.startswith(b'refs/tags/'))
873 if name not in names:
875 yield name, tag_item(oid)
880 def contents(repo, item, names=None, want_meta=True):
881 """Yields information about the items contained in item. Yields
882 (name, item) for each name in names, if the name exists, in an
883 unspecified order. If there are no names, then yields (name,
884 item) for all items, including, a first item named '.'
885 representing the container itself.
887 The meta value for any directories other than '.' will be a
888 default directory mode, not a Metadata object. This is because
889 the actual metadata for a directory is stored inside the directory
890 (see fill_in_metadata_if_dir() or ensure_item_has_metadata()).
892 Note that want_meta is advisory. For any given item, item.meta
893 might be a Metadata instance or a mode, and if the former,
894 meta.size might be None. Missing sizes can be computed via via
895 item_size() or augment_item_meta(..., include_size=True).
897 Do not modify any item.meta Metadata instances directly. If
898 needed, make a copy via item.meta.copy() and modify that instead.
901 # Q: are we comfortable promising '.' first when no names?
904 assert S_ISDIR(item_mode(item))
905 if isinstance(item, real_tree_types):
906 it = repo.cat(hexlify(item.oid))
907 _, obj_t, size = next(it)
911 # Note: it shouldn't be possible to see an Item with type
912 # 'commit' since a 'commit' should always produce a Commit.
913 raise Exception('unexpected git ' + obj_t.decode('ascii'))
915 item_gen = tree_items_with_meta(repo, item.oid, data, names)
917 item_gen = tree_items(item.oid, data, names)
918 elif isinstance(item, RevList):
919 item_gen = revlist_items(repo, item.oid, names,
920 require_meta=want_meta)
921 elif isinstance(item, Root):
922 item_gen = root_items(repo, names, want_meta)
923 elif isinstance(item, Tags):
924 item_gen = tags_items(repo, names)
926 raise Exception('unexpected VFS item ' + str(item))
930 def _resolve_path(repo, path, parent=None, want_meta=True, follow=True):
931 cache_key = b'res:%d%d%d:%s\0%s' \
932 % (bool(want_meta), bool(follow), repo.id(),
933 (b'/'.join(x[0] for x in parent) if parent else b''),
935 resolution = cache_get(cache_key)
939 def notice_resolution(r):
940 cache_notice(cache_key, r)
943 def raise_dir_required_but_not_dir(path, parent, past):
944 raise IOError(ENOTDIR,
945 "path %s%s resolves to non-directory %r"
947 ' (relative to %r)' % parent if parent else '',
956 assert isinstance(x[0], (bytes, str))
957 assert isinstance(x[1], item_types)
958 assert parent[0][1] == _root
959 if not S_ISDIR(item_mode(parent[-1][1])):
960 raise IOError(ENOTDIR,
961 'path resolution parent %r is not a directory'
963 is_absolute, must_be_dir, future = _decompose_path(path)
966 if not future: # path was effectively '.' or '/'
968 return notice_resolution(((b'', _root),))
970 return notice_resolution(tuple(parent))
971 return notice_resolution(((b'', _root),))
973 past = [(b'', _root)]
975 past = list(parent) if parent else [(b'', _root)]
979 if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
980 raise_dir_required_but_not_dir(path, parent, past)
981 return notice_resolution(tuple(past))
982 segment = future.pop()
985 if len(past) > 1: # .. from / is /
986 assert S_ISDIR(item_mode(past[-1][1]))
989 parent_name, parent_item = past[-1]
990 wanted = (segment,) if not want_meta else (b'.', segment)
991 items = tuple(contents(repo, parent_item, names=wanted,
992 want_meta=want_meta))
994 item = items[0][1] if items else None
995 else: # First item will be '.' and have the metadata
996 item = items[1][1] if len(items) == 2 else None
997 dot, dot_item = items[0]
999 past[-1] = parent_name, parent_item
1001 past.append((segment, None),)
1002 return notice_resolution(tuple(past))
1003 mode = item_mode(item)
1004 if not S_ISLNK(mode):
1005 if not S_ISDIR(mode):
1006 past.append((segment, item),)
1008 raise IOError(ENOTDIR,
1009 'path %r%s ends internally in non-directory here: %r'
1011 ' (relative to %r)' % parent if parent else '',
1015 raise_dir_required_but_not_dir(path, parent, past)
1016 return notice_resolution(tuple(past))
1018 if want_meta and isinstance(item, real_tree_types):
1019 dir_meta = _find_treeish_oid_metadata(repo, item.oid)
1021 item = item._replace(meta=dir_meta)
1022 past.append((segment, item))
1024 if not future and not follow:
1025 past.append((segment, item),)
1028 raise IOError(ELOOP,
1029 'too many symlinks encountered while resolving %r%s'
1030 % (path, ' relative to %r' % parent if parent else ''),
1031 terminus=tuple(past + [(segment, item)]))
1032 target = readlink(repo, item)
1033 is_absolute, _, target_future = _decompose_path(target)
1035 if not target_future: # path was effectively '/'
1036 return notice_resolution(((b'', _root),))
1037 past = [(b'', _root)]
1038 future = target_future
1040 future.extend(target_future)
1043 def resolve(repo, path, parent=None, want_meta=True, follow=True):
1044 """Follow the path in the virtual filesystem and return a tuple
1045 representing the location, if any, denoted by the path. Each
1046 element in the result tuple will be (name, info), where info will
1047 be a VFS item that can be passed to functions like item_mode().
1049 If follow is false, and if the final path element is a symbolic
1050 link, don't follow it, just return it in the result.
1052 If a path segment that does not exist is encountered during
1053 resolution, the result will represent the location of the missing
1054 item, and that item in the result will be None.
1056 Any attempt to traverse a non-directory will raise a VFS ENOTDIR
1059 Any symlinks along the path, including at the end, will be
1060 resolved. A VFS IOError with the errno attribute set to ELOOP
1061 will be raised if too many symlinks are traversed while following
1062 the path. That exception is effectively like a normal
1063 ELOOP IOError exception, but will include a terminus element
1064 describing the location of the failure, which will be a tuple of
1065 (name, info) elements.
1067 The parent, if specified, must be a sequence of (name, item)
1068 tuples, and will provide the starting point for the resolution of
1069 the path. If no parent is specified, resolution will start at
1072 The result may include elements of parent directly, so they must
1073 not be modified later. If this is a concern, pass in "name,
1074 copy_item(item) for name, item in parent" instead.
1076 When want_meta is true, detailed metadata will be included in each
1077 result item if it's avaiable, otherwise item.meta will be an
1078 integer mode. The metadata size may or may not be provided, but
1079 can be computed by item_size() or augment_item_meta(...,
1080 include_size=True). Setting want_meta=False is rarely desirable
1081 since it can limit the VFS to just the metadata git itself can
1082 represent, and so, as an example, fifos and sockets will appear to
1083 be regular files (e.g. S_ISREG(item_mode(item)) will be true) .
1084 But the option is provided because it may be more efficient when
1085 only the path names or the more limited metadata is sufficient.
1087 Do not modify any item.meta Metadata instances directly. If
1088 needed, make a copy via item.meta.copy() and modify that instead.
1091 if repo.is_remote():
1092 # Redirect to the more efficient remote version
1093 return repo.resolve(path, parent=parent, want_meta=want_meta,
1095 result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
1097 _, leaf_item = result[-1]
1098 if leaf_item and follow:
1099 assert not S_ISLNK(item_mode(leaf_item))
1102 def try_resolve(repo, path, parent=None, want_meta=True):
1103 """If path does not refer to a symlink, does not exist, or refers to a
1104 valid symlink, behave exactly like resolve(..., follow=True). If
1105 path refers to an invalid symlink, behave like resolve(...,
1109 res = resolve(repo, path, parent=parent, want_meta=want_meta, follow=False)
1110 leaf_name, leaf_item = res[-1]
1113 if not S_ISLNK(item_mode(leaf_item)):
1115 follow = resolve(repo, leaf_name, parent=res[:-1], want_meta=want_meta)
1116 follow_name, follow_item = follow[-1]
1121 def augment_item_meta(repo, item, include_size=False):
1122 """Ensure item has a Metadata instance for item.meta. If item.meta is
1123 currently a mode, replace it with a compatible "fake" Metadata
1124 instance. If include_size is true, ensure item.meta.size is
1125 correct, computing it if needed. If item.meta is a Metadata
1126 instance, this call may modify it in place or replace it.
1129 # If we actually had parallelism, we'd need locking...
1132 if isinstance(m, Metadata):
1133 if include_size and m.size is None:
1134 m.size = _compute_item_size(repo, item)
1135 return item._replace(meta=m)
1140 meta.uid = meta.gid = None
1141 meta.atime = meta.mtime = meta.ctime = 0
1143 if isinstance(item, FakeLink):
1144 target = item.target
1146 target = _readlink(repo, item.oid)
1147 meta.symlink_target = target
1148 meta.size = len(target)
1150 meta.size = _compute_item_size(repo, item)
1151 return item._replace(meta=meta)
1153 def fill_in_metadata_if_dir(repo, item):
1154 """If item is a directory and item.meta is not a Metadata instance,
1155 attempt to find the metadata for the directory. If found, return
1156 a new item augmented to include that metadata. Otherwise, return
1157 item. May be useful for the output of contents().
1160 if S_ISDIR(item_mode(item)) and not isinstance(item.meta, Metadata):
1161 items = tuple(contents(repo, item, (b'.',), want_meta=True))
1162 assert len(items) == 1
1163 assert items[0][0] == b'.'
1167 def ensure_item_has_metadata(repo, item, include_size=False):
1168 """If item is a directory, attempt to find and add its metadata. If
1169 the item still doesn't have a Metadata instance for item.meta,
1170 give it one via augment_item_meta(). May be useful for the output
1174 return augment_item_meta(repo,
1175 fill_in_metadata_if_dir(repo, item),
1176 include_size=include_size)