1 """Virtual File System interface to bup repository content.
3 This module provides a path-based interface to the content of a bup
6 The VFS is structured like this:
9 /SAVE-NAME/SAVE-DATE/...
Each path is represented by an item that has at least an item.meta which
13 may be either a Metadata object, or an integer mode. Functions like
14 item_mode() and item_size() will return the mode and size in either
15 case. Any item.meta Metadata instances must not be modified directly.
16 Make a copy to modify via item.meta.copy() if needed, or call
19 The want_meta argument is advisory for calls that accept it, and it
20 may not be honored. Callers must be able to handle an item.meta value
21 that is either an instance of Metadata or an integer mode, perhaps
22 via item_mode() or augment_item_meta().
24 Setting want_meta=False is rarely desirable since it can limit the VFS
25 to only the metadata that git itself can represent, and so for
26 example, fifos and sockets will appear to be regular files
27 (e.g. S_ISREG(item_mode(item)) will be true). But the option is still
28 provided because it may be more efficient when just the path names or
29 the more limited metadata is sufficient.
31 Any given metadata object's size may be None, in which case the size
32 can be computed via item_size() or augment_item_meta(...,
35 When traversing a directory using functions like contents(), the meta
36 value for any directories other than '.' will be a default directory
37 mode, not a Metadata object. This is because the actual metadata for
38 a directory is stored inside the directory (see
39 fill_in_metadata_if_dir() or ensure_item_has_metadata()).
41 Commit items represent commits (e.g. /.tag/some-commit or
42 /foo/latest), and for most purposes, they appear as the underlying
43 tree. S_ISDIR(item_mode(item)) will return true for both tree Items
44 and Commits and the commit's oid is the tree hash; the commit hash is
49 from __future__ import absolute_import, print_function
50 from binascii import hexlify, unhexlify
51 from collections import namedtuple
52 from errno import EINVAL, ELOOP, ENOTDIR
53 from itertools import chain, groupby, tee
54 from random import randrange
55 from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
56 from time import localtime, strftime
59 from bup import git, vint
60 from bup.compat import hexstr, range, str_type
61 from bup.git import BUP_CHUNKED, parse_commit, tree_decode
62 from bup.helpers import debug2, last
63 from bup.io import path_msg
64 from bup.metadata import Metadata
65 from bup.vint import read_bvec, write_bvec
66 from bup.vint import read_vint, write_vint
67 from bup.vint import read_vuint, write_vuint
69 if sys.version_info[0] < 3:
70 from exceptions import IOError as py_IOError
74 # We currently assume that it's always appropriate to just forward IOErrors
class IOError(py_IOError):
    """IOError variant that can additionally carry a terminus.

    errno and message are forwarded to the base class constructor;
    terminus (None by default) is stored unchanged on the instance.
    """

    def __init__(self, errno, message, terminus=None):
        super(IOError, self).__init__(errno, message)
        self.terminus = terminus
82 def write_ioerror(port, ex):
83 assert isinstance(ex, IOError)
85 (1 if ex.errno is not None else 0)
86 | (2 if ex.strerror is not None else 0)
87 | (4 if ex.terminus is not None else 0))
88 if ex.errno is not None:
89 write_vint(port, ex.errno)
90 if ex.strerror is not None:
91 write_bvec(port, ex.strerror.encode('utf-8'))
92 if ex.terminus is not None:
93 write_resolution(port, ex.terminus)
def read_ioerror(port):
    """Read a serialized VFS IOError from port and return it.

    A leading vuint bitmask says which optional fields follow, in this
    order: errno (bit 1, a vint), message (bit 2, a UTF-8 encoded
    bvec), and terminus (bit 4, a resolution).  Absent fields are
    passed to IOError as None.
    """
    present = read_vuint(port)
    errno = message = terminus = None
    # The fields must be consumed in exactly this order.
    if present & 1:
        errno = read_vint(port)
    if present & 2:
        message = read_bvec(port).decode('utf-8')
    if present & 4:
        terminus = read_resolution(port)
    return IOError(errno=errno, message=message, terminus=terminus)
# Integer st_mode values used for item.meta when no Metadata object is
# available for an item.
default_file_mode = 0o644 | S_IFREG
default_dir_mode = 0o755 | S_IFDIR
default_symlink_mode = 0o755 | S_IFLNK
107 def _default_mode_for_gitmode(gitmode):
109 return default_file_mode
111 return default_dir_mode
113 return default_symlink_mode
114 raise Exception('unexpected git mode ' + oct(gitmode))
116 def _normal_or_chunked_file_size(repo, oid):
117 """Return the size of the normal or chunked file indicated by oid."""
118 # FIXME: --batch-format CatPipe?
119 it = repo.cat(hexlify(oid))
120 _, obj_t, size = next(it)
122 while obj_t == b'tree':
123 mode, name, last_oid = last(tree_decode(b''.join(it)))
125 it = repo.cat(hexlify(last_oid))
126 _, obj_t, size = next(it)
127 return ofs + sum(len(b) for b in it)
129 def _skip_chunks_before_offset(tree, offset):
130 prev_ent = next(tree, None)
135 ent_ofs = int(ent[1], 16)
137 return chain([prev_ent, ent], tree)
138 if ent_ofs == offset:
139 return chain([ent], tree)
143 def _tree_chunks(repo, tree, startofs):
144 "Tree should be a sequence of (name, mode, hash) as per tree_decode()."
145 assert(startofs >= 0)
146 # name is the chunk's hex offset in the original file
147 for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
149 skipmore = startofs - ofs
152 it = repo.cat(hexlify(oid))
153 _, obj_t, size = next(it)
156 assert obj_t == b'tree'
157 for b in _tree_chunks(repo, tree_decode(data), skipmore):
160 assert obj_t == b'blob'
161 yield data[skipmore:]
164 def __init__(self, repo, oid, startofs):
165 it = repo.cat(hexlify(oid))
166 _, obj_t, size = next(it)
167 isdir = obj_t == b'tree'
170 self.it = _tree_chunks(repo, tree_decode(data), startofs)
174 self.blob = data[startofs:]
177 def next(self, size):
179 while len(out) < size:
180 if self.it and not self.blob:
182 self.blob = next(self.it)
183 except StopIteration:
186 want = size - len(out)
187 out += self.blob[:want]
188 self.blob = self.blob[want:]
191 debug2('next(%d) returned %d\n' % (size, len(out)))
195 class _FileReader(object):
196 def __init__(self, repo, oid, known_size=None):
197 assert len(oid) == 20
202 self._size = known_size
204 def _compute_size(self):
206 self._size = _normal_or_chunked_file_size(self._repo, self.oid)
210 if ofs < 0 or ofs > self._compute_size():
211 raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)
217 def read(self, count=-1):
218 size = self._compute_size()
222 count = size - self.ofs
223 if not self.reader or self.reader.ofs != self.ofs:
224 self.reader = _ChunkReader(self._repo, self.oid, self.ofs)
226 buf = self.reader.next(count)
229 raise # our offsets will be all screwed up otherwise
238 def __exit__(self, type, value, traceback):
242 _multiple_slashes_rx = re.compile(br'//+')
244 def _decompose_path(path):
245 """Return a boolean indicating whether the path is absolute, and a
246 reversed list of path elements, omitting any occurrences of "."
247 and ignoring any leading or trailing slash. If the path is
248 effectively '/' or '.', return an empty list.
251 path = re.sub(_multiple_slashes_rx, b'/', path)
253 return True, True, []
254 is_absolute = must_be_dir = False
255 if path.startswith(b'/'):
258 for suffix in (b'/', b'/.'):
259 if path.endswith(suffix):
261 path = path[:-len(suffix)]
262 parts = [x for x in path.split(b'/') if x != b'.']
265 must_be_dir = True # e.g. path was effectively '.' or '/', etc.
266 return is_absolute, must_be_dir, parts
# VFS item types.  Every one of them has a meta field, which may be a
# Metadata object or an integer mode.
Item = namedtuple('Item', 'meta oid')
Chunky = namedtuple('Chunky', 'meta oid')
FakeLink = namedtuple('FakeLink', 'meta target')
Root = namedtuple('Root', ('meta',))
Tags = namedtuple('Tags', ('meta',))
RevList = namedtuple('RevList', 'meta oid')
Commit = namedtuple('Commit', 'meta oid coid')

# Note that FakeLink is not listed in item_types.
item_types = (Item, Chunky, Root, Tags, RevList, Commit)
real_tree_types = (Item, Commit)
280 def write_item(port, item):
282 name = bytes(kind.__name__.encode('ascii'))
284 has_meta = 1 if isinstance(meta, Metadata) else 0
285 if kind in (Item, Chunky, RevList):
286 assert len(item.oid) == 20
288 vint.send(port, 'sVs', name, has_meta, item.oid)
289 Metadata.write(meta, port, include_path=False)
291 vint.send(port, 'sVsV', name, has_meta, item.oid, item.meta)
292 elif kind in (Root, Tags):
294 vint.send(port, 'sV', name, has_meta)
295 Metadata.write(meta, port, include_path=False)
297 vint.send(port, 'sVV', name, has_meta, item.meta)
299 assert len(item.oid) == 20
300 assert len(item.coid) == 20
302 vint.send(port, 'sVss', name, has_meta, item.oid, item.coid)
303 Metadata.write(meta, port, include_path=False)
305 vint.send(port, 'sVssV', name, has_meta, item.oid, item.coid,
307 elif kind == FakeLink:
309 vint.send(port, 'sVs', name, has_meta, item.target)
310 Metadata.write(meta, port, include_path=False)
312 vint.send(port, 'sVsV', name, has_meta, item.target, item.meta)
317 def read_m(port, has_meta):
319 m = Metadata.read(port)
321 return read_vuint(port)
322 kind, has_meta = vint.recv(port, 'sV')
324 oid, meta = read_bvec(port), read_m(port, has_meta)
325 return Item(oid=oid, meta=meta)
326 if kind == b'Chunky':
327 oid, meta = read_bvec(port), read_m(port, has_meta)
328 return Chunky(oid=oid, meta=meta)
329 if kind == b'RevList':
330 oid, meta = read_bvec(port), read_m(port, has_meta)
331 return RevList(oid=oid, meta=meta)
333 return Root(meta=read_m(port, has_meta))
335 return Tags(meta=read_m(port, has_meta))
336 if kind == b'Commit':
337 oid, coid = vint.recv(port, 'ss')
338 meta = read_m(port, has_meta)
339 return Commit(oid=oid, coid=coid, meta=meta)
340 if kind == b'FakeLink':
341 target, meta = read_bvec(port), read_m(port, has_meta)
342 return FakeLink(target=target, meta=meta)
345 def write_resolution(port, resolution):
346 write_vuint(port, len(resolution))
347 for name, item in resolution:
348 write_bvec(port, name)
351 write_item(port, item)
355 def read_resolution(port):
359 name = read_bvec(port)
360 have_item = ord(port.read(1))
361 assert have_item in (0, 1)
362 item = read_item(port) if have_item else None
363 result.append((name, item))
# Shared Root and Tags items; both carry the default directory mode
# rather than a Metadata object.
_root = Root(default_dir_mode)
_tags = Tags(default_dir_mode)
373 ### A general purpose shared cache with (currently) cheap random
374 ### eviction. At the moment there is no weighting so a single commit
375 ### item is just as likely to be evicted as an entire "rev-list". See
376 ### is_valid_cache_key for a description of the expected content.
380 _cache_max_items = 30000
383 global _cache, _cache_keys
387 def is_valid_cache_key(x):
388 """Return logically true if x looks like it could be a valid cache key
389 (with respect to structure). Current valid cache entries:
390 res:... -> resolution
392 rvl:OID -> {'.', commit, '2012...', next_commit, ...}
394 # Suspect we may eventually add "(container_oid, name) -> ...", and others.
395 if isinstance(x, bytes):
397 if tag in (b'itm:', b'rvl:') and len(x) == 24:
404 if not is_valid_cache_key(key):
405 raise Exception('invalid cache key: ' + repr(key))
406 return _cache.get(key)
408 def cache_notice(key, value, overwrite=False):
409 global _cache, _cache_keys, _cache_max_items
410 if not is_valid_cache_key(key):
411 raise Exception('invalid cache key: ' + repr(key))
416 if len(_cache) < _cache_max_items:
417 _cache_keys.append(key)
420 victim_i = randrange(0, len(_cache_keys))
421 victim = _cache_keys[victim_i]
423 _cache_keys[victim_i] = key
426 def _has_metadata_if_needed(item, need_meta):
429 if isinstance(item.meta, Metadata):
433 def cache_get_commit_item(oid, need_meta=True):
434 """Return the requested tree item if it can be found in the cache.
435 When need_meta is true don't return a cached item that only has a
437 # tree might be stored independently, or as '.' with its entries.
438 commit_key = b'itm:' + oid
439 item = cache_get(commit_key)
441 if _has_metadata_if_needed(item, need_meta):
443 entries = cache_get(b'rvl:' + oid)
446 if _has_metadata_if_needed(item, need_meta):
451 """Return a completely independent copy of item, such that
452 modifications will not affect the original.
455 meta = getattr(item, 'meta', None)
456 if isinstance(meta, Metadata):
457 return(item._replace(meta=meta.copy()))
461 """Return the integer mode (stat st_mode) for item."""
463 if isinstance(m, Metadata):
467 def _read_dir_meta(bupm):
468 # This is because save writes unmodified Metadata() entries for
469 # fake parents -- test-save-strip-graft.sh demonstrates.
470 m = Metadata.read(bupm)
472 return default_dir_mode
473 assert m.mode is not None
478 def tree_data_and_bupm(repo, oid):
479 """Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
480 tree has no metadata (i.e. older bup save, or non-bup tree).
483 assert len(oid) == 20
484 it = repo.cat(hexlify(oid))
485 _, item_t, size = next(it)
487 if item_t == b'commit':
488 commit = parse_commit(data)
489 it = repo.cat(commit.tree)
490 _, item_t, size = next(it)
492 assert item_t == b'tree'
493 elif item_t != b'tree':
494 raise Exception('%s is not a tree or commit' % hexstr(oid))
495 for _, mangled_name, sub_oid in tree_decode(data):
496 if mangled_name == b'.bupm':
498 if mangled_name > b'.bupm':
502 def _find_treeish_oid_metadata(repo, oid):
503 """Return the metadata for the tree or commit oid, or None if the tree
504 has no metadata (i.e. older bup save, or non-bup tree).
507 tree_data, bupm_oid = tree_data_and_bupm(repo, oid)
509 with _FileReader(repo, bupm_oid) as meta_stream:
510 return _read_dir_meta(meta_stream)
513 def _readlink(repo, oid):
514 return b''.join(repo.join(hexlify(oid)))
516 def readlink(repo, item):
517 """Return the link target of item, which must be a symlink. Reads the
518 target from the repository if necessary."""
520 assert S_ISLNK(item_mode(item))
521 if isinstance(item, FakeLink):
523 if isinstance(item.meta, Metadata):
524 target = item.meta.symlink_target
527 return _readlink(repo, item.oid)
529 def _compute_item_size(repo, item):
530 mode = item_mode(item)
532 size = _normal_or_chunked_file_size(repo, item.oid)
535 if isinstance(item, FakeLink):
536 return len(item.target)
537 return len(_readlink(repo, item.oid))
540 def item_size(repo, item):
541 """Return the size of item, computing it if necessary."""
543 if isinstance(m, Metadata) and m.size is not None:
545 return _compute_item_size(repo, item)
def tree_data_reader(repo, oid):
    """Return an open reader for all of the data contained within oid.

    If oid refers to a tree, its contents are recursively concatenated.
    """
    reader = _FileReader(repo, oid)
    return reader
def fopen(repo, item):
    """Return an open reader for the data of item, a regular file item.

    Asserts that item_mode(item) indicates a regular file.
    """
    mode = item_mode(item)
    assert S_ISREG(mode)
    return tree_data_reader(repo, item.oid)
557 def _commit_item_from_data(oid, data):
558 info = parse_commit(data)
559 return Commit(meta=default_dir_mode,
560 oid=unhexlify(info.tree),
563 def _commit_item_from_oid(repo, oid, require_meta):
564 commit = cache_get_commit_item(oid, need_meta=require_meta)
565 if commit and ((not require_meta) or isinstance(commit.meta, Metadata)):
567 it = repo.cat(hexlify(oid))
568 _, typ, size = next(it)
569 assert typ == b'commit'
570 commit = _commit_item_from_data(oid, b''.join(it))
572 meta = _find_treeish_oid_metadata(repo, commit.oid)
574 commit = commit._replace(meta=meta)
575 commit_key = b'itm:' + oid
576 cache_notice(commit_key, commit, overwrite=True)
def _revlist_item_from_oid(repo, oid, require_meta):
    """Return a RevList for oid whose meta is that of the commit item
    produced by _commit_item_from_oid() for the same oid."""
    commit_meta = _commit_item_from_oid(repo, oid, require_meta).meta
    return RevList(meta=commit_meta, oid=oid)
583 def root_items(repo, names=None, want_meta=True):
584 """Yield (name, item) for the items in '/' in the VFS. Return
585 everything if names is logically false, otherwise return only
586 items with a name in the collection.
589 # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?
595 # FIXME: maybe eventually support repo.clone() or something
596 # and pass in two repos, so we can drop the tuple() and stream
597 # in parallel (i.e. meta vs refs).
598 for name, oid in tuple(repo.refs([], limit_to_heads=True)):
599 assert(name.startswith(b'refs/heads/'))
600 yield name[11:], _revlist_item_from_oid(repo, oid, want_meta)
608 if ref in (b'.', b'.tag'):
610 it = repo.cat(b'refs/heads/' + ref)
611 oidx, typ, size = next(it)
615 assert typ == b'commit'
616 commit = parse_commit(b''.join(it))
617 yield ref, _revlist_item_from_oid(repo, unhexlify(oidx), want_meta)
619 def ordered_tree_entries(tree_data, bupm=None):
620 """Yields (name, mangled_name, kind, gitmode, oid) for each item in
621 tree, sorted by name.
624 # Sadly, the .bupm entries currently aren't in git tree order,
625 # but in unmangled name order. They _do_ account for the fact
626 # that git sorts trees (including chunked trees) as if their
627 # names ended with "/" (so "fo" sorts after "fo." iff fo is a
628 # directory), but we apply this on the unmangled names in save
629 # rather than on the mangled names.
630 # This makes streaming impossible when we need the metadata.
631 def result_from_tree_entry(tree_entry):
632 gitmode, mangled_name, oid = tree_entry
633 name, kind = git.demangle_name(mangled_name, gitmode)
634 return name, mangled_name, kind, gitmode, oid
636 tree_ents = (result_from_tree_entry(x) for x in tree_decode(tree_data))
638 tree_ents = sorted(tree_ents, key=lambda x: x[0])
639 for ent in tree_ents:
642 def tree_items(oid, tree_data, names=frozenset(), bupm=None):
644 def tree_item(ent_oid, kind, gitmode):
645 if kind == BUP_CHUNKED:
646 meta = Metadata.read(bupm) if bupm else default_file_mode
647 return Chunky(oid=ent_oid, meta=meta)
650 # No metadata here (accessable via '.' inside ent_oid).
651 return Item(meta=default_dir_mode, oid=ent_oid)
653 meta = Metadata.read(bupm) if bupm else None
654 # handle the case of metadata being empty/missing in bupm
655 # (or there not being bupm at all)
657 meta = _default_mode_for_gitmode(gitmode)
658 return Item(oid=ent_oid, meta=meta)
660 assert len(oid) == 20
662 dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
663 yield b'.', Item(oid=oid, meta=dot_meta)
664 tree_entries = ordered_tree_entries(tree_data, bupm)
665 for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
666 if mangled_name == b'.bupm':
669 yield name, tree_item(ent_oid, kind, gitmode)
672 # Assumes the tree is properly formed, i.e. there are no
673 # duplicates, and entries will be in git tree order.
674 if isinstance(names, (frozenset, set)):
675 names = frozenset(names)
676 remaining = len(names)
678 # Account for the bupm sort order issue (cf. ordered_tree_entries above)
679 last_name = max(names) if bupm else max(names) + b'/'
682 dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
683 yield b'.', Item(oid=oid, meta=dot_meta)
688 tree_entries = ordered_tree_entries(tree_data, bupm)
689 for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
690 if mangled_name == b'.bupm':
693 if name not in names:
695 break # given bupm sort order, we're finished
696 if (kind == BUP_CHUNKED or not S_ISDIR(gitmode)) and bupm:
699 yield name, tree_item(ent_oid, kind, gitmode)
704 def tree_items_with_meta(repo, oid, tree_data, names):
705 # For now, the .bupm order doesn't quite match git's, and we don't
706 # load the tree data incrementally anyway, so we just work in RAM
708 assert len(oid) == 20
710 for _, mangled_name, sub_oid in tree_decode(tree_data):
711 if mangled_name == b'.bupm':
712 bupm = _FileReader(repo, sub_oid)
714 if mangled_name > b'.bupm':
716 for item in tree_items(oid, tree_data, names, bupm):
719 _save_name_rx = re.compile(br'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
721 def _reverse_suffix_duplicates(strs):
722 """Yields the elements of strs, with any runs of duplicate values
723 suffixed with -N suffixes, where the zero padded integer N
724 decreases to 0 by 1 (e.g. 10, 09, ..., 00).
727 for name, duplicates in groupby(strs):
728 ndup = len(tuple(duplicates))
732 ndig = len(str(ndup - 1))
733 fmt = b'%s-' + b'%0' + (b'%d' % ndig) + b'd'
734 for i in range(ndup - 1, -1, -1):
735 yield fmt % (name, i)
738 items = f.readline().split(None)
739 assert len(items) == 2
740 tree, auth_sec = items
741 return unhexlify(tree), int(auth_sec)
743 def _name_for_rev(rev):
744 commit_oidx, (tree_oid, utc) = rev
745 return strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
747 def _item_for_rev(rev):
748 commit_oidx, (tree_oid, utc) = rev
749 coid = unhexlify(commit_oidx)
750 item = cache_get_commit_item(coid, need_meta=False)
753 item = Commit(meta=default_dir_mode, oid=tree_oid, coid=coid)
754 commit_key = b'itm:' + coid
755 cache_notice(commit_key, item)
758 # non-string singleton
759 _HAS_META_ENTRY = object()
761 def cache_commit(repo, oid, require_meta=True):
762 """Build, cache, and return a "name -> commit_item" dict of the entire
767 entries[b'.'] = _revlist_item_from_oid(repo, oid, require_meta)
768 revs = repo.rev_list((hexlify(oid),), format=b'%T %at',
770 rev_items, rev_names = tee(revs)
771 revs = None # Don't disturb the tees
772 rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
773 rev_items = (_item_for_rev(x) for x in rev_items)
775 for item in rev_items:
776 name = next(rev_names)
777 tip = tip or (name, item)
779 entries[b'latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
780 revlist_key = b'rvl:' + tip[1].coid
781 entries[_HAS_META_ENTRY] = require_meta
782 cache_notice(revlist_key, entries, overwrite=True)
785 def revlist_items(repo, oid, names, require_meta=True):
786 assert len(oid) == 20
788 # Special case '.' instead of caching the whole history since it's
789 # the only way to get the metadata for the commit.
790 if names and all(x == b'.' for x in names):
791 yield b'.', _revlist_item_from_oid(repo, oid, require_meta)
794 # For now, don't worry about the possibility of the contents being
795 # "too big" for the cache.
796 revlist_key = b'rvl:' + oid
797 entries = cache_get(revlist_key)
798 if entries and require_meta and not entries[_HAS_META_ENTRY]:
801 entries = cache_commit(repo, oid, require_meta)
804 for name in sorted((n for n in entries.keys() if n != _HAS_META_ENTRY)):
805 yield name, entries[name]
808 names = frozenset(name for name in names
809 if _save_name_rx.match(name) or name in (b'.', b'latest'))
812 yield b'.', entries[b'.']
813 for name in (n for n in names if n != b'.'):
814 if name == _HAS_META_ENTRY:
816 commit = entries.get(name)
820 def tags_items(repo, names):
824 assert len(oid) == 20
827 _, typ, size = next(it)
829 return cache_get_commit_item(oid, need_meta=False) \
830 or _commit_item_from_data(oid, b''.join(it))
833 return Item(meta=default_file_mode, oid=oid)
835 return Item(meta=default_dir_mode, oid=oid)
836 raise Exception('unexpected tag type ' + typ.decode('ascii')
837 + ' for tag ' + path_msg(name))
841 # We have to pull these all into ram because tag_item calls cat()
842 for name, oid in tuple(repo.refs(names, limit_to_tags=True)):
843 assert(name.startswith(b'refs/tags/'))
845 yield name, tag_item(oid)
848 # Assumes no duplicate refs
849 if isinstance(names, (frozenset, set)):
850 names = frozenset(names)
851 remaining = len(names)
852 last_name = max(names)
859 for name, oid in repo.refs(names, limit_to_tags=True):
860 assert(name.startswith(b'refs/tags/'))
864 if name not in names:
866 yield name, tag_item(oid)
871 def contents(repo, item, names=None, want_meta=True):
872 """Yields information about the items contained in item. Yields
873 (name, item) for each name in names, if the name exists, in an
874 unspecified order. If there are no names, then yields (name,
875 item) for all items, including, a first item named '.'
876 representing the container itself.
878 The meta value for any directories other than '.' will be a
879 default directory mode, not a Metadata object. This is because
880 the actual metadata for a directory is stored inside the directory
881 (see fill_in_metadata_if_dir() or ensure_item_has_metadata()).
883 Note that want_meta is advisory. For any given item, item.meta
884 might be a Metadata instance or a mode, and if the former,
    meta.size might be None. Missing sizes can be computed via
886 item_size() or augment_item_meta(..., include_size=True).
888 Do not modify any item.meta Metadata instances directly. If
889 needed, make a copy via item.meta.copy() and modify that instead.
892 # Q: are we comfortable promising '.' first when no names?
895 assert S_ISDIR(item_mode(item))
896 if isinstance(item, real_tree_types):
897 it = repo.cat(hexlify(item.oid))
898 _, obj_t, size = next(it)
902 # Note: it shouldn't be possible to see an Item with type
903 # 'commit' since a 'commit' should always produce a Commit.
904 raise Exception('unexpected git ' + obj_t.decode('ascii'))
906 item_gen = tree_items_with_meta(repo, item.oid, data, names)
908 item_gen = tree_items(item.oid, data, names)
909 elif isinstance(item, RevList):
910 item_gen = revlist_items(repo, item.oid, names,
911 require_meta=want_meta)
912 elif isinstance(item, Root):
913 item_gen = root_items(repo, names, want_meta)
914 elif isinstance(item, Tags):
915 item_gen = tags_items(repo, names)
917 raise Exception('unexpected VFS item ' + str(item))
921 def _resolve_path(repo, path, parent=None, want_meta=True, follow=True):
922 cache_key = b'res:%d%d%d:%s\0%s' \
923 % (bool(want_meta), bool(follow), repo.id(),
924 (b'/'.join(x[0] for x in parent) if parent else b''),
926 resolution = cache_get(cache_key)
930 def notice_resolution(r):
931 cache_notice(cache_key, r)
934 def raise_dir_required_but_not_dir(path, parent, past):
935 raise IOError(ENOTDIR,
936 "path %s%s resolves to non-directory %r"
938 ' (relative to %r)' % parent if parent else '',
947 assert isinstance(x[0], (bytes, str_type))
948 assert isinstance(x[1], item_types)
949 assert parent[0][1] == _root
950 if not S_ISDIR(item_mode(parent[-1][1])):
951 raise IOError(ENOTDIR,
952 'path resolution parent %r is not a directory'
954 is_absolute, must_be_dir, future = _decompose_path(path)
957 if not future: # path was effectively '.' or '/'
959 return notice_resolution(((b'', _root),))
961 return notice_resolution(tuple(parent))
962 return notice_resolution(((b'', _root),))
964 past = [(b'', _root)]
966 past = list(parent) if parent else [(b'', _root)]
970 if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
971 raise_dir_required_but_not_dir(path, parent, past)
972 return notice_resolution(tuple(past))
973 segment = future.pop()
976 if len(past) > 1: # .. from / is /
977 assert S_ISDIR(item_mode(past[-1][1]))
980 parent_name, parent_item = past[-1]
981 wanted = (segment,) if not want_meta else (b'.', segment)
982 items = tuple(contents(repo, parent_item, names=wanted,
983 want_meta=want_meta))
985 item = items[0][1] if items else None
986 else: # First item will be '.' and have the metadata
987 item = items[1][1] if len(items) == 2 else None
988 dot, dot_item = items[0]
990 past[-1] = parent_name, parent_item
992 past.append((segment, None),)
993 return notice_resolution(tuple(past))
994 mode = item_mode(item)
995 if not S_ISLNK(mode):
996 if not S_ISDIR(mode):
997 past.append((segment, item),)
999 raise IOError(ENOTDIR,
1000 'path %r%s ends internally in non-directory here: %r'
1002 ' (relative to %r)' % parent if parent else '',
1006 raise_dir_required_but_not_dir(path, parent, past)
1007 return notice_resolution(tuple(past))
1009 if want_meta and isinstance(item, real_tree_types):
1010 dir_meta = _find_treeish_oid_metadata(repo, item.oid)
1012 item = item._replace(meta=dir_meta)
1013 past.append((segment, item))
1015 if not future and not follow:
1016 past.append((segment, item),)
1019 raise IOError(ELOOP,
1020 'too many symlinks encountered while resolving %r%s'
1021 % (path, ' relative to %r' % parent if parent else ''),
1022 terminus=tuple(past + [(segment, item)]))
1023 target = readlink(repo, item)
1024 is_absolute, _, target_future = _decompose_path(target)
1026 if not target_future: # path was effectively '/'
1027 return notice_resolution(((b'', _root),))
1028 past = [(b'', _root)]
1029 future = target_future
1031 future.extend(target_future)
1034 def resolve(repo, path, parent=None, want_meta=True, follow=True):
1035 """Follow the path in the virtual filesystem and return a tuple
1036 representing the location, if any, denoted by the path. Each
1037 element in the result tuple will be (name, info), where info will
1038 be a VFS item that can be passed to functions like item_mode().
1040 If follow is false, and if the final path element is a symbolic
1041 link, don't follow it, just return it in the result.
1043 If a path segment that does not exist is encountered during
1044 resolution, the result will represent the location of the missing
1045 item, and that item in the result will be None.
1047 Any attempt to traverse a non-directory will raise a VFS ENOTDIR
1050 Any symlinks along the path, including at the end, will be
1051 resolved. A VFS IOError with the errno attribute set to ELOOP
1052 will be raised if too many symlinks are traversed while following
1053 the path. That exception is effectively like a normal
1054 ELOOP IOError exception, but will include a terminus element
1055 describing the location of the failure, which will be a tuple of
1056 (name, info) elements.
1058 The parent, if specified, must be a sequence of (name, item)
1059 tuples, and will provide the starting point for the resolution of
1060 the path. If no parent is specified, resolution will start at
1063 The result may include elements of parent directly, so they must
1064 not be modified later. If this is a concern, pass in "name,
1065 copy_item(item) for name, item in parent" instead.
1067 When want_meta is true, detailed metadata will be included in each
    result item if it's available, otherwise item.meta will be an
1069 integer mode. The metadata size may or may not be provided, but
1070 can be computed by item_size() or augment_item_meta(...,
1071 include_size=True). Setting want_meta=False is rarely desirable
1072 since it can limit the VFS to just the metadata git itself can
1073 represent, and so, as an example, fifos and sockets will appear to
1074 be regular files (e.g. S_ISREG(item_mode(item)) will be true) .
1075 But the option is provided because it may be more efficient when
1076 only the path names or the more limited metadata is sufficient.
1078 Do not modify any item.meta Metadata instances directly. If
1079 needed, make a copy via item.meta.copy() and modify that instead.
1082 if repo.is_remote():
1083 # Redirect to the more efficient remote version
1084 return repo.resolve(path, parent=parent, want_meta=want_meta,
1086 result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
1088 _, leaf_item = result[-1]
1089 if leaf_item and follow:
1090 assert not S_ISLNK(item_mode(leaf_item))
1093 def try_resolve(repo, path, parent=None, want_meta=True):
1094 """If path does not refer to a symlink, does not exist, or refers to a
1095 valid symlink, behave exactly like resolve(..., follow=True). If
1096 path refers to an invalid symlink, behave like resolve(...,
1100 res = resolve(repo, path, parent=parent, want_meta=want_meta, follow=False)
1101 leaf_name, leaf_item = res[-1]
1104 if not S_ISLNK(item_mode(leaf_item)):
1106 follow = resolve(repo, leaf_name, parent=res[:-1], want_meta=want_meta)
1107 follow_name, follow_item = follow[-1]
1112 def augment_item_meta(repo, item, include_size=False):
1113 """Ensure item has a Metadata instance for item.meta. If item.meta is
1114 currently a mode, replace it with a compatible "fake" Metadata
1115 instance. If include_size is true, ensure item.meta.size is
1116 correct, computing it if needed. If item.meta is a Metadata
1117 instance, this call may modify it in place or replace it.
1120 # If we actually had parallelism, we'd need locking...
1123 if isinstance(m, Metadata):
1124 if include_size and m.size is None:
1125 m.size = _compute_item_size(repo, item)
1126 return item._replace(meta=m)
1131 meta.uid = meta.gid = None
1132 meta.atime = meta.mtime = meta.ctime = 0
1134 if isinstance(item, FakeLink):
1135 target = item.target
1137 target = _readlink(repo, item.oid)
1138 meta.symlink_target = target
1139 meta.size = len(target)
1141 meta.size = _compute_item_size(repo, item)
1142 return item._replace(meta=meta)
1144 def fill_in_metadata_if_dir(repo, item):
1145 """If item is a directory and item.meta is not a Metadata instance,
1146 attempt to find the metadata for the directory. If found, return
1147 a new item augmented to include that metadata. Otherwise, return
1148 item. May be useful for the output of contents().
1151 if S_ISDIR(item_mode(item)) and not isinstance(item.meta, Metadata):
1152 items = tuple(contents(repo, item, (b'.',), want_meta=True))
1153 assert len(items) == 1
1154 assert items[0][0] == b'.'
def ensure_item_has_metadata(repo, item, include_size=False):
    """Return item with its metadata filled in as far as possible.

    If item is a directory, first attempt to find and add its metadata
    via fill_in_metadata_if_dir().  The result is then handed to
    augment_item_meta(), with include_size passed through, so that an
    item still lacking a Metadata instance for item.meta is given one.
    May be useful for the output of contents().
    """
    candidate = fill_in_metadata_if_dir(repo, item)
    return augment_item_meta(repo, candidate, include_size=include_size)