1 """Virtual File System interface to bup repository content.
This module provides a path-based interface to the content of a bup
repository.
6 The VFS is structured like this:
9 /SAVE-NAME/SAVE-DATE/...
Each path is represented by an item that has at least an item.meta which
13 may be either a Metadata object, or an integer mode. Functions like
14 item_mode() and item_size() will return the mode and size in either
15 case. Any item.meta Metadata instances must not be modified directly.
16 Make a copy to modify via item.meta.copy() if needed, or call
19 The want_meta argument is advisory for calls that accept it, and it
20 may not be honored. Callers must be able to handle an item.meta value
21 that is either an instance of Metadata or an integer mode, perhaps
22 via item_mode() or augment_item_meta().
24 Setting want_meta=False is rarely desirable since it can limit the VFS
25 to only the metadata that git itself can represent, and so for
26 example, fifos and sockets will appear to be regular files
27 (e.g. S_ISREG(item_mode(item)) will be true). But the option is still
28 provided because it may be more efficient when just the path names or
29 the more limited metadata is sufficient.
31 Any given metadata object's size may be None, in which case the size
32 can be computed via item_size() or augment_item_meta(...,
35 When traversing a directory using functions like contents(), the meta
36 value for any directories other than '.' will be a default directory
37 mode, not a Metadata object. This is because the actual metadata for
38 a directory is stored inside the directory (see
39 fill_in_metadata_if_dir() or ensure_item_has_metadata()).
41 Commit items represent commits (e.g. /.tag/some-commit or
42 /foo/latest), and for most purposes, they appear as the underlying
43 tree. S_ISDIR(item_mode(item)) will return true for both tree Items
44 and Commits and the commit's oid is the tree hash; the commit hash is
49 from __future__ import absolute_import, print_function
50 from binascii import hexlify, unhexlify
51 from collections import namedtuple
52 from errno import EINVAL, ELOOP, ENOTDIR
53 from itertools import chain, groupby, tee
54 from random import randrange
55 from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
56 from time import localtime, strftime
59 from bup import git, vint
60 from bup.compat import hexstr, pending_raise, range, str_type
61 from bup.git import BUP_CHUNKED, parse_commit, tree_decode
62 from bup.helpers import debug2, last
63 from bup.io import path_msg
64 from bup.metadata import Metadata
65 from bup.vint import read_bvec, write_bvec
66 from bup.vint import read_vint, write_vint
67 from bup.vint import read_vuint, write_vuint
69 if sys.version_info[0] < 3:
70 from exceptions import IOError as py_IOError
74 # We currently assume that it's always appropriate to just forward IOErrors
class IOError(py_IOError):
    """IOError variant that can also carry a "terminus": the partial
    path resolution (a tuple of (name, item) pairs) describing where
    along a VFS path the failure occurred."""
    def __init__(self, errno, message, terminus=None):
        self.terminus = terminus
        py_IOError.__init__(self, errno, message)
def write_ioerror(port, ex):
    """Serialize IOError ex to port: a presence bitmask (1 = errno,
    2 = strerror, 4 = terminus) followed by each field that is set."""
    assert isinstance(ex, IOError)
    # Presence bitmask for the optional fields.
    # NOTE(review): the opening write_vuint(port, ...) call for this
    # expression is not visible in this excerpt -- confirm upstream.
                (1 if ex.errno is not None else 0)
                | (2 if ex.strerror is not None else 0)
                | (4 if ex.terminus is not None else 0))
    if ex.errno is not None:
        write_vint(port, ex.errno)
    if ex.strerror is not None:
        write_bvec(port, ex.strerror.encode('utf-8'))
    if ex.terminus is not None:
        write_resolution(port, ex.terminus)
def read_ioerror(port):
    """Read and return an IOError from port, as serialized by
    write_ioerror(): a presence bitmask, then each present field."""
    mask = read_vuint(port)
    err = read_vint(port) if mask & 1 else None
    message = read_bvec(port).decode('utf-8') if mask & 2 else None
    terminus = read_resolution(port) if mask & 4 else None
    return IOError(errno=err, message=message, terminus=terminus)
103 default_file_mode = S_IFREG | 0o644
104 default_dir_mode = S_IFDIR | 0o755
105 default_symlink_mode = S_IFLNK | 0o755
107 def _default_mode_for_gitmode(gitmode):
109 return default_file_mode
111 return default_dir_mode
113 return default_symlink_mode
114 raise Exception('unexpected git mode ' + oct(gitmode))
116 def _normal_or_chunked_file_size(repo, oid):
117 """Return the size of the normal or chunked file indicated by oid."""
118 # FIXME: --batch-format CatPipe?
119 it = repo.cat(hexlify(oid))
120 _, obj_t, size = next(it)
122 while obj_t == b'tree':
123 mode, name, last_oid = last(tree_decode(b''.join(it)))
125 it = repo.cat(hexlify(last_oid))
126 _, obj_t, size = next(it)
127 return ofs + sum(len(b) for b in it)
def _skip_chunks_before_offset(tree, offset):
    """Skip entries of the chunk-tree iterator tree that end before
    offset, returning an iterator over the remainder.  Entry names are
    hex offsets into the original file."""
    prev_ent = next(tree, None)
    # NOTE(review): the scan loop surrounding these lines is not
    # visible in this excerpt.
        # ent is (gitmode, name, oid); name is the chunk's hex offset.
        ent_ofs = int(ent[1], 16)
            # This entry starts past offset: the previous one overlaps it.
            return chain([prev_ent, ent], tree)
        if ent_ofs == offset:
            return chain([ent], tree)
def _tree_chunks(repo, tree, startofs):
    "Tree should be a sequence of (name, mode, hash) as per tree_decode()."
    assert(startofs >= 0)
    # name is the chunk's hex offset in the original file
    for mode, name, oid in _skip_chunks_before_offset(tree, startofs):
        # How many bytes of this chunk fall before the requested start.
        # NOTE(review): the assignment of ofs is not visible here.
        skipmore = startofs - ofs
        it = repo.cat(hexlify(oid))
        _, obj_t, size = next(it)
            # An interior chunk node: recurse into the subtree.
            assert obj_t == b'tree'
            for b in _tree_chunks(repo, tree_decode(data), skipmore):
            # A leaf blob: yield its data past the skip point.
            assert obj_t == b'blob'
            yield data[skipmore:]
def __init__(self, repo, oid, startofs):
    # Reader over the (possibly chunked) file at oid, beginning at
    # byte offset startofs.  (Enclosing class header is elided here.)
    it = repo.cat(hexlify(oid))
    _, obj_t, size = next(it)
    isdir = obj_t == b'tree'
        # Chunked file: iterate its chunk blobs lazily.
        self.it = _tree_chunks(repo, tree_decode(data), startofs)
        # Plain blob: just keep the remaining bytes.
        self.blob = data[startofs:]
def next(self, size):
    # Return up to size bytes, pulling chunks from self.it as needed.
    while len(out) < size:
        if self.it and not self.blob:
                self.blob = next(self.it)
            except StopIteration:
        # Take what we need from the current buffered blob.
        want = size - len(out)
        out += self.blob[:want]
        self.blob = self.blob[want:]
    debug2('next(%d) returned %d\n' % (size, len(out)))
class _FileReader(object):
    # Seekable file-like reader over a (possibly chunked) file stored
    # in the repository; usable as a context manager.
    def __init__(self, repo, oid, known_size=None):
        assert len(oid) == 20
        # Size is computed lazily unless the caller already knows it.
        self._size = known_size

    def _compute_size(self):
            self._size = _normal_or_chunked_file_size(self._repo, self.oid)

    # (seek(ofs) -- its def line is not visible in this excerpt)
        if ofs < 0 or ofs > self._compute_size():
            raise IOError(EINVAL, 'Invalid seek offset: %d' % ofs)

    def read(self, count=-1):
        size = self._compute_size()
            # count < 0 means "read to the end".
            count = size - self.ofs
        # Recreate the chunk reader if the offset has moved under us.
        if not self.reader or self.reader.ofs != self.ofs:
            self.reader = _ChunkReader(self._repo, self.oid, self.ofs)
        buf = self.reader.next(count)
            raise  # our offsets will be all screwed up otherwise

    def __exit__(self, type, value, traceback):
        with pending_raise(value, rethrow=False):
# Collapses runs of '/' before path decomposition.
_multiple_slashes_rx = re.compile(br'//+')
def _decompose_path(path):
    """Return a boolean indicating whether the path is absolute, and a
    reversed list of path elements, omitting any occurrences of "."
    and ignoring any leading or trailing slash. If the path is
    effectively '/' or '.', return an empty list.
    """
    path = re.sub(_multiple_slashes_rx, b'/', path)
        # Path is exactly '/': absolute, must be a directory, no parts.
        return True, True, []
    is_absolute = must_be_dir = False
    if path.startswith(b'/'):
    # A trailing '/' or '/.' means the path must name a directory.
    for suffix in (b'/', b'/.'):
        if path.endswith(suffix):
            path = path[:-len(suffix)]
    parts = [x for x in path.split(b'/') if x != b'.']
        must_be_dir = True  # e.g. path was effectively '.' or '/', etc.
    return is_absolute, must_be_dir, parts
# The VFS item types.  Each carries at minimum a meta value, which may
# be either a Metadata instance or an integer stat mode.
Item = namedtuple('Item', ('meta', 'oid'))
Chunky = namedtuple('Chunky', ('meta', 'oid'))
FakeLink = namedtuple('FakeLink', ('meta', 'target'))
# Previously written ('meta') -- just the string 'meta', which
# namedtuple happens to accept -- but a one-element tuple was intended.
Root = namedtuple('Root', ('meta',))
Tags = namedtuple('Tags', ('meta',))
RevList = namedtuple('RevList', ('meta', 'oid'))
Commit = namedtuple('Commit', ('meta', 'oid', 'coid'))

item_types = (Item, Chunky, Root, Tags, RevList, Commit)
# Types whose oid is an actual git tree (a Commit's oid is its tree).
real_tree_types = (Item, Commit)
def write_item(port, item):
    """Serialize item to port: type name, has-meta flag, then the
    type-specific fields, with meta as either a full Metadata record
    or an integer mode."""
    name = bytes(kind.__name__.encode('ascii'))
    has_meta = 1 if isinstance(meta, Metadata) else 0
    if kind in (Item, Chunky, RevList):
        assert len(item.oid) == 20
            vint.send(port, 'sVs', name, has_meta, item.oid)
            Metadata.write(meta, port, include_path=False)
            # Meta is just an integer mode.
            vint.send(port, 'sVsV', name, has_meta, item.oid, item.meta)
    elif kind in (Root, Tags):
            vint.send(port, 'sV', name, has_meta)
            Metadata.write(meta, port, include_path=False)
            vint.send(port, 'sVV', name, has_meta, item.meta)
        # Commit carries both the tree oid and the commit oid.
        assert len(item.oid) == 20
        assert len(item.coid) == 20
            vint.send(port, 'sVss', name, has_meta, item.oid, item.coid)
            Metadata.write(meta, port, include_path=False)
            vint.send(port, 'sVssV', name, has_meta, item.oid, item.coid,
    elif kind == FakeLink:
            vint.send(port, 'sVs', name, has_meta, item.target)
            Metadata.write(meta, port, include_path=False)
            vint.send(port, 'sVsV', name, has_meta, item.target, item.meta)
def read_m(port, has_meta):
    # Helper: read the optional metadata -- a full Metadata record
    # when has_meta is set...
        m = Metadata.read(port)
    # ...otherwise just an integer vuint mode.
    return read_vuint(port)
# Dispatch on the serialized type name (see write_item).
# NOTE(review): the enclosing read_item def line is elided here.
kind, has_meta = vint.recv(port, 'sV')
    oid, meta = read_bvec(port), read_m(port, has_meta)
    return Item(oid=oid, meta=meta)
if kind == b'Chunky':
    oid, meta = read_bvec(port), read_m(port, has_meta)
    return Chunky(oid=oid, meta=meta)
if kind == b'RevList':
    oid, meta = read_bvec(port), read_m(port, has_meta)
    return RevList(oid=oid, meta=meta)
    return Root(meta=read_m(port, has_meta))
    return Tags(meta=read_m(port, has_meta))
if kind == b'Commit':
    oid, coid = vint.recv(port, 'ss')
    meta = read_m(port, has_meta)
    return Commit(oid=oid, coid=coid, meta=meta)
if kind == b'FakeLink':
    target, meta = read_bvec(port), read_m(port, has_meta)
    return FakeLink(target=target, meta=meta)
def write_resolution(port, resolution):
    """Serialize a path resolution (sequence of (name, item) pairs) to
    port: a count, then for each pair the name, a have-item flag, and
    the item when present."""
    write_vuint(port, len(resolution))
    for name, item in resolution:
        write_bvec(port, name)
        # NOTE(review): the have-item flag write and the None-item
        # branch are not visible in this excerpt.
            write_item(port, item)
def read_resolution(port):
    # Inverse of write_resolution(); rebuilds the (name, item) pairs.
    # NOTE(review): the count read and loop header are not visible here.
        name = read_bvec(port)
        # One byte: whether an item follows for this name.
        have_item = ord(port.read(1))
        assert have_item in (0, 1)
        item = read_item(port) if have_item else None
        result.append((name, item))
# Singleton items for the VFS root ('/') and the '/.tag' directory.
_root = Root(meta=default_dir_mode)
_tags = Tags(meta=default_dir_mode)
378 ### A general purpose shared cache with (currently) cheap random
379 ### eviction. At the moment there is no weighting so a single commit
380 ### item is just as likely to be evicted as an entire "rev-list". See
381 ### is_valid_cache_key for a description of the expected content.
# Upper bound on cache entries before random eviction kicks in.
_cache_max_items = 30000

# (body of the cache reset helper; its def line is not visible here)
global _cache, _cache_keys
def is_valid_cache_key(x):
    """Return logically true if x looks like it could be a valid cache key
    (with respect to structure). Current valid cache entries:
    res:... -> resolution
    rvl:OID -> {'.', commit, '2012...', next_commit, ...}
    """
    # Suspect we may eventually add "(container_oid, name) -> ...", and others.
    if isinstance(x, bytes):
        # A 4-byte tag followed by a 20-byte oid.
        # NOTE(review): the assignment of tag is not visible here.
        if tag in (b'itm:', b'rvl:') and len(x) == 24:
# (cache_get(key) -- its def line is not visible in this excerpt)
# Return the cached value for key, or None on a miss.
if not is_valid_cache_key(key):
    raise Exception('invalid cache key: ' + repr(key))
return _cache.get(key)
def cache_notice(key, value, overwrite=False):
    """Record value under key; once the cache is full, evict a randomly
    chosen existing entry instead of growing."""
    global _cache, _cache_keys, _cache_max_items
    if not is_valid_cache_key(key):
        raise Exception('invalid cache key: ' + repr(key))
    if len(_cache) < _cache_max_items:
        _cache_keys.append(key)
    # Cache full: cheap, unweighted random eviction (see section comment).
    victim_i = randrange(0, len(_cache_keys))
    victim = _cache_keys[victim_i]
    _cache_keys[victim_i] = key
def _has_metadata_if_needed(item, need_meta):
    # True when item can satisfy the caller's metadata requirement.
    # NOTE(review): other branches of this check are not visible here.
    if isinstance(item.meta, Metadata):
def cache_get_commit_item(oid, need_meta=True):
    """Return the requested tree item if it can be found in the cache.
    When need_meta is true don't return a cached item that only has a
    default mode for its meta."""
    # tree might be stored independently, or as '.' with its entries.
    commit_key = b'itm:' + oid
    item = cache_get(commit_key)
        if _has_metadata_if_needed(item, need_meta):
    # Fall back to the commit's cached rev-list entries ('.' entry).
    entries = cache_get(b'rvl:' + oid)
        if _has_metadata_if_needed(item, need_meta):
457 """Return a completely independent copy of item, such that
458 modifications will not affect the original.
461 meta = getattr(item, 'meta', None)
462 if isinstance(meta, Metadata):
463 return(item._replace(meta=meta.copy()))
467 """Return the integer mode (stat st_mode) for item."""
469 if isinstance(m, Metadata):
def _read_dir_meta(bupm):
    """Read one directory Metadata record from the .bupm stream bupm,
    substituting a default directory mode for empty records."""
    # This is because save writes unmodified Metadata() entries for
    # fake parents -- test-save-strip-graft.sh demonstrates.
    m = Metadata.read(bupm)
        # Content-free record (fake parent): present a default mode.
        return default_dir_mode
    assert m.mode is not None
def tree_data_and_bupm(repo, oid):
    """Return (tree_bytes, bupm_oid) where bupm_oid will be None if the
    tree has no metadata (i.e. older bup save, or non-bup tree).
    """
    assert len(oid) == 20
    it = repo.cat(hexlify(oid))
    _, item_t, size = next(it)
    if item_t == b'commit':
        # A commit: chase its tree instead.
        commit = parse_commit(data)
        it = repo.cat(commit.tree)
        _, item_t, size = next(it)
        assert item_t == b'tree'
    elif item_t != b'tree':
        raise Exception('%s is not a tree or commit' % hexstr(oid))
    # Scan the (sorted) tree entries for the .bupm metadata blob.
    for _, mangled_name, sub_oid in tree_decode(data):
        if mangled_name == b'.bupm':
        # Entries are sorted, so past b'.bupm' there is nothing to find.
        if mangled_name > b'.bupm':
def _find_treeish_oid_metadata(repo, oid):
    """Return the metadata for the tree or commit oid, or None if the tree
    has no metadata (i.e. older bup save, or non-bup tree).
    """
    tree_data, bupm_oid = tree_data_and_bupm(repo, oid)
        # The directory's own metadata is the first .bupm record.
        with _FileReader(repo, bupm_oid) as meta_stream:
            return _read_dir_meta(meta_stream)
519 def _readlink(repo, oid):
520 return b''.join(repo.join(hexlify(oid)))
def readlink(repo, item):
    """Return the link target of item, which must be a symlink. Reads the
    target from the repository if necessary."""
    assert S_ISLNK(item_mode(item))
    # FakeLink targets are synthesized (e.g. 'latest'); no repo access.
    if isinstance(item, FakeLink):
    if isinstance(item.meta, Metadata):
        target = item.meta.symlink_target
    # Fall back to the raw blob contents.
    return _readlink(repo, item.oid)
def _compute_item_size(repo, item):
    # Size of a regular file is its (possibly chunked) content length;
    # size of a symlink is the length of its target.
    mode = item_mode(item)
        size = _normal_or_chunked_file_size(repo, item.oid)
        if isinstance(item, FakeLink):
            return len(item.target)
        return len(_readlink(repo, item.oid))
def item_size(repo, item):
    """Return the size of item, computing it if necessary."""
    # m is item.meta; use its recorded size when already known.
    if isinstance(m, Metadata) and m.size is not None:
    return _compute_item_size(repo, item)
def tree_data_reader(repo, oid):
    """Open and return a reader over every byte reachable from oid;
    when oid names a tree, its (chunked) contents are recursively
    concatenated."""
    reader = _FileReader(repo, oid)
    return reader
def fopen(repo, item):
    """Open the regular-file item and return a reader over its data."""
    file_mode = item_mode(item)
    assert S_ISREG(file_mode)
    return tree_data_reader(repo, item.oid)
def _commit_item_from_data(oid, data):
    """Build a Commit item from the commit oid and its raw object
    data; the item's oid is the commit's tree oid."""
    info = parse_commit(data)
    return Commit(meta=default_dir_mode,
                  oid=unhexlify(info.tree),
def _commit_item_from_oid(repo, oid, require_meta):
    # Return a Commit item for oid, preferring the cache; rebuild (and
    # re-cache) when missing or lacking required metadata.
    commit = cache_get_commit_item(oid, need_meta=require_meta)
    if commit and ((not require_meta) or isinstance(commit.meta, Metadata)):
    it = repo.cat(hexlify(oid))
    _, typ, size = next(it)
    assert typ == b'commit'
    commit = _commit_item_from_data(oid, b''.join(it))
        # Attach the directory's real metadata (from its .bupm) if any.
        meta = _find_treeish_oid_metadata(repo, commit.oid)
            commit = commit._replace(meta=meta)
    commit_key = b'itm:' + oid
    cache_notice(commit_key, commit, overwrite=True)
def _revlist_item_from_oid(repo, oid, require_meta):
    """Return a RevList item for the commit oid, borrowing the
    commit's meta value (Metadata when require_meta is set and
    available, else a mode)."""
    meta = _commit_item_from_oid(repo, oid, require_meta).meta
    return RevList(oid=oid, meta=meta)
def root_items(repo, names=None, want_meta=True):
    """Yield (name, item) for the items in '/' in the VFS. Return
    everything if names is logically false, otherwise return only
    items with a name in the collection.
    """
    # FIXME: what about non-leaf refs like 'refs/heads/foo/bar/baz?
    # FIXME: maybe eventually support repo.clone() or something
    # and pass in two repos, so we can drop the tuple() and stream
    # in parallel (i.e. meta vs refs).
    for name, oid in tuple(repo.refs([], limit_to_heads=True)):
        assert(name.startswith(b'refs/heads/'))
        yield name[11:], _revlist_item_from_oid(repo, oid, want_meta)
    # Named-lookup path below.
    # NOTE(review): the loop over the requested names is not visible here.
    if ref in (b'.', b'.tag'):
    it = repo.cat(b'refs/heads/' + ref)
    oidx, typ, size = next(it)
    assert typ == b'commit'
    commit = parse_commit(b''.join(it))
    yield ref, _revlist_item_from_oid(repo, unhexlify(oidx), want_meta)
def ordered_tree_entries(tree_data, bupm=None):
    """Yields (name, mangled_name, kind, gitmode, oid) for each item in
    tree, sorted by name.
    """
    # Sadly, the .bupm entries currently aren't in git tree order,
    # but in unmangled name order. They _do_ account for the fact
    # that git sorts trees (including chunked trees) as if their
    # names ended with "/" (so "fo" sorts after "fo." iff fo is a
    # directory), but we apply this on the unmangled names in save
    # rather than on the mangled names.
    # This makes streaming impossible when we need the metadata.
    def result_from_tree_entry(tree_entry):
        gitmode, mangled_name, oid = tree_entry
        name, kind = git.demangle_name(mangled_name, gitmode)
        return name, mangled_name, kind, gitmode, oid

    tree_ents = (result_from_tree_entry(x) for x in tree_decode(tree_data))
        # Metadata in play: sort by demangled name to match .bupm order.
        tree_ents = sorted(tree_ents, key=lambda x: x[0])
    for ent in tree_ents:
def tree_items(oid, tree_data, names=frozenset(), bupm=None):
    # Yield (name, item) for the tree at oid, reading per-entry
    # metadata from the .bupm stream bupm when provided.
    def tree_item(ent_oid, kind, gitmode):
        if kind == BUP_CHUNKED:
            meta = Metadata.read(bupm) if bupm else default_file_mode
            return Chunky(oid=ent_oid, meta=meta)
            # No metadata here (accessible via '.' inside ent_oid).
            return Item(meta=default_dir_mode, oid=ent_oid)
        meta = Metadata.read(bupm) if bupm else None
        # handle the case of metadata being empty/missing in bupm
        # (or there not being bupm at all)
            meta = _default_mode_for_gitmode(gitmode)
        return Item(oid=ent_oid, meta=meta)

    assert len(oid) == 20
    # No names requested: yield '.' first, then everything.
        dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
        yield b'.', Item(oid=oid, meta=dot_meta)
        tree_entries = ordered_tree_entries(tree_data, bupm)
        for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
            if mangled_name == b'.bupm':
            yield name, tree_item(ent_oid, kind, gitmode)

    # Assumes the tree is properly formed, i.e. there are no
    # duplicates, and entries will be in git tree order.
    if isinstance(names, (frozenset, set)):
        names = frozenset(names)
    remaining = len(names)

    # Account for the bupm sort order issue (cf. ordered_tree_entries above)
    last_name = max(names) if bupm else max(names) + b'/'

        dot_meta = _read_dir_meta(bupm) if bupm else default_dir_mode
        yield b'.', Item(oid=oid, meta=dot_meta)
    tree_entries = ordered_tree_entries(tree_data, bupm)
    for name, mangled_name, kind, gitmode, ent_oid in tree_entries:
        if mangled_name == b'.bupm':
        if name not in names:
                break  # given bupm sort order, we're finished
            # Keep the .bupm read position in sync for skipped entries.
            if (kind == BUP_CHUNKED or not S_ISDIR(gitmode)) and bupm:
        yield name, tree_item(ent_oid, kind, gitmode)
def tree_items_with_meta(repo, oid, tree_data, names):
    # For now, the .bupm order doesn't quite match git's, and we don't
    # load the tree data incrementally anyway, so we just work in RAM
    assert len(oid) == 20
    # Find this tree's .bupm entry (if any) and open a reader over it.
    for _, mangled_name, sub_oid in tree_decode(tree_data):
        if mangled_name == b'.bupm':
            bupm = _FileReader(repo, sub_oid)
        # Entries are sorted; no .bupm past this point.
        if mangled_name > b'.bupm':
    for item in tree_items(oid, tree_data, names, bupm):
# Matches save names: YYYY-MM-DD-HHMMSS, with an optional -N suffix
# distinguishing duplicates (see _reverse_suffix_duplicates).
_save_name_rx = re.compile(br'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')
def _reverse_suffix_duplicates(strs):
    """Yields the elements of strs, with any runs of duplicate values
    suffixed with -N suffixes, where the zero padded integer N
    decreases to 0 by 1 (e.g. 10, 09, ..., 00).
    """
    for name, duplicates in groupby(strs):
        ndup = len(tuple(duplicates))
            # Zero-pad the suffix to the width of the largest index.
            ndig = len(str(ndup - 1))
            fmt = b'%s-' + b'%0' + (b'%d' % ndig) + b'd'
            for i in range(ndup - 1, -1, -1):
                yield fmt % (name, i)
# (rev-parsing helper; its def line is not visible in this excerpt)
# Parse one 'TREE_OID AUTHOR_SECONDS' line, as produced by rev_list
# with the b'%T %at' format used by cache_commit().
items = f.readline().split(None)
assert len(items) == 2
tree, auth_sec = items
return unhexlify(tree), int(auth_sec)
749 def _name_for_rev(rev):
750 commit_oidx, (tree_oid, utc) = rev
751 return strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
def _item_for_rev(rev):
    # rev is (commit_oidx, (tree_oid, utc)); return a cached Commit
    # item when available, else build and cache one.
    commit_oidx, (tree_oid, utc) = rev
    coid = unhexlify(commit_oidx)
    item = cache_get_commit_item(coid, need_meta=False)
    item = Commit(meta=default_dir_mode, oid=tree_oid, coid=coid)
    commit_key = b'itm:' + coid
    cache_notice(commit_key, item)
# non-string singleton key used inside cached rev-list dicts to record
# whether the dict was built with metadata (see cache_commit).
_HAS_META_ENTRY = object()
def cache_commit(repo, oid, require_meta=True):
    """Build, cache, and return a "name -> commit_item" dict of the entire
    rev-list reachable from the commit oid."""
    entries[b'.'] = _revlist_item_from_oid(repo, oid, require_meta)
    revs = repo.rev_list((hexlify(oid),), format=b'%T %at',
    # Two independent passes over the rev stream: one for names, one
    # for items.
    rev_items, rev_names = tee(revs)
    revs = None  # Don't disturb the tees
    rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
    rev_items = (_item_for_rev(x) for x in rev_items)
    for item in rev_items:
        name = next(rev_names)
        # Remember the newest save; 'latest' will point at it.
        tip = tip or (name, item)
        entries[b'latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
        revlist_key = b'rvl:' + tip[1].coid
        entries[_HAS_META_ENTRY] = require_meta
        cache_notice(revlist_key, entries, overwrite=True)
def revlist_items(repo, oid, names, require_meta=True):
    # Yield (name, item) for the saves under the branch commit oid.
    assert len(oid) == 20

    # Special case '.' instead of caching the whole history since it's
    # the only way to get the metadata for the commit.
    if names and all(x == b'.' for x in names):
        yield b'.', _revlist_item_from_oid(repo, oid, require_meta)

    # For now, don't worry about the possibility of the contents being
    # "too big" for the cache.
    revlist_key = b'rvl:' + oid
    entries = cache_get(revlist_key)
    # Rebuild when missing, or cached without the metadata we need.
    if entries and require_meta and not entries[_HAS_META_ENTRY]:
        entries = cache_commit(repo, oid, require_meta)

        # No names: yield everything (skipping the has-meta marker).
        for name in sorted((n for n in entries.keys() if n != _HAS_META_ENTRY)):
            yield name, entries[name]

    # Only save-shaped names, '.', and 'latest' can possibly match.
    names = frozenset(name for name in names
                      if _save_name_rx.match(name) or name in (b'.', b'latest'))

        yield b'.', entries[b'.']
    for name in (n for n in names if n != b'.'):
        if name == _HAS_META_ENTRY:
        commit = entries.get(name)
def tags_items(repo, names):
    # Yield (name, item) for the entries of the '/.tag' directory.
    # (The nested tag_item(oid) helper's def line is elided here.)
        assert len(oid) == 20
        _, typ, size = next(it)
            # Commit tags appear as the commit itself.
            return cache_get_commit_item(oid, need_meta=False) \
                or _commit_item_from_data(oid, b''.join(it))
            return Item(meta=default_file_mode, oid=oid)
            return Item(meta=default_dir_mode, oid=oid)
        raise Exception('unexpected tag type ' + typ.decode('ascii')
                        + ' for tag ' + path_msg(name))

        # We have to pull these all into ram because tag_item calls cat()
        for name, oid in tuple(repo.refs(names, limit_to_tags=True)):
            assert(name.startswith(b'refs/tags/'))
            yield name, tag_item(oid)

    # Assumes no duplicate refs
    if isinstance(names, (frozenset, set)):
        names = frozenset(names)
    remaining = len(names)
    # refs are sorted, so we can stop scanning past the largest name.
    last_name = max(names)
    for name, oid in repo.refs(names, limit_to_tags=True):
        assert(name.startswith(b'refs/tags/'))
        if name not in names:
        yield name, tag_item(oid)
def contents(repo, item, names=None, want_meta=True):
    """Yields information about the items contained in item. Yields
    (name, item) for each name in names, if the name exists, in an
    unspecified order. If there are no names, then yields (name,
    item) for all items, including, a first item named '.'
    representing the container itself.

    The meta value for any directories other than '.' will be a
    default directory mode, not a Metadata object. This is because
    the actual metadata for a directory is stored inside the directory
    (see fill_in_metadata_if_dir() or ensure_item_has_metadata()).

    Note that want_meta is advisory. For any given item, item.meta
    might be a Metadata instance or a mode, and if the former,
    meta.size might be None. Missing sizes can be computed via
    item_size() or augment_item_meta(..., include_size=True).

    Do not modify any item.meta Metadata instances directly. If
    needed, make a copy via item.meta.copy() and modify that instead.
    """
    # Q: are we comfortable promising '.' first when no names?
    assert S_ISDIR(item_mode(item))
    if isinstance(item, real_tree_types):
        it = repo.cat(hexlify(item.oid))
        _, obj_t, size = next(it)
            # Note: it shouldn't be possible to see an Item with type
            # 'commit' since a 'commit' should always produce a Commit.
            raise Exception('unexpected git ' + obj_t.decode('ascii'))
            item_gen = tree_items_with_meta(repo, item.oid, data, names)
            item_gen = tree_items(item.oid, data, names)
    elif isinstance(item, RevList):
        item_gen = revlist_items(repo, item.oid, names,
                                 require_meta=want_meta)
    elif isinstance(item, Root):
        item_gen = root_items(repo, names, want_meta)
    elif isinstance(item, Tags):
        item_gen = tags_items(repo, names)
        raise Exception('unexpected VFS item ' + str(item))
def _resolve_path(repo, path, parent=None, want_meta=True, follow=True):
    # Core path resolver behind resolve()/try_resolve(); returns a
    # tuple of (name, item) pairs, caching results per repo/flags.
    cache_key = b'res:%d%d%d:%s\0%s' \
                % (bool(want_meta), bool(follow), repo.id(),
                   (b'/'.join(x[0] for x in parent) if parent else b''),
    resolution = cache_get(cache_key)

    def notice_resolution(r):
        # Record r in the cache before returning it.
        cache_notice(cache_key, r)

    def raise_dir_required_but_not_dir(path, parent, past):
        raise IOError(ENOTDIR,
                      "path %s%s resolves to non-directory %r"
                      ' (relative to %r)' % parent if parent else '',

    # Validate the caller-supplied parent resolution, if any.
        assert isinstance(x[0], (bytes, str_type))
        assert isinstance(x[1], item_types)
        assert parent[0][1] == _root
        if not S_ISDIR(item_mode(parent[-1][1])):
            raise IOError(ENOTDIR,
                          'path resolution parent %r is not a directory'
    is_absolute, must_be_dir, future = _decompose_path(path)
    if not future:  # path was effectively '.' or '/'
            return notice_resolution(((b'', _root),))
            return notice_resolution(tuple(parent))
        return notice_resolution(((b'', _root),))
        past = [(b'', _root)]
        past = list(parent) if parent else [(b'', _root)]
    # Walk "future" (reversed remaining segments) into "past".
        if must_be_dir and not S_ISDIR(item_mode(past[-1][1])):
            raise_dir_required_but_not_dir(path, parent, past)
            return notice_resolution(tuple(past))
        segment = future.pop()
            if len(past) > 1:  # .. from / is /
            assert S_ISDIR(item_mode(past[-1][1]))
            # Look up segment within the current parent directory.
            parent_name, parent_item = past[-1]
            wanted = (segment,) if not want_meta else (b'.', segment)
            items = tuple(contents(repo, parent_item, names=wanted,
                                   want_meta=want_meta))
                item = items[0][1] if items else None
            else:  # First item will be '.' and have the metadata
                item = items[1][1] if len(items) == 2 else None
                dot, dot_item = items[0]
                past[-1] = parent_name, parent_item
                # Missing segment: terminate with a None item.
                past.append((segment, None),)
                return notice_resolution(tuple(past))
            mode = item_mode(item)
            if not S_ISLNK(mode):
                if not S_ISDIR(mode):
                    past.append((segment, item),)
                        raise IOError(ENOTDIR,
                                      'path %r%s ends internally in non-directory here: %r'
                                      ' (relative to %r)' % parent if parent else '',
                        raise_dir_required_but_not_dir(path, parent, past)
                    return notice_resolution(tuple(past))
                # Replace a directory's default mode with real metadata.
                if want_meta and isinstance(item, real_tree_types):
                    dir_meta = _find_treeish_oid_metadata(repo, item.oid)
                        item = item._replace(meta=dir_meta)
                past.append((segment, item))
                # Symlink handling from here down.
                if not future and not follow:
                    past.append((segment, item),)
                    raise IOError(ELOOP,
                                  'too many symlinks encountered while resolving %r%s'
                                  % (path, ' relative to %r' % parent if parent else ''),
                                  terminus=tuple(past + [(segment, item)]))
                target = readlink(repo, item)
                is_absolute, _, target_future = _decompose_path(target)
                    if not target_future:  # path was effectively '/'
                        return notice_resolution(((b'', _root),))
                    # Absolute target: restart from the root.
                    past = [(b'', _root)]
                    future = target_future
                    # Relative target: splice into the remaining work.
                    future.extend(target_future)
def resolve(repo, path, parent=None, want_meta=True, follow=True):
    """Follow the path in the virtual filesystem and return a tuple
    representing the location, if any, denoted by the path. Each
    element in the result tuple will be (name, info), where info will
    be a VFS item that can be passed to functions like item_mode().

    If follow is false, and if the final path element is a symbolic
    link, don't follow it, just return it in the result.

    If a path segment that does not exist is encountered during
    resolution, the result will represent the location of the missing
    item, and that item in the result will be None.

    Any attempt to traverse a non-directory will raise a VFS ENOTDIR

    Any symlinks along the path, including at the end, will be
    resolved. A VFS IOError with the errno attribute set to ELOOP
    will be raised if too many symlinks are traversed while following
    the path. That exception is effectively like a normal
    ELOOP IOError exception, but will include a terminus element
    describing the location of the failure, which will be a tuple of
    (name, info) elements.

    The parent, if specified, must be a sequence of (name, item)
    tuples, and will provide the starting point for the resolution of
    the path. If no parent is specified, resolution will start at

    The result may include elements of parent directly, so they must
    not be modified later. If this is a concern, pass in "name,
    copy_item(item) for name, item in parent" instead.

    When want_meta is true, detailed metadata will be included in each
    result item if it's available, otherwise item.meta will be an
    integer mode. The metadata size may or may not be provided, but
    can be computed by item_size() or augment_item_meta(...,
    include_size=True). Setting want_meta=False is rarely desirable
    since it can limit the VFS to just the metadata git itself can
    represent, and so, as an example, fifos and sockets will appear to
    be regular files (e.g. S_ISREG(item_mode(item)) will be true) .
    But the option is provided because it may be more efficient when
    only the path names or the more limited metadata is sufficient.

    Do not modify any item.meta Metadata instances directly. If
    needed, make a copy via item.meta.copy() and modify that instead.
    """
    if repo.is_remote():
        # Redirect to the more efficient remote version
        return repo.resolve(path, parent=parent, want_meta=want_meta,
    result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
    _, leaf_item = result[-1]
    if leaf_item and follow:
        # With follow=True the leaf can never still be a symlink.
        assert not S_ISLNK(item_mode(leaf_item))
def try_resolve(repo, path, parent=None, want_meta=True):
    """If path does not refer to a symlink, does not exist, or refers to a
    valid symlink, behave exactly like resolve(..., follow=True). If
    path refers to an invalid symlink, behave like resolve(...,
    follow=False).
    """
    res = resolve(repo, path, parent=parent, want_meta=want_meta, follow=False)
    leaf_name, leaf_item = res[-1]
    if not S_ISLNK(item_mode(leaf_item)):
    # Leaf is a symlink: try following it; fall back to the unfollowed
    # result when the target is broken.
    follow = resolve(repo, leaf_name, parent=res[:-1], want_meta=want_meta)
    follow_name, follow_item = follow[-1]
def augment_item_meta(repo, item, include_size=False):
    """Ensure item has a Metadata instance for item.meta. If item.meta is
    currently a mode, replace it with a compatible "fake" Metadata
    instance. If include_size is true, ensure item.meta.size is
    correct, computing it if needed. If item.meta is a Metadata
    instance, this call may modify it in place or replace it.
    """
    # If we actually had parallelism, we'd need locking...
    if isinstance(m, Metadata):
        if include_size and m.size is None:
            m.size = _compute_item_size(repo, item)
        return item._replace(meta=m)
    # m is an integer mode: synthesize a minimal "fake" Metadata.
    meta.uid = meta.gid = None
    meta.atime = meta.mtime = meta.ctime = 0
        if isinstance(item, FakeLink):
            target = item.target
            target = _readlink(repo, item.oid)
        meta.symlink_target = target
        meta.size = len(target)
        meta.size = _compute_item_size(repo, item)
    return item._replace(meta=meta)
def fill_in_metadata_if_dir(repo, item):
    """If item is a directory and item.meta is not a Metadata instance,
    attempt to find the metadata for the directory. If found, return
    a new item augmented to include that metadata. Otherwise, return
    item. May be useful for the output of contents().
    """
    if S_ISDIR(item_mode(item)) and not isinstance(item.meta, Metadata):
        # The directory's own metadata lives in its '.' entry.
        items = tuple(contents(repo, item, (b'.',), want_meta=True))
        assert len(items) == 1
        assert items[0][0] == b'.'
def ensure_item_has_metadata(repo, item, include_size=False):
    """If item is a directory, attempt to find and add its metadata. If
    the item still doesn't have a Metadata instance for item.meta,
    give it one via augment_item_meta(). May be useful for the output
    of contents().
    """
    return augment_item_meta(repo,
                             fill_in_metadata_if_dir(repo, item),
                             include_size=include_size)