class MissingObject(KeyError):
    """Raised when an object with the given id cannot be found.

    The id handed to the constructor is kept unchanged in the ``oid``
    attribute.  It is passed to hexlify() to build the message, so it
    must be a bytes-like (binary, not hex) id.
    """

    def __init__(self, oid):
        self.oid = oid
        KeyError.__init__(self, 'object %r is missing' % hexlify(oid))

    def __reduce__(self):
        # The default exception reduction rebuilds the instance from
        # self.args, i.e. it would call MissingObject(<message>) and
        # feed the message text to hexlify().  Rebuild from the oid so
        # copy/pickle round trips give back an equivalent exception.
        return (self.__class__, (self.oid,))
+
+
# A WalkItem describes one object visited during a walk.
#
# The path is the mangled path.  When an item represents a fragment of
# a chunked file, chunk_path is the chunked subtree path for that
# chunk, e.g. ['', '2d3115e', ...].  The top-level item for a chunked
# file has a chunk_path of [''].  So some chunk subtree of the file
# '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'
#   ...
WalkItem = namedtuple('WalkItem', 'oid type mode path chunk_path data')
+
+
def walk_object(get_ref, oidx, stop_at=None, include_data=None):
    """Yield everything reachable from oidx as a WalkItem.

    Objects are fetched via get_ref, which must behave like CatPipe
    get: get_ref(oidx) returns an iterator whose first item is a
    three-element header (a false first element means the object is
    missing, the second element is the object type) and whose
    remaining items are pieces of the object's data.

    Arguments:
      get_ref -- object fetcher as described above.
      oidx -- hex id of the object to start from.
      stop_at -- optional predicate; whenever stop_at(oidx) returns
        true that object is neither yielded nor descended into.
      include_data -- when true, each WalkItem carries the object's
        content in its data field; otherwise data is always None and
        blob content is not kept.

    Items are produced from a LIFO stack: an object is yielded before
    anything it references, a commit's tree is visited before the
    commit's parents, and the entries of a tree are visited starting
    from the last one decoded.

    Unless include_data is set, an entry whose mode says "regular
    file" is reported as a blob without asking get_ref for it, so a
    missing object of that kind is not detected.

    Raise MissingObject if get_ref reports that an object is missing,
    and Exception if an object is not a blob, commit, or tree.

    """
    # Maintain the pending stack on the heap to avoid stack overflow
    pending = [(oidx, [], [], None)]
    while pending:
        oidx, parent_path, chunk_path, mode = pending.pop()
        oid = unhexlify(oidx)
        if stop_at and stop_at(oidx):
            continue

        if not include_data and mode and stat.S_ISREG(mode):
            # If the object is a "regular file", then it's a leaf in
            # the graph, so we can skip reading the data if the caller
            # hasn't requested it.
            yield WalkItem(oid=oid, type=b'blob',
                           chunk_path=chunk_path, path=parent_path,
                           mode=mode,
                           data=None)
            continue

        item_it = get_ref(oidx)
        get_oidx, typ, _ = next(item_it)
        if not get_oidx:
            raise MissingObject(oid)
        if typ not in (b'blob', b'commit', b'tree'):
            raise Exception('unexpected repository object type %r' % typ)

        # FIXME: set the mode based on the type when the mode is None
        if typ == b'blob' and not include_data:
            # Dump data until we can ask cat_pipe not to fetch it
            for _ in item_it:
                pass
            data = None
        else:
            # Commits and trees are always read in full because their
            # content is needed below to find what they reference.
            data = b''.join(item_it)

        yield WalkItem(oid=oid, type=typ,
                       chunk_path=chunk_path, path=parent_path,
                       mode=mode,
                       data=(data if include_data else None))

        if typ == b'commit':
            commit_items = parse_commit(data)
            # Parents inherit the commit's own path and mode; the tree
            # is pushed last so that it is popped (visited) first.
            for pid in commit_items.parents:
                pending.append((pid, parent_path, chunk_path, mode))
            pending.append((commit_items.tree, parent_path, chunk_path,
                            hashsplit.GIT_MODE_TREE))
        elif typ == b'tree':
            for ent_mode, name, ent_id in tree_decode(data):
                _, bup_type = demangle_name(name, ent_mode)
                if chunk_path:
                    # Already inside a chunked file: the file path
                    # stays fixed and only the chunk path grows.
                    sub_path = parent_path
                    sub_chunk_path = chunk_path + [name]
                elif bup_type == BUP_CHUNKED:
                    # Entering a chunked file: start its chunk path.
                    sub_path = parent_path + [name]
                    sub_chunk_path = [b'']
                else:
                    sub_path = parent_path + [name]
                    sub_chunk_path = chunk_path
                pending.append((hexlify(ent_id), sub_path, sub_chunk_path,
                                ent_mode))