From: Rob Browning Date: Mon, 5 Jan 2015 00:47:42 +0000 (-0600) Subject: git.py: add walk_object X-Git-Tag: 0.28-rc1~19^2~2 X-Git-Url: https://arthur.barton.de/gitweb/?a=commitdiff_plain;h=b7e10498d1834aa330b44faf682b6c5bdc8d950a;hp=0a522f5ca20f4eee6b954eb6dee3c247832f9d54;p=bup.git git.py: add walk_object Add walk_object(), which can recursively traverse a git reference (tree, commit, blob, etc.). This will be used by "bup gc". Thanks to Aidan Hobson Sayers for suggesting improvements to an earlier version of the code. Signed-off-by: Rob Browning --- diff --git a/lib/bup/git.py b/lib/bup/git.py index 315d8f3..ba616d5 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -7,7 +7,7 @@ import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob from collections import namedtuple from itertools import islice -from bup import _helpers, path, midx, bloom, xstat +from bup import _helpers, hashsplit, path, midx, bloom, xstat from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2, fdatasync, hostname, localtime, log, merge_iter, @@ -1230,3 +1230,92 @@ def tags(repo_dir = None): tags[c] = [] tags[c].append(name) # more than one tag can point at 'c' return tags + + +WalkItem = namedtuple('WalkItem', ['id', 'type', 'mode', + 'path', 'chunk_path', 'data']) +# The path is the mangled path, and if an item represents a fragment +# of a chunked file, the chunk_path will be the chunked subtree path +# for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a +# chunked file will have a chunk_path of ['']. So some chunk subtree +# of the file '/foo/bar/baz' might look like this: +# +# item.path = ['foo', 'bar', 'baz.bup'] +# item.chunk_path = ['', '2d3115e', '016b097'] +# item.type = 'tree' +# ... + + +def _walk_object(cat_pipe, id, + parent_path, chunk_path, + mode=None, + stop_at=None, + include_data=None): + + if stop_at and stop_at(id): + return + + item_it = cat_pipe.get(id) # FIXME: use include_data + type = item_it.next() + + if type not in ('blob', 'commit', 'tree'): + raise Exception('unexpected repository object type %r' % type) + + # FIXME: set the mode based on the type when the mode is None + + if type == 'blob' and not include_data: + # Dump data until we can ask cat_pipe not to fetch it + for ignored in item_it: + pass + data = None + else: + data = ''.join(item_it) + + yield WalkItem(id=id, type=type, + chunk_path=chunk_path, path=parent_path, + mode=mode, + data=(data if include_data else None)) + + if type == 'commit': + commit_items = parse_commit(data) + tree_id = commit_items.tree + for x in _walk_object(cat_pipe, tree_id, parent_path, chunk_path, + mode=hashsplit.GIT_MODE_TREE, + stop_at=stop_at, + include_data=include_data): + yield x + parents = commit_items.parents + for pid in parents: + for x in _walk_object(cat_pipe, pid, parent_path, chunk_path, + mode=mode, # Same mode as this child + stop_at=stop_at, + include_data=include_data): + yield x + elif type == 'tree': + for mode, name, ent_id in tree_decode(data): + demangled, bup_type = demangle_name(name, mode) + if chunk_path: + sub_path = parent_path + sub_chunk_path = chunk_path + [name] + else: + sub_path = parent_path + [name] + if bup_type == BUP_CHUNKED: + sub_chunk_path = [''] + else: + sub_chunk_path = chunk_path + for x in _walk_object(cat_pipe, ent_id.encode('hex'), + sub_path, sub_chunk_path, + mode=mode, + stop_at=stop_at, + include_data=include_data): + yield x + + +def walk_object(cat_pipe, id, + stop_at=None, + include_data=None): + """Yield everything reachable from id via cat_pipe as a WalkItem, + stopping whenever stop_at(id) returns true.""" + return _walk_object(cat_pipe, id, [], [], + stop_at=stop_at, + include_data=include_data)