arthur.barton.de Git - bup.git/commitdiff
walk_object: ignore blob content if not requested
author Ben Kelly <bk@ancilla.ca>
Thu, 10 Nov 2016 15:01:08 +0000 (10:01 -0500)
committer Rob Browning <rlb@defaultvalue.org>
Wed, 23 Nov 2016 18:39:25 +0000 (12:39 -0600)
Previously, walk_object would read the contents of each object it
encountered, even if it subsequently threw away those contents.

With this change, it will skip reading the contents of blobs unless
the caller explicitly asked for blob contents. In particular, this
means operations that traverse the object graph without caring
about object contents, such as `bup gc`, now only need to read the
contents of tree and commit objects.

Tested: `make test`; local run against copy of production repo.
Signed-off-by: Ben Kelly <btk@google.com>
[rlb@defaultvalue.org: shorten commit summary; adjust indentation;
 adjust docstring and comment; test "not include_data" first; change to
 type='blob']

Reviewed-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
lib/bup/git.py

index 989936dd1e6858d7bd22a3cf8dbc1ac1f79af276..e0f285df00051fba4376d37884ad3101d9e00d44 100644 (file)
@@ -1270,8 +1270,9 @@ def walk_object(cat_pipe, id,
                 include_data=None):
     """Yield everything reachable from id via cat_pipe as a WalkItem,
     stopping whenever stop_at(id) returns true.  Throw MissingObject
-    if a hash encountered is missing from the repository.
-
+    if a hash encountered is missing from the repository, and don't
+    read or return blob content in the data field unless include_data
+    is set.
     """
     # Maintain the pending stack on the heap to avoid stack overflow
     pending = [(id, [], [], None)]
@@ -1280,7 +1281,17 @@ def walk_object(cat_pipe, id,
         if stop_at and stop_at(id):
             continue
 
-        item_it = cat_pipe.get(id)  # FIXME: use include_data
+        if (not include_data) and mode and stat.S_ISREG(mode):
+            # If the object is a "regular file", then it's a leaf in
+            # the graph, so we can skip reading the data if the caller
+            # hasn't requested it.
+            yield WalkItem(id=id, type='blob',
+                           chunk_path=chunk_path, path=parent_path,
+                           mode=mode,
+                           data=None)
+            continue
+
+        item_it = cat_pipe.get(id)
         type = item_it.next()
         if type not in ('blob', 'commit', 'tree'):
             raise Exception('unexpected repository object type %r' % type)