arthur.barton.de Git - bup.git/blobdiff - lib/bup/git.py
gc: restart catpipe after each new pack
[bup.git] / lib / bup / git.py
index fa677fe4521fece451b307f266332e7a250fe83f..fc9eb39cb2c35ec8b9d0cc67c4f8b1cdd0b84e24 100644 (file)
@@ -12,7 +12,8 @@ from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
                          fdatasync,
                          hostname, localtime, log, merge_iter,
                          mmap_read, mmap_readwrite,
-                         progress, qprogress, unlink, username, userfullname,
+                         progress, qprogress, stat_if_exists,
+                         unlink, username, userfullname,
                          utc_offset_str)
 
 
@@ -252,6 +253,8 @@ def tree_decode(buf):
 
 
 def _encode_packobj(type, content, compression_level=1):
+    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
+        raise ValueError('invalid compression level %s' % compression_level)
     szout = ''
     sz = len(content)
     szbits = (sz & 0x0f) | (_typemap[type]<<4)
@@ -263,10 +266,6 @@ def _encode_packobj(type, content, compression_level=1):
             break
         szbits = sz & 0x7f
         sz >>= 7
-    if compression_level > 9:
-        raise BaseException("Compression Level Above 9")
-    elif compression_level < 0:
-        raise BaseException("Compression Level Below 0")
     z = zlib.compressobj(compression_level)
     yield szout
     yield z.compress(content)
@@ -677,15 +676,16 @@ class PackWriter:
         self._require_objcache()
         return self.objcache.exists(id, want_source=want_source)
 
-    def write(self, sha, type, content):
-        """Write an object to the pack file.  Fails if sha exists()."""
+    def just_write(self, sha, type, content):
+        """Write an object to the pack file, bypassing the objcache.  Fails if
+        sha exists()."""
         self._write(sha, type, content)
 
     def maybe_write(self, type, content):
         """Write an object to the pack file if not present and return its id."""
         sha = calc_hash(type, content)
         if not self.exists(sha):
-            self.write(sha, type, content)
+            self.just_write(sha, type, content)
             self._require_objcache()
             self.objcache.add(sha)
         return sha
@@ -844,13 +844,13 @@ def _gitenv(repo_dir = None):
     return env
 
 
-def list_refs(refname=None, repo_dir=None,
+def list_refs(refnames=None, repo_dir=None,
               limit_to_heads=False, limit_to_tags=False):
-    """Yield (refname, hash) tuples for all repository refs unless a ref
-    name is specified.  Given a ref name, only include tuples for that
-    particular ref.  The limits restrict the result items to
-    refs/heads or refs/tags.  If both limits are specified, items from
-    both sources will be included.
+    """Yield (refname, hash) tuples for all repository refs unless
+    refnames are specified.  In that case, only include tuples for
+    those refs.  The limits restrict the result items to refs/heads or
+    refs/tags.  If both limits are specified, items from both sources
+    will be included.
 
     """
     argv = ['git', 'show-ref']
@@ -859,8 +859,8 @@ def list_refs(refname=None, repo_dir=None,
     if limit_to_tags:
         argv.append('--tags')
     argv.append('--')
-    if refname:
-        argv += [refname]
+    if refnames:
+        argv += refnames
     p = subprocess.Popen(argv,
                          preexec_fn = _gitenv(repo_dir),
                          stdout = subprocess.PIPE)
@@ -876,7 +876,7 @@ def list_refs(refname=None, repo_dir=None,
 
 def read_ref(refname, repo_dir = None):
     """Get the commit id of the most recent commit made on a given ref."""
-    refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
+    refs = list_refs(refnames=[refname], repo_dir=repo_dir, limit_to_heads=True)
     l = tuple(islice(refs, 2))
     if l:
         assert(len(l) == 1)
@@ -1015,21 +1015,20 @@ def init_repo(path=None):
 
 
 def check_repo_or_die(path=None):
-    """Make sure a bup repository exists, and abort if not.
-    If the path to a particular repository was not specified, this function
-    initializes the default repository automatically.
-    """
+    """Check to see if a bup repository probably exists, and abort if not."""
     guess_repo(path)
-    try:
-        os.stat(repo('objects/pack/.'))
-    except OSError as e:
-        if e.errno == errno.ENOENT:
-            log('error: %r is not a bup repository; run "bup init"\n'
-                % repo())
+    top = repo()
+    pst = stat_if_exists(top + '/objects/pack')
+    if pst and stat.S_ISDIR(pst.st_mode):
+        return
+    if not pst:
+        top_st = stat_if_exists(top)
+        if not top_st:
+            log('error: repository %r does not exist (see "bup help init")\n'
+                % top)
             sys.exit(15)
-        else:
-            log('error: %s\n' % e)
-            sys.exit(14)
+    log('error: %r is not a repository\n' % top)
+    sys.exit(14)
 
 
 _ver = None
@@ -1132,7 +1131,7 @@ class CatPipe:
         self.p = None
         self.inprogress = None
 
-    def _restart(self):
+    def restart(self):
         self._abort()
         self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                   stdin=subprocess.PIPE,
@@ -1143,7 +1142,7 @@ class CatPipe:
 
     def _fast_get(self, id):
         if not self.p or self.p.poll() != None:
-            self._restart()
+            self.restart()
         assert(self.p)
         poll_result = self.p.poll()
         assert(poll_result == None)
@@ -1266,79 +1265,68 @@ WalkItem = namedtuple('WalkItem', ['id', 'type', 'mode',
 #   ...
 
 
-def _walk_object(cat_pipe, id,
-                 parent_path, chunk_path,
-                 mode=None,
-                 stop_at=None,
-                 include_data=None):
-
-    if stop_at and stop_at(id):
-        return
-
-    item_it = cat_pipe.get(id)  # FIXME: use include_data
-    type = item_it.next()
-
-    if type not in ('blob', 'commit', 'tree'):
-        raise Exception('unexpected repository object type %r' % type)
-
-    # FIXME: set the mode based on the type when the mode is None
-
-    if type == 'blob' and not include_data:
-        # Dump data until we can ask cat_pipe not to fetch it
-        for ignored in item_it:
-            pass
-        data = None
-    else:
-        data = ''.join(item_it)
-
-    yield  WalkItem(id=id, type=type,
-                    chunk_path=chunk_path, path=parent_path,
-                    mode=mode,
-                    data=(data if include_data else None))
-
-    if type == 'commit':
-        commit_items = parse_commit(data)
-        tree_id = commit_items.tree
-        for x in _walk_object(cat_pipe, tree_id, parent_path, chunk_path,
-                              mode=hashsplit.GIT_MODE_TREE,
-                              stop_at=stop_at,
-                              include_data=include_data):
-            yield x
-        parents = commit_items.parents
-        for pid in parents:
-            for x in _walk_object(cat_pipe, pid, parent_path, chunk_path,
-                                  mode=mode, # Same mode as this child
-                                  stop_at=stop_at,
-                                  include_data=include_data):
-                yield x
-    elif type == 'tree':
-        for mode, name, ent_id in tree_decode(data):
-            demangled, bup_type = demangle_name(name, mode)
-            if chunk_path:
-                sub_path = parent_path
-                sub_chunk_path = chunk_path + [name]
-            else:
-                sub_path = parent_path + [name]
-                if bup_type == BUP_CHUNKED:
-                    sub_chunk_path = ['']
-                else:
-                    sub_chunk_path = chunk_path
-            for x in _walk_object(cat_pipe, ent_id.encode('hex'),
-                                  sub_path, sub_chunk_path,
-                                  mode=mode,
-                                  stop_at=stop_at,
-                                  include_data=include_data):
-                yield x
-
-
 def walk_object(cat_pipe, id,
                 stop_at=None,
                 include_data=None):
     """Yield everything reachable from id via cat_pipe as a WalkItem,
     stopping whenever stop_at(id) returns true.  Throw MissingObject
-    if a hash encountered is missing from the repository.
-
+    if a hash encountered is missing from the repository, and don't
+    read or return blob content in the data field unless include_data
+    is set.
     """
-    return _walk_object(cat_pipe, id, [], [],
-                        stop_at=stop_at,
-                        include_data=include_data)
+    # Maintain the pending stack on the heap to avoid stack overflow
+    pending = [(id, [], [], None)]
+    while len(pending):
+        id, parent_path, chunk_path, mode = pending.pop()
+        if stop_at and stop_at(id):
+            continue
+
+        if (not include_data) and mode and stat.S_ISREG(mode):
+            # If the object is a "regular file", then it's a leaf in
+            # the graph, so we can skip reading the data if the caller
+            # hasn't requested it.
+            yield WalkItem(id=id, type='blob',
+                           chunk_path=chunk_path, path=parent_path,
+                           mode=mode,
+                           data=None)
+            continue
+
+        item_it = cat_pipe.get(id)
+        type = item_it.next()
+        if type not in ('blob', 'commit', 'tree'):
+            raise Exception('unexpected repository object type %r' % type)
+
+        # FIXME: set the mode based on the type when the mode is None
+        if type == 'blob' and not include_data:
+            # Dump data until we can ask cat_pipe not to fetch it
+            for ignored in item_it:
+                pass
+            data = None
+        else:
+            data = ''.join(item_it)
+
+        yield WalkItem(id=id, type=type,
+                       chunk_path=chunk_path, path=parent_path,
+                       mode=mode,
+                       data=(data if include_data else None))
+
+        if type == 'commit':
+            commit_items = parse_commit(data)
+            for pid in commit_items.parents:
+                pending.append((pid, parent_path, chunk_path, mode))
+            pending.append((commit_items.tree, parent_path, chunk_path,
+                            hashsplit.GIT_MODE_TREE))
+        elif type == 'tree':
+            for mode, name, ent_id in tree_decode(data):
+                demangled, bup_type = demangle_name(name, mode)
+                if chunk_path:
+                    sub_path = parent_path
+                    sub_chunk_path = chunk_path + [name]
+                else:
+                    sub_path = parent_path + [name]
+                    if bup_type == BUP_CHUNKED:
+                        sub_chunk_path = ['']
+                    else:
+                        sub_chunk_path = chunk_path
+                pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,
+                                mode))