]> arthur.barton.de Git - bup.git/commitdiff
Move commit directories into /.commit/??/
author Gabriel Filion <lelutin@gmail.com>
Fri, 26 Nov 2010 11:00:34 +0000 (06:00 -0500)
committer Avery Pennarun <apenwarr@gmail.com>
Wed, 1 Dec 2010 10:09:52 +0000 (02:09 -0800)
Currently, the directory through which we can access the files of a
particular commit is placed in every branch directory from which that
commit is reachable.

To avoid possible repetitions of commit directories, move the
directories into a new top-level hidden directory named /.commit.

This hidden directory is organized as a two-level-deep structure,
wherein the first level represents the first byte (two hexadecimal
characters) of a commit hash, and the second level represents the
remainder of the hash.

With this move, branch directories now contain only symlinks to the
commit directories in /.commit/??/

Also, in BranchList (formerly CommitList), the 'latest' commit was
computed on every iteration over a commit. I moved this calculation up
one level so that it is computed only once.

Signed-off-by: Gabriel Filion <lelutin@gmail.com>
lib/bup/vfs.py

index 891e9117afab33ab8c1a6c4016b9adcf5dc918a8..1103b3863fce3eb7228e5fc0dd02c2dd4b8129ac 100644 (file)
@@ -397,12 +397,61 @@ class Dir(Node):
                 self._subs[name] = File(self, name, mode, sha, bupmode)
 
 
+class CommitDir(Node):
+    """A directory that contains all commits that are reachable by a ref.
+
+    Contains a set of subdirectories named after the commits' first byte in
+    hexadecimal. Each of those directories contain all commits with hashes that
+    start the same as the directory name. The name used for those
+    subdirectories is the hash of the commit without the first byte. This
+    separation helps us avoid having too much directories on the same level as
+    the number of commits grows big.
+    """
+    def __init__(self, parent, name):
+        Node.__init__(self, parent, name, 040000, EMPTY_SHA)
+
+    def _mksubs(self):
+        self._subs = {}
+        refs = git.list_refs()
+        for ref in refs:
+            #debug2('ref name: %s\n' % ref[0])
+            revs = git.rev_list(ref[1].encode('hex'))
+            for (date, commit) in revs:
+                #debug2('commit: %s  date: %s\n' % (commit.encode('hex'), date))
+                commithex = commit.encode('hex')
+                containername = commithex[:2]
+                dirname = commithex[2:]
+                n1 = self._subs.get(containername)
+                if not n1:
+                    n1 = CommitList(self, containername)
+                    self._subs[containername] = n1
+
+                if n1.commits.get(dirname):
+                    # Stop work for this ref, the rest should already be present
+                    break
+
+                n1.commits[dirname] = (commit, date)
+
+
 class CommitList(Node):
-    """A reverse-chronological list of commits on a branch in bup's repository.
+    """A list of commits with hashes that start with the current node's name."""
+    def __init__(self, parent, name):
+        Node.__init__(self, parent, name, 040000, EMPTY_SHA)
+        self.commits = {}
+
+    def _mksubs(self):
+        self._subs = {}
+        for (name, (hash, date)) in self.commits.items():
+            n1 = Dir(self, name, 040000, hash)
+            n1.ctime = n1.mtime = date
+            self._subs[name] = n1
 
-    Represents each commit as a directory and a symlink that points to the
-    directory. The symlink is named after the date. Prepends a dot to each hash
-    to make commits look like hidden directories.
+
+class BranchList(Node):
+    """A list of links to commits reachable by a branch in bup's repository.
+
+    Represents each commit as a symlink that points to the commit directory in
+    /.commit/??/ . The symlink is named after the commit date.
     """
     def __init__(self, parent, name, hash):
         Node.__init__(self, parent, name, 040000, hash)
@@ -413,19 +462,20 @@ class CommitList(Node):
         for (date, commit) in revs:
             l = time.localtime(date)
             ls = time.strftime('%Y-%m-%d-%H%M%S', l)
-            commithex = '.' + commit.encode('hex')
-            n1 = Dir(self, commithex, 040000, commit)
-            n2 = FakeSymlink(self, ls, commithex)
-            n1.ctime = n1.mtime = n2.ctime = n2.mtime = date
-            self._subs[commithex] = n1
-            self._subs[ls] = n2
-            latest = max(revs)
+            commithex = commit.encode('hex')
+            target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
+            n1 = FakeSymlink(self, ls, target)
+            n1.ctime = n1.mtime = date
+            self._subs[ls] = n1
+
+        latest = max(revs)
         if latest:
             (date, commit) = latest
-            commithex = '.' + commit.encode('hex')
-            n2 = FakeSymlink(self, 'latest', commithex)
-            n2.ctime = n2.mtime = date
-            self._subs['latest'] = n2
+            commithex = commit.encode('hex')
+            target = '../.commit/%s/%s' % (commithex[:2], commithex[2:])
+            n1 = FakeSymlink(self, 'latest', target)
+            n1.ctime = n1.mtime = date
+            self._subs['latest'] = n1
 
 
 class RefList(Node):
@@ -433,18 +483,23 @@ class RefList(Node):
 
     The sub-nodes of the ref list are a series of CommitList for each commit
     hash pointed to by a branch.
+
+    Also, a special sub-node named '.commit' contains all commit directories
+    that are reachable via a ref (e.g. a branch).  See CommitDir for details.
     """
     def __init__(self, parent):
         Node.__init__(self, parent, '/', 040000, EMPTY_SHA)
 
     def _mksubs(self):
         self._subs = {}
+
+        commit_dir = CommitDir(self, '.commit')
+        self._subs['.commit'] = commit_dir
+
         for (name,sha) in git.list_refs():
             if name.startswith('refs/heads/'):
                 name = name[11:]
                 date = git.rev_get_date(sha.encode('hex'))
-                n1 = CommitList(self, name, sha)
+                n1 = BranchList(self, name, sha)
                 n1.ctime = n1.mtime = date
                 self._subs[name] = n1
-
-