arthur.barton.de Git - bup.git/commitdiff
bloom: avoid kernel disk flushes when we dirty a lot of pages.
author: Avery Pennarun <apenwarr@gmail.com>
Tue, 8 Feb 2011 03:09:06 +0000 (19:09 -0800)
committer: Avery Pennarun <apenwarr@gmail.com>
Tue, 8 Feb 2011 03:54:07 +0000 (19:54 -0800)
Based on the number of objects we'll add to the bloom, decide if we want to
mmap() the pages as shared-writable ('immediate' write) or else map them
private-writable for later manual writing back to the file ('delayed'
write).

A bloom table's write access pattern is such that we dirty almost all the
pages after adding very few entries; essentially, we can expect to dirty
about n*k/4096 pages if we add n objects to the bloom with k hashes. But the
table is so big that dirtying *all* the pages often exceeds Linux's default
/proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
thus causing it to start flushing the table before we're
finished... even though there's more than enough space to
store the bloom table in RAM.

To work around that behaviour, if we calculate that we'll probably end up
touching the whole table anyway (at least one bit flipped per memory page),
let's use a "private" mmap, which defeats Linux's ability to flush it to
disk.  Then we'll flush it as one big lump during close(), which doesn't
lose any time since we would have had to flush all the pages anyway.

While we're here, let's remove the readwrite=True option to
ShaBloom.create(); nobody's going to create a bloom file that isn't
writable.

Signed-off-by: Avery Pennarun <apenwarr@gmail.com>
cmd/bloom-cmd.py
lib/bup/git.py
lib/bup/helpers.py
lib/bup/t/tgit.py

index 8ee75869f9139d7068742bd4d0fa15823980b116..9709239a612584f053def4528d8977107bde4ac7 100755 (executable)
@@ -18,7 +18,7 @@ def do_bloom(path, outfilename):
 
     b = None
     if os.path.exists(outfilename):
-        b = git.ShaBloom(outfilename, readwrite=True)
+        b = git.ShaBloom(outfilename)
         if not b.valid():
             debug1("bloom: Existing invalid bloom found, regenerating.\n")
             b = None
@@ -42,17 +42,19 @@ def do_bloom(path, outfilename):
         log("bloom: Nothing to do\n")
         return
 
-    if b is not None:
+    if b:
         if len(b) != rest_count:
             log("bloom: size %d != idx total %d, regenerating\n"
                     % (len(b), rest_count))
             b = None
-        elif b.bits < git.MAX_BLOOM_BITS and \
-             b.pfalse_positive(add_count) > git.MAX_PFALSE_POSITIVE:
+        elif (b.bits < git.MAX_BLOOM_BITS and
+              b.pfalse_positive(add_count) > git.MAX_PFALSE_POSITIVE):
             log("bloom: %d more entries => %.2f false positive, regenerating\n"
                     % (add_count, b.pfalse_positive(add_count)))
             b = None
-    if b is None: # Need all idxs to build from scratch
+        else:
+            b = git.ShaBloom(outfilename, readwrite=True, expected=add_count)
+    if not b: # Need all idxs to build from scratch
         add += rest
         add_count += rest_count
     del rest
@@ -65,8 +67,7 @@ def do_bloom(path, outfilename):
     if b is None:
         tfname = os.path.join(path, 'bup.tmp.bloom')
         tf = open(tfname, 'w+')
-        b = git.ShaBloom.create(
-                tfname, f=tf, readwrite=True, expected=add_count, k=opt.k)
+        b = git.ShaBloom.create(tfname, f=tf, expected=add_count, k=opt.k)
     count = 0
     for name in add:
         ix = git.open_idx(name)
index 33a85c78f3bafbc0fa5d2d954f48c43b24822e5d..b75e80234b600aff7f0beeaa4247041b941e2c4f 100644 (file)
@@ -346,19 +346,46 @@ bloom_add = _helpers.bloom_add
 class ShaBloom:
     """Wrapper which contains data from multiple index files.
     """
-    def __init__(self, filename, f=None, readwrite=False):
+    def __init__(self, filename, f=None, readwrite=False, expected=-1):
         self.name = filename
         self.rwfile = None
         self.map = None
         assert(filename.endswith('.bloom'))
         if readwrite:
-            self.rwfile = f or open(filename, 'r+b')
-            self.map = mmap_readwrite(self.rwfile, close=False)
+            assert(expected > 0)
+            self.rwfile = f = f or open(filename, 'r+b')
+            f.seek(0)
+
+            # Decide if we want to mmap() the pages as writable ('immediate'
+            # write) or else map them privately for later writing back to
+            # the file ('delayed' write).  A bloom table's write access
+            # pattern is such that we dirty almost all the pages after adding
+            # very few entries.  But the table is so big that dirtying
+            # *all* the pages often exceeds Linux's default
+            # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
+            # thus causing it to start flushing the table before we're
+            # finished... even though there's more than enough space to
+            # store the bloom table in RAM.
+            #
+            # To work around that behaviour, if we calculate that we'll
+            # probably end up touching the whole table anyway (at least
+            # one bit flipped per memory page), let's use a "private" mmap,
+            # which defeats Linux's ability to flush it to disk.  Then we'll
+            # flush it as one big lump during close().
+            pages = os.fstat(f.fileno()).st_size / 4096 * 5 # assume k=5
+            self.delaywrite = expected > pages
+            debug1('bloom: delaywrite=%r\n' % self.delaywrite)
+            if self.delaywrite:
+                self.map = mmap_readwrite_private(self.rwfile, close=False)
+            else:
+                self.map = mmap_readwrite(self.rwfile, close=False)
         else:
             self.rwfile = None
-            self.map = mmap_read(f or open(filename, 'rb'))
-        if str(self.map[0:4]) != 'BLOM':
-            log('Warning: skipping: invalid BLOM header in %r\n' % filename)
+            f = f or open(filename, 'rb')
+            self.map = mmap_read(f)
+        got = str(self.map[0:4])
+        if got != 'BLOM':
+            log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
             return self._init_failed()
         ver = struct.unpack('!I', self.map[4:8])[0]
         if ver < BLOOM_VERSION:
@@ -379,7 +406,6 @@ class ShaBloom:
 
     def _init_failed(self):
         if self.map:
-            self.map.close()
             self.map = None
         if self.rwfile:
             self.rwfile.close()
@@ -394,12 +420,14 @@ class ShaBloom:
         self.close()
 
     def close(self):
-        if self.map:
-            if self.rwfile:
-                debug2("bloom: closing with %d entries\n" % self.entries)
-                self.map[12:16] = struct.pack('!I', self.entries)
+        if self.map and self.rwfile:
+            debug2("bloom: closing with %d entries\n" % self.entries)
+            self.map[12:16] = struct.pack('!I', self.entries)
+            if self.delaywrite:
+                self.rwfile.seek(0)
+                self.rwfile.write(self.map)
+            else:
                 self.map.flush()
-        if self.rwfile:
             self.rwfile.seek(16 + 2**self.bits)
             if self.idxnames:
                 self.rwfile.write('\0'.join(self.idxnames))
@@ -427,7 +455,7 @@ class ShaBloom:
         return found
 
     @classmethod
-    def create(cls, name, f=None, readwrite=False, expected=100000, k=None):
+    def create(cls, name, expected, delaywrite=None, f=None, k=None):
         """Create and return a bloom filter for `expected` entries."""
         bits = int(math.floor(math.log(expected*MAX_BITS_EACH/8,2)))
         k = k or ((bits <= MAX_BLOOM_BITS[5]) and 5 or 4)
@@ -443,7 +471,10 @@ class ShaBloom:
         # darwin, linux, bsd and solaris.
         f.truncate(16+2**bits)
         f.seek(0)
-        return cls(name, f=f, readwrite=readwrite)
+        if delaywrite != None and not delaywrite:
+            # tell it to expect very few objects, forcing a direct mmap
+            expected = 1
+        return cls(name, f=f, readwrite=True, expected=expected)
 
     def __len__(self):
         return self.entries
index 7b5eeadafaa78e051083d9119ab687e6882d0f65..da27edb44b29330e6a9998e7862a9cb1747ba248 100644 (file)
@@ -467,7 +467,6 @@ def _mmap_do(f, sz, flags, prot, close):
 
 def mmap_read(f, sz = 0, close=True):
     """Create a read-only memory mapped region on file 'f'.
-
     If sz is 0, the region will cover the entire file.
     """
     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
@@ -475,13 +474,22 @@ def mmap_read(f, sz = 0, close=True):
 
 def mmap_readwrite(f, sz = 0, close=True):
     """Create a read-write memory mapped region on file 'f'.
-
     If sz is 0, the region will cover the entire file.
     """
     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
                     close)
 
 
+def mmap_readwrite_private(f, sz = 0, close=True):
+    """Create a read-write memory mapped region on file 'f'.
+    If sz is 0, the region will cover the entire file.
+    The map is private, which means the changes are never flushed back to the
+    file.
+    """
+    return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
+                    close)
+
+
 def parse_num(s):
     """Parse data size information into a float number.
 
index a7012f17ea3996683eac59078f2653eab9358070..f5e0237c064cceb8c58987ba87da15fcff818143 100644 (file)
@@ -124,7 +124,7 @@ def test_bloom():
     ix.name='dummy.idx'
     ix.shatable = ''.join(hashes)
     for k in (4, 5):
-        b = git.ShaBloom.create('pybuptest.bloom', readwrite=True, expected=100, k=k)
+        b = git.ShaBloom.create('pybuptest.bloom', expected=100, k=k)
         b.add_idx(ix)
         WVPASSLT(b.pfalse_positive(), .1)
         b.close()
@@ -141,9 +141,10 @@ def test_bloom():
         os.unlink('pybuptest.bloom')
 
     tf = tempfile.TemporaryFile()
-    b = git.ShaBloom.create('bup.bloom', f=tf, readwrite=True, expected=100)
+    b = git.ShaBloom.create('bup.bloom', f=tf, expected=100)
     WVPASSEQ(b.rwfile, tf)
     WVPASSEQ(b.k, 5)
     tf = tempfile.TemporaryFile()
-    b = git.ShaBloom.create('bup.bloom', f=tf, readwrite=True, expected=2**28)
+    b = git.ShaBloom.create('bup.bloom', f=tf, expected=2**28,
+                            delaywrite=False)
     WVPASSEQ(b.k, 4)