]> arthur.barton.de Git - bup.git/commitdiff
Add atomically_replaced_file for safer output
authorNathan Bird <ecthellion@gmail.com>
Thu, 17 Jul 2014 22:27:40 +0000 (18:27 -0400)
committerRob Browning <rlb@defaultvalue.org>
Tue, 5 Aug 2014 17:02:29 +0000 (12:02 -0500)
Use `tempfile.mkstemp` to write files safely and then rename them
atomically to the target filename at the end. This guarantees that two
concurrent processes trying to write the same target file will result
in one process "winning" with a consistent version.

Use this to protect:
 * midx writing
 * .idx cache writing

Previously both of these processes `open(destination + '.tmp', 'w')`
which would lead to concurrent processes writing the same tmp file at
the same time.

See also https://groups.google.com/d/msg/bup-list/qdPUaXGO1cI/g8K2aFaaXz0J

Signed-off-by: Nathan Bird <ecthellion@gmail.com>
[rlb@defaultvalue.org: adjust commit message]
Reviewed-by: Rob Browning <rlb@defaultvalue.org>
Tested-by: Rob Browning <rlb@defaultvalue.org>
cmd/midx-cmd.py
lib/bup/client.py
lib/bup/helpers.py
lib/bup/t/thelpers.py

index 5c0c1266f60bd0a548ecca6babe13330857ac7d0..a18707d8b3eb6a0e9dcb37a1cfdc053de0844990 100755 (executable)
@@ -114,19 +114,21 @@ def _do_midx(outdir, outfilename, infilenames, prefixstr):
         debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
 
         unlink(outfilename)
-        f = open(outfilename + '.tmp', 'w+b')
-        f.write('MIDX')
-        f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
-        assert(f.tell() == 12)
+        with atomically_replaced_file(outfilename, 'wb') as f:
+            f.write('MIDX')
+            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
+            assert(f.tell() == 12)
 
-        f.truncate(12 + 4*entries + 20*total + 4*total)
-        f.flush()
-        fdatasync(f.fileno())
+            f.truncate(12 + 4*entries + 20*total + 4*total)
+            f.flush()
+            fdatasync(f.fileno())
 
-        fmap = mmap_readwrite(f, close=False)
+            fmap = mmap_readwrite(f, close=False)
 
-        count = merge_into(fmap, bits, total, inp)
-        del fmap # Assume this calls msync() now.
+            count = merge_into(fmap, bits, total, inp)
+            del fmap # Assume this calls msync() now.
+            f.seek(0, os.SEEK_END)
+            f.write('\0'.join(allfilenames))
     finally:
         for ix in midxs:
             if isinstance(ix, midx.PackMidx):
@@ -134,10 +136,6 @@ def _do_midx(outdir, outfilename, infilenames, prefixstr):
         midxs = None
         inp = None
 
-    f.seek(0, os.SEEK_END)
-    f.write('\0'.join(allfilenames))
-    f.close()
-    os.rename(outfilename + '.tmp', outfilename)
 
     # This is just for testing (if you enable this, don't clear inp above)
     if 0:
index 54b8b50f824e1584f74335f5c4e706578197f90f..54fe9d59d3d318ce14a0a7285f069dcf7affcd9e 100644 (file)
@@ -189,17 +189,15 @@ class Client:
         self.conn.write('send-index %s\n' % name)
         n = struct.unpack('!I', self.conn.read(4))[0]
         assert(n)
-        f = open(fn + '.tmp', 'w')
-        count = 0
-        progress('Receiving index from server: %d/%d\r' % (count, n))
-        for b in chunkyreader(self.conn, n):
-            f.write(b)
-            count += len(b)
-            qprogress('Receiving index from server: %d/%d\r' % (count, n))
-        progress('Receiving index from server: %d/%d, done.\n' % (count, n))
-        self.check_ok()
-        f.close()
-        os.rename(fn + '.tmp', fn)
+        with atomically_replaced_file(fn, 'w') as f:
+            count = 0
+            progress('Receiving index from server: %d/%d\r' % (count, n))
+            for b in chunkyreader(self.conn, n):
+                f.write(b)
+                count += len(b)
+                qprogress('Receiving index from server: %d/%d\r' % (count, n))
+            progress('Receiving index from server: %d/%d, done.\n' % (count, n))
+            self.check_ok()
 
     def _make_objcache(self):
         return git.PackIdxList(self.cachedir)
index b55c4a981b4cf462e267b88a5192559c02dc72b5..34772368966ccf633aaf70b22fe47a02368fbee5 100644 (file)
@@ -2,8 +2,9 @@
 
 from ctypes import sizeof, c_void_p
 from os import environ
+from contextlib import contextmanager
 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
-import hashlib, heapq, operator, time, grp
+import hashlib, heapq, operator, time, grp, tempfile
 
 from bup import _helpers
 import bup._helpers as _helpers
@@ -628,6 +629,42 @@ def chunkyreader(f, count = None):
             yield b
 
 
+@contextmanager
+def atomically_replaced_file(name, mode='w', buffering=-1):
+    """Write to file which will atomically become name when finished.
+
+    This contextmanager yields an open file object that is backed by a
+    temporary file which will be renamed (atomically) to the target
+    name if everything succeeds.
+
+    The mode and buffering arguments are handled exactly as with open,
+    and upon success the resulting file will have very restrictive
+    permissions, as per mkstemp.
+
+    E.g.::
+
+        with atomically_replaced_file('foo.txt', 'w') as f:
+            f.write('hello jack.')
+
+    """
+
+    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
+                                       text=('b' not in mode))
+    try:
+        try:
+            f = os.fdopen(ffd, mode, buffering)
+        except:
+            os.close(ffd)
+            raise
+        try:
+            yield f
+        finally:
+            f.close()
+        os.rename(tempname, name)
+    finally:
+        unlink(tempname)  # nonexistant file is ignored
+
+
 def slashappend(s):
     """Append "/" to 's' if it doesn't aleady end in "/"."""
     if s and not s.endswith('/'):
index b5427076dddc9e93ffd882af8a4ef9aa744408e0..02e89efb10c7b6f1c309441e9dcc46cbe408119d 100644 (file)
@@ -1,6 +1,9 @@
 import helpers
 import math
 import os
+import os.path
+import tempfile
+import stat
 import bup._helpers as _helpers
 from bup.helpers import *
 from wvtest import *
@@ -143,3 +146,21 @@ def test_batchpipe():
     WVPASSEQ(next(batches), '0 1 2\n')
     WVPASSEQ(next(batches), '3 4\n')
     WVPASSEQ(next(batches, None), None)
+
+
+@wvtest
+def test_atomically_replaced_file():
+    target_file = os.path.join(tempfile.gettempdir(),
+                               'test_atomic_write')
+    try:
+        with atomically_replaced_file(target_file, mode='w') as f:
+            f.write('asdf')
+            WVPASSEQ(f.mode, 'w')
+        f = open(target_file, 'r')
+        WVPASSEQ(f.read(), 'asdf')
+
+        with atomically_replaced_file(target_file, mode='wb') as f:
+            f.write(os.urandom(20))
+            WVPASSEQ(f.mode, 'wb')
+    finally:
+        unlink(target_file)