From 035bad1573f3ec6649ef301731761cbebb666950 Mon Sep 17 00:00:00 2001
From: Avery Pennarun
Date: Fri, 12 Mar 2010 18:46:40 -0500
Subject: [PATCH] git.PackWriter: avoid pack corruption if interrupted by a signal.

PackWriter tries to "finish" a half-written pack in its destructor if
interrupted. To do this, it flushes the stream, seeks back to the beginning
to update the sha1sum and object count, then runs git-index-pack on it to
create the .idx file.

However, sometimes if you were unlucky, you'd interrupt PackWriter partway
through writing an object to the pack. If only half an object exists at the
end, it would have the wrong header and thus come out as corrupt when
index-pack would run.

Since our objects are meant to be small anyway, just make sure we write
everything all in one file.write() operation. The files themselves are
buffered, so this wouldn't survive a surprise termination of the whole unix
process, but we wouldn't run index-pack in that case anyway, so it doesn't
matter.

Now when I press ctrl-c in 'bup save', it consistently writes the half-saved
objects as it should.
---
 lib/bup/git.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/bup/git.py b/lib/bup/git.py
index 4c5a6ac..5a68893 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -351,9 +351,14 @@ class PackWriter:
     def _raw_write(self, datalist):
         self._open()
         f = self.file
-        for d in datalist:
-            f.write(d)
-            self.outbytes += len(d)
+        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
+        # the file never has a *partial* blob. So let's make sure it's
+        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
+        # to our hashsplit algorithm.) f.write() does its own buffering,
+        # but that's okay because we'll flush it in _end().
+        oneblob = ''.join(datalist)
+        f.write(oneblob)
+        self.outbytes += len(oneblob)
         self.count += 1
 
     def _write(self, bin, type, content):
-- 
2.39.2