From: Avery Pennarun Date: Wed, 22 Sep 2010 13:46:16 +0000 (-0700) Subject: git.py: support the old git pack .idx version 1 format. X-Git-Tag: bup-0.20~10 X-Git-Url: https://arthur.barton.de/gitweb/?p=bup.git;a=commitdiff_plain;h=4de60c883ad65215fd64313e76a0c7751a4ce900 git.py: support the old git pack .idx version 1 format. It's produced by git-fast-import, and I happen to want to read some of those. Signed-off-by: Avery Pennarun --- diff --git a/cmd/server-cmd.py b/cmd/server-cmd.py index 8ea3a37..6a875ff 100755 --- a/cmd/server-cmd.py +++ b/cmd/server-cmd.py @@ -30,7 +30,7 @@ def send_index(conn, name): git.check_repo_or_die() assert(name.find('/') < 0) assert(name.endswith('.idx')) - idx = git.PackIdx(git.repo('objects/pack/%s' % name)) + idx = git.open_idx(git.repo('objects/pack/%s' % name)) conn.write(struct.pack('!I', len(idx.map))) conn.write(idx.map) conn.ok() diff --git a/lib/bup/git.py b/lib/bup/git.py index de8b511..5395393 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -137,30 +137,23 @@ def _decode_packobj(buf): return (type, zlib.decompress(buf[i+1:])) -class PackIdxV2: - """Object representation of a Git pack index file.""" - def __init__(self, filename, f): - self.name = filename - self.idxnames = [self.name] - self.map = mmap_read(f) - assert(str(self.map[0:8]) == '\377tOc\0\0\0\2') - self.fanout = list(struct.unpack('!256I', - str(buffer(self.map, 8, 256*4)))) - self.fanout.append(0) # entry "-1" - nsha = self.fanout[255] - self.ofstable = buffer(self.map, - 8 + 256*4 + nsha*20 + nsha*4, - nsha*4) - self.ofs64table = buffer(self.map, - 8 + 256*4 + nsha*20 + nsha*4 + nsha*4) +class PackIdx: + def __init__(self): + assert(0) + + def find_offset(self, hash): + """Get the offset of an object inside the index file.""" + idx = self._idx_from_hash(hash) + if idx != None: + return self._ofs_from_idx(idx) + return None - def _ofs_from_idx(self, idx): - ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0] - if ofs & 0x80000000: - idx64 = ofs & 0x7fffffff - ofs = struct.unpack('!I', - str(buffer(self.ofs64table, idx64*8, 8)))[0] - return ofs + def exists(self, hash): + """Return nonempty if the object exists in this index.""" + return hash and (self._idx_from_hash(hash) != None) and True or None + + def __len__(self): + return int(self.fanout[255]) def _idx_from_hash(self, hash): global _total_searches, _total_steps @@ -169,13 +162,12 @@ class PackIdxV2: b1 = ord(hash[0]) start = self.fanout[b1-1] # range -1..254 end = self.fanout[b1] # range 0..255 - buf = buffer(self.map, 8 + 256*4, end*20) want = str(hash) _total_steps += 1 # lookup table is a step while start < end: _total_steps += 1 mid = start + (end-start)/2 - v = str(buf[mid*20:(mid+1)*20]) + v = self._idx_to_hash(mid) if v < want: start = mid+1 elif v > want: @@ -184,23 +176,62 @@ class PackIdxV2: return mid return None - def find_offset(self, hash): - """Get the offset of an object inside the index file.""" - idx = self._idx_from_hash(hash) - if idx != None: - return self._ofs_from_idx(idx) - return None - def exists(self, hash): - """Return nonempty if the object exists in this index.""" - return hash and (self._idx_from_hash(hash) != None) and True or None +class PackIdxV1(PackIdx): + """Object representation of a Git pack index (version 1) file.""" + def __init__(self, filename, f): + self.name = filename + self.idxnames = [self.name] + self.map = mmap_read(f) + self.fanout = list(struct.unpack('!256I', + str(buffer(self.map, 0, 256*4)))) + self.fanout.append(0) # entry "-1" + nsha = self.fanout[255] + self.shatable = buffer(self.map, 256*4, nsha*24) + + def _ofs_from_idx(self, idx): + return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0] + + def _idx_to_hash(self, idx): + return str(self.shatable[idx*24+4 : idx*24+24]) def __iter__(self): for i in xrange(self.fanout[255]): - yield buffer(self.map, 8 + 256*4 + 20*i, 20) + yield buffer(self.map, 256*4 + 24*i + 4, 20) - def __len__(self): - return int(self.fanout[255]) + +class PackIdxV2(PackIdx): + """Object representation of a Git pack index (version 2) file.""" + def __init__(self, filename, f): + self.name = filename + self.idxnames = [self.name] + self.map = mmap_read(f) + assert(str(self.map[0:8]) == '\377tOc\0\0\0\2') + self.fanout = list(struct.unpack('!256I', + str(buffer(self.map, 8, 256*4)))) + self.fanout.append(0) # entry "-1" + nsha = self.fanout[255] + self.shatable = buffer(self.map, 8 + 256*4, nsha*20) + self.ofstable = buffer(self.map, + 8 + 256*4 + nsha*20 + nsha*4, + nsha*4) + self.ofs64table = buffer(self.map, + 8 + 256*4 + nsha*20 + nsha*4 + nsha*4) + + def _ofs_from_idx(self, idx): + ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0] + if ofs & 0x80000000: + idx64 = ofs & 0x7fffffff + ofs = struct.unpack('!I', + str(buffer(self.ofs64table, idx64*8, 8)))[0] + return ofs + + def _idx_to_hash(self, idx): + return str(self.shatable[idx*20:(idx+1)*20]) + + def __iter__(self): + for i in xrange(self.fanout[255]): + yield buffer(self.map, 8 + 256*4 + 20*i, 20) extract_bits = _helpers.extract_bits @@ -389,7 +420,7 @@ class PackIdxList: for f in os.listdir(self.dir): full = os.path.join(self.dir, f) if f.endswith('.idx') and not d.get(full): - ix = PackIdx(full) + ix = open_idx(full) d[full] = ix self.packs = list(set(d.values())) debug1('PackIdxList: using %d index%s.\n' @@ -432,7 +463,7 @@ def open_idx(filename): raise GitError('%s: expected idx file version 2, got %d' % (filename, version)) else: - raise GitError('version 1 idx files not supported') + return PackIdxV1(filename, f) elif filename.endswith('.midx'): return PackMidx(filename) else: diff --git a/lib/bup/t/tgit.py b/lib/bup/t/tgit.py index 83faadc..c61b351 100644 --- a/lib/bup/t/tgit.py +++ b/lib/bup/t/tgit.py @@ -70,7 +70,7 @@ def testpacks(): WVPASS(os.path.exists(nameprefix + '.pack')) WVPASS(os.path.exists(nameprefix + '.idx')) - r = git.PackIdx(nameprefix + '.idx') + r = git.open_idx(nameprefix + '.idx') print repr(r.fanout) for i in range(nobj):