1 import os, errno, zlib, time, sha, subprocess, struct, mmap, stat, re
# Per-user repository location: '~/.bup' with '~' expanded.  Compared against
# `repodir` in the "is not a bup/git repository" check later in the file.
5 home_repodir = os.path.expanduser('~/.bup')
# Exception type raised by this file for git / repository failures
# (the class body is not part of this listing).
9 class GitError(Exception):
# NOTE: fragment -- the enclosing function header and some of its lines are
# not part of this listing (presumably this is the repo() helper; confirm).
# Raised on a path whose guarding condition is not shown.
16 raise GitError('You should call check_repo_or_die()')
# Probe for a '.git' subdirectory of repodir (the branch body is not shown).
17 gd = os.path.join(repodir, '.git')
18 if os.path.exists(gd):
# Result: `sub` joined onto repodir.
20 return os.path.join(repodir, sub)
# Constructor: memory-maps an index file and locates its tables.
# NOTE: several lines of this method are missing from this listing (for
# example the one that opens `f`, and the tail of the ofstable expression).
24 def __init__(self, filename):
# Length 0 maps the whole file; the mapping is shared and read-only.
27 self.map = mmap.mmap(f.fileno(), 0,
28 mmap.MAP_SHARED, mmap.PROT_READ)
29 f.close() # map will persist beyond file close
# The first 8 bytes must equal this literal: 4 marker bytes, then 0,0,0,2.
30 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
# 256 big-endian unsigned 32-bit counters start right after the 8-byte header.
31 self.fanout = list(struct.unpack('!256I',
32 str(buffer(self.map, 8, 256*4))))
# Extra trailing 0 so that index -1 (used as fanout[b1-1] when b1 == 0) reads 0.
33 self.fanout.append(0) # entry "-1"
# fanout[255] is used below as the number of 20-byte entries in the table.
34 nsha = self.fanout[255]
# ofstable begins after: header (8) + fanout (256*4) + nsha*20 + nsha*4 bytes.
35 self.ofstable = buffer(self.map,
36 8 + 256*4 + nsha*20 + nsha*4,
# ofs64table begins a further nsha*4 bytes in and runs to the end of the map.
38 self.ofs64table = buffer(self.map,
39 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
# Translate table position `idx` into an offset read from self.ofstable.
# NOTE: lines are missing from this listing (the condition guarding the
# 64-bit lookup and the final return are not shown).
41 def _ofs_from_idx(self, idx):
# Each ofstable entry is a 4-byte big-endian unsigned int.
42 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
# The low 31 bits select an 8-byte entry in ofs64table.
44 idx64 = ofs & 0x7fffffff
# NOTE(review): the slice below is 8 bytes long but '!I' describes 4 bytes;
# struct.unpack requires an exact-size argument, so this would raise
# struct.error.  '!Q' (8-byte unsigned) looks intended -- confirm.
45 ofs = struct.unpack('!I',
46 str(buffer(self.ofs64table, idx64*8, 8)))[0]
# Locate a 20-byte hash in the mapped hash table.
# NOTE: lines are missing from this listing (where b1 is computed, the loop
# header, the comparison and the return values are not shown).
49 def _idx_from_hash(self, hash):
50 assert(len(hash) == 20)
# The fanout table brackets the candidate range [start, end) for key b1.
52 start = self.fanout[b1-1] # range -1..254
53 end = self.fanout[b1] # range 0..255
# View over the hash table (right after header + fanout), `end` entries long.
54 buf = buffer(self.map, 8 + 256*4, end*20)
# Midpoint of the current range; '/' on ints is integer division in the
# Python 2 dialect this file uses (buffer(), sha).
57 mid = start + (end-start)/2
# The 20-byte entry at position `mid`.
58 v = buffer(buf, mid*20, 20)
# Look `hash` up with _idx_from_hash() and convert the resulting position to
# an offset with _ofs_from_idx().
# NOTE: a line between the lookup and the return is missing from this
# listing, as is whatever follows the return.
67 def find_offset(self, hash):
68 idx = self._idx_from_hash(hash)
70 return self._ofs_from_idx(idx)
def exists(self, hash):
    """Report whether this index contains the object id `hash`.

    The id is looked up with self._idx_from_hash().

    Returns:
        True when the lookup yields a position (position 0 included),
        otherwise None.
    """
    # Identity test against None, so a valid position of 0 still counts
    # as found; the conditional expression replaces the and/or idiom.
    found = self._idx_from_hash(hash) is not None
    return True if found else None
# Constructor: wraps every '*.idx' file found in self.dir in a PackIndex and
# collects them in self.packs.
# NOTE: the lines that initialise self.dir and self.packs are missing from
# this listing.
78 def __init__(self, dir):
82 for f in os.listdir(self.dir):
83 if f.endswith('.idx'):
84 self.packs.append(PackIndex(os.path.join(self.dir, f)))
# Lookup across self.packs.
# NOTE: lines are missing from this listing (the per-pack test, the binding
# of `p`, and the return statements are not shown).
86 def exists(self, hash):
# Iterate by position so the pack at index i can be moved.
89 for i in range(len(self.packs)):
92 # reorder so most recently used packs are searched first
# New list: `p` first, then every other pack in its previous order.
93 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
# Start of the object-id computation for (type, content).
# NOTE: the remaining lines (feeding the content, returning a value) are
# missing from this listing.
104 def calc_hash(type, content):
# Header "<type> <length>\0" is hashed first.
105 header = '%s %d\0' % (type, len(content))
# `sha` is the Python 2 SHA-1 module.
106 sum = sha.sha(header)
# Sort key for (mode, name, id) entries; used by new_tree() via sorted(key=...).
# NOTE: the branch bodies / return values are missing from this listing.
111 def _shalist_sort_key(ent):
112 (mode, name, id) = ent
# `mode` is an octal string; directory entries take the branch below.
113 if stat.S_ISDIR(int(mode, 8)):
# Numeric code per object type; _write() shifts it into bits 4+ of the first
# header byte (`_typemap[type]<<4`).
119 _typemap = dict(blob=3, tree=2, commit=1, tag=8)
# Constructor.  `objcache_maker` is an optional zero-argument factory that
# _make_objcache() calls to build self.objcache.
# NOTE: the other attribute initialisations are missing from this listing.
121 def __init__(self, objcache_maker=None):
126 self.objcache_maker = objcache_maker
# Create self.objcache on first use.
# NOTE: one line between the two assignments is missing from this listing
# (presumably the `else:` that makes the second one the fallback; confirm).
132 def _make_objcache(self):
133 if not self.objcache:
# Prefer the factory supplied to the constructor.
134 if self.objcache_maker:
135 self.objcache = self.objcache_maker()
# Index over the repository's objects/pack directory.
137 self.objcache = MultiPackIndex(repo('objects/pack'))
# NOTE: fragment -- the enclosing method header is not part of this listing.
# Starts a new pack file.
141 self._make_objcache()
# Working file name under objects/, made unique by the process id.
142 self.filename = repo('objects/bup%d' % os.getpid())
143 self.file = open(self.filename + '.pack', 'w+')
# 12 bytes: 'PACK', then 0,0,0,2, then four zero bytes (the "update object
# count" step later in the file packs self.count as 4 bytes).
144 self.file.write('PACK\0\0\0\2\0\0\0\0')
# Low-level write of the pieces in `datalist`.
# NOTE: most of this method is missing from this listing; only the byte
# accounting line is visible (`d` is bound on a line that is not shown).
146 def _raw_write(self, datalist):
# Running total of bytes written.
151 self.outbytes += len(d)
# Encode one object (header byte(s) + compressed content) for the pack.
# NOTE: lines are missing from this listing (how `sz` and `out` are set up
# and updated, and what is returned, are not shown).
154 def _write(self, bin, type, content):
# First header byte: low 4 bits of the size, type code in bits 4 and up.
161 szbits = (sz & 0x0f) | (_typemap[type]<<4)
# High bit is set while `sz` is still non-zero.
164 if sz: szbits |= 0x80
165 out.append(chr(szbits))
# Content is deflated with zlib at compression level 1.
171 z = zlib.compressobj(1)
172 out.append(z.compress(content))
173 out.append(z.flush())
# NOTE: lines are missing from this listing; only the counter reset is shown.
178 def breakpoint(self):
# Reset both the byte counter and the object counter.
180 self.outbytes = self.count = 0
def write(self, type, content):
    """Write an object unconditionally.

    Computes the id of (type, content) with calc_hash() and passes the id,
    type and content on to self._write(), returning its result.
    """
    object_id = calc_hash(type, content)
    return self._write(object_id, type, content)
def exists(self, id):
    """Ask the object cache whether `id` is already known.

    The cache is built through self._make_objcache() when self.objcache is
    still falsy; the answer is whatever self.objcache.exists(id) returns.
    """
    cache = self.objcache
    if not cache:
        self._make_objcache()
        cache = self.objcache
    return cache.exists(id)
# Write (type, content) only when its id is not already known, and record
# newly written ids in the object cache.
# NOTE: whatever follows the `if` block (e.g. a return) is missing from this
# listing.
191 def maybe_write(self, type, content):
192 bin = calc_hash(type, content)
# self.exists() also makes sure self.objcache has been created.
193 if not self.exists(bin):
194 self._write(bin, type, content)
195 self.objcache.add(bin)
def new_blob(self, blob):
    """Store `blob` as an object of type 'blob'.

    Delegates to self.maybe_write() and returns its result.
    """
    kind = 'blob'
    return self.maybe_write(kind, blob)
# Build and store a 'tree' object from (mode, name, bin) entries.
# NOTE: lines are missing from this listing (e.g. where `l` is created).
201 def new_tree(self, shalist):
# Entries are ordered with _shalist_sort_key before being serialised.
202 shalist = sorted(shalist, key = _shalist_sort_key)
204 for (mode,name,bin) in shalist:
# The mode string must not start with '0'.
206 assert(mode[0] != '0')
# `bin` must be exactly 20 characters long.
208 assert(len(bin) == 20)
# One entry is "<mode> <name>\0<bin>".
209 l.append('%s %s\0%s' % (mode,name,bin))
210 return self.maybe_write('tree', ''.join(l))
# Assemble the text of a 'commit' object and store it via maybe_write().
# NOTE: lines are missing from this listing (where `l` is created and where
# `msg` is appended are not shown).
212 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
# Each header line is emitted only when its value is truthy; tree and parent
# ids are written hex-encoded.
214 if tree: l.append('tree %s' % tree.encode('hex'))
215 if parent: l.append('parent %s' % parent.encode('hex'))
# Dates are formatted by _git_date().
216 if author: l.append('author %s %s' % (author, _git_date(adate)))
217 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
# Collected lines are joined with newlines.
220 return self.maybe_write('commit', '\n'.join(l))
# Create a commit using the same identity line and timestamp for both author
# and committer.
# NOTE: lines are missing from this listing (`now` is bound on a line that is
# not shown, and the call below is cut off before its last argument).
# Note the argument order: this method takes (parent, tree, msg) while
# _new_commit() takes (tree, parent, ...).
222 def new_commit(self, parent, tree, msg):
# Identity line: "<full name> <user@host>".
224 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
225 commit = self._new_commit(tree, parent,
226 userline, now, userline, now,
# NOTE: fragment -- the enclosing method header is not part of this listing.
# Removes the '<self.filename>.pack' file.
235 os.unlink(self.filename + '.pack')
# NOTE: fragment -- the enclosing method header and many of its lines are not
# part of this listing.  Visible steps: finish the pack file, run
# `git index-pack` on it, then move the .pack/.idx pair into objects/pack.
# Nothing to do when `f` is falsy.
239 if not f: return None
242 # update object count
# self.count encoded as a 4-byte big-endian signed int.
244 cp = struct.pack('!i', self.count)
248 # calculate the pack sha1sum
# Writes the digest held in `sum` (how `sum` is fed is not shown).
255 f.write(sum.digest())
# Run git index-pack with the repository environment applied in the child
# (one argument line of this call is missing from the listing).
260 p = subprocess.Popen(['git', 'index-pack', '-v',
262 self.filename + '.pack'],
263 preexec_fn = _gitenv,
264 stdout = subprocess.PIPE)
# The command's stripped stdout is used below as the final base name.
265 out = p.stdout.read().strip()
266 _git_wait('git index-pack', p)
# Raised on a condition that is not shown (per the message: empty output).
268 raise GitError('git index-pack produced no output')
269 nameprefix = repo('objects/pack/%s' % out)
# Move both files to objects/pack/<out>.pack and objects/pack/<out>.idx.
270 os.rename(self.filename + '.pack', nameprefix + '.pack')
271 os.rename(self.filename + '.idx', nameprefix + '.idx')
# NOTE: fragment -- the enclosing function header is not part of this listing
# (presumably _git_date(), which _new_commit() calls; confirm).
# Formats `date` (interpreted in local time) as "<%s> <%z>".
# NOTE(review): '%s' is not among the strftime directives documented by
# Python; it depends on the platform C library -- confirm portability.
279 return time.strftime('%s %z', time.localtime(date))
# NOTE: fragment -- the enclosing function header is not part of this listing
# (presumably _gitenv(), which is passed as preexec_fn to the git
# subprocesses in this file; confirm).
# Points GIT_DIR at the absolute path of the repository.
283 os.environ['GIT_DIR'] = os.path.abspath(repo())
# Resolve `refname` with `git show-ref` and return the id in binary form.
# NOTE: lines between the wait and the return are missing from this listing
# (how a failed or empty lookup is handled is not shown).
286 def read_ref(refname):
# '--' ends option parsing, so refname cannot be taken for an option.
287 p = subprocess.Popen(['git', 'show-ref', '--', refname],
288 preexec_fn = _gitenv,
289 stdout = subprocess.PIPE)
290 out = p.stdout.read().strip()
291 rv = p.wait() # not fatal
# First whitespace-separated field of the output, hex-decoded.
295 return out.split()[0].decode('hex')
# Set `refname` to `newval` using `git update-ref`, passing `oldval` as the
# third positional argument; both ids are hex-encoded for the command line.
# NOTE: two lines after the header are missing from this listing.
300 def update_ref(refname, newval, oldval):
303 p = subprocess.Popen(['git', 'update-ref', '--', refname,
304 newval.encode('hex'), oldval.encode('hex')],
305 preexec_fn = _gitenv)
# _git_wait() raises GitError when the command reports failure.
306 _git_wait('git update-ref', p)
# Choose the repository directory.
# NOTE: lines are missing from this listing (how `path` is used and the
# conditions between the two assignments are not shown).
309 def guess_repo(path=None):
# Candidate from the BUP_DIR environment variable (None when unset).
314 repodir = os.environ.get('BUP_DIR')
# Candidate in the user's home directory.
316 repodir = os.path.expanduser('~/.bup')
# Initialise a bare git repository and set pack.indexVersion to 2.
# NOTE: two lines after the header are missing from this listing (`d` is
# bound there).
319 def init_repo(path=None):
# Refuse to continue when `d` exists but is not a directory.
322 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
# NOTE(review): `d` is a path string (it is passed to os.path.join above), so
# the '%d' conversion raises TypeError instead of producing this message;
# '%s' looks intended -- confirm.
323 raise GitError('"%d" exists but is not a directory\n' % d)
# NOTE(review): `sys` is not on the visible import line of this file; verify
# it is in scope.
# The commands' stdout is redirected to stderr.
324 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
325 preexec_fn = _gitenv)
326 _git_wait('git init', p)
327 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
328 stdout=sys.stderr, preexec_fn = _gitenv)
329 _git_wait('git config', p)
# Verify that the repository has an objects/pack directory.
# NOTE: lines are missing from this listing (what happens for the home
# repository, and what follows the error message, are not shown).
332 def check_repo_or_die(path=None):
334 if not os.path.isdir(repo('objects/pack/.')):
# Special case for the default per-user repository.
335 if repodir == home_repodir:
338 log('error: %r is not a bup/git repository\n' % repo())
# NOTE: fragment -- the enclosing function header and some lines are not part
# of this listing (e.g. where `ofs` is initialised and advanced).
# Generator body yielding (first field, second field, 20-byte id) per entry.
344 while ofs < len(buf):
# Position of the next NUL, relative to `ofs`.
# NOTE(review): buf[ofs:] copies the remainder on every pass;
# buf.find('\0', ofs) would search in place (result then absolute) -- optional.
345 z = buf[ofs:].find('\0')
# Text before the NUL is "<a> <b>", split on the first space only.
347 spl = buf[ofs:ofs+z].split(' ', 1)
348 assert(len(spl) == 2)
# The 20 bytes right after the NUL.
349 sha = buf[ofs+z+1:ofs+z+1+20]
351 yield (spl[0], spl[1], sha)
# NOTE: fragment -- the enclosing function header and some lines are not part
# of this listing (including the conditions guarding both raise statements).
# Runs `git --version` and parses the version into the tuple `_ver`.
357 p = subprocess.Popen(['git', '--version'],
358 stdout=subprocess.PIPE)
359 gvs = p.stdout.read()
360 _git_wait('git --version', p)
# NOTE(review): the '.' between the two \S+ groups is unescaped and matches
# any character; r'\.' may be intended -- confirm.
361 m = re.match(r'git version (\S+.\S+)', gvs)
363 raise GitError('git --version weird output: %r' % gvs)
# Components are kept as strings.
# NOTE(review): if the elided check compares these tuples directly, the
# comparison is lexicographic on strings (e.g. '10' < '5') -- confirm.
364 _ver = tuple(m.group(1).split('.'))
365 needed = ('1','5','4')
367 raise GitError('git version %s or higher is required; you have %s'
368 % ('.'.join(needed), '.'.join(_ver)))
# Wait for subprocess `p`; `cmd` is only used to label the error.
# NOTE: lines are missing from this listing (where `rv` is obtained and
# tested is not shown).
372 def _git_wait(cmd, p):
375 raise GitError('%s returned %d' % (cmd, rv))
# Run `argv` with the repository environment and a piped stdout.
# NOTE: lines are missing from this listing (reading the output and the
# return statement are not shown).
378 def _git_capture(argv):
379 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
# repr(argv) labels the command in any GitError raised by _git_wait().
381 _git_wait(repr(argv), p)
# NOTE: fragment -- the enclosing method header and the branching lines are
# not part of this listing.  Visible behaviour: self.get is bound either to
# _slow_get or, after starting a `git cat-file --batch` child, to _fast_get.
389 wanted = ('1','5','6')
# The warning is cut off in this listing before its format arguments.
392 log('warning: git version < %s; bup will be slow.\n'
395 self.get = self._slow_get
# Child process with both stdin and stdout piped; _fast_get() writes ids to
# its stdin and reads replies from its stdout.
397 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
398 stdin=subprocess.PIPE,
399 stdout=subprocess.PIPE,
400 preexec_fn = _gitenv)
401 self.get = self._fast_get
# Fetch object `id` through the `git cat-file --batch` child in self.p.
# NOTE: lines are missing from this listing (where `spl` is derived from the
# header line, and what is yielded, are not shown).
403 def _fast_get(self, id):
# The request is one line, so the id must not contain line terminators.
404 assert(id.find('\n') < 0)
405 assert(id.find('\r') < 0)
407 self.p.stdin.write('%s\n' % id)
# First line of the reply.
408 hdr = self.p.stdout.readline()
409 if hdr.endswith(' missing\n'):
410 raise GitError('blob %r is missing' % id)
# Expect exactly three fields, the first being a 40-character id.
412 if len(spl) != 3 or len(spl[0]) != 40:
413 raise GitError('expected blob, got %r' % spl)
414 (hex, type, size) = spl
# Read exactly `size` bytes of payload in chunks.
416 for blob in chunkyreader(self.p.stdout, int(spl[2])):
# Consume the newline after the payload.
# NOTE(review): the readline() sits inside an assert; under `python -O`
# asserts are removed, so the newline would stay unread in the pipe -- confirm.
418 assert(self.p.stdout.readline() == '\n')
# Fetch object `id` by spawning git for each request (no persistent child).
# NOTE: lines are missing from this listing (what is yielded is not shown).
420 def _slow_get(self, id):
421 assert(id.find('\n') < 0)
422 assert(id.find('\r') < 0)
# First command: ask git for the object's type.
424 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
# Second command: stream the object's content for that type.
427 p = subprocess.Popen(['git', 'cat-file', type, id],
428 stdout=subprocess.PIPE,
429 preexec_fn = _gitenv)
430 for blob in chunkyreader(p.stdout):
432 _git_wait('git cat-file', p)
# NOTE: fragment -- the enclosing method header, the first branch(es) and the
# yield lines are not part of this listing.  Visible behaviour: recursion
# through self.join() for trees and commits.
# Tree: the whole object is read, then each entry's id is followed.
440 treefile = ''.join(it)
441 for (mode, name, sha) in _treeparse(treefile):
442 for blob in self.join(sha.encode('hex')):
444 elif type == 'commit':
# Commit: only the first line is used; it must be "tree <id>".
445 treeline = ''.join(it).split('\n')[0]
446 assert(treeline.startswith('tree '))
# Follow the id after the 5-character 'tree ' prefix.
447 for blob in self.join(treeline[5:]):
# The raise is cut off in this listing before its format arguments.
450 raise GitError('invalid object type %r: expected blob/tree/commit'
# NOTE: fragment -- the enclosing method header and the loop body are not part
# of this listing.  Iterates over what self._join() produces for the object
# returned by self.get(id).
454 for d in self._join(self.get(id)):