1 import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
# Default fallback bup repository: ~/.bup in the invoking user's home dir.
home_repodir = os.path.expanduser('~/.bup')
9 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
10 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
13 class GitError(Exception):
20 raise GitError('You should call check_repo_or_die()')
21 gd = os.path.join(repodir, '.git')
22 if os.path.exists(gd):
24 return os.path.join(repodir, sub)
27 def _encode_packobj(type, content):
30 szbits = (sz & 0x0f) | (_typemap[type]<<4)
39 z = zlib.compressobj(1)
41 yield z.compress(content)
45 def _encode_looseobj(type, content):
46 z = zlib.compressobj(1)
47 yield z.compress('%s %d\0' % (type, len(content)))
48 yield z.compress(content)
52 def _decode_looseobj(buf):
54 s = zlib.decompress(buf)
61 assert(type in _typemap)
62 assert(sz == len(content))
63 return (type, content)
66 def _decode_packobj(buf):
69 type = _typermap[(c & 0x70) >> 4]
76 sz |= (c & 0x7f) << shift
80 return (type, zlib.decompress(buf[i+1:]))
84 def __init__(self, filename):
86 self.map = mmap_read(open(filename))
87 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
88 self.fanout = list(struct.unpack('!256I',
89 str(buffer(self.map, 8, 256*4))))
90 self.fanout.append(0) # entry "-1"
91 nsha = self.fanout[255]
92 self.ofstable = buffer(self.map,
93 8 + 256*4 + nsha*20 + nsha*4,
95 self.ofs64table = buffer(self.map,
96 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
98 def _ofs_from_idx(self, idx):
99 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
101 idx64 = ofs & 0x7fffffff
102 ofs = struct.unpack('!I',
103 str(buffer(self.ofs64table, idx64*8, 8)))[0]
106 def _idx_from_hash(self, hash):
107 assert(len(hash) == 20)
109 start = self.fanout[b1-1] # range -1..254
110 end = self.fanout[b1] # range 0..255
111 buf = buffer(self.map, 8 + 256*4, end*20)
114 mid = start + (end-start)/2
115 v = str(buf[mid*20:(mid+1)*20])
124 def find_offset(self, hash):
125 idx = self._idx_from_hash(hash)
127 return self._ofs_from_idx(idx)
def exists(self, hash):
    """Return True if *hash* is present in this pack index, else None.

    A falsy *hash* (empty or None) also yields None, mirroring the
    original truthy-chain expression.
    """
    if not hash:
        return None
    if self._idx_from_hash(hash) is not None:
        return True
    return None
134 for i in xrange(self.fanout[255]):
135 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
138 return self.fanout[255]
141 def extract_bits(buf, bits):
143 v = struct.unpack('!Q', buf[0:8])[0]
144 v = (v >> (64-bits)) & mask
149 def __init__(self, filename):
151 assert(filename.endswith('.midx'))
152 self.map = mmap_read(open(filename))
153 assert(str(self.map[0:8]) == 'MIDX\0\0\0\1')
154 self.bits = struct.unpack('!I', self.map[8:12])[0]
155 self.entries = 2**self.bits
156 self.fanout = buffer(self.map, 12, self.entries*8)
157 shaofs = 12 + self.entries*8
158 nsha = self._fanget(self.entries-1)
159 self.shalist = buffer(self.map, shaofs, nsha*20)
160 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
162 def _fanget(self, i):
164 s = self.fanout[start:start+8]
165 return struct.unpack('!Q', s)[0]
167 def exists(self, hash):
169 el = extract_bits(want, self.bits)
171 start = self._fanget(el-1)
174 end = self._fanget(el)
176 mid = start + (end-start)/2
177 v = str(self.shalist[mid*20:(mid+1)*20])
187 for i in xrange(self._fanget(self.entries-1)):
188 yield buffer(self.shalist, i*20, 20)
191 return self._fanget(self.entries-1)
195 class MultiPackIndex:
196 def __init__(self, dir):
198 assert(_mpi_count == 0)
208 assert(_mpi_count == 0)
210 def exists(self, hash):
211 if hash in self.also:
213 for i in range(len(self.packs)):
216 # reorder so most recently used packs are searched first
217 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
223 d = dict([(p.name, 1) for p in self.packs])
224 if os.path.exists(self.dir):
227 for f in os.listdir(self.dir):
228 full = os.path.join(self.dir, f)
229 if f.endswith('.midx') and not d.get(full):
230 midxl.append(PackMidx(full))
231 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
234 for sub in ix.idxnames:
235 if not d.get(os.path.join(self.dir, sub)):
236 self.packs.append(ix)
238 for name in ix.idxnames:
239 d[os.path.join(self.dir, name)] = 1
241 for f in os.listdir(self.dir):
242 full = os.path.join(self.dir, f)
243 if f.endswith('.idx') and not d.get(full):
244 self.packs.append(PackIndex(full))
246 #log('MultiPackIndex: using %d packs.\n' % len(self.packs))
255 def calc_hash(type, content):
256 header = '%s %d\0' % (type, len(content))
257 sum = sha.sha(header)
262 def _shalist_sort_key(ent):
263 (mode, name, id) = ent
264 if stat.S_ISDIR(int(mode, 8)):
271 def __init__(self, objcache_maker=None):
276 self.objcache_maker = objcache_maker
282 def _make_objcache(self):
283 if not self.objcache:
284 if self.objcache_maker:
285 self.objcache = self.objcache_maker()
287 self.objcache = MultiPackIndex(repo('objects/pack'))
291 self._make_objcache()
292 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
293 self.file = os.fdopen(fd, 'w+b')
294 assert(name.endswith('.pack'))
295 self.filename = name[:-5]
296 self.file.write('PACK\0\0\0\2\0\0\0\0')
298 def _raw_write(self, datalist):
303 self.outbytes += len(d)
306 def _write(self, bin, type, content):
309 self._raw_write(_encode_packobj(type, content))
312 def breakpoint(self):
314 self.outbytes = self.count = 0
def write(self, type, content):
    """Unconditionally write an object of *type* to the pack.

    Hashes the content first, then delegates to _write(); returns
    whatever _write() returns.
    """
    bin = calc_hash(type, content)
    return self._write(bin, type, content)
def exists(self, id):
    """Ask the object cache whether sha *id* is already stored.

    Lazily builds the cache via _make_objcache() on first use.
    """
    cache = self.objcache
    if not cache:
        self._make_objcache()
        cache = self.objcache
    return cache.exists(id)
325 def maybe_write(self, type, content):
326 bin = calc_hash(type, content)
327 if not self.exists(bin):
328 self._write(bin, type, content)
329 self.objcache.add(bin)
def new_blob(self, blob):
    """Store *blob* as a git blob object (via maybe_write, which skips
    objects already present) and pass through its result."""
    return self.maybe_write('blob', blob)
335 def new_tree(self, shalist):
336 shalist = sorted(shalist, key = _shalist_sort_key)
338 for (mode,name,bin) in shalist:
340 assert(mode[0] != '0')
342 assert(len(bin) == 20)
343 l.append('%s %s\0%s' % (mode,name,bin))
344 return self.maybe_write('tree', ''.join(l))
346 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
348 if tree: l.append('tree %s' % tree.encode('hex'))
349 if parent: l.append('parent %s' % parent.encode('hex'))
350 if author: l.append('author %s %s' % (author, _git_date(adate)))
351 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
354 return self.maybe_write('commit', '\n'.join(l))
356 def new_commit(self, parent, tree, msg):
358 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
359 commit = self._new_commit(tree, parent,
360 userline, now, userline, now,
369 os.unlink(self.filename + '.pack')
373 if not f: return None
377 # update object count
379 cp = struct.pack('!i', self.count)
383 # calculate the pack sha1sum
390 f.write(sum.digest())
394 p = subprocess.Popen(['git', 'index-pack', '-v',
396 self.filename + '.pack'],
397 preexec_fn = _gitenv,
398 stdout = subprocess.PIPE)
399 out = p.stdout.read().strip()
400 _git_wait('git index-pack', p)
402 raise GitError('git index-pack produced no output')
403 nameprefix = repo('objects/pack/%s' % out)
404 if os.path.exists(self.filename + '.map'):
405 os.unlink(self.filename + '.map')
406 os.rename(self.filename + '.pack', nameprefix + '.pack')
407 os.rename(self.filename + '.idx', nameprefix + '.idx')
415 return time.strftime('%s %z', time.localtime(date))
419 os.environ['GIT_DIR'] = os.path.abspath(repo())
422 def list_refs(refname = None):
423 argv = ['git', 'show-ref', '--']
426 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
427 out = p.stdout.read().strip()
428 rv = p.wait() # not fatal
432 for d in out.split('\n'):
433 (sha, name) = d.split(' ', 1)
434 yield (name, sha.decode('hex'))
437 def read_ref(refname):
438 l = list(list_refs(refname))
447 assert(not ref.startswith('-'))
448 argv = ['git', 'rev-list', '--pretty=format:%ct', ref, '--']
449 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
453 if s.startswith('commit '):
454 commit = s[7:].decode('hex')
458 rv = p.wait() # not fatal
460 raise GitError, 'git rev-list returned error %d' % rv
463 def update_ref(refname, newval, oldval):
466 assert(refname.startswith('refs/heads/'))
467 p = subprocess.Popen(['git', 'update-ref', refname,
468 newval.encode('hex'), oldval.encode('hex')],
469 preexec_fn = _gitenv)
470 _git_wait('git update-ref', p)
473 def guess_repo(path=None):
478 repodir = os.environ.get('BUP_DIR')
480 repodir = os.path.expanduser('~/.bup')
483 def init_repo(path=None):
486 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
487 raise GitError('"%d" exists but is not a directory\n' % d)
488 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
489 preexec_fn = _gitenv)
490 _git_wait('git init', p)
491 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
492 stdout=sys.stderr, preexec_fn = _gitenv)
493 _git_wait('git config', p)
496 def check_repo_or_die(path=None):
498 if not os.path.isdir(repo('objects/pack/.')):
499 if repodir == home_repodir:
502 log('error: %r is not a bup/git repository\n' % repo())
508 while ofs < len(buf):
509 z = buf[ofs:].find('\0')
511 spl = buf[ofs:ofs+z].split(' ', 1)
512 assert(len(spl) == 2)
513 sha = buf[ofs+z+1:ofs+z+1+20]
515 yield (spl[0], spl[1], sha)
522 p = subprocess.Popen(['git', '--version'],
523 stdout=subprocess.PIPE)
524 gvs = p.stdout.read()
525 _git_wait('git --version', p)
526 m = re.match(r'git version (\S+.\S+)', gvs)
528 raise GitError('git --version weird output: %r' % gvs)
529 _ver = tuple(m.group(1).split('.'))
530 needed = ('1','5', '3', '1')
532 raise GitError('git version %s or higher is required; you have %s'
533 % ('.'.join(needed), '.'.join(_ver)))
537 def _git_wait(cmd, p):
540 raise GitError('%s returned %d' % (cmd, rv))
543 def _git_capture(argv):
544 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
546 _git_wait(repr(argv), p)
554 wanted = ('1','5','6')
557 log('warning: git version < %s; bup will be slow.\n'
560 self.get = self._slow_get
562 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
563 stdin=subprocess.PIPE,
564 stdout=subprocess.PIPE,
565 preexec_fn = _gitenv)
566 self.get = self._fast_get
568 def _fast_get(self, id):
569 assert(id.find('\n') < 0)
570 assert(id.find('\r') < 0)
572 self.p.stdin.write('%s\n' % id)
573 hdr = self.p.stdout.readline()
574 if hdr.endswith(' missing\n'):
575 raise KeyError('blob %r is missing' % id)
577 if len(spl) != 3 or len(spl[0]) != 40:
578 raise GitError('expected blob, got %r' % spl)
579 (hex, type, size) = spl
580 it = iter(chunkyreader(self.p.stdout, int(spl[2])))
585 except StopIteration:
588 assert(self.p.stdout.readline() == '\n')
590 def _slow_get(self, id):
591 assert(id.find('\n') < 0)
592 assert(id.find('\r') < 0)
594 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
597 p = subprocess.Popen(['git', 'cat-file', type, id],
598 stdout=subprocess.PIPE,
599 preexec_fn = _gitenv)
600 for blob in chunkyreader(p.stdout):
602 _git_wait('git cat-file', p)
610 treefile = ''.join(it)
611 for (mode, name, sha) in _treeparse(treefile):
612 for blob in self.join(sha.encode('hex')):
614 elif type == 'commit':
615 treeline = ''.join(it).split('\n')[0]
616 assert(treeline.startswith('tree '))
617 for blob in self.join(treeline[5:]):
620 raise GitError('invalid object type %r: expected blob/tree/commit'
624 for d in self._join(self.get(id)):