1 import os, errno, zlib, time, sha, subprocess, struct, stat, re, tempfile
3 from bup.helpers import *
# Default on-disk location of the bup repository, plus the numeric git
# object-type codes used in pack entry headers (and the reverse map).
7 home_repodir = os.path.expanduser('~/.bup')
10 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
11 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
14 class GitError(Exception):
21 raise GitError('You should call check_repo_or_die()')
22 gd = os.path.join(repodir, '.git')
23 if os.path.exists(gd):
25 return os.path.join(repodir, sub)
# Generator: encode `content` as a git pack-object entry — a size/type
# header followed by zlib-compressed data.
# NOTE(review): this listing is sampled; intermediate lines are elided.
28 def _encode_packobj(type, content):
# Low 4 bits of the first header byte carry size bits; the next 3 bits
# carry the numeric object-type code.
31 szbits = (sz & 0x0f) | (_typemap[type]<<4)
# Compression level 1: fastest, trades ratio for speed.
40 z = zlib.compressobj(1)
42 yield z.compress(content)
# Generator: encode `content` as a zlib-compressed git loose object:
# '<type> <len>\0' header followed by the payload.
# NOTE(review): sampled listing — a trailing z.flush() line, if any, is elided.
46 def _encode_looseobj(type, content):
47 z = zlib.compressobj(1)
48 yield z.compress('%s %d\0' % (type, len(content)))
49 yield z.compress(content)
# Inverse of _encode_looseobj(): decompress a loose object and return
# (type, content), asserting the header agrees with the payload.
# NOTE(review): the header-parsing lines are elided in this sampled listing.
53 def _decode_looseobj(buf):
55 s = zlib.decompress(buf)
62 assert(type in _typemap)
63 assert(sz == len(content))
64 return (type, content)
# Inverse of _encode_packobj(): parse the size/type header, then
# decompress the remainder. Returns (type, content).
# NOTE(review): sampled listing — the header-scanning loop is partly elided.
67 def _decode_packobj(buf):
# Bits 4-6 of the first byte are the numeric object-type code.
70 type = _typermap[(c & 0x70) >> 4]
# Remaining size bits arrive 7 at a time in subsequent bytes.
77 sz |= (c & 0x7f) << shift
81 return (type, zlib.decompress(buf[i+1:]))
# Map a git pack-index (.idx, version 2) file and locate its tables:
# 256-entry fanout, sha list, CRC table, 32-bit offsets, 64-bit offsets.
# NOTE(review): sampled listing — some argument lines are elided.
85 def __init__(self, filename):
87 self.map = mmap_read(open(filename))
# '\377tOc' magic + version 2, per the git pack-index v2 format.
88 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
89 self.fanout = list(struct.unpack('!256I',
90 str(buffer(self.map, 8, 256*4))))
91 self.fanout.append(0) # entry "-1"
# fanout[255] is the total number of shas in the index.
92 nsha = self.fanout[255]
# 32-bit offset table sits after header, fanout, shas, and CRC32s.
93 self.ofstable = buffer(self.map,
94 8 + 256*4 + nsha*20 + nsha*4,
96 self.ofs64table = buffer(self.map,
97 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
# Return the pack-file offset of table entry `idx`; entries with the
# high bit set indirect into the 64-bit offset table.
99 def _ofs_from_idx(self, idx):
100 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
102 idx64 = ofs & 0x7fffffff
# NOTE(review): '!I' unpacks only 4 bytes, but an 8-byte slice is taken
# from the 64-bit table — this looks like it should be '!Q'; confirm
# against the pack-index v2 format and the full source.
103 ofs = struct.unpack('!I',
104 str(buffer(self.ofs64table, idx64*8, 8)))[0]
# Binary-search the sorted sha table for `hash` (20 raw bytes); the
# fanout narrows the range to shas sharing the first byte.
# NOTE(review): sampled listing — loop and return lines are elided.
107 def _idx_from_hash(self, hash):
108 assert(len(hash) == 20)
110 start = self.fanout[b1-1] # range -1..254
111 end = self.fanout[b1] # range 0..255
112 buf = buffer(self.map, 8 + 256*4, end*20)
115 mid = start + (end-start)/2
116 v = str(buf[mid*20:(mid+1)*20])
# Return the pack offset of `hash` — presumably None on a miss (the
# miss branch is elided in this sampled listing; confirm).
125 def find_offset(self, hash):
126 idx = self._idx_from_hash(hash)
128 return self._ofs_from_idx(idx)
# Truthiness test: True if `hash` is present in this index, otherwise
# a falsy value.
# NOTE(review): `!= None` should be `is not None`; also note a falsy
# `hash` is returned as-is (e.g. '' or None), not normalized to None.
131 def exists(self, hash):
132 return hash and (self._idx_from_hash(hash) != None) and True or None
135 for i in xrange(self.fanout[255]):
136 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
139 return int(self.fanout[255])
# Return the top `bits` bits of buf's first 4 bytes as an integer.
# NOTE(review): sampled listing — the `mask` setup and the return
# statement are elided.
142 def extract_bits(buf, bits):
144 v = struct.unpack('!I', buf[0:4])[0]
145 v = (v >> (32-bits)) & mask
# Map a bup multi-pack index (.midx) file. Old v1 files are ignored
# with a warning by installing dummy one-entry tables so lookups miss.
150 def __init__(self, filename):
152 assert(filename.endswith('.midx'))
153 self.map = mmap_read(open(filename))
154 if str(self.map[0:8]) == 'MIDX\0\0\0\1':
155 log('Warning: ignoring old-style midx %r\n' % filename)
# Dummy fanout/sha tables: every exists() probe will fail cleanly.
158 self.fanout = buffer('\0\0\0\0')
159 self.shalist = buffer('\0'*20)
162 assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
# The fanout has 2**bits entries; extract_bits() selects the bucket.
163 self.bits = struct.unpack('!I', self.map[8:12])[0]
164 self.entries = 2**self.bits
165 self.fanout = buffer(self.map, 12, self.entries*4)
166 shaofs = 12 + self.entries*4
167 nsha = self._fanget(self.entries-1)
168 self.shalist = buffer(self.map, shaofs, nsha*20)
# Trailing NUL-separated list of the .idx filenames this midx covers.
169 self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
# Read fanout entry `i` as a big-endian 32-bit integer.
171 def _fanget(self, i):
173 s = self.fanout[start:start+4]
174 return struct.unpack('!I', s)[0]
# Binary-search the merged sha list for `hash`; the first self.bits
# bits of the sha pick the fanout bucket that bounds the search.
# NOTE(review): sampled listing — the search loop is partly elided.
176 def exists(self, hash):
178 el = extract_bits(want, self.bits)
180 start = self._fanget(el-1)
183 end = self._fanget(el)
185 mid = start + (end-start)/2
186 v = str(self.shalist[mid*20:(mid+1)*20])
196 for i in xrange(self._fanget(self.entries-1)):
197 yield buffer(self.shalist, i*20, 20)
200 return int(self._fanget(self.entries-1))
# A searchable collection of all pack indexes found in `dir`.
# NOTE(review): sampled listing — attribute setup lines are elided.
205 def __init__(self, dir):
# Enforces a singleton: the mmap'd indexes are too big to duplicate.
207 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
217 assert(_mpi_count == 0)
220 return iter(idxmerge(self.packs))
# True if `hash` is in the extra `also` set or in any loaded index;
# hits move the winning pack to the front (MRU ordering).
222 def exists(self, hash):
223 if hash in self.also:
225 for i in range(len(self.packs)):
228 # reorder so most recently used packs are searched first
229 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
# Re-scan self.dir for .idx/.midx files. Midx files supersede the
# .idx files they cover unless skip_midx (or the global ignore_midx)
# is set. NOTE(review): sampled listing — several lines are elided.
233 def refresh(self, skip_midx = False):
234 skip_midx = skip_midx or ignore_midx
# Keep already-loaded indexes, dropping midx entries when skipping them.
235 d = dict((p.name, p) for p in self.packs
236 if not skip_midx or not isinstance(p, PackMidx))
237 if os.path.exists(self.dir):
# Record which .idx names each loaded midx already covers.
240 for ix in self.packs:
241 if isinstance(ix, PackMidx):
242 for name in ix.idxnames:
243 d[os.path.join(self.dir, name)] = ix
244 for f in os.listdir(self.dir):
245 full = os.path.join(self.dir, f)
246 if f.endswith('.midx') and not d.get(full):
248 (mxd, mxf) = os.path.split(mx.name)
# Warn about midx files that reference missing .idx files.
250 for n in mx.idxnames:
251 if not os.path.exists(os.path.join(mxd, n)):
252 log(('warning: index %s missing\n' +
253 ' used by %s\n') % (n, mxf))
# Widest-coverage midx files first, so they claim their idx names.
257 midxl.sort(lambda x,y: -cmp(len(x),len(y)))
260 for sub in ix.idxnames:
261 found = d.get(os.path.join(self.dir, sub))
262 if not found or isinstance(found, PackIdx):
263 # doesn't exist, or exists but not in a midx
265 for name in ix.idxnames:
266 d[os.path.join(self.dir, name)] = ix
270 log('midx: removing redundant: %s\n'
271 % os.path.basename(ix.name))
# Finally, plain .idx files not already covered by a midx.
273 for f in os.listdir(self.dir):
274 full = os.path.join(self.dir, f)
275 if f.endswith('.idx') and not d.get(full):
278 self.packs = list(set(d.values()))
279 log('PackIdxList: using %d index%s.\n'
280 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
# Git-style object id: sha1 over '<type> <len>\0' + content.
# NOTE(review): sampled listing — the update/digest lines are elided.
289 def calc_hash(type, content):
290 header = '%s %d\0' % (type, len(content))
291 sum = sha.sha(header)
# Sort key reproducing git's tree-entry ordering, where directories
# sort as if their name had a trailing '/'.
# NOTE(review): sampled listing — the return branches are elided.
296 def _shalist_sort_key(ent):
297 (mode, name, id) = ent
# Mode is an octal string, e.g. '40000' for a directory.
298 if stat.S_ISDIR(int(mode, 8)):
# Heap-merge several sorted sha iterators into one sorted stream,
# reporting progress as it goes.
# NOTE(review): sampled listing — the heapify/yield loop is partly elided.
304 def idxmerge(idxlist):
305 total = sum(len(i) for i in idxlist)
306 iters = (iter(i) for i in idxlist)
307 heap = [(next(it), it) for it in iters]
# Progress line updated only periodically to keep overhead low.
312 if (count % 10024) == 0:
313 progress('Reading indexes: %.2f%% (%d/%d)\r'
314 % (count*100.0/total, count, total))
# Replace the popped head with that iterator's next element.
322 heapq.heapreplace(heap, (e, it))
325 log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))
# Writes new objects into a freshly created pack file; the dedup
# object cache is built lazily via objcache_maker (see _make_objcache).
# NOTE(review): sampled listing — other attribute setup lines are elided.
329 def __init__(self, objcache_maker=None):
334 self.objcache_maker = objcache_maker
# Lazily build the dedup cache: use the caller-supplied maker if any,
# otherwise index everything under the repo's objects/pack directory.
340 def _make_objcache(self):
341 if not self.objcache:
342 if self.objcache_maker:
343 self.objcache = self.objcache_maker()
345 self.objcache = PackIdxList(repo('objects/pack'))
349 self._make_objcache()
350 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
351 self.file = os.fdopen(fd, 'w+b')
352 assert(name.endswith('.pack'))
353 self.filename = name[:-5]
354 self.file.write('PACK\0\0\0\2\0\0\0\0')
# Join `datalist` and append it to the pack file in one write call,
# tracking the byte count.
356 def _raw_write(self, datalist):
359 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
360 # the file never has a *partial* blob. So let's make sure it's
361 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
362 # to our hashsplit algorithm.) f.write() does its own buffering,
363 # but that's okay because we'll flush it in _end().
364 oneblob = ''.join(datalist)
366 self.outbytes += len(oneblob)
# Append one encoded object to the pack; `bin` is its precomputed
# binary sha1. NOTE(review): sampled listing — return line elided.
369 def _write(self, bin, type, content):
372 self._raw_write(_encode_packobj(type, content))
# Finish the current pack and reset the counters — presumably so the
# next write starts a fresh pack (the closing call is elided in this
# sampled listing; confirm against the full source).
375 def breakpoint(self):
377 self.outbytes = self.count = 0
# Unconditionally write the object (no dedup check); returns whatever
# _write() returns for it.
380 def write(self, type, content):
381 return self._write(calc_hash(type, content), type, content)
# True if binary object id `id` is already present in the repository
# indexes; builds the object cache on first use.
383 def exists(self, id):
384 if not self.objcache:
385 self._make_objcache()
386 return self.objcache.exists(id)
# Write the object only if it isn't already stored, and record its id
# in the cache. NOTE(review): sampled listing — the `return bin`, if
# present, is elided; confirm against the full source.
388 def maybe_write(self, type, content):
389 bin = calc_hash(type, content)
390 if not self.exists(bin):
391 self._write(bin, type, content)
392 self.objcache.add(bin)
# Store `blob` as a (deduplicated) git blob object.
395 def new_blob(self, blob):
396 return self.maybe_write('blob', blob)
# Build and store a git tree object from (mode, name, binsha) entries,
# sorted into git's canonical tree order.
# NOTE(review): sampled listing — the `l = []` setup line is elided.
398 def new_tree(self, shalist):
399 shalist = sorted(shalist, key = _shalist_sort_key)
401 for (mode,name,bin) in shalist:
# Git never writes a leading zero on the octal mode field.
404 assert(mode[0] != '0')
406 assert(len(bin) == 20)
407 l.append('%s %s\0%s' % (mode,name,bin))
408 return self.maybe_write('tree', ''.join(l))
# Assemble and store a commit object. `tree`/`parent` are binary shas
# (hex-encoded into the headers); dates go through _git_date(). Each
# header line is emitted only when its value is truthy.
410 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
412 if tree: l.append('tree %s' % tree.encode('hex'))
413 if parent: l.append('parent %s' % parent.encode('hex'))
414 if author: l.append('author %s %s' % (author, _git_date(adate)))
415 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
418 return self.maybe_write('commit', '\n'.join(l))
# Convenience wrapper: commit `tree` on top of `parent`, authored and
# committed now by the current user (fullname <user@host>).
# NOTE(review): sampled listing — the `now = time.time()` style setup
# and trailing argument lines are elided.
420 def new_commit(self, parent, tree, msg):
422 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
423 commit = self._new_commit(tree, parent,
424 userline, now, userline, now,
433 os.unlink(self.filename + '.pack')
437 if not f: return None
441 # update object count
443 cp = struct.pack('!i', self.count)
447 # calculate the pack sha1sum
454 f.write(sum.digest())
458 p = subprocess.Popen(['git', 'index-pack', '-v',
460 self.filename + '.pack'],
461 preexec_fn = _gitenv,
462 stdout = subprocess.PIPE)
463 out = p.stdout.read().strip()
464 _git_wait('git index-pack', p)
466 raise GitError('git index-pack produced no output')
467 nameprefix = repo('objects/pack/%s' % out)
468 if os.path.exists(self.filename + '.map'):
469 os.unlink(self.filename + '.map')
470 os.rename(self.filename + '.pack', nameprefix + '.pack')
471 os.rename(self.filename + '.idx', nameprefix + '.idx')
479 return time.strftime('%s %z', time.localtime(date))
483 os.environ['GIT_DIR'] = os.path.abspath(repo())
# Generator over (refname, binsha) pairs from `git show-ref`. A
# nonzero exit (e.g. no refs yet) yields nothing rather than raising.
486 def list_refs(refname = None):
487 argv = ['git', 'show-ref', '--']
490 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
491 out = p.stdout.read().strip()
492 rv = p.wait() # not fatal
496 for d in out.split('\n'):
# NOTE(review): local `sha` shadows the imported `sha` module inside
# this loop — harmless here, but worth renaming.
497 (sha, name) = d.split(' ', 1)
498 yield (name, sha.decode('hex'))
# Resolve a single ref name via list_refs() — presumably returning
# the sole match's sha or None (the rest of the body is elided in
# this sampled listing; confirm).
501 def read_ref(refname):
502 l = list(list_refs(refname))
# Generator over (commit_time, binsha) pairs from `git rev-list ref`;
# `count` limits the number of commits returned.
510 def rev_list(ref, count=None):
# Guard against `ref` being interpreted as a command-line option.
511 assert(not ref.startswith('-'))
514 opts += ['-n', str(atoi(count))]
515 argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
516 p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
# 'commit <hex>' lines carry the id; the %ct line carries the date.
520 if s.startswith('commit '):
521 commit = s[7:].decode('hex')
525 rv = p.wait() # not fatal
# NOTE(review): Python-2-only `raise E, msg` syntax.
527 raise GitError, 'git rev-list returned error %d' % rv
# Return the commit date of `ref` (its newest commit); raises GitError
# when the ref resolves to nothing.
530 def rev_get_date(ref):
531 for (date, commit) in rev_list(ref, count=1):
# NOTE(review): Python-2-only `raise E, msg` syntax.
533 raise GitError, 'no such commit %r' % ref
# Move a refs/heads/ ref from `oldval` to `newval` (both binary shas)
# via `git update-ref`; passing oldval lets git reject lost updates.
536 def update_ref(refname, newval, oldval):
539 assert(refname.startswith('refs/heads/'))
540 p = subprocess.Popen(['git', 'update-ref', refname,
541 newval.encode('hex'), oldval.encode('hex')],
542 preexec_fn = _gitenv)
543 _git_wait('git update-ref', p)
# Choose the repository directory: explicit `path` wins, then the
# BUP_DIR environment variable, then ~/.bup.
# NOTE(review): sampled listing — the branch structure is partly elided.
546 def guess_repo(path=None):
551 repodir = os.environ.get('BUP_DIR')
553 repodir = os.path.expanduser('~/.bup')
# Create a bare git repository at `path` (or the guessed location),
# setting pack.indexVersion=2 so the index files are bup-compatible.
556 def init_repo(path=None):
# os.path.join(d, '.') also rejects a dangling symlink named d.
559 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
# NOTE(review): '%d' is the wrong format code for the string path `d`
# — this raise would itself fail with a TypeError; should be '%s'.
560 raise GitError('"%d" exists but is not a directory\n' % d)
561 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
562 preexec_fn = _gitenv)
563 _git_wait('git init', p)
564 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
565 stdout=sys.stderr, preexec_fn = _gitenv)
566 _git_wait('git config', p)
# Abort unless a usable bup/git repository exists; presence of the
# objects/pack directory is the validity test.
# NOTE(review): sampled listing — the error/exit lines are partly elided.
569 def check_repo_or_die(path=None):
571 if not os.path.isdir(repo('objects/pack/.')):
# A missing default repo gets a friendlier message than a bad path.
572 if repodir == home_repodir:
575 log('error: %r is not a bup/git repository\n' % repo())
581 while ofs < len(buf):
582 z = buf[ofs:].find('\0')
584 spl = buf[ofs:ofs+z].split(' ', 1)
585 assert(len(spl) == 2)
586 sha = buf[ofs+z+1:ofs+z+1+20]
588 yield (spl[0], spl[1], sha)
595 p = subprocess.Popen(['git', '--version'],
596 stdout=subprocess.PIPE)
597 gvs = p.stdout.read()
598 _git_wait('git --version', p)
599 m = re.match(r'git version (\S+.\S+)', gvs)
601 raise GitError('git --version weird output: %r' % gvs)
602 _ver = tuple(m.group(1).split('.'))
603 needed = ('1','5', '3', '1')
605 raise GitError('git version %s or higher is required; you have %s'
606 % ('.'.join(needed), '.'.join(_ver)))
# Wait for subprocess `p` and raise GitError (naming `cmd`) on a
# nonzero exit status.
610 def _git_wait(cmd, p):
613 raise GitError('%s returned %d' % (cmd, rv))
# Run a git command inside the repository environment and return its
# captured stdout; raises GitError on failure.
616 def _git_capture(argv):
617 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
619 _git_wait(repr(argv), p)
627 wanted = ('1','5','6')
630 log('warning: git version < %s; bup will be slow.\n'
633 self.get = self._slow_get
635 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
636 stdin=subprocess.PIPE,
637 stdout=subprocess.PIPE,
638 preexec_fn = _gitenv)
639 self.get = self._fast_get
640 self.inprogress = None
# Fetch one object through the persistent `git cat-file --batch`
# child process, yielding its content incrementally. Only one fetch
# may be in progress at a time or the pipe protocol desynchronizes.
642 def _fast_get(self, id):
644 log('_fast_get: opening %r while %r is open'
645 % (id, self.inprogress))
646 assert(not self.inprogress)
# A newline/CR in `id` would corrupt the line-oriented batch protocol.
647 assert(id.find('\n') < 0)
648 assert(id.find('\r') < 0)
651 self.p.stdin.write('%s\n' % id)
652 hdr = self.p.stdout.readline()
# Batch protocol reports '<id> missing' for unknown objects.
653 if hdr.endswith(' missing\n'):
654 raise KeyError('blob %r is missing' % id)
656 if len(spl) != 3 or len(spl[0]) != 40:
657 raise GitError('expected blob, got %r' % spl)
658 (hex, type, size) = spl
# Each batch response is terminated by a single newline.
661 assert(self.p.stdout.readline() == '\n')
662 self.inprogress = None
664 it = AutoFlushIter(chunkyreader(self.p.stdout, int(spl[2])),
# Fallback path for old git versions: spawn one `git cat-file` child
# per object (a -t call for the type, then one for the content).
671 def _slow_get(self, id):
672 assert(id.find('\n') < 0)
673 assert(id.find('\r') < 0)
675 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
678 p = subprocess.Popen(['git', 'cat-file', type, id],
679 stdout=subprocess.PIPE,
680 preexec_fn = _gitenv)
681 for blob in chunkyreader(p.stdout):
683 _git_wait('git cat-file', p)
691 treefile = ''.join(it)
692 for (mode, name, sha) in _treeparse(treefile):
693 for blob in self.join(sha.encode('hex')):
695 elif type == 'commit':
696 treeline = ''.join(it).split('\n')[0]
697 assert(treeline.startswith('tree '))
698 for blob in self.join(treeline[5:]):
701 raise GitError('invalid object type %r: expected blob/tree/commit'
706 for d in self._join(self.get(id)):
708 except StopIteration: