import os, sys, errno, zlib, time, subprocess, struct, stat, re, tempfile
import heapq
from bup.helpers import *

ignore_midx = 0
home_repodir = os.path.expanduser('~/.bup')
repodir = None

_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

class GitError(Exception):
    pass


def repo(sub = ''):
    global repodir
    if not repodir:
        raise GitError('You should call check_repo_or_die()')
    gd = os.path.join(repodir, '.git')
    if os.path.exists(gd):
        repodir = gd
    return os.path.join(repodir, sub)

def mangle_name(name, mode, gitmode):
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        return name + '.bup'
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
    return name

(BUP_NORMAL, BUP_CHUNKED) = (0,1)

def demangle_name(name):
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    else:
        return (name, BUP_NORMAL)

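# Illustrative example (not in the original source): how mangle_name() and
# demangle_name() round-trip.  A regular file whose name already ends in
# '.bup' gets an extra '.bupl' so it can't be mistaken for a chunked file:
#   >>> mangle_name('foo.bup', 0100644, 0100644)
#   'foo.bup.bupl'
#   >>> demangle_name('foo.bup.bupl')   # -> (name, BUP_NORMAL)
#   ('foo.bup', 0)
#   >>> demangle_name('foo.bup')        # -> (name, BUP_CHUNKED)
#   ('foo', 1)
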
def _encode_packobj(type, content):
    szout = ''
    sz = len(content)
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    sz >>= 4
    while 1:
        if sz: szbits |= 0x80
        szout += chr(szbits)
        if not sz:
            break
        szbits = sz & 0x7f
        sz >>= 7
    z = zlib.compressobj(1)
    yield szout
    yield z.compress(content)
    yield z.flush()

def _encode_looseobj(type, content):
    z = zlib.compressobj(1)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    yield z.flush()

def _decode_looseobj(buf):
    assert(buf)
    s = zlib.decompress(buf)
    i = s.find('\0')
    assert(i > 0)
    l = s[:i].split(' ')
    type = l[0]
    sz = int(l[1])
    content = s[i+1:]
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)

def _decode_packobj(buf):
    assert(buf)
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    sz = c & 0x0f
    shift = 4
    i = 0
    while c & 0x80:
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
    return (type, zlib.decompress(buf[i+1:]))

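# Worked example (illustrative): the pack object header is a little-endian
# base-128 varint with the object type squeezed into the first byte.  For
# a 300-byte blob, _encode_packobj emits two header bytes:
#   0xbc == 0x80 (continue) | (_typemap['blob'] << 4) | (300 & 0x0f)
#   0x12 == 300 >> 4
# and _decode_packobj reverses it: sz = (300 & 0xf) | ((300 >> 4) << 4) == 300.
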
class PackIdx:
    def __init__(self, filename):
        self.name = filename
        self.map = mmap_read(open(filename))
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.ofstable = buffer(self.map,
                               8 + 256*4 + nsha*20 + nsha*4,
                               nsha*4)
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        if ofs & 0x80000000:
            # high bit set: the real offset lives in the 64-bit table
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
        return ofs
    def _idx_from_hash(self, hash):
        assert(len(hash) == 20)
        b1 = ord(hash[0])
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        buf = buffer(self.map, 8 + 256*4, end*20)
        want = str(hash)
        while start < end:
            mid = start + (end-start)/2
            v = str(buf[mid*20:(mid+1)*20])
            if v < want:
                start = mid+1
            elif v > want:
                end = mid
            else: # got it!
                return mid
        return None
    def find_offset(self, hash):
        idx = self._idx_from_hash(hash)
        if idx != None:
            return self._ofs_from_idx(idx)
        return None
    def exists(self, hash):
        return hash and (self._idx_from_hash(hash) != None) and True or None
    def __iter__(self):
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)

    def __len__(self):
        return int(self.fanout[255])

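# Note on the idx layout (illustrative): fanout[b] counts the shas whose
# first byte is <= b, so for a hash starting with 0xab the binary search
# above only needs to consider the sorted slice between fanout[0xaa] and
# fanout[0xab] -- roughly 1/256th of the whole index.
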
def extract_bits(buf, bits):
    mask = (1<<bits) - 1
    v = struct.unpack('!I', buf[0:4])[0]
    v = (v >> (32-bits)) & mask
    return v

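# Illustrative example: extract_bits() returns the top 'bits' bits of the
# sha, read big-endian.  For a sha whose first bytes are ab cd ...:
#   >>> extract_bits('\xab\xcd\x00\x00' + '\x00'*16, 8)
#   171        # == 0xab
# PackMidx uses this value as the fanout-table slot for a hash.
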
class PackMidx:
    def __init__(self, filename):
        self.name = filename
        assert(filename.endswith('.midx'))
        self.map = mmap_read(open(filename))
        if str(self.map[0:8]) == 'MIDX\0\0\0\1':
            log('Warning: ignoring old-style midx %r\n' % filename)
            self.bits = 0
            self.entries = 1
            self.fanout = buffer('\0\0\0\0')
            self.shalist = buffer('\0'*20)
            self.idxnames = []
            return
        assert(str(self.map[0:8]) == 'MIDX\0\0\0\2')
        self.bits = struct.unpack('!I', self.map[8:12])[0]
        self.entries = 2**self.bits
        self.fanout = buffer(self.map, 12, self.entries*4)
        shaofs = 12 + self.entries*4
        nsha = self._fanget(self.entries-1)
        self.shalist = buffer(self.map, shaofs, nsha*20)
        self.idxnames = str(self.map[shaofs + 20*nsha:]).split('\0')
    def _fanget(self, i):
        start = i*4
        s = self.fanout[start:start+4]
        return struct.unpack('!I', s)[0]
    def exists(self, hash):
        want = str(hash)
        el = extract_bits(want, self.bits)
        if el:
            start = self._fanget(el-1)
        else:
            start = 0
        end = self._fanget(el)
        while start < end:
            mid = start + (end-start)/2
            v = str(self.shalist[mid*20:(mid+1)*20])
            if v < want:
                start = mid+1
            elif v > want:
                end = mid
            else: # got it!
                return True
        return None
    def __iter__(self):
        for i in xrange(self._fanget(self.entries-1)):
            yield buffer(self.shalist, i*20, 20)

    def __len__(self):
        return int(self._fanget(self.entries-1))

_mpi_count = 0
class PackIdxList:
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = {}
        self.packs = []
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        return iter(idxmerge(self.packs))
    def exists(self, hash):
        if hash in self.also:
            return True
        for i in range(len(self.packs)):
            p = self.packs[i]
            if p.exists(hash):
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return p.name
        return None
    def refresh(self, skip_midx = False):
        skip_midx = skip_midx or ignore_midx
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                for ix in self.packs:
                    if isinstance(ix, PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for f in os.listdir(self.dir):
                    full = os.path.join(self.dir, f)
                    if f.endswith('.midx') and not d.get(full):
                        mx = PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = 0
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     '  used by %s\n') % (n, mxf))
                                broken += 1
                        if not broken:
                            midxl.append(mx)
                midxl.sort(lambda x,y: -cmp(len(x),len(y)))
                for ix in midxl:
                    any = 0
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            d[ix.name] = ix
                            for name in ix.idxnames:
                                d[os.path.join(self.dir, name)] = ix
                            any += 1
                            break
                    if not any:
                        log('midx: removing redundant: %s\n'
                            % os.path.basename(ix.name))
                        unlink(ix.name)
            for f in os.listdir(self.dir):
                full = os.path.join(self.dir, f)
                if f.endswith('.idx') and not d.get(full):
                    d[full] = PackIdx(full)
            self.packs = list(set(d.values()))
        log('PackIdxList: using %d index%s.\n'
            % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    def add(self, hash):
        self.also[hash] = 1

def calc_hash(type, content):
    header = '%s %d\0' % (type, len(content))
    sum = Sha1(header)
    sum.update(content)
    return sum.digest()

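# Example: this is the standard git object id -- the sha1 of the header
# '<type> <len>\0' followed by the raw content:
#   >>> calc_hash('blob', 'hello') == hashlib.sha1('blob 5\0hello').digest()
#   True
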
def _shalist_sort_key(ent):
    (mode, name, id) = ent
    if stat.S_ISDIR(int(mode, 8)):
        # git sorts tree entries as if directory names end with '/'
        return name + '/'
    else:
        return name

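# Example of why the trailing '/' matters: given a file 'foo.c' and a
# directory 'foo', git wants the directory last because 'foo/' > 'foo.c'
# ('/' is 0x2f, '.' is 0x2e).  A plain sort on the bare names would put
# 'foo' first, and git would reject the tree as unsorted.
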
def idxmerge(idxlist):
    total = sum(len(i) for i in idxlist)
    iters = (iter(i) for i in idxlist)
    heap = [(next(it), it) for it in iters]
    heapq.heapify(heap)
    count = 0
    last = None
    while heap:
        if (count % 10024) == 0:
            progress('Reading indexes: %.2f%% (%d/%d)\r'
                     % (count*100.0/total, count, total))
        (e, it) = heap[0]
        if e != last:
            yield e
            last = e
        count += 1
        e = next(it)
        if e:
            heapq.heapreplace(heap, (e, it))
        else:
            heapq.heappop(heap)
    log('Reading indexes: %.2f%% (%d/%d), done.\n' % (100, total, total))

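# Illustrative example: idxmerge() is a k-way heap merge that also drops
# duplicates.  Merging the sorted sha streams of two indexes,
#   ['aa...', 'cc...', 'dd...'] and ['bb...', 'cc...'],
# yields 'aa...', 'bb...', 'cc...', 'dd...' -- the second 'cc...' is
# suppressed by the 'e != last' test above.
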
class PackWriter:
    def __init__(self, objcache_maker=None):
        self.count = 0
        self.outbytes = 0
        self.filename = None
        self.file = None
        self.objcache_maker = objcache_maker
        self.objcache = None

    def __del__(self):
        self.close()

    def _make_objcache(self):
        if not self.objcache:
            if self.objcache_maker:
                self.objcache = self.objcache_maker()
            else:
                self.objcache = PackIdxList(repo('objects/pack'))
    def _open(self):
        if not self.file:
            self._make_objcache()
            (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
            self.file = os.fdopen(fd, 'w+b')
            assert(name.endswith('.pack'))
            self.filename = name[:-5]
            self.file.write('PACK\0\0\0\2\0\0\0\0')
    def _raw_write(self, datalist):
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        f.write(oneblob)
        self.outbytes += len(oneblob)
        self.count += 1
    def _write(self, bin, type, content):
        self._raw_write(_encode_packobj(type, content))
        return bin
    def breakpoint(self):
        id = self._end()
        self.outbytes = self.count = 0
        return id
    def write(self, type, content):
        return self._write(calc_hash(type, content), type, content)
    def exists(self, id):
        if not self.objcache:
            self._make_objcache()
        return self.objcache.exists(id)
    def maybe_write(self, type, content):
        bin = calc_hash(type, content)
        if not self.exists(bin):
            self._write(bin, type, content)
            self.objcache.add(bin)
        return bin
    def new_blob(self, blob):
        return self.maybe_write('blob', blob)
    def new_tree(self, shalist):
        shalist = sorted(shalist, key = _shalist_sort_key)
        l = []
        for (mode,name,bin) in shalist:
            assert(mode)
            assert(mode[0] != '0')
            assert(name)
            assert(len(bin) == 20)
            l.append('%s %s\0%s' % (mode,name,bin))
        return self.maybe_write('tree', ''.join(l))
    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))
    def new_commit(self, parent, tree, msg):
        now = time.time()
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, now, userline, now,
                                  msg)
        return commit
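    # For reference, the commit object assembled above has the usual git
    # layout (placeholders, not literal output):
    #   tree <40-char hex sha>
    #   parent <40-char hex sha>    (omitted for the first commit on a branch)
    #   author <userline> <_git_date(now)>
    #   committer <userline> <_git_date(now)>
    #   <blank line>
    #   <msg>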
    def abort(self):
        f = self.file
        if f:
            self.file = None
            f.close()
            os.unlink(self.filename + '.pack')
    def _end(self):
        f = self.file
        if not f: return None
        self.file = None
        self.objcache = None

        # update object count
        f.seek(8)
        cp = struct.pack('!i', self.count)
        assert(len(cp) == 4)
        f.write(cp)

        # calculate the pack sha1sum
        f.seek(0)
        sum = Sha1()
        for b in chunkyreader(f):
            sum.update(b)
        f.write(sum.digest())
        f.close()
        p = subprocess.Popen(['git', 'index-pack', '-v',
                              '--index-version=2',
                              self.filename + '.pack'],
                             preexec_fn = _gitenv,
                             stdout = subprocess.PIPE)
        out = p.stdout.read().strip()
        _git_wait('git index-pack', p)
        if not out:
            raise GitError('git index-pack produced no output')
        nameprefix = repo('objects/pack/%s' % out)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        return nameprefix

    def close(self):
        return self._end()

def _git_date(date):
    return '%d %s' % (date, time.strftime('%z', time.localtime(date)))

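# Example: _git_date(1230000000) returns '1230000000 -0800' in a US/Pacific
# timezone -- epoch seconds plus the local UTC offset, the form git expects
# in author/committer lines.
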
def _gitenv():
    os.environ['GIT_DIR'] = os.path.abspath(repo())

def list_refs(refname = None):
    argv = ['git', 'show-ref', '--']
    if refname:
        argv += [refname]
    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait()  # not fatal
    if rv:
        assert(not out)
    if out:
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))

def read_ref(refname):
    l = list(list_refs(refname))
    if l:
        assert(len(l) == 1)
        return l[0][1]
    return None

def rev_list(ref, count=None):
    assert(not ref.startswith('-'))
    opts = []
    if count:
        opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%ct'] + opts + [ref, '--']
    p = subprocess.Popen(argv, preexec_fn = _gitenv, stdout = subprocess.PIPE)
    commit = None
    for row in p.stdout:
        s = row.strip()
        if s.startswith('commit '):
            commit = s[7:].decode('hex')
        else:
            date = int(s)
            yield (date, commit)
    rv = p.wait()  # not fatal
    if rv:
        raise GitError('git rev-list returned error %d' % rv)

def rev_get_date(ref):
    for (date, commit) in rev_list(ref, count=1):
        return date
    raise GitError('no such commit %r' % ref)

def update_ref(refname, newval, oldval):
    if not oldval:
        oldval = ''
    assert(refname.startswith('refs/heads/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv)
    _git_wait('git update-ref', p)

def guess_repo(path=None):
    global repodir
    if path:
        repodir = path
    if not repodir:
        repodir = os.environ.get('BUP_DIR')
        if not repodir:
            repodir = os.path.expanduser('~/.bup')

def init_repo(path=None):
    guess_repo(path)
    d = repo()
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv)
    _git_wait('git init', p)
    # force the index version we know how to read (see PackIdx.__init__)
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv)
    _git_wait('git config', p)

def check_repo_or_die(path=None):
    guess_repo(path)
    if not os.path.isdir(repo('objects/pack/.')):
        if repodir == home_repodir:
            log('error: no default repository found; run "bup init" first\n')
        else:
            log('error: %r is not a bup/git repository\n' % repo())
        sys.exit(15)

def _treeparse(buf):
    ofs = 0
    while ofs < len(buf):
        z = buf[ofs:].find('\0')
        assert(z > 0)
        spl = buf[ofs:ofs+z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[ofs+z+1:ofs+z+1+20]
        ofs += z+1+20
        yield (spl[0], spl[1], sha)

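# Illustrative example: a raw git tree entry is '<octal mode> <name>\0'
# followed immediately by a binary 20-byte sha, with no separator between
# entries:
#   >>> list(_treeparse('100644 foo\0' + '\x01'*20))
#   [('100644', 'foo', '\x01\x01\x01...')]
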
_ver = None
def ver():
    global _ver
    if not _ver:
        p = subprocess.Popen(['git', '--version'],
                             stdout=subprocess.PIPE)
        gvs = p.stdout.read()
        _git_wait('git --version', p)
        m = re.match(r'git version (\S+\.\S+)', gvs)
        if not m:
            raise GitError('git --version weird output: %r' % gvs)
        _ver = tuple(m.group(1).split('.'))
    needed = ('1','5','3','1')
    if _ver < needed:
        raise GitError('git version %s or higher is required; you have %s'
                       % ('.'.join(needed), '.'.join(_ver)))
    return _ver

def _git_wait(cmd, p):
    rv = p.wait()
    if rv != 0:
        raise GitError('%s returned %d' % (cmd, rv))

def _git_capture(argv):
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv)
    r = p.stdout.read()
    _git_wait(repr(argv), p)
    return r

class AbortableIter:
    def __init__(self, it, onabort = None):
        self.it = it
        self.onabort = onabort
        self.done = None

    def __iter__(self):
        return self

    def next(self):
        try:
            return self.it.next()
        except StopIteration, e:
            self.done = True
            raise
        except:
            self.abort()
            raise

    def abort(self):
        # run the abort callback at most once (eg. to clean up a
        # half-consumed pipe)
        if not self.done:
            self.done = True
            if self.onabort:
                self.onabort()

    def __del__(self):
        self.abort()

class CatPipe:
    def __init__(self):
        wanted = ('1','5','6')
        if ver() < wanted:
            log('warning: git version < %s; bup will be slow.\n'
                % '.'.join(wanted))
            self.get = self._slow_get
        else:
            self.p = self.inprogress = None
            self.get = self._fast_get

    def _abort(self):
        if self.p:
            self.p.stdout.close()
            self.p.stdin.close()
        self.p = None
        self.inprogress = None

    def _restart(self):
        self._abort()
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  preexec_fn = _gitenv)
    def _fast_get(self, id):
        if not self.p or self.p.poll() != None:
            self._restart()
        assert(self.p)
        assert(self.p.poll() == None)
        if self.inprogress:
            log('_fast_get: opening %r while %r is open'
                % (id, self.inprogress))
        assert(not self.inprogress)
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            raise KeyError('blob %r is missing' % id)
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl

        it = AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                           onabort = self._abort)
        try:
            yield type
            for blob in it:
                yield blob
            assert(self.p.stdout.readline() == '\n')
            self.inprogress = None
        except Exception, e:
            it.abort()
            raise
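    # The header parsed above is the standard 'git cat-file --batch'
    # response:
    #   <40-hex sha> SP <type> SP <size> LF
    # followed by <size> bytes of object content and one terminating LF,
    # which is why exactly one more '\n' is expected after the chunked read.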
    def _slow_get(self, id):
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        yield type

        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv)
        for blob in chunkyreader(p.stdout):
            yield blob
        _git_wait('git cat-file', p)
    def _join(self, it):
        type = it.next()
        if type == 'blob':
            for blob in it:
                yield blob
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in _treeparse(treefile):
                for blob in self.join(sha.encode('hex')):
                    yield blob
        elif type == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
                yield blob
        else:
            raise GitError('invalid object type %r: expected blob/tree/commit'
                           % type)

    def join(self, id):
        try:
            for d in self._join(self.get(id)):
                yield d
        except StopIteration:
            log('booger!\n')