1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
6 from collections import namedtuple
7 from itertools import islice
9 from bup.helpers import *
10 from bup import _helpers, path, midx, bloom, xstat
12 max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
13 max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
# Mapping between git object type names and their pack-format type numbers
# (and the reverse), per the git pack format specification.
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
26 class GitError(Exception):
def parse_tz_offset(s):
    """Parse a git timezone string like '+0530' or '-0500' (see _tz_rx)
    and return the UTC offset in seconds."""
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    # The visible code computed the magnitude but never applied the leading
    # sign character nor returned a value; offsets west of UTC are negative.
    if s[0] == '-':
        return - tz_off
    return tz_off
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
# Character classes for the "safe" author/committer name and email strings.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
# NOTE(review): the '% (' line joining the format string to its arguments
# appears to be missing between the next two lines -- verify upstream.
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
       _start_end_char, _content_char, _start_end_char)
# Timezone: sign followed by HHMM.
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Matches an entire commit object; named groups feed CommitInfo below.
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
# Extracts each 40-hex parent hash from the <parents> group above.
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
# NOTE(review): the closing "'message'])" element appears truncated here.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a git commit object's text into a CommitInfo namedtuple.

    Raises Exception if content does not match _commit_rx.
    """
    commit_match = re.match(_commit_rx, content)
    # NOTE(review): the "if not commit_match:" guard appears to be missing
    # before this raise -- verify against upstream.
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch commit *id* through cat-pipe *cp* and return it as CommitInfo."""
    item_iter = cp.get(id)
    obj_type = item_iter.next()
    assert(obj_type == 'commit')
    return parse_commit(''.join(item_iter))
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    # Fall back to the module-global repodir set by check_repo_or_die().
    repo_dir = repo_dir or repodir
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
    return os.path.join(repo_dir, sub)
    # NOTE(review): tail of shorten_hash(); its def line and the replacement
    # argument are not visible here.  Abbreviates 40-hex hashes to 7 chars.
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
    # Body of repo_rel(path): strip the repo prefix (and index-cache prefix)
    # from a path, then shorten embedded hashes.  def line not visible.
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)
    # Body of all_packdirs(): the repo pack dir plus any index-cache dirs.
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    """Run 'bup midx --auto' and 'bup bloom' on objdir, logging any errors."""
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    # Regular file stored as a git tree => it was chunked by hashsplit.
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
    # NOTE(review): the "return name + '.bup'" branch appears truncated here.
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    # The visible block's docstring was unterminated (swallowing the body)
    # and the final fallback return had no else branch; both restored here.
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    else:
        return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion.

    Git hashes sha1('<type> <size>\\0' + content); the visible code built the
    header but never hashed or returned anything.  Sha1 is presumed to come
    from bup.helpers (imported with *) -- verify.
    """
    header = '%s %d\0' % (type, len(content))
    sum = Sha1()
    sum.update(header)
    sum.update(content)
    return sum.digest()
def shalist_item_sort_key(ent):
    """Sort key for (mode,name,hash) tree entries, matching git's ordering:
    directories sort as if their name had a trailing '/'."""
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be an integer
    # The visible block had no return on either path; restore git's rule.
    if stat.S_ISDIR(mode):
        return name + '/'
    else:
        return name
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    # Entries must be sorted exactly as git expects (dirs as "name/").
    shalist = sorted(shalist, key = shalist_item_sort_key)
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)
        assert(len(bin) == 20)  # binary sha1, not hex
        # Tree entry layout: "<octal mode> <name>\0<20-byte sha>".
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
    # NOTE(review): the accumulator append and final join/return appear
    # truncated in this view.
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    # NOTE(review): ofs initialization and the mode/name split assignment
    # appear truncated in this view.
    while ofs < len(buf):
        # Each entry is "<octal mode> <name>\0" followed by a 20-byte sha.
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    """Yield the pack-format encoding of an object: a variable-length
    size/type header followed by the zlib-compressed content."""
    # Low nibble of the first byte holds size bits, high bits the type.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
        # High bit set means more size bytes follow (varint encoding).
        if sz: szbits |= 0x80
    # Clamp compression_level into zlib's valid 0..9 range.
    if compression_level > 9:
        compression_level = 9
    elif compression_level < 0:
        compression_level = 0
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    """Yield the loose-object encoding: zlib('<type> <size>\\0' + content)."""
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    # NOTE(review): the final "yield z.flush()" appears truncated here.
def _decode_looseobj(buf):
    """Decode a loose object, returning (type, content)."""
    s = zlib.decompress(buf)
    # NOTE(review): the header parsing ('<type> <size>\0' split) appears
    # truncated in this view.
    assert(type in _typemap)
    assert(sz == len(content))  # declared size must match payload
    return (type, content)
def _decode_packobj(buf):
    """Decode a packed object, returning (type, decompressed content)."""
    # High nibble of the first byte encodes the object type.
    type = _typermap[(c & 0x70) >> 4]
        # Accumulate 7 size bits per continuation byte (varint).
        sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        # NOTE(review): the "if idx != None:" guard appears truncated here.
            return self._ofs_from_idx(idx)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # Either the index file's basename or just True, per want_source.
            return want_source and os.path.basename(self.name) or True
    # NOTE(review): the line below is the tail of __len__; its def line is
    # not visible here.  fanout[255] is the total object count.
        return int(self.fanout[255])
    def _idx_from_hash(self, hash):
        """Binary-search the fanout-delimited sha table for *hash*."""
        global _total_searches, _total_steps
        assert(len(hash) == 20)  # binary sha1
        # The fanout table narrows the search to hashes with first byte b1.
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
            mid = start + (end-start)/2
            v = self._idx_to_hash(mid)
        # NOTE(review): the comparison/narrowing tail of the binary search
        # appears truncated in this view.
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # V1 layout: 256-entry fanout of big-endian uint32 counts, then
        # 24-byte records of (4-byte offset, 20-byte sha).
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # Offset is the first 4 bytes of each 24-byte record.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # Sha is bytes 4..24 of each record.
        return str(self.shatable[idx*24+4 : idx*24+24])

    # NOTE(review): tail of __iter__; its def line is not visible here.
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # V2 magic '\377tOc' plus version number 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        # V2 layout: header, fanout, sha table, crc table, 32-bit offsets,
        # then 64-bit overflow offsets.
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # High bit set => the low 31 bits index the 64-bit offset table.
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

    # NOTE(review): tail of __iter__; its def line is not visible here.
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    def __init__(self, dir):
        # Only one PackIdxList may exist at a time (heavy mmap usage).
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        self.do_bloom = False
    # NOTE(review): the lines below are the tails of __del__, __iter__ and
    # __len__; their def lines are not visible here.
        assert(_mpi_count == 0)
        return iter(idxmerge(self.packs))
        return sum(len(pack) for pack in self.packs)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        # 'also' holds hashes added via add() that aren't in any index yet.
        if hash in self.also:
        # The bloom filter can cheaply prove absence; a hit disables it so
        # the packs are consulted for a definitive answer.
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
                _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Start from the currently loaded indexes, keyed by file name.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
                # Record which .idx files are already covered by a midx.
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     '  used by %s\n') % (n, mxf))
                # Prefer the largest (then newest) midx files first.
                midxl.sort(key=lambda ix:
                           (-len(ix), -xstat.stat(ix.name).st_mtime))
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
            # Reopen the bloom filter if one exists on disk.
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            self.packs = list(set(d.values()))
            self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
            # Only trust the bloom if it covers every indexed object.
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
    # NOTE(review): the docstring below belongs to add(); its def line is
    # not visible here.
        """Insert an additional object in the list."""
def open_idx(filename):
    """Open a .idx or .midx file, returning the appropriate index object."""
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        # '\377tOc' magic marks a version-2 (or later) pack index.
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
                return PackIdxV2(filename, f)
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        # V1 files have no magic; their first fanout entry is < '\377tOc'.
        elif len(header) == 8 and header[0:4] < '\377tOc':
            return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        # Progress callback during the merge.
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        # NOTE(review): the "if final_progress:" guard appears truncated here.
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default object-cache factory used by PackWriter."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
560 """Writes Git objects inside a pack file."""
561 def __init__(self, objcache_maker=_make_objcache, compression_level=1):
567 self.objcache_maker = objcache_maker
569 self.compression_level = compression_level
576 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
577 self.file = os.fdopen(fd, 'w+b')
578 assert(name.endswith('.pack'))
579 self.filename = name[:-5]
580 self.file.write('PACK\0\0\0\2\0\0\0\0')
581 self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        """Write an already-encoded object and record it in the index."""
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
            # Re-raise as GitError, preserving the original traceback.
            raise GitError, e, sys.exc_info()[2]
        # Pack index v2 stores a CRC32 for each object's packed bytes.
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
    def _update_idx(self, sha, crc, size):
        # Bucket by the sha's first byte; offset is where the object began.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
        """Encode and write one object; roll the pack over when it's full."""
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        # Cap pack size/object count so pruning stays manageable.
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        # NOTE(review): the _end()/reopen steps appear truncated here.
        self.outbytes = self.count = 0
    def _require_objcache(self):
        # Lazily build the object cache the first time it's needed.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            # NOTE(review): the "raise GitError(" line appears truncated.
                "PackWriter not opened or can't check exists w/o objcache")
633 def exists(self, id, want_source=False):
634 """Return non-empty if an object is found in the object cache."""
635 self._require_objcache()
636 return self.objcache.exists(id, want_source=want_source)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            # Remember it so duplicates within this session are skipped too.
            self._require_objcache()
            self.objcache.add(sha)
        # NOTE(review): the final "return sha" appears truncated here.
    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack.

        shalist is a sequence of (mode, name, binsha) tuples.
        """
        content = tree_encode(shalist)
        return self.maybe_write('tree', content)
    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        # Assemble the commit object line by line; any field may be absent.
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        # NOTE(review): the blank-line and message appends appear truncated.
        return self.maybe_write('commit', '\n'.join(l))

    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # Use the local user's identity for both author and committer.
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,
675 """Remove the pack file from disk."""
681 os.unlink(self.filename + '.pack')
683 def _end(self, run_midx=True):
685 if not f: return None
691 # update object count
693 cp = struct.pack('!i', self.count)
697 # calculate the pack sha1sum
700 for b in chunkyreader(f):
702 packbin = sum.digest()
706 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
708 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
709 if os.path.exists(self.filename + '.map'):
710 os.unlink(self.filename + '.map')
711 os.rename(self.filename + '.pack', nameprefix + '.pack')
712 os.rename(self.filename + '.idx', nameprefix + '.idx')
715 auto_midx(repo('objects/pack'))
718 def close(self, run_midx=True):
719 """Close the pack file and move it to its definitive path."""
720 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        """Write a v2 pack index for the given bucketed idx entries and pack
        checksum, returning the hexdigest naming the pack."""
        # Count objects whose offset needs the 64-bit overflow table.
            for entry in section:
                if entry[2] >= 2**31:

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
            idx_f.truncate(index_len)
            # The C helper fills the mmap'd index in place.
            idx_map = mmap_readwrite(idx_f, close=False)
            count = _helpers.write_idx(filename, idx_map, idx, self.count)
            assert(count == self.count)
            if idx_map: idx_map.close()

        # Reopen to append the pack checksum and the index's own sha1.
        idx_f = open(filename, 'a+b')
            b = idx_f.read(8 + 4*256)

            # The pack's name is the sha1 of the sorted object list.
            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20*self.count):
                obj_list_sum.update(b)
            namebase = obj_list_sum.hexdigest()

            for b in chunkyreader(idx_f):
            idx_f.write(idx_sum.digest())
    # NOTE(review): tail of _git_date(); its def line is not visible here.
    return '%d %s' % (date, utc_offset_str(date))


def _gitenv(repo_dir = None):
    """Return a preexec_fn that points GIT_DIR at repo_dir."""
        os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
def list_refs(refname=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless a ref
    name is specified. Given a ref name, only include tuples for that
    particular ref. The limits restrict the result items to
    refs/heads or refs/tags. If both limits are specified, items from
    both sources will be included.
    """
    argv = ['git', 'show-ref']
        argv.append('--heads')
        argv.append('--tags')
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
        # Each output line is '<sha> <refname>'.
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
    # Take up to two so we can detect an ambiguous (duplicated) refname.
    l = tuple(islice(refs, 2))
def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    (date, hash).

    If count is a non-zero integer, limit the number of commits to "count"
    objects.
    """
    # Guard against ref being interpreted as an option by git.
    assert(not ref.startswith('-'))
        opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
        # Output alternates 'commit <sha>' lines with '%at' date lines.
        if s.startswith('commit '):
            commit = s[7:].decode('hex')
    rv = p.wait() # not fatal
        raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
        # author_sec is the commit's (UTC) epoch-seconds author date.
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    # First try it as a ref name.
    head = read_ref(committish, repo_dir=repo_dir)
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))

    # Otherwise, look the (possibly hex) hash up in the pack indexes.
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))

    if len(committish) == 40:
            hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # Only branch heads and tags may be updated through this helper.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname):
    """Delete a repository reference."""
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
        # Fall back to $BUP_DIR, then ~/.bup.
        repodir = os.environ.get('BUP_DIR')
            repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    # NOTE(review): a "guess_repo(path)" call appears truncated before this.
    d = repo() # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Enable reflogs so ref updates are recoverable.
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
    """
        # Probing the pack directory is enough to verify a repo exists.
        os.stat(repo('objects/pack/.'))
        if e.errno == errno.ENOENT:
            log('error: %r is not a bup repository; run "bup init"\n'
            log('error: %s\n' % e)
965 """Get Git's version and ensure a usable version is installed.
967 The returned version is formatted as an ordered tuple with each position
968 representing a digit in the version tag. For example, the following tuple
969 would represent version 1.6.6.9:
975 p = subprocess.Popen(['git', '--version'],
976 stdout=subprocess.PIPE)
977 gvs = p.stdout.read()
978 _git_wait('git --version', p)
979 m = re.match(r'git version (\S+.\S+)', gvs)
981 raise GitError('git --version weird output: %r' % gvs)
982 _ver = tuple(m.group(1).split('.'))
983 needed = ('1','5', '3', '1')
985 raise GitError('git version %s or higher is required; you have %s'
986 % ('.'.join(needed), '.'.join(_ver)))
def _git_wait(cmd, p):
    """Wait for subprocess p; raise GitError if it exited nonzero."""
        raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    """Run a git command in the repo and return its stdout as a string."""
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    _git_wait(repr(argv), p)
class _AbortableIter:
    """Iterator wrapper that invokes a callback when iteration is aborted."""
    def __init__(self, it, onabort = None):
        self.onabort = onabort
    # NOTE(review): the lines below are fragments of next() and abort();
    # their def lines are not visible here.
            return self.it.next()
        except StopIteration, e:
        """Abort iteration and call the abortion callback, if needed."""
1035 """Link to 'git cat-file' that is used to retrieve blob data."""
1036 def __init__(self, repo_dir = None):
1038 self.repo_dir = repo_dir
1039 wanted = ('1','5','6')
1042 log('warning: git version < %s; bup will be slow.\n'
1045 self.get = self._slow_get
1047 self.p = self.inprogress = None
1048 self.get = self._fast_get
1052 self.p.stdout.close()
1053 self.p.stdin.close()
1055 self.inprogress = None
1059 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1060 stdin=subprocess.PIPE,
1061 stdout=subprocess.PIPE,
1064 preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        """Yield the object type, then its content chunks, via the long-lived
        'git cat-file --batch' subprocess."""
        # Restart the subprocess if it died or was never started.
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
            log('_fast_get: opening %r while %r is open\n'
                % (id, self.inprogress))
        # Only one object may be streamed at a time.
        assert(not self.inprogress)
        # id must be a single clean token for the batch protocol.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        # Response header: '<sha> <type> <size>' or '<id> missing'.
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise KeyError('blob %r is missing' % id)
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl

        # Abort the whole pipe if the consumer stops early, so the protocol
        # stream doesn't get out of sync.
        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
            # The batch protocol terminates each object with a newline.
            readline_result = self.p.stdout.readline()
            assert(readline_result == '\n')
            self.inprogress = None
        except Exception, e:
    def _slow_get(self, id):
        """Yield the object type, then its content, spawning one git
        cat-file process per object (for old git versions)."""
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
        _git_wait('git cat-file', p)
    def _join(self, it):
        """Recursively yield all blob data reachable from one object stream."""
        elif type == 'tree':
            # Recurse into each tree entry.
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            # A commit's first line is 'tree <sha>'; recurse into it.
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
            raise GitError('invalid object type %r: expected blob/tree/commit'

        """Generate a list of the content of all blobs that can be reached
        from an object. The hash given in 'id' must point to a blob, a tree
        or a commit. The content of all blobs that can be seen from trees or
        commits will be added to the list.
        """
            for d in self._join(self.get(id)):
        except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # One cached CatPipe per absolute repo path.
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
        cp = CatPipe(repo_dir)
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name) # more than one tag can point at 'c'