1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
6 from collections import namedtuple
8 from bup.helpers import *
9 from bup import _helpers, path, midx, bloom, xstat
# Rollover thresholds for PackWriter: when either is exceeded the current
# pack is finished and a new one is started.
max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object

# Numeric object-type codes used in git pack files, and the reverse map.
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
class GitError(Exception):
    """Base class for errors raised by this module."""
def parse_tz_offset(s):
    """Return a git timezone string like '+0130' or '-0700' as a UTC
    offset in seconds."""
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    # As shown upstream of this fix the sign byte was ignored and nothing
    # was returned; honor the leading '-' and return the offset.
    if s[0] == '-':
        return - tz_off
    return tz_off
37 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
38 # Make sure that's authoritative.
39 _start_end_char = r'[^ .,:;<>"\'\0\n]'
40 _content_char = r'[^\0\n<>]'
41 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
43 _start_end_char, _content_char, _start_end_char)
44 _tz_rx = r'[-+]\d\d[0-5]\d'
45 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
46 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
47 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
48 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
50 (?P<message>(?:.|\n)*)''' % (_parent_rx,
51 _safe_str_rx, _safe_str_rx, _tz_rx,
52 _safe_str_rx, _safe_str_rx, _tz_rx))
53 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
# (The field list was truncated in this copy; parse_commit() sets all of
# these fields, including 'message'.)
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
                                       'message'])
def parse_commit(content):
    """Parse a raw git commit object into a CommitInfo namedtuple.

    Raises Exception if content does not match the commit grammar.
    """
    commit_match = re.match(_commit_rx, content)
    # As shown upstream of this fix the raise was unconditional; only fail
    # when the commit didn't actually match.
    if not commit_match:
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch the commit named by id via the cat pipe cp and return it
    parsed as a CommitInfo."""
    item_iter = cp.get(id)
    item_type = item_iter.next()
    assert(item_type == 'commit')
    return parse_commit(''.join(item_iter))
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    repo_dir = repo_dir or repodir
    # As shown upstream of this fix the raise was unconditional and the
    # '.git' redirection branch had no body.
    if not repo_dir:
        raise GitError('You should call check_repo_or_die()')
    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
        repo_dir = gd
    return os.path.join(repo_dir, sub)
105 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
110 full = os.path.abspath(path)
111 fullrepo = os.path.abspath(repo(''))
112 if not fullrepo.endswith('/'):
114 if full.startswith(fullrepo):
115 path = full[len(fullrepo):]
116 if path.startswith('index-cache/'):
117 path = path[len('index-cache/'):]
118 return shorten_hash(path)
122 paths = [repo('objects/pack')]
123 paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    # Regenerate midx and bloom files for objdir by shelling out to
    # 'bup midx --auto' and 'bup bloom'; failures are recorded via
    # add_error() rather than raised.
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: Exception: %s!' % (args, e))
    add_error('%r: Returned %d!' % (args, rv))
    args = [path.exe(), 'bloom', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: Exception: %s!' % (args, e))
    add_error('%r: Returned %d!' % (args, rv))
    # NOTE(review): the try/except blocks and 'if rv:' guards around both
    # calls appear to be missing from this copy — confirm against upstream.
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        # A regular file stored as a git tree: it was chunked, mark it.
        assert(stat.S_ISDIR(gitmode))
        return name + '.bup'
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        # Name collides with the mangling scheme; escape it.
        return name + '.bupl'
    else:
        return name
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of
    the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    else:
        # Restored fallthrough: unmangled names are normal files.
        return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    header = '%s %d\0' % (type, len(content))
    # Sha1 comes from bup.helpers (imported with *).  As shown upstream of
    # this fix, the header was built but nothing was hashed or returned.
    sum = Sha1()
    sum.update(header)
    sum.update(content)
    return sum.digest()
def shalist_item_sort_key(ent):
    """Sort key for git tree entries: directories sort as 'name/',
    matching git's tree ordering."""
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be an integer
    if stat.S_ISDIR(mode):
        return name + '/'
    else:
        # Restored: non-directories sort by their plain name.
        return name
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    # Restored accumulator and return (missing in this copy).
    l = []
    for (mode,name,bin) in shalist:
        assert(mode)
        assert(mode+0 == mode)
        assert(name)
        assert(len(bin) == 20)
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        l.append(s)
    return ''.join(l)
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    # Restored offset initialization and advancement (missing in this copy);
    # without them the loop never starts cleanly or terminates.
    ofs = 0
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        assert(z > ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        mode,name = spl
        # The 20 raw sha bytes follow the NUL terminator.
        sha = buf[z+1:z+1+20]
        ofs = z+1+20
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    """Yield the pack encoding of one object: the variable-length
    size/type header, then the zlib-compressed content."""
    szout = ''
    sz = len(content)
    # First byte: low nybble of size plus the numeric type code.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    sz >>= 4
    while 1:
        if sz: szbits |= 0x80  # high bit set: another size byte follows
        szout += chr(szbits)
        if not sz:
            break
        szbits = sz & 0x7f
        sz >>= 7
    # Clamp the compression level to zlib's valid 0..9 range.
    if compression_level > 9:
        compression_level = 9
    elif compression_level < 0:
        compression_level = 0
    z = zlib.compressobj(compression_level)
    yield szout
    yield z.compress(content)
    yield z.flush()
def _encode_looseobj(type, content, compression_level=1):
    """Yield the loose-object encoding: zlib-compressed
    '<type> <len>\\0<content>'."""
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    # Restored final flush; without it the zlib stream is truncated and
    # cannot be decompressed.
    yield z.flush()
def _decode_looseobj(buf):
    """Decode a loose object; return (type, content)."""
    assert(buf)
    s = zlib.decompress(buf)
    # Restored header parse (missing in this copy): the header is
    # '<type> <size>' terminated by a NUL, then the raw content.
    i = s.find('\0')
    assert(i > 0)
    (type, sz) = s[:i].split(' ')
    sz = int(sz)
    content = s[i+1:]
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)
def _decode_packobj(buf):
    """Decode a pack-encoded object; return (type, content)."""
    assert(buf)
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    # Restored variable-length size decode (missing in this copy): low
    # nybble of the first byte, then 7 bits per continuation byte.
    sz = c & 0x0f
    shift = 4
    i = 0
    while c & 0x80:
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
    return (type, zlib.decompress(buf[i+1:]))
294 def find_offset(self, hash):
295 """Get the offset of an object inside the index file."""
296 idx = self._idx_from_hash(hash)
298 return self._ofs_from_idx(idx)
301 def exists(self, hash, want_source=False):
302 """Return nonempty if the object exists in this index."""
303 if hash and (self._idx_from_hash(hash) != None):
304 return want_source and os.path.basename(self.name) or True
308 return int(self.fanout[255])
    def _idx_from_hash(self, hash):
        # Binary-search the sorted sha table for hash, starting from the
        # fanout bucket selected by the hash's first byte.
        global _total_searches, _total_steps
        assert(len(hash) == 20)
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
        mid = start + (end-start)/2
        v = self._idx_to_hash(mid)
        # NOTE(review): 'b1 = ord(hash[0])', the search loop and the
        # comparison/returns appear to be missing from this copy — confirm
        # against upstream bup.
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    # NOTE(review): several statements (self.name assignment, self.sha_ofs,
    # __len__, the __iter__ header) appear to be missing from this copy.
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # V1 layout: 256 fanout entries, then 24-byte rows of (ofs32, sha).
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
    def _ofs_from_idx(self, idx):
        # Offset is the first 4 bytes of the 24-byte row.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
    def _idx_to_hash(self, idx):
        # Sha is the last 20 bytes of the 24-byte row.
        return str(self.shatable[idx*24+4 : idx*24+24])
    # NOTE(review): the following belongs to a __iter__ method whose def
    # line is missing from this copy.
    for i in xrange(self.fanout[255]):
        yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    # NOTE(review): several statements (self.name assignment, __len__, the
    # __iter__ header, some closing parens) appear to be missing from this
    # copy — confirm against upstream bup.
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # V2 magic ('\377tOc') plus version 2 check.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        # V2 layout: shas, then crcs, then 32-bit offsets, then a 64-bit
        # offset table for entries that don't fit in 32 bits.
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # When the high bit is set the real offset lives in the 64-bit table.
        idx64 = ofs & 0x7fffffff
        ofs = struct.unpack('!Q',
                            str(buffer(self.ofs64table, idx64*8, 8)))[0]
        # NOTE(review): the 'if ofs & 0x80000000:' guard and the return
        # appear to be missing from this copy.
    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])
    # NOTE(review): the following belongs to a __iter__ method whose def
    # line is missing from this copy.
    for i in xrange(self.fanout[255]):
        yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    def __init__(self, dir):
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        self.do_bloom = False
        # NOTE(review): the remaining initialization (self.dir, self.also,
        # self.packs, self.bloom, refresh()) appears to be missing from this
        # copy — confirm against upstream bup.
407 assert(_mpi_count == 0)
410 return iter(idxmerge(self.packs))
413 return sum(len(pack) for pack in self.packs)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        # Objects explicitly registered via add() short-circuit the search.
        if hash in self.also:
        # The bloom filter can cheaply rule objects out; a "maybe" answer
        # falls through to the real per-pack search below.
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]
        # NOTE(review): the return statements, 'p = self.packs[i]' and the
        # bloom early-return branch appear to be missing from this copy.
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Keep already-open packs so we don't reopen their files.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                mx = midx.PackMidx(full)
                (mxd, mxf) = os.path.split(mx.name)
                for n in mx.idxnames:
                    if not os.path.exists(os.path.join(mxd, n)):
                        log('Warning: Index %s missing!' % n)
                        log(' used by %s\n' % mxf)
            # Prefer the largest (then newest) midx files.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for sub in ix.idxnames:
                found = d.get(os.path.join(self.dir, sub))
                if not found or isinstance(found, PackIdx):
                    # doesn't exist, or exists but not in a midx
                for name in ix.idxnames:
                    d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('Multi-index: Removing redundant: %s'
                           % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            self.packs = list(set(d.values()))
            self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: Using %d index%s.'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
        # NOTE(review): many lines are missing from this copy (midxl
        # accumulation, the per-midx loop header, broken-midx handling, the
        # .idx open loop body, do_bloom assignment) — confirm against
        # upstream bup before relying on this logic.
518 """Insert an additional object in the list."""
def open_idx(filename):
    """Open a pack index (.idx v1/v2 or .midx) and return the wrapper
    object for it.  Raises GitError for unrecognized files."""
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        # Restored header read and version comparison (missing in this
        # copy): V2 files start with the '\377tOc' magic.
        header = f.read(8)
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            if version == 2:
                return PackIdxV2(filename, f)
            else:
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            # No magic: a V1 index (its first field is a fanout count).
            return PackIdxV1(filename, f)
        else:
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    else:
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        progress_update('Reading indexes: %.2f%% (%d/%d) ...'
                        % (count*100.0/total, count, total), False)
    def pfinal(count, total):
        # Restored guard: without it the final_progress parameter was
        # ignored and the "done" message always printed.
        if final_progress:
            progress_end('Reading indexes: %.2f%% (%d/%d), done.'
                         % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default objcache factory: a PackIdxList over the repository's
    pack directory."""
    packdir = repo('objects/pack')
    return PackIdxList(packdir)
559 """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1):
        # Factory for the object-existence cache; created lazily by
        # _require_objcache().
        self.objcache_maker = objcache_maker
        # zlib compression level passed to _encode_packobj().
        self.compression_level = compression_level
        # NOTE(review): the remaining attribute initialization (count,
        # outbytes, file, idx, objcache, ...) appears to be missing from
        # this copy — confirm against upstream bup.
575 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
576 self.file = os.fdopen(fd, 'w+b')
577 assert(name.endswith('.pack'))
578 self.filename = name[:-5]
579 self.file.write('PACK\0\0\0\2\0\0\0\0')
580 self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
        # NOTE(review): the try/except around the actual write, the 'nw'
        # byte count and the outbytes/count bookkeeping/return appear to be
        # missing from this copy.
    def _update_idx(self, sha, crc, size):
        # Record (sha, crc, offset) in the bucket selected by the sha's
        # first byte; offset is where the object started in the pack file.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
        # Encode and write one object, then roll over to a fresh pack once
        # the size/object-count limits are exceeded.
        sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
            # NOTE(review): the closing 'sha=sha)' of the call above, the
            # breakpoint() call here and the 'return sha' appear to be
            # missing from this copy.
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        self.outbytes = self.count = 0
        # NOTE(review): the _end() call producing the id and the return
        # statement appear to be missing from this copy.
    def _require_objcache(self):
        # Build the objcache lazily on first use; fail if we can't.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            "PackWriter not opened or can't check exists w/o objcache")
            # NOTE(review): the 'raise GitError(' opening for the message
            # above appears to be missing from this copy.
632 def exists(self, id, want_source=False):
633 """Return non-empty if an object is found in the object cache."""
634 self._require_objcache()
635 return self.objcache.exists(id, want_source=want_source)
637 def maybe_write(self, type, content):
638 """Write an object to the pack file if not present and return its id."""
639 sha = calc_hash(type, content)
640 if not self.exists(sha):
641 self._write(sha, type, content)
642 self._require_objcache()
643 self.objcache.add(sha)
646 def new_blob(self, blob):
647 """Create a blob object in the pack with the supplied content."""
648 return self.maybe_write('blob', blob)
650 def new_tree(self, shalist):
651 """Create a tree object in the pack."""
652 content = tree_encode(shalist)
653 return self.maybe_write('tree', content)
    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        # Build the commit text line by line; each header is optional.
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        # NOTE(review): the 'l = []' initialization and the blank-line +
        # message appends appear to be missing from this copy.
        return self.maybe_write('commit', '\n'.join(l))
665 def new_commit(self, parent, tree, date, msg):
666 """Create a commit object in the pack."""
667 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
668 commit = self._new_commit(tree, parent,
669 userline, date, userline, date,
674 """Remove the pack file from disk."""
680 os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        # Finish the pack: fix up the object count in the header, checksum
        # the pack, write the .idx, and rename both into place under a name
        # derived from the object-list hash.
        if not f: return None
        # update object count
        cp = struct.pack('!i', self.count)
        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()
        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        auto_midx(repo('objects/pack'))
        # NOTE(review): numerous lines ('f = self.file', seeks/writes of the
        # count and trailer, the Sha1 accumulation, the 'if run_midx:' guard
        # and the return) appear to be missing from this copy.
717 def close(self, run_midx=True):
718 """Close the pack file and move it to its definitive path."""
719 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Write a version-2 .idx for the just-finished pack and return the
        # hex sha of the sorted object list (used to name the pack).
        for entry in section:
            if entry[2] >= 2**31:
        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        # The C helper fills in the mmap'ed index in place.
        idx_map = mmap_readwrite(idx_f, close=False)
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        if idx_map: idx_map.close()
        idx_f = open(filename, 'a+b')
        b = idx_f.read(8 + 4*256)
        # Hash the sorted sha list to derive the final pack name.
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
        # NOTE(review): several lines (ofs64_count initialization, the
        # try/except, file seeks, idx_sum creation/updates, and the return
        # of namebase) appear to be missing from this copy.
764 return '%d %s' % (date, time.strftime('%z', time.localtime(date)))
def _gitenv(repo_dir = None):
    # Used as a Popen preexec_fn factory: points GIT_DIR at repo_dir so git
    # subcommands operate on the bup repository.
    os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
    # NOTE(review): the repo_dir defaulting, the inner function definition
    # wrapping the assignment above, and its return appear to be missing
    # from this copy — confirm against upstream bup.
def list_refs(refname = None, repo_dir = None):
    """Generate a list of tuples in the form (refname,hash).
    If a ref name is specified, list only this particular ref.
    """
    argv = ['git', 'show-ref', '--']
    # NOTE(review): appending refname to argv appears to be missing here.
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    # Each output line is '<sha> <refname>'.
    for d in out.split('\n'):
        (sha, name) = d.split(' ', 1)
        yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    l = list(list_refs(refname, repo_dir))
    # NOTE(review): the filtering of l and the return of the matching hash
    # (or None) appear to be missing from this copy.
def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    (date,hash).

    If count is a non-zero integer, limit the number of commits to "count"
    objects.
    """
    assert(not ref.startswith('-'))  # refuse refs that parse as options
    opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    # Output alternates 'commit <hex>' headers with '%at' date lines.
    if s.startswith('commit '):
        commit = s[7:].decode('hex')
    rv = p.wait() # not fatal
    raise GitError, 'git rev-list returned error %d' % rv
    # NOTE(review): the 'opts = []' / 'if count:' setup, the stdout read
    # loop, the date parse + yield, and the 'if rv:' guard around the raise
    # appear to be missing from this copy.
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # Restored result accumulation (loop header, list, return) which was
    # missing in this copy.
    result = []
    for ref in refs:
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)
    return result
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    # First try committish as a ref name.
    head = read_ref(committish, repo_dir=repo_dir)
    debug2("Resolved from ref: commit = %s" % head.encode('hex'))
    # Then try it as a raw 40-char hex object id present in the packs.
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    if len(committish) == 40:
        hash = committish.decode('hex')
        # NOTE(review): the 'if head:' guard/return, the try/except around
        # the hex decode, the pL.exists() check and the final returns appear
        # to be missing from this copy.
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # Only branch heads and tags may be updated through this helper.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    # git update-ref checks that the ref still equals oldval (compare-and-swap).
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
    # NOTE(review): two lines preceding the assert appear to be missing
    # from this copy — confirm against upstream bup.
def delete_ref(refname):
    """Delete a repository reference."""
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    # Fall back to $BUP_DIR, then to ~/.bup.
    repodir = os.environ.get('BUP_DIR')
    repodir = os.path.expanduser('~/.bup')
    # NOTE(review): the 'global repodir' declaration, the explicit-path
    # branch and the guards making these assignments conditional appear to
    # be missing from this copy.
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    d = repo() # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # core.logAllRefUpdates=true makes git keep a reflog for ref changes.
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # NOTE(review): a guess_repo(path) call before 'd = repo()' appears to
    # be missing from this copy.
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
    """
    # Probe for the pack directory as evidence of a usable repository.
    os.stat(repo('objects/pack/.'))
    if e.errno == errno.ENOENT:
        log('Error: %r is not a bup repository; run "bup init"!'
        log('Error: %s!' % e)
    # NOTE(review): the guess_repo() call, the try/except wrapping the stat
    # and the sys.exit() calls appear to be missing from this copy.
953 """Get Git's version and ensure a usable version is installed.
955 The returned version is formatted as an ordered tuple with each position
956 representing a digit in the version tag. For example, the following tuple
957 would represent version 1.6.6.9:
963 p = subprocess.Popen(['git', '--version'],
964 stdout=subprocess.PIPE)
965 gvs = p.stdout.read()
966 _git_wait('git --version', p)
967 m = re.match(r'git version (\S+.\S+)', gvs)
969 raise GitError('git --version weird output: %r' % gvs)
970 _ver = tuple(m.group(1).split('.'))
971 needed = ('1','5', '3', '1')
973 raise GitError('git version %s or higher is required; you have %s'
974 % ('.'.join(needed), '.'.join(_ver)))
978 def _git_wait(cmd, p):
981 raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    """Run argv against the repository and return its stdout; raises
    GitError (via _git_wait) on nonzero exit."""
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    # Restored: read stdout before waiting and return it; as shown upstream
    # of this fix the output was never captured, yet callers .strip() it.
    r = p.stdout.read()
    _git_wait(repr(argv), p)
    return r
class _AbortableIter:
    # Wraps an iterator so it can be cleanly aborted mid-stream (used by
    # CatPipe to keep the git cat-file pipe consistent on errors).
    def __init__(self, it, onabort = None):
        # Callback invoked once when the iteration is aborted.
        self.onabort = onabort
        # NOTE(review): 'self.it' / 'self.done' assignments, the __iter__
        # and next() method headers, and the abort() method header for the
        # lines below appear to be missing from this copy.
        return self.it.next()
    except StopIteration, e:
        """Abort iteration and call the abortion callback, if needed."""
1023 """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        # Git older than this lacks a usable 'cat-file --batch'; fall back
        # to one subprocess per object (slow).
        wanted = ('1','5','6')
        log('Warning: Git version < %s; bup will be slow.'
        self.get = self._slow_get
        self.p = self.inprogress = None
        self.get = self._fast_get
        # NOTE(review): the version comparison and the if/else selecting
        # between _slow_get and _fast_get appear to be missing from this
        # copy — confirm against upstream bup.
1040 self.p.stdout.close()
1041 self.p.stdin.close()
1043 self.inprogress = None
1047 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1048 stdin=subprocess.PIPE,
1049 stdout=subprocess.PIPE,
1052 preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        # Fetch one object via the persistent 'git cat-file --batch' pipe.
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
        log('_fast_get: opening %r while %r is open'
            % (id, self.inprogress))
        assert(not self.inprogress)
        # Reject ids that git would misparse as options or extra requests.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        # Response header: '<sha> <type> <size>\n' (or '<id> missing\n').
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise KeyError('blob %r is missing' % id)
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl
        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
        # git terminates the object body with a single newline.
        readline_result = self.p.stdout.readline()
        assert(readline_result == '\n')
        self.inprogress = None
        except Exception, e:
        # NOTE(review): the _restart() call, the yields of type and chunks,
        # and the try block matching the dangling 'except' above appear to
        # be missing from this copy.
    def _slow_get(self, id):
        # Fallback path for old git: one 'git cat-file' invocation per
        # object instead of a persistent --batch pipe.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
        _git_wait('git cat-file', p)
        # NOTE(review): the yield of the type and the loop body yielding
        # each blob chunk appear to be missing from this copy.
    def _join(self, it):
        # Recursively yield the content of all blobs reachable from the
        # object stream it: blobs directly, trees via their entries,
        # commits via their root tree.
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
        raise GitError('invalid object type %r: expected blob/tree/commit'
        # NOTE(review): the leading 'type = it.next()' / blob branch, the
        # inner 'yield blob' statements and the else around the raise appear
        # to be missing from this copy.
1126 """Generate a list of the content of all blobs that can be reached
1127 from an object. The hash given in 'id' must point to a blob, a tree
1128 or a commit. The content of all blobs that can be seen from trees or
1129 commits will be added to the list.
1132 for d in self._join(self.get(id)):
1134 except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # One CatPipe is cached per absolute repo path (module-global _cp dict).
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
    cp = CatPipe(repo_dir)
    # NOTE(review): the 'global _cp' declaration, the default repo_dir
    # fallback, the 'if not cp:' guard and the cache-store/return appear to
    # be missing from this copy.
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    for (n,c) in list_refs(repo_dir = repo_dir):
        if n.startswith('refs/tags/'):
            tags[c].append(name) # more than one tag can point at 'c'
    # NOTE(review): the 'tags = {}' initialization, the slicing of n into
    # name, the empty-list setup for new hashes and the return appear to be
    # missing from this copy.