1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
5 import os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
6 from collections import namedtuple
7 from itertools import islice
9 from bup.helpers import *
10 from bup import _helpers, path, midx, bloom, xstat
# Limits for a single pack file; a new pack is started when either is hit.
max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object

# Mapping between git object-type names and the numeric type codes used in
# pack object headers (and back).
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

# Base class for all errors raised by this module.
class GitError(Exception):
def parse_tz_offset(s):
    """UTC offset in seconds."""
    # s is a git-style "+HHMM"/"-HHMM" timezone string; combine hours+minutes.
    # (Sign handling happens in code not shown here.)
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
       _start_end_char, _content_char, _start_end_char)
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Matches the header of a raw git commit object; the named groups feed
# parse_commit()/CommitInfo below.
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
# Extracts each 40-hex-digit parent hash from the <parents> group above.
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
# The *_offset fields are timezone offsets produced by parse_tz_offset().
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a raw git commit object into a CommitInfo namedtuple.

    Raises Exception if content does not match _commit_rx.
    """
    commit_match = re.match(_commit_rx, content)
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch commit 'id' through cat-pipe 'cp' and return its CommitInfo."""
    item_iter = cp.get(id)
    obj_type = item_iter.next()
    assert(obj_type == 'commit')
    return parse_commit(''.join(item_iter))
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    # Falls back to the module-global 'repodir' set by check_repo_or_die().
    repo_dir = repo_dir or repodir
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
    return os.path.join(repo_dir, sub)
106 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
111 full = os.path.abspath(path)
112 fullrepo = os.path.abspath(repo(''))
113 if not fullrepo.endswith('/'):
115 if full.startswith(fullrepo):
116 path = full[len(fullrepo):]
117 if path.startswith('index-cache/'):
118 path = path[len('index-cache/'):]
119 return shorten_hash(path)
123 paths = [repo('objects/pack')]
124 paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    # Run 'bup midx --auto' and then 'bup bloom' on objdir, recording any
    # failure via add_error() instead of raising.
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
    # Also catch names whose last-but-one suffix is '.bup' (e.g. '.bupl'),
    # so demangling stays unambiguous.
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
# Storage-type codes returned by demangle_name().
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.
    The return value is a tuple (demangled_filename,mode), where mode is one of
    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled
    For more information on the name mangling algorithm, see mangle_name()
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
        return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # Git hashes the header "<type> <length>\0" followed by the content.
    header = '%s %d\0' % (type, len(content))
def shalist_item_sort_key(ent):
    # Sort key for (mode, name, id) tree entries; directories are keyed
    # differently (git sorts tree names as if directories end in '/').
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be an integer
    if stat.S_ISDIR(mode):
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)
        assert(len(bin) == 20)  # binary (not hex) sha1s are exactly 20 bytes
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    while ofs < len(buf):
        # Each entry is "<octal mode> <name>\0" followed by a 20-byte sha.
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    # Pack object header: low nibble of size plus the 3-bit type code;
    # the high bit of each byte means "more size bytes follow" (varint).
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80
    # Clamp compression_level into zlib's valid 0..9 range.
    if compression_level > 9:
        compression_level = 9
    elif compression_level < 0:
        compression_level = 0
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    # A loose object is the zlib stream of "<type> <length>\0" + content.
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
def _decode_looseobj(buf):
    s = zlib.decompress(buf)
    assert(type in _typemap)  # header must name a known git object type
    assert(sz == len(content))  # declared size must match the payload
    return (type, content)
def _decode_packobj(buf):
    # Bits 4-6 of the first byte carry the object type code.
    type = _typermap[(c & 0x70) >> 4]
    # Accumulate the little-endian size varint, 7 bits per byte.
    sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
            return self._ofs_from_idx(idx)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # When the caller wants the source, report this index's basename.
            return want_source and os.path.basename(self.name) or True
312 return int(self.fanout[255])
    def _idx_from_hash(self, hash):
        # Binary-search the fanout-delimited range for 'hash' (a binary sha1).
        global _total_searches, _total_steps
        assert(len(hash) == 20)
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
            mid = start + (end-start)/2
            v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v1 layout: 256 4-byte fanout entries, then 24-byte rows of
        # (4-byte offset, 20-byte sha).
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # First 4 bytes of each 24-byte row are the pack offset.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # Bytes 4..24 of each row are the binary sha1.
        return str(self.shatable[idx*24+4 : idx*24+24])

        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v2 magic '\377tOc' followed by version number 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        # v2 layout: shas, then crcs, then 4-byte offsets, then the
        # 8-byte overflow offset table.
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
            # High bit set: low 31 bits index into the 64-bit offset table.
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    def __init__(self, dir):
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        # do_bloom is toggled by refresh()/exists(); start without the bloom.
        self.do_bloom = False
411 assert(_mpi_count == 0)
414 return iter(idxmerge(self.packs))
417 return sum(len(pack) for pack in self.packs)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        # 'also' is checked first, then the bloom filter, then each pack.
        if hash in self.also:
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                # Bloom says "maybe"; fall through to the real indexes.
                self.do_bloom = False
                _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.
        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        self.bloom = None # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Start from the packs we already have open, keyed by file name.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            # Open any .midx files we haven't seen yet, dropping broken ones.
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                    mx = midx.PackMidx(full)
                    (mxd, mxf) = os.path.split(mx.name)
                    for n in mx.idxnames:
                        if not os.path.exists(os.path.join(mxd, n)):
                            log(('warning: index %s missing\n' +
                                 '  used by %s\n') % (n, mxf))
            # Prefer bigger, newer midxes; later ones are redundant if all
            # their sub-indexes are already covered.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
                for sub in ix.idxnames:
                    found = d.get(os.path.join(self.dir, sub))
                    if not found or isinstance(found, PackIdx):
                        # doesn't exist, or exists but not in a midx
                for name in ix.idxnames:
                    d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % os.path.basename(ix.name))
        for full in glob.glob(os.path.join(self.dir,'*.idx')):
        bfull = os.path.join(self.dir, 'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
        # Only trust the bloom filter if it covers every object we know of.
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
522 """Insert an additional object in the list."""
def open_idx(filename):
    """Open a .idx or .midx file, returning the matching index object."""
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        if header[0:4] == '\377tOc':  # pack-idx v2+ magic
            version = struct.unpack('!I', header[4:8])[0]
                return PackIdxV2(filename, f)
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            # No magic: a v1 index (the fanout table starts immediately).
            return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    # Progress callbacks handed to merge_iter().
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default object-cache factory: a PackIdxList over the repo's packs."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
563 """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1):
        # objcache_maker is called lazily by _require_objcache().
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
579 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
580 self.file = os.fdopen(fd, 'w+b')
581 assert(name.endswith('.pack'))
582 self.filename = name[:-5]
583 self.file.write('PACK\0\0\0\2\0\0\0\0')
584 self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
            # Re-raise as GitError, preserving the original traceback.
            raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
    def _update_idx(self, sha, crc, size):
        # Record (sha, crc, offset) in the bucket for the sha's first byte.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
            # Pack limits reached: the elided code presumably starts a new
            # pack (see breakpoint()) -- confirm.
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        self.outbytes = self.count = 0
    def _require_objcache(self):
        # Lazily build the object cache; fail loudly if none can be made.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
                "PackWriter not opened or can't check exists w/o objcache")
636 def exists(self, id, want_source=False):
637 """Return non-empty if an object is found in the object cache."""
638 self._require_objcache()
639 return self.objcache.exists(id, want_source=want_source)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            self._require_objcache()
            # Remember the new object so later duplicates are skipped.
            self.objcache.add(sha)
650 def new_blob(self, blob):
651 """Create a blob object in the pack with the supplied content."""
652 return self.maybe_write('blob', blob)
654 def new_tree(self, shalist):
655 """Create a tree object in the pack."""
656 content = tree_encode(shalist)
657 return self.maybe_write('tree', content)
    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        # Assemble the commit object line by line; each field is optional.
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        return self.maybe_write('commit', '\n'.join(l))
    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # The current user is both author and committer, with the same date.
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,
678 """Remove the pack file from disk."""
684 os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        # Finish the pack: fix up the header, write the idx, and rename both
        # files into place under their content-derived name.
        if not f: return None

        # update object count
        cp = struct.pack('!i', self.count)

        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)

        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
            auto_midx(repo('objects/pack'))
721 def close(self, run_midx=True):
722 """Close the pack file and move it to its definitive path."""
723 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Count entries whose pack offset needs the 64-bit overflow table.
            for entry in section:
                if entry[2] >= 2**31:
        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)

        idx_f = open(filename, 'w+b')
            idx_f.truncate(index_len)
            idx_map = mmap_readwrite(idx_f, close=False)
            count = _helpers.write_idx(filename, idx_map, idx, self.count)
            assert(count == self.count)
            if idx_map: idx_map.close()
        # Reopen to append the trailing checksums over the written index.
        idx_f = open(filename, 'a+b')
        b = idx_f.read(8 + 4*256)

        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
768 return '%d %s' % (date, utc_offset_str(date))
def _gitenv(repo_dir = None):
    # NOTE(review): sets GIT_DIR in this process's environment; callers use
    # the result as a Popen preexec_fn -- confirm the elided lines return a
    # callable that does this in the child.
    os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
def list_refs(refname=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless a ref
    name is specified. Given a ref name, only include tuples for that
    particular ref. The limits restrict the result items to
    refs/heads or refs/tags. If both limits are specified, items from
    both sources will be included.
    argv = ['git', 'show-ref']
        argv.append('--heads')
        argv.append('--tags')
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    # show-ref prints "<sha> <refname>" per line; convert sha to binary.
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
    # Take up to two matches -- presumably so multiple matches can be
    # detected by the elided code; verify.
    l = tuple(islice(refs, 2))
def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.
    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    If count is a non-zero integer, limit the number of commits to "count"
    assert(not ref.startswith('-'))  # keep the ref from parsing as an option
        opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
        # Output alternates "commit <hex>" lines and %at timestamp lines.
        if s.startswith('commit '):
            commit = s[7:].decode('hex')
    rv = p.wait() # not fatal
        raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs.  For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
        commit = get_commit_items(ref, cp(repo_dir))
        # author_sec is the author timestamp in (UTC) epoch seconds.
        result.append(commit.author_sec)
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.
    Should be roughly equivalent to 'git rev-parse'.
    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    # Try refs first, then fall back to searching the pack indexes.
    head = read_ref(committish, repo_dir=repo_dir)
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # Only branch heads and tags may be updated through this helper.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname):
    """Delete a repository reference (refname must live under refs/)."""
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    proc = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', proc)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    # Fall back to $BUP_DIR, then to ~/.bup.
        repodir = os.environ.get('BUP_DIR')
            repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    d = repo() # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Keep a reflog for all ref updates (core.logAllRefUpdates=true).
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
        # Probing the pack directory is how we detect a valid repo.
        os.stat(repo('objects/pack/.'))
        if e.errno == errno.ENOENT:
            log('error: %r is not a bup repository; run "bup init"\n'
            log('error: %s\n' % e)
968 """Get Git's version and ensure a usable version is installed.
970 The returned version is formatted as an ordered tuple with each position
971 representing a digit in the version tag. For example, the following tuple
972 would represent version 1.6.6.9:
978 p = subprocess.Popen(['git', '--version'],
979 stdout=subprocess.PIPE)
980 gvs = p.stdout.read()
981 _git_wait('git --version', p)
982 m = re.match(r'git version (\S+.\S+)', gvs)
984 raise GitError('git --version weird output: %r' % gvs)
985 _ver = tuple(m.group(1).split('.'))
986 needed = ('1','5', '3', '1')
988 raise GitError('git version %s or higher is required; you have %s'
989 % ('.'.join(needed), '.'.join(_ver)))
def _git_wait(cmd, p):
    # Wait for subprocess 'p'; a nonzero exit raises GitError naming 'cmd'.
        raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    # Run a git command with the bup repo environment and capture its stdout.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    _git_wait(repr(argv), p)
class _AbortableIter:
    # Iterator wrapper that can be aborted mid-iteration; the 'onabort'
    # callback (if any) is invoked when iteration is cut short.
    def __init__(self, it, onabort = None):
        self.onabort = onabort
            return self.it.next()
        except StopIteration, e:
        """Abort iteration and call the abortion callback, if needed."""
1038 """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        # git older than 1.5.6 gets the slow per-object path -- presumably
        # because 'git cat-file --batch' (used by _fast_get) is unavailable.
        wanted = ('1','5','6')
            log('warning: git version < %s; bup will be slow.\n'
            self.get = self._slow_get
            self.p = self.inprogress = None
            self.get = self._fast_get
1055 self.p.stdout.close()
1056 self.p.stdin.close()
1058 self.inprogress = None
1062 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1063 stdin=subprocess.PIPE,
1064 stdout=subprocess.PIPE,
1067 preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        # Stream one object through the long-running 'git cat-file --batch'
        # process: write the id, read the "<sha> <type> <size>" header, then
        # yield the type followed by the object's content chunks.
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
            log('_fast_get: opening %r while %r is open\n'
                % (id, self.inprogress))
        assert(not self.inprogress)
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise KeyError('blob %r is missing' % id)
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl

        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
            # --batch terminates each object with a newline.
            readline_result = self.p.stdout.readline()
            assert(readline_result == '\n')
            self.inprogress = None
        except Exception, e:
    def _slow_get(self, id):
        # Fallback path: spawn one 'git cat-file' per object.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
        _git_wait('git cat-file', p)
    def _join(self, it):
        # Recursively expand a get() stream: trees recurse into each entry,
        # commits recurse into their root tree.
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
            raise GitError('invalid object type %r: expected blob/tree/commit'
1141 """Generate a list of the content of all blobs that can be reached
1142 from an object. The hash given in 'id' must point to a blob, a tree
1143 or a commit. The content of all blobs that can be seen from trees or
1144 commits will be added to the list.
1147 for d in self._join(self.get(id)):
1149 except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # One cached CatPipe per absolute repo path (stored in _cp).
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
        cp = CatPipe(repo_dir)
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name) # more than one tag can point at 'c'