1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
12 hostname, log, merge_iter, mmap_read, mmap_readwrite,
13 progress, qprogress, unlink, username, userfullname,
17 max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
18 max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
24 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
25 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
31 class GitError(Exception):
def parse_tz_offset(s):
    """UTC offset in seconds."""
    # s is a git-style timezone field like '+0130' or '-0800':
    # a sign, two hour digits, two minute digits.
    # NOTE(review): the sign handling/return is outside this view — the
    # visible code only computes the unsigned magnitude in seconds.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
43 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
44 # Make sure that's authoritative.
45 _start_end_char = r'[^ .,:;<>"\'\0\n]'
46 _content_char = r'[^\0\n<>]'
47 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
49 _start_end_char, _content_char, _start_end_char)
50 _tz_rx = r'[-+]\d\d[0-5]\d'
51 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
52 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
53 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
54 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
56 (?P<message>(?:.|\n)*)''' % (_parent_rx,
57 _safe_str_rx, _safe_str_rx, _tz_rx,
58 _safe_str_rx, _safe_str_rx, _tz_rx))
59 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
62 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
63 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
64 'author_name', 'author_mail',
65 'author_sec', 'author_offset',
66 'committer_name', 'committer_mail',
67 'committer_sec', 'committer_offset',
70 def parse_commit(content):
71 commit_match = re.match(_commit_rx, content)
73 raise Exception('cannot parse commit %r' % content)
74 matches = commit_match.groupdict()
75 return CommitInfo(tree=matches['tree'],
76 parents=re.findall(_parent_hash_rx, matches['parents']),
77 author_name=matches['author_name'],
78 author_mail=matches['author_mail'],
79 author_sec=int(matches['asec']),
80 author_offset=parse_tz_offset(matches['atz']),
81 committer_name=matches['committer_name'],
82 committer_mail=matches['committer_mail'],
83 committer_sec=int(matches['csec']),
84 committer_offset=parse_tz_offset(matches['ctz']),
85 message=matches['message'])
def get_commit_items(id, cp):
    """Return a CommitInfo tuple for the commit named by id.

    cp must be a CatPipe-like object: cp.get(id) yields the object's
    type string first, then the raw commit content.
    """
    commit_it = cp.get(id)
    # Use the next() builtin (Python 2.6+) instead of the iterator's
    # .next() method; identical behavior, but also forward-compatible.
    assert(next(commit_it) == 'commit')
    commit_content = ''.join(commit_it)
    return parse_commit(commit_content)
95 def repo(sub = '', repo_dir=None):
96 """Get the path to the git repository or one of its subdirectories."""
98 repo_dir = repo_dir or repodir
100 raise GitError('You should call check_repo_or_die()')
102 # If there's a .git subdirectory, then the actual repo is in there.
103 gd = os.path.join(repo_dir, '.git')
104 if os.path.exists(gd):
107 return os.path.join(repo_dir, sub)
111 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
116 full = os.path.abspath(path)
117 fullrepo = os.path.abspath(repo(''))
118 if not fullrepo.endswith('/'):
120 if full.startswith(fullrepo):
121 path = full[len(fullrepo):]
122 if path.startswith('index-cache/'):
123 path = path[len('index-cache/'):]
124 return shorten_hash(path)
128 paths = [repo('objects/pack')]
129 paths += glob.glob(repo('index-cache/*/.'))
133 def auto_midx(objdir):
134 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
136 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
138 # make sure 'args' gets printed to help with debugging
139 add_error('%r: exception: %s' % (args, e))
142 add_error('%r: returned %d' % (args, rv))
144 args = [path.exe(), 'bloom', '--dir', objdir]
146 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
148 # make sure 'args' gets printed to help with debugging
149 add_error('%r: exception: %s' % (args, e))
152 add_error('%r: returned %d' % (args, rv))
155 def mangle_name(name, mode, gitmode):
156 """Mangle a file name to present an abstract name for segmented files.
157 Mangled file names will have the ".bup" extension added to them. If a
158 file's name already ends with ".bup", a ".bupl" extension is added to
159 disambiguate normal files from segmented ones.
161 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
162 assert(stat.S_ISDIR(gitmode))
164 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
165 return name + '.bupl'
170 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
171 def demangle_name(name, mode):
172 """Remove name mangling from a file name, if necessary.
174 The return value is a tuple (demangled_filename,mode), where mode is one of
177 * BUP_NORMAL : files that should be read as-is from the repository
178 * BUP_CHUNKED : files that were chunked and need to be reassembled
180 For more information on the name mangling algorithm, see mangle_name()
182 if name.endswith('.bupl'):
183 return (name[:-5], BUP_NORMAL)
184 elif name.endswith('.bup'):
185 return (name[:-4], BUP_CHUNKED)
186 elif name.endswith('.bupm'):
188 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
190 return (name, BUP_NORMAL)
193 def calc_hash(type, content):
194 """Calculate some content's hash in the Git fashion."""
195 header = '%s %d\0' % (type, len(content))
201 def shalist_item_sort_key(ent):
202 (mode, name, id) = ent
203 assert(mode+0 == mode)
204 if stat.S_ISDIR(mode):
210 def tree_encode(shalist):
211 """Generate a git tree object from (mode,name,hash) tuples."""
212 shalist = sorted(shalist, key = shalist_item_sort_key)
214 for (mode,name,bin) in shalist:
216 assert(mode+0 == mode)
218 assert(len(bin) == 20)
219 s = '%o %s\0%s' % (mode,name,bin)
220 assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
225 def tree_decode(buf):
226 """Generate a list of (mode,name,hash) from the git tree object in buf."""
228 while ofs < len(buf):
229 z = buf.find('\0', ofs)
231 spl = buf[ofs:z].split(' ', 1)
232 assert(len(spl) == 2)
234 sha = buf[z+1:z+1+20]
236 yield (int(mode, 8), name, sha)
239 def _encode_packobj(type, content, compression_level=1):
242 szbits = (sz & 0x0f) | (_typemap[type]<<4)
245 if sz: szbits |= 0x80
251 if compression_level > 9:
252 compression_level = 9
253 elif compression_level < 0:
254 compression_level = 0
255 z = zlib.compressobj(compression_level)
257 yield z.compress(content)
261 def _encode_looseobj(type, content, compression_level=1):
262 z = zlib.compressobj(compression_level)
263 yield z.compress('%s %d\0' % (type, len(content)))
264 yield z.compress(content)
268 def _decode_looseobj(buf):
270 s = zlib.decompress(buf)
277 assert(type in _typemap)
278 assert(sz == len(content))
279 return (type, content)
282 def _decode_packobj(buf):
285 type = _typermap[(c & 0x70) >> 4]
292 sz |= (c & 0x7f) << shift
296 return (type, zlib.decompress(buf[i+1:]))
303 def find_offset(self, hash):
304 """Get the offset of an object inside the index file."""
305 idx = self._idx_from_hash(hash)
307 return self._ofs_from_idx(idx)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        # With want_source=True the caller gets the index file's basename
        # instead of True, so it can tell which pack held the object.
        if hash and (self._idx_from_hash(hash) != None):
            return want_source and os.path.basename(self.name) or True
317 return int(self.fanout[255])
319 def _idx_from_hash(self, hash):
320 global _total_searches, _total_steps
322 assert(len(hash) == 20)
324 start = self.fanout[b1-1] # range -1..254
325 end = self.fanout[b1] # range 0..255
327 _total_steps += 1 # lookup table is a step
330 mid = start + (end-start)/2
331 v = self._idx_to_hash(mid)
341 class PackIdxV1(PackIdx):
342 """Object representation of a Git pack index (version 1) file."""
343 def __init__(self, filename, f):
345 self.idxnames = [self.name]
346 self.map = mmap_read(f)
347 self.fanout = list(struct.unpack('!256I',
348 str(buffer(self.map, 0, 256*4))))
349 self.fanout.append(0) # entry "-1"
350 nsha = self.fanout[255]
352 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
    def _ofs_from_idx(self, idx):
        # A v1 index entry is 24 bytes: a 4-byte big-endian pack offset
        # followed by the 20-byte SHA-1; unpack just the offset.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
    def _idx_to_hash(self, idx):
        # The SHA-1 starts 4 bytes into the 24-byte v1 entry (after the
        # pack offset); str() copies the buffer slice into a real string.
        return str(self.shatable[idx*24+4 : idx*24+24])
361 for i in xrange(self.fanout[255]):
362 yield buffer(self.map, 256*4 + 24*i + 4, 20)
365 class PackIdxV2(PackIdx):
366 """Object representation of a Git pack index (version 2) file."""
367 def __init__(self, filename, f):
369 self.idxnames = [self.name]
370 self.map = mmap_read(f)
371 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
372 self.fanout = list(struct.unpack('!256I',
373 str(buffer(self.map, 8, 256*4))))
374 self.fanout.append(0) # entry "-1"
375 nsha = self.fanout[255]
376 self.sha_ofs = 8 + 256*4
377 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
378 self.ofstable = buffer(self.map,
379 self.sha_ofs + nsha*20 + nsha*4,
381 self.ofs64table = buffer(self.map,
382 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
384 def _ofs_from_idx(self, idx):
385 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
387 idx64 = ofs & 0x7fffffff
388 ofs = struct.unpack('!Q',
389 str(buffer(self.ofs64table, idx64*8, 8)))[0]
    def _idx_to_hash(self, idx):
        # v2 stores bare 20-byte SHA-1s contiguously (offsets/CRCs live in
        # separate tables), so the entry stride here is 20 bytes.
        return str(self.shatable[idx*20:(idx+1)*20])
396 for i in xrange(self.fanout[255]):
397 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
402 def __init__(self, dir):
404 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
409 self.do_bloom = False
416 assert(_mpi_count == 0)
419 return iter(idxmerge(self.packs))
422 return sum(len(pack) for pack in self.packs)
424 def exists(self, hash, want_source=False):
425 """Return nonempty if the object exists in the index files."""
426 global _total_searches
428 if hash in self.also:
430 if self.do_bloom and self.bloom:
431 if self.bloom.exists(hash):
432 self.do_bloom = False
434 _total_searches -= 1 # was counted by bloom
436 for i in xrange(len(self.packs)):
438 _total_searches -= 1 # will be incremented by sub-pack
439 ix = p.exists(hash, want_source=want_source)
441 # reorder so most recently used packs are searched first
442 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
447 def refresh(self, skip_midx = False):
448 """Refresh the index list.
449 This method verifies if .midx files were superseded (e.g. all of its
450 contents are in another, bigger .midx file) and removes the superseded
453 If skip_midx is True, all work on .midx files will be skipped and .midx
454 files will be removed from the list.
456 The module-global variable 'ignore_midx' can force this function to
457 always act as if skip_midx was True.
459 self.bloom = None # Always reopen the bloom as it may have been relaced
460 self.do_bloom = False
461 skip_midx = skip_midx or ignore_midx
462 d = dict((p.name, p) for p in self.packs
463 if not skip_midx or not isinstance(p, midx.PackMidx))
464 if os.path.exists(self.dir):
467 for ix in self.packs:
468 if isinstance(ix, midx.PackMidx):
469 for name in ix.idxnames:
470 d[os.path.join(self.dir, name)] = ix
471 for full in glob.glob(os.path.join(self.dir,'*.midx')):
473 mx = midx.PackMidx(full)
474 (mxd, mxf) = os.path.split(mx.name)
476 for n in mx.idxnames:
477 if not os.path.exists(os.path.join(mxd, n)):
478 log(('warning: index %s missing\n' +
479 ' used by %s\n') % (n, mxf))
487 midxl.sort(key=lambda ix:
488 (-len(ix), -xstat.stat(ix.name).st_mtime))
491 for sub in ix.idxnames:
492 found = d.get(os.path.join(self.dir, sub))
493 if not found or isinstance(found, PackIdx):
494 # doesn't exist, or exists but not in a midx
499 for name in ix.idxnames:
500 d[os.path.join(self.dir, name)] = ix
501 elif not ix.force_keep:
502 debug1('midx: removing redundant: %s\n'
503 % os.path.basename(ix.name))
506 for full in glob.glob(os.path.join(self.dir,'*.idx')):
510 except GitError as e:
514 bfull = os.path.join(self.dir, 'bup.bloom')
515 if self.bloom is None and os.path.exists(bfull):
516 self.bloom = bloom.ShaBloom(bfull)
517 self.packs = list(set(d.values()))
518 self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
519 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
523 debug1('PackIdxList: using %d index%s.\n'
524 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
527 """Insert an additional object in the list."""
531 def open_idx(filename):
532 if filename.endswith('.idx'):
533 f = open(filename, 'rb')
535 if header[0:4] == '\377tOc':
536 version = struct.unpack('!I', header[4:8])[0]
538 return PackIdxV2(filename, f)
540 raise GitError('%s: expected idx file version 2, got %d'
541 % (filename, version))
542 elif len(header) == 8 and header[0:4] < '\377tOc':
543 return PackIdxV1(filename, f)
545 raise GitError('%s: unrecognized idx file header' % filename)
546 elif filename.endswith('.midx'):
547 return midx.PackMidx(filename)
549 raise GitError('idx filenames must end with .idx or .midx')
552 def idxmerge(idxlist, final_progress=True):
553 """Generate a list of all the objects reachable in a PackIdxList."""
554 def pfunc(count, total):
555 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
556 % (count*100.0/total, count, total))
557 def pfinal(count, total):
559 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
560 % (100, total, total))
561 return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    # Default objcache factory for PackWriter: an index over every pack
    # in the repository's objects/pack directory.
    return PackIdxList(repo('objects/pack'))
568 """Writes Git objects inside a pack file."""
569 def __init__(self, objcache_maker=_make_objcache, compression_level=1):
575 self.objcache_maker = objcache_maker
577 self.compression_level = compression_level
584 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
586 self.file = os.fdopen(fd, 'w+b')
590 assert(name.endswith('.pack'))
591 self.filename = name[:-5]
592 self.file.write('PACK\0\0\0\2\0\0\0\0')
593 self.idx = list(list() for i in xrange(256))
595 def _raw_write(self, datalist, sha):
598 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
599 # the file never has a *partial* blob. So let's make sure it's
600 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
601 # to our hashsplit algorithm.) f.write() does its own buffering,
602 # but that's okay because we'll flush it in _end().
603 oneblob = ''.join(datalist)
607 raise GitError, e, sys.exc_info()[2]
609 crc = zlib.crc32(oneblob) & 0xffffffff
610 self._update_idx(sha, crc, nw)
615 def _update_idx(self, sha, crc, size):
618 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
620 def _write(self, sha, type, content):
624 sha = calc_hash(type, content)
625 size, crc = self._raw_write(_encode_packobj(type, content,
626 self.compression_level),
628 if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
632 def breakpoint(self):
633 """Clear byte and object counts and return the last processed id."""
635 self.outbytes = self.count = 0
638 def _require_objcache(self):
639 if self.objcache is None and self.objcache_maker:
640 self.objcache = self.objcache_maker()
641 if self.objcache is None:
643 "PackWriter not opened or can't check exists w/o objcache")
645 def exists(self, id, want_source=False):
646 """Return non-empty if an object is found in the object cache."""
647 self._require_objcache()
648 return self.objcache.exists(id, want_source=want_source)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        # Hash first so we can dedup against the objcache before writing.
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            # Ensure the objcache exists, then record the new object so a
            # later maybe_write of the same content is a no-op.
            self._require_objcache()
            self.objcache.add(sha)
    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content.

        Returns the object id; the blob is only written if it is not
        already present (see maybe_write).
        """
        return self.maybe_write('blob', blob)
663 def new_tree(self, shalist):
664 """Create a tree object in the pack."""
665 content = tree_encode(shalist)
666 return self.maybe_write('tree', content)
668 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
670 if tree: l.append('tree %s' % tree.encode('hex'))
671 if parent: l.append('parent %s' % parent.encode('hex'))
672 if author: l.append('author %s %s' % (author, _git_date(adate)))
673 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
676 return self.maybe_write('commit', '\n'.join(l))
678 def new_commit(self, parent, tree, date, msg):
679 """Create a commit object in the pack."""
680 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
681 commit = self._new_commit(tree, parent,
682 userline, date, userline, date,
687 """Remove the pack file from disk."""
693 os.unlink(self.filename + '.pack')
697 def _end(self, run_midx=True):
699 if not f: return None
706 # update object count
708 cp = struct.pack('!i', self.count)
712 # calculate the pack sha1sum
715 for b in chunkyreader(f):
717 packbin = sum.digest()
722 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
724 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
725 if os.path.exists(self.filename + '.map'):
726 os.unlink(self.filename + '.map')
727 os.rename(self.filename + '.pack', nameprefix + '.pack')
728 os.rename(self.filename + '.idx', nameprefix + '.idx')
731 auto_midx(repo('objects/pack'))
734 def close(self, run_midx=True):
735 """Close the pack file and move it to its definitive path."""
736 return self._end(run_midx=run_midx)
738 def _write_pack_idx_v2(self, filename, idx, packbin):
741 for entry in section:
742 if entry[2] >= 2**31:
745 # Length: header + fan-out + shas-and-crcs + overflow-offsets
746 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
748 idx_f = open(filename, 'w+b')
750 idx_f.truncate(index_len)
751 idx_map = mmap_readwrite(idx_f, close=False)
753 count = _helpers.write_idx(filename, idx_map, idx, self.count)
754 assert(count == self.count)
760 idx_f = open(filename, 'a+b')
765 b = idx_f.read(8 + 4*256)
768 obj_list_sum = Sha1()
769 for b in chunkyreader(idx_f, 20*self.count):
771 obj_list_sum.update(b)
772 namebase = obj_list_sum.hexdigest()
774 for b in chunkyreader(idx_f):
776 idx_f.write(idx_sum.digest())
783 return '%d %s' % (date, utc_offset_str(date))
786 def _gitenv(repo_dir = None):
790 os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
794 def list_refs(refname=None, repo_dir=None,
795 limit_to_heads=False, limit_to_tags=False):
796 """Yield (refname, hash) tuples for all repository refs unless a ref
797 name is specified. Given a ref name, only include tuples for that
798 particular ref. The limits restrict the result items to
799 refs/heads or refs/tags. If both limits are specified, items from
800 both sources will be included.
803 argv = ['git', 'show-ref']
805 argv.append('--heads')
807 argv.append('--tags')
811 p = subprocess.Popen(argv,
812 preexec_fn = _gitenv(repo_dir),
813 stdout = subprocess.PIPE)
814 out = p.stdout.read().strip()
815 rv = p.wait() # not fatal
819 for d in out.split('\n'):
820 (sha, name) = d.split(' ', 1)
821 yield (name, sha.decode('hex'))
824 def read_ref(refname, repo_dir = None):
825 """Get the commit id of the most recent commit made on a given ref."""
826 refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
827 l = tuple(islice(refs, 2))
835 def rev_list(ref, count=None, repo_dir=None):
836 """Generate a list of reachable commits in reverse chronological order.
838 This generator walks through commits, from child to parent, that are
839 reachable via the specified ref and yields a series of tuples of the form
842 If count is a non-zero integer, limit the number of commits to "count"
845 assert(not ref.startswith('-'))
848 opts += ['-n', str(atoi(count))]
849 argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
850 p = subprocess.Popen(argv,
851 preexec_fn = _gitenv(repo_dir),
852 stdout = subprocess.PIPE)
856 if s.startswith('commit '):
857 commit = s[7:].decode('hex')
861 rv = p.wait() # not fatal
863 raise GitError, 'git rev-list returned error %d' % rv
866 def get_commit_dates(refs, repo_dir=None):
867 """Get the dates for the specified commit refs. For now, every unique
868 string in refs must resolve to a different commit or this
869 function will fail."""
872 commit = get_commit_items(ref, cp(repo_dir))
873 result.append(commit.author_sec)
877 def rev_parse(committish, repo_dir=None):
878 """Resolve the full hash for 'committish', if it exists.
880 Should be roughly equivalent to 'git rev-parse'.
882 Returns the hex value of the hash if it is found, None if 'committish' does
883 not correspond to anything.
885 head = read_ref(committish, repo_dir=repo_dir)
887 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
890 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
892 if len(committish) == 40:
894 hash = committish.decode('hex')
904 def update_ref(refname, newval, oldval, repo_dir=None):
905 """Update a repository reference."""
908 assert(refname.startswith('refs/heads/') \
909 or refname.startswith('refs/tags/'))
910 p = subprocess.Popen(['git', 'update-ref', refname,
911 newval.encode('hex'), oldval.encode('hex')],
912 preexec_fn = _gitenv(repo_dir))
913 _git_wait('git update-ref', p)
def delete_ref(refname):
    """Remove the named reference from the repository."""
    # Only fully-qualified refs (refs/heads/..., refs/tags/...) make sense.
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    proc = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', proc)
924 def guess_repo(path=None):
925 """Set the path value in the global variable "repodir".
926 This makes bup look for an existing bup repository, but not fail if a
927 repository doesn't exist. Usually, if you are interacting with a bup
928 repository, you would not be calling this function but using
935 repodir = os.environ.get('BUP_DIR')
937 repodir = os.path.expanduser('~/.bup')
940 def init_repo(path=None):
941 """Create the Git bare repository for bup in a given path."""
943 d = repo() # appends a / to the path
944 parent = os.path.dirname(os.path.dirname(d))
945 if parent and not os.path.exists(parent):
946 raise GitError('parent directory "%s" does not exist\n' % parent)
947 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
948 raise GitError('"%s" exists but is not a directory\n' % d)
949 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
950 preexec_fn = _gitenv())
951 _git_wait('git init', p)
952 # Force the index version configuration in order to ensure bup works
953 # regardless of the version of the installed Git binary.
954 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
955 stdout=sys.stderr, preexec_fn = _gitenv())
956 _git_wait('git config', p)
958 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
959 stdout=sys.stderr, preexec_fn = _gitenv())
960 _git_wait('git config', p)
963 def check_repo_or_die(path=None):
964 """Make sure a bup repository exists, and abort if not.
965 If the path to a particular repository was not specified, this function
966 initializes the default repository automatically.
970 os.stat(repo('objects/pack/.'))
972 if e.errno == errno.ENOENT:
973 log('error: %r is not a bup repository; run "bup init"\n'
977 log('error: %s\n' % e)
983 """Get Git's version and ensure a usable version is installed.
985 The returned version is formatted as an ordered tuple with each position
986 representing a digit in the version tag. For example, the following tuple
987 would represent version 1.6.6.9:
993 p = subprocess.Popen(['git', '--version'],
994 stdout=subprocess.PIPE)
995 gvs = p.stdout.read()
996 _git_wait('git --version', p)
997 m = re.match(r'git version (\S+.\S+)', gvs)
999 raise GitError('git --version weird output: %r' % gvs)
1000 _ver = tuple(m.group(1).split('.'))
1001 needed = ('1','5', '3', '1')
1003 raise GitError('git version %s or higher is required; you have %s'
1004 % ('.'.join(needed), '.'.join(_ver)))
1008 def _git_wait(cmd, p):
1011 raise GitError('%s returned %d' % (cmd, rv))
1014 def _git_capture(argv):
1015 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
1017 _git_wait(repr(argv), p)
1021 class _AbortableIter:
1022 def __init__(self, it, onabort = None):
1024 self.onabort = onabort
1032 return self.it.next()
1033 except StopIteration as e:
1041 """Abort iteration and call the abortion callback, if needed."""
1053 """Link to 'git cat-file' that is used to retrieve blob data."""
1054 def __init__(self, repo_dir = None):
1056 self.repo_dir = repo_dir
1057 wanted = ('1','5','6')
1060 log('warning: git version < %s; bup will be slow.\n'
1063 self.get = self._slow_get
1065 self.p = self.inprogress = None
1066 self.get = self._fast_get
1070 self.p.stdout.close()
1071 self.p.stdin.close()
1073 self.inprogress = None
1077 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1078 stdin=subprocess.PIPE,
1079 stdout=subprocess.PIPE,
1082 preexec_fn = _gitenv(self.repo_dir))
1084 def _fast_get(self, id):
1085 if not self.p or self.p.poll() != None:
1088 poll_result = self.p.poll()
1089 assert(poll_result == None)
1091 log('_fast_get: opening %r while %r is open\n'
1092 % (id, self.inprogress))
1093 assert(not self.inprogress)
1094 assert(id.find('\n') < 0)
1095 assert(id.find('\r') < 0)
1096 assert(not id.startswith('-'))
1097 self.inprogress = id
1098 self.p.stdin.write('%s\n' % id)
1099 self.p.stdin.flush()
1100 hdr = self.p.stdout.readline()
1101 if hdr.endswith(' missing\n'):
1102 self.inprogress = None
1103 raise KeyError('blob %r is missing' % id)
1104 spl = hdr.split(' ')
1105 if len(spl) != 3 or len(spl[0]) != 40:
1106 raise GitError('expected blob, got %r' % spl)
1107 (hex, type, size) = spl
1109 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1110 onabort = self._abort)
1115 readline_result = self.p.stdout.readline()
1116 assert(readline_result == '\n')
1117 self.inprogress = None
1118 except Exception as e:
1122 def _slow_get(self, id):
1123 assert(id.find('\n') < 0)
1124 assert(id.find('\r') < 0)
1125 assert(id[0] != '-')
1126 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1129 p = subprocess.Popen(['git', 'cat-file', type, id],
1130 stdout=subprocess.PIPE,
1131 preexec_fn = _gitenv(self.repo_dir))
1132 for blob in chunkyreader(p.stdout):
1134 _git_wait('git cat-file', p)
1136 def _join(self, it):
1141 elif type == 'tree':
1142 treefile = ''.join(it)
1143 for (mode, name, sha) in tree_decode(treefile):
1144 for blob in self.join(sha.encode('hex')):
1146 elif type == 'commit':
1147 treeline = ''.join(it).split('\n')[0]
1148 assert(treeline.startswith('tree '))
1149 for blob in self.join(treeline[5:]):
1152 raise GitError('invalid object type %r: expected blob/tree/commit'
1156 """Generate a list of the content of all blobs that can be reached
1157 from an object. The hash given in 'id' must point to a blob, a tree
1158 or a commit. The content of all blobs that can be seen from trees or
1159 commits will be added to the list.
1162 for d in self._join(self.get(id)):
1164 except StopIteration:
1170 def cp(repo_dir=None):
1171 """Create a CatPipe object or reuse the already existing one."""
1175 repo_dir = os.path.abspath(repo_dir)
1176 cp = _cp.get(repo_dir)
1178 cp = CatPipe(repo_dir)
1183 def tags(repo_dir = None):
1184 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1186 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
1187 assert(n.startswith('refs/tags/'))
1191 tags[c].append(name) # more than one tag can point at 'c'