1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
12 hostname, log, merge_iter, mmap_read, mmap_readwrite,
13 progress, qprogress, unlink, username, userfullname,
17 max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
18 max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
24 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
25 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
31 class GitError(Exception):
35 def parse_tz_offset(s):
36 """UTC offset in seconds."""
37 tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
43 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
44 # Make sure that's authoritative.
45 _start_end_char = r'[^ .,:;<>"\'\0\n]'
46 _content_char = r'[^\0\n<>]'
47 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
49 _start_end_char, _content_char, _start_end_char)
50 _tz_rx = r'[-+]\d\d[0-5]\d'
51 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
52 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
53 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
54 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
56 (?P<message>(?:.|\n)*)''' % (_parent_rx,
57 _safe_str_rx, _safe_str_rx, _tz_rx,
58 _safe_str_rx, _safe_str_rx, _tz_rx))
59 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
62 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
63 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
64 'author_name', 'author_mail',
65 'author_sec', 'author_offset',
66 'committer_name', 'committer_mail',
67 'committer_sec', 'committer_offset',
70 def parse_commit(content):
71 commit_match = re.match(_commit_rx, content)
73 raise Exception('cannot parse commit %r' % content)
74 matches = commit_match.groupdict()
75 return CommitInfo(tree=matches['tree'],
76 parents=re.findall(_parent_hash_rx, matches['parents']),
77 author_name=matches['author_name'],
78 author_mail=matches['author_mail'],
79 author_sec=int(matches['asec']),
80 author_offset=parse_tz_offset(matches['atz']),
81 committer_name=matches['committer_name'],
82 committer_mail=matches['committer_mail'],
83 committer_sec=int(matches['csec']),
84 committer_offset=parse_tz_offset(matches['ctz']),
85 message=matches['message'])
def get_commit_items(id, cp):
    """Return the parsed CommitInfo for the commit named by id.

    id is any object name accepted by cp.get(); cp is a CatPipe
    (see cp()).  Raises via parse_commit() if the content cannot
    be parsed as a commit.
    """
    commit_it = cp.get(id)
    # Consume the object-type header unconditionally.  Wrapping the next()
    # call inside assert() (as before) meant that under "python -O" the
    # type line was never consumed, leaving it glued onto the commit body.
    obj_type = commit_it.next()
    assert obj_type == 'commit'
    commit_content = ''.join(commit_it)
    return parse_commit(commit_content)
95 def repo(sub = '', repo_dir=None):
96 """Get the path to the git repository or one of its subdirectories."""
98 repo_dir = repo_dir or repodir
100 raise GitError('You should call check_repo_or_die()')
102 # If there's a .git subdirectory, then the actual repo is in there.
103 gd = os.path.join(repo_dir, '.git')
104 if os.path.exists(gd):
107 return os.path.join(repo_dir, sub)
111 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
116 full = os.path.abspath(path)
117 fullrepo = os.path.abspath(repo(''))
118 if not fullrepo.endswith('/'):
120 if full.startswith(fullrepo):
121 path = full[len(fullrepo):]
122 if path.startswith('index-cache/'):
123 path = path[len('index-cache/'):]
124 return shorten_hash(path)
128 paths = [repo('objects/pack')]
129 paths += glob.glob(repo('index-cache/*/.'))
133 def auto_midx(objdir):
134 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
136 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
138 # make sure 'args' gets printed to help with debugging
139 add_error('%r: exception: %s' % (args, e))
142 add_error('%r: returned %d' % (args, rv))
144 args = [path.exe(), 'bloom', '--dir', objdir]
146 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
148 # make sure 'args' gets printed to help with debugging
149 add_error('%r: exception: %s' % (args, e))
152 add_error('%r: returned %d' % (args, rv))
155 def mangle_name(name, mode, gitmode):
156 """Mangle a file name to present an abstract name for segmented files.
157 Mangled file names will have the ".bup" extension added to them. If a
158 file's name already ends with ".bup", a ".bupl" extension is added to
159 disambiguate normal files from segmented ones.
161 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
162 assert(stat.S_ISDIR(gitmode))
164 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
165 return name + '.bupl'
170 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
171 def demangle_name(name, mode):
172 """Remove name mangling from a file name, if necessary.
174 The return value is a tuple (demangled_filename,mode), where mode is one of
177 * BUP_NORMAL : files that should be read as-is from the repository
178 * BUP_CHUNKED : files that were chunked and need to be reassembled
180 For more information on the name mangling algorithm, see mangle_name()
182 if name.endswith('.bupl'):
183 return (name[:-5], BUP_NORMAL)
184 elif name.endswith('.bup'):
185 return (name[:-4], BUP_CHUNKED)
186 elif name.endswith('.bupm'):
188 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
190 return (name, BUP_NORMAL)
193 def calc_hash(type, content):
194 """Calculate some content's hash in the Git fashion."""
195 header = '%s %d\0' % (type, len(content))
201 def shalist_item_sort_key(ent):
202 (mode, name, id) = ent
203 assert(mode+0 == mode)
204 if stat.S_ISDIR(mode):
210 def tree_encode(shalist):
211 """Generate a git tree object from (mode,name,hash) tuples."""
212 shalist = sorted(shalist, key = shalist_item_sort_key)
214 for (mode,name,bin) in shalist:
216 assert(mode+0 == mode)
218 assert(len(bin) == 20)
219 s = '%o %s\0%s' % (mode,name,bin)
220 assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
225 def tree_decode(buf):
226 """Generate a list of (mode,name,hash) from the git tree object in buf."""
228 while ofs < len(buf):
229 z = buf.find('\0', ofs)
231 spl = buf[ofs:z].split(' ', 1)
232 assert(len(spl) == 2)
234 sha = buf[z+1:z+1+20]
236 yield (int(mode, 8), name, sha)
239 def _encode_packobj(type, content, compression_level=1):
242 szbits = (sz & 0x0f) | (_typemap[type]<<4)
245 if sz: szbits |= 0x80
251 if compression_level > 9:
252 compression_level = 9
253 elif compression_level < 0:
254 compression_level = 0
255 z = zlib.compressobj(compression_level)
257 yield z.compress(content)
261 def _encode_looseobj(type, content, compression_level=1):
262 z = zlib.compressobj(compression_level)
263 yield z.compress('%s %d\0' % (type, len(content)))
264 yield z.compress(content)
268 def _decode_looseobj(buf):
270 s = zlib.decompress(buf)
277 assert(type in _typemap)
278 assert(sz == len(content))
279 return (type, content)
282 def _decode_packobj(buf):
285 type = _typermap[(c & 0x70) >> 4]
292 sz |= (c & 0x7f) << shift
296 return (type, zlib.decompress(buf[i+1:]))
303 def find_offset(self, hash):
304 """Get the offset of an object inside the index file."""
305 idx = self._idx_from_hash(hash)
307 return self._ofs_from_idx(idx)
310 def exists(self, hash, want_source=False):
311 """Return nonempty if the object exists in this index."""
312 if hash and (self._idx_from_hash(hash) != None):
313 return want_source and os.path.basename(self.name) or True
317 return int(self.fanout[255])
319 def _idx_from_hash(self, hash):
320 global _total_searches, _total_steps
322 assert(len(hash) == 20)
324 start = self.fanout[b1-1] # range -1..254
325 end = self.fanout[b1] # range 0..255
327 _total_steps += 1 # lookup table is a step
330 mid = start + (end-start)/2
331 v = self._idx_to_hash(mid)
341 class PackIdxV1(PackIdx):
342 """Object representation of a Git pack index (version 1) file."""
343 def __init__(self, filename, f):
345 self.idxnames = [self.name]
346 self.map = mmap_read(f)
347 self.fanout = list(struct.unpack('!256I',
348 str(buffer(self.map, 0, 256*4))))
349 self.fanout.append(0) # entry "-1"
350 nsha = self.fanout[255]
352 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
354 def _ofs_from_idx(self, idx):
355 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
357 def _idx_to_hash(self, idx):
358 return str(self.shatable[idx*24+4 : idx*24+24])
361 for i in xrange(self.fanout[255]):
362 yield buffer(self.map, 256*4 + 24*i + 4, 20)
365 class PackIdxV2(PackIdx):
366 """Object representation of a Git pack index (version 2) file."""
367 def __init__(self, filename, f):
369 self.idxnames = [self.name]
370 self.map = mmap_read(f)
371 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
372 self.fanout = list(struct.unpack('!256I',
373 str(buffer(self.map, 8, 256*4))))
374 self.fanout.append(0) # entry "-1"
375 nsha = self.fanout[255]
376 self.sha_ofs = 8 + 256*4
377 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
378 self.ofstable = buffer(self.map,
379 self.sha_ofs + nsha*20 + nsha*4,
381 self.ofs64table = buffer(self.map,
382 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
384 def _ofs_from_idx(self, idx):
385 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
387 idx64 = ofs & 0x7fffffff
388 ofs = struct.unpack('!Q',
389 str(buffer(self.ofs64table, idx64*8, 8)))[0]
392 def _idx_to_hash(self, idx):
393 return str(self.shatable[idx*20:(idx+1)*20])
396 for i in xrange(self.fanout[255]):
397 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
402 def __init__(self, dir):
404 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
409 self.do_bloom = False
416 assert(_mpi_count == 0)
419 return iter(idxmerge(self.packs))
422 return sum(len(pack) for pack in self.packs)
424 def exists(self, hash, want_source=False):
425 """Return nonempty if the object exists in the index files."""
426 global _total_searches
428 if hash in self.also:
430 if self.do_bloom and self.bloom:
431 if self.bloom.exists(hash):
432 self.do_bloom = False
434 _total_searches -= 1 # was counted by bloom
436 for i in xrange(len(self.packs)):
438 _total_searches -= 1 # will be incremented by sub-pack
439 ix = p.exists(hash, want_source=want_source)
441 # reorder so most recently used packs are searched first
442 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
447 def refresh(self, skip_midx = False):
448 """Refresh the index list.
449 This method verifies if .midx files were superseded (e.g. all of its
450 contents are in another, bigger .midx file) and removes the superseded
453 If skip_midx is True, all work on .midx files will be skipped and .midx
454 files will be removed from the list.
456 The module-global variable 'ignore_midx' can force this function to
457 always act as if skip_midx was True.
459 self.bloom = None # Always reopen the bloom as it may have been relaced
460 self.do_bloom = False
461 skip_midx = skip_midx or ignore_midx
462 d = dict((p.name, p) for p in self.packs
463 if not skip_midx or not isinstance(p, midx.PackMidx))
464 if os.path.exists(self.dir):
467 for ix in self.packs:
468 if isinstance(ix, midx.PackMidx):
469 for name in ix.idxnames:
470 d[os.path.join(self.dir, name)] = ix
471 for full in glob.glob(os.path.join(self.dir,'*.midx')):
473 mx = midx.PackMidx(full)
474 (mxd, mxf) = os.path.split(mx.name)
476 for n in mx.idxnames:
477 if not os.path.exists(os.path.join(mxd, n)):
478 log(('warning: index %s missing\n' +
479 ' used by %s\n') % (n, mxf))
487 midxl.sort(key=lambda ix:
488 (-len(ix), -xstat.stat(ix.name).st_mtime))
491 for sub in ix.idxnames:
492 found = d.get(os.path.join(self.dir, sub))
493 if not found or isinstance(found, PackIdx):
494 # doesn't exist, or exists but not in a midx
499 for name in ix.idxnames:
500 d[os.path.join(self.dir, name)] = ix
501 elif not ix.force_keep:
502 debug1('midx: removing redundant: %s\n'
503 % os.path.basename(ix.name))
506 for full in glob.glob(os.path.join(self.dir,'*.idx')):
510 except GitError as e:
514 bfull = os.path.join(self.dir, 'bup.bloom')
515 if self.bloom is None and os.path.exists(bfull):
516 self.bloom = bloom.ShaBloom(bfull)
517 self.packs = list(set(d.values()))
518 self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
519 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
523 debug1('PackIdxList: using %d index%s.\n'
524 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
527 """Insert an additional object in the list."""
531 def open_idx(filename):
532 if filename.endswith('.idx'):
533 f = open(filename, 'rb')
535 if header[0:4] == '\377tOc':
536 version = struct.unpack('!I', header[4:8])[0]
538 return PackIdxV2(filename, f)
540 raise GitError('%s: expected idx file version 2, got %d'
541 % (filename, version))
542 elif len(header) == 8 and header[0:4] < '\377tOc':
543 return PackIdxV1(filename, f)
545 raise GitError('%s: unrecognized idx file header' % filename)
546 elif filename.endswith('.midx'):
547 return midx.PackMidx(filename)
549 raise GitError('idx filenames must end with .idx or .midx')
552 def idxmerge(idxlist, final_progress=True):
553 """Generate a list of all the objects reachable in a PackIdxList."""
554 def pfunc(count, total):
555 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
556 % (count*100.0/total, count, total))
557 def pfinal(count, total):
559 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
560 % (100, total, total))
561 return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default objcache factory: a PackIdxList over this repo's pack dir."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
568 """Writes Git objects inside a pack file."""
569 def __init__(self, objcache_maker=_make_objcache, compression_level=1):
575 self.objcache_maker = objcache_maker
577 self.compression_level = compression_level
584 (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
585 self.file = os.fdopen(fd, 'w+b')
586 assert(name.endswith('.pack'))
587 self.filename = name[:-5]
588 self.file.write('PACK\0\0\0\2\0\0\0\0')
589 self.idx = list(list() for i in xrange(256))
591 def _raw_write(self, datalist, sha):
594 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
595 # the file never has a *partial* blob. So let's make sure it's
596 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
597 # to our hashsplit algorithm.) f.write() does its own buffering,
598 # but that's okay because we'll flush it in _end().
599 oneblob = ''.join(datalist)
603 raise GitError, e, sys.exc_info()[2]
605 crc = zlib.crc32(oneblob) & 0xffffffff
606 self._update_idx(sha, crc, nw)
611 def _update_idx(self, sha, crc, size):
614 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
616 def _write(self, sha, type, content):
620 sha = calc_hash(type, content)
621 size, crc = self._raw_write(_encode_packobj(type, content,
622 self.compression_level),
624 if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
628 def breakpoint(self):
629 """Clear byte and object counts and return the last processed id."""
631 self.outbytes = self.count = 0
634 def _require_objcache(self):
635 if self.objcache is None and self.objcache_maker:
636 self.objcache = self.objcache_maker()
637 if self.objcache is None:
639 "PackWriter not opened or can't check exists w/o objcache")
641 def exists(self, id, want_source=False):
642 """Return non-empty if an object is found in the object cache."""
643 self._require_objcache()
644 return self.objcache.exists(id, want_source=want_source)
646 def maybe_write(self, type, content):
647 """Write an object to the pack file if not present and return its id."""
648 sha = calc_hash(type, content)
649 if not self.exists(sha):
650 self._write(sha, type, content)
651 self._require_objcache()
652 self.objcache.add(sha)
655 def new_blob(self, blob):
656 """Create a blob object in the pack with the supplied content."""
657 return self.maybe_write('blob', blob)
659 def new_tree(self, shalist):
660 """Create a tree object in the pack."""
661 content = tree_encode(shalist)
662 return self.maybe_write('tree', content)
664 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
666 if tree: l.append('tree %s' % tree.encode('hex'))
667 if parent: l.append('parent %s' % parent.encode('hex'))
668 if author: l.append('author %s %s' % (author, _git_date(adate)))
669 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
672 return self.maybe_write('commit', '\n'.join(l))
674 def new_commit(self, parent, tree, date, msg):
675 """Create a commit object in the pack."""
676 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
677 commit = self._new_commit(tree, parent,
678 userline, date, userline, date,
683 """Remove the pack file from disk."""
689 os.unlink(self.filename + '.pack')
691 def _end(self, run_midx=True):
693 if not f: return None
699 # update object count
701 cp = struct.pack('!i', self.count)
705 # calculate the pack sha1sum
708 for b in chunkyreader(f):
710 packbin = sum.digest()
714 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
716 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
717 if os.path.exists(self.filename + '.map'):
718 os.unlink(self.filename + '.map')
719 os.rename(self.filename + '.pack', nameprefix + '.pack')
720 os.rename(self.filename + '.idx', nameprefix + '.idx')
723 auto_midx(repo('objects/pack'))
    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path.

        Delegates to _end(), passing run_midx through; returns whatever
        _end() returns.
        """
        return self._end(run_midx=run_midx)
730 def _write_pack_idx_v2(self, filename, idx, packbin):
733 for entry in section:
734 if entry[2] >= 2**31:
737 # Length: header + fan-out + shas-and-crcs + overflow-offsets
738 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
740 idx_f = open(filename, 'w+b')
742 idx_f.truncate(index_len)
743 idx_map = mmap_readwrite(idx_f, close=False)
744 count = _helpers.write_idx(filename, idx_map, idx, self.count)
745 assert(count == self.count)
747 if idx_map: idx_map.close()
750 idx_f = open(filename, 'a+b')
755 b = idx_f.read(8 + 4*256)
758 obj_list_sum = Sha1()
759 for b in chunkyreader(idx_f, 20*self.count):
761 obj_list_sum.update(b)
762 namebase = obj_list_sum.hexdigest()
764 for b in chunkyreader(idx_f):
766 idx_f.write(idx_sum.digest())
773 return '%d %s' % (date, utc_offset_str(date))
776 def _gitenv(repo_dir = None):
780 os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
784 def list_refs(refname=None, repo_dir=None,
785 limit_to_heads=False, limit_to_tags=False):
786 """Yield (refname, hash) tuples for all repository refs unless a ref
787 name is specified. Given a ref name, only include tuples for that
788 particular ref. The limits restrict the result items to
789 refs/heads or refs/tags. If both limits are specified, items from
790 both sources will be included.
793 argv = ['git', 'show-ref']
795 argv.append('--heads')
797 argv.append('--tags')
801 p = subprocess.Popen(argv,
802 preexec_fn = _gitenv(repo_dir),
803 stdout = subprocess.PIPE)
804 out = p.stdout.read().strip()
805 rv = p.wait() # not fatal
809 for d in out.split('\n'):
810 (sha, name) = d.split(' ', 1)
811 yield (name, sha.decode('hex'))
814 def read_ref(refname, repo_dir = None):
815 """Get the commit id of the most recent commit made on a given ref."""
816 refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
817 l = tuple(islice(refs, 2))
825 def rev_list(ref, count=None, repo_dir=None):
826 """Generate a list of reachable commits in reverse chronological order.
828 This generator walks through commits, from child to parent, that are
829 reachable via the specified ref and yields a series of tuples of the form
832 If count is a non-zero integer, limit the number of commits to "count"
835 assert(not ref.startswith('-'))
838 opts += ['-n', str(atoi(count))]
839 argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
840 p = subprocess.Popen(argv,
841 preexec_fn = _gitenv(repo_dir),
842 stdout = subprocess.PIPE)
846 if s.startswith('commit '):
847 commit = s[7:].decode('hex')
851 rv = p.wait() # not fatal
853 raise GitError, 'git rev-list returned error %d' % rv
856 def get_commit_dates(refs, repo_dir=None):
857 """Get the dates for the specified commit refs. For now, every unique
858 string in refs must resolve to a different commit or this
859 function will fail."""
862 commit = get_commit_items(ref, cp(repo_dir))
863 result.append(commit.author_sec)
867 def rev_parse(committish, repo_dir=None):
868 """Resolve the full hash for 'committish', if it exists.
870 Should be roughly equivalent to 'git rev-parse'.
872 Returns the hex value of the hash if it is found, None if 'committish' does
873 not correspond to anything.
875 head = read_ref(committish, repo_dir=repo_dir)
877 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
880 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
882 if len(committish) == 40:
884 hash = committish.decode('hex')
894 def update_ref(refname, newval, oldval, repo_dir=None):
895 """Update a repository reference."""
898 assert(refname.startswith('refs/heads/') \
899 or refname.startswith('refs/tags/'))
900 p = subprocess.Popen(['git', 'update-ref', refname,
901 newval.encode('hex'), oldval.encode('hex')],
902 preexec_fn = _gitenv(repo_dir))
903 _git_wait('git update-ref', p)
def delete_ref(refname):
    """Delete a repository reference.

    refname must be fully qualified (i.e. start with 'refs/').
    """
    assert(refname.startswith('refs/'))
    proc = subprocess.Popen(['git', 'update-ref', '-d', refname],
                            preexec_fn = _gitenv())
    _git_wait('git update-ref', proc)
914 def guess_repo(path=None):
915 """Set the path value in the global variable "repodir".
916 This makes bup look for an existing bup repository, but not fail if a
917 repository doesn't exist. Usually, if you are interacting with a bup
918 repository, you would not be calling this function but using
925 repodir = os.environ.get('BUP_DIR')
927 repodir = os.path.expanduser('~/.bup')
930 def init_repo(path=None):
931 """Create the Git bare repository for bup in a given path."""
933 d = repo() # appends a / to the path
934 parent = os.path.dirname(os.path.dirname(d))
935 if parent and not os.path.exists(parent):
936 raise GitError('parent directory "%s" does not exist\n' % parent)
937 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
938 raise GitError('"%s" exists but is not a directory\n' % d)
939 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
940 preexec_fn = _gitenv())
941 _git_wait('git init', p)
942 # Force the index version configuration in order to ensure bup works
943 # regardless of the version of the installed Git binary.
944 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
945 stdout=sys.stderr, preexec_fn = _gitenv())
946 _git_wait('git config', p)
948 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
949 stdout=sys.stderr, preexec_fn = _gitenv())
950 _git_wait('git config', p)
953 def check_repo_or_die(path=None):
954 """Make sure a bup repository exists, and abort if not.
955 If the path to a particular repository was not specified, this function
956 initializes the default repository automatically.
960 os.stat(repo('objects/pack/.'))
962 if e.errno == errno.ENOENT:
963 log('error: %r is not a bup repository; run "bup init"\n'
967 log('error: %s\n' % e)
973 """Get Git's version and ensure a usable version is installed.
975 The returned version is formatted as an ordered tuple with each position
976 representing a digit in the version tag. For example, the following tuple
977 would represent version 1.6.6.9:
983 p = subprocess.Popen(['git', '--version'],
984 stdout=subprocess.PIPE)
985 gvs = p.stdout.read()
986 _git_wait('git --version', p)
987 m = re.match(r'git version (\S+.\S+)', gvs)
989 raise GitError('git --version weird output: %r' % gvs)
990 _ver = tuple(m.group(1).split('.'))
991 needed = ('1','5', '3', '1')
993 raise GitError('git version %s or higher is required; you have %s'
994 % ('.'.join(needed), '.'.join(_ver)))
998 def _git_wait(cmd, p):
1001 raise GitError('%s returned %d' % (cmd, rv))
1004 def _git_capture(argv):
1005 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
1007 _git_wait(repr(argv), p)
1011 class _AbortableIter:
1012 def __init__(self, it, onabort = None):
1014 self.onabort = onabort
1022 return self.it.next()
1023 except StopIteration as e:
1031 """Abort iteration and call the abortion callback, if needed."""
1043 """Link to 'git cat-file' that is used to retrieve blob data."""
1044 def __init__(self, repo_dir = None):
1046 self.repo_dir = repo_dir
1047 wanted = ('1','5','6')
1050 log('warning: git version < %s; bup will be slow.\n'
1053 self.get = self._slow_get
1055 self.p = self.inprogress = None
1056 self.get = self._fast_get
1060 self.p.stdout.close()
1061 self.p.stdin.close()
1063 self.inprogress = None
1067 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1068 stdin=subprocess.PIPE,
1069 stdout=subprocess.PIPE,
1072 preexec_fn = _gitenv(self.repo_dir))
1074 def _fast_get(self, id):
1075 if not self.p or self.p.poll() != None:
1078 poll_result = self.p.poll()
1079 assert(poll_result == None)
1081 log('_fast_get: opening %r while %r is open\n'
1082 % (id, self.inprogress))
1083 assert(not self.inprogress)
1084 assert(id.find('\n') < 0)
1085 assert(id.find('\r') < 0)
1086 assert(not id.startswith('-'))
1087 self.inprogress = id
1088 self.p.stdin.write('%s\n' % id)
1089 self.p.stdin.flush()
1090 hdr = self.p.stdout.readline()
1091 if hdr.endswith(' missing\n'):
1092 self.inprogress = None
1093 raise KeyError('blob %r is missing' % id)
1094 spl = hdr.split(' ')
1095 if len(spl) != 3 or len(spl[0]) != 40:
1096 raise GitError('expected blob, got %r' % spl)
1097 (hex, type, size) = spl
1099 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1100 onabort = self._abort)
1105 readline_result = self.p.stdout.readline()
1106 assert(readline_result == '\n')
1107 self.inprogress = None
1108 except Exception as e:
1112 def _slow_get(self, id):
1113 assert(id.find('\n') < 0)
1114 assert(id.find('\r') < 0)
1115 assert(id[0] != '-')
1116 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1119 p = subprocess.Popen(['git', 'cat-file', type, id],
1120 stdout=subprocess.PIPE,
1121 preexec_fn = _gitenv(self.repo_dir))
1122 for blob in chunkyreader(p.stdout):
1124 _git_wait('git cat-file', p)
1126 def _join(self, it):
1131 elif type == 'tree':
1132 treefile = ''.join(it)
1133 for (mode, name, sha) in tree_decode(treefile):
1134 for blob in self.join(sha.encode('hex')):
1136 elif type == 'commit':
1137 treeline = ''.join(it).split('\n')[0]
1138 assert(treeline.startswith('tree '))
1139 for blob in self.join(treeline[5:]):
1142 raise GitError('invalid object type %r: expected blob/tree/commit'
1146 """Generate a list of the content of all blobs that can be reached
1147 from an object. The hash given in 'id' must point to a blob, a tree
1148 or a commit. The content of all blobs that can be seen from trees or
1149 commits will be added to the list.
1152 for d in self._join(self.get(id)):
1154 except StopIteration:
1160 def cp(repo_dir=None):
1161 """Create a CatPipe object or reuse the already existing one."""
1165 repo_dir = os.path.abspath(repo_dir)
1166 cp = _cp.get(repo_dir)
1168 cp = CatPipe(repo_dir)
1173 def tags(repo_dir = None):
1174 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1176 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
1177 assert(n.startswith('refs/tags/'))
1181 tags[c].append(name) # more than one tag can point at 'c'