1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
12 hostname, log, merge_iter, mmap_read, mmap_readwrite,
13 progress, qprogress, unlink, username, userfullname,
# Per-pack limits, checked in PackWriter._write: when either is exceeded a
# new pack is started.
max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object

# Mapping from git object type names to the numeric codes used in pack
# files, and the reverse mapping.
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
31 class GitError(Exception):
def parse_tz_offset(s):
    """UTC offset in seconds."""
    # s looks like '+HHMM' or '-HHMM'; combine the hour and minute parts.
    # NOTE(review): the sign handling/return is not in this fragment.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.

# Pieces of the commit-object grammar: character classes for "safe
# strings" (author/committer names and emails), a timezone pattern, and a
# parent line.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    _start_end_char, _content_char, _start_end_char)
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Full commit object: a tree line, zero or more parent lines, author and
# committer lines, then the free-form message.
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
# Extracts each parent hash from the "parents" group matched above.
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Parsed representation of a git commit object; produced by parse_commit().
# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a git commit object's text and return a CommitInfo tuple."""
    commit_match = re.match(_commit_rx, content)
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch the commit named by id through cp and parse it.

    The first item produced by cp.get() must be the type name 'commit';
    the remaining items are joined and handed to parse_commit().
    """
    items = cp.get(id)
    assert(items.next() == 'commit')
    return parse_commit(''.join(items))
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    # repodir is the module global set by guess_repo()/check_repo_or_die().
    repo_dir = repo_dir or repodir
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
    return os.path.join(repo_dir, sub)
    # (shorten_hash) replace a full 40-hex-digit hash embedded in text with
    # its 7-digit abbreviation
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
    # Make a path relative to the repo root (and index-cache) for display;
    # the enclosing def line is not in this fragment.
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)
    # Candidate pack-index directories: the repo's own objects/pack plus
    # every index-cache subdirectory.
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    # Run 'bup midx --auto' and then 'bup bloom' over objdir, best-effort:
    # failures are reported via add_error() instead of being raised.
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    # A regular file stored under a git tree mode means it was chunked.
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
# Result codes for demangle_name(): plain file vs. chunked representation.
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.
    The return value is a tuple (demangled_filename,mode), where mode is one of
    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled
    For more information on the name mangling algorithm, see mangle_name()
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # Git hashes the header "<type> <len>\0" followed by the raw content.
    header = '%s %d\0' % (type, len(content))
def shalist_item_sort_key(ent):
    """Sort key used to order (mode,name,hash) tuples for tree_encode()."""
    (mode, name, id) = ent
    assert(mode+0 == mode) # mode must be an integer
    if stat.S_ISDIR(mode):
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)
        assert(len(bin) == 20) # hashes are raw 20-byte SHA-1 digests
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    while ofs < len(buf):
        # Each entry is "<octal mode> <name>\0" followed by a raw
        # 20-byte sha.
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    # Pack object header byte: low nibble of the size plus the 3-bit type
    # code; the 0x80 continuation bit is set while size bits remain.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80
    # zlib only accepts compression levels 0..9; clamp anything else.
    if compression_level > 9:
        compression_level = 9
    elif compression_level < 0:
        compression_level = 0
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
261 def _encode_looseobj(type, content, compression_level=1):
262 z = zlib.compressobj(compression_level)
263 yield z.compress('%s %d\0' % (type, len(content)))
264 yield z.compress(content)
def _decode_looseobj(buf):
    # Inverse of _encode_looseobj: decompress, then split off the
    # "<type> <len>\0" header and validate it against the content.
    s = zlib.decompress(buf)
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)
def _decode_packobj(buf):
    # First byte carries the 3-bit type code (bits 4..6) and the low size
    # bits; further size bits follow, 7 per continuation byte.
    type = _typermap[(c & 0x70) >> 4]
        sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
            return self._ofs_from_idx(idx)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # With want_source, report which index file the hit came from.
            return want_source and os.path.basename(self.name) or True

        # (__len__) the object count is the final fanout entry.
        return int(self.fanout[255])
    def _idx_from_hash(self, hash):
        # Binary search for hash, narrowed first via the 256-entry fanout
        # table indexed by the hash's first byte.
        global _total_searches, _total_steps
        assert(len(hash) == 20)
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1  # lookup table is a step
            mid = start + (end-start)/2
            v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v1 layout: 256-entry fanout table followed by 24-byte rows of
        # (4-byte offset, 20-byte sha).
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # The sha follows the 4-byte offset within each 24-byte row.
        return str(self.shatable[idx*24+4 : idx*24+24])

        # (__iter__) yield each raw 20-byte sha in index order
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # Check the v2 magic ('\377tOc') and version number.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        # v2 layout: header, fanout, sha table, crc table, 32-bit offset
        # table, then 64-bit offsets for entries that don't fit in 32 bits.
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
            # High bit set: the value is an index into the 64-bit table.
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

        # (__iter__) yield each raw 20-byte sha in index order
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    def __init__(self, dir):
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        # Bloom filtering is off until refresh() finds a usable bloom file.
        self.do_bloom = False
        # (__del__) only one PackIdxList may be alive at a time
        assert(_mpi_count == 0)

        # (__iter__) merge-iterate over every contained pack index
        return iter(idxmerge(self.packs))

        # (__len__) total object count across all packs
        return sum(len(pack) for pack in self.packs)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        if hash in self.also:
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                # Bloom says maybe-present: fall through to the real
                # indexes, but only count this as one search.
                self.do_bloom = False
                _total_searches -= 1  # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        self.bloom = None # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # d maps index filename -> index object for everything we keep.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     '  used by %s\n') % (n, mxf))
                # Prefer larger and then newer midx files.
                midxl.sort(key=lambda ix:
                           (-len(ix), -xstat.stat(ix.name).st_mtime))
                for sub in ix.idxnames:
                    found = d.get(os.path.join(self.dir, sub))
                    if not found or isinstance(found, PackIdx):
                        # doesn't exist, or exists but not in a midx
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                except GitError as e:
        bfull = os.path.join(self.dir, 'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        # Biggest packs first so common lookups hit early.
        self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
527 """Insert an additional object in the list."""
def open_idx(filename):
    # Open a pack index by extension: '.idx' (v1 or v2) or '.midx'.
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
                return PackIdxV2(filename, f)
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            # No v2 magic: assume the original (v1) index format.
            return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        # Incremental progress callback for merge_iter.
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default PackWriter objcache factory: an index list covering the
    repository's pack directory."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1):
        # objcache_maker lazily builds the existence-check index; see
        # _require_objcache().
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
        # Create the temporary pack file and write the 12-byte header:
        # 'PACK', version 2, and a zero object count (fixed up in _end()).
        (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
        self.file = os.fdopen(fd, 'w+b')
        assert(name.endswith('.pack'))
        self.filename = name[:-5]
        self.file.write('PACK\0\0\0\2\0\0\0\0')
        # One idx bucket per possible first sha byte (for the fanout).
        self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
            raise GitError, e, sys.exc_info()[2]

        # Record the entry's crc and offset for the index we write later.
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
    def _update_idx(self, sha, crc, size):
        # Append (sha, crc, offset) to the bucket for the sha's first
        # byte, matching the pack index fanout layout.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        # Start a fresh pack once this one hits either per-pack limit.
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        self.outbytes = self.count = 0
    def _require_objcache(self):
        # Build the object cache on first use; fail if no maker was given.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
                "PackWriter not opened or can't check exists w/o objcache")
641 def exists(self, id, want_source=False):
642 """Return non-empty if an object is found in the object cache."""
643 self._require_objcache()
644 return self.objcache.exists(id, want_source=want_source)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            self._require_objcache()
            self.objcache.add(sha) # remember it so we don't write it twice
655 def new_blob(self, blob):
656 """Create a blob object in the pack with the supplied content."""
657 return self.maybe_write('blob', blob)
659 def new_tree(self, shalist):
660 """Create a tree object in the pack."""
661 content = tree_encode(shalist)
662 return self.maybe_write('tree', content)
    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        # Assemble the commit object line by line; every piece is optional.
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        return self.maybe_write('commit', '\n'.join(l))
    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # The current user is both author and committer.
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,
        """Remove the pack file from disk."""
        # Clean up the partially-written temporary pack.
        os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        # Finish the pack: patch the object count into the header, append
        # the pack's sha1 trailer, write the .idx, and rename both files
        # to their final content-addressed names.
        if not f: return None

        # update object count
        cp = struct.pack('!i', self.count)

        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)

        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
            # Refresh the midx/bloom indexes to include the new pack.
            auto_midx(repo('objects/pack'))
730 def close(self, run_midx=True):
731 """Close the pack file and move it to its definitive path."""
732 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Count entries whose pack offset needs a 64-bit overflow slot.
        for entry in section:
            if entry[2] >= 2**31:

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)

        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        idx_map = mmap_readwrite(idx_f, close=False)
        # The C helper fills in the mmap'd index body.
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        if idx_map: idx_map.close()

        # Reopen to append the trailing checksums; the sha over the object
        # list gives the pack its final name.
        idx_f = open(filename, 'a+b')
        b = idx_f.read(8 + 4*256)

        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()

        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
777 return '%d %s' % (date, utc_offset_str(date))
def _gitenv(repo_dir = None):
    # _gitenv(...) is passed as a subprocess preexec_fn below (see
    # list_refs); it points GIT_DIR at the bup repository before git runs.
    os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
def list_refs(refname=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless a ref
    name is specified. Given a ref name, only include tuples for that
    particular ref. The limits restrict the result items to
    refs/heads or refs/tags. If both limits are specified, items from
    both sources will be included.
    argv = ['git', 'show-ref']
    argv.append('--heads')
    argv.append('--tags')
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait()  # not fatal
    # show-ref prints one "<sha> <refname>" pair per line.
    for d in out.split('\n'):
        (sha, name) = d.split(' ', 1)
        yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
    # Take at most two matches; the handling of the result is not shown
    # in this fragment.
    l = tuple(islice(refs, 2))
def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form

    If count is a non-zero integer, limit the number of commits to "count"
    assert(not ref.startswith('-')) # keep ref from being parsed as an option
    opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
        if s.startswith('commit '):
            commit = s[7:].decode('hex')
    rv = p.wait()  # not fatal
        raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs.  For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
        commit = get_commit_items(ref, cp(repo_dir))
        # author_sec is the author date as (UTC) epoch seconds.
        result.append(commit.author_sec)
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    # First try committish as a ref name.
    head = read_ref(committish, repo_dir=repo_dir)
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))

    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))

    # Then try it as a full 40-hex-digit object id.
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # Only branch heads and tags may be updated this way.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname):
    """Delete a repository reference (it must live under refs/)."""
    assert(refname.startswith('refs/'))
    proc = subprocess.Popen(['git', 'update-ref', '-d', refname],
                            preexec_fn = _gitenv())
    _git_wait('git update-ref', proc)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    # Fall back to $BUP_DIR, then to ~/.bup.
    repodir = os.environ.get('BUP_DIR')
        repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Record all ref updates (core.logAllRefUpdates).
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
        # Probe for the pack directory to confirm this is a bup repo.
        os.stat(repo('objects/pack/.'))
        if e.errno == errno.ENOENT:
            log('error: %r is not a bup repository; run "bup init"\n'
            log('error: %s\n' % e)
    """Get Git's version and ensure a usable version is installed.

    The returned version is formatted as an ordered tuple with each position
    representing a digit in the version tag. For example, the following tuple
    would represent version 1.6.6.9:
    p = subprocess.Popen(['git', '--version'],
                         stdout=subprocess.PIPE)
    gvs = p.stdout.read()
    _git_wait('git --version', p)
    # NOTE(review): the '.' in '(\S+.\S+)' is unescaped and matches any
    # character; probably intended as a literal dot.
    m = re.match(r'git version (\S+.\S+)', gvs)
        raise GitError('git --version weird output: %r' % gvs)
    _ver = tuple(m.group(1).split('.'))
    # NOTE(review): version parts are compared as strings elsewhere, which
    # misorders multi-digit components (e.g. '10' < '5') — verify.
    needed = ('1','5', '3', '1')
        raise GitError('git version %s or higher is required; you have %s'
                       % ('.'.join(needed), '.'.join(_ver)))
def _git_wait(cmd, p):
    # Wait for subprocess p; raise GitError (naming cmd) on nonzero exit.
        raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    # Run argv with GIT_DIR set and return its captured stdout.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    _git_wait(repr(argv), p)
class _AbortableIter:
    # Iterator wrapper that supports aborting mid-iteration via an
    # optional callback (CatPipe passes its _abort here; see _fast_get).
    def __init__(self, it, onabort = None):
        self.onabort = onabort

        # (next) delegate to the wrapped iterator
            return self.it.next()
        except StopIteration as e:

        """Abort iteration and call the abortion callback, if needed."""
    """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        # Older git lacks 'cat-file --batch'; fall back to one
        # subprocess per object (_slow_get), which is much slower.
        wanted = ('1','5','6')
            log('warning: git version < %s; bup will be slow.\n'
            self.get = self._slow_get

            self.p = self.inprogress = None
            self.get = self._fast_get
        # (_abort) tear down the batch subprocess; the next _fast_get()
        # call will restart it
        self.p.stdout.close()
        self.p.stdin.close()
        self.inprogress = None
        # Spawn the long-lived 'git cat-file --batch' subprocess used by
        # _fast_get.
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        # Restart the batch subprocess if it died or was never started.
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
            log('_fast_get: opening %r while %r is open\n'
                % (id, self.inprogress))
        assert(not self.inprogress)
        # Refuse ids that would break the line-oriented protocol or could
        # be mistaken for options.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise KeyError('blob %r is missing' % id)
        # Batch header is "<sha> <type> <size>".
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl

        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
            readline_result = self.p.stdout.readline()
            assert(readline_result == '\n') # object data ends with a newline
            self.inprogress = None
        except Exception as e:
    def _slow_get(self, id):
        # One 'git cat-file' subprocess per request: '-t' for the type,
        # then a second invocation for the content.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()

        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
        _git_wait('git cat-file', p)
    def _join(self, it):
        # Recursively yield the blob contents reachable from an object:
        # blobs directly, trees via each entry, commits via their tree.
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
            raise GitError('invalid object type %r: expected blob/tree/commit'
        """Generate a list of the content of all blobs that can be reached
        from an object.  The hash given in 'id' must point to a blob, a tree
        or a commit. The content of all blobs that can be seen from trees or
        commits will be added to the list.
            for d in self._join(self.get(id)):
        except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    repo_dir = os.path.abspath(repo_dir)
    # _cp caches CatPipe instances keyed by absolute repo path.
    cp = _cp.get(repo_dir)
        cp = CatPipe(repo_dir)
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name)  # more than one tag can point at 'c'