1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
12 hostname, log, merge_iter, mmap_read, mmap_readwrite,
13 progress, qprogress, unlink, username, userfullname,
# Pack-size limits enforced by PackWriter: a pack is finished and a new one
# started once either threshold is exceeded.
max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object

# Numeric object-type codes used in git pack files, and the reverse mapping.
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
31 class GitError(Exception):
def parse_tz_offset(s):
    """UTC offset in seconds."""
    # s is a '+hhmm'/'-hhmm' style offset (see _tz_rx below).
    # NOTE(review): the sign handling and return are not visible in this
    # listing.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.

# Character classes for "safe" name/email strings in commit headers: no
# NUL/newline/angle-bracket anywhere; additionally no space or punctuation
# at either end.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
# NOTE(review): a continuation line of this expression (the start of the
# '%' argument tuple) is not visible in this listing.
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    _start_end_char, _content_char, _start_end_char)
# Timezone offset such as +0500 / -0830 (minutes restricted to 00..59).
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Matches the header of a git commit object: tree, zero or more parents,
# author and committer lines (name, email, epoch seconds, tz offset),
# followed by the free-form message.
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
# Extracts each 40-hex-digit parent id from the 'parents' group above.
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
# Parsed fields of a commit object as produced by parse_commit() below.
# NOTE(review): the closing field(s) of this namedtuple (presumably
# 'message', which parse_commit fills in) are not visible in this listing.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a raw commit object into a CommitInfo namedtuple.

    Raises Exception if *content* does not match _commit_rx.
    """
    commit_match = re.match(_commit_rx, content)
    # NOTE(review): the guard for a failed match is not visible in this
    # listing; the raise below is presumably under `if not commit_match:`.
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch the commit named by *id* through cat-pipe *cp* and parse it.

    Returns a CommitInfo namedtuple (see parse_commit).
    """
    item_iter = cp.get(id)
    kind = item_iter.next()
    assert(kind == 'commit')
    return parse_commit(''.join(item_iter))
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    # Fall back to the module-global repodir set by check_repo_or_die().
    repo_dir = repo_dir or repodir
    # NOTE(review): the condition guarding this raise (no repo dir known)
    # is not visible in this listing.
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
        # NOTE(review): the body of this branch is not visible in this
        # listing.
    return os.path.join(repo_dir, sub)
# NOTE(review): fragment — the enclosing `def` line is not visible in this
# listing. The regex abbreviates 40-hex-digit ids to their first 7 digits.
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',

# NOTE(review): fragment of a helper that rewrites *path* relative to the
# repository root before shortening embedded hashes; its `def` line is not
# visible in this listing.
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)

# NOTE(review): fragment of a helper collecting pack directories (the
# repo's objects/pack plus every index-cache subdirectory).
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    """Run 'bup midx --auto' then 'bup bloom' over *objdir*.

    Failures are recorded via add_error() rather than raised.
    NOTE(review): the try/except and return-code checks wrapping each call
    are not fully visible in this listing.
    """
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))
# NOTE(review): the closing quotes of this docstring and the remaining
# branches (chunked-name return and final else) are not visible in this
# listing.
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
# Flags returned by demangle_name() describing how a stored file should be
# read back: as-is, or reassembled from chunks.
(BUP_NORMAL, BUP_CHUNKED) = (0,1)

# NOTE(review): several docstring lines (including its closing quotes) and
# the `return` opening of the '.bupm' branch are not visible in this
# listing.
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.
    The return value is a tuple (demangled_filename,mode), where mode is one of
    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled
    For more information on the name mangling algorithm, see mangle_name()
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
        return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # Git hashes "<type> <len>\0" followed by the content.
    # NOTE(review): the actual digest computation and return are not
    # visible in this listing.
    header = '%s %d\0' % (type, len(content))
def shalist_item_sort_key(ent):
    """Return the sort key for a (mode,name,hash) tree entry."""
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be a plain integer
    if stat.S_ISDIR(mode):
        # NOTE(review): the directory and non-directory return values are
        # not visible in this listing.
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    # Entries must be sorted with git's tree ordering before encoding.
    shalist = sorted(shalist, key = shalist_item_sort_key)
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)      # integer mode
        assert(len(bin) == 20)      # raw (binary) sha1
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        # NOTE(review): accumulation of the encoded entries and the return
        # are not visible in this listing.
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    # Each entry is "<octal mode> <name>\0<20-byte sha>"; walk buf entry by
    # entry. NOTE(review): ofs initialization/advance and the split of
    # mode/name are not fully visible in this listing.
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    """Yield the pack-encoded form of *content*: a variable-length header
    (type code and size, 7 bits per byte, high bit = continuation) followed
    by the zlib-compressed data.

    NOTE(review): the size-loop emitting the header bytes is not fully
    visible in this listing.
    """
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
        if sz: szbits |= 0x80
    # Clamp the compression level into zlib's valid 0..9 range.
    if compression_level > 9:
        compression_level = 9
    elif compression_level < 0:
        compression_level = 0
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    """Yield the zlib-compressed loose-object form: "<type> <len>\\0" header
    then the content.  NOTE(review): the final flush of the compressor is
    not visible in this listing."""
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
def _decode_looseobj(buf):
    """Decompress a loose object and return (type, content).

    NOTE(review): the parsing of the "<type> <size>\\0" header out of the
    decompressed data is not visible in this listing.
    """
    s = zlib.decompress(buf)
    assert(type in _typemap)
    assert(sz == len(content))  # declared size must match actual payload
    return (type, content)
def _decode_packobj(buf):
    """Decode a pack-encoded object and return (type, content).

    The first byte carries the type code in bits 4..6; the size is spread
    over the low bits of successive header bytes (7 bits each).
    NOTE(review): the header-walking loop is not fully visible in this
    listing.
    """
    type = _typermap[(c & 0x70) >> 4]
        sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    # NOTE(review): these are methods of a PackIdx base class whose header
    # is not visible in this listing; subclasses supply _ofs_from_idx and
    # _idx_to_hash.
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        # NOTE(review): the None-check guarding this return is not visible
        # in this listing.
            return self._ofs_from_idx(idx)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # Either the index file's basename (for reporting) or True.
            return want_source and os.path.basename(self.name) or True

        # NOTE(review): fragment of __len__ — the last fanout entry is the
        # total object count.
        return int(self.fanout[255])

    def _idx_from_hash(self, hash):
        """Binary-search this index for *hash*; return its position or None.

        NOTE(review): several lines of the search loop are not visible in
        this listing.
        """
        global _total_searches, _total_steps
        assert(len(hash) == 20)  # raw binary sha1
        # The fanout table narrows the search to hashes sharing the first
        # byte.
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
            mid = start + (end-start)/2
            v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    # V1 layout: 256-entry fanout table, then 24-byte rows of
    # (4-byte offset, 20-byte sha).
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # First 4 bytes of each 24-byte row are the pack offset.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # Remaining 20 bytes of the row are the raw sha1.
        return str(self.shatable[idx*24+4 : idx*24+24])

        # NOTE(review): fragment of __iter__ (its def line is not visible
        # in this listing) — yields each 20-byte sha in index order.
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    # V2 layout: magic+version header, 256-entry fanout, sha table, CRC
    # table, 4-byte offset table, then a 64-bit offset table for entries
    # whose 32-bit offset has the high bit set.
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')  # magic + version 2
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        # Offset table sits after the shas and the nsha*4 CRC table.
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # NOTE(review): the high-bit test selecting the 64-bit table is not
        # visible in this listing.
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

        # NOTE(review): fragment of __iter__ (its def line is not visible
        # in this listing) — yields each 20-byte sha in index order.
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    # NOTE(review): methods of PackIdxList; the class header and several
    # lines of each method are not visible in this listing.
    def __init__(self, dir):
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        self.do_bloom = False

        # NOTE(review): fragment, apparently of __del__.
        assert(_mpi_count == 0)

        # NOTE(review): fragment, apparently of __iter__.
        return iter(idxmerge(self.packs))

        # NOTE(review): fragment, apparently of __len__.
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        if hash in self.also:
        # Consult the bloom filter first: a negative answer is definitive,
        # a positive one must be confirmed against the real indexes.
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    # NOTE(review): many lines of this method (including the docstring's
    # closing quotes and several loop/try scaffolding lines) are not
    # visible in this listing.
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.
        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        self.bloom = None # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Keep already-open indexes (minus midx files when skipping them).
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        # A midx is only usable if every .idx it covers is
                        # still present.
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     '  used by %s\n') % (n, mxf))
                # Prefer bigger, newer midx files.
                midxl.sort(key=lambda ix:
                           (-len(ix), -xstat.stat(ix.name).st_mtime))
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                for full in glob.glob(os.path.join(self.dir,'*.idx')):
                        except GitError as e:
        bfull = os.path.join(self.dir, 'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        # Biggest indexes first (py2 cmp-style sort).
        self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
        # Only trust the bloom filter if it covers everything we have.
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

        # NOTE(review): fragment, apparently the docstring of an add()
        # method whose def line is not visible in this listing.
        """Insert an additional object in the list."""
def open_idx(filename):
    """Open a pack index (.idx v1/v2) or midx file and return the object.

    NOTE(review): the header read and version dispatch lines are not fully
    visible in this listing.
    """
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        # '\377tOc' magic marks a v2 (or later) index.
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
                return PackIdxV2(filename, f)
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            # No magic: v1 indexes start directly with the fanout table.
            return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        # Per-step progress (overwrites the same line).
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        # NOTE(review): the final_progress guard is not visible in this
        # listing.
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default object-cache factory used by PackWriter."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
    # NOTE(review): the PackWriter class header and parts of __init__ are
    # not visible in this listing.
    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1):
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level

        # NOTE(review): fragment, apparently of an _open() helper that
        # creates the temporary .pack file lazily.
        (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
        self.file = os.fdopen(fd, 'w+b')
        assert(name.endswith('.pack'))
        self.filename = name[:-5]
        # Pack header: "PACK", version 2, object count 0 (fixed up later).
        self.file.write('PACK\0\0\0\2\0\0\0\0')
        # Per-first-byte buckets of (sha, crc, offset) for the index.
        self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        """Write an already-encoded object to the pack and record it in the
        in-memory index.  NOTE(review): the try/except around the write and
        the tracking of bytes written are not fully visible in this
        listing."""
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
            # py2 three-argument raise: re-raise as GitError, keeping the
            # original traceback.
            raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
    def _update_idx(self, sha, crc, size):
        # Bucket by the sha's first byte; offset is where the object began.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
        """Encode and write one object; roll over to a new pack when the
        size/object-count limits are hit.  NOTE(review): several lines are
        not visible in this listing."""
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        # NOTE(review): the pack finish/reopen around this reset is not
        # visible in this listing.
        self.outbytes = self.count = 0
    def _require_objcache(self):
        """Lazily build self.objcache; raise GitError if impossible."""
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            # NOTE(review): the raise statement opening is not visible in
            # this listing.
                "PackWriter not opened or can't check exists w/o objcache")
641 def exists(self, id, want_source=False):
642 """Return non-empty if an object is found in the object cache."""
643 self._require_objcache()
644 return self.objcache.exists(id, want_source=want_source)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            self._require_objcache()
            # Remember the new object so later duplicates are skipped.
            self.objcache.add(sha)
        # NOTE(review): the `return sha` is not visible in this listing.
655 def new_blob(self, blob):
656 """Create a blob object in the pack with the supplied content."""
657 return self.maybe_write('blob', blob)
659 def new_tree(self, shalist):
660 """Create a tree object in the pack."""
661 content = tree_encode(shalist)
662 return self.maybe_write('tree', content)
    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        """Build a commit object line by line and write it to the pack.

        NOTE(review): the list initialization and the blank-line/message
        appends are not visible in this listing.
        """
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        return self.maybe_write('commit', '\n'.join(l))
    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # Same identity string is used for both author and committer.
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,

        # NOTE(review): fragment of an abort() method whose def line and
        # error handling are not visible in this listing.
        """Remove the pack file from disk."""
            os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        """Finish the current pack: fix up the object count, append the
        pack checksum, write the .idx, and rename both into place.

        NOTE(review): many lines of this method are not visible in this
        listing.
        """
        if not f: return None

        # update object count
        cp = struct.pack('!i', self.count)

        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)

        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
            auto_midx(repo('objects/pack'))
728 def close(self, run_midx=True):
729 """Close the pack file and move it to its definitive path."""
730 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        """Write a v2 pack index for *idx* and return the object-list sha.

        NOTE(review): several lines (ofs64 counting, header writes, trailer
        checksum loop) are not visible in this listing.
        """
        # Entries whose pack offset needs more than 31 bits go in the
        # 64-bit overflow table.
        for entry in section:
            if entry[2] >= 2**31:

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)

        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        idx_map = mmap_readwrite(idx_f, close=False)
        # The C helper fills in the mmapped index body.
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        if idx_map: idx_map.close()

        idx_f = open(filename, 'a+b')
        # Skip header + fanout, then checksum the sha table to name the pack.
        b = idx_f.read(8 + 4*256)
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
    # NOTE(review): fragment of a date-formatting helper whose def line is
    # not visible in this listing.
    return '%d %s' % (date, utc_offset_str(date))


def _gitenv(repo_dir = None):
    """Prepare the GIT_DIR environment for a git subprocess.

    NOTE(review): callers use `preexec_fn = _gitenv(repo_dir)`, so this
    apparently returns a callable (closure) that sets GIT_DIR; the
    intermediate lines are not visible in this listing.
    """
        os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
# NOTE(review): the closing quotes of this docstring and several control
# lines (ref-name argument handling, error check) are not visible in this
# listing.
def list_refs(refname=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless a ref
    name is specified. Given a ref name, only include tuples for that
    particular ref. The limits restrict the result items to
    refs/heads or refs/tags. If both limits are specified, items from
    both sources will be included.
    argv = ['git', 'show-ref']
        argv.append('--heads')
        argv.append('--tags')
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
        # Each output line is "<40-hex sha> <refname>".
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
    # Take up to two matches so ambiguity can be detected.
    # NOTE(review): the result handling after this is not visible in this
    # listing.
    l = tuple(islice(refs, 2))
# NOTE(review): several docstring and loop lines of this function are not
# visible in this listing.
def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.
    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    If count is a non-zero integer, limit the number of commits to "count"
    assert(not ref.startswith('-'))  # keep ref from being parsed as an option
        opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
        if s.startswith('commit '):
            commit = s[7:].decode('hex')
    rv = p.wait() # not fatal
        raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # NOTE(review): the result-list setup and loop header are not visible
    # in this listing.
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)
# NOTE(review): the closing quotes of this docstring and the pack-lookup
# tail of the function are not visible in this listing.
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.
    Should be roughly equivalent to 'git rev-parse'.
    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    # Try resolving it as a ref first.
    head = read_ref(committish, repo_dir=repo_dir)
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))

    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))

    if len(committish) == 40:
        # Looks like a full hex hash: check it against the pack indexes.
            hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # NOTE(review): preceding validation lines are not visible in this
    # listing.  Only heads and tags may be updated through here.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    # newval/oldval are raw binary shas; git wants hex.
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname):
    """Remove the repository reference named *refname*."""
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
# NOTE(review): docstring closing quotes and the guards around the repodir
# assignments are not visible in this listing.
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
        # Prefer the environment variable, then fall back to ~/.bup.
        repodir = os.environ.get('BUP_DIR')
            repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    # NOTE(review): the initial repodir setup line(s) are not visible in
    # this listing.
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
# NOTE(review): docstring closing quotes and the try/except scaffolding of
# this function are not visible in this listing.
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
        # Probing the pack directory is the existence check.
        os.stat(repo('objects/pack/.'))
        if e.errno == errno.ENOENT:
            log('error: %r is not a bup repository; run "bup init"\n'
            log('error: %s\n' % e)
    # NOTE(review): the def line of this function and parts of its
    # docstring/version-compare logic are not visible in this listing.
    """Get Git's version and ensure a usable version is installed.
    The returned version is formatted as an ordered tuple with each position
    representing a digit in the version tag. For example, the following tuple
    would represent version 1.6.6.9:
    p = subprocess.Popen(['git', '--version'],
                         stdout=subprocess.PIPE)
    gvs = p.stdout.read()
    _git_wait('git --version', p)
    m = re.match(r'git version (\S+.\S+)', gvs)
        raise GitError('git --version weird output: %r' % gvs)
    # Version is compared component-wise as strings, e.g. ('1','5','3','1').
    _ver = tuple(m.group(1).split('.'))
    needed = ('1','5', '3', '1')
        raise GitError('git version %s or higher is required; you have %s'
                       % ('.'.join(needed), '.'.join(_ver)))
def _git_wait(cmd, p):
    """Wait for subprocess *p*; raise GitError on nonzero exit.

    NOTE(review): the wait and return-code check preceding the raise are
    not visible in this listing.
    """
        raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    """Run *argv* with GIT_DIR set and return its stdout; raise on failure.

    NOTE(review): the output read and return are not visible in this
    listing.
    """
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    _git_wait(repr(argv), p)
class _AbortableIter:
    """Iterator wrapper that can abort mid-stream and notify a callback.

    NOTE(review): several method bodies of this class are not visible in
    this listing.
    """
    def __init__(self, it, onabort = None):
        self.onabort = onabort

        # NOTE(review): fragment, apparently of next().
            return self.it.next()
        except StopIteration as e:

        # NOTE(review): fragment, apparently the docstring of abort().
        """Abort iteration and call the abortion callback, if needed."""
    # NOTE(review): the CatPipe class header and parts of these methods are
    # not visible in this listing.
    """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        # Old git versions lack a usable 'cat-file --batch'; fall back to
        # one-process-per-object mode.
        wanted = ('1','5','6')
            log('warning: git version < %s; bup will be slow.\n'
            self.get = self._slow_get
            self.p = self.inprogress = None
            self.get = self._fast_get

        # NOTE(review): fragment, apparently of an _abort/close helper that
        # tears down the batch subprocess.
            self.p.stdout.close()
            self.p.stdin.close()
        self.inprogress = None

        # NOTE(review): fragment, apparently of a _restart helper spawning
        # the long-lived batch subprocess.
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        """Fetch object *id* through the persistent 'cat-file --batch'
        subprocess; yields the type then the content chunks.

        NOTE(review): restart handling, the yields, and the except body
        are not fully visible in this listing.
        """
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)  # batch process must still be alive
            log('_fast_get: opening %r while %r is open\n'
                % (id, self.inprogress))
        assert(not self.inprogress)  # only one in-flight request at a time
        # Reject ids that could corrupt the protocol or look like options.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise KeyError('blob %r is missing' % id)
        # Header format: "<sha> <type> <size>".
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl

        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
            # git terminates each batch response with a newline.
            readline_result = self.p.stdout.readline()
            assert(readline_result == '\n')
            self.inprogress = None
        except Exception as e:
    def _slow_get(self, id):
        """Fetch object *id* with one 'git cat-file' process per request
        (fallback for old git).  NOTE(review): the type yield and some
        lines are not visible in this listing."""
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        # First ask for the type, then stream the content.
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
        _git_wait('git cat-file', p)
    def _join(self, it):
        """Recursively yield the blob contents reachable from one object
        stream.  NOTE(review): the type dispatch head and the blob yields
        are not fully visible in this listing."""
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            # Recurse into the commit's tree (first header line).
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
            raise GitError('invalid object type %r: expected blob/tree/commit'

        # NOTE(review): the def line of the public join() wrapper is not
        # visible in this listing.
        """Generate a list of the content of all blobs that can be reached
        from an object. The hash given in 'id' must point to a blob, a tree
        or a commit. The content of all blobs that can be seen from trees or
        commits will be added to the list.
            for d in self._join(self.get(id)):
        except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # One cached CatPipe per absolute repo path (module-global _cp dict).
    # NOTE(review): default repo_dir resolution and the cache store/return
    # are not visible in this listing.
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
        cp = CatPipe(repo_dir)
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    # NOTE(review): the dict setup and the 'refs/tags/' prefix stripping
    # are not visible in this listing.
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name) # more than one tag can point at 'c'