1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
13 hostname, log, merge_iter, mmap_read, mmap_readwrite,
14 progress, qprogress, unlink, username, userfullname,
# Pack-rolling thresholds: when a pack exceeds either limit, PackWriter
# finishes it and starts a new one (see _write()).
max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object

# Git pack object type codes <-> type names (see the git pack format spec).
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
32 class GitError(Exception):
def parse_tz_offset(s):
    """UTC offset in seconds."""
    # s is "[+-]HHMM"; convert the digits to total seconds.
    # NOTE(review): the sign handling and return appear to be missing from
    # this copy -- confirm against upstream bup.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.

# Character classes for git "safe strings" (author/committer names and
# email addresses): restricted at the start/end, looser in the middle.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
# NOTE(review): the "%" interpolation opener for this expression appears
# to be missing from this copy.
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    _start_end_char, _content_char, _start_end_char)
# Timezone offset, e.g. "+0100" / "-0530".
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Matches the full raw text of a git commit object; named groups are
# consumed by parse_commit() below.
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
# Extracts each 40-hex parent id from the "parents" group above.
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
63 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
64 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
65 'author_name', 'author_mail',
66 'author_sec', 'author_offset',
67 'committer_name', 'committer_mail',
68 'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a raw git commit's text into a CommitInfo namedtuple.

    Raises Exception if content does not match _commit_rx.
    """
    commit_match = re.match(_commit_rx, content)
    # NOTE(review): the guard for the raise below (presumably
    # "if not commit_match:") appears to be missing from this copy.
    raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Return a CommitInfo for the commit named by id, read via cat-pipe cp.

    cp.get(id) must yield the object's type string first, then its raw
    content chunks.  Asserts that the object really is a commit.
    """
    commit_it = cp.get(id)
    # Use the next() builtin (Python 2.6+) instead of the Py2-only
    # .next() method; behavior is identical on the generators cp.get()
    # returns, and it keeps the call portable.
    assert(next(commit_it) == 'commit')
    commit_content = ''.join(commit_it)
    return parse_commit(commit_content)
def _local_git_date_str(epoch_sec):
    """Render epoch_sec as a git date string using the local UTC offset."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
def _git_date_str(epoch_sec, tz_offset_sec):
    """Render epoch_sec with an explicit UTC offset (in seconds) as a git
    date string: "<epoch> [+-]HHMM".
    """
    offs = tz_offset_sec // 60
    # NOTE(review): the "%" argument lines for this format appear to be
    # missing from this copy (upstream interpolates epoch_sec, the sign,
    # and the absolute hours/minutes).
    return '%d %s%02d%02d' \
        '+' if offs >= 0 else '-',
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    # repodir is the module global set by check_repo_or_die()/guess_repo().
    repo_dir = repo_dir or repodir
    # NOTE(review): the guard for this raise (repo_dir unset) appears to
    # be missing from this copy.
    raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
        # NOTE(review): the "repo_dir = gd" redirection appears missing;
        # upstream returns at function level for both cases.
        return os.path.join(repo_dir, sub)
# Fragment: body of shorten_hash(s) -- abbreviates 40-hex object ids to 7
# chars within text.  The def line and the re.sub replacement argument
# are missing from this copy.
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
# Fragment: body of repo_rel(path) -- renders a path relative to the repo
# (and its index-cache) for display.  The def line is missing from this
# copy, as is the "fullrepo += '/'" body of the endswith check below.
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)
# Fragment: body of all_packdirs() -- the local pack dir plus any remote
# index-cache pack dirs.  The def and return lines are missing from this
# copy.
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    """Run 'bup midx --auto' then 'bup bloom' over objdir, best-effort.

    Failures are recorded via add_error() rather than raised.
    NOTE(review): the try/except wrappers and the rv/else guards around
    the add_error() calls appear to be missing from this copy.
    """
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        # A regular file stored as a git tree is a chunked file.
        assert(stat.S_ISDIR(gitmode))
        # NOTE(review): the "return name + '.bup'" for this branch appears
        # to be missing from this copy.
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
# Mode flags returned by demangle_name().
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.
    The return value is a tuple (demangled_filename,mode), where mode is one of
    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled
    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        # NOTE(review): the opening "return (name[:-5]," of this return
        # appears to be missing from this copy.
        BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # Git hashes "<type> <len>\0" followed by the content, with SHA-1.
    header = '%s %d\0' % (type, len(content))
    # NOTE(review): the Sha1() update/digest lines appear to be missing
    # from this copy.
def shalist_item_sort_key(ent):
    # Sort key matching git's tree ordering: directory entries sort as if
    # their name carried a trailing '/'.
    # NOTE(review): the key construction/returns appear to be missing from
    # this copy.
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be numeric
    if stat.S_ISDIR(mode):
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    # Each entry is "<octal mode> <name>\0" followed by the raw 20-byte sha.
    # NOTE(review): the accumulator initialization and final join appear
    # to be missing from this copy.
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)
        assert(len(bin) == 20)
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    # NOTE(review): the "ofs = 0" initialization, mode/name unpacking, and
    # ofs advance appear to be missing from this copy.
    while ofs < len(buf):
        # Entries are "<mode> <name>\0" followed by a raw 20-byte sha.
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    # Yields the pack-encoded form of content: a varint size/type header
    # followed by zlib-compressed data.
    # NOTE(review): sz initialization, header-byte emission, the size
    # loop, and the final z.flush() appear to be missing from this copy.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80  # continuation bit: more size bytes follow
    # Clamp compression level into zlib's valid 0..9 range.
    if compression_level > 9:
        compression_level = 9
    elif compression_level < 0:
        compression_level = 0
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    # Yields the zlib-compressed loose-object stream: "<type> <len>\0"
    # header then the content.
    # NOTE(review): the trailing "yield z.flush()" appears to be missing
    # from this copy.
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
def _decode_looseobj(buf):
    # Decompress a loose object and return (type, content).
    # NOTE(review): the header parsing (splitting type/size from the
    # decompressed bytes) appears to be missing from this copy.
    s = zlib.decompress(buf)
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)
def _decode_packobj(buf):
    # Decode a pack-encoded object, returning (type, decompressed content).
    # First byte: high bit = more size bytes follow, bits 4-6 = type code,
    # low 4 bits = initial size bits.
    # NOTE(review): c/i/sz/shift initialization and the size loop appear
    # to be missing from this copy.
    type = _typermap[(c & 0x70) >> 4]
    sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        # NOTE(review): the "if idx != None:" guard appears to be missing
        # from this copy.
        return self._ofs_from_idx(idx)
324 def exists(self, hash, want_source=False):
325 """Return nonempty if the object exists in this index."""
326 if hash and (self._idx_from_hash(hash) != None):
327 return want_source and os.path.basename(self.name) or True
    # Fragment: __len__ body (its def line is missing from this copy);
    # fanout[255] is the total object count.
        return int(self.fanout[255])

    def _idx_from_hash(self, hash):
        # Binary search within the fanout bucket selected by the hash's
        # first byte.
        # NOTE(review): the b1 = ord(hash[0]) setup, the search-counter
        # increment, and the while loop around the probe below appear to
        # be missing from this copy.
        global _total_searches, _total_steps
        assert(len(hash) == 20)
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
        mid = start + (end-start)/2
        v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        # NOTE(review): the self.name assignment appears to be missing
        # from this copy.
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # V1 layout: 256 fanout entries, then 24-byte (ofs+sha) records.
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        # NOTE(review): the self.sha_ofs assignment appears to be missing.
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # The pack offset is the first 4 bytes of each 24-byte record.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # The 20-byte sha follows the 4-byte offset in each record.
        return str(self.shatable[idx*24+4 : idx*24+24])

    # Fragment: __iter__ body (its def line is missing from this copy).
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        # NOTE(review): the self.name assignment appears to be missing
        # from this copy.
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # Magic "\377tOc" plus version 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        # V2 layout: shas, then crcs (nsha*4), then 32-bit offsets, then
        # the 64-bit overflow offset table.
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        # NOTE(review): the length argument closing this call appears to
        # be missing from this copy.
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # When the high bit is set, the real offset lives in the 64-bit
        # table at the index in the low 31 bits.
        # NOTE(review): the "if ofs & 0x80000000:" guard and the final
        # return appear to be missing from this copy.
        idx64 = ofs & 0x7fffffff
        ofs = struct.unpack('!Q',
                            str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

    # Fragment: __iter__ body (its def line is missing from this copy).
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    # PackIdxList fragments -- the enclosing class statement and several
    # method bodies/def lines are missing from this copy.
    def __init__(self, dir):
        # Only one PackIdxList may exist at a time (mmap memory pressure).
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        self.do_bloom = False

    # Fragment: __del__ body (def line missing from this copy).
        assert(_mpi_count == 0)

    # Fragment: __iter__ body (def line missing from this copy).
        return iter(idxmerge(self.packs))

    # Fragment: __len__ body (def line missing from this copy).
        return sum(len(pack) for pack in self.packs)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        # NOTE(review): several lines are missing from this copy: the
        # search counter increment, the bloom-miss early return, the
        # per-pack hit returns, and the final fallthrough.
        global _total_searches
        if hash in self.also:
        # Consult the bloom filter first; a miss there means a definite
        # miss everywhere.
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.
        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        # NOTE(review): many guard/bookkeeping lines are missing from this
        # copy (midxl initialization, skip_midx branch, broken-midx
        # unlinking, .idx open try/except bodies, etc.) -- reconstruction
        # below is best-effort.
        self.bloom = None # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Keep existing packs keyed by file name (dropping midxes when
        # they're being skipped).
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                mx = midx.PackMidx(full)
                (mxd, mxf) = os.path.split(mx.name)
                for n in mx.idxnames:
                    if not os.path.exists(os.path.join(mxd, n)):
                        log(('warning: index %s missing\n' +
                             ' used by %s\n') % (n, mxf))
            # Prefer larger, then newer, midxes.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for sub in ix.idxnames:
                found = d.get(os.path.join(self.dir, sub))
                if not found or isinstance(found, PackIdx):
                    # doesn't exist, or exists but not in a midx
                for name in ix.idxnames:
                    d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                except GitError as e:
        bfull = os.path.join(self.dir, 'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        # Search biggest indexes first.
        self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

    # Fragment: add() (its def line and body are missing from this copy).
        """Insert an additional object in the list."""
def open_idx(filename):
    # Open filename as the matching PackIdxV1/V2 or PackMidx object,
    # dispatching on extension and file header.
    # NOTE(review): the header read (f.read(8)) and the version == 2
    # guard/else branches are missing from this copy.
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            return PackIdxV2(filename, f)
            raise GitError('%s: expected idx file version 2, got %d'
                           % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        # Per-step progress callback.
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        # NOTE(review): the "if final_progress:" guard appears to be
        # missing from this copy.
        progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                 % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default PackWriter objcache factory: an index list over the
    repository's pack directory."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
582 """Writes Git objects inside a pack file."""
583 def __init__(self, objcache_maker=_make_objcache, compression_level=1):
590 self.objcache_maker = objcache_maker
592 self.compression_level = compression_level
599 objdir = dir=repo('objects')
600 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
602 self.file = os.fdopen(fd, 'w+b')
607 self.parentfd = os.open(objdir, os.O_RDONLY)
613 assert(name.endswith('.pack'))
614 self.filename = name[:-5]
615 self.file.write('PACK\0\0\0\2\0\0\0\0')
616 self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        # NOTE(review): the try/except wrapper around the write and the
        # byte/count accounting appear to be missing from this copy.
        raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)

    def _update_idx(self, sha, crc, size):
        # Record (sha, crc, pack offset), bucketed by the sha's first byte.
        # NOTE(review): the sha assertion and "if self.idx:" guard appear
        # to be missing from this copy.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
        # Write one object into the pack; roll to a new pack when limits
        # are exceeded.
        # NOTE(review): several lines are missing from this copy -- the
        # closing argument of the _raw_write call and the rollover action
        # (presumably self.breakpoint()) under the size check.
        sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        # NOTE(review): the _end()/restart plumbing appears to be missing
        # from this copy.
        self.outbytes = self.count = 0
    def _require_objcache(self):
        # Lazily build the object cache; complain if we can't have one.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            # NOTE(review): the "raise GitError(" opener for this message
            # appears to be missing from this copy.
                "PackWriter not opened or can't check exists w/o objcache")
668 def exists(self, id, want_source=False):
669 """Return non-empty if an object is found in the object cache."""
670 self._require_objcache()
671 return self.objcache.exists(id, want_source=want_source)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            self._require_objcache()
            # Remember the new object so we never write it twice.
            self.objcache.add(sha)
        # NOTE(review): the final "return sha" appears to be missing from
        # this copy.
682 def new_blob(self, blob):
683 """Create a blob object in the pack with the supplied content."""
684 return self.maybe_write('blob', blob)
686 def new_tree(self, shalist):
687 """Create a tree object in the pack."""
688 content = tree_encode(shalist)
689 return self.maybe_write('tree', content)
    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
                   # NOTE(review): the trailing "msg):" parameter and close
                   # appear to be missing from this copy.
        """Create a commit object in the pack. The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        # NOTE(review): the if/else guards choosing between explicit-tz and
        # local-tz formatting, and the l = [] / msg lines, appear to be
        # missing from this copy.
        adate_str = _git_date_str(adate_sec, adate_tz)
        adate_str = _local_git_date_str(adate_sec)
        cdate_str = _git_date_str(cdate_sec, cdate_tz)
        cdate_str = _local_git_date_str(cdate_sec)
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        return self.maybe_write('commit', '\n'.join(l))
715 """Remove the pack file from disk."""
724 os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        # Finalize the pack: patch the object count, checksum, write the
        # idx, and move both into place under their content-derived name.
        # NOTE(review): many lines are missing from this copy (file
        # flush/seek bookkeeping, the idx flattening, the sha loop body,
        # try/finally around the fd cleanup, and the final return).
        if not f: return None
        # update object count
        cp = struct.pack('!i', self.count)
        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()
        fdatasync(f.fileno())
        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        # Rename into the canonical pack-<sha> name pair.
        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        # Make the directory entries durable before releasing the dir fd.
        os.fsync(self.parentfd)
        os.close(self.parentfd)
        auto_midx(repo('objects/pack'))
773 def close(self, run_midx=True):
774 """Close the pack file and move it to its definitive path."""
775 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Write a v2 pack index for the accumulated (sha, crc, ofs)
        # entries and return the hex sha of the object list.
        # NOTE(review): several lines are missing from this copy --
        # ofs64_count initialization, the outer loop over idx sections,
        # header writes, and the tail-sum/return plumbing.
        for entry in section:
            # Offsets >= 2**31 need the 64-bit overflow table.
            if entry[2] >= 2**31:

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)

        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        fdatasync(idx_f.fileno())
        # Fill the body via the C helper over an mmap of the file.
        idx_map = mmap_readwrite(idx_f, close=False)
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        # Reopen to append the trailer checksums.
        idx_f = open(filename, 'a+b')
        b = idx_f.read(8 + 4*256)
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
        fdatasync(idx_f.fileno())
def _gitenv(repo_dir = None):
    # Produce a preexec_fn-style closure that points GIT_DIR at repo_dir
    # for child git processes.
    # NOTE(review): the default-repo fallback and the inner "def env():"
    # wrapper/return appear to be missing from this copy.
        os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
def list_refs(refname=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless a ref
    name is specified. Given a ref name, only include tuples for that
    particular ref. The limits restrict the result items to
    refs/heads or refs/tags. If both limits are specified, items from
    both sources will be included.
    """
    # NOTE(review): the guards around the --heads/--tags appends, the
    # refname argument append, and the empty-output early return appear
    # to be missing from this copy.
    argv = ['git', 'show-ref']
    argv.append('--heads')
    argv.append('--tags')
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    # Each show-ref line is "<sha> <refname>".
    for d in out.split('\n'):
        (sha, name) = d.split(' ', 1)
        yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
    # Take at most two matches so ambiguity can be detected cheaply.
    l = tuple(islice(refs, 2))
    # NOTE(review): the match-count checks and return appear to be missing
    # from this copy.
def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.
    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    If count is a non-zero integer, limit the number of commits to "count"
    """
    assert(not ref.startswith('-'))  # keep refs from smuggling in options
    # NOTE(review): opts initialization and the "if count:" guard are
    # missing from this copy.
    opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    # NOTE(review): the stdout line loop, date parsing, and yield are
    # partially missing from this copy.
    if s.startswith('commit '):
        commit = s[7:].decode('hex')
    rv = p.wait() # not fatal
    raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # NOTE(review): the result-list initialization, the loop over refs,
    # and the final return appear to be missing from this copy.
    commit = get_commit_items(ref, cp(repo_dir))
    result.append(commit.author_sec)
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.
    Should be roughly equivalent to 'git rev-parse'.
    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    head = read_ref(committish, repo_dir=repo_dir)
    # NOTE(review): the "if head:" guard, the pack-index existence check,
    # and the fallback returns appear to be missing from this copy.
    debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # NOTE(review): the newval/oldval sanity asserts appear to be missing
    # from this copy.  Only heads and tags may be updated here.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname):
    """Delete a repository reference."""
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    proc = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', proc)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    """
    # NOTE(review): the "global repodir" declaration and the guards that
    # prefer an explicit path over BUP_DIR over ~/.bup appear to be
    # missing from this copy.
    repodir = os.environ.get('BUP_DIR')
    repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    # NOTE(review): the initial guess_repo(path) call appears to be
    # missing from this copy.
    d = repo() # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Keep a reflog so history is recoverable.
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
    """
    # NOTE(review): the guess_repo() call, the try: opener for the stat
    # probe, the "% repo()" format argument, the else branch, and the
    # sys.exit() calls appear to be missing from this copy.
    os.stat(repo('objects/pack/.'))
    except OSError as e:
        if e.errno == errno.ENOENT:
            log('error: %r is not a bup repository; run "bup init"\n'
        log('error: %s\n' % e)
1021 """Get Git's version and ensure a usable version is installed.
1023 The returned version is formatted as an ordered tuple with each position
1024 representing a digit in the version tag. For example, the following tuple
1025 would represent version 1.6.6.9:
1027 ('1', '6', '6', '9')
1031 p = subprocess.Popen(['git', '--version'],
1032 stdout=subprocess.PIPE)
1033 gvs = p.stdout.read()
1034 _git_wait('git --version', p)
1035 m = re.match(r'git version (\S+.\S+)', gvs)
1037 raise GitError('git --version weird output: %r' % gvs)
1038 _ver = tuple(m.group(1).split('.'))
1039 needed = ('1','5', '3', '1')
1041 raise GitError('git version %s or higher is required; you have %s'
1042 % ('.'.join(needed), '.'.join(_ver)))
def _git_wait(cmd, p):
    # Wait for subprocess p; raise GitError naming cmd on nonzero exit.
    # NOTE(review): the "rv = p.wait()" and nonzero-check lines appear to
    # be missing from this copy.
        raise GitError('%s returned %d' % (cmd, rv))


def _git_capture(argv):
    # Run argv with GIT_DIR set and capture its stdout.
    # NOTE(review): the stdout read and return appear to be missing from
    # this copy.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    _git_wait(repr(argv), p)
class _AbortableIter:
    """Iterator wrapper that can be aborted mid-stream, invoking an
    optional callback so the underlying source can be cleaned up."""
    def __init__(self, it, onabort = None):
        # NOTE(review): the self.it assignment and done flag appear to be
        # missing from this copy.
        self.onabort = onabort

    # Fragment: next() body (its def line and try wrapper are missing
    # from this copy).
        return self.it.next()
        except StopIteration as e:

    # Fragment: abort() (its def line and body are missing from this copy).
        """Abort iteration and call the abortion callback, if needed."""
1091 """Link to 'git cat-file' that is used to retrieve blob data."""
1092 def __init__(self, repo_dir = None):
1094 self.repo_dir = repo_dir
1095 wanted = ('1','5','6')
1098 log('warning: git version < %s; bup will be slow.\n'
1101 self.get = self._slow_get
1103 self.p = self.inprogress = None
1104 self.get = self._fast_get
    # Fragment: _abort() -- tear down the cat-file child (its def line
    # and guards are missing from this copy).
        self.p.stdout.close()
        self.p.stdin.close()
        self.inprogress = None

    # Fragment: _restart() -- spawn 'git cat-file --batch' (def line and
    # some Popen keyword lines are missing from this copy).
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        # Fetch one object via the persistent cat-file --batch child.
        # NOTE(review): several lines are missing from this copy -- the
        # _restart() call under the liveness check, the "if
        # self.inprogress:" guard before the log, and the try/yield
        # plumbing around the body stream near the end.
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
        # Only one object may be streamed at a time.
        log('_fast_get: opening %r while %r is open\n'
            % (id, self.inprogress))
        assert(not self.inprogress)
        # Reject ids that could confuse the one-line batch protocol.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise KeyError('blob %r is missing' % id)
        # Header format: "<sha> <type> <size>".
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl
        # Stream the body; abort cleanly if the consumer bails early.
        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
        # The batch protocol terminates each object with a newline.
        readline_result = self.p.stdout.readline()
        assert(readline_result == '\n')
        self.inprogress = None
        except Exception as e:
    def _slow_get(self, id):
        # One 'git cat-file' process per object -- used with ancient git.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        # NOTE(review): the type-validation and "yield type" lines, and
        # the "yield blob" body of the loop below, appear to be missing
        # from this copy.
        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
        _git_wait('git cat-file', p)
    def _join(self, it):
        # Recursively walk the (type, content) stream 'it', descending
        # trees and commits down to their blobs.
        # NOTE(review): the initial type read, the 'blob' branch, the
        # "yield blob" loop bodies, the else: opener, and the raise's
        # "% type" argument appear to be missing from this copy.
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            # A commit's first line names its tree; recurse into it.
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
        raise GitError('invalid object type %r: expected blob/tree/commit'
1194 """Generate a list of the content of all blobs that can be reached
1195 from an object. The hash given in 'id' must point to a blob, a tree
1196 or a commit. The content of all blobs that can be seen from trees or
1197 commits will be added to the list.
1200 for d in self._join(self.get(id)):
1202 except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # One cached CatPipe per absolute repo path (module-global _cp dict).
    # NOTE(review): the global declaration, default repo_dir resolution,
    # the "if not cp:" guard, the cache store, and the return appear to
    # be missing from this copy.
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
    cp = CatPipe(repo_dir)
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    # NOTE(review): the tags-dict initialization, the name extraction, the
    # setdefault for new hashes, and the final return appear to be
    # missing from this copy.
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name) # more than one tag can point at 'c'