1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 from __future__ import absolute_import
7 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
8 from collections import namedtuple
9 from itertools import islice
10 from numbers import Integral
12 from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
13 from bup.compat import range
14 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
16 hostname, localtime, log, merge_iter,
17 mmap_read, mmap_readwrite,
19 progress, qprogress, shstr, stat_if_exists,
20 unlink, username, userfullname,
25 repodir = None # The default repository, once initialized
27 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
28 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
34 class GitError(Exception):
def _git_wait(cmd, p):
    # Wait for the git subprocess p and fail loudly, naming the command,
    # when it exits nonzero.  (rv is the subprocess's exit status —
    # NOTE(review): the p.wait() call is in lines not shown here.)
    raise GitError('%s returned %d' % (shstr(cmd), rv))
def _git_capture(argv):
    # Run argv with GIT_DIR pointed at the bup repo and capture its stdout.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    _git_wait(repr(argv), p)
def git_config_get(option, repo_dir=None):
    """Return the value 'git config --get' reports for option, if any."""
    cmd = ('git', 'config', '--get', option)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         preexec_fn=_gitenv(repo_dir=repo_dir))
    # rc is the git subprocess's exit status; an unexpected failure is fatal.
    raise GitError('%s returned %d' % (cmd, rc))
def parse_tz_offset(s):
    """UTC offset in seconds."""
    # s is a git-style '[+-]hhmm' string; fold hours and minutes to seconds.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
70 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
71 # Make sure that's authoritative.
72 _start_end_char = r'[^ .,:;<>"\'\0\n]'
73 _content_char = r'[^\0\n<>]'
74 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
76 _start_end_char, _content_char, _start_end_char)
77 _tz_rx = r'[-+]\d\d[0-5]\d'
78 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
79 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
80 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
81 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
83 (?P<message>(?:.|\n)*)''' % (_parent_rx,
84 _safe_str_rx, _safe_str_rx, _tz_rx,
85 _safe_str_rx, _safe_str_rx, _tz_rx))
86 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
89 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
90 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
91 'author_name', 'author_mail',
92 'author_sec', 'author_offset',
93 'committer_name', 'committer_mail',
94 'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a git commit object's text into a CommitInfo namedtuple."""
    commit_match = re.match(_commit_rx, content)
    # NOTE(review): presumably only raised when _commit_rx fails to match.
    raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch the commit named by id via cat-pipe cp; return a CommitInfo."""
    items = cp.get(id)
    _, obj_type, _ = next(items)
    assert obj_type == 'commit'
    return parse_commit(''.join(items))
def _local_git_date_str(epoch_sec):
    """Format epoch_sec as a git date string using the local UTC offset."""
    offset = utc_offset_str(epoch_sec)
    return '%d' % epoch_sec + ' ' + offset
def _git_date_str(epoch_sec, tz_offset_sec):
    """Format epoch_sec plus an explicit tz offset as 'SECS [+-]HHMM'."""
    offs = tz_offset_sec // 60
    return '%d %s%02d%02d' \
        '+' if offs >= 0 else '-',
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    repo_dir = repo_dir or repodir
    # No repo configured: the caller must run check_repo_or_die() first.
    raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
    return os.path.join(repo_dir, sub)
151 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
156 full = os.path.abspath(path)
157 fullrepo = os.path.abspath(repo(''))
158 if not fullrepo.endswith('/'):
160 if full.startswith(fullrepo):
161 path = full[len(fullrepo):]
162 if path.startswith('index-cache/'):
163 path = path[len('index-cache/'):]
164 return shorten_hash(path)
168 paths = [repo('objects/pack')]
169 paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    """Regenerate the .midx and bloom files for objdir via bup subcommands.

    Failures are reported through add_error() rather than raised.
    """
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    # A regular file stored as a git tree entry was split into chunks.
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of
    the following:

    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        # .bupm (metadata) entries: chunked iff the tree entry is a directory
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # Git hashes the header '<type> <len>\0' followed by the content.
    header = '%s %d\0' % (type, len(content))
def shalist_item_sort_key(ent):
    # Sort key used by tree_encode; ent is a (mode, name, hash) tuple.
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be numeric
    if stat.S_ISDIR(mode):
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)  # numeric mode
        assert(len(bin) == 20)  # raw binary sha1, not hex
        # Entry format: '<octal mode> <name>\0<binary sha>'.
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    while ofs < len(buf):
        # Each entry is '<octal mode> <name>\0' followed by 20 sha bytes.
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    # Yields the pack-object header bytes, then zlib-compressed content.
    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
        raise ValueError('invalid compression level %s' % compression_level)
    # First header byte: low 4 size bits plus the object type in bits 4-6;
    # 0x80 marks a size continuation byte.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    # Loose objects are zlib('<type> <len>\0' + content).
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
def _decode_looseobj(buf):
    # Inverse of _encode_looseobj: returns (type, content).
    s = zlib.decompress(buf)
    assert(type in _typemap)
    assert(sz == len(content))  # the header's declared size must match
    return (type, content)
def _decode_packobj(buf):
    # Inverse of _encode_packobj: returns (type, content).
    type = _typermap[(c & 0x70) >> 4]
    # Accumulate base-128 size continuation bytes, 7 bits at a time.
    sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        return self._ofs_from_idx(idx)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # Either this idx file's basename (when requested) or True.
            return want_source and os.path.basename(self.name) or True
355 return int(self.fanout[255])
    def _idx_from_hash(self, hash):
        # Binary search for hash within the fanout-bounded range.
        global _total_searches, _total_steps
        assert(len(hash) == 20)  # raw binary sha1
        # The fanout table narrows the range by the sha's first byte.
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
        mid = start + (end-start)/2
        v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v1 layout: a 256-entry fanout of 4-byte counts, then 24-byte rows
        # of (4-byte offset, 20-byte sha).
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # First 4 bytes of the 24-byte row: network-order pack offset.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # Remaining 20 bytes of the row: the binary sha1.
        return str(self.shatable[idx*24+4 : idx*24+24])

        # NOTE(review): iteration body — yields each 20-byte sha in order.
        for i in range(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v2 magic '\377tOc' followed by a version number of 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        # v2 layout: header, fanout, shas, crcs, 4-byte offsets, then a
        # table of 8-byte offsets for entries past 2 GiB.
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # When the high bit is set, ofs indexes the 64-bit offset table.
        idx64 = ofs & 0x7fffffff
        ofs = struct.unpack('!Q',
                            str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

        # NOTE(review): iteration body — yields each 20-byte sha in order.
        for i in range(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    def __init__(self, dir):
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        # Bloom filtering is only enabled once refresh() finds a valid filter.
        self.do_bloom = False
454 assert(_mpi_count == 0)
457 return iter(idxmerge(self.packs))
460 return sum(len(pack) for pack in self.packs)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        if hash in self.also:
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                # Bloom says maybe-present; fall through to the real indexes.
                self.do_bloom = False
            _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            # Remember which .idx files are already covered by a loaded midx.
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                mx = midx.PackMidx(full)
                (mxd, mxf) = os.path.split(mx.name)
                for n in mx.idxnames:
                    if not os.path.exists(os.path.join(mxd, n)):
                        log(('warning: index %s missing\n' +
                             '  used by %s\n') % (n, mxf))
            # Prefer the largest, most recently written midx files.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for sub in ix.idxnames:
                found = d.get(os.path.join(self.dir, sub))
                if not found or isinstance(found, PackIdx):
                    # doesn't exist, or exists but not in a midx
            for name in ix.idxnames:
                d[os.path.join(self.dir, name)] = ix
            elif not ix.force_keep:
                debug1('midx: removing redundant: %s\n'
                       % os.path.basename(ix.name))
        for full in glob.glob(os.path.join(self.dir,'*.idx')):
            except GitError as e:
        bfull = os.path.join(self.dir, 'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        self.packs.sort(reverse=True, key=lambda x: len(x))
        # Only trust the bloom when it covers at least as many objects as
        # the loaded indexes.
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
565 """Insert an additional object in the list."""
def open_idx(filename):
    """Open a .idx or .midx index file; return a PackIdx*/PackMidx object."""
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        # The v2+ idx format starts with the '\377tOc' magic; v1 has none.
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            return PackIdxV2(filename, f)
            raise GitError('%s: expected idx file version 2, got %d'
                           % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                 % (100, total, total))
    # NOTE(review): 10024 is presumably merge_iter's per-iterator buffer
    # size — confirm against bup.helpers.merge_iter.
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Build the default PackWriter object cache from the repo's packs."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
605 # bup-gc assumes that it can disable all PackWriter activities
606 # (bloom/midx/cache) via the constructor and close() arguments.
609 """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                 run_midx=True, on_pack_finish=None,
                 max_pack_size=None, max_pack_objects=None, repo_dir=None):
        self.repo_dir = repo_dir or repo()
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
        self.run_midx=run_midx
        self.on_pack_finish = on_pack_finish
        if not max_pack_size:
            # Fall back to git's own configured pack size limit, if any.
            max_pack_size = git_config_get('pack.packSizeLimit',
                                           repo_dir=self.repo_dir)
            if max_pack_size is not None:
                max_pack_size = parse_num(max_pack_size)
            if not max_pack_size:
                # larger packs slow down pruning
                max_pack_size = 1000 * 1000 * 1000
        self.max_pack_size = max_pack_size
        # cache memory usage is about 83 bytes per object
        self.max_pack_objects = max_pack_objects if max_pack_objects \
                                else max(1, self.max_pack_size // 5000)
644 def __exit__(self, type, value, traceback):
649 objdir = dir = os.path.join(self.repo_dir, 'objects')
650 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
652 self.file = os.fdopen(fd, 'w+b')
657 self.parentfd = os.open(objdir, os.O_RDONLY)
663 assert(name.endswith('.pack'))
664 self.filename = name[:-5]
665 self.file.write('PACK\0\0\0\2\0\0\0\0')
666 self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        # Re-raise write failures as GitError, preserving the traceback.
        raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
    def _update_idx(self, sha, crc, size):
        # Record (sha, crc, offset) bucketed by the sha's first byte —
        # the same fanout structure the .idx file uses.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
        # NOTE(review): presumably only recomputed when sha wasn't supplied.
        sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        # Roll over to a fresh pack once either configured limit is hit.
        if self.outbytes >= self.max_pack_size \
           or self.count >= self.max_pack_objects:
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        # Finish the current pack (possibly running midx) and start fresh.
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0
    def _require_objcache(self):
        # Lazily create the object cache; fail when none can be made.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
                "PackWriter not opened or can't check exists w/o objcache")
719 def exists(self, id, want_source=False):
720 """Return non-empty if an object is found in the object cache."""
721 self._require_objcache()
722 return self.objcache.exists(id, want_source=want_source)
    def just_write(self, sha, type, content):
        """Write an object to the pack file, bypassing the objcache.  Fails if
        the underlying write fails."""
        self._write(sha, type, content)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self.just_write(sha, type, content)
            self._require_objcache()
            # Remember the new object so later duplicates are skipped.
            self.objcache.add(sha)
738 def new_blob(self, blob):
739 """Create a blob object in the pack with the supplied content."""
740 return self.maybe_write('blob', blob)
742 def new_tree(self, shalist):
743 """Create a tree object in the pack."""
744 content = tree_encode(shalist)
745 return self.maybe_write('tree', content)
    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
        """Create a commit object in the pack.  The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        # Use the explicit tz offsets when given, else the local offset.
        adate_str = _git_date_str(adate_sec, adate_tz)
        adate_str = _local_git_date_str(adate_sec)
        cdate_str = _git_date_str(cdate_sec, cdate_tz)
        cdate_str = _local_git_date_str(cdate_sec)
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        return self.maybe_write('commit', '\n'.join(l))
771 """Remove the pack file from disk."""
780 os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        # Finalize the in-progress pack file, write its index, and rename
        # both into place under their sha-derived name.
        if not f: return None

        # update object count
        cp = struct.pack('!i', self.count)

        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()
        fdatasync(f.fileno())

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' + obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        # Sync the containing directory so the renames are durable.
        os.fsync(self.parentfd)
        os.close(self.parentfd)
        auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)
833 def close(self, run_midx=True):
834 """Close the pack file and move it to its definitive path."""
835 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Count entries whose pack offset needs the 64-bit overflow table.
        for entry in section:
            if entry[2] >= 2**31:

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)

        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        fdatasync(idx_f.fileno())
        # Let the C helper fill the mmapped index in place.
        idx_map = mmap_readwrite(idx_f, close=False)
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)

        idx_f = open(filename, 'a+b')
        b = idx_f.read(8 + 4*256)
        # The pack's name is derived from the sha1 of its object list.
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
        fdatasync(idx_f.fileno())
def _gitenv(repo_dir = None):
    # Used as a Popen preexec_fn factory: points GIT_DIR at repo_dir so
    # child git commands operate on the bup repository.
    os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
def list_refs(patterns=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless
    patterns are specified. In that case, only include tuples for
    refs matching those patterns (cf. git-show-ref(1)). The limits
    restrict the result items to refs/heads or refs/tags. If both
    limits are specified, items from both sources will be included.
    """
    argv = ['git', 'show-ref']
    argv.append('--heads')
    argv.append('--tags')
    argv.extend(patterns)
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    # Each output line is '<hex sha> <refname>'; yield binary shas.
    for d in out.split('\n'):
        (sha, name) = d.split(' ', 1)
        yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
    # Take at most two matches so ambiguity can be detected cheaply.
    l = tuple(islice(refs, 2))
def rev_list_invocation(ref_or_refs, count=None, format=None):
    # Build the 'git rev-list' argv for a single ref or a sequence of refs.
    if isinstance(ref_or_refs, compat.str_type):
        refs = (ref_or_refs,)
    argv = ['git', 'rev-list']
    if isinstance(count, Integral):
        argv.extend(['-n', str(count)])
        raise ValueError('unexpected count argument %r' % count)
        argv.append('--pretty=format:' + format)
    # Refuse refs that rev-list would misparse as options.
    assert not ref.startswith('-')
def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
    """Yield information about commits as per "git rev-list".  If a format
    is not provided, yield one hex hash at a time.  If a format is
    provided, pass it to rev-list and call parse(git_stdout) for each
    commit with the stream positioned just after the rev-list "commit
    HASH" header line.  When a format is provided yield (oidx,
    parse(git_stdout)) for each commit.
    """
    # parse and format must be supplied together.
    assert bool(parse) == bool(format)
    p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    for line in p.stdout:
        line = p.stdout.readline()
        # Each record begins with a 'commit <hash>' header line.
        if not s.startswith('commit '):
            raise Exception('unexpected line ' + s)
        yield s[7:], parse(p.stdout)
        line = p.stdout.readline()
    rv = p.wait() # not fatal
    raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs.  For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # Collect each commit's author time (UTC epoch seconds).
    commit = get_commit_items(ref, cp(repo_dir))
    result.append(commit.author_sec)
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    # Try ref resolution first, then treat committish as a full 40-char
    # hex object id and look it up in the pack indexes.
    head = read_ref(committish, repo_dir=repo_dir)
    debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # Only branch heads and tags may be updated through this helper.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1))."""
    assert refname.startswith('refs/')
    argv = ['git', 'update-ref', '-d', refname]
    if oldvalue:
        argv.append(oldvalue)
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    # Fall back to $BUP_DIR, then to ~/.bup.
    repodir = os.environ.get('BUP_DIR')
    repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Record ref updates in the reflog so they are recoverable.
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    # A usable repo has an objects/pack directory.
    pst = stat_if_exists(top + '/objects/pack')
    if pst and stat.S_ISDIR(pst.st_mode):
    top_st = stat_if_exists(top)
    log('error: repository %r does not exist (see "bup help init")\n'
    log('error: %r is not a repository\n' % top)
1101 """Get Git's version and ensure a usable version is installed.
1103 The returned version is formatted as an ordered tuple with each position
1104 representing a digit in the version tag. For example, the following tuple
1105 would represent version 1.6.6.9:
1107 ('1', '6', '6', '9')
1111 p = subprocess.Popen(['git', '--version'],
1112 stdout=subprocess.PIPE)
1113 gvs = p.stdout.read()
1114 _git_wait('git --version', p)
1115 m = re.match(r'git version (\S+.\S+)', gvs)
1117 raise GitError('git --version weird output: %r' % gvs)
1118 _ver = tuple(m.group(1).split('.'))
1119 needed = ('1','5', '3', '1')
1121 raise GitError('git version %s or higher is required; you have %s'
1122 % ('.'.join(needed), '.'.join(_ver)))
class _AbortableIter:
    """Iterator wrapper whose consumption can be aborted mid-stream."""
    def __init__(self, it, onabort = None):
        # onabort: callback invoked when iteration is aborted early.
        self.onabort = onabort
        return next(self.it)
    except StopIteration as e:
        """Abort iteration and call the abortion callback, if needed."""
1158 """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        # 'git cat-file --batch' requires at least git 1.5.6.
        wanted = ('1','5','6')
        log('error: git version must be at least 1.5.6\n')
        # The cat-file subprocess is started lazily; see get().
        self.p = self.inprogress = None
1170 self.p.stdout.close()
1171 self.p.stdin.close()
1173 self.inprogress = None
1177 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1178 stdin=subprocess.PIPE,
1179 stdout=subprocess.PIPE,
1182 preexec_fn = _gitenv(self.repo_dir))
1185 """Yield (oidx, type, size), followed by the data referred to by ref.
1186 If ref does not exist, only yield (None, None, None).
1189 if not self.p or self.p.poll() != None:
1192 poll_result = self.p.poll()
1193 assert(poll_result == None)
1195 log('get: opening %r while %r is open\n' % (ref, self.inprogress))
1196 assert(not self.inprogress)
1197 assert(ref.find('\n') < 0)
1198 assert(ref.find('\r') < 0)
1199 assert(not ref.startswith('-'))
1200 self.inprogress = ref
1201 self.p.stdin.write('%s\n' % ref)
1202 self.p.stdin.flush()
1203 hdr = self.p.stdout.readline()
1204 if hdr.endswith(' missing\n'):
1205 self.inprogress = None
1206 yield None, None, None
1208 info = hdr.split(' ')
1209 if len(info) != 3 or len(info[0]) != 40:
1210 raise GitError('expected object (id, type, size), got %r' % info)
1211 oidx, typ, size = info
1213 it = _AbortableIter(chunkyreader(self.p.stdout, size),
1214 onabort=self._abort)
1216 yield oidx, typ, size
1219 readline_result = self.p.stdout.readline()
1220 assert(readline_result == '\n')
1221 self.inprogress = None
1222 except Exception as e:
    def _join(self, it):
        # Recursively yield all blob content reachable from one object.
        _, typ, _ = next(it)
        treefile = ''.join(it)
        for (mode, name, sha) in tree_decode(treefile):
            for blob in self.join(sha.encode('hex')):
        elif typ == 'commit':
            # A commit's first line is 'tree <hex>'; descend into that tree.
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
            raise GitError('invalid object type %r: expected blob/tree/commit'
1246 """Generate a list of the content of all blobs that can be reached
1247 from an object. The hash given in 'id' must point to a blob, a tree
1248 or a commit. The content of all blobs that can be seen from trees or
1249 commits will be added to the list.
1252 for d in self._join(self.get(id)):
1254 except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # One CatPipe is cached per absolute repo path (in _cp).
    repo_dir = repodir or repo()
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
        cp = CatPipe(repo_dir)
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name) # more than one tag can point at 'c'
class MissingObject(KeyError):
    # Raised when a referenced repository object (oid, binary sha) is absent.
    def __init__(self, oid):
        KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))
1291 WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
1292 'path', 'chunk_path', 'data'])
1293 # The path is the mangled path, and if an item represents a fragment
1294 # of a chunked file, the chunk_path will be the chunked subtree path
1295 # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
1296 # chunked file will have a chunk_path of ['']. So some chunk subtree
1297 # of the file '/foo/bar/baz' might look like this:
1299 # item.path = ['foo', 'bar', 'baz.bup']
1300 # item.chunk_path = ['', '2d3115e', '016b097']
1301 # item.type = 'tree'
1305 def walk_object(cat_pipe, oidx,
1308 """Yield everything reachable from oidx via cat_pipe as a WalkItem,
1309 stopping whenever stop_at(oidx) returns true. Throw MissingObject
1310 if a hash encountered is missing from the repository, and don't
1311 read or return blob content in the data field unless include_data
1314 # Maintain the pending stack on the heap to avoid stack overflow
1315 pending = [(oidx, [], [], None)]
1317 oidx, parent_path, chunk_path, mode = pending.pop()
1318 oid = oidx.decode('hex')
1319 if stop_at and stop_at(oidx):
1322 if (not include_data) and mode and stat.S_ISREG(mode):
1323 # If the object is a "regular file", then it's a leaf in
1324 # the graph, so we can skip reading the data if the caller
1325 # hasn't requested it.
1326 yield WalkItem(oid=oid, type='blob',
1327 chunk_path=chunk_path, path=parent_path,
1332 item_it = cat_pipe.get(oidx)
1333 get_oidx, typ, _ = next(item_it)
1335 raise MissingObject(oidx.decode('hex'))
1336 if typ not in ('blob', 'commit', 'tree'):
1337 raise Exception('unexpected repository object type %r' % typ)
1339 # FIXME: set the mode based on the type when the mode is None
1340 if typ == 'blob' and not include_data:
1341 # Dump data until we can ask cat_pipe not to fetch it
1342 for ignored in item_it:
1346 data = ''.join(item_it)
1348 yield WalkItem(oid=oid, type=typ,
1349 chunk_path=chunk_path, path=parent_path,
1351 data=(data if include_data else None))
1354 commit_items = parse_commit(data)
1355 for pid in commit_items.parents:
1356 pending.append((pid, parent_path, chunk_path, mode))
1357 pending.append((commit_items.tree, parent_path, chunk_path,
1358 hashsplit.GIT_MODE_TREE))
1360 for mode, name, ent_id in tree_decode(data):
1361 demangled, bup_type = demangle_name(name, mode)
1363 sub_path = parent_path
1364 sub_chunk_path = chunk_path + [name]
1366 sub_path = parent_path + [name]
1367 if bup_type == BUP_CHUNKED:
1368 sub_chunk_path = ['']
1370 sub_chunk_path = chunk_path
1371 pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,