1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
from __future__ import absolute_import
import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
from collections import namedtuple
from itertools import islice
from numbers import Integral

from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
from bup.compat import range
from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
                         fdatasync,
                         hostname, localtime, log,
                         merge_dict,
                         merge_iter,
                         mmap_read, mmap_readwrite,
                         parse_num,
                         progress, qprogress, shstr, stat_if_exists,
                         unlink, username, userfullname,
                         utc_offset_str)

verbose = 0
ignore_midx = 0
repodir = None  # The default repository, once initialized

_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

_total_searches = 0
_total_steps = 0


class GitError(Exception):
    pass

def _gitenv(repo_dir=None):
    if not repo_dir:
        repo_dir = repo()
    return merge_dict(os.environ, {'GIT_DIR': os.path.abspath(repo_dir)})

def _git_wait(cmd, p):
    rv = p.wait()
    if rv != 0:
        raise GitError('%s returned %d' % (shstr(cmd), rv))

def _git_capture(argv):
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, env=_gitenv())
    r = p.stdout.read()
    _git_wait(repr(argv), p)
    return r

def git_config_get(option, repo_dir=None):
    cmd = ('git', 'config', '--get', option)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         env=_gitenv(repo_dir=repo_dir))
    r = p.stdout.read()
    rc = p.wait()
    if rc == 0:
        return r
    if rc != 1:
        raise GitError('%s returned %d' % (cmd, rc))
    return None

def parse_tz_offset(s):
    """UTC offset in seconds."""
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    if s[0] == '-':
        return - tz_off
    return tz_off
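
# For illustration (not part of the API): parse_tz_offset() expects a
# Git-style numeric offset such as '+0130' or '-0500'.
#
#   assert parse_tz_offset('+0130') == 5400
#   assert parse_tz_offset('-0500') == -18000
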
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    % (_start_end_char,
       _start_end_char, _content_char, _start_end_char)
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Assumes every following line starting with a space is part of the
# mergetag.  Is there a formal commit blob spec?
_mergetag_rx = r'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n]*)*)'
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)

(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _mergetag_rx))
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')

# Note that the author_sec and committer_sec values are (UTC) epoch
# seconds, and for now the mergetag is not included.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
                                       'message'])

def parse_commit(content):
    commit_match = re.match(_commit_rx, content)
    if not commit_match:
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
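
# Illustrative sketch (hypothetical values): parse_commit() takes the raw
# commit blob text and returns a CommitInfo tuple.
#
#   text = ('tree 3fd0bda3e66c4e5e88c0b1e3dd9ba98afa786971\n'
#           'author A U Thor <author@example.com> 1230000000 +0000\n'
#           'committer C O Mitter <committer@example.com> 1230000100 -0500\n'
#           '\n'
#           'Example commit message.\n')
#   info = parse_commit(text)
#   assert info.parents == []
#   assert info.author_sec == 1230000000
#   assert info.committer_offset == -18000
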
def get_cat_data(cat_iterator, expected_type):
    _, kind, _ = next(cat_iterator)
    if kind != expected_type:
        raise Exception('expected %r, saw %r' % (expected_type, kind))
    return ''.join(cat_iterator)

def get_commit_items(id, cp):
    return parse_commit(get_cat_data(cp.get(id), 'commit'))

def _local_git_date_str(epoch_sec):
    return '%d %s' % (epoch_sec, utc_offset_str(epoch_sec))

def _git_date_str(epoch_sec, tz_offset_sec):
    offs = tz_offset_sec // 60
    return '%d %s%02d%02d' \
        % (epoch_sec,
           '+' if offs >= 0 else '-',
           abs(offs) // 60,
           abs(offs) % 60)
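
# For illustration: this renders epoch seconds plus a UTC offset in the
# "<sec> +HHMM" form Git uses in commit objects.
#
#   assert _git_date_str(1230000000, 0) == '1230000000 +0000'
#   assert _git_date_str(1230000000, -3600) == '1230000000 -0100'
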
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    repo_dir = repo_dir or repodir
    if not repo_dir:
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
        repo_dir = gd
    return os.path.join(repo_dir, sub)

def shorten_hash(s):
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
                  r'\1\2*\3', s)

def repo_rel(path):
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
        fullrepo += '/'
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)

def all_packdirs():
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
    return paths

def auto_midx(objdir):
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        raise
    if rv:
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        raise
    if rv:
        add_error('%r: returned %d' % (args, rv))

def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
        return name + '.bup'
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
    else:
        return name

(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one
    of the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        return (name[:-5],
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    else:
        return (name, BUP_NORMAL)
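
# Round-trip sketch (illustrative): a chunked regular file is stored as a
# tree, so its gitmode is a directory mode and it gets a '.bup' suffix;
# demangle_name() undoes the mangling.
#
#   assert mangle_name('data', 0o100644, 0o040000) == 'data.bup'
#   assert demangle_name('data.bup', 0o040000) == ('data', BUP_CHUNKED)
#   assert mangle_name('archive.bup', 0o100644, 0o100644) == 'archive.bupl'
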
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    header = '%s %d\0' % (type, len(content))
    sum = Sha1(header)
    sum.update(content)
    return sum.digest()
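
# Sanity-check example: Git object ids are SHA-1 over "<type> <len>\0<data>",
# so the empty blob must hash to the well-known id below.
#
#   assert calc_hash('blob', '').encode('hex') \
#       == 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
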
def shalist_item_sort_key(ent):
    (mode, name, id) = ent
    assert(mode+0 == mode)
    if stat.S_ISDIR(mode):
        return name + '/'
    else:
        return name

def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    l = []
    for (mode,name,bin) in shalist:
        assert(mode)
        assert(mode+0 == mode)
        assert(name)
        assert(len(bin) == 20)
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        l.append(s)
    return ''.join(l)

def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    ofs = 0
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        assert(z > ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        mode,name = spl
        sha = buf[z+1:z+1+20]
        ofs = z+1+20
        yield (int(mode, 8), name, sha)
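
# Illustrative round trip (hypothetical 20-byte ids): tree_encode() and
# tree_decode() are inverses, and git's sort order places 'dir' before
# 'file' because directories sort as if their names ended with '/'.
#
#   sha = '\xaa' * 20
#   buf = tree_encode([(0o100644, 'file', sha), (0o040000, 'dir', sha)])
#   assert list(tree_decode(buf)) == [(0o040000, 'dir', sha),
#                                     (0o100644, 'file', sha)]
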
def _encode_packobj(type, content, compression_level=1):
    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
        raise ValueError('invalid compression level %s' % compression_level)
    szout = ''
    sz = len(content)
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    sz >>= 4
    while 1:
        if sz: szbits |= 0x80
        szout += chr(szbits)
        if not sz:
            break
        szbits = sz & 0x7f
        sz >>= 7
    z = zlib.compressobj(compression_level)
    yield szout
    yield z.compress(content)
    yield z.flush()

def _encode_looseobj(type, content, compression_level=1):
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    yield z.flush()

def _decode_looseobj(buf):
    assert(buf)
    s = zlib.decompress(buf)
    i = s.find('\0')
    assert(i > 0)
    l = s[:i].split(' ')
    assert(len(l) == 2)
    type = l[0]
    sz = int(l[1])
    content = s[i+1:]
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)

def _decode_packobj(buf):
    assert(buf)
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    sz = c & 0x0f
    shift = 4
    i = 0
    while c & 0x80:
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
        if not (c & 0x80):
            break
    return (type, zlib.decompress(buf[i+1:]))
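
# Round-trip sketch (illustrative): _encode_packobj() emits a type/size
# varint header plus deflated data, and _decode_packobj() reverses it; the
# loose-object pair behaves the same way.
#
#   assert _decode_packobj(''.join(_encode_packobj('blob', 'hi'))) \
#       == ('blob', 'hi')
#   assert _decode_looseobj(''.join(_encode_looseobj('blob', 'hi'))) \
#       == ('blob', 'hi')
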
class PackIdx:
    def __init__(self):
        assert(0)

    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        if idx != None:
            return self._ofs_from_idx(idx)
        return None

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            return want_source and os.path.basename(self.name) or True
        return None

    def __len__(self):
        return int(self.fanout[255])

    def _idx_from_hash(self, hash):
        global _total_searches, _total_steps
        _total_searches += 1
        assert(len(hash) == 20)
        b1 = ord(hash[0])
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        want = str(hash)
        _total_steps += 1  # lookup table is a step
        while start < end:
            _total_steps += 1
            mid = start + (end-start)/2
            v = self._idx_to_hash(mid)
            if v < want:
                start = mid+1
            elif v > want:
                end = mid
            else: # got it!
                return mid
        return None

class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*24+4 : idx*24+24])

    def __iter__(self):
        for i in range(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)

class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
                               nsha*4)
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        if ofs & 0x80000000:
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
        return ofs

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

    def __iter__(self):
        for i in range(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
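
# For reference, the standard pack .idx version 2 layout that the buffer()
# slices above assume: an 8-byte header ('\377tOc' plus version), a 256-entry
# fanout table, nsha 20-byte sha1s, nsha 4-byte CRCs, nsha 4-byte offsets
# (high bit set means the value indexes the following 64-bit offset table),
# then the optional 64-bit offsets and two trailing sha1 checksums.
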
_mpi_count = 0
class PackIdxList:
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = set()
        self.packs = []
        self.do_bloom = False
        self.bloom = None
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        return iter(idxmerge(self.packs))

    def __len__(self):
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            else:
                _total_searches -= 1  # was counted by bloom
                return None
        for i in xrange(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            if ix:
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return ix
        self.do_bloom = True
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. if all of
        their contents are in another, bigger .midx file) and removes the
        superseded files.

        If skip_midx is True, all work on .midx files will be skipped and
        .midx files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None  # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                    if not d.get(full):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = False
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     '  used by %s\n') % (n, mxf))
                                broken = True
                        if broken:
                            mx.close()
                            del mx
                            unlink(full)
                        else:
                            midxl.append(mx)
                midxl.sort(key=lambda ix:
                           (-len(ix), -xstat.stat(ix.name).st_mtime))
                for ix in midxl:
                    any_needed = False
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            any_needed = True
                            break
                    if any_needed:
                        d[ix.name] = ix
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        ix.close()
                        unlink(ix.name)
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                if not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError as e:
                        add_error(e)
                        continue
                    d[full] = ix
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            self.packs = list(set(d.values()))
            self.packs.sort(reverse=True, key=lambda x: len(x))
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
                self.do_bloom = True
            else:
                self.bloom = None
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

577 """Insert an additional object in the list."""
def open_idx(filename):
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        header = f.read(8)
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            if version == 2:
                return PackIdxV2(filename, f)
            else:
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            return PackIdxV1(filename, f)
        else:
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    else:
        raise GitError('idx filenames must end with .idx or .midx')
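
# Usage sketch (hypothetical path): open_idx() dispatches on the extension,
# so .idx and .midx files can be searched through the same interface.
#
#   ix = open_idx('/repo/objects/pack/pack-1234.idx')
#   if ix.exists('\xaa' * 20):
#       ofs = ix.find_offset('\xaa' * 20)
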
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        if final_progress:
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)

def _make_objcache():
    return PackIdxList(repo('objects/pack'))

# bup-gc assumes that it can disable all PackWriter activities
# (bloom/midx/cache) via the constructor and close() arguments.
621 """Writes Git objects inside a pack file."""
622 def __init__(self, objcache_maker=_make_objcache, compression_level=1,
623 run_midx=True, on_pack_finish=None,
624 max_pack_size=None, max_pack_objects=None, repo_dir=None):
625 self.repo_dir = repo_dir or repo()
632 self.objcache_maker = objcache_maker
634 self.compression_level = compression_level
635 self.run_midx=run_midx
636 self.on_pack_finish = on_pack_finish
637 if not max_pack_size:
638 max_pack_size = git_config_get('pack.packSizeLimit',
639 repo_dir=self.repo_dir)
640 if max_pack_size is not None:
641 max_pack_size = parse_num(max_pack_size)
642 if not max_pack_size:
643 # larger packs slow down pruning
644 max_pack_size = 1000 * 1000 * 1000
645 self.max_pack_size = max_pack_size
646 # cache memory usage is about 83 bytes per object
647 self.max_pack_objects = max_pack_objects if max_pack_objects \
648 else max(1, self.max_pack_size // 5000)
    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def _open(self):
        if not self.file:
            objdir = dir = os.path.join(self.repo_dir, 'objects')
            fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
            try:
                self.file = os.fdopen(fd, 'w+b')
            except:
                os.close(fd)
                raise
            try:
                self.parentfd = os.open(objdir, os.O_RDONLY)
            except:
                f = self.file
                self.file = None
                f.close()
                raise
            assert(name.endswith('.pack'))
            self.filename = name[:-5]
            self.file.write('PACK\0\0\0\2\0\0\0\0')
            self.idx = list(list() for i in xrange(256))

    def _raw_write(self, datalist, sha):
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob.  So let's make sure it's
        # all-or-nothing.  (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.)  f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        try:
            f.write(oneblob)
        except IOError as e:
            raise GitError, e, sys.exc_info()[2]
        nw = len(oneblob)
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
        self.outbytes += nw
        self.count += 1
        return nw, crc

    def _update_idx(self, sha, crc, size):
        assert(sha)
        if self.file:
            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        if verbose:
            log('>')
        if not sha:
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
                                    sha=sha)
        if self.outbytes >= self.max_pack_size \
           or self.count >= self.max_pack_objects:
            self.breakpoint()
        return sha

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0
        return id

    def _require_objcache(self):
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            raise GitError(
                "PackWriter not opened or can't check exists w/o objcache")

    def exists(self, id, want_source=False):
        """Return non-empty if an object is found in the object cache."""
        self._require_objcache()
        return self.objcache.exists(id, want_source=want_source)

    def just_write(self, sha, type, content):
        """Write an object to the pack file without checking for duplication."""
        self._write(sha, type, content)
        # If nothing else, gc doesn't have/want an objcache
        if self.objcache is not None:
            self.objcache.add(sha)

    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._require_objcache()
            self.just_write(sha, type, content)
        return sha

    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack."""
        content = tree_encode(shalist)
        return self.maybe_write('tree', content)

    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
                   msg):
        """Create a commit object in the pack.  The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        if adate_tz is not None:
            adate_str = _git_date_str(adate_sec, adate_tz)
        else:
            adate_str = _local_git_date_str(adate_sec)
        if cdate_tz is not None:
            cdate_str = _git_date_str(cdate_sec, cdate_tz)
        else:
            cdate_str = _local_git_date_str(cdate_sec)
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))

784 """Remove the pack file from disk."""
793 os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        f = self.file
        if not f: return None
        self.file = None
        try:
            self.objcache = None
            idx = self.idx
            self.idx = None

            # update object count
            f.seek(8)
            cp = struct.pack('!i', self.count)
            assert(len(cp) == 4)
            f.write(cp)

            # calculate the pack sha1sum
            f.seek(0)
            sum = Sha1()
            for b in chunkyreader(f):
                sum.update(b)
            packbin = sum.digest()
            f.write(packbin)
            fdatasync(f.fileno())
        finally:
            f.close()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' + obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        try:
            os.fsync(self.parentfd)
        finally:
            os.close(self.parentfd)

        if run_midx:
            auto_midx(os.path.join(self.repo_dir, 'objects/pack'))

        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)

        return nameprefix

    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path."""
        return self._end(run_midx=run_midx)
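
    # Typical usage sketch (illustrative; assumes check_repo_or_die() has
    # already located a repository):
    #
    #   with PackWriter() as w:
    #       sha = w.new_blob('file content')
    #       tree = w.new_tree([(0o100644, 'file', sha)])
    #
    # Leaving the "with" block calls close(), which finishes the pack,
    # writes the .idx, and renames both into objects/pack/ under a name
    # derived from the sorted object list.
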
    def _write_pack_idx_v2(self, filename, idx, packbin):
        ofs64_count = 0
        for section in idx:
            for entry in section:
                if entry[2] >= 2**31:
                    ofs64_count += 1

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_map = None
        idx_f = open(filename, 'w+b')
        try:
            idx_f.truncate(index_len)
            fdatasync(idx_f.fileno())
            idx_map = mmap_readwrite(idx_f, close=False)
            try:
                count = _helpers.write_idx(filename, idx_map, idx, self.count)
                assert(count == self.count)
                idx_map.flush()
            finally:
                idx_map.close()
        finally:
            idx_f.close()

        idx_f = open(filename, 'a+b')
        try:
            idx_f.seek(0)
            idx_sum = Sha1()
            b = idx_f.read(8 + 4*256)
            idx_sum.update(b)

            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20*self.count):
                idx_sum.update(b)
                obj_list_sum.update(b)
            namebase = obj_list_sum.hexdigest()

            for b in chunkyreader(idx_f):
                idx_sum.update(b)
            idx_f.write(idx_sum.digest())
            fdatasync(idx_f.fileno())
            return namebase
        finally:
            idx_f.close()

def list_refs(patterns=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless
    patterns are specified. In that case, only include tuples for
    refs matching those patterns (cf. git-show-ref(1)). The limits
    restrict the result items to refs/heads or refs/tags. If both
    limits are specified, items from both sources will be included.

    """
    argv = ['git', 'show-ref']
    if limit_to_heads:
        argv.append('--heads')
    if limit_to_tags:
        argv.append('--tags')
    argv.append('--')
    if patterns:
        argv.extend(patterns)
    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait()  # not fatal
    if rv:
        assert(not out)
    if out:
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))
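
# Example (illustrative): hashes are yielded as 20-byte binary sha1s, so
# hex-encode them for display.
#
#   for name, sha in list_refs(limit_to_tags=True):
#       print '%s %s' % (sha.encode('hex'), name)
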
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
    l = tuple(islice(refs, 2))
    if l:
        assert(len(l) == 1)
        return l[0][1]
    else:
        return None

def rev_list_invocation(ref_or_refs, count=None, format=None):
    if isinstance(ref_or_refs, compat.str_type):
        refs = (ref_or_refs,)
    else:
        refs = ref_or_refs
    argv = ['git', 'rev-list']
    if isinstance(count, Integral):
        argv.extend(['-n', str(count)])
    elif count:
        raise ValueError('unexpected count argument %r' % count)
    if format:
        argv.append('--pretty=format:' + format)
    for ref in refs:
        assert not ref.startswith('-')
        argv.append(ref)
    argv.append('--')
    return argv
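
# For illustration, the generated invocation for two refs and a count:
#
#   assert rev_list_invocation(['master', 'stable'], count=2,
#                              format='%T %at') \
#       == ['git', 'rev-list', '-n', '2', '--pretty=format:%T %at',
#           'master', 'stable', '--']
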
def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
    """Yield information about commits as per "git rev-list".  If a format
    is not provided, yield one hex hash at a time.  If a format is
    provided, pass it to rev-list and call parse(git_stdout) for each
    commit with the stream positioned just after the rev-list "commit
    HASH" header line.  When a format is provided yield (oidx,
    parse(git_stdout)) for each commit.

    """
    assert bool(parse) == bool(format)
    p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
                                             format=format),
                         env=_gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    if not format:
        for line in p.stdout:
            yield line.strip()
    else:
        line = p.stdout.readline()
        while line:
            s = line.strip()
            if not s.startswith('commit '):
                raise Exception('unexpected line ' + s)
            s = s[7:]
            assert len(s) == 40
            yield s, parse(p.stdout)
            line = p.stdout.readline()

    rv = p.wait()  # not fatal
    if rv:
        raise GitError, 'git rev-list returned error %d' % rv

def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs.  For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    result = []
    for ref in refs:
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)
    return result

def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the binary hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    head = read_ref(committish, repo_dir=repo_dir)
    if head:
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
        return head

    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))

    if len(committish) == 40:
        try:
            hash = committish.decode('hex')
        except TypeError:
            return None

        if pL.exists(hash):
            return hash

    return None

def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    if not oldval:
        oldval = ''
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         env=_gitenv(repo_dir))
    _git_wait('git update-ref', p)

def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1))."""
    assert(refname.startswith('refs/'))
    oldvalue = [] if not oldvalue else [oldvalue]
    p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
                         env=_gitenv())
    _git_wait('git update-ref', p)

def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die()."""
    global repodir
    if path:
        repodir = path
    if not repodir:
        repodir = os.environ.get('BUP_DIR')
        if not repodir:
            repodir = os.path.expanduser('~/.bup')

def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    guess_repo(path)
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         env=_gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, env=_gitenv())
    _git_wait('git config', p)

    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, env=_gitenv())
    _git_wait('git config', p)

def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    guess_repo(path)
    top = repo()
    pst = stat_if_exists(top + '/objects/pack')
    if pst and stat.S_ISDIR(pst.st_mode):
        return
    if not pst:
        top_st = stat_if_exists(top)
        if not top_st:
            log('error: repository %r does not exist (see "bup help init")\n'
                % top)
            sys.exit(15)
    log('error: %r is not a repository\n' % top)
    sys.exit(14)

1106 """Get Git's version and ensure a usable version is installed.
1108 The returned version is formatted as an ordered tuple with each position
1109 representing a digit in the version tag. For example, the following tuple
1110 would represent version 1.6.6.9:
1112 ('1', '6', '6', '9')
1116 p = subprocess.Popen(['git', '--version'],
1117 stdout=subprocess.PIPE)
1118 gvs = p.stdout.read()
1119 _git_wait('git --version', p)
1120 m = re.match(r'git version (\S+.\S+)', gvs)
1122 raise GitError('git --version weird output: %r' % gvs)
1123 _ver = tuple(m.group(1).split('.'))
1124 needed = ('1','5', '3', '1')
1126 raise GitError('git version %s or higher is required; you have %s'
1127 % ('.'.join(needed), '.'.join(_ver)))
class _AbortableIter:
    def __init__(self, it, onabort = None):
        self.it = it
        self.onabort = onabort
        self.done = None

    def __iter__(self):
        return self

    def __next__(self):
        try:
            return next(self.it)
        except StopIteration as e:
            self.done = True
            raise
        except:
            self.abort()
            raise

    next = __next__

    def abort(self):
        """Abort iteration and call the abortion callback, if needed."""
        if not self.done:
            self.done = True
            if self.onabort:
                self.onabort()

    def __del__(self):
        self.abort()

1163 """Link to 'git cat-file' that is used to retrieve blob data."""
1164 def __init__(self, repo_dir = None):
1166 self.repo_dir = repo_dir
1167 wanted = ('1','5','6')
1169 log('error: git version must be at least 1.5.6\n')
1171 self.p = self.inprogress = None
1175 self.p.stdout.close()
1176 self.p.stdin.close()
1178 self.inprogress = None
1182 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1183 stdin=subprocess.PIPE,
1184 stdout=subprocess.PIPE,
1187 env=_gitenv(self.repo_dir))
1190 """Yield (oidx, type, size), followed by the data referred to by ref.
1191 If ref does not exist, only yield (None, None, None).
1194 if not self.p or self.p.poll() != None:
1197 poll_result = self.p.poll()
1198 assert(poll_result == None)
1200 log('get: opening %r while %r is open\n' % (ref, self.inprogress))
1201 assert(not self.inprogress)
1202 assert(ref.find('\n') < 0)
1203 assert(ref.find('\r') < 0)
1204 assert(not ref.startswith('-'))
1205 self.inprogress = ref
1206 self.p.stdin.write('%s\n' % ref)
1207 self.p.stdin.flush()
1208 hdr = self.p.stdout.readline()
1209 if hdr.endswith(' missing\n'):
1210 self.inprogress = None
1211 yield None, None, None
1213 info = hdr.split(' ')
1214 if len(info) != 3 or len(info[0]) != 40:
1215 raise GitError('expected object (id, type, size), got %r' % info)
1216 oidx, typ, size = info
1218 it = _AbortableIter(chunkyreader(self.p.stdout, size),
1219 onabort=self._abort)
1221 yield oidx, typ, size
1224 readline_result = self.p.stdout.readline()
1225 assert(readline_result == '\n')
1226 self.inprogress = None
1227 except Exception as e:
    def _join(self, it):
        _, typ, _ = next(it)
        if typ == 'blob':
            for blob in it:
                yield blob
        elif typ == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
                    yield blob
        elif typ == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
                yield blob
        else:
            raise GitError('invalid object type %r: expected blob/tree/commit'
                           % typ)

1251 """Generate a list of the content of all blobs that can be reached
1252 from an object. The hash given in 'id' must point to a blob, a tree
1253 or a commit. The content of all blobs that can be seen from trees or
1254 commits will be added to the list.
1257 for d in self._join(self.get(id)):
1259 except StopIteration:
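
    # Usage sketch (illustrative): get() yields a (oidx, type, size) header
    # followed by the raw object data, so a whole object can be read as:
    #
    #   it = CatPipe().get('HEAD')
    #   oidx, typ, size = next(it)
    #   data = ''.join(it)
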
_cp = {}

def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    global _cp, repodir
    if not repo_dir:
        repo_dir = repodir or repo()
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
    if not cp:
        cp = CatPipe(repo_dir)
        _cp[repo_dir] = cp
    return cp

def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    tags = {}
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        name = n[10:]
        if not c in tags:
            tags[c] = []
        tags[c].append(name)  # more than one tag can point at 'c'
    return tags

class MissingObject(KeyError):
    def __init__(self, oid):
        self.oid = oid
        KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))

WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
                                   'path', 'chunk_path', 'data'])
# The path is the mangled path, and if an item represents a fragment
# of a chunked file, the chunk_path will be the chunked subtree path
# for the chunk, i.e. ['', '2d3115e', ...].  The top-level path for a
# chunked file will have a chunk_path of [''].  So some chunk subtree
# of the file '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'
#   ...

def walk_object(get_ref, oidx, stop_at=None, include_data=None):
    """Yield everything reachable from oidx via get_ref (which must behave
    like CatPipe get) as a WalkItem, stopping whenever stop_at(oidx)
    returns true.  Throw MissingObject if a hash encountered is
    missing from the repository, and don't read or return blob content
    in the data field unless include_data is set.

    """
    # Maintain the pending stack on the heap to avoid stack overflow
    pending = [(oidx, [], [], None)]
    while len(pending):
        oidx, parent_path, chunk_path, mode = pending.pop()
        oid = oidx.decode('hex')
        if stop_at and stop_at(oidx):
            continue

        if (not include_data) and mode and stat.S_ISREG(mode):
            # If the object is a "regular file", then it's a leaf in
            # the graph, so we can skip reading the data if the caller
            # hasn't requested it.
            yield WalkItem(oid=oid, type='blob',
                           chunk_path=chunk_path, path=parent_path,
                           mode=mode,
                           data=None)
            continue

        item_it = get_ref(oidx)
        get_oidx, typ, _ = next(item_it)
        if not get_oidx:
            raise MissingObject(oidx.decode('hex'))
        if typ not in ('blob', 'commit', 'tree'):
            raise Exception('unexpected repository object type %r' % typ)

        # FIXME: set the mode based on the type when the mode is None
        if typ == 'blob' and not include_data:
            # Dump data until we can ask cat_pipe not to fetch it
            for ignored in item_it:
                pass
            data = None
        else:
            data = ''.join(item_it)

        yield WalkItem(oid=oid, type=typ,
                       chunk_path=chunk_path, path=parent_path,
                       mode=mode,
                       data=(data if include_data else None))

        if typ == 'commit':
            commit_items = parse_commit(data)
            for pid in commit_items.parents:
                pending.append((pid, parent_path, chunk_path, mode))
            pending.append((commit_items.tree, parent_path, chunk_path,
                            hashsplit.GIT_MODE_TREE))
        elif typ == 'tree':
            for mode, name, ent_id in tree_decode(data):
                demangled, bup_type = demangle_name(name, mode)
                if chunk_path:
                    sub_path = parent_path
                    sub_chunk_path = chunk_path + [name]
                else:
                    sub_path = parent_path + [name]
                    if bup_type == BUP_CHUNKED:
                        sub_chunk_path = ['']
                    else:
                        sub_chunk_path = chunk_path
                pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,
                                mode))
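
# Usage sketch (hypothetical ref): stream everything reachable from a commit,
# using a CatPipe's get() as the reader and skipping blob payloads.
#
#   oidx = rev_parse('refs/heads/master').encode('hex')
#   for item in walk_object(cp().get, oidx, include_data=None):
#       print item.type, '/'.join(item.path)
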