1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 from __future__ import absolute_import
7 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
8 from collections import namedtuple
9 from itertools import islice
10 from numbers import Integral
12 from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
13 from bup.compat import range
14 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
16 hostname, localtime, log, merge_iter,
17 mmap_read, mmap_readwrite,
19 progress, qprogress, shstr, stat_if_exists,
20 unlink, username, userfullname,
25 repodir = None # The default repository, once initialized
# Numeric object-type codes as used in pack entries (see _encode_packobj /
# _decode_packobj), and the reverse mapping.
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }


class GitError(Exception):
    # General error type raised for any failed git interaction in this module.


def _git_wait(cmd, p):
    # Wait for subprocess p and raise GitError on a nonzero exit status.
    # NOTE(review): the wait/return-value lines are elided in this excerpt.
    raise GitError('%s returned %d' % (shstr(cmd), rv))


def _git_capture(argv):
    # Run argv with the repository environment and capture its stdout.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    _git_wait(repr(argv), p)


def git_config_get(option, repo_dir=None):
    # Return the value of 'git config --get <option>' for the repository,
    # raising GitError on an unexpected exit status.
    cmd = ('git', 'config', '--get', option)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         preexec_fn=_gitenv(repo_dir=repo_dir))
    # NOTE(review): reading stdout and the rc checks are elided in this excerpt.
    raise GitError('%s returned %d' % (cmd, rc))
def parse_tz_offset(s):
    """UTC offset in seconds."""
    # '+HHMM'/'-HHMM' -> seconds; the sign handling for a leading '-' is
    # elided in this excerpt.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)


# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.

# Character classes for the "safe" name/email fields of a commit object.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
# NOTE(review): the '% (' line of this continuation is elided in this excerpt.
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
               _start_end_char, _content_char, _start_end_char)
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Grammar of a raw commit object; group names line up with the CommitInfo
# namedtuple fields.
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Note that the author_sec and committer_sec values are (UTC) epoch seconds,
# and the *_offset values are UTC offsets in seconds (see parse_tz_offset).
# NOTE(review): the closing "'message'])" line is elided in this excerpt.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',


def parse_commit(content):
    """Parse a raw commit object into a CommitInfo tuple."""
    commit_match = re.match(_commit_rx, content)
    # NOTE(review): the 'if not commit_match:' guard line is elided here.
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_cat_data(cat_iterator, expected_type):
    """Drain a cat-pipe style iterator and return its concatenated data.

    The iterator's first item must be an (oidx, type, size) header; if
    the header's type differs from expected_type, raise instead of
    consuming the data chunks.
    """
    header = next(cat_iterator)
    kind = header[1]
    if kind == expected_type:
        return ''.join(cat_iterator)
    raise Exception('expected %r, saw %r' % (expected_type, kind))
def get_commit_items(id, cp):
    """Fetch commit 'id' through cat-pipe 'cp' and return it as CommitInfo."""
    raw = get_cat_data(cp.get(id), 'commit')
    return parse_commit(raw)
def _local_git_date_str(epoch_sec):
    """Format epoch_sec as a git date string using the local UTC offset."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
def _git_date_str(epoch_sec, tz_offset_sec):
    # Render epoch seconds plus an explicit UTC offset as '<sec> <±HHMM>'.
    # NOTE(review): the '% (epoch_sec,' and abs(offs) lines are elided here.
    offs = tz_offset_sec // 60
    return '%d %s%02d%02d' \
        '+' if offs >= 0 else '-',


def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    repo_dir = repo_dir or repodir
    # NOTE(review): the 'if not repo_dir:' guard line is elided in this excerpt.
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
    return os.path.join(repo_dir, sub)


# NOTE(review): the enclosing 'def shorten_hash(path):' line is elided; this
# appears to abbreviate 40-char hex hashes to their first 7 characters.
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',


# NOTE(review): the enclosing 'def repo_rel(path):' line is elided; this
# rewrites an absolute path relative to the repo (and index-cache) for logging.
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)


# NOTE(review): the enclosing def line is elided; builds the list of
# directories that may contain pack indexes.
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    # Regenerate the multi-pack index for objdir by invoking 'bup midx --auto',
    # then refresh the bloom filter via 'bup bloom'; failures are recorded
    # with add_error() rather than raised.
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    # NOTE(review): the surrounding try/except/if-rv lines are elided here.
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    # A regular file stored as a git tree means it was hashsplit into chunks.
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
    # name[:-1] also catches names already carrying a one-char mangling suffix.
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'


(BUP_NORMAL, BUP_CHUNKED) = (0,1)

def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of

    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        # NOTE(review): the 'return (name[:-5],' line is elided in this excerpt.
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # git hashes '<type> <len>\0' + content, not the bare content.
    header = '%s %d\0' % (type, len(content))
    # NOTE(review): the Sha1 update/digest lines are elided in this excerpt.


def shalist_item_sort_key(ent):
    # Sort key for tree entries; directories get special treatment here,
    # presumably to match git's tree ordering (dirs sort as 'name/') —
    # TODO confirm against the elided branch bodies.
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must already be an integer
    if stat.S_ISDIR(mode):


def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)
        assert(len(bin) == 20)  # bin is a raw 20-byte sha, not hex
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree


def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    # Entries are '<octal mode> <name>\0<20-byte sha>' back to back.
    # NOTE(review): the 'ofs = 0' initialization and advance lines are elided.
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    # Yield the pack-entry encoding of an object: a variable-length header
    # carrying the type code (from _typemap) and size, then zlib-compressed
    # content.  Raises ValueError for a bad compression level.
    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
        raise ValueError('invalid compression level %s' % compression_level)
    # Low nibble of the size plus the type code in bits 4-6.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    # High bit marks "more size bytes follow".
    if sz: szbits |= 0x80
    z = zlib.compressobj(compression_level)
    yield z.compress(content)


def _encode_looseobj(type, content, compression_level=1):
    # Loose-object encoding: zlib-compressed '<type> <size>\0<content>'.
    # NOTE(review): the final 'yield z.flush()' line is elided in this excerpt.
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)


def _decode_looseobj(buf):
    # Inverse of _encode_looseobj; returns (type, content).
    s = zlib.decompress(buf)
    assert(type in _typemap)
    assert(sz == len(content))  # size in the header must match the payload
    return (type, content)


def _decode_packobj(buf):
    # Inverse of _encode_packobj; returns (type, content).
    type = _typermap[(c & 0x70) >> 4]
    # Accumulate 7 size bits per continuation byte.
    sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
            return self._ofs_from_idx(idx)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # With want_source, return the index's basename instead of True.
            return want_source and os.path.basename(self.name) or True

        return int(self.fanout[255])  # fanout[255] == total object count

    def _idx_from_hash(self, hash):
        # Binary-search the sorted sha table, narrowed to the fanout range
        # for the hash's first byte.
        global _total_searches, _total_steps
        assert(len(hash) == 20)
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1  # lookup table is a step
            # NOTE(review): Python-2 integer division; '//' under Python 3.
            mid = start + (end-start)/2
            v = self._idx_to_hash(mid)


class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v1 layout: 256-entry fanout of 4-byte counts, then 24-byte rows of
        # (4-byte offset, 20-byte sha).  'buffer' is Python-2 only.
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # Offset is the first 4 bytes of the 24-byte row.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # Sha is the trailing 20 bytes of the 24-byte row.
        return str(self.shatable[idx*24+4 : idx*24+24])

        # NOTE(review): enclosing '__iter__' def line is elided in this excerpt.
        for i in range(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)


class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # '\377tOc' magic plus version 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        # v2 layout: separate sha, crc, 32-bit-offset, and 64-bit-offset
        # tables rather than interleaved rows.
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
            # High bit set: the real offset lives in the 64-bit table.
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

        # NOTE(review): enclosing '__iter__' def line is elided in this excerpt.
        for i in range(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    def __init__(self, dir):
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        self.do_bloom = False

        assert(_mpi_count == 0)

        # NOTE(review): enclosing '__iter__' def line is elided in this excerpt.
        return iter(idxmerge(self.packs))

        # NOTE(review): enclosing '__len__' def line is elided in this excerpt.
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        if hash in self.also:
        # Consult the bloom filter first; a bloom miss is definitive, a hit
        # must still be confirmed against a real index.
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
                _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Start from the currently-known packs, dropping midxes if skipping.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     ' used by %s\n') % (n, mxf))
                # Prefer bigger, newer midxes so smaller ones can be dropped.
                midxl.sort(key=lambda ix:
                           (-len(ix), -xstat.stat(ix.name).st_mtime))
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                for full in glob.glob(os.path.join(self.dir,'*.idx')):
                    except GitError as e:
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            self.packs = list(set(d.values()))
            # Biggest indexes first: most likely to contain any given object.
            self.packs.sort(reverse=True, key=lambda x: len(x))
            # Only trust the bloom if it covers at least everything we index.
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):

        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

        # NOTE(review): enclosing 'add' def line is elided in this excerpt.
        """Insert an additional object in the list."""
def open_idx(filename):
    # Open a pack index by extension: '.idx' (v1 or v2) or '.midx'.
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        if header[0:4] == '\377tOc':  # v2+ magic
            version = struct.unpack('!I', header[4:8])[0]
                return PackIdxV2(filename, f)
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        # A v1 file has no magic; it starts directly with the fanout table,
        # whose first bytes sort below the v2 magic.
        elif len(header) == 8 and header[0:4] < '\377tOc':
            return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
        raise GitError('idx filenames must end with .idx or .midx')


def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        # Per-step progress callback for merge_iter.
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    # 10024 is the merge chunk size — looks like it may have been intended
    # as 10240; confirm before changing.
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Build the default PackWriter object cache from the repo's pack dir."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
# bup-gc assumes that it can disable all PackWriter activities
# (bloom/midx/cache) via the constructor and close() arguments.

    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                 run_midx=True, on_pack_finish=None,
                 max_pack_size=None, max_pack_objects=None, repo_dir=None):
        self.repo_dir = repo_dir or repo()
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
        self.run_midx=run_midx
        self.on_pack_finish = on_pack_finish
        # Fall back to git's own pack.packSizeLimit setting, then to a
        # hard default.
        if not max_pack_size:
            max_pack_size = git_config_get('pack.packSizeLimit',
                                           repo_dir=self.repo_dir)
            if max_pack_size is not None:
                max_pack_size = parse_num(max_pack_size)
            if not max_pack_size:
                # larger packs slow down pruning
                max_pack_size = 1000 * 1000 * 1000
        self.max_pack_size = max_pack_size
        # cache memory usage is about 83 bytes per object
        self.max_pack_objects = max_pack_objects if max_pack_objects \
                                else max(1, self.max_pack_size // 5000)

    def __exit__(self, type, value, traceback):

        # NOTE(review): enclosing '_open' def line is elided in this excerpt.
        objdir = dir = os.path.join(self.repo_dir, 'objects')
        fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
            self.file = os.fdopen(fd, 'w+b')
        # Keep the dir fd so _end() can fsync the directory entry later.
        self.parentfd = os.open(objdir, os.O_RDONLY)
        assert(name.endswith('.pack'))
        self.filename = name[:-5]
        # Pack header: magic, version 2, object count 0 (patched in _end()).
        self.file.write('PACK\0\0\0\2\0\0\0\0')
        # One idx bucket per possible first sha byte.
        self.idx = list(list() for i in xrange(256))

    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
            # Python-2-only re-raise preserving the original traceback.
            raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)

    def _update_idx(self, sha, crc, size):
        # Record (sha, crc, offset) in the bucket for sha's first byte.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        # Roll over to a fresh pack once either limit is reached.
        if self.outbytes >= self.max_pack_size \
           or self.count >= self.max_pack_objects:

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0

    def _require_objcache(self):
        # Lazily build the objcache; raise if none can be made.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
                "PackWriter not opened or can't check exists w/o objcache")
720 def exists(self, id, want_source=False):
721 """Return non-empty if an object is found in the object cache."""
722 self._require_objcache()
723 return self.objcache.exists(id, want_source=want_source)
725 def just_write(self, sha, type, content):
726 """Write an object to the pack file without checking for duplication."""
727 self._write(sha, type, content)
728 # If nothing else, gc doesn't have/want an objcache
729 if self.objcache is not None:
730 self.objcache.add(sha)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._require_objcache()
            self.just_write(sha, type, content)
        # NOTE(review): the trailing objcache.add/'return sha' lines are
        # elided in this excerpt.
740 def new_blob(self, blob):
741 """Create a blob object in the pack with the supplied content."""
742 return self.maybe_write('blob', blob)
744 def new_tree(self, shalist):
745 """Create a tree object in the pack."""
746 content = tree_encode(shalist)
747 return self.maybe_write('tree', content)
    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
        """Create a commit object in the pack.  The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        # Explicit tz offset given -> fixed date string; else local offset.
            adate_str = _git_date_str(adate_sec, adate_tz)
            adate_str = _local_git_date_str(adate_sec)
            cdate_str = _git_date_str(cdate_sec, cdate_tz)
            cdate_str = _local_git_date_str(cdate_sec)
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        return self.maybe_write('commit', '\n'.join(l))

        # NOTE(review): enclosing 'abort' def line is elided in this excerpt.
        """Remove the pack file from disk."""
            os.unlink(self.filename + '.pack')

    def _end(self, run_midx=True):
        # Finalize the pack: patch in the object count, compute the pack
        # sha1, write the v2 index, and rename both into their final names.
        if not f: return None

        # update object count
        cp = struct.pack('!i', self.count)

        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()
        fdatasync(f.fileno())

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' + obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        # Sync the directory so the renames are durable before continuing.
        os.fsync(self.parentfd)
        os.close(self.parentfd)

        auto_midx(os.path.join(self.repo_dir, 'objects/pack'))

        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)
835 def close(self, run_midx=True):
836 """Close the pack file and move it to its definitive path."""
837 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Write a version-2 pack index for the accumulated idx buckets and
        # return the hex sha of the object list (used to name the pack).
            for entry in section:
                # Offsets >= 2**31 need a slot in the 64-bit offset table.
                if entry[2] >= 2**31:

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)

        idx_f = open(filename, 'w+b')
            # Preallocate, then let the C helper fill the mapped file.
            idx_f.truncate(index_len)
            fdatasync(idx_f.fileno())
            idx_map = mmap_readwrite(idx_f, close=False)
                count = _helpers.write_idx(filename, idx_map, idx, self.count)
                assert(count == self.count)

        idx_f = open(filename, 'a+b')
            # Re-read the header+fanout, then checksum the sha list to derive
            # the pack's name, and append the trailing sha1 sums.
            b = idx_f.read(8 + 4*256)

            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20*self.count):
                obj_list_sum.update(b)
            namebase = obj_list_sum.hexdigest()

            for b in chunkyreader(idx_f):
            idx_f.write(idx_sum.digest())
            fdatasync(idx_f.fileno())
def _gitenv(repo_dir = None):
    # Return a preexec_fn that points git at repo_dir by setting GIT_DIR
    # in the child's environment.
        os.environ['GIT_DIR'] = os.path.abspath(repo_dir)


def list_refs(patterns=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless
    patterns are specified. In that case, only include tuples for
    refs matching those patterns (cf. git-show-ref(1)). The limits
    restrict the result items to refs/heads or refs/tags. If both
    limits are specified, items from both sources will be included.
    """
    argv = ['git', 'show-ref']
        argv.append('--heads')
        argv.append('--tags')
        argv.extend(patterns)
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
        # Each output line is '<sha> <refname>'.
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))


def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
    # Take at most two matches so ambiguity can be detected (handling elided).
    l = tuple(islice(refs, 2))
def rev_list_invocation(ref_or_refs, count=None, format=None):
    # Build the 'git rev-list' argv used by rev_list() below; accepts a
    # single ref string or an iterable of refs.
    if isinstance(ref_or_refs, compat.str_type):
        refs = (ref_or_refs,)
    argv = ['git', 'rev-list']
    if isinstance(count, Integral):
        argv.extend(['-n', str(count)])
        raise ValueError('unexpected count argument %r' % count)
        argv.append('--pretty=format:' + format)
        # Refuse refs that look like options (argument injection guard).
        assert not ref.startswith('-')


def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
    """Yield information about commits as per "git rev-list". If a format
    is not provided, yield one hex hash at a time. If a format is
    provided, pass it to rev-list and call parse(git_stdout) for each
    commit with the stream positioned just after the rev-list "commit
    HASH" header line. When a format is provided yield (oidx,
    parse(git_stdout)) for each commit.
    """
    # parse and format must be supplied together.
    assert bool(parse) == bool(format)
    p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
        for line in p.stdout:
            line = p.stdout.readline()
            if not s.startswith('commit '):
                raise Exception('unexpected line ' + s)
            yield s[7:], parse(p.stdout)
            line = p.stdout.readline()

    rv = p.wait() # not fatal
        # Python-2-only raise syntax.
        raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs.  For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # Dates are the author_sec epoch seconds from each parsed commit.
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)


def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    # Try as a ref name first.
    head = read_ref(committish, repo_dir=repo_dir)
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))

    # Otherwise, try it as a full 40-char hex id against the pack indexes.
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    if len(committish) == 40:
        hash = committish.decode('hex')


def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # Only branch heads and tags may be updated through this helper.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname, oldvalue=None):
    """Delete repository reference 'refname' (see git update-ref(1)).

    When oldvalue is given it is passed through to git, which verifies
    the ref currently has that value before deleting it.
    """
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    if oldvalue:
        argv.append(oldvalue)
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
        # Fall back to $BUP_DIR, then to ~/.bup.
        repodir = os.environ.get('BUP_DIR')
            repodir = os.path.expanduser('~/.bup')


def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)


def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    # A directory at <top>/objects/pack is taken as evidence of a repo.
    pst = stat_if_exists(top + '/objects/pack')
    if pst and stat.S_ISDIR(pst.st_mode):
        top_st = stat_if_exists(top)
        log('error: repository %r does not exist (see "bup help init")\n'
        log('error: %r is not a repository\n' % top)
    """Get Git's version and ensure a usable version is installed.

    The returned version is formatted as an ordered tuple with each position
    representing a digit in the version tag. For example, the following tuple
    would represent version 1.6.6.9:

        ('1', '6', '6', '9')
    """
    p = subprocess.Popen(['git', '--version'],
                         stdout=subprocess.PIPE)
    gvs = p.stdout.read()
    _git_wait('git --version', p)
    # NOTE(review): the '.' between the \S+ groups is unescaped, so it
    # matches any character — probably intended as r'\.'; confirm before
    # tightening.
    m = re.match(r'git version (\S+.\S+)', gvs)
        raise GitError('git --version weird output: %r' % gvs)
    _ver = tuple(m.group(1).split('.'))
    # Version components are compared as string tuples against this minimum.
    needed = ('1','5', '3', '1')
        raise GitError('git version %s or higher is required; you have %s'
                       % ('.'.join(needed), '.'.join(_ver)))
class _AbortableIter:
    # Iterator wrapper that can be aborted mid-stream, invoking an optional
    # callback so the owner (CatPipe) can reset its subprocess state.
    def __init__(self, it, onabort = None):
        self.onabort = onabort

            return next(self.it)
        except StopIteration as e:

        """Abort iteration and call the abortion callback, if needed."""


    """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        # Requires git >= 1.5.6 for 'cat-file --batch'.
        wanted = ('1','5','6')
            log('error: git version must be at least 1.5.6\n')
        self.p = self.inprogress = None

        # NOTE(review): enclosing '_abort'/'close' def line is elided here.
        self.p.stdout.close()
        self.p.stdin.close()
        self.inprogress = None

        # NOTE(review): enclosing 'restart' def line is elided here;
        # (re)spawns the long-lived 'git cat-file --batch' subprocess.
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  preexec_fn = _gitenv(self.repo_dir))

        """Yield (oidx, type, size), followed by the data referred to by ref.
        If ref does not exist, only yield (None, None, None).
        """
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
            log('get: opening %r while %r is open\n' % (ref, self.inprogress))
        # Only one request may be in flight on the batch pipe at a time.
        assert(not self.inprogress)
        assert(ref.find('\n') < 0)
        assert(ref.find('\r') < 0)
        assert(not ref.startswith('-'))
        self.inprogress = ref
        self.p.stdin.write('%s\n' % ref)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            yield None, None, None
        # Header format: '<40-hex-id> <type> <size>'.
        info = hdr.split(' ')
        if len(info) != 3 or len(info[0]) != 40:
            raise GitError('expected object (id, type, size), got %r' % info)
        oidx, typ, size = info
        it = _AbortableIter(chunkyreader(self.p.stdout, size),
                            onabort=self._abort)
            yield oidx, typ, size
            # cat-file terminates each object's data with a newline.
            readline_result = self.p.stdout.readline()
            assert(readline_result == '\n')
            self.inprogress = None
        except Exception as e:

    def _join(self, it):
        # Recursively yield the blob contents reachable from the object on
        # 'it' (blob directly; tree/commit by walking their children).
        _, typ, _ = next(it)
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif typ == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
            raise GitError('invalid object type %r: expected blob/tree/commit'

        """Generate a list of the content of all blobs that can be reached
        from an object.  The hash given in 'id' must point to a blob, a tree
        or a commit.  The content of all blobs that can be seen from trees or
        commits will be added to the list.
        """
        for d in self._join(self.get(id)):
        except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
        repo_dir = repodir or repo()
    # Cache one CatPipe per absolute repo path.
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
        cp = CatPipe(repo_dir)


def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name) # more than one tag can point at 'c'


class MissingObject(KeyError):
    # Raised when a hash referenced during a walk is absent from the repo.
    def __init__(self, oid):
        # NOTE(review): the 'self.oid = oid' line is elided in this excerpt.
        KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))


WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
                                   'path', 'chunk_path', 'data'])
# The path is the mangled path, and if an item represents a fragment
# of a chunked file, the chunk_path will be the chunked subtree path
# for the chunk, i.e. ['', '2d3115e', ...].  The top-level path for a
# chunked file will have a chunk_path of [''].  So some chunk subtree
# of the file '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'
1307 def walk_object(get_ref, oidx, stop_at=None, include_data=None):
1308 """Yield everything reachable from oidx via get_ref (which must behave
1309 like CatPipe get) as a WalkItem, stopping whenever stop_at(oidx)
1310 returns true. Throw MissingObject if a hash encountered is
1311 missing from the repository, and don't read or return blob content
1312 in the data field unless include_data is set.
1315 # Maintain the pending stack on the heap to avoid stack overflow
1316 pending = [(oidx, [], [], None)]
1318 oidx, parent_path, chunk_path, mode = pending.pop()
1319 oid = oidx.decode('hex')
1320 if stop_at and stop_at(oidx):
1323 if (not include_data) and mode and stat.S_ISREG(mode):
1324 # If the object is a "regular file", then it's a leaf in
1325 # the graph, so we can skip reading the data if the caller
1326 # hasn't requested it.
1327 yield WalkItem(oid=oid, type='blob',
1328 chunk_path=chunk_path, path=parent_path,
1333 item_it = get_ref(oidx)
1334 get_oidx, typ, _ = next(item_it)
1336 raise MissingObject(oidx.decode('hex'))
1337 if typ not in ('blob', 'commit', 'tree'):
1338 raise Exception('unexpected repository object type %r' % typ)
1340 # FIXME: set the mode based on the type when the mode is None
1341 if typ == 'blob' and not include_data:
1342 # Dump data until we can ask cat_pipe not to fetch it
1343 for ignored in item_it:
1347 data = ''.join(item_it)
1349 yield WalkItem(oid=oid, type=typ,
1350 chunk_path=chunk_path, path=parent_path,
1352 data=(data if include_data else None))
1355 commit_items = parse_commit(data)
1356 for pid in commit_items.parents:
1357 pending.append((pid, parent_path, chunk_path, mode))
1358 pending.append((commit_items.tree, parent_path, chunk_path,
1359 hashsplit.GIT_MODE_TREE))
1361 for mode, name, ent_id in tree_decode(data):
1362 demangled, bup_type = demangle_name(name, mode)
1364 sub_path = parent_path
1365 sub_chunk_path = chunk_path + [name]
1367 sub_path = parent_path + [name]
1368 if bup_type == BUP_CHUNKED:
1369 sub_chunk_path = ['']
1371 sub_chunk_path = chunk_path
1372 pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,