1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 from __future__ import absolute_import
7 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
8 from collections import namedtuple
9 from itertools import islice
10 from numbers import Integral
12 from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
13 from bup.compat import range
14 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
16 hostname, localtime, log, merge_iter,
17 mmap_read, mmap_readwrite,
19 progress, qprogress, shstr, stat_if_exists,
20 unlink, username, userfullname,
25 repodir = None # The default repository, once initialized
# Mapping between git object-type names and the numeric type codes used
# in packfile object headers; _typermap is the inverse lookup.
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
34 class GitError(Exception):
38 def _git_wait(cmd, p):
41 raise GitError('%s returned %d' % (shstr(cmd), rv))
43 def _git_capture(argv):
44 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
46 _git_wait(repr(argv), p)
49 def git_config_get(option, repo_dir=None):
50 cmd = ('git', 'config', '--get', option)
51 p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
52 preexec_fn=_gitenv(repo_dir=repo_dir))
58 raise GitError('%s returned %d' % (cmd, rc))
62 def parse_tz_offset(s):
63 """UTC offset in seconds."""
64 tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
70 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
71 # Make sure that's authoritative.
72 _start_end_char = r'[^ .,:;<>"\'\0\n]'
73 _content_char = r'[^\0\n<>]'
74 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
76 _start_end_char, _content_char, _start_end_char)
77 _tz_rx = r'[-+]\d\d[0-5]\d'
78 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
79 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
80 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
81 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
83 (?P<message>(?:.|\n)*)''' % (_parent_rx,
84 _safe_str_rx, _safe_str_rx, _tz_rx,
85 _safe_str_rx, _safe_str_rx, _tz_rx))
86 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
89 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
90 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
91 'author_name', 'author_mail',
92 'author_sec', 'author_offset',
93 'committer_name', 'committer_mail',
94 'committer_sec', 'committer_offset',
97 def parse_commit(content):
98 commit_match = re.match(_commit_rx, content)
100 raise Exception('cannot parse commit %r' % content)
101 matches = commit_match.groupdict()
102 return CommitInfo(tree=matches['tree'],
103 parents=re.findall(_parent_hash_rx, matches['parents']),
104 author_name=matches['author_name'],
105 author_mail=matches['author_mail'],
106 author_sec=int(matches['asec']),
107 author_offset=parse_tz_offset(matches['atz']),
108 committer_name=matches['committer_name'],
109 committer_mail=matches['committer_mail'],
110 committer_sec=int(matches['csec']),
111 committer_offset=parse_tz_offset(matches['ctz']),
112 message=matches['message'])
def get_cat_data(cat_iterator, expected_type):
    """Return the joined data from a cat-pipe style iterator.

    The iterator's first item must be an (oidx, kind, size) header
    tuple; raise an Exception if kind differs from expected_type,
    otherwise concatenate and return the remaining chunks.
    """
    _, actual_type, _ = next(cat_iterator)
    if actual_type == expected_type:
        return ''.join(cat_iterator)
    raise Exception('expected %r, saw %r' % (expected_type, actual_type))
def get_commit_items(id, cp):
    """Fetch commit id via the cat-pipe cp and return it as a CommitInfo."""
    commit_data = get_cat_data(cp.get(id), 'commit')
    return parse_commit(commit_data)
def _local_git_date_str(epoch_sec):
    """Format epoch_sec as a git date string ("<secs> <tzoffset>"),
    using the local timezone's UTC offset at that time."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
128 def _git_date_str(epoch_sec, tz_offset_sec):
129 offs = tz_offset_sec // 60
130 return '%d %s%02d%02d' \
132 '+' if offs >= 0 else '-',
137 def repo(sub = '', repo_dir=None):
138 """Get the path to the git repository or one of its subdirectories."""
139 repo_dir = repo_dir or repodir
141 raise GitError('You should call check_repo_or_die()')
143 # If there's a .git subdirectory, then the actual repo is in there.
144 gd = os.path.join(repo_dir, '.git')
145 if os.path.exists(gd):
148 return os.path.join(repo_dir, sub)
152 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
157 full = os.path.abspath(path)
158 fullrepo = os.path.abspath(repo(''))
159 if not fullrepo.endswith('/'):
161 if full.startswith(fullrepo):
162 path = full[len(fullrepo):]
163 if path.startswith('index-cache/'):
164 path = path[len('index-cache/'):]
165 return shorten_hash(path)
169 paths = [repo('objects/pack')]
170 paths += glob.glob(repo('index-cache/*/.'))
174 def auto_midx(objdir):
175 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
177 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
179 # make sure 'args' gets printed to help with debugging
180 add_error('%r: exception: %s' % (args, e))
183 add_error('%r: returned %d' % (args, rv))
185 args = [path.exe(), 'bloom', '--dir', objdir]
187 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
189 # make sure 'args' gets printed to help with debugging
190 add_error('%r: exception: %s' % (args, e))
193 add_error('%r: returned %d' % (args, rv))
196 def mangle_name(name, mode, gitmode):
197 """Mangle a file name to present an abstract name for segmented files.
198 Mangled file names will have the ".bup" extension added to them. If a
199 file's name already ends with ".bup", a ".bupl" extension is added to
200 disambiguate normal files from segmented ones.
202 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
203 assert(stat.S_ISDIR(gitmode))
205 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
206 return name + '.bupl'
211 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
212 def demangle_name(name, mode):
213 """Remove name mangling from a file name, if necessary.
215 The return value is a tuple (demangled_filename,mode), where mode is one of
218 * BUP_NORMAL : files that should be read as-is from the repository
219 * BUP_CHUNKED : files that were chunked and need to be reassembled
221 For more information on the name mangling algorithm, see mangle_name()
223 if name.endswith('.bupl'):
224 return (name[:-5], BUP_NORMAL)
225 elif name.endswith('.bup'):
226 return (name[:-4], BUP_CHUNKED)
227 elif name.endswith('.bupm'):
229 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
231 return (name, BUP_NORMAL)
234 def calc_hash(type, content):
235 """Calculate some content's hash in the Git fashion."""
236 header = '%s %d\0' % (type, len(content))
242 def shalist_item_sort_key(ent):
243 (mode, name, id) = ent
244 assert(mode+0 == mode)
245 if stat.S_ISDIR(mode):
251 def tree_encode(shalist):
252 """Generate a git tree object from (mode,name,hash) tuples."""
253 shalist = sorted(shalist, key = shalist_item_sort_key)
255 for (mode,name,bin) in shalist:
257 assert(mode+0 == mode)
259 assert(len(bin) == 20)
260 s = '%o %s\0%s' % (mode,name,bin)
261 assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
266 def tree_decode(buf):
267 """Generate a list of (mode,name,hash) from the git tree object in buf."""
269 while ofs < len(buf):
270 z = buf.find('\0', ofs)
272 spl = buf[ofs:z].split(' ', 1)
273 assert(len(spl) == 2)
275 sha = buf[z+1:z+1+20]
277 yield (int(mode, 8), name, sha)
280 def _encode_packobj(type, content, compression_level=1):
281 if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
282 raise ValueError('invalid compression level %s' % compression_level)
285 szbits = (sz & 0x0f) | (_typemap[type]<<4)
288 if sz: szbits |= 0x80
294 z = zlib.compressobj(compression_level)
296 yield z.compress(content)
300 def _encode_looseobj(type, content, compression_level=1):
301 z = zlib.compressobj(compression_level)
302 yield z.compress('%s %d\0' % (type, len(content)))
303 yield z.compress(content)
307 def _decode_looseobj(buf):
309 s = zlib.decompress(buf)
316 assert(type in _typemap)
317 assert(sz == len(content))
318 return (type, content)
321 def _decode_packobj(buf):
324 type = _typermap[(c & 0x70) >> 4]
331 sz |= (c & 0x7f) << shift
335 return (type, zlib.decompress(buf[i+1:]))
342 def find_offset(self, hash):
343 """Get the offset of an object inside the index file."""
344 idx = self._idx_from_hash(hash)
346 return self._ofs_from_idx(idx)
349 def exists(self, hash, want_source=False):
350 """Return nonempty if the object exists in this index."""
351 if hash and (self._idx_from_hash(hash) != None):
352 return want_source and os.path.basename(self.name) or True
356 return int(self.fanout[255])
358 def _idx_from_hash(self, hash):
359 global _total_searches, _total_steps
361 assert(len(hash) == 20)
363 start = self.fanout[b1-1] # range -1..254
364 end = self.fanout[b1] # range 0..255
366 _total_steps += 1 # lookup table is a step
369 mid = start + (end-start)/2
370 v = self._idx_to_hash(mid)
380 class PackIdxV1(PackIdx):
381 """Object representation of a Git pack index (version 1) file."""
382 def __init__(self, filename, f):
384 self.idxnames = [self.name]
385 self.map = mmap_read(f)
386 self.fanout = list(struct.unpack('!256I',
387 str(buffer(self.map, 0, 256*4))))
388 self.fanout.append(0) # entry "-1"
389 nsha = self.fanout[255]
391 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
393 def _ofs_from_idx(self, idx):
394 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
396 def _idx_to_hash(self, idx):
397 return str(self.shatable[idx*24+4 : idx*24+24])
400 for i in range(self.fanout[255]):
401 yield buffer(self.map, 256*4 + 24*i + 4, 20)
404 class PackIdxV2(PackIdx):
405 """Object representation of a Git pack index (version 2) file."""
406 def __init__(self, filename, f):
408 self.idxnames = [self.name]
409 self.map = mmap_read(f)
410 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
411 self.fanout = list(struct.unpack('!256I',
412 str(buffer(self.map, 8, 256*4))))
413 self.fanout.append(0) # entry "-1"
414 nsha = self.fanout[255]
415 self.sha_ofs = 8 + 256*4
416 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
417 self.ofstable = buffer(self.map,
418 self.sha_ofs + nsha*20 + nsha*4,
420 self.ofs64table = buffer(self.map,
421 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
423 def _ofs_from_idx(self, idx):
424 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
426 idx64 = ofs & 0x7fffffff
427 ofs = struct.unpack('!Q',
428 str(buffer(self.ofs64table, idx64*8, 8)))[0]
431 def _idx_to_hash(self, idx):
432 return str(self.shatable[idx*20:(idx+1)*20])
435 for i in range(self.fanout[255]):
436 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
441 def __init__(self, dir):
443 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
448 self.do_bloom = False
455 assert(_mpi_count == 0)
458 return iter(idxmerge(self.packs))
461 return sum(len(pack) for pack in self.packs)
463 def exists(self, hash, want_source=False):
464 """Return nonempty if the object exists in the index files."""
465 global _total_searches
467 if hash in self.also:
469 if self.do_bloom and self.bloom:
470 if self.bloom.exists(hash):
471 self.do_bloom = False
473 _total_searches -= 1 # was counted by bloom
475 for i in xrange(len(self.packs)):
477 _total_searches -= 1 # will be incremented by sub-pack
478 ix = p.exists(hash, want_source=want_source)
480 # reorder so most recently used packs are searched first
481 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
486 def refresh(self, skip_midx = False):
487 """Refresh the index list.
488 This method verifies if .midx files were superseded (e.g. all of its
489 contents are in another, bigger .midx file) and removes the superseded
492 If skip_midx is True, all work on .midx files will be skipped and .midx
493 files will be removed from the list.
495 The module-global variable 'ignore_midx' can force this function to
496 always act as if skip_midx was True.
498 self.bloom = None # Always reopen the bloom as it may have been relaced
499 self.do_bloom = False
500 skip_midx = skip_midx or ignore_midx
501 d = dict((p.name, p) for p in self.packs
502 if not skip_midx or not isinstance(p, midx.PackMidx))
503 if os.path.exists(self.dir):
506 for ix in self.packs:
507 if isinstance(ix, midx.PackMidx):
508 for name in ix.idxnames:
509 d[os.path.join(self.dir, name)] = ix
510 for full in glob.glob(os.path.join(self.dir,'*.midx')):
512 mx = midx.PackMidx(full)
513 (mxd, mxf) = os.path.split(mx.name)
515 for n in mx.idxnames:
516 if not os.path.exists(os.path.join(mxd, n)):
517 log(('warning: index %s missing\n' +
518 ' used by %s\n') % (n, mxf))
526 midxl.sort(key=lambda ix:
527 (-len(ix), -xstat.stat(ix.name).st_mtime))
530 for sub in ix.idxnames:
531 found = d.get(os.path.join(self.dir, sub))
532 if not found or isinstance(found, PackIdx):
533 # doesn't exist, or exists but not in a midx
538 for name in ix.idxnames:
539 d[os.path.join(self.dir, name)] = ix
540 elif not ix.force_keep:
541 debug1('midx: removing redundant: %s\n'
542 % os.path.basename(ix.name))
545 for full in glob.glob(os.path.join(self.dir,'*.idx')):
549 except GitError as e:
553 bfull = os.path.join(self.dir, 'bup.bloom')
554 if self.bloom is None and os.path.exists(bfull):
555 self.bloom = bloom.ShaBloom(bfull)
556 self.packs = list(set(d.values()))
557 self.packs.sort(reverse=True, key=lambda x: len(x))
558 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
562 debug1('PackIdxList: using %d index%s.\n'
563 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
566 """Insert an additional object in the list."""
570 def open_idx(filename):
571 if filename.endswith('.idx'):
572 f = open(filename, 'rb')
574 if header[0:4] == '\377tOc':
575 version = struct.unpack('!I', header[4:8])[0]
577 return PackIdxV2(filename, f)
579 raise GitError('%s: expected idx file version 2, got %d'
580 % (filename, version))
581 elif len(header) == 8 and header[0:4] < '\377tOc':
582 return PackIdxV1(filename, f)
584 raise GitError('%s: unrecognized idx file header' % filename)
585 elif filename.endswith('.midx'):
586 return midx.PackMidx(filename)
588 raise GitError('idx filenames must end with .idx or .midx')
591 def idxmerge(idxlist, final_progress=True):
592 """Generate a list of all the objects reachable in a PackIdxList."""
593 def pfunc(count, total):
594 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
595 % (count*100.0/total, count, total))
596 def pfinal(count, total):
598 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
599 % (100, total, total))
600 return merge_iter(idxlist, 10024, pfunc, pfinal)
603 def _make_objcache():
604 return PackIdxList(repo('objects/pack'))
606 # bup-gc assumes that it can disable all PackWriter activities
607 # (bloom/midx/cache) via the constructor and close() arguments.
610 """Writes Git objects inside a pack file."""
611 def __init__(self, objcache_maker=_make_objcache, compression_level=1,
612 run_midx=True, on_pack_finish=None,
613 max_pack_size=None, max_pack_objects=None, repo_dir=None):
614 self.repo_dir = repo_dir or repo()
621 self.objcache_maker = objcache_maker
623 self.compression_level = compression_level
624 self.run_midx=run_midx
625 self.on_pack_finish = on_pack_finish
626 if not max_pack_size:
627 max_pack_size = git_config_get('pack.packSizeLimit',
628 repo_dir=self.repo_dir)
629 if max_pack_size is not None:
630 max_pack_size = parse_num(max_pack_size)
631 if not max_pack_size:
632 # larger packs slow down pruning
633 max_pack_size = 1000 * 1000 * 1000
634 self.max_pack_size = max_pack_size
635 # cache memory usage is about 83 bytes per object
636 self.max_pack_objects = max_pack_objects if max_pack_objects \
637 else max(1, self.max_pack_size // 5000)
645 def __exit__(self, type, value, traceback):
650 objdir = dir = os.path.join(self.repo_dir, 'objects')
651 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
653 self.file = os.fdopen(fd, 'w+b')
658 self.parentfd = os.open(objdir, os.O_RDONLY)
664 assert(name.endswith('.pack'))
665 self.filename = name[:-5]
666 self.file.write('PACK\0\0\0\2\0\0\0\0')
667 self.idx = list(list() for i in xrange(256))
669 def _raw_write(self, datalist, sha):
672 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
673 # the file never has a *partial* blob. So let's make sure it's
674 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
675 # to our hashsplit algorithm.) f.write() does its own buffering,
676 # but that's okay because we'll flush it in _end().
677 oneblob = ''.join(datalist)
681 raise GitError, e, sys.exc_info()[2]
683 crc = zlib.crc32(oneblob) & 0xffffffff
684 self._update_idx(sha, crc, nw)
689 def _update_idx(self, sha, crc, size):
692 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
694 def _write(self, sha, type, content):
698 sha = calc_hash(type, content)
699 size, crc = self._raw_write(_encode_packobj(type, content,
700 self.compression_level),
702 if self.outbytes >= self.max_pack_size \
703 or self.count >= self.max_pack_objects:
707 def breakpoint(self):
708 """Clear byte and object counts and return the last processed id."""
709 id = self._end(self.run_midx)
710 self.outbytes = self.count = 0
713 def _require_objcache(self):
714 if self.objcache is None and self.objcache_maker:
715 self.objcache = self.objcache_maker()
716 if self.objcache is None:
718 "PackWriter not opened or can't check exists w/o objcache")
720 def exists(self, id, want_source=False):
721 """Return non-empty if an object is found in the object cache."""
722 self._require_objcache()
723 return self.objcache.exists(id, want_source=want_source)
725 def just_write(self, sha, type, content):
726 """Write an object to the pack file, bypassing the objcache. Fails if
728 self._write(sha, type, content)
730 def maybe_write(self, type, content):
731 """Write an object to the pack file if not present and return its id."""
732 sha = calc_hash(type, content)
733 if not self.exists(sha):
734 self.just_write(sha, type, content)
735 self._require_objcache()
736 self.objcache.add(sha)
739 def new_blob(self, blob):
740 """Create a blob object in the pack with the supplied content."""
741 return self.maybe_write('blob', blob)
743 def new_tree(self, shalist):
744 """Create a tree object in the pack."""
745 content = tree_encode(shalist)
746 return self.maybe_write('tree', content)
748 def new_commit(self, tree, parent,
749 author, adate_sec, adate_tz,
750 committer, cdate_sec, cdate_tz,
752 """Create a commit object in the pack. The date_sec values must be
753 epoch-seconds, and if a tz is None, the local timezone is assumed."""
755 adate_str = _git_date_str(adate_sec, adate_tz)
757 adate_str = _local_git_date_str(adate_sec)
759 cdate_str = _git_date_str(cdate_sec, cdate_tz)
761 cdate_str = _local_git_date_str(cdate_sec)
763 if tree: l.append('tree %s' % tree.encode('hex'))
764 if parent: l.append('parent %s' % parent.encode('hex'))
765 if author: l.append('author %s %s' % (author, adate_str))
766 if committer: l.append('committer %s %s' % (committer, cdate_str))
769 return self.maybe_write('commit', '\n'.join(l))
772 """Remove the pack file from disk."""
781 os.unlink(self.filename + '.pack')
788 def _end(self, run_midx=True):
790 if not f: return None
797 # update object count
799 cp = struct.pack('!i', self.count)
803 # calculate the pack sha1sum
806 for b in chunkyreader(f):
808 packbin = sum.digest()
810 fdatasync(f.fileno())
814 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
815 nameprefix = os.path.join(self.repo_dir,
816 'objects/pack/pack-' + obj_list_sha)
817 if os.path.exists(self.filename + '.map'):
818 os.unlink(self.filename + '.map')
819 os.rename(self.filename + '.pack', nameprefix + '.pack')
820 os.rename(self.filename + '.idx', nameprefix + '.idx')
822 os.fsync(self.parentfd)
824 os.close(self.parentfd)
827 auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
829 if self.on_pack_finish:
830 self.on_pack_finish(nameprefix)
834 def close(self, run_midx=True):
835 """Close the pack file and move it to its definitive path."""
836 return self._end(run_midx=run_midx)
838 def _write_pack_idx_v2(self, filename, idx, packbin):
841 for entry in section:
842 if entry[2] >= 2**31:
845 # Length: header + fan-out + shas-and-crcs + overflow-offsets
846 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
848 idx_f = open(filename, 'w+b')
850 idx_f.truncate(index_len)
851 fdatasync(idx_f.fileno())
852 idx_map = mmap_readwrite(idx_f, close=False)
854 count = _helpers.write_idx(filename, idx_map, idx, self.count)
855 assert(count == self.count)
862 idx_f = open(filename, 'a+b')
867 b = idx_f.read(8 + 4*256)
870 obj_list_sum = Sha1()
871 for b in chunkyreader(idx_f, 20*self.count):
873 obj_list_sum.update(b)
874 namebase = obj_list_sum.hexdigest()
876 for b in chunkyreader(idx_f):
878 idx_f.write(idx_sum.digest())
879 fdatasync(idx_f.fileno())
885 def _gitenv(repo_dir = None):
889 os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
893 def list_refs(patterns=None, repo_dir=None,
894 limit_to_heads=False, limit_to_tags=False):
895 """Yield (refname, hash) tuples for all repository refs unless
896 patterns are specified. In that case, only include tuples for
897 refs matching those patterns (cf. git-show-ref(1)). The limits
898 restrict the result items to refs/heads or refs/tags. If both
899 limits are specified, items from both sources will be included.
902 argv = ['git', 'show-ref']
904 argv.append('--heads')
906 argv.append('--tags')
909 argv.extend(patterns)
910 p = subprocess.Popen(argv,
911 preexec_fn = _gitenv(repo_dir),
912 stdout = subprocess.PIPE)
913 out = p.stdout.read().strip()
914 rv = p.wait() # not fatal
918 for d in out.split('\n'):
919 (sha, name) = d.split(' ', 1)
920 yield (name, sha.decode('hex'))
923 def read_ref(refname, repo_dir = None):
924 """Get the commit id of the most recent commit made on a given ref."""
925 refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
926 l = tuple(islice(refs, 2))
934 def rev_list_invocation(ref_or_refs, count=None, format=None):
935 if isinstance(ref_or_refs, compat.str_type):
936 refs = (ref_or_refs,)
939 argv = ['git', 'rev-list']
940 if isinstance(count, Integral):
941 argv.extend(['-n', str(count)])
943 raise ValueError('unexpected count argument %r' % count)
946 argv.append('--pretty=format:' + format)
948 assert not ref.startswith('-')
954 def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
955 """Yield information about commits as per "git rev-list". If a format
956 is not provided, yield one hex hash at a time. If a format is
957 provided, pass it to rev-list and call parse(git_stdout) for each
958 commit with the stream positioned just after the rev-list "commit
959 HASH" header line. When a format is provided yield (oidx,
960 parse(git_stdout)) for each commit.
963 assert bool(parse) == bool(format)
964 p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
966 preexec_fn = _gitenv(repo_dir),
967 stdout = subprocess.PIPE)
969 for line in p.stdout:
972 line = p.stdout.readline()
975 if not s.startswith('commit '):
976 raise Exception('unexpected line ' + s)
977 yield s[7:], parse(p.stdout)
978 line = p.stdout.readline()
980 rv = p.wait() # not fatal
982 raise GitError, 'git rev-list returned error %d' % rv
985 def get_commit_dates(refs, repo_dir=None):
986 """Get the dates for the specified commit refs. For now, every unique
987 string in refs must resolve to a different commit or this
988 function will fail."""
991 commit = get_commit_items(ref, cp(repo_dir))
992 result.append(commit.author_sec)
996 def rev_parse(committish, repo_dir=None):
997 """Resolve the full hash for 'committish', if it exists.
999 Should be roughly equivalent to 'git rev-parse'.
1001 Returns the hex value of the hash if it is found, None if 'committish' does
1002 not correspond to anything.
1004 head = read_ref(committish, repo_dir=repo_dir)
1006 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
1009 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
1011 if len(committish) == 40:
1013 hash = committish.decode('hex')
1023 def update_ref(refname, newval, oldval, repo_dir=None):
1024 """Update a repository reference."""
1027 assert(refname.startswith('refs/heads/') \
1028 or refname.startswith('refs/tags/'))
1029 p = subprocess.Popen(['git', 'update-ref', refname,
1030 newval.encode('hex'), oldval.encode('hex')],
1031 preexec_fn = _gitenv(repo_dir))
1032 _git_wait('git update-ref', p)
1035 def delete_ref(refname, oldvalue=None):
1036 """Delete a repository reference (see git update-ref(1))."""
1037 assert(refname.startswith('refs/'))
1038 oldvalue = [] if not oldvalue else [oldvalue]
1039 p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
1040 preexec_fn = _gitenv())
1041 _git_wait('git update-ref', p)
1044 def guess_repo(path=None):
1045 """Set the path value in the global variable "repodir".
1046 This makes bup look for an existing bup repository, but not fail if a
1047 repository doesn't exist. Usually, if you are interacting with a bup
1048 repository, you would not be calling this function but using
1049 check_repo_or_die().
1055 repodir = os.environ.get('BUP_DIR')
1057 repodir = os.path.expanduser('~/.bup')
1060 def init_repo(path=None):
1061 """Create the Git bare repository for bup in a given path."""
1063 d = repo() # appends a / to the path
1064 parent = os.path.dirname(os.path.dirname(d))
1065 if parent and not os.path.exists(parent):
1066 raise GitError('parent directory "%s" does not exist\n' % parent)
1067 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
1068 raise GitError('"%s" exists but is not a directory\n' % d)
1069 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
1070 preexec_fn = _gitenv())
1071 _git_wait('git init', p)
1072 # Force the index version configuration in order to ensure bup works
1073 # regardless of the version of the installed Git binary.
1074 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
1075 stdout=sys.stderr, preexec_fn = _gitenv())
1076 _git_wait('git config', p)
1078 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
1079 stdout=sys.stderr, preexec_fn = _gitenv())
1080 _git_wait('git config', p)
1083 def check_repo_or_die(path=None):
1084 """Check to see if a bup repository probably exists, and abort if not."""
1087 pst = stat_if_exists(top + '/objects/pack')
1088 if pst and stat.S_ISDIR(pst.st_mode):
1091 top_st = stat_if_exists(top)
1093 log('error: repository %r does not exist (see "bup help init")\n'
1096 log('error: %r is not a repository\n' % top)
1102 """Get Git's version and ensure a usable version is installed.
1104 The returned version is formatted as an ordered tuple with each position
1105 representing a digit in the version tag. For example, the following tuple
1106 would represent version 1.6.6.9:
1108 ('1', '6', '6', '9')
1112 p = subprocess.Popen(['git', '--version'],
1113 stdout=subprocess.PIPE)
1114 gvs = p.stdout.read()
1115 _git_wait('git --version', p)
1116 m = re.match(r'git version (\S+.\S+)', gvs)
1118 raise GitError('git --version weird output: %r' % gvs)
1119 _ver = tuple(m.group(1).split('.'))
1120 needed = ('1','5', '3', '1')
1122 raise GitError('git version %s or higher is required; you have %s'
1123 % ('.'.join(needed), '.'.join(_ver)))
1127 class _AbortableIter:
1128 def __init__(self, it, onabort = None):
1130 self.onabort = onabort
1138 return next(self.it)
1139 except StopIteration as e:
1147 """Abort iteration and call the abortion callback, if needed."""
1159 """Link to 'git cat-file' that is used to retrieve blob data."""
1160 def __init__(self, repo_dir = None):
1162 self.repo_dir = repo_dir
1163 wanted = ('1','5','6')
1165 log('error: git version must be at least 1.5.6\n')
1167 self.p = self.inprogress = None
1171 self.p.stdout.close()
1172 self.p.stdin.close()
1174 self.inprogress = None
1178 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1179 stdin=subprocess.PIPE,
1180 stdout=subprocess.PIPE,
1183 preexec_fn = _gitenv(self.repo_dir))
1186 """Yield (oidx, type, size), followed by the data referred to by ref.
1187 If ref does not exist, only yield (None, None, None).
1190 if not self.p or self.p.poll() != None:
1193 poll_result = self.p.poll()
1194 assert(poll_result == None)
1196 log('get: opening %r while %r is open\n' % (ref, self.inprogress))
1197 assert(not self.inprogress)
1198 assert(ref.find('\n') < 0)
1199 assert(ref.find('\r') < 0)
1200 assert(not ref.startswith('-'))
1201 self.inprogress = ref
1202 self.p.stdin.write('%s\n' % ref)
1203 self.p.stdin.flush()
1204 hdr = self.p.stdout.readline()
1205 if hdr.endswith(' missing\n'):
1206 self.inprogress = None
1207 yield None, None, None
1209 info = hdr.split(' ')
1210 if len(info) != 3 or len(info[0]) != 40:
1211 raise GitError('expected object (id, type, size), got %r' % info)
1212 oidx, typ, size = info
1214 it = _AbortableIter(chunkyreader(self.p.stdout, size),
1215 onabort=self._abort)
1217 yield oidx, typ, size
1220 readline_result = self.p.stdout.readline()
1221 assert(readline_result == '\n')
1222 self.inprogress = None
1223 except Exception as e:
1227 def _join(self, it):
1228 _, typ, _ = next(it)
1233 treefile = ''.join(it)
1234 for (mode, name, sha) in tree_decode(treefile):
1235 for blob in self.join(sha.encode('hex')):
1237 elif typ == 'commit':
1238 treeline = ''.join(it).split('\n')[0]
1239 assert(treeline.startswith('tree '))
1240 for blob in self.join(treeline[5:]):
1243 raise GitError('invalid object type %r: expected blob/tree/commit'
1247 """Generate a list of the content of all blobs that can be reached
1248 from an object. The hash given in 'id' must point to a blob, a tree
1249 or a commit. The content of all blobs that can be seen from trees or
1250 commits will be added to the list.
1253 for d in self._join(self.get(id)):
1255 except StopIteration:
1261 def cp(repo_dir=None):
1262 """Create a CatPipe object or reuse the already existing one."""
1265 repo_dir = repodir or repo()
1266 repo_dir = os.path.abspath(repo_dir)
1267 cp = _cp.get(repo_dir)
1269 cp = CatPipe(repo_dir)
1274 def tags(repo_dir = None):
1275 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1277 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
1278 assert(n.startswith('refs/tags/'))
1282 tags[c].append(name) # more than one tag can point at 'c'
1286 class MissingObject(KeyError):
1287 def __init__(self, oid):
1289 KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))
1292 WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
1293 'path', 'chunk_path', 'data'])
1294 # The path is the mangled path, and if an item represents a fragment
1295 # of a chunked file, the chunk_path will be the chunked subtree path
1296 # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
1297 # chunked file will have a chunk_path of ['']. So some chunk subtree
1298 # of the file '/foo/bar/baz' might look like this:
1300 # item.path = ['foo', 'bar', 'baz.bup']
1301 # item.chunk_path = ['', '2d3115e', '016b097']
1302 # item.type = 'tree'
1306 def walk_object(get_ref, oidx, stop_at=None, include_data=None):
1307 """Yield everything reachable from oidx via get_ref (which must behave
1308 like CatPipe get) as a WalkItem, stopping whenever stop_at(oidx)
1309 returns true. Throw MissingObject if a hash encountered is
1310 missing from the repository, and don't read or return blob content
1311 in the data field unless include_data is set.
1314 # Maintain the pending stack on the heap to avoid stack overflow
1315 pending = [(oidx, [], [], None)]
1317 oidx, parent_path, chunk_path, mode = pending.pop()
1318 oid = oidx.decode('hex')
1319 if stop_at and stop_at(oidx):
1322 if (not include_data) and mode and stat.S_ISREG(mode):
1323 # If the object is a "regular file", then it's a leaf in
1324 # the graph, so we can skip reading the data if the caller
1325 # hasn't requested it.
1326 yield WalkItem(oid=oid, type='blob',
1327 chunk_path=chunk_path, path=parent_path,
1332 item_it = get_ref(oidx)
1333 get_oidx, typ, _ = next(item_it)
1335 raise MissingObject(oidx.decode('hex'))
1336 if typ not in ('blob', 'commit', 'tree'):
1337 raise Exception('unexpected repository object type %r' % typ)
1339 # FIXME: set the mode based on the type when the mode is None
1340 if typ == 'blob' and not include_data:
1341 # Dump data until we can ask cat_pipe not to fetch it
1342 for ignored in item_it:
1346 data = ''.join(item_it)
1348 yield WalkItem(oid=oid, type=typ,
1349 chunk_path=chunk_path, path=parent_path,
1351 data=(data if include_data else None))
1354 commit_items = parse_commit(data)
1355 for pid in commit_items.parents:
1356 pending.append((pid, parent_path, chunk_path, mode))
1357 pending.append((commit_items.tree, parent_path, chunk_path,
1358 hashsplit.GIT_MODE_TREE))
1360 for mode, name, ent_id in tree_decode(data):
1361 demangled, bup_type = demangle_name(name, mode)
1363 sub_path = parent_path
1364 sub_chunk_path = chunk_path + [name]
1366 sub_path = parent_path + [name]
1367 if bup_type == BUP_CHUNKED:
1368 sub_chunk_path = ['']
1370 sub_chunk_path = chunk_path
1371 pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,