1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
from __future__ import absolute_import
import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
from collections import namedtuple
from itertools import islice
from numbers import Integral

from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
                         fdatasync,
                         hostname, localtime, log, merge_iter,
                         mmap_read, mmap_readwrite,
                         parse_num,
                         progress, qprogress, shstr, stat_if_exists,
                         unlink, username, userfullname,
                         utc_offset_str)

verbose = 0
ignore_midx = 0
repodir = None  # The default repository, once initialized

_typemap =  { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

_total_searches = 0
_total_steps = 0

class GitError(Exception):
    pass

def _git_wait(cmd, p):
    rv = p.wait()
    if rv != 0:
        raise GitError('%s returned %d' % (shstr(cmd), rv))

def _git_capture(argv):
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    r = p.stdout.read()
    _git_wait(repr(argv), p)
    return r

def git_config_get(option, repo_dir=None):
    cmd = ('git', 'config', '--get', option)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         preexec_fn=_gitenv(repo_dir=repo_dir))
    r = p.stdout.read()
    rc = p.wait()
    if rc == 0:
        return r
    elif rc != 1:
        raise GitError('%s returned %d' % (cmd, rc))
    return None

def parse_tz_offset(s):
    """UTC offset in seconds."""
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    if s[0] == '-':
        return - tz_off
    return tz_off

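# For example (illustrative): parse_tz_offset('+0530') returns 19800
# (5*3600 + 30*60), and parse_tz_offset('-0130') returns -5400.
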
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    % (_start_end_char,
       _start_end_char, _content_char, _start_end_char)
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)

(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')

# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
                                       'message'])

def parse_commit(content):
    commit_match = re.match(_commit_rx, content)
    if not commit_match:
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])

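# A minimal, hypothetical commit text that parse_commit() accepts (the
# hashes and identities below are made up for illustration):
#
#   parse_commit('tree 3fd0000000000000000000000000000000000000\n'
#                'parent 1230000000000000000000000000000000000000\n'
#                'author A U Thor <a@example.com> 1400000000 +0000\n'
#                'committer C O Mitter <c@example.com> 1400000100 -0500\n'
#                '\n'
#                'Example message.\n')
#
# would return a CommitInfo with parents=['123...'], author_sec=1400000000,
# committer_offset=-18000, and message='Example message.\n'.
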
def get_commit_items(id, cp):
    commit_it = cp.get(id)
    _, typ, _ = next(commit_it)
    assert(typ == 'commit')
    commit_content = ''.join(commit_it)
    return parse_commit(commit_content)

def _local_git_date_str(epoch_sec):
    return '%d %s' % (epoch_sec, utc_offset_str(epoch_sec))

def _git_date_str(epoch_sec, tz_offset_sec):
    offs = tz_offset_sec // 60
    return '%d %s%02d%02d' \
        % (epoch_sec,
           '+' if offs >= 0 else '-',
           abs(offs) // 60,
           abs(offs) % 60)

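# For example (illustrative): _git_date_str(1400000000, -5400) returns
# '1400000000 -0130', the "<epoch> <tz>" form used in commit author and
# committer lines.
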
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    repo_dir = repo_dir or repodir
    if not repo_dir:
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
        repo_dir = gd
    return os.path.join(repo_dir, sub)

def shorten_hash(s):
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
                  r'\1\2*\3', s)

def repo_rel(path):
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
        fullrepo += '/'
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)

def all_packdirs():
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
    return paths

def auto_midx(objdir):
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        rv = 1
    if rv:
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        rv = 1
    if rv:
        add_error('%r: returned %d' % (args, rv))

def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
        return name + '.bup'
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
    else:
        return name

(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of
    the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        return (name[:-5],
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    else:
        return (name, BUP_NORMAL)

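# Illustrative round trip (modes shown in octal): a regular file stored
# as a chunked tree gets mangled on write and demangled on read:
#
#   mangle_name('foo', 0o100644, 0o40000)   # -> 'foo.bup'
#   demangle_name('foo.bup', 0o40000)       # -> ('foo', BUP_CHUNKED)
#   demangle_name('foo.bupl', 0o100644)     # -> ('foo', BUP_NORMAL)
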
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    header = '%s %d\0' % (type, len(content))
    sum = Sha1(header)
    sum.update(content)
    return sum.digest()

def shalist_item_sort_key(ent):
    (mode, name, id) = ent
    assert(mode+0 == mode)
    if stat.S_ISDIR(mode):
        return name + '/'
    else:
        return name

def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    l = []
    for (mode,name,bin) in shalist:
        assert(mode)
        assert(mode+0 == mode)
        assert(name)
        assert(len(bin) == 20)
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        l.append(s)
    return ''.join(l)

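# Each encoded tree entry is "<octal mode> <name>\0<20-byte sha>"; for
# example (illustrative), a blob named 'foo' with mode 0o100644 becomes
# '100644 foo\0' followed by the 20 raw sha bytes.  Entries are
# concatenated in git's sort order, where directories sort as 'name/'.
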
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    ofs = 0
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        assert(z > ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        mode,name = spl
        sha = buf[z+1:z+1+20]
        ofs = z+1+20
        yield (int(mode, 8), name, sha)

def _encode_packobj(type, content, compression_level=1):
    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
        raise ValueError('invalid compression level %s' % compression_level)
    szout = ''
    sz = len(content)
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    sz >>= 4
    while 1:
        if sz: szbits |= 0x80
        szout += chr(szbits)
        if not sz:
            break
        szbits = sz & 0x7f
        sz >>= 7
    z = zlib.compressobj(compression_level)
    yield szout
    yield z.compress(content)
    yield z.flush()

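# The header above is git's variable-length pack object header: the low
# nibble of the first byte holds size bits 0-3, bits 4-6 hold the type,
# and the high bit marks continuation bytes carrying 7 size bits each.
# For example (illustrative), a 'blob' (type 3) of size 0x1234 yields
# header bytes 0xb4 0xa3 0x02:
#
#   0xb4 = 0x80 | (3 << 4) | (0x1234 & 0x0f)
#   0xa3 = 0x80 | ((0x1234 >> 4) & 0x7f)
#   0x02 = (0x1234 >> 11) & 0x7f
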
def _encode_looseobj(type, content, compression_level=1):
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    yield z.flush()

def _decode_looseobj(buf):
    assert(buf)
    s = zlib.decompress(buf)
    i = s.find('\0')
    assert(i > 0)
    l = s[:i].split(' ')
    assert(len(l) == 2)
    assert(l[1].isdigit())
    type = l[0]
    sz = int(l[1])
    content = s[i+1:]
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)

def _decode_packobj(buf):
    assert(buf)
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    sz = c & 0x0f
    shift = 4
    i = 0
    while c & 0x80:
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
    return (type, zlib.decompress(buf[i+1:]))

class PackIdx:
    def __init__(self):
        assert(0)

    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        if idx != None:
            return self._ofs_from_idx(idx)
        return None

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            return want_source and os.path.basename(self.name) or True
        return None

    def __len__(self):
        return int(self.fanout[255])

    def _idx_from_hash(self, hash):
        global _total_searches, _total_steps
        _total_searches += 1
        assert(len(hash) == 20)
        b1 = ord(hash[0])
        start = self.fanout[b1-1]  # range -1..254
        end = self.fanout[b1]  # range 0..255
        want = str(hash)
        _total_steps += 1  # lookup table is a step
        while start < end:
            _total_steps += 1
            mid = start + (end-start)//2
            v = self._idx_to_hash(mid)
            if v < want:
                start = mid+1
            elif v > want:
                end = mid
            else:  # got it!
                return mid
        return None

class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*24+4 : idx*24+24])

    def __iter__(self):
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)

class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
                               nsha*4)
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
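
    # Layout of a v2 .idx file as mapped above (sizes in bytes, where n
    # is the object count from fanout[255]):
    #
    #   8      magic '\377tOc' plus version 2
    #   4*256  fanout table; fanout[b] = number of shas whose first
    #          byte is <= b, so each sha's binary search range is
    #          fanout[b-1]..fanout[b]
    #   20*n   sorted object shas
    #   4*n    crc32 of each packed object
    #   4*n    32-bit pack offsets; if the high bit is set, the low 31
    #          bits index into the 64-bit table that follows
    #   8*?    64-bit offsets, only for packs larger than 2 GB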

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        if ofs & 0x80000000:
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
        return ofs

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

    def __iter__(self):
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)

_mpi_count = 0
class PackIdxList:
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0)  # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = set()
        self.packs = []
        self.do_bloom = False
        self.bloom = None
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        return iter(idxmerge(self.packs))

    def __len__(self):
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            else:
                _total_searches -= 1  # was counted by bloom
                return None
        for i in xrange(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            if ix:
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return ix
        self.do_bloom = True
        return None

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g., all of
        their contents are in another, bigger .midx file) and removes the
        superseded files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None  # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                    if not d.get(full):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = False
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     '  used by %s\n') % (n, mxf))
                                broken = True
                        if broken:
                            mx.close()
                            unlink(full)
                        else:
                            midxl.append(mx)
                midxl.sort(key=lambda ix:
                           (-len(ix), -xstat.stat(ix.name).st_mtime))
                for ix in midxl:
                    any_needed = False
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            any_needed = True
                            break
                    if any_needed:
                        d[ix.name] = ix
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        ix.close()
                        unlink(ix.name)
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                if not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError as e:
                        add_error(e)
                        continue
                    d[full] = ix
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            self.packs = list(set(d.values()))
            self.packs.sort(reverse=True, key=lambda x: len(x))
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
                self.do_bloom = True
            else:
                self.bloom = None
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

564 """Insert an additional object in the list."""
def open_idx(filename):
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        header = f.read(8)
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            if version == 2:
                return PackIdxV2(filename, f)
            else:
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            return PackIdxV1(filename, f)
        else:
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    else:
        raise GitError('idx filenames must end with .idx or .midx')

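# Illustrative usage (the pack file name is hypothetical):
#
#   ix = open_idx(repo('objects/pack/pack-1234567deadbeef....idx'))
#   ofs = ix.find_offset(some_20_byte_binary_sha)  # None if absent
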
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        if final_progress:
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)

def _make_objcache():
    return PackIdxList(repo('objects/pack'))

# bup-gc assumes that it can disable all PackWriter activities
# (bloom/midx/cache) via the constructor and close() arguments.

class PackWriter:
    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                 run_midx=True, on_pack_finish=None,
                 max_pack_size=None, max_pack_objects=None, repo_dir=None):
        self.repo_dir = repo_dir or repo()
        self.file = None
        self.parentfd = None
        self.count = 0
        self.outbytes = 0
        self.filename = None
        self.idx = None
        self.objcache_maker = objcache_maker
        self.objcache = None
        self.compression_level = compression_level
        self.run_midx=run_midx
        self.on_pack_finish = on_pack_finish
        if not max_pack_size:
            max_pack_size = git_config_get('pack.packSizeLimit',
                                           repo_dir=self.repo_dir)
            if max_pack_size is not None:
                max_pack_size = parse_num(max_pack_size)
            if not max_pack_size:
                # larger packs slow down pruning
                max_pack_size = 1000 * 1000 * 1000
        self.max_pack_size = max_pack_size
        # cache memory usage is about 83 bytes per object
        self.max_pack_objects = max_pack_objects if max_pack_objects \
                                else max(1, self.max_pack_size // 5000)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def _open(self):
        if not self.file:
            objdir = dir = os.path.join(self.repo_dir, 'objects')
            fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
            self.file = os.fdopen(fd, 'w+b')
            self.parentfd = os.open(objdir, os.O_RDONLY)
            assert(name.endswith('.pack'))
            self.filename = name[:-5]
            self.file.write('PACK\0\0\0\2\0\0\0\0')
            self.idx = list(list() for i in xrange(256))

    def _raw_write(self, datalist, sha):
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        try:
            f.write(oneblob)
        except IOError as e:
            raise GitError, e, sys.exc_info()[2]
        nw = len(oneblob)
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
        self.outbytes += nw
        self.count += 1
        return nw, crc

    def _update_idx(self, sha, crc, size):
        assert(sha)
        if self.file:
            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        if verbose:
            log('>')
        if not sha:
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
                                    sha=sha)
        if self.outbytes >= self.max_pack_size \
           or self.count >= self.max_pack_objects:
            self.breakpoint()
        return sha

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0
        return id

    def _require_objcache(self):
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            raise GitError(
                "PackWriter not opened or can't check exists w/o objcache")

    def exists(self, id, want_source=False):
        """Return non-empty if an object is found in the object cache."""
        self._require_objcache()
        return self.objcache.exists(id, want_source=want_source)

    def just_write(self, sha, type, content):
        """Write an object to the pack file, bypassing the objcache (i.e.,
        without checking whether the object already exists)."""
        self._write(sha, type, content)

    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self.just_write(sha, type, content)
            self._require_objcache()
            self.objcache.add(sha)
        return sha

    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack."""
        content = tree_encode(shalist)
        return self.maybe_write('tree', content)

    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
                   msg):
        """Create a commit object in the pack. The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        if adate_tz:
            adate_str = _git_date_str(adate_sec, adate_tz)
        else:
            adate_str = _local_git_date_str(adate_sec)
        if cdate_tz:
            cdate_str = _git_date_str(cdate_sec, cdate_tz)
        else:
            cdate_str = _local_git_date_str(cdate_sec)
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))

770 """Remove the pack file from disk."""
779 os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        f = self.file
        if not f: return None
        self.file = None
        try:
            self.objcache = None
            idx = self.idx
            self.idx = None

            # update object count
            f.seek(8)
            cp = struct.pack('!i', self.count)
            assert(len(cp) == 4)
            f.write(cp)

            # calculate the pack sha1sum
            f.seek(0)
            sum = Sha1()
            for b in chunkyreader(f):
                sum.update(b)
            packbin = sum.digest()
            f.write(packbin)
            fdatasync(f.fileno())
        finally:
            f.close()

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' + obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        try:
            os.fsync(self.parentfd)
        finally:
            os.close(self.parentfd)

        if run_midx:
            auto_midx(os.path.join(self.repo_dir, 'objects/pack'))

        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)

        return nameprefix

    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path."""
        return self._end(run_midx=run_midx)

    def _write_pack_idx_v2(self, filename, idx, packbin):
        ofs64_count = 0
        for section in idx:
            for entry in section:
                if entry[2] >= 2**31:
                    ofs64_count += 1

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        fdatasync(idx_f.fileno())
        idx_map = mmap_readwrite(idx_f, close=False)
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        idx_map.flush()
        idx_map.close()
        idx_f.close()

        idx_f = open(filename, 'a+b')
        idx_f.write(packbin)
        idx_f.seek(0)
        idx_sum = Sha1()
        b = idx_f.read(8 + 4*256)
        idx_sum.update(b)

        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            idx_sum.update(b)
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()

        for b in chunkyreader(idx_f):
            idx_sum.update(b)
        idx_f.write(idx_sum.digest())
        fdatasync(idx_f.fileno())
        idx_f.close()
        return namebase

def _gitenv(repo_dir = None):
    if not repo_dir:
        repo_dir = repo()
    def env():
        os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
    return env

def list_refs(patterns=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless
    patterns are specified. In that case, only include tuples for
    refs matching those patterns (cf. git-show-ref(1)). The limits
    restrict the result items to refs/heads or refs/tags. If both
    limits are specified, items from both sources will be included.
    """
    argv = ['git', 'show-ref']
    if limit_to_heads:
        argv.append('--heads')
    if limit_to_tags:
        argv.append('--tags')
    argv.append('--')
    if patterns:
        argv.extend(patterns)
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait()  # not fatal
    if rv:
        assert(not out)
    if out:
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))

def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
    l = tuple(islice(refs, 2))
    if l:
        assert(len(l) == 1)
        return l[0][1]
    return None

def rev_list_invocation(ref_or_refs, count=None, format=None):
    if isinstance(ref_or_refs, compat.str_type):
        refs = (ref_or_refs,)
    else:
        refs = ref_or_refs
    argv = ['git', 'rev-list']
    if isinstance(count, Integral):
        argv.extend(['-n', str(count)])
    elif count:
        raise ValueError('unexpected count argument %r' % count)
    if format:
        argv.append('--pretty=format:' + format)
    for ref in refs:
        assert not ref.startswith('-')
        argv.append(ref)
    return argv

def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
    """Yield information about commits as per "git rev-list". If a format
    is not provided, yield one hex hash at a time. If a format is
    provided, pass it to rev-list and call parse(git_stdout) for each
    commit with the stream positioned just after the rev-list "commit
    HASH" header line. When a format is provided yield (oidx,
    parse(git_stdout)) for each commit.
    """
    assert bool(parse) == bool(format)
    p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
                                             format=format),
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    if not format:
        for line in p.stdout:
            yield line.strip()
    else:
        line = p.stdout.readline()
        while line:
            s = line.strip()
            if not s.startswith('commit '):
                raise Exception('unexpected line ' + s)
            yield s[7:], parse(p.stdout)
            line = p.stdout.readline()
    rv = p.wait()  # not fatal
    if rv:
        raise GitError, 'git rev-list returned error %d' % rv

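# Illustrative usage (the ref name and parser are hypothetical):
#
#   for oidx in rev_list('refs/heads/master', count=10):
#       ...   # one hex hash per commit
#
#   for oidx, ts in rev_list('refs/heads/master', format='%at',
#                            parse=lambda f: int(f.readline().strip())):
#       ...   # (hash, author timestamp) pairs
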
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
       string in refs must resolve to a different commit or this
       function will fail."""
    result = []
    for ref in refs:
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)
    return result

def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    head = read_ref(committish, repo_dir=repo_dir)
    if head:
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
        return head

    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))

    if len(committish) == 40:
        try:
            hash = committish.decode('hex')
        except TypeError:
            return None
        if pL.exists(hash):
            return hash

    return None

def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    if not oldval:
        oldval = ''
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)

def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1))."""
    assert(refname.startswith('refs/'))
    oldvalue = [] if not oldvalue else [oldvalue]
    p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
                         preexec_fn = _gitenv())
    _git_wait('git update-ref', p)

def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    global repodir
    if path:
        repodir = path
    if not repodir:
        repodir = os.environ.get('BUP_DIR')
        if not repodir:
            repodir = os.path.expanduser('~/.bup')

def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    guess_repo(path)
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Enable the reflog.
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)

def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    guess_repo(path)
    top = repo()
    pst = stat_if_exists(top + '/objects/pack')
    if pst and stat.S_ISDIR(pst.st_mode):
        return
    if not pst:
        top_st = stat_if_exists(top)
        if not top_st:
            log('error: repository %r does not exist (see "bup help init")\n'
                % top)
            sys.exit(15)
    log('error: %r is not a repository\n' % top)
    sys.exit(14)

1100 """Get Git's version and ensure a usable version is installed.
1102 The returned version is formatted as an ordered tuple with each position
1103 representing a digit in the version tag. For example, the following tuple
1104 would represent version 1.6.6.9:
1106 ('1', '6', '6', '9')
1110 p = subprocess.Popen(['git', '--version'],
1111 stdout=subprocess.PIPE)
1112 gvs = p.stdout.read()
1113 _git_wait('git --version', p)
1114 m = re.match(r'git version (\S+.\S+)', gvs)
1116 raise GitError('git --version weird output: %r' % gvs)
1117 _ver = tuple(m.group(1).split('.'))
1118 needed = ('1','5', '3', '1')
1120 raise GitError('git version %s or higher is required; you have %s'
1121 % ('.'.join(needed), '.'.join(_ver)))
class _AbortableIter:
    def __init__(self, it, onabort = None):
        self.it = it
        self.onabort = onabort
        self.done = None

    def __iter__(self):
        return self

    def next(self):
        try:
            return next(self.it)
        except StopIteration as e:
            self.done = True
            raise
        except:
            self.abort()
            raise

    def abort(self):
        """Abort iteration and call the abortion callback, if needed."""
        if not self.done:
            self.done = True
            if self.onabort:
                self.onabort()

    def __del__(self):
        self.abort()

1157 """Link to 'git cat-file' that is used to retrieve blob data."""
1158 def __init__(self, repo_dir = None):
1160 self.repo_dir = repo_dir
1161 wanted = ('1','5','6')
1163 log('error: git version must be at least 1.5.6\n')
1165 self.p = self.inprogress = None
    def _abort(self):
        if self.p:
            self.p.stdout.close()
            self.p.stdin.close()
        self.p = None
        self.inprogress = None

    def restart(self):
        self._abort()
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  close_fds = True,
                                  bufsize = 4096,
                                  preexec_fn = _gitenv(self.repo_dir))

1184 """Yield (oidx, type, size), followed by the data referred to by ref.
1185 If ref does not exist, only yield (None, None, None).
1188 if not self.p or self.p.poll() != None:
1191 poll_result = self.p.poll()
1192 assert(poll_result == None)
1194 log('get: opening %r while %r is open\n' % (ref, self.inprogress))
1195 assert(not self.inprogress)
1196 assert(ref.find('\n') < 0)
1197 assert(ref.find('\r') < 0)
1198 assert(not ref.startswith('-'))
1199 self.inprogress = ref
1200 self.p.stdin.write('%s\n' % ref)
1201 self.p.stdin.flush()
1202 hdr = self.p.stdout.readline()
1203 if hdr.endswith(' missing\n'):
1204 self.inprogress = None
1205 yield None, None, None
1207 info = hdr.split(' ')
1208 if len(info) != 3 or len(info[0]) != 40:
1209 raise GitError('expected object (id, type, size), got %r' % spl)
1210 oidx, typ, size = info
1212 it = _AbortableIter(chunkyreader(self.p.stdout, size),
1213 onabort=self._abort)
1215 yield oidx, typ, size
1218 readline_result = self.p.stdout.readline()
1219 assert(readline_result == '\n')
1220 self.inprogress = None
1221 except Exception as e:
    def _join(self, it):
        _, typ, _ = next(it)
        if typ == 'blob':
            for blob in it:
                yield blob
        elif typ == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
                    yield blob
        elif typ == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
                yield blob
        else:
            raise GitError('invalid object type %r: expected blob/tree/commit'
                           % typ)

1245 """Generate a list of the content of all blobs that can be reached
1246 from an object. The hash given in 'id' must point to a blob, a tree
1247 or a commit. The content of all blobs that can be seen from trees or
1248 commits will be added to the list.
1251 for d in self._join(self.get(id)):
1253 except StopIteration:
_cp = {}

def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    global _cp, repodir
    if not repo_dir:
        repo_dir = repodir or repo()
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
    if not cp:
        cp = CatPipe(repo_dir)
        _cp[repo_dir] = cp
    return cp

def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    tags = {}
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        name = n[10:]
        if not c in tags:
            tags[c] = []
        tags[c].append(name)  # more than one tag can point at 'c'
    return tags

class MissingObject(KeyError):
    def __init__(self, oid):
        self.oid = oid
        KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))

WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
                                   'path', 'chunk_path', 'data'])
# The path is the mangled path, and if an item represents a fragment
# of a chunked file, the chunk_path will be the chunked subtree path
# for the chunk, i.e. ['', '2d3115e', ...].  The top-level path for a
# chunked file will have a chunk_path of [''].  So some chunk subtree
# of the file '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'
#   ...

def walk_object(cat_pipe, oidx,
                stop_at=None,
                include_data=None):
    """Yield everything reachable from oidx via cat_pipe as a WalkItem,
    stopping whenever stop_at(oidx) returns true. Throw MissingObject
    if a hash encountered is missing from the repository, and don't
    read or return blob content in the data field unless include_data
    is set.
    """
    # Maintain the pending stack on the heap to avoid stack overflow
    pending = [(oidx, [], [], None)]
    while len(pending):
        oidx, parent_path, chunk_path, mode = pending.pop()
        oid = oidx.decode('hex')
        if stop_at and stop_at(oidx):
            continue

        if (not include_data) and mode and stat.S_ISREG(mode):
            # If the object is a "regular file", then it's a leaf in
            # the graph, so we can skip reading the data if the caller
            # hasn't requested it.
            yield WalkItem(oid=oid, type='blob',
                           chunk_path=chunk_path, path=parent_path,
                           mode=mode,
                           data=None)
            continue

        item_it = cat_pipe.get(oidx)
        get_oidx, typ, _ = next(item_it)
        if not get_oidx:
            raise MissingObject(oidx.decode('hex'))
        if typ not in ('blob', 'commit', 'tree'):
            raise Exception('unexpected repository object type %r' % typ)

        # FIXME: set the mode based on the type when the mode is None
        if typ == 'blob' and not include_data:
            # Dump data until we can ask cat_pipe not to fetch it
            for ignored in item_it:
                pass
            data = None
        else:
            data = ''.join(item_it)

        yield WalkItem(oid=oid, type=typ,
                       chunk_path=chunk_path, path=parent_path,
                       mode=mode,
                       data=(data if include_data else None))

        if typ == 'commit':
            commit_items = parse_commit(data)
            for pid in commit_items.parents:
                pending.append((pid, parent_path, chunk_path, mode))
            pending.append((commit_items.tree, parent_path, chunk_path,
                            hashsplit.GIT_MODE_TREE))
        elif typ == 'tree':
            for mode, name, ent_id in tree_decode(data):
                demangled, bup_type = demangle_name(name, mode)
                if chunk_path:
                    sub_path = parent_path
                    sub_chunk_path = chunk_path + [name]
                else:
                    sub_path = parent_path + [name]
                    if bup_type == BUP_CHUNKED:
                        sub_chunk_path = ['']
                    else:
                        sub_chunk_path = chunk_path
                pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,
                                mode))