1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, hashsplit, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
13 hostname, localtime, log, merge_iter,
14 mmap_read, mmap_readwrite,
16 progress, qprogress, stat_if_exists,
17 unlink, username, userfullname,
repodir = None    # The default repository, once initialized

# Map between git object type names and the numeric type ids used in pack
# files (and back again).
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
31 class GitError(Exception):
def _git_wait(cmd, p):
    # Wait for child process p and raise GitError (naming cmd) on a nonzero
    # exit status.  (rv is presumably p.wait()'s result -- TODO confirm.)
    raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    # Run argv with GIT_DIR pointed at the current repo and capture stdout.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    # _git_wait raises GitError if the command exited nonzero.
    _git_wait(repr(argv), p)
def git_config_get(option, repo_dir=None):
    """Return the value of a git config option via 'git config --get'."""
    cmd = ('git', 'config', '--get', option)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         preexec_fn=_gitenv(repo_dir=repo_dir))
    # NOTE(review): rc is presumably p.wait()'s result; unexpected exit
    # statuses are fatal -- confirm against full source.
    raise GitError('%s returned %d' % (cmd, rc))
def parse_tz_offset(s):
    """Return the UTC offset in seconds for a git-style tz string.

    s is a sign character ('+' or '-') followed by HHMM (cf. _tz_rx).
    """
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    # The visible code dropped the sign handling and the return -- without
    # them the function always returns None.
    if s[0] == '-':
        return - tz_off
    return tz_off
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
# A "safe string": one or two restricted chars, or a longer run whose first
# and last characters come from the restricted class.
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    % (_start_end_char,
       _start_end_char, _content_char, _start_end_char)
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Matches the full raw content of a git commit object; the blank line
# separates the header from the free-form message.
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)

(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')


# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
                                       'message'])
def parse_commit(content):
    """Parse a git commit's raw content into a CommitInfo namedtuple.

    Raises Exception if content does not match _commit_rx.
    """
    commit_match = re.match(_commit_rx, content)
    # The visible code raised unconditionally; only an unparsable commit
    # should be an error.
    if not commit_match:
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch the commit named by id through cat-pipe cp and return it
    parsed into a CommitInfo tuple."""
    item_iter = cp.get(id)
    kind = item_iter.next()
    assert(kind == 'commit')
    return parse_commit(''.join(item_iter))
def _local_git_date_str(epoch_sec):
    """Format epoch_sec as a git date string using the local UTC offset."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
def _git_date_str(epoch_sec, tz_offset_sec):
    """Format epoch_sec plus a UTC offset (seconds east of UTC) in git's
    'SECONDS +HHMM' date format."""
    offs = tz_offset_sec // 60
    # The visible return statement was unfinished (dangling continuation);
    # rebuild the '%d %s%02d%02d' formatting with sign, hours and minutes.
    return '%d %s%02d%02d' \
        % (epoch_sec,
           '+' if offs >= 0 else '-',
           abs(offs) // 60,
           abs(offs) % 60)
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    repo_dir = repo_dir or repodir
    # Only an unset repository is an error; the visible code raised always.
    if not repo_dir:
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
        repo_dir = gd
    return os.path.join(repo_dir, sub)
148 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
153 full = os.path.abspath(path)
154 fullrepo = os.path.abspath(repo(''))
155 if not fullrepo.endswith('/'):
157 if full.startswith(fullrepo):
158 path = full[len(fullrepo):]
159 if path.startswith('index-cache/'):
160 path = path[len('index-cache/'):]
161 return shorten_hash(path)
165 paths = [repo('objects/pack')]
166 paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    """Run 'bup midx --auto' and 'bup bloom' on objdir, best-effort.

    Failures are recorded via add_error() rather than raised, so pack
    maintenance never aborts the caller.
    """
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))
192 def mangle_name(name, mode, gitmode):
193 """Mangle a file name to present an abstract name for segmented files.
194 Mangled file names will have the ".bup" extension added to them. If a
195 file's name already ends with ".bup", a ".bupl" extension is added to
196 disambiguate normal files from segmented ones.
198 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
199 assert(stat.S_ISDIR(gitmode))
201 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
202 return name + '.bupl'
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of
    the following constants:

    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        # Metadata entry: chunked iff its tree entry is a directory.
        return (name[:-5],
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    else:
        return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    header = '%s %d\0' % (type, len(content))
    # The visible code built the header but never hashed anything; hash the
    # loose-object header followed by the content, as git does.
    sum = Sha1(header)
    sum.update(content)
    return sum.digest()
def shalist_item_sort_key(ent):
    """Sort key for (mode,name,hash) tree entries: git sorts tree names as
    if directory names ended with '/'."""
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be an integer
    if stat.S_ISDIR(mode):
        return name + '/'
    else:
        return name
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    l = []
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)  # mode must be an integer
        assert(len(bin) == 20)  # a raw sha1
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        # The visible code formatted each entry but never accumulated or
        # returned anything.
        l.append(s)
    return ''.join(l)
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    # The visible code never initialized or advanced ofs; walk each
    # '<octal-mode> <name>\0<20-byte sha>' record in turn.
    ofs = 0
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        assert(z > ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        mode,name = spl
        sha = buf[z+1:z+1+20]
        ofs = z+1+20
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    """Yield the pack-file encoding of content: a varint-style header
    carrying the type id and size, then the zlib-compressed payload."""
    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
        raise ValueError('invalid compression level %s' % compression_level)
    # Low 4 size bits share the first byte with the type id (cf. _typemap);
    # the 0x80 continuation bit says more size bytes follow.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    """Yield pieces of the zlib-compressed git loose-object encoding of
    content: a '<type> <size>\\0' header followed by the content itself."""
    z = zlib.compressobj(compression_level)
    # encode('ascii') is a no-op for these byte strings on py2 and keeps the
    # header as bytes elsewhere.
    yield z.compress(('%s %d\0' % (type, len(content))).encode('ascii'))
    yield z.compress(content)
    # Without the final flush() the zlib stream is truncated and cannot be
    # decompressed.
    yield z.flush()
def _decode_looseobj(buf):
    """Decompress a git loose object and return (type, content)."""
    s = zlib.decompress(buf)
    # The visible code never parsed the '<type> <size>\0' header; split it
    # off before validating.
    i = s.find('\0')
    assert(i > 0)
    l = s[:i].split(' ')
    assert(len(l) == 2)
    type = l[0]
    sz = int(l[1])
    content = s[i+1:]
    assert(type in _typemap)    # must be a known git object type
    assert(sz == len(content))  # header size must match the payload
    return (type, content)
def _decode_packobj(buf):
    """Decode a packed object and return (type, uncompressed_content).

    The header is one byte carrying the type id and low 4 size bits, then
    continuation bytes each carrying 7 more size bits while 0x80 is set
    (git's pack varint encoding).
    """
    assert(buf)
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    sz = c & 0x0f
    shift = 4
    i = 0
    while c & 0x80:
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        # Subclasses map an index position to a pack-file offset.
        return self._ofs_from_idx(idx)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # With want_source, return this index's file name as the "source".
            return want_source and os.path.basename(self.name) or True
352 return int(self.fanout[255])
    def _idx_from_hash(self, hash):
        """Binary-search the sorted sha table for hash; return its index.

        The fanout table narrows the search to the range of entries whose
        first byte is b1.
        """
        global _total_searches, _total_steps
        assert(len(hash) == 20)  # raw sha1 only
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1  # lookup table is a step
        mid = start + (end-start)/2
        v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v1 layout: a 256-entry fanout of 32-bit counts, then 24-byte rows
        # (4-byte offset + 20-byte sha each).
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # Offset is the first 4 bytes of the 24-byte row.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # Sha is the trailing 20 bytes of the row.
        return str(self.shatable[idx*24+4 : idx*24+24])

        # Iterate raw shas in index order (part of __iter__; the def line is
        # not visible here).
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v2 magic: '\377tOc' followed by version number 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        # v2 stores shas, crcs, 32-bit offsets, and 64-bit offsets in
        # separate parallel tables.
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # High bit set means the low 31 bits index into the 64-bit table.
        idx64 = ofs & 0x7fffffff
        ofs = struct.unpack('!Q',
                            str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

        # Iterate raw shas in index order (part of __iter__; the def line is
        # not visible here).
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    def __init__(self, dir):
        # Only one PackIdxList may exist at a time.
        assert(_mpi_count == 0)  # these things suck tons of VM; don't waste it
        self.do_bloom = False
451 assert(_mpi_count == 0)
454 return iter(idxmerge(self.packs))
457 return sum(len(pack) for pack in self.packs)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        if hash in self.also:
        # Consult the bloom filter first: a negative answer is definitive.
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            _total_searches -= 1  # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None  # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # d maps idx/midx path -> index object; start from the surviving packs.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    # Record every .idx file a midx already covers.
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                mx = midx.PackMidx(full)
                (mxd, mxf) = os.path.split(mx.name)
                for n in mx.idxnames:
                    if not os.path.exists(os.path.join(mxd, n)):
                        log(('warning: index %s missing\n' +
                             '  used by %s\n') % (n, mxf))
            # Prefer bigger, newer midxes so redundant ones can be dropped.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
                for sub in ix.idxnames:
                    found = d.get(os.path.join(self.dir, sub))
                    if not found or isinstance(found, PackIdx):
                        # doesn't exist, or exists but not in a midx
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
            except GitError as e:
        bfull = os.path.join(self.dir, 'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        # Search biggest indexes first.
        self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
        # Only trust the bloom filter once it covers everything we have.
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
562 """Insert an additional object in the list."""
def open_idx(filename):
    """Open a .idx or .midx file and return the matching index object."""
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        # The visible code never read the header; the first 8 bytes carry
        # the v2 magic plus version, while v1 files start with the fanout.
        header = f.read(8)
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            if version == 2:
                return PackIdxV2(filename, f)
            else:
                raise GitError('%s: expected idx file version 2, got %d'
                               % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            return PackIdxV1(filename, f)
        else:
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    else:
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        # The visible code printed unconditionally, leaving final_progress
        # unused; honor the flag.
        if final_progress:
            progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                     % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default objcache factory for PackWriter: an index over the local
    repository's pack directory."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
602 # bup-gc assumes that it can disable all PackWriter activities
603 # (bloom/midx/cache) via the constructor and close() arguments.
606 """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                 run_midx=True, on_pack_finish=None,
                 max_pack_size=None, max_pack_objects=None):
        self.repo_dir = repo()
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
        self.run_midx=run_midx
        self.on_pack_finish = on_pack_finish
        # Fall back to the git config, then to a fixed 1GB default.
        if not max_pack_size:
            max_pack_size = git_config_get('pack.packSizeLimit',
                                           repo_dir=self.repo_dir)
            if max_pack_size is not None:
                max_pack_size = parse_num(max_pack_size)
            if not max_pack_size:
                # larger packs slow down pruning
                max_pack_size = 1000 * 1000 * 1000
        self.max_pack_size = max_pack_size
        # cache memory usage is about 83 bytes per object
        self.max_pack_objects = max_pack_objects if max_pack_objects \
                                else max(1, self.max_pack_size // 5000)
640 objdir = dir = os.path.join(self.repo_dir, 'objects')
641 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
643 self.file = os.fdopen(fd, 'w+b')
648 self.parentfd = os.open(objdir, os.O_RDONLY)
654 assert(name.endswith('.pack'))
655 self.filename = name[:-5]
656 self.file.write('PACK\0\0\0\2\0\0\0\0')
657 self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        """Write the already-encoded datalist to the pack and index it."""
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        # Re-raise I/O errors as GitError, keeping the original traceback
        # (py2 three-argument raise).
        raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
    def _update_idx(self, sha, crc, size):
        # Bucket by the sha's first byte (matches the idx fanout layout);
        # the entry records (sha, crc, offset-of-object-start).
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
        """Encode and write one object; roll over to a new pack when size
        or object-count limits are reached."""
        sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        if self.outbytes >= self.max_pack_size \
           or self.count >= self.max_pack_objects:
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        # Finish the current pack (optionally updating midx files) and
        # reset counters for the next one.
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0
    def _require_objcache(self):
        # Lazily build the object cache; without one, exists() checks are
        # impossible, which is fatal.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
                "PackWriter not opened or can't check exists w/o objcache")
710 def exists(self, id, want_source=False):
711 """Return non-empty if an object is found in the object cache."""
712 self._require_objcache()
713 return self.objcache.exists(id, want_source=want_source)
    def just_write(self, sha, type, content):
        """Write an object to the pack file, bypassing the objcache."""
        self._write(sha, type, content)
720 def maybe_write(self, type, content):
721 """Write an object to the pack file if not present and return its id."""
722 sha = calc_hash(type, content)
723 if not self.exists(sha):
724 self.just_write(sha, type, content)
725 self._require_objcache()
726 self.objcache.add(sha)
729 def new_blob(self, blob):
730 """Create a blob object in the pack with the supplied content."""
731 return self.maybe_write('blob', blob)
733 def new_tree(self, shalist):
734 """Create a tree object in the pack."""
735 content = tree_encode(shalist)
736 return self.maybe_write('tree', content)
738 def new_commit(self, tree, parent,
739 author, adate_sec, adate_tz,
740 committer, cdate_sec, cdate_tz,
742 """Create a commit object in the pack. The date_sec values must be
743 epoch-seconds, and if a tz is None, the local timezone is assumed."""
745 adate_str = _git_date_str(adate_sec, adate_tz)
747 adate_str = _local_git_date_str(adate_sec)
749 cdate_str = _git_date_str(cdate_sec, cdate_tz)
751 cdate_str = _local_git_date_str(cdate_sec)
753 if tree: l.append('tree %s' % tree.encode('hex'))
754 if parent: l.append('parent %s' % parent.encode('hex'))
755 if author: l.append('author %s %s' % (author, adate_str))
756 if committer: l.append('committer %s %s' % (committer, cdate_str))
759 return self.maybe_write('commit', '\n'.join(l))
762 """Remove the pack file from disk."""
771 os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        """Finish the current pack: patch in the object count, append the
        pack sha1, write the .idx, and rename both into place.  Returns the
        pack id (presumably; tail not shown here)."""
        if not f: return None
        # update object count
        cp = struct.pack('!i', self.count)
        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()
        fdatasync(f.fileno())
        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        # Final name is derived from the hash of the object list.
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' + obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        # Make the renames durable before reporting success.
        os.fsync(self.parentfd)
        os.close(self.parentfd)
        auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)
824 def close(self, run_midx=True):
825 """Close the pack file and move it to its definitive path."""
826 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        """Write a v2 pack index for the given per-fanout-bucket idx entries
        and pack sha packbin; returns the hex sha of the object list."""
        for entry in section:
            # Offsets >= 2GB need a 64-bit overflow table entry.
            if entry[2] >= 2**31:
        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        fdatasync(idx_f.fileno())
        # The C helper fills the mmap'ed index in place.
        idx_map = mmap_readwrite(idx_f, close=False)
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        idx_f = open(filename, 'a+b')
        b = idx_f.read(8 + 4*256)
        # Hash just the sha table to derive the pack's name...
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        # ...and the whole file for the trailing idx checksum.
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
        fdatasync(idx_f.fileno())
def _gitenv(repo_dir = None):
    # NOTE(review): call sites use _gitenv(...) as a Popen preexec_fn, so
    # this returns a closure that points GIT_DIR at repo_dir -- confirm
    # against full source.
    os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
def list_refs(refnames=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless
    refnames are specified.  In that case, only include tuples for
    those refs.  The limits restrict the result items to refs/heads or
    refs/tags.  If both limits are specified, items from both sources
    will be included.
    """
    argv = ['git', 'show-ref']
    argv.append('--heads')
    argv.append('--tags')
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait()  # not fatal
    # Each output line is '<hex-sha> <refname>'.
    for d in out.split('\n'):
        (sha, name) = d.split(' ', 1)
        yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(refnames=[refname], repo_dir=repo_dir, limit_to_heads=True)
    # Take at most two matches so ambiguity can be detected.
    l = tuple(islice(refs, 2))
def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    (date, commit).

    If count is a non-zero integer, limit the number of commits to "count"
    objects.
    """
    assert(not ref.startswith('-'))  # keep ref from being read as an option
    opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    # Output alternates 'commit <hex>' lines with the %at date lines.
    if s.startswith('commit '):
        commit = s[7:].decode('hex')
    rv = p.wait()  # not fatal
    raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs.  For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # The visible code used result/ref without binding them and returned
    # nothing; collect one author date per ref.
    result = []
    for ref in refs:
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)
    return result
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    # Try as a ref name first...
    head = read_ref(committish, repo_dir=repo_dir)
    debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    # ...then as a raw 40-char hex id looked up in the pack indexes.
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference.

    oldval is the value the ref is expected to hold; pass a falsy value
    when the ref is being created.
    """
    # The visible code called oldval.encode('hex') unguarded; normalize a
    # falsy oldval to '' so creation doesn't crash.
    if not oldval:
        oldval = ''
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname, oldvalue=None, repo_dir=None):
    """Delete a repository reference (see git update-ref(1)).

    If oldvalue is given, deletion only succeeds while the ref still holds
    that value.  repo_dir selects the repository (default: the current
    one), added for consistency with update_ref().
    """
    assert(refname.startswith('refs/'))
    oldvalue = [] if not oldvalue else [oldvalue]
    p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    # Fall back from explicit path to $BUP_DIR, then to ~/.bup.
    repodir = os.environ.get('BUP_DIR')
    repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    # A repo "probably exists" if objects/pack is a directory under it.
    pst = stat_if_exists(top + '/objects/pack')
    if pst and stat.S_ISDIR(pst.st_mode):
        top_st = stat_if_exists(top)
        log('error: repository %r does not exist (see "bup help init")\n'
        log('error: %r is not a repository\n' % top)
1072 """Get Git's version and ensure a usable version is installed.
1074 The returned version is formatted as an ordered tuple with each position
1075 representing a digit in the version tag. For example, the following tuple
1076 would represent version 1.6.6.9:
1078 ('1', '6', '6', '9')
1082 p = subprocess.Popen(['git', '--version'],
1083 stdout=subprocess.PIPE)
1084 gvs = p.stdout.read()
1085 _git_wait('git --version', p)
1086 m = re.match(r'git version (\S+.\S+)', gvs)
1088 raise GitError('git --version weird output: %r' % gvs)
1089 _ver = tuple(m.group(1).split('.'))
1090 needed = ('1','5', '3', '1')
1092 raise GitError('git version %s or higher is required; you have %s'
1093 % ('.'.join(needed), '.'.join(_ver)))
# Wraps an iterator so that iteration can be cleanly cancelled, invoking
# an optional onabort callback.
class _AbortableIter:
    def __init__(self, it, onabort = None):
        self.onabort = onabort
        return self.it.next()
        except StopIteration as e:
        """Abort iteration and call the abortion callback, if needed."""
# Raised when a requested object id is absent from the repository; id is a
# raw (binary) sha.
class MissingObject(KeyError):
    def __init__(self, id):
        KeyError.__init__(self, 'object %r is missing' % id.encode('hex'))
1135 """Link to 'git cat-file' that is used to retrieve blob data."""
1136 def __init__(self, repo_dir = None):
1138 self.repo_dir = repo_dir
1139 wanted = ('1','5','6')
1142 log('warning: git version < %s; bup will be slow.\n'
1145 self.get = self._slow_get
1147 self.p = self.inprogress = None
1148 self.get = self._fast_get
1152 self.p.stdout.close()
1153 self.p.stdin.close()
1155 self.inprogress = None
1159 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1160 stdin=subprocess.PIPE,
1161 stdout=subprocess.PIPE,
1164 preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        """Yield the object's type then its content chunks, using the
        long-running 'git cat-file --batch' child."""
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
        log('_fast_get: opening %r while %r is open\n'
            % (id, self.inprogress))
        # Only one object may be streamed at a time.
        assert(not self.inprogress)
        # Keep id from corrupting the batch protocol or being parsed as
        # an option.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise MissingObject(id.decode('hex'))
        # Batch header format: '<sha> <type> <size>'.
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl
        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
        # The batch protocol terminates each object with a newline.
        readline_result = self.p.stdout.readline()
        assert(readline_result == '\n')
        self.inprogress = None
        except Exception as e:
    def _slow_get(self, id):
        """Yield the object's type then its content chunks, spawning one
        'git cat-file' per call (fallback for old git)."""
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
        _git_wait('git cat-file', p)
    def _join(self, it):
        """Recursively yield the blob contents reachable from an object's
        (type, chunks...) iterator."""
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            # Descend through a commit via its 'tree <sha>' header line.
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
        raise GitError('invalid object type %r: expected blob/tree/commit'
1238 """Generate a list of the content of all blobs that can be reached
1239 from an object. The hash given in 'id' must point to a blob, a tree
1240 or a commit. The content of all blobs that can be seen from trees or
1241 commits will be added to the list.
1244 for d in self._join(self.get(id)):
1246 except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    repo_dir = repodir or repo()
    # Cache one CatPipe per absolute repository path.
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
    cp = CatPipe(repo_dir)
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name)  # more than one tag can point at 'c'
# One reachable object yielded by walk_object(); see the comment block
# below for the path/chunk_path conventions.
WalkItem = namedtuple('WalkItem', ['id', 'type', 'mode',
                                   'path', 'chunk_path', 'data'])
1279 # The path is the mangled path, and if an item represents a fragment
1280 # of a chunked file, the chunk_path will be the chunked subtree path
1281 # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
1282 # chunked file will have a chunk_path of ['']. So some chunk subtree
1283 # of the file '/foo/bar/baz' might look like this:
1285 # item.path = ['foo', 'bar', 'baz.bup']
1286 # item.chunk_path = ['', '2d3115e', '016b097']
1287 # item.type = 'tree'
1291 def walk_object(cat_pipe, id,
1294 """Yield everything reachable from id via cat_pipe as a WalkItem,
1295 stopping whenever stop_at(id) returns true. Throw MissingObject
1296 if a hash encountered is missing from the repository, and don't
1297 read or return blob content in the data field unless include_data
1300 # Maintain the pending stack on the heap to avoid stack overflow
1301 pending = [(id, [], [], None)]
1303 id, parent_path, chunk_path, mode = pending.pop()
1304 if stop_at and stop_at(id):
1307 if (not include_data) and mode and stat.S_ISREG(mode):
1308 # If the object is a "regular file", then it's a leaf in
1309 # the graph, so we can skip reading the data if the caller
1310 # hasn't requested it.
1311 yield WalkItem(id=id, type='blob',
1312 chunk_path=chunk_path, path=parent_path,
1317 item_it = cat_pipe.get(id)
1318 type = item_it.next()
1319 if type not in ('blob', 'commit', 'tree'):
1320 raise Exception('unexpected repository object type %r' % type)
1322 # FIXME: set the mode based on the type when the mode is None
1323 if type == 'blob' and not include_data:
1324 # Dump data until we can ask cat_pipe not to fetch it
1325 for ignored in item_it:
1329 data = ''.join(item_it)
1331 yield WalkItem(id=id, type=type,
1332 chunk_path=chunk_path, path=parent_path,
1334 data=(data if include_data else None))
1336 if type == 'commit':
1337 commit_items = parse_commit(data)
1338 for pid in commit_items.parents:
1339 pending.append((pid, parent_path, chunk_path, mode))
1340 pending.append((commit_items.tree, parent_path, chunk_path,
1341 hashsplit.GIT_MODE_TREE))
1342 elif type == 'tree':
1343 for mode, name, ent_id in tree_decode(data):
1344 demangled, bup_type = demangle_name(name, mode)
1346 sub_path = parent_path
1347 sub_chunk_path = chunk_path + [name]
1349 sub_path = parent_path + [name]
1350 if bup_type == BUP_CHUNKED:
1351 sub_chunk_path = ['']
1353 sub_chunk_path = chunk_path
1354 pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,