1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 from __future__ import absolute_import
7 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
8 from collections import namedtuple
9 from itertools import islice
10 from numbers import Integral
12 from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
13 from bup.compat import range
14 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
16 hostname, localtime, log,
19 mmap_read, mmap_readwrite,
21 progress, qprogress, shstr, stat_if_exists,
24 from bup.pwdgrp import username, userfullname
repodir = None  # The default repository, once initialized

# Pack-object type ids <-> git object type names (see the pack format spec).
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }


class GitError(Exception):
    """Raised when a git subprocess fails or repository data is invalid."""


def _gitenv(repo_dir=None):
    # Return a copy of the environment with GIT_DIR pointing at repo_dir.
    # NOTE(review): the fallback when repo_dir is None is elided from this
    # view -- presumably it defaults to the global repodir; confirm upstream.
    return merge_dict(os.environ, {'GIT_DIR': os.path.abspath(repo_dir)})


def _git_wait(cmd, p):
    # Wait for subprocess p and raise GitError (naming cmd) on failure.
    # NOTE(review): the assignment of rv (p.wait()) is elided from this view.
    raise GitError('%s returned %d' % (shstr(cmd), rv))


def _git_capture(argv):
    # Run argv with GIT_DIR set and return captured stdout (elided here).
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, env=_gitenv())
    _git_wait(repr(argv), p)


def git_config_get(option, repo_dir=None):
    # Ask git for a single config value; non-zero exit raises GitError.
    cmd = ('git', 'config', '--get', option)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         env=_gitenv(repo_dir=repo_dir))
    # NOTE(review): the read of stdout and the rc = p.wait() that guards
    # this raise are elided from this view.
    raise GitError('%s returned %d' % (cmd, rc))


def parse_tz_offset(s):
    """UTC offset in seconds."""
    # s looks like [+-]HHMM; hours and minutes are converted to seconds.
    # NOTE(review): the sign handling/return is elided from this view.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.

# Characters permitted at the ends / in the middle of a git name or email.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    _start_end_char, _content_char, _start_end_char)
# Timezone offsets look like +0500 / -1130 (minutes restricted to 00-59).
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Assumes every following line starting with a space is part of the
# mergetag.  Is there a formal commit blob spec?
_mergetag_rx = r'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n]*)*)'
# Full commit-object grammar: tree, zero or more parents, author,
# committer, optional mergetag, then the free-form message.
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')

# Note that the author_sec and committer_sec values are (UTC) epoch
# seconds, and for now the mergetag is not included.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a raw git commit blob into a CommitInfo namedtuple.

    Raises Exception if content does not match _commit_rx.
    """
    commit_match = re.match(_commit_rx, content)
    # NOTE(review): the "if not commit_match:" guard for this raise is
    # elided from this view.
    raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_cat_data(cat_iterator, expected_type):
    """Consume a cat-pipe style iterator and return its joined data.

    The first item yielded must be an (oidx, type, size) header whose type
    equals expected_type; the remaining items are data chunks, which are
    concatenated and returned.  Raises Exception on a type mismatch.
    """
    _, found_type, _ = next(cat_iterator)
    if found_type != expected_type:
        raise Exception('expected %r, saw %r' % (expected_type, found_type))
    return ''.join(cat_iterator)
def get_commit_items(id, cp):
    """Fetch the object named by id through cat-pipe cp and parse it as a
    commit, returning a CommitInfo namedtuple."""
    raw = get_cat_data(cp.get(id), 'commit')
    return parse_commit(raw)
def _local_git_date_str(epoch_sec):
    """Format epoch_sec as a git date string using the local UTC offset."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
def _git_date_str(epoch_sec, tz_offset_sec):
    # Format epoch_sec with an explicit UTC offset given in seconds.
    offs = tz_offset_sec // 60
    # NOTE(review): the format arguments after the sign are elided here.
    return '%d %s%02d%02d' \
        '+' if offs >= 0 else '-',


def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    repo_dir = repo_dir or repodir
    # NOTE(review): the "if not repo_dir:" guard for this raise is elided.
    raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
    return os.path.join(repo_dir, sub)


    # NOTE(review): body of shorten_hash(); its def line is elided here.
    # Replaces full 40-hex object ids with an abbreviated 7-hex form.
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',

    # NOTE(review): body of repo_rel(); its def line is elided here.
    # Makes path relative to the repo (or its index-cache) and shortens it.
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)

    # NOTE(review): body of all_packdirs(); its def line is elided here.
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    """Run 'bup midx --auto' and then 'bup bloom' on objdir, best-effort.

    Failures are recorded via add_error() rather than raised, so callers
    are never interrupted by index-maintenance problems.
    """
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    # NOTE(review): the try/except wrapping these calls is elided here.
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    # A regular file stored as a git tree is a chunked file.
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'


# demangle_name() result kinds.
(BUP_NORMAL, BUP_CHUNKED) = (0,1)

def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        # NOTE(review): the "return (name[:-5]," half of this tuple is elided.
        BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # Git hashes "<type> <len>\0" followed by the content.
    header = '%s %d\0' % (type, len(content))


def shalist_item_sort_key(ent):
    # Git sorts tree entries as if directory names had a trailing '/'.
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be numeric
    if stat.S_ISDIR(mode):


def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)
        assert(len(bin) == 20)  # binary sha1
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree


def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    while ofs < len(buf):
        # Each entry is "<octal mode> <name>\0<20-byte sha>".
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    """Yield the pack-object header then the zlib-compressed content."""
    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
        raise ValueError('invalid compression level %s' % compression_level)
    # Header byte: low 4 bits of size plus the type id in bits 4-6;
    # high bit set means more size bytes follow (varint encoding).
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80
    z = zlib.compressobj(compression_level)
    yield z.compress(content)


def _encode_looseobj(type, content, compression_level=1):
    # Loose objects are zlib("<type> <size>\0<content>").
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)


def _decode_looseobj(buf):
    # Inverse of _encode_looseobj: returns (type, content).
    s = zlib.decompress(buf)
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)


def _decode_packobj(buf):
    # Inverse of _encode_packobj: returns (type, content).
    type = _typermap[(c & 0x70) >> 4]
    # Remaining size bits arrive 7 at a time, low bits first.
    sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        # NOTE(review): the "if idx != None:" guard is elided from this view.
        return self._ofs_from_idx(idx)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            return want_source and os.path.basename(self.name) or True
        # NOTE(review): the line below belongs to __len__ in the full
        # source; its def line is elided from this view.
        return int(self.fanout[255])

    def _idx_from_hash(self, hash):
        # Binary search for hash, bounded by the fanout entries for its
        # first byte.
        global _total_searches, _total_steps
        assert(len(hash) == 20)
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1  # lookup table is a step
        mid = start + (end-start)/2
        v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v1 layout: 256-entry fanout of 32-bit cumulative counts, then
        # 24-byte entries (4-byte offset + 20-byte sha each).
        self.fanout = list(struct.unpack('!256I', buffer(self.map, 0, 256 * 4)))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # Offset is the first 4 bytes of the 24-byte entry.
        return struct.unpack('!I', self.shatable[ofs : ofs + 4])[0]

    def _idx_to_hash(self, idx):
        return self.shatable[ofs : ofs + 20]

        # NOTE(review): __iter__ body; its def line is elided from this view.
        count = self.fanout[255]
        for ofs in range(start, start + (24 * count), 24):
            yield self.map[ofs : ofs + 20]
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v2 magic: '\377tOc' followed by version number 2.
        assert self.map[0:8] == b'\377tOc\0\0\0\2'
        self.fanout = list(struct.unpack('!256I',
                                         buffer(self.map[8 : 8 + 256 * 4])))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        # v2 layout: all shas, then all crcs, then 32-bit offsets, then
        # an overflow table of 64-bit offsets.
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', self.ofstable[i : i + 4])[0]
        # High bit set means the low 31 bits index the 64-bit table.
        idx64 = ofs & 0x7fffffff
        ofs = struct.unpack('!Q', self.ofs64table[idx64_i : idx64_i + 8])[0]

    def _idx_to_hash(self, idx):
        return self.shatable[idx * 20 : (idx + 1) * 20]

        # NOTE(review): __iter__ body; its def line is elided from this view.
        count = self.fanout[255]
        for ofs in range(start, start + (20 * count), 20):
            yield self.map[ofs : ofs + 20]
    def __init__(self, dir):
        # Only one PackIdxList may exist at a time (tracked via _mpi_count).
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        self.do_bloom = False

        # NOTE(review): __del__/__iter__/__len__ bodies follow; their def
        # lines are elided from this view.
        assert(_mpi_count == 0)
        return iter(idxmerge(self.packs))
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        if hash in self.also:
        if self.do_bloom and self.bloom:
            # A bloom miss is definitive; a hit must still be confirmed
            # against the real indexes below.
            if self.bloom.exists(hash):
                self.do_bloom = False
            _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]

    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            # Keep any midx we already have open, indexed by idx name.
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                mx = midx.PackMidx(full)
                (mxd, mxf) = os.path.split(mx.name)
                for n in mx.idxnames:
                    if not os.path.exists(os.path.join(mxd, n)):
                        log(('warning: index %s missing\n' +
                             '  used by %s\n') % (n, mxf))
            # Prefer larger (and then newer) midx files.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for sub in ix.idxnames:
                found = d.get(os.path.join(self.dir, sub))
                if not found or isinstance(found, PackIdx):
                    # doesn't exist, or exists but not in a midx
            for name in ix.idxnames:
                d[os.path.join(self.dir, name)] = ix
            elif not ix.force_keep:
                debug1('midx: removing redundant: %s\n'
                       % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
            except GitError as e:
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            self.packs = list(set(d.values()))
            self.packs.sort(reverse=True, key=lambda x: len(x))
            # Only trust the bloom filter if it covers every object we know.
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

        # NOTE(review): add() docstring; its def line is elided from this view.
        """Insert an additional object in the list."""
def open_idx(filename):
    """Open a .idx (v1 or v2) or .midx file, returning the matching object.

    Raises GitError for unrecognized headers or filename extensions.
    """
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        # NOTE(review): the read of the 8-byte header is elided here.
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            return PackIdxV2(filename, f)
            raise GitError('%s: expected idx file version 2, got %d'
                           % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            return PackIdxV1(filename, f)
        raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        # Incremental progress while reading.
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        # NOTE(review): the "if final_progress:" guard is elided here.
        progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                 % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default objcache factory: an index list over the repo's pack dir."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
# bup-gc assumes that it can disable all PackWriter activities
# (bloom/midx/cache) via the constructor and close() arguments.

    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                 run_midx=True, on_pack_finish=None,
                 max_pack_size=None, max_pack_objects=None, repo_dir=None):
        self.repo_dir = repo_dir or repo()
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
        self.run_midx=run_midx
        self.on_pack_finish = on_pack_finish
        if not max_pack_size:
            # Fall back to git's own configured pack size limit.
            max_pack_size = git_config_get('pack.packSizeLimit',
                                           repo_dir=self.repo_dir)
            if max_pack_size is not None:
                max_pack_size = parse_num(max_pack_size)
            if not max_pack_size:
                # larger packs slow down pruning
                max_pack_size = 1000 * 1000 * 1000
        self.max_pack_size = max_pack_size
        # cache memory usage is about 83 bytes per object
        self.max_pack_objects = max_pack_objects if max_pack_objects \
                                else max(1, self.max_pack_size // 5000)

    def __exit__(self, type, value, traceback):

        # NOTE(review): _open() body; its def line is elided from this view.
        objdir = dir = os.path.join(self.repo_dir, 'objects')
        fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
        self.file = os.fdopen(fd, 'w+b')
        # Keep the dir fd so we can fsync the directory after renames.
        self.parentfd = os.open(objdir, os.O_RDONLY)
        assert(name.endswith('.pack'))
        self.filename = name[:-5]
        # Pack header: magic, version 2, object count placeholder.
        self.file.write('PACK\0\0\0\2\0\0\0\0')
        self.idx = list(list() for i in xrange(256))

    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)

    def _update_idx(self, sha, crc, size):
        # Record (sha, crc, offset) bucketed by the sha's first byte.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        # NOTE(review): the "if verbose:" bookkeeping around this is elided.
        sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        # Roll over to a new pack when either limit is hit.
        if self.outbytes >= self.max_pack_size \
           or self.count >= self.max_pack_objects:

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0

    def _require_objcache(self):
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            # NOTE(review): the "raise GitError(" line is elided here.
                "PackWriter not opened or can't check exists w/o objcache")
738 def exists(self, id, want_source=False):
739 """Return non-empty if an object is found in the object cache."""
740 self._require_objcache()
741 return self.objcache.exists(id, want_source=want_source)
743 def just_write(self, sha, type, content):
744 """Write an object to the pack file without checking for duplication."""
745 self._write(sha, type, content)
746 # If nothing else, gc doesn't have/want an objcache
747 if self.objcache is not None:
748 self.objcache.add(sha)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._require_objcache()
            self.just_write(sha, type, content)
        # NOTE(review): the objcache.add/return sha tail is elided here.
758 def new_blob(self, blob):
759 """Create a blob object in the pack with the supplied content."""
760 return self.maybe_write('blob', blob)
762 def new_tree(self, shalist):
763 """Create a tree object in the pack."""
764 content = tree_encode(shalist)
765 return self.maybe_write('tree', content)
    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
        """Create a commit object in the pack.  The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        # NOTE(review): the "if adate_tz:" / "else:" guards around these
        # alternatives are elided from this view.
        adate_str = _git_date_str(adate_sec, adate_tz)
        adate_str = _local_git_date_str(adate_sec)
        cdate_str = _git_date_str(cdate_sec, cdate_tz)
        cdate_str = _local_git_date_str(cdate_sec)
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        return self.maybe_write('commit', '\n'.join(l))

        # NOTE(review): abort() body; its def line is elided from this view.
        """Remove the pack file from disk."""
        os.unlink(self.filename + '.pack')

    def _end(self, run_midx=True):
        # Finalize the current pack: patch the object count, append the
        # pack sha1, write the .idx, and rename both into place.
        if not f: return None

        # update object count
        cp = struct.pack('!i', self.count)

        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()
        fdatasync(f.fileno())

        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = os.path.join(self.repo_dir,
                                  'objects/pack/pack-' + obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        # Make the renames durable before releasing the directory fd.
        os.fsync(self.parentfd)
        os.close(self.parentfd)
        auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)
853 def close(self, run_midx=True):
854 """Close the pack file and move it to its definitive path."""
855 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        """Write a v2 .idx for this pack; return the object-list sha hex."""
        # Offsets >= 2**31 must go into the 64-bit overflow table.
        for entry in section:
            if entry[2] >= 2**31:

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        fdatasync(idx_f.fileno())
        idx_map = mmap_readwrite(idx_f, close=False)
        # The C helper fills the mmap'd index in one pass.
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        idx_f = open(filename, 'a+b')
        b = idx_f.read(8 + 4*256)
        # Sha of the sorted object list names the final pack file.
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
        fdatasync(idx_f.fileno())
def list_refs(patterns=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless
    patterns are specified. In that case, only include tuples for
    refs matching those patterns (cf. git-show-ref(1)). The limits
    restrict the result items to refs/heads or refs/tags. If both
    limits are specified, items from both sources will be included.
    """
    argv = ['git', 'show-ref']
    argv.append('--heads')
    argv.append('--tags')
    argv.extend(patterns)
    p = subprocess.Popen(argv, env=_gitenv(repo_dir), stdout=subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    for d in out.split('\n'):
        # Each show-ref line is "<sha> <refname>".
        (sha, name) = d.split(' ', 1)
        yield (name, sha.decode('hex'))


def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(patterns=[refname], repo_dir=repo_dir, limit_to_heads=True)
    # Take at most two matches so we can detect ambiguity (tail elided).
    l = tuple(islice(refs, 2))
def rev_list_invocation(ref_or_refs, count=None, format=None):
    # Build the argv for a git rev-list invocation over one or many refs.
    if isinstance(ref_or_refs, compat.str_type):
        refs = (ref_or_refs,)
    argv = ['git', 'rev-list']
    if isinstance(count, Integral):
        argv.extend(['-n', str(count)])
    raise ValueError('unexpected count argument %r' % count)
    argv.append('--pretty=format:' + format)
    # Refs must not look like options.
    assert not ref.startswith('-')


def rev_list(ref_or_refs, count=None, parse=None, format=None, repo_dir=None):
    """Yield information about commits as per "git rev-list".  If a format
    is not provided, yield one hex hash at a time.  If a format is
    provided, pass it to rev-list and call parse(git_stdout) for each
    commit with the stream positioned just after the rev-list "commit
    HASH" header line.  When a format is provided yield (oidx,
    parse(git_stdout)) for each commit.
    """
    # parse and format must be supplied together.
    assert bool(parse) == bool(format)
    p = subprocess.Popen(rev_list_invocation(ref_or_refs, count=count,
                         env=_gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    for line in p.stdout:
    line = p.stdout.readline()
    if not s.startswith('commit '):
        raise Exception('unexpected line ' + s)
    yield s, parse(p.stdout)
    line = p.stdout.readline()
    rv = p.wait() # not fatal
    raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs.  For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    commit = get_commit_items(ref, cp(repo_dir))
    result.append(commit.author_sec)


def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    # First try as a ref name; fall back to a raw 40-hex object id.
    head = read_ref(committish, repo_dir=repo_dir)
    debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # Only branch and tag refs may be updated through this helper.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         env=_gitenv(repo_dir))
    _git_wait('git update-ref', p)


def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1))."""
    assert(refname.startswith('refs/'))
    oldvalue = [] if not oldvalue else [oldvalue]
    p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
    _git_wait('git update-ref', p)


def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    # Precedence: explicit path, then BUP_DIR, then ~/.bup.
    repodir = os.environ.get('BUP_DIR')
    repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, env=_gitenv())
    _git_wait('git config', p)
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, env=_gitenv())
    _git_wait('git config', p)


def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    # A repo "probably exists" when objects/pack is a directory.
    pst = stat_if_exists(top + '/objects/pack')
    if pst and stat.S_ISDIR(pst.st_mode):
    top_st = stat_if_exists(top)
    log('error: repository %r does not exist (see "bup help init")\n'
    log('error: %r is not a repository\n' % top)
    # NOTE(review): version-check function body; its def line is elided here.
    """Get Git's version and ensure a usable version is installed.

    The returned version is formatted as an ordered tuple with each position
    representing a digit in the version tag. For example, the following tuple
    would represent version 1.6.6.9:

        ('1', '6', '6', '9')
    """
    p = subprocess.Popen(['git', '--version'],
                         stdout=subprocess.PIPE)
    gvs = p.stdout.read()
    _git_wait('git --version', p)
    m = re.match(r'git version (\S+.\S+)', gvs)
    raise GitError('git --version weird output: %r' % gvs)
    _ver = tuple(m.group(1).split('.'))
    # String-tuple comparison works per-digit for versions of this shape.
    needed = ('1','5', '3', '1')
    raise GitError('git version %s or higher is required; you have %s'
                   % ('.'.join(needed), '.'.join(_ver)))
class _AbortableIter:
    # Wraps an iterator so consumers can abandon it early; the onabort
    # callback lets CatPipe restart its subprocess on abort.
    def __init__(self, it, onabort = None):
        self.onabort = onabort
        return next(self.it)
        except StopIteration as e:

        """Abort iteration and call the abortion callback, if needed."""
    """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        # cat-file --batch needs at least git 1.5.6.
        wanted = ('1','5','6')
        log('error: git version must be at least 1.5.6\n')
        self.p = self.inprogress = None

        # NOTE(review): close/_restart bodies follow; def lines elided.
        self.p.stdout.close()
        self.p.stdin.close()
        self.inprogress = None
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  env=_gitenv(self.repo_dir))

        """Yield (oidx, type, size), followed by the data referred to by ref.
        If ref does not exist, only yield (None, None, None).
        """
        # Restart the subprocess if it's missing or has exited.
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
        # Only one request may be in flight on the batch pipe at a time.
        log('get: opening %r while %r is open\n' % (ref, self.inprogress))
        assert(not self.inprogress)
        assert(ref.find('\n') < 0)
        assert(ref.find('\r') < 0)
        assert(not ref.startswith('-'))
        self.inprogress = ref
        self.p.stdin.write('%s\n' % ref)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            yield None, None, None
        # Batch header: "<40-hex id> <type> <size>".
        info = hdr.split(' ')
        if len(info) != 3 or len(info[0]) != 40:
            raise GitError('expected object (id, type, size), got %r' % info)
        oidx, typ, size = info
        it = _AbortableIter(chunkyreader(self.p.stdout, size),
                            onabort=self._abort)
        yield oidx, typ, size
        # cat-file terminates each object with a single newline.
        readline_result = self.p.stdout.readline()
        assert(readline_result == '\n')
        self.inprogress = None
        except Exception as e:

    def _join(self, it):
        # Recursively flatten an object into its blob contents.
        _, typ, _ = next(it)
        treefile = ''.join(it)
        for (mode, name, sha) in tree_decode(treefile):
            for blob in self.join(sha.encode('hex')):
        elif typ == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
        raise GitError('invalid object type %r: expected blob/tree/commit'

        """Generate a list of the content of all blobs that can be reached
        from an object.  The hash given in 'id' must point to a blob, a tree
        or a commit. The content of all blobs that can be seen from trees or
        commits will be added to the list.
        """
        for d in self._join(self.get(id)):
        except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # One CatPipe is cached per absolute repo path in the module-level _cp.
    repo_dir = repodir or repo()
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
    cp = CatPipe(repo_dir)


def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name)  # more than one tag can point at 'c'


class MissingObject(KeyError):
    # Raised by walk_object() when a referenced object is absent.
    def __init__(self, oid):
        KeyError.__init__(self, 'object %r is missing' % oid.encode('hex'))
WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode',
                                   'path', 'chunk_path', 'data'])
# The path is the mangled path, and if an item represents a fragment
# of a chunked file, the chunk_path will be the chunked subtree path
# for the chunk, i.e. ['', '2d3115e', ...].  The top-level path for a
# chunked file will have a chunk_path of [''].  So some chunk subtree
# of the file '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'
def walk_object(get_ref, oidx, stop_at=None, include_data=None):
    """Yield everything reachable from oidx via get_ref (which must behave
    like CatPipe get) as a WalkItem, stopping whenever stop_at(oidx)
    returns true.  Throw MissingObject if a hash encountered is
    missing from the repository, and don't read or return blob content
    in the data field unless include_data is set.
    """
    # Maintain the pending stack on the heap to avoid stack overflow
    pending = [(oidx, [], [], None)]
    # NOTE(review): the "while len(pending):" loop header is elided here.
    oidx, parent_path, chunk_path, mode = pending.pop()
    oid = oidx.decode('hex')
    if stop_at and stop_at(oidx):
    if (not include_data) and mode and stat.S_ISREG(mode):
        # If the object is a "regular file", then it's a leaf in
        # the graph, so we can skip reading the data if the caller
        # hasn't requested it.
        yield WalkItem(oid=oid, type='blob',
                       chunk_path=chunk_path, path=parent_path,
    item_it = get_ref(oidx)
    get_oidx, typ, _ = next(item_it)
    raise MissingObject(oidx.decode('hex'))
    if typ not in ('blob', 'commit', 'tree'):
        raise Exception('unexpected repository object type %r' % typ)

    # FIXME: set the mode based on the type when the mode is None
    if typ == 'blob' and not include_data:
        # Dump data until we can ask cat_pipe not to fetch it
        for ignored in item_it:
    data = ''.join(item_it)
    yield WalkItem(oid=oid, type=typ,
                   chunk_path=chunk_path, path=parent_path,
                   data=(data if include_data else None))
    # Commits enqueue their parents and their tree; trees enqueue entries.
    commit_items = parse_commit(data)
    for pid in commit_items.parents:
        pending.append((pid, parent_path, chunk_path, mode))
    pending.append((commit_items.tree, parent_path, chunk_path,
                    hashsplit.GIT_MODE_TREE))
    for mode, name, ent_id in tree_decode(data):
        demangled, bup_type = demangle_name(name, mode)
        # Chunked-file fragments extend chunk_path; real entries extend path.
        sub_path = parent_path
        sub_chunk_path = chunk_path + [name]
        sub_path = parent_path + [name]
        if bup_type == BUP_CHUNKED:
            sub_chunk_path = ['']
            sub_chunk_path = chunk_path
        pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,