1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
from collections import namedtuple
from itertools import islice

from bup import _helpers, hashsplit, path, midx, bloom, xstat
from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
                         atoi, fdatasync,
                         hostname, localtime, log, merge_iter,
                         mmap_read, mmap_readwrite,
                         progress, qprogress, stat_if_exists,
                         unlink, username, userfullname,
                         utc_offset_str)
max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object

verbose = 0
ignore_midx = 0
repodir = None  # the default repository, once initialized

_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }

_total_searches = 0
_total_steps = 0


class GitError(Exception):
    pass
def parse_tz_offset(s):
    """UTC offset in seconds."""
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    if s[0] == '-':
        return - tz_off
    return tz_off
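# Illustration (not from the original source): '+0130' is one hour and
# thirty minutes east of UTC, '-0500' is five hours west:
#
#   parse_tz_offset('+0130') == 5400
#   parse_tz_offset('-0500') == -18000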
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    % (_start_end_char,
       _start_end_char, _content_char, _start_end_char)
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)

(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
                                       'message'])
def parse_commit(content):
    commit_match = re.match(_commit_rx, content)
    if not commit_match:
        raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
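# Hedged sketch (names and hashes below are illustrative only): a minimal
# commit body parses into a CommitInfo whose offsets come from
# parse_tz_offset():
#
#   _example = ('tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904\n'
#               'author A U Thor <a@example.com> 1400000000 +0000\n'
#               'committer A U Thor <a@example.com> 1400000000 +0000\n'
#               '\n'
#               'hello\n')
#   _ci = parse_commit(_example)
#   assert _ci.parents == [] and _ci.author_sec == 1400000000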
def get_commit_items(id, cp):
    commit_it = cp.get(id)
    assert(commit_it.next() == 'commit')
    commit_content = ''.join(commit_it)
    return parse_commit(commit_content)


def _local_git_date_str(epoch_sec):
    return '%d %s' % (epoch_sec, utc_offset_str(epoch_sec))


def _git_date_str(epoch_sec, tz_offset_sec):
    offs = tz_offset_sec // 60
    return '%d %s%02d%02d' \
        % (epoch_sec,
           '+' if offs >= 0 else '-',
           abs(offs) // 60,
           abs(offs) % 60)
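# For example (illustrative values): _git_date_str(1400000000, -3600)
# renders as '1400000000 -0100', i.e. git's "<epoch> <sign><HHMM>" form.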
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    global repodir
    repo_dir = repo_dir or repodir
    if not repo_dir:
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
        repodir = repo_dir = gd

    return os.path.join(repo_dir, sub)
def shorten_hash(s):
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
                  r'\1\2*\3', s)


def repo_rel(path):
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
        fullrepo += '/'
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)


def all_packdirs():
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))
    return paths
def auto_midx(objdir):
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        raise
    if rv:
        add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    try:
        rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        raise
    if rv:
        add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
        return name + '.bup'
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
    else:
        return name
(BUP_NORMAL, BUP_CHUNKED) = (0,1)

def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one
    of the following:

    * BUP_NORMAL  : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        return (name[:-5],
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    else:
        return (name, BUP_NORMAL)
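# Illustrative round trip (not in the original file): a regular file stored
# as a chunked git tree gains '.bup' on the way in and loses it again:
#
#   mangle_name('photo.jpg', 0100644, 040000) == 'photo.jpg.bup'
#   demangle_name('photo.jpg.bup', 0100644) == ('photo.jpg', BUP_CHUNKED)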
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    header = '%s %d\0' % (type, len(content))
    sum = Sha1(header)
    sum.update(content)
    return sum.digest()
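# Sanity check one can run by hand; the expected value is git's well-known
# empty-blob id (a fact about git, not taken from this file):
#
#   calc_hash('blob', '').encode('hex') \
#       == 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'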
def shalist_item_sort_key(ent):
    (mode, name, id) = ent
    assert(mode+0 == mode)
    if stat.S_ISDIR(mode):
        # git sorts tree entries as if directory names ended with '/'
        return name + '/'
    else:
        return name
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    l = []
    for (mode,name,bin) in shalist:
        assert(mode)
        assert(mode+0 == mode)
        assert(name)
        assert(len(bin) == 20)
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        l.append(s)
    return ''.join(l)
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    ofs = 0
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        assert(z > ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        mode,name = spl
        sha = buf[z+1:z+1+20]
        ofs = z+1+20
        yield (int(mode, 8), name, sha)
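# Minimal round trip, as a hedged sketch (entry names here are made up):
#
#   _ents = [(0100644, 'a.txt', '\x11'*20), (040000, 'sub', '\x22'*20)]
#   assert list(tree_decode(tree_encode(_ents))) \
#       == sorted(_ents, key=shalist_item_sort_key)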
def _encode_packobj(type, content, compression_level=1):
    if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
        raise ValueError('invalid compression level %s' % compression_level)
    szout = ''
    sz = len(content)
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    sz >>= 4
    while 1:
        if sz: szbits |= 0x80
        szout += chr(szbits)
        if not sz:
            break
        szbits = sz & 0x7f
        sz >>= 7
    z = zlib.compressobj(compression_level)
    yield szout
    yield z.compress(content)
    yield z.flush()
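# Header sketch (worked example, not from the original file): for a 'blob'
# (type 3) of 100 bytes, the first byte packs the low 4 size bits under the
# type with the continuation bit set, and the next byte carries the rest:
#
#   ''.join(_encode_packobj('blob', 'x'*100))[:2] == '\xb4\x06'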
def _encode_looseobj(type, content, compression_level=1):
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    yield z.flush()


def _decode_looseobj(buf):
    assert(buf)
    s = zlib.decompress(buf)
    i = s.find('\0')
    assert(i > 0)
    l = s[:i].split(' ')
    assert(len(l) == 2)
    type = l[0]
    sz = int(l[1])
    content = s[i+1:]
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)


def _decode_packobj(buf):
    assert(buf)
    c = ord(buf[0])
    type = _typermap[(c & 0x70) >> 4]
    sz = c & 0x0f
    shift = 4
    i = 0
    while c & 0x80:
        i += 1
        c = ord(buf[i])
        sz |= (c & 0x7f) << shift
        shift += 7
        if not (c & 0x80):
            break
    return (type, zlib.decompress(buf[i+1:]))
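# Round trip with the encoder above (hedged sketch):
#
#   _buf = ''.join(_encode_packobj('blob', 'hello'))
#   assert _decode_packobj(_buf) == ('blob', 'hello')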
class PackIdx:
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        if idx != None:
            return self._ofs_from_idx(idx)
        return None

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            return want_source and os.path.basename(self.name) or True
        return None

    def __len__(self):
        return int(self.fanout[255])

    def _idx_from_hash(self, hash):
        global _total_searches, _total_steps
        _total_searches += 1
        assert(len(hash) == 20)
        b1 = ord(hash[0])
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        want = str(hash)
        _total_steps += 1 # lookup table is a step
        while start < end:
            _total_steps += 1
            mid = start + (end-start)/2
            v = self._idx_to_hash(mid)
            if v < want:
                start = mid+1
            elif v > want:
                end = mid
            else: # got it!
                return mid
        return None


class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*24+4 : idx*24+24])

    def __iter__(self):
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.name = filename
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0)  # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
                               nsha*4)
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        if ofs & 0x80000000:
            idx64 = ofs & 0x7fffffff
            ofs = struct.unpack('!Q',
                                str(buffer(self.ofs64table, idx64*8, 8)))[0]
        return ofs

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

    def __iter__(self):
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
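# On-disk v2 layout recap (from git's pack-format documentation): an 8-byte
# header, 256 4-byte fanout entries, then nsha 20-byte shas, nsha 4-byte
# crc32s, nsha 4-byte offsets, and finally 8-byte entries for any offsets
# that didn't fit in 31 bits.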
_mpi_count = 0

class PackIdxList:
    def __init__(self, dir):
        global _mpi_count
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        _mpi_count += 1
        self.dir = dir
        self.also = set()
        self.packs = []
        self.do_bloom = False
        self.bloom = None
        self.refresh()

    def __del__(self):
        global _mpi_count
        _mpi_count -= 1
        assert(_mpi_count == 0)

    def __iter__(self):
        return iter(idxmerge(self.packs))

    def __len__(self):
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        _total_searches += 1
        if hash in self.also:
            return True
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            else:
                _total_searches -= 1  # was counted by bloom
                return None
        for i in xrange(len(self.packs)):
            p = self.packs[i]
            _total_searches -= 1  # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            if ix:
                # reorder so most recently used packs are searched first
                self.packs = [p] + self.packs[:i] + self.packs[i+1:]
                return ix
        self.do_bloom = True
        return None
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of
        their contents are in another, bigger .midx file) and removes the
        superseded files.

        If skip_midx is True, all work on .midx files will be skipped and
        .midx files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been replaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            if not skip_midx:
                midxl = []
                for ix in self.packs:
                    if isinstance(ix, midx.PackMidx):
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                for full in glob.glob(os.path.join(self.dir,'*.midx')):
                    if not d.get(full):
                        mx = midx.PackMidx(full)
                        (mxd, mxf) = os.path.split(mx.name)
                        broken = False
                        for n in mx.idxnames:
                            if not os.path.exists(os.path.join(mxd, n)):
                                log(('warning: index %s missing\n' +
                                     '  used by %s\n') % (n, mxf))
                                broken = True
                        if broken:
                            mx.close()
                            unlink(full)
                        else:
                            midxl.append(mx)
                midxl.sort(key=lambda ix:
                           (-len(ix), -xstat.stat(ix.name).st_mtime))
                for ix in midxl:
                    any_needed = False
                    for sub in ix.idxnames:
                        found = d.get(os.path.join(self.dir, sub))
                        if not found or isinstance(found, PackIdx):
                            # doesn't exist, or exists but not in a midx
                            any_needed = True
                            break
                    if any_needed:
                        d[ix.name] = ix
                        for name in ix.idxnames:
                            d[os.path.join(self.dir, name)] = ix
                    elif not ix.force_keep:
                        debug1('midx: removing redundant: %s\n'
                               % os.path.basename(ix.name))
                        ix.close()
                        unlink(ix.name)
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                if not d.get(full):
                    try:
                        ix = open_idx(full)
                    except GitError as e:
                        add_error(e)
                        continue
                    d[full] = ix
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            self.packs = list(set(d.values()))
            self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
                self.do_bloom = True
            else:
                self.bloom = None
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
541 """Insert an additional object in the list."""
545 def open_idx(filename):
546 if filename.endswith('.idx'):
547 f = open(filename, 'rb')
549 if header[0:4] == '\377tOc':
550 version = struct.unpack('!I', header[4:8])[0]
552 return PackIdxV2(filename, f)
554 raise GitError('%s: expected idx file version 2, got %d'
555 % (filename, version))
556 elif len(header) == 8 and header[0:4] < '\377tOc':
557 return PackIdxV1(filename, f)
559 raise GitError('%s: unrecognized idx file header' % filename)
560 elif filename.endswith('.midx'):
561 return midx.PackMidx(filename)
563 raise GitError('idx filenames must end with .idx or .midx')
566 def idxmerge(idxlist, final_progress=True):
567 """Generate a list of all the objects reachable in a PackIdxList."""
568 def pfunc(count, total):
569 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
570 % (count*100.0/total, count, total))
571 def pfinal(count, total):
573 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
574 % (100, total, total))
575 return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    return PackIdxList(repo('objects/pack'))

# bup-gc assumes that it can disable all PackWriter activities
# (bloom/midx/cache) via the constructor and close() arguments.

class PackWriter:
    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                 run_midx=True, on_pack_finish=None):
        self.file = None
        self.parentfd = None
        self.count = 0
        self.outbytes = 0
        self.filename = None
        self.objcache = None
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
        self.run_midx = run_midx
        self.on_pack_finish = on_pack_finish

    def _open(self):
        if not self.file:
            objdir = repo('objects')
            fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
            try:
                self.file = os.fdopen(fd, 'w+b')
            except:
                os.close(fd)
                raise
            try:
                self.parentfd = os.open(objdir, os.O_RDONLY)
            except:
                f = self.file
                self.file = None
                f.close()
                raise
            assert(name.endswith('.pack'))
            self.filename = name[:-5]
            self.file.write('PACK\0\0\0\2\0\0\0\0')
            self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        self._open()
        f = self.file
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        try:
            f.write(oneblob)
        except IOError as e:
            raise GitError, e, sys.exc_info()[2]
        nw = len(oneblob)
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
        self.outbytes += nw
        self.count += 1
        return nw, crc

    def _update_idx(self, sha, crc, size):
        assert(sha)
        if self.file:
            self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        if verbose:
            log('>')
        if not sha:
            sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
                                    sha=sha)
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
            self.breakpoint()
        return sha

    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0
        return id
    def _require_objcache(self):
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            raise GitError(
                "PackWriter not opened or can't check exists w/o objcache")

    def exists(self, id, want_source=False):
        """Return non-empty if an object is found in the object cache."""
        self._require_objcache()
        return self.objcache.exists(id, want_source=want_source)

    def just_write(self, sha, type, content):
        """Write an object to the pack file, bypassing the objcache. Fails if
        sha exists in the objcache."""
        self._write(sha, type, content)

    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self.just_write(sha, type, content)
            self._require_objcache()
            self.objcache.add(sha)
        return sha
    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content."""
        return self.maybe_write('blob', blob)

    def new_tree(self, shalist):
        """Create a tree object in the pack."""
        content = tree_encode(shalist)
        return self.maybe_write('tree', content)
    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
                   msg):
        """Create a commit object in the pack. The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        if adate_tz is not None:
            adate_str = _git_date_str(adate_sec, adate_tz)
        else:
            adate_str = _local_git_date_str(adate_sec)
        if cdate_tz is not None:
            cdate_str = _git_date_str(cdate_sec, cdate_tz)
        else:
            cdate_str = _local_git_date_str(cdate_sec)
        l = []
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        l.append('')
        l.append(msg)
        return self.maybe_write('commit', '\n'.join(l))
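    # Hedged usage sketch (not from the original file; assumes an already
    # initialized repository):
    #
    #   w = PackWriter()
    #   blob_id = w.new_blob('hello world\n')  # 20-byte binary sha
    #   tree_id = w.new_tree([(0100644, 'hello.txt', blob_id)])
    #   commit_id = w.new_commit(tree_id, None,
    #                            'A U Thor <a@example.com>', 1400000000, 0,
    #                            'A U Thor <a@example.com>', 1400000000, 0,
    #                            'initial import')
    #   w.close()  # finish the pack and move it into objects/pack/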
726 """Remove the pack file from disk."""
735 os.unlink(self.filename + '.pack')
742 def _end(self, run_midx=True):
744 if not f: return None
751 # update object count
753 cp = struct.pack('!i', self.count)
757 # calculate the pack sha1sum
760 for b in chunkyreader(f):
762 packbin = sum.digest()
764 fdatasync(f.fileno())
768 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
770 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
771 if os.path.exists(self.filename + '.map'):
772 os.unlink(self.filename + '.map')
773 os.rename(self.filename + '.pack', nameprefix + '.pack')
774 os.rename(self.filename + '.idx', nameprefix + '.idx')
776 os.fsync(self.parentfd)
778 os.close(self.parentfd)
781 auto_midx(repo('objects/pack'))
783 if self.on_pack_finish:
784 self.on_pack_finish(nameprefix)
788 def close(self, run_midx=True):
789 """Close the pack file and move it to its definitive path."""
790 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        ofs64_count = 0
        for section in idx:
            for entry in section:
                if entry[2] >= 2**31:
                    ofs64_count += 1

        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
        try:
            idx_f.truncate(index_len)
            fdatasync(idx_f.fileno())
            idx_map = mmap_readwrite(idx_f, close=False)
            try:
                count = _helpers.write_idx(filename, idx_map, idx, self.count)
                assert(count == self.count)
                idx_map.flush()
            finally:
                idx_map.close()
        finally:
            idx_f.close()

        idx_f = open(filename, 'a+b')
        try:
            idx_f.write(packbin)
            idx_f.seek(0)
            idx_sum = Sha1()
            b = idx_f.read(8 + 4*256)
            idx_sum.update(b)

            obj_list_sum = Sha1()
            for b in chunkyreader(idx_f, 20*self.count):
                idx_sum.update(b)
                obj_list_sum.update(b)
            namebase = obj_list_sum.hexdigest()

            for b in chunkyreader(idx_f):
                idx_sum.update(b)
            idx_f.write(idx_sum.digest())
            fdatasync(idx_f.fileno())
            return namebase
        finally:
            idx_f.close()
def _gitenv(repo_dir = None):
    # Returns a callable suitable for subprocess's preexec_fn argument.
    if not repo_dir:
        repo_dir = repo()
    def env():
        os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
    return env
def list_refs(refnames=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless
    refnames are specified. In that case, only include tuples for
    those refs. The limits restrict the result items to refs/heads or
    refs/tags. If both limits are specified, items from both sources
    will be included.
    """
    argv = ['git', 'show-ref']
    if limit_to_heads:
        argv.append('--heads')
    if limit_to_tags:
        argv.append('--tags')
    argv.append('--')
    if refnames:
        argv += refnames
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait()  # not fatal
    if rv:
        assert(not out)
    if out:
        for d in out.split('\n'):
            (sha, name) = d.split(' ', 1)
            yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(refnames=[refname], repo_dir=repo_dir, limit_to_heads=True)
    l = tuple(islice(refs, 2))
    if l:
        assert(len(l) == 1)
        return l[0][1]
    return None
def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    (date,hash).

    If count is a non-zero integer, limit the number of commits to "count"
    objects.
    """
    assert(not ref.startswith('-'))
    opts = []
    if count:
        opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    commit = None
    for row in p.stdout:
        s = row.strip()
        if s.startswith('commit '):
            commit = s[7:].decode('hex')
        else:
            date = int(s)
            yield (date, commit)
    rv = p.wait()  # not fatal
    if rv:
        raise GitError, 'git rev-list returned error %d' % rv
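# Illustrative use (the branch name is made up): grab the newest commit on a
# branch along with its commit date:
#
#   _date, _hash = rev_list('refs/heads/master', count=1).next()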
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
       string in refs must resolve to a different commit or this
       function will fail."""
    result = []
    for ref in refs:
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)
    return result


def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the binary hash if it is found, None if 'committish' does not
    correspond to anything.
    """
    head = read_ref(committish, repo_dir=repo_dir)
    if head:
        debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
        return head

    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))

    if len(committish) == 40:
        try:
            hash = committish.decode('hex')
        except TypeError:
            return None

        if pL.exists(hash):
            return hash

    return None
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    if not oldval:
        oldval = ''
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)


def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1))."""
    assert(refname.startswith('refs/'))
    oldvalue = [] if not oldvalue else [oldvalue]
    p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
                         preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    global repodir
    if path:
        repodir = path
    if not repodir:
        repodir = os.environ.get('BUP_DIR')
        if not repodir:
            repodir = os.path.expanduser('~/.bup')


def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    guess_repo(path)
    d = repo()  # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Enable the reflog.
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    guess_repo(path)
    top = repo()
    pst = stat_if_exists(top + '/objects/pack')
    if pst and stat.S_ISDIR(pst.st_mode):
        return
    if not pst:
        top_st = stat_if_exists(top)
        if not top_st:
            log('error: repository %r does not exist (see "bup help init")\n'
                % top)
            sys.exit(15)
    log('error: %r is not a repository\n' % top)
    sys.exit(14)


_ver = None
def ver():
    """Get Git's version and ensure a usable version is installed.

    The returned version is formatted as an ordered tuple with each position
    representing a digit in the version tag. For example, the following tuple
    would represent version 1.6.6.9:

        ('1', '6', '6', '9')
    """
    global _ver
    if not _ver:
        p = subprocess.Popen(['git', '--version'],
                             stdout=subprocess.PIPE)
        gvs = p.stdout.read()
        _git_wait('git --version', p)
        m = re.match(r'git version (\S+.\S+)', gvs)
        if not m:
            raise GitError('git --version weird output: %r' % gvs)
        _ver = tuple(m.group(1).split('.'))
    needed = ('1','5', '3', '1')
    if _ver < needed:
        raise GitError('git version %s or higher is required; you have %s'
                       % ('.'.join(needed), '.'.join(_ver)))
    return _ver
def _git_wait(cmd, p):
    rv = p.wait()
    if rv != 0:
        raise GitError('%s returned %d' % (cmd, rv))


def _git_capture(argv):
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    r = p.stdout.read()
    _git_wait(repr(argv), p)
    return r
class _AbortableIter:
    def __init__(self, it, onabort = None):
        self.it = it
        self.onabort = onabort
        self.done = None

    def __iter__(self):
        return self

    def next(self):
        try:
            return self.it.next()
        except StopIteration as e:
            self.done = True
            raise
        except:
            self.abort()
            raise

    def abort(self):
        """Abort iteration and call the abortion callback, if needed."""
        if not self.done:
            self.done = True
            if self.onabort:
                self.onabort()

    def __del__(self):
        self.abort()


class MissingObject(KeyError):
    def __init__(self, id):
        self.id = id
        KeyError.__init__(self, 'object %r is missing' % id.encode('hex'))
1112 """Link to 'git cat-file' that is used to retrieve blob data."""
1113 def __init__(self, repo_dir = None):
1115 self.repo_dir = repo_dir
1116 wanted = ('1','5','6')
1119 log('warning: git version < %s; bup will be slow.\n'
1122 self.get = self._slow_get
1124 self.p = self.inprogress = None
1125 self.get = self._fast_get
1129 self.p.stdout.close()
1130 self.p.stdin.close()
1132 self.inprogress = None
1136 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1137 stdin=subprocess.PIPE,
1138 stdout=subprocess.PIPE,
1141 preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        if not self.p or self.p.poll() != None:
            self._restart()
        assert(self.p)
        poll_result = self.p.poll()
        assert(poll_result == None)
        if self.inprogress:
            log('_fast_get: opening %r while %r is open\n'
                % (id, self.inprogress))
        assert(not self.inprogress)
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise MissingObject(id.decode('hex'))
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl

        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
        try:
            yield type
            for blob in it:
                yield blob
            readline_result = self.p.stdout.readline()
            assert(readline_result == '\n')
            self.inprogress = None
        except Exception as e:
            it.abort()
            raise
    def _slow_get(self, id):
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        yield type

        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
            yield blob
        _git_wait('git cat-file', p)

    def _join(self, it):
        type = it.next()
        if type == 'blob':
            for blob in it:
                yield blob
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
                    yield blob
        elif type == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
                yield blob
        else:
            raise GitError('invalid object type %r: expected blob/tree/commit'
                           % type)

    def join(self, id):
        """Generate a list of the content of all blobs that can be reached
        from an object. The hash given in 'id' must point to a blob, a tree
        or a commit. The content of all blobs that can be seen from trees or
        commits will be added to the list.
        """
        try:
            for d in self._join(self.get(id)):
                yield d
        except StopIteration:
            log('booger!\n')
_cp = {}

def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    global _cp
    if not repo_dir:
        repo_dir = repo()
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
    if cp is None:
        cp = CatPipe(repo_dir)
        _cp[repo_dir] = cp
    return cp
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    tags = {}
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        name = n[10:]
        if not c in tags:
            tags[c] = []
        tags[c].append(name)  # more than one tag can point at 'c'
    return tags
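# Illustrative result shape (hash and names made up): a commit with two tags
# pointing at it appears once, keyed by its 20-byte binary sha:
#
#   {'\xde\xad...': ['v0.1', 'stable']}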
WalkItem = namedtuple('WalkItem', ['id', 'type', 'mode',
                                   'path', 'chunk_path', 'data'])
# The path is the mangled path, and if an item represents a fragment
# of a chunked file, the chunk_path will be the chunked subtree path
# for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
# chunked file will have a chunk_path of ['']. So some chunk subtree
# of the file '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'
#   ...
def walk_object(cat_pipe, id,
                stop_at=None,
                include_data=None):
    """Yield everything reachable from id via cat_pipe as a WalkItem,
    stopping whenever stop_at(id) returns true. Throw MissingObject
    if a hash encountered is missing from the repository, and don't
    read or return blob content in the data field unless include_data
    is set.
    """
    # Maintain the pending stack on the heap to avoid stack overflow
    pending = [(id, [], [], None)]
    while len(pending):
        id, parent_path, chunk_path, mode = pending.pop()
        if stop_at and stop_at(id):
            continue

        if (not include_data) and mode and stat.S_ISREG(mode):
            # If the object is a "regular file", then it's a leaf in
            # the graph, so we can skip reading the data if the caller
            # hasn't requested it.
            yield WalkItem(id=id, type='blob',
                           chunk_path=chunk_path, path=parent_path,
                           mode=mode,
                           data=None)
            continue

        item_it = cat_pipe.get(id)
        type = item_it.next()
        if type not in ('blob', 'commit', 'tree'):
            raise Exception('unexpected repository object type %r' % type)

        # FIXME: set the mode based on the type when the mode is None
        if type == 'blob' and not include_data:
            # Dump data until we can ask cat_pipe not to fetch it
            for ignored in item_it:
                pass
            data = None
        else:
            data = ''.join(item_it)

        yield WalkItem(id=id, type=type,
                       chunk_path=chunk_path, path=parent_path,
                       mode=mode,
                       data=(data if include_data else None))

        if type == 'commit':
            commit_items = parse_commit(data)
            for pid in commit_items.parents:
                pending.append((pid, parent_path, chunk_path, mode))
            pending.append((commit_items.tree, parent_path, chunk_path,
                            hashsplit.GIT_MODE_TREE))
        elif type == 'tree':
            for mode, name, ent_id in tree_decode(data):
                demangled, bup_type = demangle_name(name, mode)
                if chunk_path:
                    sub_path = parent_path
                    sub_chunk_path = chunk_path + [name]
                else:
                    sub_path = parent_path + [name]
                    if bup_type == BUP_CHUNKED:
                        sub_chunk_path = ['']
                    else:
                        sub_chunk_path = chunk_path
                pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,
                                mode))