1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, hashsplit, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
13 hostname, localtime, log, merge_iter,
14 mmap_read, mmap_readwrite,
15 progress, qprogress, unlink, username, userfullname,
# Pack growth limits: _end()/breakpoint() start a new pack once either
# threshold is crossed.
max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object

# Mapping between git object type names and the numeric type codes used in
# the pack file format, and the reverse mapping.
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
33 class GitError(Exception):
37 def parse_tz_offset(s):
38 """UTC offset in seconds."""
39 tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
45 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
46 # Make sure that's authoritative.
47 _start_end_char = r'[^ .,:;<>"\'\0\n]'
48 _content_char = r'[^\0\n<>]'
49 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
51 _start_end_char, _content_char, _start_end_char)
52 _tz_rx = r'[-+]\d\d[0-5]\d'
53 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
54 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
55 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
56 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
58 (?P<message>(?:.|\n)*)''' % (_parent_rx,
59 _safe_str_rx, _safe_str_rx, _tz_rx,
60 _safe_str_rx, _safe_str_rx, _tz_rx))
61 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
64 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
65 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
66 'author_name', 'author_mail',
67 'author_sec', 'author_offset',
68 'committer_name', 'committer_mail',
69 'committer_sec', 'committer_offset',
72 def parse_commit(content):
73 commit_match = re.match(_commit_rx, content)
75 raise Exception('cannot parse commit %r' % content)
76 matches = commit_match.groupdict()
77 return CommitInfo(tree=matches['tree'],
78 parents=re.findall(_parent_hash_rx, matches['parents']),
79 author_name=matches['author_name'],
80 author_mail=matches['author_mail'],
81 author_sec=int(matches['asec']),
82 author_offset=parse_tz_offset(matches['atz']),
83 committer_name=matches['committer_name'],
84 committer_mail=matches['committer_mail'],
85 committer_sec=int(matches['csec']),
86 committer_offset=parse_tz_offset(matches['ctz']),
87 message=matches['message'])
def get_commit_items(id, cp):
    """Fetch commit 'id' through the CatPipe 'cp' and return it as a
    parsed CommitInfo tuple."""
    item_it = cp.get(id)
    kind = item_it.next()
    assert(kind == 'commit')
    return parse_commit(''.join(item_it))
def _local_git_date_str(epoch_sec):
    """Return epoch_sec as a git date string using the local UTC offset."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
101 def _git_date_str(epoch_sec, tz_offset_sec):
102 offs = tz_offset_sec // 60
103 return '%d %s%02d%02d' \
105 '+' if offs >= 0 else '-',
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories.

    Uses repo_dir if given, else the module-global repodir (set by
    check_repo_or_die()); raises GitError if neither is available.
    """
    global repodir
    repo_dir = repo_dir or repodir
    if not repo_dir:
        raise GitError('You should call check_repo_or_die()')

    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
        repo_dir = gd
    return os.path.join(repo_dir, sub)
    # Abbreviate any 40-hex-digit object ids embedded in the string to
    # their first 7 digits plus '*'.
    # NOTE(review): the enclosing 'def' header and the re.sub replacement
    # argument are not visible in this listing.
    return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',


    # Turn an absolute path into a short repository-relative form for
    # friendlier log messages.
    # NOTE(review): the enclosing 'def' header is not visible in this listing.
    full = os.path.abspath(path)
    fullrepo = os.path.abspath(repo(''))
    if not fullrepo.endswith('/'):
        # NOTE(review): the statement appending the '/' appears elided here.
    if full.startswith(fullrepo):
        path = full[len(fullrepo):]
    if path.startswith('index-cache/'):
        path = path[len('index-cache/'):]
    return shorten_hash(path)
    # Directories that may contain pack index files: the repository's own
    # pack dir plus any remote index caches.
    # NOTE(review): the enclosing 'def' header and return are not visible.
    paths = [repo('objects/pack')]
    paths += glob.glob(repo('index-cache/*/.'))


def auto_midx(objdir):
    # Opportunistically regenerate the .midx and bloom indexes for objdir
    # by invoking the 'bup midx' and 'bup bloom' subcommands; failures are
    # recorded via add_error() rather than raised.
    # NOTE(review): the try/except and 'if rv:' wrapper lines appear elided
    # from this listing.
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))

    args = [path.exe(), 'bloom', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))
170 def mangle_name(name, mode, gitmode):
171 """Mangle a file name to present an abstract name for segmented files.
172 Mangled file names will have the ".bup" extension added to them. If a
173 file's name already ends with ".bup", a ".bupl" extension is added to
174 disambiguate normal files from segmented ones.
176 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
177 assert(stat.S_ISDIR(gitmode))
179 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
180 return name + '.bupl'
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of
    the following:

    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled

    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        # Metadata entries: chunked only when stored as a directory/tree.
        return (name[:-5],
                BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    else:
        return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion: the sha1 of a
    '<type> <size>\\0' header followed by the content itself."""
    header = '%s %d\0' % (type, len(content))
    sum = Sha1()
    sum.update(header)
    sum.update(content)
    return sum.digest()
def shalist_item_sort_key(ent):
    """Return the git tree sort key for a (mode, name, hash) tuple:
    directories sort as if their name ended with '/'."""
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be numeric
    if stat.S_ISDIR(mode):
        return name + '/'
    else:
        return name


def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    shalist = sorted(shalist, key = shalist_item_sort_key)
    l = []
    for (mode,name,bin) in shalist:
        assert(mode)
        assert(mode+0 == mode)
        assert(name)
        assert(len(bin) == 20)  # binary sha1
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        l.append(s)
    return ''.join(l)
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    ofs = 0
    while ofs < len(buf):
        # Each entry is '<octal mode> <name>\0' followed by a 20-byte sha1.
        z = buf.find('\0', ofs)
        assert(z > ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        mode,name = spl
        sha = buf[z+1:z+1+20]
        ofs = z+1+20
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    # Yield the pack-format encoding of one object: a variable-length size
    # header (low size nibble plus the numeric type code, with continuation
    # bits) followed by the zlib-compressed content.
    # NOTE(review): several lines of the size-header loop and the final
    # flush are elided from this listing.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80
    if compression_level > 9:
        # NOTE(review): BaseException is too broad here; ValueError would
        # be more appropriate (BaseException escapes 'except Exception:').
        raise BaseException("Compression Level Above 9")
    elif compression_level < 0:
        raise BaseException("Compression Level Below 0")
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    """Yield the zlib-compressed git loose-object encoding of content:
    a '<type> <size>\\0' header followed by the content itself."""
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    # Without the final flush the zlib stream would be truncated.
    yield z.flush()
def _decode_looseobj(buf):
    # Inverse of _encode_looseobj(): decompress buf and split the
    # '<type> <size>\0' header from the content.
    # NOTE(review): the header-parsing lines are elided from this listing.
    s = zlib.decompress(buf)
    assert(type in _typemap)
    assert(sz == len(content))  # declared size must match the payload
    return (type, content)


def _decode_packobj(buf):
    # Inverse of _encode_packobj(): decode the variable-length size header
    # (type code in the high nibble of the first byte), then decompress the
    # remainder.
    # NOTE(review): the size-decoding loop is partially elided here.
    type = _typermap[(c & 0x70) >> 4]
    sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    # NOTE(review): these are methods of the PackIdx base class; the class
    # header is not visible in this listing.

    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        # NOTE(review): the guard for a failed lookup appears elided here.
        return self._ofs_from_idx(idx)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # Either the containing idx file's basename, or just True.
            return want_source and os.path.basename(self.name) or True

        # NOTE(review): the '__len__' method header appears elided here;
        # fanout[255] is the total object count.
        return int(self.fanout[255])

    def _idx_from_hash(self, hash):
        # Binary-search for hash inside the fanout bucket selected by its
        # first byte; global counters track search statistics.
        global _total_searches, _total_steps
        assert(len(hash) == 20)  # binary sha1, not hex
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
        # NOTE(review): Python 2 integer division; '/' would yield a float
        # index under Python 3.
        mid = start + (end-start)/2
        v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        # NOTE(review): some init lines (e.g. self.name/self.sha_ofs setup)
        # are elided from this listing.
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # V1 layout: 256 fanout entries, then 24-byte (offset, sha) records.
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # First 4 bytes of each 24-byte record hold the pack offset.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # Remaining 20 bytes of the record hold the binary sha1.
        return str(self.shatable[idx*24+4 : idx*24+24])

        # NOTE(review): the '__iter__' method header appears elided here.
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        # NOTE(review): some init lines are elided from this listing.
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # Magic '\377tOc' followed by version 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        # V2 layout: shas, then crcs, then 4-byte offsets, then 8-byte
        # offsets for entries beyond 2GB.
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        # NOTE(review): the closing length argument line is elided here.
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # If the high bit is set, the value indexes the 64-bit table.
        # NOTE(review): the guard and return lines appear elided here.
        idx64 = ofs & 0x7fffffff
        ofs = struct.unpack('!Q',
                            str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

        # NOTE(review): the '__iter__' method header appears elided here.
        for i in xrange(self.fanout[255]):
            yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    # NOTE(review): these are PackIdxList methods; the class header is not
    # visible and several lines are elided throughout this listing.

    def __init__(self, dir):
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        self.do_bloom = False

        # NOTE(review): the __del__/__iter__/__len__ method headers appear
        # elided here.
        assert(_mpi_count == 0)
        return iter(idxmerge(self.packs))
        return sum(len(pack) for pack in self.packs)

    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        # Already-known extra objects ('also') are the cheapest check.
        if hash in self.also:
        # A negative bloom result proves absence; a positive one just means
        # we must fall through and search the real indexes.
        if self.do_bloom and self.bloom:
            if self.bloom.exists(hash):
                self.do_bloom = False
            _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        files.

        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.

        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        # NOTE(review): several lines of this method are elided from this
        # listing; the reconstruction of nesting below is approximate.
        self.bloom = None # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Map full idx path -> PackIdx/PackMidx, excluding midxes if skipped.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                mx = midx.PackMidx(full)
                (mxd, mxf) = os.path.split(mx.name)
                for n in mx.idxnames:
                    if not os.path.exists(os.path.join(mxd, n)):
                        log(('warning: index %s missing\n' +
                             'used by %s\n') % (n, mxf))
            # Prefer bigger and newer midx files.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for sub in ix.idxnames:
                found = d.get(os.path.join(self.dir, sub))
                if not found or isinstance(found, PackIdx):
                    # doesn't exist, or exists but not in a midx
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
                except GitError as e:
            bfull = os.path.join(self.dir, 'bup.bloom')
            if self.bloom is None and os.path.exists(bfull):
                self.bloom = bloom.ShaBloom(bfull)
            self.packs = list(set(d.values()))
            # NOTE(review): Python 2-only cmp-based sort; py3 needs key=.
            self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
            if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

        # NOTE(review): the 'add' method header appears elided here.
        """Insert an additional object in the list."""
def open_idx(filename):
    # Open a pack index by extension: .idx (v1 or v2) or .midx.
    # NOTE(review): the header read and several else/branch lines are
    # elided from this listing.
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            return PackIdxV2(filename, f)
            raise GitError('%s: expected idx file version 2, got %d'
                           % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            # V1 indexes have no magic; they begin directly with the fanout.
            return PackIdxV1(filename, f)
            raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
        raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        # NOTE(review): the 'if final_progress:' guard appears elided here.
        progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                 % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default objcache_maker for PackWriter: an index over all local packs."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
# bup-gc assumes that it can disable all PackWriter activities
# (bloom/midx/cache) via the constructor and close() arguments.
    # NOTE(review): the 'class PackWriter' header line is not visible here.
    """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1,
                 run_midx=True, on_pack_finish=None):
        # NOTE(review): several attribute initializations are elided here.
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
        self.run_midx=run_midx
        self.on_pack_finish = on_pack_finish

        # NOTE(review): the '_open' method header appears elided below; it
        # lazily creates the temporary pack file on first write.
        objdir = dir=repo('objects')
        fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
        self.file = os.fdopen(fd, 'w+b')
        # Keep the objects dir open so _end() can fsync the renames.
        self.parentfd = os.open(objdir, os.O_RDONLY)
        assert(name.endswith('.pack'))
        self.filename = name[:-5]
        # Pack header: 'PACK', version 2, object-count placeholder of 0
        # (patched in _end()).
        self.file.write('PACK\0\0\0\2\0\0\0\0')
        # 256 per-first-byte buckets of (sha, crc, offset) for the index.
        self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        # Append one already-encoded object to the pack and record it in
        # the in-memory index.
        # NOTE(review): the file handling and write/size lines are elided.
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        # NOTE(review): Python 2-only three-argument raise syntax below.
        raise GitError, e, sys.exc_info()[2]
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)

    def _update_idx(self, sha, crc, size):
        # Bucket by the sha's first byte; the offset is where the object
        # started (current file position minus its encoded size).
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

    def _write(self, sha, type, content):
        # Encode and write one object; roll over to a new pack when the
        # size or object-count limits are reached.
        # NOTE(review): several lines of this method are elided here.
        sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        id = self._end(self.run_midx)
        self.outbytes = self.count = 0
        # NOTE(review): the 'return id' line appears elided here.

    def _require_objcache(self):
        # Lazily create the object cache; without one we cannot answer
        # exists() queries.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
            # NOTE(review): the 'raise GitError(' line appears elided here.
                "PackWriter not opened or can't check exists w/o objcache")
675 def exists(self, id, want_source=False):
676 """Return non-empty if an object is found in the object cache."""
677 self._require_objcache()
678 return self.objcache.exists(id, want_source=want_source)
680 def write(self, sha, type, content):
681 """Write an object to the pack file. Fails if sha exists()."""
682 self._write(sha, type, content)
684 def maybe_write(self, type, content):
685 """Write an object to the pack file if not present and return its id."""
686 sha = calc_hash(type, content)
687 if not self.exists(sha):
688 self.write(sha, type, content)
689 self._require_objcache()
690 self.objcache.add(sha)
693 def new_blob(self, blob):
694 """Create a blob object in the pack with the supplied content."""
695 return self.maybe_write('blob', blob)
697 def new_tree(self, shalist):
698 """Create a tree object in the pack."""
699 content = tree_encode(shalist)
700 return self.maybe_write('tree', content)
    def new_commit(self, tree, parent,
                   author, adate_sec, adate_tz,
                   committer, cdate_sec, cdate_tz,
        """Create a commit object in the pack. The date_sec values must be
        epoch-seconds, and if a tz is None, the local timezone is assumed."""
        # NOTE(review): the final parameter line, the if/else pairing for
        # the date strings, and the 'l = []' setup appear elided here.
        adate_str = _git_date_str(adate_sec, adate_tz)
        adate_str = _local_git_date_str(adate_sec)
        cdate_str = _git_date_str(cdate_sec, cdate_tz)
        cdate_str = _local_git_date_str(cdate_sec)
        # NOTE(review): encode('hex') is Python 2-only.
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, adate_str))
        if committer: l.append('committer %s %s' % (committer, cdate_str))
        return self.maybe_write('commit', '\n'.join(l))
        # NOTE(review): the 'abort' method header appears elided here.
        """Remove the pack file from disk."""
        os.unlink(self.filename + '.pack')

    def _end(self, run_midx=True):
        # Finish the pack: patch the header's object count, append the
        # pack's sha1 trailer, write the .idx, and rename both files to
        # their final pack-<sha> names.
        # NOTE(review): many lines of this method are elided in this listing.
        if not f: return None
        # update object count
        cp = struct.pack('!i', self.count)
        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()
        fdatasync(f.fileno())
        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        # Remove any stale .map file before installing the pack.
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        # Sync the containing directory so the renames are durable.
        os.fsync(self.parentfd)
        os.close(self.parentfd)
        auto_midx(repo('objects/pack'))
        if self.on_pack_finish:
            self.on_pack_finish(nameprefix)
788 def close(self, run_midx=True):
789 """Close the pack file and move it to its definitive path."""
790 return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Write a version-2 .idx for this pack; the hex sha of the sorted
        # object list is used to name the final pack file.
        # NOTE(review): many lines of this method are elided in this listing.
        for entry in section:
            if entry[2] >= 2**31:
        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        fdatasync(idx_f.fileno())
        idx_map = mmap_readwrite(idx_f, close=False)
        # The C helper fills the mmap'd index in a single pass.
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        # Reopen to append the trailing checksums.
        idx_f = open(filename, 'a+b')
        b = idx_f.read(8 + 4*256)
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
        fdatasync(idx_f.fileno())
def _gitenv(repo_dir = None):
    # Return a callable (used as a subprocess preexec_fn) that points
    # GIT_DIR at the bup repository before exec'ing git.
    # NOTE(review): the inner function definition lines appear elided here.
        os.environ['GIT_DIR'] = os.path.abspath(repo_dir)


def list_refs(refname=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless a ref
    name is specified. Given a ref name, only include tuples for that
    particular ref. The limits restrict the result items to
    refs/heads or refs/tags. If both limits are specified, items from
    both sources will be included.
    """
    argv = ['git', 'show-ref']
    # NOTE(review): the guards that conditionally add these flags (and the
    # refname argument) appear elided from this listing.
    argv.append('--heads')
    argv.append('--tags')
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    for d in out.split('\n'):
        (sha, name) = d.split(' ', 1)
        # NOTE(review): decode('hex') is Python 2-only.
        yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
    # Pull at most two matches so we can detect ambiguity cheaply.
    l = tuple(islice(refs, 2))
    # NOTE(review): the lines returning the hash (or None) appear elided.


def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples.

    If count is a non-zero integer, limit the number of commits to "count"
    objects.
    """
    # Leading '-' would be parsed by git as an option.
    assert(not ref.startswith('-'))
    # NOTE(review): option setup and the stdout-parsing loop are partially
    # elided from this listing.
    opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    if s.startswith('commit '):
        commit = s[7:].decode('hex')
    rv = p.wait() # not fatal
    # NOTE(review): Python 2-only raise syntax below.
    raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # NOTE(review): the result-list setup, loop header, and return appear
    # elided from this listing.
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)


def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.

    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    # First try to resolve it as a ref; fall back to a raw hash lookup.
    head = read_ref(committish, repo_dir=repo_dir)
    # NOTE(review): the guard/return lines appear elided here;
    # encode('hex')/decode('hex') are Python 2-only.
    debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # NOTE(review): a couple of setup lines appear elided here.
    # Only branch heads and tags may be updated through this helper.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    # NOTE(review): encode('hex') is Python 2-only.
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1))."""
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    if oldvalue:
        argv.append(oldvalue)
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    check_repo_or_die().
    """
    # NOTE(review): several lines are elided here; BUP_DIR appears to take
    # precedence, with ~/.bup as the fallback.
    repodir = os.environ.get('BUP_DIR')
    repodir = os.path.expanduser('~/.bup')


def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    # NOTE(review): the guess_repo() call appears elided here.
    d = repo() # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Keep a reflog for every ref update.
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)


def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
    """
    # NOTE(review): the guess_repo()/try lines and the error exits appear
    # elided from this listing.
    os.stat(repo('objects/pack/.'))
    except OSError as e:
        if e.errno == errno.ENOENT:
            log('error: %r is not a bup repository; run "bup init"\n'
            log('error: %s\n' % e)
    # NOTE(review): the enclosing function header (the git version check)
    # is not visible in this listing.
    """Get Git's version and ensure a usable version is installed.

    The returned version is formatted as an ordered tuple with each position
    representing a digit in the version tag. For example, the following tuple
    would represent version 1.6.6.9:

        ('1', '6', '6', '9')
    """
    p = subprocess.Popen(['git', '--version'],
                         stdout=subprocess.PIPE)
    gvs = p.stdout.read()
    _git_wait('git --version', p)
    m = re.match(r'git version (\S+.\S+)', gvs)
    # NOTE(review): the 'if not m:' guard appears elided here.
    raise GitError('git --version weird output: %r' % gvs)
    _ver = tuple(m.group(1).split('.'))
    needed = ('1','5', '3', '1')
    # NOTE(review): the version comparison guard appears elided here.
    raise GitError('git version %s or higher is required; you have %s'
                   % ('.'.join(needed), '.'.join(_ver)))


def _git_wait(cmd, p):
    # Wait for subprocess p and raise GitError if it exited nonzero.
    # NOTE(review): the 'rv = p.wait()' and guard lines appear elided here.
    raise GitError('%s returned %d' % (cmd, rv))


def _git_capture(argv):
    # Run argv inside the repository and return its captured stdout.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    # NOTE(review): the stdout read and return lines appear elided here.
    _git_wait(repr(argv), p)
class _AbortableIter:
    # Wrap an iterator so a partially-consumed iteration can be aborted
    # cleanly (used to keep the cat-file pipe in a usable state).
    def __init__(self, it, onabort = None):
        # NOTE(review): the remaining init lines appear elided here.
        self.onabort = onabort

        # NOTE(review): the 'next' method header appears elided here;
        # '.next()' is the Python 2 iterator protocol.
        return self.it.next()
        except StopIteration as e:

        """Abort iteration and call the abortion callback, if needed."""


class MissingObject(KeyError):
    # Raised when a requested object id is not present in the repository.
    def __init__(self, id):
        # NOTE(review): 'self.id = id' appears elided here; encode('hex')
        # is Python 2-only.
        KeyError.__init__(self, 'object %r is missing' % id.encode('hex'))
    # NOTE(review): the 'class CatPipe' header line is not visible here.
    """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        # 'git cat-file --batch' needs git >= 1.5.6; otherwise fall back to
        # the slow one-process-per-object path.
        wanted = ('1','5','6')
        # NOTE(review): the version-check branch lines are partially elided.
        log('warning: git version < %s; bup will be slow.\n'
        self.get = self._slow_get
        self.p = self.inprogress = None
        self.get = self._fast_get

        # NOTE(review): the '_abort'/'restart' method headers appear elided
        # below; they tear down and respawn the batch process.
        self.p.stdout.close()
        self.p.stdin.close()
        self.inprogress = None
        self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        # Stream one object through the long-lived 'git cat-file --batch'
        # process; only one object may be in progress at a time.
        # NOTE(review): a few lines (the restart call, the try wrapper) are
        # elided from this listing.
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
        log('_fast_get: opening %r while %r is open\n'
            % (id, self.inprogress))
        assert(not self.inprogress)
        # Reject ids that could corrupt or be misparsed by the batch pipe.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise MissingObject(id.decode('hex'))
        # Header format: '<sha> <type> <size>'.
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl
        # Stream the body in chunks; aborting resets the pipe.
        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
        # NOTE(review): the try/yield lines are partially elided here; the
        # batch output is terminated by a single newline.
        readline_result = self.p.stdout.readline()
        assert(readline_result == '\n')
        self.inprogress = None
        except Exception as e:
    def _slow_get(self, id):
        # Fallback path: spawn one 'git cat-file' per requested object.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        # NOTE(review): the 'yield type' lines appear elided here.
        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
            # NOTE(review): the 'yield blob' line appears elided here.
        _git_wait('git cat-file', p)
    def _join(self, it):
        # Recursively expand one object into its constituent blob data:
        # blobs yield themselves, trees yield each member's blobs, commits
        # yield the blobs of their root tree.
        # NOTE(review): the type-dispatch head, the 'yield' lines and the
        # final 'else:' appear elided from this listing.
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
        raise GitError('invalid object type %r: expected blob/tree/commit'

        # NOTE(review): the 'join' method header appears elided here.
        """Generate a list of the content of all blobs that can be reached
        from an object. The hash given in 'id' must point to a blob, a tree
        or a commit. The content of all blobs that can be seen from trees or
        commits will be added to the list.
        """
        for d in self._join(self.get(id)):
        except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # NOTE(review): the global cache setup, guard, and return lines appear
    # elided from this listing; CatPipes are cached per absolute repo path.
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
        cp = CatPipe(repo_dir)


def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    # NOTE(review): the dict setup and name handling lines appear elided.
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name) # more than one tag can point at 'c'
# One item yielded per object encountered by walk_object() below.
WalkItem = namedtuple('WalkItem', ['id', 'type', 'mode',
                                   'path', 'chunk_path', 'data'])
# The path is the mangled path, and if an item represents a fragment
# of a chunked file, the chunk_path will be the chunked subtree path
# for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
# chunked file will have a chunk_path of ['']. So some chunk subtree
# of the file '/foo/bar/baz' might look like this:
#
#   item.path = ['foo', 'bar', 'baz.bup']
#   item.chunk_path = ['', '2d3115e', '016b097']
#   item.type = 'tree'
def _walk_object(cat_pipe, id,
                 parent_path, chunk_path,
    # Recursive engine behind walk_object(): yield a WalkItem for id, then
    # recurse into the children of trees and commits.
    # NOTE(review): the remaining parameter lines, several control-flow
    # lines and the 'yield x' re-yields are elided from this listing.
    if stop_at and stop_at(id):
    item_it = cat_pipe.get(id) # FIXME: use include_data
    type = item_it.next()
    if type not in ('blob', 'commit', 'tree'):
        raise Exception('unexpected repository object type %r' % type)

    # FIXME: set the mode based on the type when the mode is None
    if type == 'blob' and not include_data:
        # Dump data until we can ask cat_pipe not to fetch it
        for ignored in item_it:
        data = ''.join(item_it)
    yield WalkItem(id=id, type=type,
                   chunk_path=chunk_path, path=parent_path,
                   data=(data if include_data else None))
    if type == 'commit':
        commit_items = parse_commit(data)
        tree_id = commit_items.tree
        for x in _walk_object(cat_pipe, tree_id, parent_path, chunk_path,
                              mode=hashsplit.GIT_MODE_TREE,
                              include_data=include_data):
        parents = commit_items.parents
        for x in _walk_object(cat_pipe, pid, parent_path, chunk_path,
                              mode=mode, # Same mode as this child
                              include_data=include_data):
    elif type == 'tree':
        for mode, name, ent_id in tree_decode(data):
            demangled, bup_type = demangle_name(name, mode)
            # Chunked-file subtrees extend chunk_path instead of path.
            sub_path = parent_path
            sub_chunk_path = chunk_path + [name]
            sub_path = parent_path + [name]
            if bup_type == BUP_CHUNKED:
                sub_chunk_path = ['']
            sub_chunk_path = chunk_path
            for x in _walk_object(cat_pipe, ent_id.encode('hex'),
                                  sub_path, sub_chunk_path,
                                  include_data=include_data):


def walk_object(cat_pipe, id,
    """Yield everything reachable from id via cat_pipe as a WalkItem,
    stopping whenever stop_at(id) returns true. Throw MissingObject
    if a hash encountered is missing from the repository.
    """
    # NOTE(review): the remaining parameter lines are elided here.
    return _walk_object(cat_pipe, id, [], [],
                        include_data=include_data)