1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, hashsplit, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
13 hostname, localtime, log, merge_iter,
14 mmap_read, mmap_readwrite,
15 progress, qprogress, unlink, username, userfullname,
19 max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
20 max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
26 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
27 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
33 class GitError(Exception):
37 def parse_tz_offset(s):
38 """UTC offset in seconds."""
39 tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
45 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
46 # Make sure that's authoritative.
47 _start_end_char = r'[^ .,:;<>"\'\0\n]'
48 _content_char = r'[^\0\n<>]'
49 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
51 _start_end_char, _content_char, _start_end_char)
52 _tz_rx = r'[-+]\d\d[0-5]\d'
53 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
54 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
55 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
56 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
58 (?P<message>(?:.|\n)*)''' % (_parent_rx,
59 _safe_str_rx, _safe_str_rx, _tz_rx,
60 _safe_str_rx, _safe_str_rx, _tz_rx))
61 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
64 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
65 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
66 'author_name', 'author_mail',
67 'author_sec', 'author_offset',
68 'committer_name', 'committer_mail',
69 'committer_sec', 'committer_offset',
72 def parse_commit(content):
73 commit_match = re.match(_commit_rx, content)
75 raise Exception('cannot parse commit %r' % content)
76 matches = commit_match.groupdict()
77 return CommitInfo(tree=matches['tree'],
78 parents=re.findall(_parent_hash_rx, matches['parents']),
79 author_name=matches['author_name'],
80 author_mail=matches['author_mail'],
81 author_sec=int(matches['asec']),
82 author_offset=parse_tz_offset(matches['atz']),
83 committer_name=matches['committer_name'],
84 committer_mail=matches['committer_mail'],
85 committer_sec=int(matches['csec']),
86 committer_offset=parse_tz_offset(matches['ctz']),
87 message=matches['message'])
def get_commit_items(id, cp):
    """Fetch the commit named by id through cat-pipe cp and parse it.

    Returns a CommitInfo namedtuple (see parse_commit()).
    """
    item_it = cp.get(id)
    kind = item_it.next()
    assert(kind == 'commit')
    return parse_commit(''.join(item_it))
def _local_git_date_str(epoch_sec):
    """Return epoch_sec formatted as a git date using the local TZ offset."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
101 def _git_date_str(epoch_sec, tz_offset_sec):
102 offs = tz_offset_sec // 60
103 return '%d %s%02d%02d' \
105 '+' if offs >= 0 else '-',
110 def repo(sub = '', repo_dir=None):
111 """Get the path to the git repository or one of its subdirectories."""
113 repo_dir = repo_dir or repodir
115 raise GitError('You should call check_repo_or_die()')
117 # If there's a .git subdirectory, then the actual repo is in there.
118 gd = os.path.join(repo_dir, '.git')
119 if os.path.exists(gd):
122 return os.path.join(repo_dir, sub)
126 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
131 full = os.path.abspath(path)
132 fullrepo = os.path.abspath(repo(''))
133 if not fullrepo.endswith('/'):
135 if full.startswith(fullrepo):
136 path = full[len(fullrepo):]
137 if path.startswith('index-cache/'):
138 path = path[len('index-cache/'):]
139 return shorten_hash(path)
143 paths = [repo('objects/pack')]
144 paths += glob.glob(repo('index-cache/*/.'))
148 def auto_midx(objdir):
149 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
151 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
153 # make sure 'args' gets printed to help with debugging
154 add_error('%r: exception: %s' % (args, e))
157 add_error('%r: returned %d' % (args, rv))
159 args = [path.exe(), 'bloom', '--dir', objdir]
161 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
163 # make sure 'args' gets printed to help with debugging
164 add_error('%r: exception: %s' % (args, e))
167 add_error('%r: returned %d' % (args, rv))
170 def mangle_name(name, mode, gitmode):
171 """Mangle a file name to present an abstract name for segmented files.
172 Mangled file names will have the ".bup" extension added to them. If a
173 file's name already ends with ".bup", a ".bupl" extension is added to
174 disambiguate normal files from segmented ones.
176 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
177 assert(stat.S_ISDIR(gitmode))
179 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
180 return name + '.bupl'
185 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
186 def demangle_name(name, mode):
187 """Remove name mangling from a file name, if necessary.
189 The return value is a tuple (demangled_filename,mode), where mode is one of
192 * BUP_NORMAL : files that should be read as-is from the repository
193 * BUP_CHUNKED : files that were chunked and need to be reassembled
195 For more information on the name mangling algorithm, see mangle_name()
197 if name.endswith('.bupl'):
198 return (name[:-5], BUP_NORMAL)
199 elif name.endswith('.bup'):
200 return (name[:-4], BUP_CHUNKED)
201 elif name.endswith('.bupm'):
203 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
205 return (name, BUP_NORMAL)
208 def calc_hash(type, content):
209 """Calculate some content's hash in the Git fashion."""
210 header = '%s %d\0' % (type, len(content))
216 def shalist_item_sort_key(ent):
217 (mode, name, id) = ent
218 assert(mode+0 == mode)
219 if stat.S_ISDIR(mode):
225 def tree_encode(shalist):
226 """Generate a git tree object from (mode,name,hash) tuples."""
227 shalist = sorted(shalist, key = shalist_item_sort_key)
229 for (mode,name,bin) in shalist:
231 assert(mode+0 == mode)
233 assert(len(bin) == 20)
234 s = '%o %s\0%s' % (mode,name,bin)
235 assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
240 def tree_decode(buf):
241 """Generate a list of (mode,name,hash) from the git tree object in buf."""
243 while ofs < len(buf):
244 z = buf.find('\0', ofs)
246 spl = buf[ofs:z].split(' ', 1)
247 assert(len(spl) == 2)
249 sha = buf[z+1:z+1+20]
251 yield (int(mode, 8), name, sha)
254 def _encode_packobj(type, content, compression_level=1):
257 szbits = (sz & 0x0f) | (_typemap[type]<<4)
260 if sz: szbits |= 0x80
266 if compression_level > 9:
267 compression_level = 9
268 elif compression_level < 0:
269 compression_level = 0
270 z = zlib.compressobj(compression_level)
272 yield z.compress(content)
276 def _encode_looseobj(type, content, compression_level=1):
277 z = zlib.compressobj(compression_level)
278 yield z.compress('%s %d\0' % (type, len(content)))
279 yield z.compress(content)
283 def _decode_looseobj(buf):
285 s = zlib.decompress(buf)
292 assert(type in _typemap)
293 assert(sz == len(content))
294 return (type, content)
297 def _decode_packobj(buf):
300 type = _typermap[(c & 0x70) >> 4]
307 sz |= (c & 0x7f) << shift
311 return (type, zlib.decompress(buf[i+1:]))
318 def find_offset(self, hash):
319 """Get the offset of an object inside the index file."""
320 idx = self._idx_from_hash(hash)
322 return self._ofs_from_idx(idx)
325 def exists(self, hash, want_source=False):
326 """Return nonempty if the object exists in this index."""
327 if hash and (self._idx_from_hash(hash) != None):
328 return want_source and os.path.basename(self.name) or True
332 return int(self.fanout[255])
334 def _idx_from_hash(self, hash):
335 global _total_searches, _total_steps
337 assert(len(hash) == 20)
339 start = self.fanout[b1-1] # range -1..254
340 end = self.fanout[b1] # range 0..255
342 _total_steps += 1 # lookup table is a step
345 mid = start + (end-start)/2
346 v = self._idx_to_hash(mid)
356 class PackIdxV1(PackIdx):
357 """Object representation of a Git pack index (version 1) file."""
358 def __init__(self, filename, f):
360 self.idxnames = [self.name]
361 self.map = mmap_read(f)
362 self.fanout = list(struct.unpack('!256I',
363 str(buffer(self.map, 0, 256*4))))
364 self.fanout.append(0) # entry "-1"
365 nsha = self.fanout[255]
367 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
369 def _ofs_from_idx(self, idx):
370 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
372 def _idx_to_hash(self, idx):
373 return str(self.shatable[idx*24+4 : idx*24+24])
376 for i in xrange(self.fanout[255]):
377 yield buffer(self.map, 256*4 + 24*i + 4, 20)
380 class PackIdxV2(PackIdx):
381 """Object representation of a Git pack index (version 2) file."""
382 def __init__(self, filename, f):
384 self.idxnames = [self.name]
385 self.map = mmap_read(f)
386 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
387 self.fanout = list(struct.unpack('!256I',
388 str(buffer(self.map, 8, 256*4))))
389 self.fanout.append(0) # entry "-1"
390 nsha = self.fanout[255]
391 self.sha_ofs = 8 + 256*4
392 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
393 self.ofstable = buffer(self.map,
394 self.sha_ofs + nsha*20 + nsha*4,
396 self.ofs64table = buffer(self.map,
397 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
399 def _ofs_from_idx(self, idx):
400 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
402 idx64 = ofs & 0x7fffffff
403 ofs = struct.unpack('!Q',
404 str(buffer(self.ofs64table, idx64*8, 8)))[0]
407 def _idx_to_hash(self, idx):
408 return str(self.shatable[idx*20:(idx+1)*20])
411 for i in xrange(self.fanout[255]):
412 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
417 def __init__(self, dir):
419 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
424 self.do_bloom = False
431 assert(_mpi_count == 0)
434 return iter(idxmerge(self.packs))
437 return sum(len(pack) for pack in self.packs)
439 def exists(self, hash, want_source=False):
440 """Return nonempty if the object exists in the index files."""
441 global _total_searches
443 if hash in self.also:
445 if self.do_bloom and self.bloom:
446 if self.bloom.exists(hash):
447 self.do_bloom = False
449 _total_searches -= 1 # was counted by bloom
451 for i in xrange(len(self.packs)):
453 _total_searches -= 1 # will be incremented by sub-pack
454 ix = p.exists(hash, want_source=want_source)
456 # reorder so most recently used packs are searched first
457 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
462 def refresh(self, skip_midx = False):
463 """Refresh the index list.
464 This method verifies if .midx files were superseded (e.g. all of its
465 contents are in another, bigger .midx file) and removes the superseded
468 If skip_midx is True, all work on .midx files will be skipped and .midx
469 files will be removed from the list.
471 The module-global variable 'ignore_midx' can force this function to
472 always act as if skip_midx was True.
474 self.bloom = None # Always reopen the bloom as it may have been relaced
475 self.do_bloom = False
476 skip_midx = skip_midx or ignore_midx
477 d = dict((p.name, p) for p in self.packs
478 if not skip_midx or not isinstance(p, midx.PackMidx))
479 if os.path.exists(self.dir):
482 for ix in self.packs:
483 if isinstance(ix, midx.PackMidx):
484 for name in ix.idxnames:
485 d[os.path.join(self.dir, name)] = ix
486 for full in glob.glob(os.path.join(self.dir,'*.midx')):
488 mx = midx.PackMidx(full)
489 (mxd, mxf) = os.path.split(mx.name)
491 for n in mx.idxnames:
492 if not os.path.exists(os.path.join(mxd, n)):
493 log(('warning: index %s missing\n' +
494 ' used by %s\n') % (n, mxf))
502 midxl.sort(key=lambda ix:
503 (-len(ix), -xstat.stat(ix.name).st_mtime))
506 for sub in ix.idxnames:
507 found = d.get(os.path.join(self.dir, sub))
508 if not found or isinstance(found, PackIdx):
509 # doesn't exist, or exists but not in a midx
514 for name in ix.idxnames:
515 d[os.path.join(self.dir, name)] = ix
516 elif not ix.force_keep:
517 debug1('midx: removing redundant: %s\n'
518 % os.path.basename(ix.name))
521 for full in glob.glob(os.path.join(self.dir,'*.idx')):
525 except GitError as e:
529 bfull = os.path.join(self.dir, 'bup.bloom')
530 if self.bloom is None and os.path.exists(bfull):
531 self.bloom = bloom.ShaBloom(bfull)
532 self.packs = list(set(d.values()))
533 self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
534 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
538 debug1('PackIdxList: using %d index%s.\n'
539 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
542 """Insert an additional object in the list."""
546 def open_idx(filename):
547 if filename.endswith('.idx'):
548 f = open(filename, 'rb')
550 if header[0:4] == '\377tOc':
551 version = struct.unpack('!I', header[4:8])[0]
553 return PackIdxV2(filename, f)
555 raise GitError('%s: expected idx file version 2, got %d'
556 % (filename, version))
557 elif len(header) == 8 and header[0:4] < '\377tOc':
558 return PackIdxV1(filename, f)
560 raise GitError('%s: unrecognized idx file header' % filename)
561 elif filename.endswith('.midx'):
562 return midx.PackMidx(filename)
564 raise GitError('idx filenames must end with .idx or .midx')
567 def idxmerge(idxlist, final_progress=True):
568 """Generate a list of all the objects reachable in a PackIdxList."""
569 def pfunc(count, total):
570 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
571 % (count*100.0/total, count, total))
572 def pfinal(count, total):
574 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
575 % (100, total, total))
576 return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default PackWriter objcache factory: an index list over objects/pack."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
582 # bup-gc assumes that it can disable all PackWriter activities
583 # (bloom/midx/cache) via the constructor and close() arguments.
586 """Writes Git objects inside a pack file."""
587 def __init__(self, objcache_maker=_make_objcache, compression_level=1,
588 run_midx=True, on_pack_finish=None):
595 self.objcache_maker = objcache_maker
597 self.compression_level = compression_level
598 self.run_midx=run_midx
599 self.on_pack_finish = on_pack_finish
606 objdir = dir=repo('objects')
607 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
609 self.file = os.fdopen(fd, 'w+b')
614 self.parentfd = os.open(objdir, os.O_RDONLY)
620 assert(name.endswith('.pack'))
621 self.filename = name[:-5]
622 self.file.write('PACK\0\0\0\2\0\0\0\0')
623 self.idx = list(list() for i in xrange(256))
625 def _raw_write(self, datalist, sha):
628 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
629 # the file never has a *partial* blob. So let's make sure it's
630 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
631 # to our hashsplit algorithm.) f.write() does its own buffering,
632 # but that's okay because we'll flush it in _end().
633 oneblob = ''.join(datalist)
637 raise GitError, e, sys.exc_info()[2]
639 crc = zlib.crc32(oneblob) & 0xffffffff
640 self._update_idx(sha, crc, nw)
645 def _update_idx(self, sha, crc, size):
648 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
650 def _write(self, sha, type, content):
654 sha = calc_hash(type, content)
655 size, crc = self._raw_write(_encode_packobj(type, content,
656 self.compression_level),
658 if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
662 def breakpoint(self):
663 """Clear byte and object counts and return the last processed id."""
664 id = self._end(self.run_midx)
665 self.outbytes = self.count = 0
668 def _require_objcache(self):
669 if self.objcache is None and self.objcache_maker:
670 self.objcache = self.objcache_maker()
671 if self.objcache is None:
673 "PackWriter not opened or can't check exists w/o objcache")
675 def exists(self, id, want_source=False):
676 """Return non-empty if an object is found in the object cache."""
677 self._require_objcache()
678 return self.objcache.exists(id, want_source=want_source)
    def write(self, sha, type, content):
        """Write an object to the pack file.  Fails if sha exists()."""
        # Thin wrapper: _write() encodes and appends the object and updates
        # the in-memory pack index.
        self._write(sha, type, content)
684 def maybe_write(self, type, content):
685 """Write an object to the pack file if not present and return its id."""
686 sha = calc_hash(type, content)
687 if not self.exists(sha):
688 self.write(sha, type, content)
689 self._require_objcache()
690 self.objcache.add(sha)
693 def new_blob(self, blob):
694 """Create a blob object in the pack with the supplied content."""
695 return self.maybe_write('blob', blob)
697 def new_tree(self, shalist):
698 """Create a tree object in the pack."""
699 content = tree_encode(shalist)
700 return self.maybe_write('tree', content)
702 def new_commit(self, tree, parent,
703 author, adate_sec, adate_tz,
704 committer, cdate_sec, cdate_tz,
706 """Create a commit object in the pack. The date_sec values must be
707 epoch-seconds, and if a tz is None, the local timezone is assumed."""
709 adate_str = _git_date_str(adate_sec, adate_tz)
711 adate_str = _local_git_date_str(adate_sec)
713 cdate_str = _git_date_str(cdate_sec, cdate_tz)
715 cdate_str = _local_git_date_str(cdate_sec)
717 if tree: l.append('tree %s' % tree.encode('hex'))
718 if parent: l.append('parent %s' % parent.encode('hex'))
719 if author: l.append('author %s %s' % (author, adate_str))
720 if committer: l.append('committer %s %s' % (committer, cdate_str))
723 return self.maybe_write('commit', '\n'.join(l))
726 """Remove the pack file from disk."""
735 os.unlink(self.filename + '.pack')
742 def _end(self, run_midx=True):
744 if not f: return None
751 # update object count
753 cp = struct.pack('!i', self.count)
757 # calculate the pack sha1sum
760 for b in chunkyreader(f):
762 packbin = sum.digest()
764 fdatasync(f.fileno())
768 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
770 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
771 if os.path.exists(self.filename + '.map'):
772 os.unlink(self.filename + '.map')
773 os.rename(self.filename + '.pack', nameprefix + '.pack')
774 os.rename(self.filename + '.idx', nameprefix + '.idx')
776 os.fsync(self.parentfd)
778 os.close(self.parentfd)
781 auto_midx(repo('objects/pack'))
783 if self.on_pack_finish:
784 self.on_pack_finish(nameprefix)
788 def close(self, run_midx=True):
789 """Close the pack file and move it to its definitive path."""
790 return self._end(run_midx=run_midx)
792 def _write_pack_idx_v2(self, filename, idx, packbin):
795 for entry in section:
796 if entry[2] >= 2**31:
799 # Length: header + fan-out + shas-and-crcs + overflow-offsets
800 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
802 idx_f = open(filename, 'w+b')
804 idx_f.truncate(index_len)
805 fdatasync(idx_f.fileno())
806 idx_map = mmap_readwrite(idx_f, close=False)
808 count = _helpers.write_idx(filename, idx_map, idx, self.count)
809 assert(count == self.count)
816 idx_f = open(filename, 'a+b')
821 b = idx_f.read(8 + 4*256)
824 obj_list_sum = Sha1()
825 for b in chunkyreader(idx_f, 20*self.count):
827 obj_list_sum.update(b)
828 namebase = obj_list_sum.hexdigest()
830 for b in chunkyreader(idx_f):
832 idx_f.write(idx_sum.digest())
833 fdatasync(idx_f.fileno())
839 def _gitenv(repo_dir = None):
843 os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
847 def list_refs(refname=None, repo_dir=None,
848 limit_to_heads=False, limit_to_tags=False):
849 """Yield (refname, hash) tuples for all repository refs unless a ref
850 name is specified. Given a ref name, only include tuples for that
851 particular ref. The limits restrict the result items to
852 refs/heads or refs/tags. If both limits are specified, items from
853 both sources will be included.
856 argv = ['git', 'show-ref']
858 argv.append('--heads')
860 argv.append('--tags')
864 p = subprocess.Popen(argv,
865 preexec_fn = _gitenv(repo_dir),
866 stdout = subprocess.PIPE)
867 out = p.stdout.read().strip()
868 rv = p.wait() # not fatal
872 for d in out.split('\n'):
873 (sha, name) = d.split(' ', 1)
874 yield (name, sha.decode('hex'))
877 def read_ref(refname, repo_dir = None):
878 """Get the commit id of the most recent commit made on a given ref."""
879 refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
880 l = tuple(islice(refs, 2))
888 def rev_list(ref, count=None, repo_dir=None):
889 """Generate a list of reachable commits in reverse chronological order.
891 This generator walks through commits, from child to parent, that are
892 reachable via the specified ref and yields a series of tuples of the form
895 If count is a non-zero integer, limit the number of commits to "count"
898 assert(not ref.startswith('-'))
901 opts += ['-n', str(atoi(count))]
902 argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
903 p = subprocess.Popen(argv,
904 preexec_fn = _gitenv(repo_dir),
905 stdout = subprocess.PIPE)
909 if s.startswith('commit '):
910 commit = s[7:].decode('hex')
914 rv = p.wait() # not fatal
916 raise GitError, 'git rev-list returned error %d' % rv
919 def get_commit_dates(refs, repo_dir=None):
920 """Get the dates for the specified commit refs. For now, every unique
921 string in refs must resolve to a different commit or this
922 function will fail."""
925 commit = get_commit_items(ref, cp(repo_dir))
926 result.append(commit.author_sec)
930 def rev_parse(committish, repo_dir=None):
931 """Resolve the full hash for 'committish', if it exists.
933 Should be roughly equivalent to 'git rev-parse'.
935 Returns the hex value of the hash if it is found, None if 'committish' does
936 not correspond to anything.
938 head = read_ref(committish, repo_dir=repo_dir)
940 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
943 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
945 if len(committish) == 40:
947 hash = committish.decode('hex')
957 def update_ref(refname, newval, oldval, repo_dir=None):
958 """Update a repository reference."""
961 assert(refname.startswith('refs/heads/') \
962 or refname.startswith('refs/tags/'))
963 p = subprocess.Popen(['git', 'update-ref', refname,
964 newval.encode('hex'), oldval.encode('hex')],
965 preexec_fn = _gitenv(repo_dir))
966 _git_wait('git update-ref', p)
def delete_ref(refname, oldvalue=None):
    """Remove a repository reference (see git update-ref(1)).

    If oldvalue is provided, git verifies the ref currently holds that
    value before deleting it.
    """
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    if oldvalue:
        argv.append(oldvalue)
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
978 def guess_repo(path=None):
979 """Set the path value in the global variable "repodir".
980 This makes bup look for an existing bup repository, but not fail if a
981 repository doesn't exist. Usually, if you are interacting with a bup
982 repository, you would not be calling this function but using
989 repodir = os.environ.get('BUP_DIR')
991 repodir = os.path.expanduser('~/.bup')
994 def init_repo(path=None):
995 """Create the Git bare repository for bup in a given path."""
997 d = repo() # appends a / to the path
998 parent = os.path.dirname(os.path.dirname(d))
999 if parent and not os.path.exists(parent):
1000 raise GitError('parent directory "%s" does not exist\n' % parent)
1001 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
1002 raise GitError('"%s" exists but is not a directory\n' % d)
1003 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
1004 preexec_fn = _gitenv())
1005 _git_wait('git init', p)
1006 # Force the index version configuration in order to ensure bup works
1007 # regardless of the version of the installed Git binary.
1008 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
1009 stdout=sys.stderr, preexec_fn = _gitenv())
1010 _git_wait('git config', p)
1012 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
1013 stdout=sys.stderr, preexec_fn = _gitenv())
1014 _git_wait('git config', p)
1017 def check_repo_or_die(path=None):
1018 """Make sure a bup repository exists, and abort if not.
1019 If the path to a particular repository was not specified, this function
1020 initializes the default repository automatically.
1024 os.stat(repo('objects/pack/.'))
1025 except OSError as e:
1026 if e.errno == errno.ENOENT:
1027 log('error: %r is not a bup repository; run "bup init"\n'
1031 log('error: %s\n' % e)
1037 """Get Git's version and ensure a usable version is installed.
1039 The returned version is formatted as an ordered tuple with each position
1040 representing a digit in the version tag. For example, the following tuple
1041 would represent version 1.6.6.9:
1043 ('1', '6', '6', '9')
1047 p = subprocess.Popen(['git', '--version'],
1048 stdout=subprocess.PIPE)
1049 gvs = p.stdout.read()
1050 _git_wait('git --version', p)
1051 m = re.match(r'git version (\S+.\S+)', gvs)
1053 raise GitError('git --version weird output: %r' % gvs)
1054 _ver = tuple(m.group(1).split('.'))
1055 needed = ('1','5', '3', '1')
1057 raise GitError('git version %s or higher is required; you have %s'
1058 % ('.'.join(needed), '.'.join(_ver)))
1062 def _git_wait(cmd, p):
1065 raise GitError('%s returned %d' % (cmd, rv))
1068 def _git_capture(argv):
1069 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
1071 _git_wait(repr(argv), p)
1075 class _AbortableIter:
1076 def __init__(self, it, onabort = None):
1078 self.onabort = onabort
1086 return self.it.next()
1087 except StopIteration as e:
1095 """Abort iteration and call the abortion callback, if needed."""
1107 """Link to 'git cat-file' that is used to retrieve blob data."""
1108 def __init__(self, repo_dir = None):
1110 self.repo_dir = repo_dir
1111 wanted = ('1','5','6')
1114 log('warning: git version < %s; bup will be slow.\n'
1117 self.get = self._slow_get
1119 self.p = self.inprogress = None
1120 self.get = self._fast_get
1124 self.p.stdout.close()
1125 self.p.stdin.close()
1127 self.inprogress = None
1131 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1132 stdin=subprocess.PIPE,
1133 stdout=subprocess.PIPE,
1136 preexec_fn = _gitenv(self.repo_dir))
1138 def _fast_get(self, id):
1139 if not self.p or self.p.poll() != None:
1142 poll_result = self.p.poll()
1143 assert(poll_result == None)
1145 log('_fast_get: opening %r while %r is open\n'
1146 % (id, self.inprogress))
1147 assert(not self.inprogress)
1148 assert(id.find('\n') < 0)
1149 assert(id.find('\r') < 0)
1150 assert(not id.startswith('-'))
1151 self.inprogress = id
1152 self.p.stdin.write('%s\n' % id)
1153 self.p.stdin.flush()
1154 hdr = self.p.stdout.readline()
1155 if hdr.endswith(' missing\n'):
1156 self.inprogress = None
1157 raise KeyError('blob %r is missing' % id)
1158 spl = hdr.split(' ')
1159 if len(spl) != 3 or len(spl[0]) != 40:
1160 raise GitError('expected blob, got %r' % spl)
1161 (hex, type, size) = spl
1163 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1164 onabort = self._abort)
1169 readline_result = self.p.stdout.readline()
1170 assert(readline_result == '\n')
1171 self.inprogress = None
1172 except Exception as e:
1176 def _slow_get(self, id):
1177 assert(id.find('\n') < 0)
1178 assert(id.find('\r') < 0)
1179 assert(id[0] != '-')
1180 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1183 p = subprocess.Popen(['git', 'cat-file', type, id],
1184 stdout=subprocess.PIPE,
1185 preexec_fn = _gitenv(self.repo_dir))
1186 for blob in chunkyreader(p.stdout):
1188 _git_wait('git cat-file', p)
1190 def _join(self, it):
1195 elif type == 'tree':
1196 treefile = ''.join(it)
1197 for (mode, name, sha) in tree_decode(treefile):
1198 for blob in self.join(sha.encode('hex')):
1200 elif type == 'commit':
1201 treeline = ''.join(it).split('\n')[0]
1202 assert(treeline.startswith('tree '))
1203 for blob in self.join(treeline[5:]):
1206 raise GitError('invalid object type %r: expected blob/tree/commit'
1210 """Generate a list of the content of all blobs that can be reached
1211 from an object. The hash given in 'id' must point to a blob, a tree
1212 or a commit. The content of all blobs that can be seen from trees or
1213 commits will be added to the list.
1216 for d in self._join(self.get(id)):
1218 except StopIteration:
1224 def cp(repo_dir=None):
1225 """Create a CatPipe object or reuse the already existing one."""
1229 repo_dir = os.path.abspath(repo_dir)
1230 cp = _cp.get(repo_dir)
1232 cp = CatPipe(repo_dir)
1237 def tags(repo_dir = None):
1238 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1240 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
1241 assert(n.startswith('refs/tags/'))
1245 tags[c].append(name) # more than one tag can point at 'c'
1249 WalkItem = namedtuple('WalkItem', ['id', 'type', 'mode',
1250 'path', 'chunk_path', 'data'])
1251 # The path is the mangled path, and if an item represents a fragment
1252 # of a chunked file, the chunk_path will be the chunked subtree path
1253 # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
1254 # chunked file will have a chunk_path of ['']. So some chunk subtree
1255 # of the file '/foo/bar/baz' might look like this:
1257 # item.path = ['foo', 'bar', 'baz.bup']
1258 # item.chunk_path = ['', '2d3115e', '016b097']
1259 # item.type = 'tree'
1263 def _walk_object(cat_pipe, id,
1264 parent_path, chunk_path,
1269 if stop_at and stop_at(id):
1272 item_it = cat_pipe.get(id) # FIXME: use include_data
1273 type = item_it.next()
1275 if type not in ('blob', 'commit', 'tree'):
1276 raise Exception('unexpected repository object type %r' % type)
1278 # FIXME: set the mode based on the type when the mode is None
1280 if type == 'blob' and not include_data:
1281 # Dump data until we can ask cat_pipe not to fetch it
1282 for ignored in item_it:
1286 data = ''.join(item_it)
1288 yield WalkItem(id=id, type=type,
1289 chunk_path=chunk_path, path=parent_path,
1291 data=(data if include_data else None))
1293 if type == 'commit':
1294 commit_items = parse_commit(data)
1295 tree_id = commit_items.tree
1296 for x in _walk_object(cat_pipe, tree_id, parent_path, chunk_path,
1297 mode=hashsplit.GIT_MODE_TREE,
1299 include_data=include_data):
1301 parents = commit_items.parents
1303 for x in _walk_object(cat_pipe, pid, parent_path, chunk_path,
1304 mode=mode, # Same mode as this child
1306 include_data=include_data):
1308 elif type == 'tree':
1309 for mode, name, ent_id in tree_decode(data):
1310 demangled, bup_type = demangle_name(name, mode)
1312 sub_path = parent_path
1313 sub_chunk_path = chunk_path + [name]
1315 sub_path = parent_path + [name]
1316 if bup_type == BUP_CHUNKED:
1317 sub_chunk_path = ['']
1319 sub_chunk_path = chunk_path
1320 for x in _walk_object(cat_pipe, ent_id.encode('hex'),
1321 sub_path, sub_chunk_path,
1324 include_data=include_data):
1328 def walk_object(cat_pipe, id,
1331 """Yield everything reachable from id via cat_pipe as a WalkItem,
1332 stopping whenever stop_at(id) returns true."""
1333 return _walk_object(cat_pipe, id, [], [],
1335 include_data=include_data)