1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, hashsplit, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
13 hostname, localtime, log, merge_iter,
14 mmap_read, mmap_readwrite,
15 progress, qprogress, unlink, username, userfullname,
19 max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
20 max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
26 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
27 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
33 class GitError(Exception):
37 def parse_tz_offset(s):
38 """UTC offset in seconds."""
# Converts a git-style "[+-]HHMM" timezone string to seconds.
# NOTE(review): the sign handling and return statement are on elided lines
# of this dump — confirm against the full file.
39 tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
45 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
46 # Make sure that's authoritative.
47 _start_end_char = r'[^ .,:;<>"\'\0\n]'
48 _content_char = r'[^\0\n<>]'
49 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
51 _start_end_char, _content_char, _start_end_char)
52 _tz_rx = r'[-+]\d\d[0-5]\d'
53 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
54 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
55 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
56 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
58 (?P<message>(?:.|\n)*)''' % (_parent_rx,
59 _safe_str_rx, _safe_str_rx, _tz_rx,
60 _safe_str_rx, _safe_str_rx, _tz_rx))
61 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
64 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
65 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
66 'author_name', 'author_mail',
67 'author_sec', 'author_offset',
68 'committer_name', 'committer_mail',
69 'committer_sec', 'committer_offset',
72 def parse_commit(content):
# Parse a raw git commit object body into a CommitInfo namedtuple using the
# module-level _commit_rx regex; raises if the content doesn't match.
73 commit_match = re.match(_commit_rx, content)
75 raise Exception('cannot parse commit %r' % content)
76 matches = commit_match.groupdict()
77 return CommitInfo(tree=matches['tree'],
# 'parents' is the concatenated "parent <sha>\n" lines; extract the hashes.
78 parents=re.findall(_parent_hash_rx, matches['parents']),
79 author_name=matches['author_name'],
80 author_mail=matches['author_mail'],
# asec/csec are (UTC) epoch seconds; atz/ctz are "[+-]HHMM" offsets.
81 author_sec=int(matches['asec']),
82 author_offset=parse_tz_offset(matches['atz']),
83 committer_name=matches['committer_name'],
84 committer_mail=matches['committer_mail'],
85 committer_sec=int(matches['csec']),
86 committer_offset=parse_tz_offset(matches['ctz']),
87 message=matches['message'])
90 def get_commit_items(id, cp):
# Fetch commit `id` through the CatPipe `cp` and return its parsed CommitInfo.
91 commit_it = cp.get(id)
# The first item yielded by cp.get() is the object's type string.
92 assert(commit_it.next() == 'commit')
93 commit_content = ''.join(commit_it)
94 return parse_commit(commit_content)
97 def _local_git_date_str(epoch_sec):
# Git-style date string using the local timezone's offset for epoch_sec.
98 return '%d %s' % (epoch_sec, utc_offset_str(epoch_sec))
101 def _git_date_str(epoch_sec, tz_offset_sec):
# Format "SECONDS [+-]HHMM" from epoch seconds and a tz offset in seconds.
102 offs = tz_offset_sec // 60
103 return '%d %s%02d%02d' \
# (format arguments continue on an elided line; sign is chosen here)
105 '+' if offs >= 0 else '-',
110 def repo(sub = '', repo_dir=None):
111 """Get the path to the git repository or one of its subdirectories."""
# Fall back to the module-global repodir (set by check_repo_or_die/guess_repo).
113 repo_dir = repo_dir or repodir
115 raise GitError('You should call check_repo_or_die()')
117 # If there's a .git subdirectory, then the actual repo is in there.
118 gd = os.path.join(repo_dir, '.git')
119 if os.path.exists(gd):
122 return os.path.join(repo_dir, sub)
126 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
131 full = os.path.abspath(path)
132 fullrepo = os.path.abspath(repo(''))
133 if not fullrepo.endswith('/'):
135 if full.startswith(fullrepo):
136 path = full[len(fullrepo):]
137 if path.startswith('index-cache/'):
138 path = path[len('index-cache/'):]
139 return shorten_hash(path)
143 paths = [repo('objects/pack')]
144 paths += glob.glob(repo('index-cache/*/.'))
148 def auto_midx(objdir):
# Run "bup midx --auto" and then "bup bloom" over objdir as subprocesses.
# Failures are recorded via add_error() rather than raised, so pack
# finalization can proceed even if index maintenance fails.
149 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
151 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
153 # make sure 'args' gets printed to help with debugging
154 add_error('%r: exception: %s' % (args, e))
157 add_error('%r: returned %d' % (args, rv))
159 args = [path.exe(), 'bloom', '--dir', objdir]
161 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
163 # make sure 'args' gets printed to help with debugging
164 add_error('%r: exception: %s' % (args, e))
167 add_error('%r: returned %d' % (args, rv))
170 def mangle_name(name, mode, gitmode):
171 """Mangle a file name to present an abstract name for segmented files.
172 Mangled file names will have the ".bup" extension added to them. If a
173 file's name already ends with ".bup", a ".bupl" extension is added to
174 disambiguate normal files from segmented ones.
# A regular file (mode) stored as a git tree (gitmode) means it was chunked.
176 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
177 assert(stat.S_ISDIR(gitmode))
# name[:-1].endswith('.bup') also catches names one character longer than
# '.bup' (e.g. 'x.bupl'), so their demangled forms can't collide.
179 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
180 return name + '.bupl'
185 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
186 def demangle_name(name, mode):
187 """Remove name mangling from a file name, if necessary.
189 The return value is a tuple (demangled_filename,mode), where mode is one of
192 * BUP_NORMAL : files that should be read as-is from the repository
193 * BUP_CHUNKED : files that were chunked and need to be reassembled
195 For more information on the name mangling algorithm, see mangle_name()
197 if name.endswith('.bupl'):
198 return (name[:-5], BUP_NORMAL)
199 elif name.endswith('.bup'):
200 return (name[:-4], BUP_CHUNKED)
201 elif name.endswith('.bupm'):
# A .bupm entry is chunked iff its tree entry mode marks it a directory.
203 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
205 return (name, BUP_NORMAL)
208 def calc_hash(type, content):
209 """Calculate some content's hash in the Git fashion."""
# Git object id = sha1 of "<type> <len>\0" followed by the content
# (hash accumulation continues on elided lines of this dump).
210 header = '%s %d\0' % (type, len(content))
216 def shalist_item_sort_key(ent):
# Sort key for git tree entries: git orders directories as if their
# names ended with '/' (the directory branch continues on elided lines).
217 (mode, name, id) = ent
# Cheap integer-ness check: fails for non-numeric modes.
218 assert(mode+0 == mode)
219 if stat.S_ISDIR(mode):
225 def tree_encode(shalist):
226 """Generate a git tree object from (mode,name,hash) tuples."""
# git requires tree entries in its specific sort order (see
# shalist_item_sort_key above).
227 shalist = sorted(shalist, key = shalist_item_sort_key)
229 for (mode,name,bin) in shalist:
231 assert(mode+0 == mode)
# bin must be the 20-byte binary sha, not hex.
233 assert(len(bin) == 20)
234 s = '%o %s\0%s' % (mode,name,bin)
235 assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
240 def tree_decode(buf):
241 """Generate a list of (mode,name,hash) from the git tree object in buf."""
243 while ofs < len(buf):
# Each entry is "<octal mode> <name>\0" followed by a 20-byte binary sha.
244 z = buf.find('\0', ofs)
246 spl = buf[ofs:z].split(' ', 1)
247 assert(len(spl) == 2)
249 sha = buf[z+1:z+1+20]
251 yield (int(mode, 8), name, sha)
254 def _encode_packobj(type, content, compression_level=1):
# Yield a git pack-object: varint header (type + size) then the zlib stream.
255 if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
256 raise ValueError('invalid compression level %s' % compression_level)
# First header byte: low 4 bits of size, then the 3-bit type code.
259 szbits = (sz & 0x0f) | (_typemap[type]<<4)
# High bit set means "more size bytes follow" (git pack varint encoding).
262 if sz: szbits |= 0x80
268 z = zlib.compressobj(compression_level)
270 yield z.compress(content)
274 def _encode_looseobj(type, content, compression_level=1):
# Loose object = zlib("<type> <len>\0" + content); the final z.flush()
# is on an elided line of this dump.
275 z = zlib.compressobj(compression_level)
276 yield z.compress('%s %d\0' % (type, len(content)))
277 yield z.compress(content)
281 def _decode_looseobj(buf):
# Inverse of _encode_looseobj: decompress, then split the "<type> <len>\0"
# header from the payload (parsing happens on elided lines).
283 s = zlib.decompress(buf)
290 assert(type in _typemap)
# Sanity-check the declared length against the actual payload.
291 assert(sz == len(content))
292 return (type, content)
295 def _decode_packobj(buf):
# Inverse of _encode_packobj: read the 3-bit type code and the varint
# size, then decompress the rest of the buffer.
298 type = _typermap[(c & 0x70) >> 4]
# Accumulate 7 bits of size per continuation byte.
305 sz |= (c & 0x7f) << shift
309 return (type, zlib.decompress(buf[i+1:]))
316 def find_offset(self, hash):
317 """Get the offset of an object inside the index file."""
# _idx_from_hash/_ofs_from_idx are supplied by the V1/V2 subclasses.
318 idx = self._idx_from_hash(hash)
320 return self._ofs_from_idx(idx)
323 def exists(self, hash, want_source=False):
324 """Return nonempty if the object exists in this index."""
325 if hash and (self._idx_from_hash(hash) != None):
# With want_source, return this idx file's basename instead of True.
326 return want_source and os.path.basename(self.name) or True
330 return int(self.fanout[255])
332 def _idx_from_hash(self, hash):
# Binary search for a 20-byte sha, bounded by the fanout table entry
# for the hash's first byte; tracks global search statistics.
333 global _total_searches, _total_steps
335 assert(len(hash) == 20)
337 start = self.fanout[b1-1] # range -1..254
338 end = self.fanout[b1] # range 0..255
340 _total_steps += 1 # lookup table is a step
343 mid = start + (end-start)/2
344 v = self._idx_to_hash(mid)
354 class PackIdxV1(PackIdx):
355 """Object representation of a Git pack index (version 1) file."""
356 def __init__(self, filename, f):
358 self.idxnames = [self.name]
359 self.map = mmap_read(f)
# v1 layout: 256-entry fanout of 4-byte big-endian counts, then 24-byte
# entries of (4-byte offset + 20-byte sha).
360 self.fanout = list(struct.unpack('!256I',
361 str(buffer(self.map, 0, 256*4))))
362 self.fanout.append(0) # entry "-1"
363 nsha = self.fanout[255]
365 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
367 def _ofs_from_idx(self, idx):
368 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
370 def _idx_to_hash(self, idx):
371 return str(self.shatable[idx*24+4 : idx*24+24])
# __iter__ (its def line is elided here) yields each 20-byte sha as a
# zero-copy buffer over the mmap.
374 for i in xrange(self.fanout[255]):
375 yield buffer(self.map, 256*4 + 24*i + 4, 20)
378 class PackIdxV2(PackIdx):
379 """Object representation of a Git pack index (version 2) file."""
380 def __init__(self, filename, f):
382 self.idxnames = [self.name]
383 self.map = mmap_read(f)
# Verify the "\377tOc" magic plus version number 2.
384 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
385 self.fanout = list(struct.unpack('!256I',
386 str(buffer(self.map, 8, 256*4))))
387 self.fanout.append(0) # entry "-1"
388 nsha = self.fanout[255]
389 self.sha_ofs = 8 + 256*4
390 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
# v2 layout after the shas: nsha crc32s (4 bytes each), then 4-byte
# offsets, then 8-byte offsets for entries in packs larger than 2GB.
391 self.ofstable = buffer(self.map,
392 self.sha_ofs + nsha*20 + nsha*4,
394 self.ofs64table = buffer(self.map,
395 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
397 def _ofs_from_idx(self, idx):
398 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
# High bit set means the low 31 bits index into the 64-bit offset table.
400 idx64 = ofs & 0x7fffffff
401 ofs = struct.unpack('!Q',
402 str(buffer(self.ofs64table, idx64*8, 8)))[0]
405 def _idx_to_hash(self, idx):
406 return str(self.shatable[idx*20:(idx+1)*20])
# __iter__ (its def line is elided here) yields each 20-byte sha as a
# zero-copy buffer over the mmap.
409 for i in xrange(self.fanout[255]):
410 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
415 def __init__(self, dir):
417 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
422 self.do_bloom = False
429 assert(_mpi_count == 0)
432 return iter(idxmerge(self.packs))
435 return sum(len(pack) for pack in self.packs)
437 def exists(self, hash, want_source=False):
438 """Return nonempty if the object exists in the index files."""
439 global _total_searches
# self.also holds hashes registered via add() that aren't in any idx yet.
441 if hash in self.also:
# A bloom-filter miss proves absence; a hit must still be confirmed
# against the real pack indexes below.
443 if self.do_bloom and self.bloom:
444 if self.bloom.exists(hash):
445 self.do_bloom = False
447 _total_searches -= 1 # was counted by bloom
449 for i in xrange(len(self.packs)):
451 _total_searches -= 1 # will be incremented by sub-pack
452 ix = p.exists(hash, want_source=want_source)
454 # reorder so most recently used packs are searched first
455 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
460 def refresh(self, skip_midx = False):
461 """Refresh the index list.
462 This method verifies if .midx files were superseded (e.g. all of its
463 contents are in another, bigger .midx file) and removes the superseded
466 If skip_midx is True, all work on .midx files will be skipped and .midx
467 files will be removed from the list.
469 The module-global variable 'ignore_midx' can force this function to
470 always act as if skip_midx was True.
472 self.bloom = None # Always reopen the bloom as it may have been replaced
473 self.do_bloom = False
474 skip_midx = skip_midx or ignore_midx
# Start from the already-open indexes, dropping midxes if they're disabled.
475 d = dict((p.name, p) for p in self.packs
476 if not skip_midx or not isinstance(p, midx.PackMidx))
477 if os.path.exists(self.dir):
# Map each idx file covered by an open midx to that midx.
480 for ix in self.packs:
481 if isinstance(ix, midx.PackMidx):
482 for name in ix.idxnames:
483 d[os.path.join(self.dir, name)] = ix
484 for full in glob.glob(os.path.join(self.dir,'*.midx')):
486 mx = midx.PackMidx(full)
487 (mxd, mxf) = os.path.split(mx.name)
489 for n in mx.idxnames:
490 if not os.path.exists(os.path.join(mxd, n)):
491 log(('warning: index %s missing\n' +
492 ' used by %s\n') % (n, mxf))
# Prefer larger, then newer, midxes so the fewest files cover all idxs.
500 midxl.sort(key=lambda ix:
501 (-len(ix), -xstat.stat(ix.name).st_mtime))
504 for sub in ix.idxnames:
505 found = d.get(os.path.join(self.dir, sub))
506 if not found or isinstance(found, PackIdx):
507 # doesn't exist, or exists but not in a midx
512 for name in ix.idxnames:
513 d[os.path.join(self.dir, name)] = ix
514 elif not ix.force_keep:
515 debug1('midx: removing redundant: %s\n'
516 % os.path.basename(ix.name))
519 for full in glob.glob(os.path.join(self.dir,'*.idx')):
523 except GitError as e:
527 bfull = os.path.join(self.dir, 'bup.bloom')
528 if self.bloom is None and os.path.exists(bfull):
529 self.bloom = bloom.ShaBloom(bfull)
530 self.packs = list(set(d.values()))
531 self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
# Only trust the bloom filter if it covers at least everything we have.
532 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
536 debug1('PackIdxList: using %d index%s.\n'
537 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
540 """Insert an additional object in the list."""
544 def open_idx(filename):
# Open a pack index, dispatching on filename extension and header magic
# to PackIdxV2, PackIdxV1, or midx.PackMidx.
545 if filename.endswith('.idx'):
546 f = open(filename, 'rb')
548 if header[0:4] == '\377tOc':
549 version = struct.unpack('!I', header[4:8])[0]
551 return PackIdxV2(filename, f)
553 raise GitError('%s: expected idx file version 2, got %d'
554 % (filename, version))
# v1 files have no magic; they begin directly with the fanout table.
555 elif len(header) == 8 and header[0:4] < '\377tOc':
556 return PackIdxV1(filename, f)
558 raise GitError('%s: unrecognized idx file header' % filename)
559 elif filename.endswith('.midx'):
560 return midx.PackMidx(filename)
562 raise GitError('idx filenames must end with .idx or .midx')
565 def idxmerge(idxlist, final_progress=True):
566 """Generate a list of all the objects reachable in a PackIdxList."""
# Progress callbacks passed through to merge_iter; pfinal only fires
# when final_progress is set (check is on an elided line).
567 def pfunc(count, total):
568 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
569 % (count*100.0/total, count, total))
570 def pfinal(count, total):
572 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
573 % (100, total, total))
574 return merge_iter(idxlist, 10024, pfunc, pfinal)
577 def _make_objcache():
# Default objcache factory for PackWriter: every idx in the repo's pack dir.
578 return PackIdxList(repo('objects/pack'))
580 # bup-gc assumes that it can disable all PackWriter activities
581 # (bloom/midx/cache) via the constructor and close() arguments.
584 """Writes Git objects inside a pack file."""
585 def __init__(self, objcache_maker=_make_objcache, compression_level=1,
586 run_midx=True, on_pack_finish=None):
# objcache_maker may be falsy to disable existence checks entirely
# (bup-gc relies on this; see the comment above the class).
593 self.objcache_maker = objcache_maker
595 self.compression_level = compression_level
596 self.run_midx=run_midx
# Optional callback invoked with the finished pack's name prefix.
597 self.on_pack_finish = on_pack_finish
604 objdir = dir=repo('objects')
605 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
607 self.file = os.fdopen(fd, 'w+b')
612 self.parentfd = os.open(objdir, os.O_RDONLY)
618 assert(name.endswith('.pack'))
619 self.filename = name[:-5]
620 self.file.write('PACK\0\0\0\2\0\0\0\0')
621 self.idx = list(list() for i in xrange(256))
623 def _raw_write(self, datalist, sha):
626 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
627 # the file never has a *partial* blob. So let's make sure it's
628 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
629 # to our hashsplit algorithm.) f.write() does its own buffering,
630 # but that's okay because we'll flush it in _end().
631 oneblob = ''.join(datalist)
# Python 2 three-argument raise: re-raise as GitError keeping the traceback.
635 raise GitError, e, sys.exc_info()[2]
# Mask to keep crc32 non-negative across Python 2 versions.
637 crc = zlib.crc32(oneblob) & 0xffffffff
638 self._update_idx(sha, crc, nw)
643 def _update_idx(self, sha, crc, size):
# Record (sha, crc, start-offset), bucketed by the sha's first byte so
# _write_pack_idx_v2 can build the 256-entry fanout directly.
646 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
648 def _write(self, sha, type, content):
# NOTE(review): lines elided here; sha appears to be recomputed under
# some condition (e.g. when not supplied) — confirm against the full file.
652 sha = calc_hash(type, content)
653 size, crc = self._raw_write(_encode_packobj(type, content,
654 self.compression_level),
# Roll over to a fresh pack once the size or object-count limit is hit.
656 if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
660 def breakpoint(self):
661 """Clear byte and object counts and return the last processed id."""
# Finish the current pack (honoring run_midx) and reset for the next one.
662 id = self._end(self.run_midx)
663 self.outbytes = self.count = 0
666 def _require_objcache(self):
# Lazily build the object cache; error out if caching was disabled.
667 if self.objcache is None and self.objcache_maker:
668 self.objcache = self.objcache_maker()
669 if self.objcache is None:
# (the raise statement's first line is elided in this dump)
671 "PackWriter not opened or can't check exists w/o objcache")
673 def exists(self, id, want_source=False):
674 """Return non-empty if an object is found in the object cache."""
# Delegates to the PackIdxList built by _require_objcache().
675 self._require_objcache()
676 return self.objcache.exists(id, want_source=want_source)
678 def write(self, sha, type, content):
679 """Write an object to the pack file. Fails if sha exists()."""
# Unconditional write; use maybe_write() for existence-checked writes.
680 self._write(sha, type, content)
682 def maybe_write(self, type, content):
683 """Write an object to the pack file if not present and return its id."""
684 sha = calc_hash(type, content)
685 if not self.exists(sha):
686 self.write(sha, type, content)
687 self._require_objcache()
# Remember the sha locally so duplicates later in this run are skipped.
688 self.objcache.add(sha)
691 def new_blob(self, blob):
692 """Create a blob object in the pack with the supplied content."""
# Dedups via maybe_write; returns the object's id.
693 return self.maybe_write('blob', blob)
695 def new_tree(self, shalist):
696 """Create a tree object in the pack."""
# shalist is a list of (mode, name, binary-sha) tuples; see tree_encode.
697 content = tree_encode(shalist)
698 return self.maybe_write('tree', content)
700 def new_commit(self, tree, parent,
701 author, adate_sec, adate_tz,
702 committer, cdate_sec, cdate_tz,
704 """Create a commit object in the pack. The date_sec values must be
705 epoch-seconds, and if a tz is None, the local timezone is assumed."""
707 adate_str = _git_date_str(adate_sec, adate_tz)
709 adate_str = _local_git_date_str(adate_sec)
711 cdate_str = _git_date_str(cdate_sec, cdate_tz)
713 cdate_str = _local_git_date_str(cdate_sec)
# Assemble the commit body line by line; tree/parent are binary shas
# and get hex-encoded here. Each header is optional.
715 if tree: l.append('tree %s' % tree.encode('hex'))
716 if parent: l.append('parent %s' % parent.encode('hex'))
717 if author: l.append('author %s %s' % (author, adate_str))
718 if committer: l.append('committer %s %s' % (committer, cdate_str))
721 return self.maybe_write('commit', '\n'.join(l))
724 """Remove the pack file from disk."""
733 os.unlink(self.filename + '.pack')
740 def _end(self, run_midx=True):
742 if not f: return None
749 # update object count
751 cp = struct.pack('!i', self.count)
755 # calculate the pack sha1sum
758 for b in chunkyreader(f):
760 packbin = sum.digest()
762 fdatasync(f.fileno())
766 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
768 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
769 if os.path.exists(self.filename + '.map'):
770 os.unlink(self.filename + '.map')
771 os.rename(self.filename + '.pack', nameprefix + '.pack')
772 os.rename(self.filename + '.idx', nameprefix + '.idx')
774 os.fsync(self.parentfd)
776 os.close(self.parentfd)
779 auto_midx(repo('objects/pack'))
781 if self.on_pack_finish:
782 self.on_pack_finish(nameprefix)
786 def close(self, run_midx=True):
787 """Close the pack file and move it to its definitive path."""
# Thin wrapper: returns whatever _end() returns.
788 return self._end(run_midx=run_midx)
790 def _write_pack_idx_v2(self, filename, idx, packbin):
# Count entries whose pack offset needs the 64-bit overflow table.
793 for entry in section:
794 if entry[2] >= 2**31:
797 # Length: header + fan-out + shas-and-crcs + overflow-offsets
798 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
800 idx_f = open(filename, 'w+b')
802 idx_f.truncate(index_len)
803 fdatasync(idx_f.fileno())
# The C helper fills the mmap'ed idx file in place for speed.
804 idx_map = mmap_readwrite(idx_f, close=False)
806 count = _helpers.write_idx(filename, idx_map, idx, self.count)
807 assert(count == self.count)
# Reopen to compute checksums: skip header+fanout, hash the sorted sha
# list to derive the pack's name, then append the idx's own sha1 trailer.
814 idx_f = open(filename, 'a+b')
819 b = idx_f.read(8 + 4*256)
822 obj_list_sum = Sha1()
823 for b in chunkyreader(idx_f, 20*self.count):
825 obj_list_sum.update(b)
826 namebase = obj_list_sum.hexdigest()
828 for b in chunkyreader(idx_f):
830 idx_f.write(idx_sum.digest())
831 fdatasync(idx_f.fileno())
837 def _gitenv(repo_dir = None):
# Sets GIT_DIR for child git processes. NOTE(review): callers pass the
# result as preexec_fn, so this presumably returns a closure performing
# the assignment below — the surrounding lines are elided; confirm.
841 os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
845 def list_refs(refname=None, repo_dir=None,
846 limit_to_heads=False, limit_to_tags=False):
847 """Yield (refname, hash) tuples for all repository refs unless a ref
848 name is specified. Given a ref name, only include tuples for that
849 particular ref. The limits restrict the result items to
850 refs/heads or refs/tags. If both limits are specified, items from
851 both sources will be included.
854 argv = ['git', 'show-ref']
856 argv.append('--heads')
858 argv.append('--tags')
862 p = subprocess.Popen(argv,
863 preexec_fn = _gitenv(repo_dir),
864 stdout = subprocess.PIPE)
865 out = p.stdout.read().strip()
# show-ref exits nonzero when nothing matched; that's not an error here.
866 rv = p.wait() # not fatal
# Each output line is "<hex-sha> <refname>"; yield (name, binary sha).
870 for d in out.split('\n'):
871 (sha, name) = d.split(' ', 1)
872 yield (name, sha.decode('hex'))
875 def read_ref(refname, repo_dir = None):
876 """Get the commit id of the most recent commit made on a given ref."""
877 refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
# Take up to two matches so ambiguity can be detected (handling elided).
878 l = tuple(islice(refs, 2))
886 def rev_list(ref, count=None, repo_dir=None):
887 """Generate a list of reachable commits in reverse chronological order.
889 This generator walks through commits, from child to parent, that are
890 reachable via the specified ref and yields a series of tuples of the form
893 If count is a non-zero integer, limit the number of commits to "count"
# Refuse refs that look like options, to avoid argument injection.
896 assert(not ref.startswith('-'))
899 opts += ['-n', str(atoi(count))]
# '--' terminates options so ref can't be misparsed as a path.
900 argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
901 p = subprocess.Popen(argv,
902 preexec_fn = _gitenv(repo_dir),
903 stdout = subprocess.PIPE)
# Output alternates "commit <hex>" lines with the %at (author time) lines.
907 if s.startswith('commit '):
908 commit = s[7:].decode('hex')
912 rv = p.wait() # not fatal
914 raise GitError, 'git rev-list returned error %d' % rv
917 def get_commit_dates(refs, repo_dir=None):
918 """Get the dates for the specified commit refs. For now, every unique
919 string in refs must resolve to a different commit or this
920 function will fail."""
# Returns the author_sec (epoch seconds) for each ref, in input order.
923 commit = get_commit_items(ref, cp(repo_dir))
924 result.append(commit.author_sec)
928 def rev_parse(committish, repo_dir=None):
929 """Resolve the full hash for 'committish', if it exists.
931 Should be roughly equivalent to 'git rev-parse'.
933 Returns the hex value of the hash if it is found, None if 'committish' does
934 not correspond to anything.
# Try resolving as a ref first, then as a literal 40-char hex object id
# checked against the pack indexes.
936 head = read_ref(committish, repo_dir=repo_dir)
938 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
941 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
943 if len(committish) == 40:
945 hash = committish.decode('hex')
955 def update_ref(refname, newval, oldval, repo_dir=None):
956 """Update a repository reference."""
# Only branch heads and tags may be updated; newval/oldval are binary
# shas and get hex-encoded for the git command line.
959 assert(refname.startswith('refs/heads/') \
960 or refname.startswith('refs/tags/'))
961 p = subprocess.Popen(['git', 'update-ref', refname,
962 newval.encode('hex'), oldval.encode('hex')],
963 preexec_fn = _gitenv(repo_dir))
964 _git_wait('git update-ref', p)
967 def delete_ref(refname, oldvalue=None):
968 """Delete a repository reference (see git update-ref(1))."""
969 assert(refname.startswith('refs/'))
# When oldvalue is given, git verifies the ref still has that value
# before deleting it.
970 oldvalue = [] if not oldvalue else [oldvalue]
971 p = subprocess.Popen(['git', 'update-ref', '-d', refname] + oldvalue,
972 preexec_fn = _gitenv())
973 _git_wait('git update-ref', p)
976 def guess_repo(path=None):
977 """Set the path value in the global variable "repodir".
978 This makes bup look for an existing bup repository, but not fail if a
979 repository doesn't exist. Usually, if you are interacting with a bup
980 repository, you would not be calling this function but using
# When no explicit path is given, BUP_DIR wins over the ~/.bup default.
987 repodir = os.environ.get('BUP_DIR')
989 repodir = os.path.expanduser('~/.bup')
992 def init_repo(path=None):
993 """Create the Git bare repository for bup in a given path."""
995 d = repo() # appends a / to the path
996 parent = os.path.dirname(os.path.dirname(d))
997 if parent and not os.path.exists(parent):
998 raise GitError('parent directory "%s" does not exist\n' % parent)
999 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
1000 raise GitError('"%s" exists but is not a directory\n' % d)
1001 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
1002 preexec_fn = _gitenv())
1003 _git_wait('git init', p)
1004 # Force the index version configuration in order to ensure bup works
1005 # regardless of the version of the installed Git binary.
1006 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
1007 stdout=sys.stderr, preexec_fn = _gitenv())
1008 _git_wait('git config', p)
# Enable reflogs so ref updates leave a recoverable history.
1010 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
1011 stdout=sys.stderr, preexec_fn = _gitenv())
1012 _git_wait('git config', p)
1015 def check_repo_or_die(path=None):
1016 """Make sure a bup repository exists, and abort if not.
1017 If the path to a particular repository was not specified, this function
1018 initializes the default repository automatically.
# Probing objects/pack/. is a cheap way to tell a real repo exists.
1022 os.stat(repo('objects/pack/.'))
1023 except OSError as e:
1024 if e.errno == errno.ENOENT:
1025 log('error: %r is not a bup repository; run "bup init"\n'
1029 log('error: %s\n' % e)
1035 """Get Git's version and ensure a usable version is installed.
1037 The returned version is formatted as an ordered tuple with each position
1038 representing a digit in the version tag. For example, the following tuple
1039 would represent version 1.6.6.9:
1041 ('1', '6', '6', '9')
1045 p = subprocess.Popen(['git', '--version'],
1046 stdout=subprocess.PIPE)
1047 gvs = p.stdout.read()
1048 _git_wait('git --version', p)
1049 m = re.match(r'git version (\S+.\S+)', gvs)
1051 raise GitError('git --version weird output: %r' % gvs)
1052 _ver = tuple(m.group(1).split('.'))
1053 needed = ('1','5', '3', '1')
1055 raise GitError('git version %s or higher is required; you have %s'
1056 % ('.'.join(needed), '.'.join(_ver)))
1060 def _git_wait(cmd, p):
# Wait for subprocess p and raise GitError (naming cmd) on nonzero exit.
1063 raise GitError('%s returned %d' % (cmd, rv))
1066 def _git_capture(argv):
# Run argv with GIT_DIR set and return its stdout (read on an elided line).
1067 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
1069 _git_wait(repr(argv), p)
1073 class _AbortableIter:
# Iterator wrapper that invokes an abort callback when iteration is
# interrupted, so the underlying cat-file pipe can be resynchronized.
1074 def __init__(self, it, onabort = None):
1076 self.onabort = onabort
1084 return self.it.next()
1085 except StopIteration as e:
1093 """Abort iteration and call the abortion callback, if needed."""
1103 class MissingObject(KeyError):
# Raised when a requested object id is not present in the repository;
# `id` is the binary sha (hex-encoded only for the message).
1104 def __init__(self, id):
1106 KeyError.__init__(self, 'object %r is missing' % id.encode('hex'))
1111 """Link to 'git cat-file' that is used to retrieve blob data."""
1112 def __init__(self, repo_dir = None):
1114 self.repo_dir = repo_dir
# git older than 1.5.6 lacks a usable 'cat-file --batch'; fall back to
# one subprocess per object (the slow path).
1115 wanted = ('1','5','6')
1118 log('warning: git version < %s; bup will be slow.\n'
1121 self.get = self._slow_get
# Fast path: a single long-lived 'git cat-file --batch' subprocess.
1123 self.p = self.inprogress = None
1124 self.get = self._fast_get
1128 self.p.stdout.close()
1129 self.p.stdin.close()
1131 self.inprogress = None
1135 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1136 stdin=subprocess.PIPE,
1137 stdout=subprocess.PIPE,
1140 preexec_fn = _gitenv(self.repo_dir))
1142 def _fast_get(self, id):
# (Re)start the batch subprocess if it was never started or has died.
1143 if not self.p or self.p.poll() != None:
1146 poll_result = self.p.poll()
1147 assert(poll_result == None)
1149 log('_fast_get: opening %r while %r is open\n'
1150 % (id, self.inprogress))
1151 assert(not self.inprogress)
# Reject ids that could corrupt the line-oriented batch protocol.
1152 assert(id.find('\n') < 0)
1153 assert(id.find('\r') < 0)
1154 assert(not id.startswith('-'))
1155 self.inprogress = id
1156 self.p.stdin.write('%s\n' % id)
1157 self.p.stdin.flush()
# Batch header is "<sha> <type> <size>" or "<id> missing".
1158 hdr = self.p.stdout.readline()
1159 if hdr.endswith(' missing\n'):
1160 self.inprogress = None
1161 raise MissingObject(id.decode('hex'))
1162 spl = hdr.split(' ')
1163 if len(spl) != 3 or len(spl[0]) != 40:
1164 raise GitError('expected blob, got %r' % spl)
1165 (hex, type, size) = spl
# _AbortableIter lets an interrupted read resynchronize the pipe.
1167 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1168 onabort = self._abort)
# cat-file terminates each object with a newline; consume it.
1173 readline_result = self.p.stdout.readline()
1174 assert(readline_result == '\n')
1175 self.inprogress = None
1176 except Exception as e:
1180 def _slow_get(self, id):
1181 assert(id.find('\n') < 0)
1182 assert(id.find('\r') < 0)
1183 assert(id[0] != '-')
# Slow path for old git: one subprocess to get the type, another for
# the content itself.
1184 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1187 p = subprocess.Popen(['git', 'cat-file', type, id],
1188 stdout=subprocess.PIPE,
1189 preexec_fn = _gitenv(self.repo_dir))
1190 for blob in chunkyreader(p.stdout):
1192 _git_wait('git cat-file', p)
1194 def _join(self, it):
# Recursively expand an object into the contents of all reachable blobs.
1199 elif type == 'tree':
1200 treefile = ''.join(it)
1201 for (mode, name, sha) in tree_decode(treefile):
1202 for blob in self.join(sha.encode('hex')):
1204 elif type == 'commit':
# A commit contributes the blobs of its root tree; "tree <hex>" is
# always the first header line of a commit object.
1205 treeline = ''.join(it).split('\n')[0]
1206 assert(treeline.startswith('tree '))
1207 for blob in self.join(treeline[5:]):
1210 raise GitError('invalid object type %r: expected blob/tree/commit'
1214 """Generate a list of the content of all blobs that can be reached
1215 from an object. The hash given in 'id' must point to a blob, a tree
1216 or a commit. The content of all blobs that can be seen from trees or
1217 commits will be added to the list.
1220 for d in self._join(self.get(id)):
1222 except StopIteration:
1228 def cp(repo_dir=None):
1229 """Create a CatPipe object or reuse the already existing one."""
# One CatPipe is cached per absolute repo path in the module-global _cp.
1233 repo_dir = os.path.abspath(repo_dir)
1234 cp = _cp.get(repo_dir)
1236 cp = CatPipe(repo_dir)
1241 def tags(repo_dir = None):
1242 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1244 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
# Strip the 'refs/tags/' prefix to get the bare tag name.
1245 assert(n.startswith('refs/tags/'))
1249 tags[c].append(name) # more than one tag can point at 'c'
1253 WalkItem = namedtuple('WalkItem', ['id', 'type', 'mode',
1254 'path', 'chunk_path', 'data'])
1255 # The path is the mangled path, and if an item represents a fragment
1256 # of a chunked file, the chunk_path will be the chunked subtree path
1257 # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
1258 # chunked file will have a chunk_path of ['']. So some chunk subtree
1259 # of the file '/foo/bar/baz' might look like this:
1261 # item.path = ['foo', 'bar', 'baz.bup']
1262 # item.chunk_path = ['', '2d3115e', '016b097']
1263 # item.type = 'tree'
1267 def walk_object(cat_pipe, id,
1270 """Yield everything reachable from id via cat_pipe as a WalkItem,
1271 stopping whenever stop_at(id) returns true. Throw MissingObject
1272 if a hash encountered is missing from the repository.
1275 # Maintain the pending stack on the heap to avoid stack overflow
# Each pending entry is (hex id, parent_path, chunk_path, mode).
1276 pending = [(id, [], [], None)]
1278 id, parent_path, chunk_path, mode = pending.pop()
1279 if stop_at and stop_at(id):
1282 item_it = cat_pipe.get(id) # FIXME: use include_data
1283 type = item_it.next()
1284 if type not in ('blob', 'commit', 'tree'):
1285 raise Exception('unexpected repository object type %r' % type)
1287 # FIXME: set the mode based on the type when the mode is None
1288 if type == 'blob' and not include_data:
1289 # Dump data until we can ask cat_pipe not to fetch it
1290 for ignored in item_it:
1294 data = ''.join(item_it)
1296 yield WalkItem(id=id, type=type,
1297 chunk_path=chunk_path, path=parent_path,
1299 data=(data if include_data else None))
# Commits push their parents and then their root tree for traversal.
1301 if type == 'commit':
1302 commit_items = parse_commit(data)
1303 for pid in commit_items.parents:
1304 pending.append((pid, parent_path, chunk_path, mode))
1305 pending.append((commit_items.tree, parent_path, chunk_path,
1306 hashsplit.GIT_MODE_TREE))
1307 elif type == 'tree':
1308 for mode, name, ent_id in tree_decode(data):
1309 demangled, bup_type = demangle_name(name, mode)
# Inside a chunked file, subtree names extend chunk_path rather than
# the real filesystem path; a new chunked file resets chunk_path to [''].
1311 sub_path = parent_path
1312 sub_chunk_path = chunk_path + [name]
1314 sub_path = parent_path + [name]
1315 if bup_type == BUP_CHUNKED:
1316 sub_chunk_path = ['']
1318 sub_chunk_path = chunk_path
1319 pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,