1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, hashsplit, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
13 hostname, localtime, log, merge_iter,
14 mmap_read, mmap_readwrite,
15 progress, qprogress, unlink, username, userfullname,
19 max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
20 max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
26 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
27 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
33 class GitError(Exception):
37 def parse_tz_offset(s):
38 """UTC offset in seconds."""
39 tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
45 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
46 # Make sure that's authoritative.
47 _start_end_char = r'[^ .,:;<>"\'\0\n]'
48 _content_char = r'[^\0\n<>]'
49 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
51 _start_end_char, _content_char, _start_end_char)
52 _tz_rx = r'[-+]\d\d[0-5]\d'
53 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
54 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
55 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
56 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
58 (?P<message>(?:.|\n)*)''' % (_parent_rx,
59 _safe_str_rx, _safe_str_rx, _tz_rx,
60 _safe_str_rx, _safe_str_rx, _tz_rx))
61 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
64 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
65 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
66 'author_name', 'author_mail',
67 'author_sec', 'author_offset',
68 'committer_name', 'committer_mail',
69 'committer_sec', 'committer_offset',
72 def parse_commit(content):
73 commit_match = re.match(_commit_rx, content)
75 raise Exception('cannot parse commit %r' % content)
76 matches = commit_match.groupdict()
77 return CommitInfo(tree=matches['tree'],
78 parents=re.findall(_parent_hash_rx, matches['parents']),
79 author_name=matches['author_name'],
80 author_mail=matches['author_mail'],
81 author_sec=int(matches['asec']),
82 author_offset=parse_tz_offset(matches['atz']),
83 committer_name=matches['committer_name'],
84 committer_mail=matches['committer_mail'],
85 committer_sec=int(matches['csec']),
86 committer_offset=parse_tz_offset(matches['ctz']),
87 message=matches['message'])
def get_commit_items(id, cp):
    """Fetch the object named by id through cat-pipe cp and parse it.

    Asserts that the object's type is 'commit' and returns the parsed
    CommitInfo namedtuple.
    """
    item_it = cp.get(id)
    assert(next(item_it) == 'commit')
    return parse_commit(''.join(item_it))
def _local_git_date_str(epoch_sec):
    """Format epoch_sec as a git date string using the local UTC offset."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
101 def _git_date_str(epoch_sec, tz_offset_sec):
102 offs = tz_offset_sec // 60
103 return '%d %s%02d%02d' \
105 '+' if offs >= 0 else '-',
110 def repo(sub = '', repo_dir=None):
111 """Get the path to the git repository or one of its subdirectories."""
113 repo_dir = repo_dir or repodir
115 raise GitError('You should call check_repo_or_die()')
117 # If there's a .git subdirectory, then the actual repo is in there.
118 gd = os.path.join(repo_dir, '.git')
119 if os.path.exists(gd):
122 return os.path.join(repo_dir, sub)
126 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
131 full = os.path.abspath(path)
132 fullrepo = os.path.abspath(repo(''))
133 if not fullrepo.endswith('/'):
135 if full.startswith(fullrepo):
136 path = full[len(fullrepo):]
137 if path.startswith('index-cache/'):
138 path = path[len('index-cache/'):]
139 return shorten_hash(path)
143 paths = [repo('objects/pack')]
144 paths += glob.glob(repo('index-cache/*/.'))
148 def auto_midx(objdir):
149 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
151 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
153 # make sure 'args' gets printed to help with debugging
154 add_error('%r: exception: %s' % (args, e))
157 add_error('%r: returned %d' % (args, rv))
159 args = [path.exe(), 'bloom', '--dir', objdir]
161 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
163 # make sure 'args' gets printed to help with debugging
164 add_error('%r: exception: %s' % (args, e))
167 add_error('%r: returned %d' % (args, rv))
170 def mangle_name(name, mode, gitmode):
171 """Mangle a file name to present an abstract name for segmented files.
172 Mangled file names will have the ".bup" extension added to them. If a
173 file's name already ends with ".bup", a ".bupl" extension is added to
174 disambiguate normal files from segmented ones.
176 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
177 assert(stat.S_ISDIR(gitmode))
179 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
180 return name + '.bupl'
185 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
186 def demangle_name(name, mode):
187 """Remove name mangling from a file name, if necessary.
189 The return value is a tuple (demangled_filename,mode), where mode is one of
192 * BUP_NORMAL : files that should be read as-is from the repository
193 * BUP_CHUNKED : files that were chunked and need to be reassembled
195 For more information on the name mangling algorithm, see mangle_name()
197 if name.endswith('.bupl'):
198 return (name[:-5], BUP_NORMAL)
199 elif name.endswith('.bup'):
200 return (name[:-4], BUP_CHUNKED)
201 elif name.endswith('.bupm'):
203 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
205 return (name, BUP_NORMAL)
208 def calc_hash(type, content):
209 """Calculate some content's hash in the Git fashion."""
210 header = '%s %d\0' % (type, len(content))
216 def shalist_item_sort_key(ent):
217 (mode, name, id) = ent
218 assert(mode+0 == mode)
219 if stat.S_ISDIR(mode):
225 def tree_encode(shalist):
226 """Generate a git tree object from (mode,name,hash) tuples."""
227 shalist = sorted(shalist, key = shalist_item_sort_key)
229 for (mode,name,bin) in shalist:
231 assert(mode+0 == mode)
233 assert(len(bin) == 20)
234 s = '%o %s\0%s' % (mode,name,bin)
235 assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
240 def tree_decode(buf):
241 """Generate a list of (mode,name,hash) from the git tree object in buf."""
243 while ofs < len(buf):
244 z = buf.find('\0', ofs)
246 spl = buf[ofs:z].split(' ', 1)
247 assert(len(spl) == 2)
249 sha = buf[z+1:z+1+20]
251 yield (int(mode, 8), name, sha)
254 def _encode_packobj(type, content, compression_level=1):
257 szbits = (sz & 0x0f) | (_typemap[type]<<4)
260 if sz: szbits |= 0x80
266 if compression_level > 9:
267 compression_level = 9
268 elif compression_level < 0:
269 compression_level = 0
270 z = zlib.compressobj(compression_level)
272 yield z.compress(content)
276 def _encode_looseobj(type, content, compression_level=1):
277 z = zlib.compressobj(compression_level)
278 yield z.compress('%s %d\0' % (type, len(content)))
279 yield z.compress(content)
283 def _decode_looseobj(buf):
285 s = zlib.decompress(buf)
292 assert(type in _typemap)
293 assert(sz == len(content))
294 return (type, content)
297 def _decode_packobj(buf):
300 type = _typermap[(c & 0x70) >> 4]
307 sz |= (c & 0x7f) << shift
311 return (type, zlib.decompress(buf[i+1:]))
318 def find_offset(self, hash):
319 """Get the offset of an object inside the index file."""
320 idx = self._idx_from_hash(hash)
322 return self._ofs_from_idx(idx)
325 def exists(self, hash, want_source=False):
326 """Return nonempty if the object exists in this index."""
327 if hash and (self._idx_from_hash(hash) != None):
328 return want_source and os.path.basename(self.name) or True
332 return int(self.fanout[255])
334 def _idx_from_hash(self, hash):
335 global _total_searches, _total_steps
337 assert(len(hash) == 20)
339 start = self.fanout[b1-1] # range -1..254
340 end = self.fanout[b1] # range 0..255
342 _total_steps += 1 # lookup table is a step
345 mid = start + (end-start)/2
346 v = self._idx_to_hash(mid)
356 class PackIdxV1(PackIdx):
357 """Object representation of a Git pack index (version 1) file."""
358 def __init__(self, filename, f):
360 self.idxnames = [self.name]
361 self.map = mmap_read(f)
362 self.fanout = list(struct.unpack('!256I',
363 str(buffer(self.map, 0, 256*4))))
364 self.fanout.append(0) # entry "-1"
365 nsha = self.fanout[255]
367 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
    def _ofs_from_idx(self, idx):
        # V1 layout: each 24-byte entry starts with a 4-byte big-endian
        # pack-file offset, followed by the 20-byte sha.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
    def _idx_to_hash(self, idx):
        # V1 layout: the sha occupies bytes 4..24 of the 24-byte entry.
        return str(self.shatable[idx*24+4 : idx*24+24])
376 for i in xrange(self.fanout[255]):
377 yield buffer(self.map, 256*4 + 24*i + 4, 20)
380 class PackIdxV2(PackIdx):
381 """Object representation of a Git pack index (version 2) file."""
382 def __init__(self, filename, f):
384 self.idxnames = [self.name]
385 self.map = mmap_read(f)
386 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
387 self.fanout = list(struct.unpack('!256I',
388 str(buffer(self.map, 8, 256*4))))
389 self.fanout.append(0) # entry "-1"
390 nsha = self.fanout[255]
391 self.sha_ofs = 8 + 256*4
392 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
393 self.ofstable = buffer(self.map,
394 self.sha_ofs + nsha*20 + nsha*4,
396 self.ofs64table = buffer(self.map,
397 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
399 def _ofs_from_idx(self, idx):
400 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
402 idx64 = ofs & 0x7fffffff
403 ofs = struct.unpack('!Q',
404 str(buffer(self.ofs64table, idx64*8, 8)))[0]
    def _idx_to_hash(self, idx):
        # V2 layout: shas are packed contiguously, 20 bytes apiece.
        return str(self.shatable[idx*20:(idx+1)*20])
411 for i in xrange(self.fanout[255]):
412 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
417 def __init__(self, dir):
419 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
424 self.do_bloom = False
431 assert(_mpi_count == 0)
434 return iter(idxmerge(self.packs))
437 return sum(len(pack) for pack in self.packs)
439 def exists(self, hash, want_source=False):
440 """Return nonempty if the object exists in the index files."""
441 global _total_searches
443 if hash in self.also:
445 if self.do_bloom and self.bloom:
446 if self.bloom.exists(hash):
447 self.do_bloom = False
449 _total_searches -= 1 # was counted by bloom
451 for i in xrange(len(self.packs)):
453 _total_searches -= 1 # will be incremented by sub-pack
454 ix = p.exists(hash, want_source=want_source)
456 # reorder so most recently used packs are searched first
457 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
462 def refresh(self, skip_midx = False):
463 """Refresh the index list.
464 This method verifies if .midx files were superseded (e.g. all of its
465 contents are in another, bigger .midx file) and removes the superseded
468 If skip_midx is True, all work on .midx files will be skipped and .midx
469 files will be removed from the list.
471 The module-global variable 'ignore_midx' can force this function to
472 always act as if skip_midx was True.
474 self.bloom = None # Always reopen the bloom as it may have been replaced
475 self.do_bloom = False
476 skip_midx = skip_midx or ignore_midx
477 d = dict((p.name, p) for p in self.packs
478 if not skip_midx or not isinstance(p, midx.PackMidx))
479 if os.path.exists(self.dir):
482 for ix in self.packs:
483 if isinstance(ix, midx.PackMidx):
484 for name in ix.idxnames:
485 d[os.path.join(self.dir, name)] = ix
486 for full in glob.glob(os.path.join(self.dir,'*.midx')):
488 mx = midx.PackMidx(full)
489 (mxd, mxf) = os.path.split(mx.name)
491 for n in mx.idxnames:
492 if not os.path.exists(os.path.join(mxd, n)):
493 log(('warning: index %s missing\n' +
494 ' used by %s\n') % (n, mxf))
502 midxl.sort(key=lambda ix:
503 (-len(ix), -xstat.stat(ix.name).st_mtime))
506 for sub in ix.idxnames:
507 found = d.get(os.path.join(self.dir, sub))
508 if not found or isinstance(found, PackIdx):
509 # doesn't exist, or exists but not in a midx
514 for name in ix.idxnames:
515 d[os.path.join(self.dir, name)] = ix
516 elif not ix.force_keep:
517 debug1('midx: removing redundant: %s\n'
518 % os.path.basename(ix.name))
521 for full in glob.glob(os.path.join(self.dir,'*.idx')):
525 except GitError as e:
529 bfull = os.path.join(self.dir, 'bup.bloom')
530 if self.bloom is None and os.path.exists(bfull):
531 self.bloom = bloom.ShaBloom(bfull)
532 self.packs = list(set(d.values()))
533 self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
534 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
538 debug1('PackIdxList: using %d index%s.\n'
539 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
542 """Insert an additional object in the list."""
546 def open_idx(filename):
547 if filename.endswith('.idx'):
548 f = open(filename, 'rb')
550 if header[0:4] == '\377tOc':
551 version = struct.unpack('!I', header[4:8])[0]
553 return PackIdxV2(filename, f)
555 raise GitError('%s: expected idx file version 2, got %d'
556 % (filename, version))
557 elif len(header) == 8 and header[0:4] < '\377tOc':
558 return PackIdxV1(filename, f)
560 raise GitError('%s: unrecognized idx file header' % filename)
561 elif filename.endswith('.midx'):
562 return midx.PackMidx(filename)
564 raise GitError('idx filenames must end with .idx or .midx')
567 def idxmerge(idxlist, final_progress=True):
568 """Generate a list of all the objects reachable in a PackIdxList."""
569 def pfunc(count, total):
570 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
571 % (count*100.0/total, count, total))
572 def pfinal(count, total):
574 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
575 % (100, total, total))
576 return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    # Default objcache factory for PackWriter: an index over the local
    # repository's pack directory.
    return PackIdxList(repo('objects/pack'))
583 """Writes Git objects inside a pack file."""
584 def __init__(self, objcache_maker=_make_objcache, compression_level=1):
591 self.objcache_maker = objcache_maker
593 self.compression_level = compression_level
600 objdir = dir=repo('objects')
601 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
603 self.file = os.fdopen(fd, 'w+b')
608 self.parentfd = os.open(objdir, os.O_RDONLY)
614 assert(name.endswith('.pack'))
615 self.filename = name[:-5]
616 self.file.write('PACK\0\0\0\2\0\0\0\0')
617 self.idx = list(list() for i in xrange(256))
619 def _raw_write(self, datalist, sha):
622 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
623 # the file never has a *partial* blob. So let's make sure it's
624 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
625 # to our hashsplit algorithm.) f.write() does its own buffering,
626 # but that's okay because we'll flush it in _end().
627 oneblob = ''.join(datalist)
631 raise GitError, e, sys.exc_info()[2]
633 crc = zlib.crc32(oneblob) & 0xffffffff
634 self._update_idx(sha, crc, nw)
639 def _update_idx(self, sha, crc, size):
642 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
644 def _write(self, sha, type, content):
648 sha = calc_hash(type, content)
649 size, crc = self._raw_write(_encode_packobj(type, content,
650 self.compression_level),
652 if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
656 def breakpoint(self):
657 """Clear byte and object counts and return the last processed id."""
659 self.outbytes = self.count = 0
662 def _require_objcache(self):
663 if self.objcache is None and self.objcache_maker:
664 self.objcache = self.objcache_maker()
665 if self.objcache is None:
667 "PackWriter not opened or can't check exists w/o objcache")
669 def exists(self, id, want_source=False):
670 """Return non-empty if an object is found in the object cache."""
671 self._require_objcache()
672 return self.objcache.exists(id, want_source=want_source)
674 def maybe_write(self, type, content):
675 """Write an object to the pack file if not present and return its id."""
676 sha = calc_hash(type, content)
677 if not self.exists(sha):
678 self._write(sha, type, content)
679 self._require_objcache()
680 self.objcache.add(sha)
683 def new_blob(self, blob):
684 """Create a blob object in the pack with the supplied content."""
685 return self.maybe_write('blob', blob)
687 def new_tree(self, shalist):
688 """Create a tree object in the pack."""
689 content = tree_encode(shalist)
690 return self.maybe_write('tree', content)
692 def new_commit(self, tree, parent,
693 author, adate_sec, adate_tz,
694 committer, cdate_sec, cdate_tz,
696 """Create a commit object in the pack. The date_sec values must be
697 epoch-seconds, and if a tz is None, the local timezone is assumed."""
699 adate_str = _git_date_str(adate_sec, adate_tz)
701 adate_str = _local_git_date_str(adate_sec)
703 cdate_str = _git_date_str(cdate_sec, cdate_tz)
705 cdate_str = _local_git_date_str(cdate_sec)
707 if tree: l.append('tree %s' % tree.encode('hex'))
708 if parent: l.append('parent %s' % parent.encode('hex'))
709 if author: l.append('author %s %s' % (author, adate_str))
710 if committer: l.append('committer %s %s' % (committer, cdate_str))
713 return self.maybe_write('commit', '\n'.join(l))
716 """Remove the pack file from disk."""
725 os.unlink(self.filename + '.pack')
732 def _end(self, run_midx=True):
734 if not f: return None
741 # update object count
743 cp = struct.pack('!i', self.count)
747 # calculate the pack sha1sum
750 for b in chunkyreader(f):
752 packbin = sum.digest()
754 fdatasync(f.fileno())
758 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
760 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
761 if os.path.exists(self.filename + '.map'):
762 os.unlink(self.filename + '.map')
763 os.rename(self.filename + '.pack', nameprefix + '.pack')
764 os.rename(self.filename + '.idx', nameprefix + '.idx')
766 os.fsync(self.parentfd)
768 os.close(self.parentfd)
771 auto_midx(repo('objects/pack'))
    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path.

        Delegates all work (flushing, index writing, renaming, and the
        optional midx regeneration) to _end().
        """
        return self._end(run_midx=run_midx)
778 def _write_pack_idx_v2(self, filename, idx, packbin):
781 for entry in section:
782 if entry[2] >= 2**31:
785 # Length: header + fan-out + shas-and-crcs + overflow-offsets
786 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
788 idx_f = open(filename, 'w+b')
790 idx_f.truncate(index_len)
791 fdatasync(idx_f.fileno())
792 idx_map = mmap_readwrite(idx_f, close=False)
794 count = _helpers.write_idx(filename, idx_map, idx, self.count)
795 assert(count == self.count)
802 idx_f = open(filename, 'a+b')
807 b = idx_f.read(8 + 4*256)
810 obj_list_sum = Sha1()
811 for b in chunkyreader(idx_f, 20*self.count):
813 obj_list_sum.update(b)
814 namebase = obj_list_sum.hexdigest()
816 for b in chunkyreader(idx_f):
818 idx_f.write(idx_sum.digest())
819 fdatasync(idx_f.fileno())
825 def _gitenv(repo_dir = None):
829 os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
833 def list_refs(refname=None, repo_dir=None,
834 limit_to_heads=False, limit_to_tags=False):
835 """Yield (refname, hash) tuples for all repository refs unless a ref
836 name is specified. Given a ref name, only include tuples for that
837 particular ref. The limits restrict the result items to
838 refs/heads or refs/tags. If both limits are specified, items from
839 both sources will be included.
842 argv = ['git', 'show-ref']
844 argv.append('--heads')
846 argv.append('--tags')
850 p = subprocess.Popen(argv,
851 preexec_fn = _gitenv(repo_dir),
852 stdout = subprocess.PIPE)
853 out = p.stdout.read().strip()
854 rv = p.wait() # not fatal
858 for d in out.split('\n'):
859 (sha, name) = d.split(' ', 1)
860 yield (name, sha.decode('hex'))
863 def read_ref(refname, repo_dir = None):
864 """Get the commit id of the most recent commit made on a given ref."""
865 refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
866 l = tuple(islice(refs, 2))
874 def rev_list(ref, count=None, repo_dir=None):
875 """Generate a list of reachable commits in reverse chronological order.
877 This generator walks through commits, from child to parent, that are
878 reachable via the specified ref and yields a series of tuples of the form
881 If count is a non-zero integer, limit the number of commits to "count"
884 assert(not ref.startswith('-'))
887 opts += ['-n', str(atoi(count))]
888 argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
889 p = subprocess.Popen(argv,
890 preexec_fn = _gitenv(repo_dir),
891 stdout = subprocess.PIPE)
895 if s.startswith('commit '):
896 commit = s[7:].decode('hex')
900 rv = p.wait() # not fatal
902 raise GitError, 'git rev-list returned error %d' % rv
905 def get_commit_dates(refs, repo_dir=None):
906 """Get the dates for the specified commit refs. For now, every unique
907 string in refs must resolve to a different commit or this
908 function will fail."""
911 commit = get_commit_items(ref, cp(repo_dir))
912 result.append(commit.author_sec)
916 def rev_parse(committish, repo_dir=None):
917 """Resolve the full hash for 'committish', if it exists.
919 Should be roughly equivalent to 'git rev-parse'.
921 Returns the hex value of the hash if it is found, None if 'committish' does
922 not correspond to anything.
924 head = read_ref(committish, repo_dir=repo_dir)
926 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
929 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
931 if len(committish) == 40:
933 hash = committish.decode('hex')
943 def update_ref(refname, newval, oldval, repo_dir=None):
944 """Update a repository reference."""
947 assert(refname.startswith('refs/heads/') \
948 or refname.startswith('refs/tags/'))
949 p = subprocess.Popen(['git', 'update-ref', refname,
950 newval.encode('hex'), oldval.encode('hex')],
951 preexec_fn = _gitenv(repo_dir))
952 _git_wait('git update-ref', p)
def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1)).

    If oldvalue is given, it is passed along so git can verify the ref's
    current value before deleting it.
    """
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    if oldvalue:
        argv.append(oldvalue)
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
964 def guess_repo(path=None):
965 """Set the path value in the global variable "repodir".
966 This makes bup look for an existing bup repository, but not fail if a
967 repository doesn't exist. Usually, if you are interacting with a bup
968 repository, you would not be calling this function but using
975 repodir = os.environ.get('BUP_DIR')
977 repodir = os.path.expanduser('~/.bup')
980 def init_repo(path=None):
981 """Create the Git bare repository for bup in a given path."""
983 d = repo() # appends a / to the path
984 parent = os.path.dirname(os.path.dirname(d))
985 if parent and not os.path.exists(parent):
986 raise GitError('parent directory "%s" does not exist\n' % parent)
987 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
988 raise GitError('"%s" exists but is not a directory\n' % d)
989 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
990 preexec_fn = _gitenv())
991 _git_wait('git init', p)
992 # Force the index version configuration in order to ensure bup works
993 # regardless of the version of the installed Git binary.
994 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
995 stdout=sys.stderr, preexec_fn = _gitenv())
996 _git_wait('git config', p)
998 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
999 stdout=sys.stderr, preexec_fn = _gitenv())
1000 _git_wait('git config', p)
1003 def check_repo_or_die(path=None):
1004 """Make sure a bup repository exists, and abort if not.
1005 If the path to a particular repository was not specified, this function
1006 initializes the default repository automatically.
1010 os.stat(repo('objects/pack/.'))
1011 except OSError as e:
1012 if e.errno == errno.ENOENT:
1013 log('error: %r is not a bup repository; run "bup init"\n'
1017 log('error: %s\n' % e)
1023 """Get Git's version and ensure a usable version is installed.
1025 The returned version is formatted as an ordered tuple with each position
1026 representing a digit in the version tag. For example, the following tuple
1027 would represent version 1.6.6.9:
1029 ('1', '6', '6', '9')
1033 p = subprocess.Popen(['git', '--version'],
1034 stdout=subprocess.PIPE)
1035 gvs = p.stdout.read()
1036 _git_wait('git --version', p)
1037 m = re.match(r'git version (\S+.\S+)', gvs)
1039 raise GitError('git --version weird output: %r' % gvs)
1040 _ver = tuple(m.group(1).split('.'))
1041 needed = ('1','5', '3', '1')
1043 raise GitError('git version %s or higher is required; you have %s'
1044 % ('.'.join(needed), '.'.join(_ver)))
1048 def _git_wait(cmd, p):
1051 raise GitError('%s returned %d' % (cmd, rv))
1054 def _git_capture(argv):
1055 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
1057 _git_wait(repr(argv), p)
1061 class _AbortableIter:
1062 def __init__(self, it, onabort = None):
1064 self.onabort = onabort
1072 return self.it.next()
1073 except StopIteration as e:
1081 """Abort iteration and call the abortion callback, if needed."""
1093 """Link to 'git cat-file' that is used to retrieve blob data."""
1094 def __init__(self, repo_dir = None):
1096 self.repo_dir = repo_dir
1097 wanted = ('1','5','6')
1100 log('warning: git version < %s; bup will be slow.\n'
1103 self.get = self._slow_get
1105 self.p = self.inprogress = None
1106 self.get = self._fast_get
1110 self.p.stdout.close()
1111 self.p.stdin.close()
1113 self.inprogress = None
1117 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1118 stdin=subprocess.PIPE,
1119 stdout=subprocess.PIPE,
1122 preexec_fn = _gitenv(self.repo_dir))
1124 def _fast_get(self, id):
1125 if not self.p or self.p.poll() != None:
1128 poll_result = self.p.poll()
1129 assert(poll_result == None)
1131 log('_fast_get: opening %r while %r is open\n'
1132 % (id, self.inprogress))
1133 assert(not self.inprogress)
1134 assert(id.find('\n') < 0)
1135 assert(id.find('\r') < 0)
1136 assert(not id.startswith('-'))
1137 self.inprogress = id
1138 self.p.stdin.write('%s\n' % id)
1139 self.p.stdin.flush()
1140 hdr = self.p.stdout.readline()
1141 if hdr.endswith(' missing\n'):
1142 self.inprogress = None
1143 raise KeyError('blob %r is missing' % id)
1144 spl = hdr.split(' ')
1145 if len(spl) != 3 or len(spl[0]) != 40:
1146 raise GitError('expected blob, got %r' % spl)
1147 (hex, type, size) = spl
1149 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1150 onabort = self._abort)
1155 readline_result = self.p.stdout.readline()
1156 assert(readline_result == '\n')
1157 self.inprogress = None
1158 except Exception as e:
1162 def _slow_get(self, id):
1163 assert(id.find('\n') < 0)
1164 assert(id.find('\r') < 0)
1165 assert(id[0] != '-')
1166 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1169 p = subprocess.Popen(['git', 'cat-file', type, id],
1170 stdout=subprocess.PIPE,
1171 preexec_fn = _gitenv(self.repo_dir))
1172 for blob in chunkyreader(p.stdout):
1174 _git_wait('git cat-file', p)
1176 def _join(self, it):
1181 elif type == 'tree':
1182 treefile = ''.join(it)
1183 for (mode, name, sha) in tree_decode(treefile):
1184 for blob in self.join(sha.encode('hex')):
1186 elif type == 'commit':
1187 treeline = ''.join(it).split('\n')[0]
1188 assert(treeline.startswith('tree '))
1189 for blob in self.join(treeline[5:]):
1192 raise GitError('invalid object type %r: expected blob/tree/commit'
1196 """Generate a list of the content of all blobs that can be reached
1197 from an object. The hash given in 'id' must point to a blob, a tree
1198 or a commit. The content of all blobs that can be seen from trees or
1199 commits will be added to the list.
1202 for d in self._join(self.get(id)):
1204 except StopIteration:
1210 def cp(repo_dir=None):
1211 """Create a CatPipe object or reuse the already existing one."""
1215 repo_dir = os.path.abspath(repo_dir)
1216 cp = _cp.get(repo_dir)
1218 cp = CatPipe(repo_dir)
1223 def tags(repo_dir = None):
1224 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1226 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
1227 assert(n.startswith('refs/tags/'))
1231 tags[c].append(name) # more than one tag can point at 'c'
1235 WalkItem = namedtuple('WalkItem', ['id', 'type', 'mode',
1236 'path', 'chunk_path', 'data'])
1237 # The path is the mangled path, and if an item represents a fragment
1238 # of a chunked file, the chunk_path will be the chunked subtree path
1239 # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
1240 # chunked file will have a chunk_path of ['']. So some chunk subtree
1241 # of the file '/foo/bar/baz' might look like this:
1243 # item.path = ['foo', 'bar', 'baz.bup']
1244 # item.chunk_path = ['', '2d3115e', '016b097']
1245 # item.type = 'tree'
1249 def _walk_object(cat_pipe, id,
1250 parent_path, chunk_path,
1255 if stop_at and stop_at(id):
1258 item_it = cat_pipe.get(id) # FIXME: use include_data
1259 type = item_it.next()
1261 if type not in ('blob', 'commit', 'tree'):
1262 raise Exception('unexpected repository object type %r' % type)
1264 # FIXME: set the mode based on the type when the mode is None
1266 if type == 'blob' and not include_data:
1267 # Dump data until we can ask cat_pipe not to fetch it
1268 for ignored in item_it:
1272 data = ''.join(item_it)
1274 yield WalkItem(id=id, type=type,
1275 chunk_path=chunk_path, path=parent_path,
1277 data=(data if include_data else None))
1279 if type == 'commit':
1280 commit_items = parse_commit(data)
1281 tree_id = commit_items.tree
1282 for x in _walk_object(cat_pipe, tree_id, parent_path, chunk_path,
1283 mode=hashsplit.GIT_MODE_TREE,
1285 include_data=include_data):
1287 parents = commit_items.parents
1289 for x in _walk_object(cat_pipe, pid, parent_path, chunk_path,
1290 mode=mode, # Same mode as this child
1292 include_data=include_data):
1294 elif type == 'tree':
1295 for mode, name, ent_id in tree_decode(data):
1296 demangled, bup_type = demangle_name(name, mode)
1298 sub_path = parent_path
1299 sub_chunk_path = chunk_path + [name]
1301 sub_path = parent_path + [name]
1302 if bup_type == BUP_CHUNKED:
1303 sub_chunk_path = ['']
1305 sub_chunk_path = chunk_path
1306 for x in _walk_object(cat_pipe, ent_id.encode('hex'),
1307 sub_path, sub_chunk_path,
1310 include_data=include_data):
1314 def walk_object(cat_pipe, id,
1317 """Yield everything reachable from id via cat_pipe as a WalkItem,
1318 stopping whenever stop_at(id) returns true."""
1319 return _walk_object(cat_pipe, id, [], [],
1321 include_data=include_data)