1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
13 hostname, log, merge_iter, mmap_read, mmap_readwrite,
14 progress, qprogress, unlink, username, userfullname,
# Thresholds at which PackWriter finishes the current pack and starts a new one.
18 max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
19 max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object

# Mapping between git object-type names and the numeric type codes used in
# packfiles (and the reverse mapping).
25 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
26 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
32 class GitError(Exception):
def parse_tz_offset(s):
    """Return the UTC offset in seconds for a git-style offset string.

    s looks like '[+-]HHMM', e.g. '+0130' -> 5400 and '-0500' -> -18000.
    """
    # Magnitude: hours and minutes converted to seconds.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
    # The leading sign character decides the direction of the offset;
    # without this the function would silently drop negative offsets.
    if s[0] == '-':
        return - tz_off
    return tz_off
44 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
45 # Make sure that's authoritative.
# Character classes used to match the name/email fields of the author and
# committer lines of a commit object.
46 _start_end_char = r'[^ .,:;<>"\'\0\n]'
47 _content_char = r'[^\0\n<>]'
48 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
50 _start_end_char, _content_char, _start_end_char)
# Timezone as git writes it: sign, two hour digits, two minute digits.
51 _tz_rx = r'[-+]\d\d[0-5]\d'
52 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Matches an entire raw commit object body; named groups feed CommitInfo.
53 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
54 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
55 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
57 (?P<message>(?:.|\n)*)''' % (_parent_rx,
58 _safe_str_rx, _safe_str_rx, _tz_rx,
59 _safe_str_rx, _safe_str_rx, _tz_rx))
# Extracts the individual parent hashes from the combined "parents" group.
60 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')

63 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
64 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
65 'author_name', 'author_mail',
66 'author_sec', 'author_offset',
67 'committer_name', 'committer_mail',
68 'committer_sec', 'committer_offset',

71 def parse_commit(content):
    # Parse a raw commit object body into a CommitInfo namedtuple.  Raises
    # Exception when content does not match _commit_rx (malformed commit).
72 commit_match = re.match(_commit_rx, content)
74 raise Exception('cannot parse commit %r' % content)
75 matches = commit_match.groupdict()
76 return CommitInfo(tree=matches['tree'],
77 parents=re.findall(_parent_hash_rx, matches['parents']),
78 author_name=matches['author_name'],
79 author_mail=matches['author_mail'],
80 author_sec=int(matches['asec']),
81 author_offset=parse_tz_offset(matches['atz']),
82 committer_name=matches['committer_name'],
83 committer_mail=matches['committer_mail'],
84 committer_sec=int(matches['csec']),
85 committer_offset=parse_tz_offset(matches['ctz']),
86 message=matches['message'])
def get_commit_items(id, cp):
    """Return the parsed CommitInfo for the commit object named by id.

    cp is a cat-pipe object whose get() yields the object's type string
    first, followed by the raw commit content in chunks.
    """
    item_iter = cp.get(id)
    kind = item_iter.next()
    assert kind == 'commit'
    return parse_commit(''.join(item_iter))
96 def repo(sub = '', repo_dir=None):
97 """Get the path to the git repository or one of its subdirectories."""
    # Fall back to the module-global repodir set by check_repo_or_die().
99 repo_dir = repo_dir or repodir
101 raise GitError('You should call check_repo_or_die()')
103 # If there's a .git subdirectory, then the actual repo is in there.
104 gd = os.path.join(repo_dir, '.git')
105 if os.path.exists(gd):
108 return os.path.join(repo_dir, sub)

# (excerpt) body of shorten_hash(): abbreviate embedded 40-hex-digit ids
# down to their first 7 digits for display purposes.
112 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',

# (excerpt) body of repo_rel(): rewrite an absolute path relative to the
# repository root (and its index-cache) before shortening hashes.
117 full = os.path.abspath(path)
118 fullrepo = os.path.abspath(repo(''))
119 if not fullrepo.endswith('/'):
121 if full.startswith(fullrepo):
122 path = full[len(fullrepo):]
123 if path.startswith('index-cache/'):
124 path = path[len('index-cache/'):]
125 return shorten_hash(path)

# (excerpt) body of all_packdirs(): local pack dir plus any index-cache dirs.
129 paths = [repo('objects/pack')]
130 paths += glob.glob(repo('index-cache/*/.'))
def _run_index_tool(args):
    """Run one bup index-maintenance command with stdout discarded.

    Failures (spawn errors or a nonzero exit status) are reported through
    add_error() so the offending command line is visible when debugging.
    """
    try:
        # Use os.devnull and a with-block so the discard handle is always
        # closed (open('/dev/null', 'w') would leak the file object).
        with open(os.devnull, 'w') as null:
            rv = subprocess.call(args, stdout=null)
    except OSError as e:
        # make sure 'args' gets printed to help with debugging
        add_error('%r: exception: %s' % (args, e))
        return
    if rv:
        add_error('%r: returned %d' % (args, rv))


def auto_midx(objdir):
    """Regenerate the .midx and bloom indexes for the pack directory objdir."""
    _run_index_tool([path.exe(), 'midx', '--auto', '--dir', objdir])
    _run_index_tool([path.exe(), 'bloom', '--dir', objdir])
156 def mangle_name(name, mode, gitmode):
157 """Mangle a file name to present an abstract name for segmented files.
158 Mangled file names will have the ".bup" extension added to them. If a
159 file's name already ends with ".bup", a ".bupl" extension is added to
160 disambiguate normal files from segmented ones.
    # A regular file stored as a git tree means it was chunked by hashsplit.
162 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
163 assert(stat.S_ISDIR(gitmode))
    # name[:-1] also catches names like "foo.bup?" whose demangled form
    # would otherwise collide with the chunked ".bup" suffix.
165 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
166 return name + '.bupl'

# Result codes returned by demangle_name() below.
171 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
172 def demangle_name(name, mode):
173 """Remove name mangling from a file name, if necessary.
175 The return value is a tuple (demangled_filename,mode), where mode is one of
178 * BUP_NORMAL : files that should be read as-is from the repository
179 * BUP_CHUNKED : files that were chunked and need to be reassembled
181 For more information on the name mangling algorithm, see mangle_name()
183 if name.endswith('.bupl'):
184 return (name[:-5], BUP_NORMAL)
185 elif name.endswith('.bup'):
186 return (name[:-4], BUP_CHUNKED)
    # .bupm is a metadata blob; whether it reads as chunked depends on the
    # git mode of the entry it accompanies.
187 elif name.endswith('.bupm'):
189 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
191 return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion.

    Git hashes an object as sha1('<type> <len>\\0' + content); this returns
    the 20-byte binary digest.
    """
    header = '%s %d\0' % (type, len(content))
    # The visible excerpt built the header but never hashed or returned
    # anything; feed header then content to the hash and return the digest.
    hasher = Sha1(header)
    hasher.update(content)
    return hasher.digest()
202 def shalist_item_sort_key(ent):
203 (mode, name, id) = ent
204 assert(mode+0 == mode)
205 if stat.S_ISDIR(mode):
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples.

    The entries are sorted the way git requires, each encoded as
    '<octal mode> <name>\\0<20-byte binary sha>', and concatenated.
    """
    shalist = sorted(shalist, key = shalist_item_sort_key)
    parts = []  # accumulate entries; the excerpt never collected/returned them
    for (mode,name,bin) in shalist:
        assert(mode)
        assert(mode+0 == mode)
        assert(name)
        assert(len(bin) == 20)
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
        parts.append(s)
    return ''.join(parts)
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf.

    Each entry in buf is '<octal mode> <name>\\0' followed by a 20-byte
    binary sha; mode is yielded as an int.
    """
    ofs = 0  # the excerpt never initialized/advanced ofs -> broken loop
    while ofs < len(buf):
        z = buf.find('\0', ofs)
        assert(z > ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        mode,name = spl
        sha = buf[z+1:z+1+20]
        ofs = z+1+20  # step past the NUL and the fixed-size binary sha
        yield (int(mode, 8), name, sha)
240 def _encode_packobj(type, content, compression_level=1):
    # Build the packfile object header: low 4 bits of the size plus the
    # 3-bit type code, then a base-128 varint continuation for the rest.
243 szbits = (sz & 0x0f) | (_typemap[type]<<4)
246 if sz: szbits |= 0x80
    # Clamp the zlib level into its valid 0..9 range.
252 if compression_level > 9:
253 compression_level = 9
254 elif compression_level < 0:
255 compression_level = 0
256 z = zlib.compressobj(compression_level)
258 yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    """Yield the zlib-compressed loose-object encoding of content.

    The uncompressed stream is '<type> <len>\\0' + content, matching git's
    loose object format.
    """
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
    # Without flush() the compressor keeps buffered data and the emitted
    # zlib stream is truncated/invalid.
    yield z.flush()
269 def _decode_looseobj(buf):
    # Inflate and split '<type> <len>\0<content>'; sanity-check both fields.
271 s = zlib.decompress(buf)
278 assert(type in _typemap)
279 assert(sz == len(content))
280 return (type, content)

283 def _decode_packobj(buf):
    # First header byte: bits 4-6 are the object type code.
286 type = _typermap[(c & 0x70) >> 4]
    # Remaining size bits arrive as a little-endian base-128 varint.
293 sz |= (c & 0x7f) << shift
297 return (type, zlib.decompress(buf[i+1:]))
# --- methods of PackIdx (class statement elided in this excerpt) ---
304 def find_offset(self, hash):
305 """Get the offset of an object inside the index file."""
306 idx = self._idx_from_hash(hash)
308 return self._ofs_from_idx(idx)

311 def exists(self, hash, want_source=False):
312 """Return nonempty if the object exists in this index."""
    # != None (not 'is not None'): index 0 is a valid hit and is not falsy here.
313 if hash and (self._idx_from_hash(hash) != None):
314 return want_source and os.path.basename(self.name) or True

# (excerpt) __len__: total object count is the last fanout entry.
318 return int(self.fanout[255])

320 def _idx_from_hash(self, hash):
321 global _total_searches, _total_steps
323 assert(len(hash) == 20)
    # The fanout table gives the index range of entries whose first byte
    # is b1; binary-search within that range.
325 start = self.fanout[b1-1] # range -1..254
326 end = self.fanout[b1] # range 0..255
328 _total_steps += 1 # lookup table is a step
331 mid = start + (end-start)/2
332 v = self._idx_to_hash(mid)

342 class PackIdxV1(PackIdx):
343 """Object representation of a Git pack index (version 1) file."""
344 def __init__(self, filename, f):
346 self.idxnames = [self.name]
347 self.map = mmap_read(f)
    # v1 layout: 256-entry fanout of 4-byte counts, then 24-byte rows of
    # (4-byte offset, 20-byte sha).
348 self.fanout = list(struct.unpack('!256I',
349 str(buffer(self.map, 0, 256*4))))
350 self.fanout.append(0) # entry "-1"
351 nsha = self.fanout[255]
353 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

355 def _ofs_from_idx(self, idx):
356 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

358 def _idx_to_hash(self, idx):
359 return str(self.shatable[idx*24+4 : idx*24+24])

# (excerpt) __iter__: yield each 20-byte sha, skipping its 4-byte offset.
362 for i in xrange(self.fanout[255]):
363 yield buffer(self.map, 256*4 + 24*i + 4, 20)

366 class PackIdxV2(PackIdx):
367 """Object representation of a Git pack index (version 2) file."""
368 def __init__(self, filename, f):
370 self.idxnames = [self.name]
371 self.map = mmap_read(f)
    # v2 magic '\377tOc' plus version number 2.
372 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
373 self.fanout = list(struct.unpack('!256I',
374 str(buffer(self.map, 8, 256*4))))
375 self.fanout.append(0) # entry "-1"
376 nsha = self.fanout[255]
377 self.sha_ofs = 8 + 256*4
    # v2 layout: shas, then CRCs, then 4-byte offsets, then 8-byte
    # overflow offsets for packs > 2GB.
378 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
379 self.ofstable = buffer(self.map,
380 self.sha_ofs + nsha*20 + nsha*4,
382 self.ofs64table = buffer(self.map,
383 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

385 def _ofs_from_idx(self, idx):
386 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
    # High bit set means the low 31 bits index into the 64-bit table.
388 idx64 = ofs & 0x7fffffff
389 ofs = struct.unpack('!Q',
390 str(buffer(self.ofs64table, idx64*8, 8)))[0]

393 def _idx_to_hash(self, idx):
394 return str(self.shatable[idx*20:(idx+1)*20])

# (excerpt) __iter__: yield each 20-byte sha from the sha table.
397 for i in xrange(self.fanout[255]):
398 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
# --- methods of PackIdxList (class statement elided in this excerpt) ---
403 def __init__(self, dir):
405 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
410 self.do_bloom = False

# (excerpt) __del__ decrements the singleton guard counter.
417 assert(_mpi_count == 0)

# (excerpt) __iter__ merges all member pack indexes.
420 return iter(idxmerge(self.packs))

# (excerpt) __len__ is the sum of member pack sizes.
423 return sum(len(pack) for pack in self.packs)

425 def exists(self, hash, want_source=False):
426 """Return nonempty if the object exists in the index files."""
427 global _total_searches
429 if hash in self.also:
    # Consult the bloom filter first: a negative answer is definitive,
    # so only fall through to the packs on a (possible) positive.
431 if self.do_bloom and self.bloom:
432 if self.bloom.exists(hash):
433 self.do_bloom = False
435 _total_searches -= 1 # was counted by bloom
437 for i in xrange(len(self.packs)):
439 _total_searches -= 1 # will be incremented by sub-pack
440 ix = p.exists(hash, want_source=want_source)
442 # reorder so most recently used packs are searched first
443 self.packs = [p] + self.packs[:i] + self.packs[i+1:]

448 def refresh(self, skip_midx = False):
449 """Refresh the index list.
450 This method verifies if .midx files were superseded (e.g. all of its
451 contents are in another, bigger .midx file) and removes the superseded
454 If skip_midx is True, all work on .midx files will be skipped and .midx
455 files will be removed from the list.
457 The module-global variable 'ignore_midx' can force this function to
458 always act as if skip_midx was True.
460 self.bloom = None # Always reopen the bloom as it may have been replaced
461 self.do_bloom = False
462 skip_midx = skip_midx or ignore_midx
    # Keep already-open indexes so we don't reopen files needlessly.
463 d = dict((p.name, p) for p in self.packs
464 if not skip_midx or not isinstance(p, midx.PackMidx))
465 if os.path.exists(self.dir):
468 for ix in self.packs:
469 if isinstance(ix, midx.PackMidx):
470 for name in ix.idxnames:
471 d[os.path.join(self.dir, name)] = ix
472 for full in glob.glob(os.path.join(self.dir,'*.midx')):
474 mx = midx.PackMidx(full)
475 (mxd, mxf) = os.path.split(mx.name)
477 for n in mx.idxnames:
478 if not os.path.exists(os.path.join(mxd, n)):
479 log(('warning: index %s missing\n' +
480 ' used by %s\n') % (n, mxf))
    # Prefer bigger, newer midx files; drop ones whose contents are
    # fully covered by another midx.
488 midxl.sort(key=lambda ix:
489 (-len(ix), -xstat.stat(ix.name).st_mtime))
492 for sub in ix.idxnames:
493 found = d.get(os.path.join(self.dir, sub))
494 if not found or isinstance(found, PackIdx):
495 # doesn't exist, or exists but not in a midx
500 for name in ix.idxnames:
501 d[os.path.join(self.dir, name)] = ix
502 elif not ix.force_keep:
503 debug1('midx: removing redundant: %s\n'
504 % os.path.basename(ix.name))
507 for full in glob.glob(os.path.join(self.dir,'*.idx')):
511 except GitError as e:
515 bfull = os.path.join(self.dir, 'bup.bloom')
516 if self.bloom is None and os.path.exists(bfull):
517 self.bloom = bloom.ShaBloom(bfull)
518 self.packs = list(set(d.values()))
    # Search the biggest indexes first.
519 self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
    # Only trust the bloom filter if it covers every object we know about.
520 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
524 debug1('PackIdxList: using %d index%s.\n'
525 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))

# (excerpt) add(): remember an extra object hash alongside the index files.
528 """Insert an additional object in the list."""
532 def open_idx(filename):
    # Dispatch on the filename suffix and, for .idx, the header magic.
533 if filename.endswith('.idx'):
534 f = open(filename, 'rb')
    # '\377tOc' magic marks a v2 (or later) pack index.
536 if header[0:4] == '\377tOc':
537 version = struct.unpack('!I', header[4:8])[0]
539 return PackIdxV2(filename, f)
541 raise GitError('%s: expected idx file version 2, got %d'
542 % (filename, version))
    # v1 indexes have no magic; the first fanout entry sorts below it.
543 elif len(header) == 8 and header[0:4] < '\377tOc':
544 return PackIdxV1(filename, f)
546 raise GitError('%s: unrecognized idx file header' % filename)
547 elif filename.endswith('.midx'):
548 return midx.PackMidx(filename)
550 raise GitError('idx filenames must end with .idx or .midx')

553 def idxmerge(idxlist, final_progress=True):
554 """Generate a list of all the objects reachable in a PackIdxList."""
555 def pfunc(count, total):
556 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
557 % (count*100.0/total, count, total))
558 def pfinal(count, total):
560 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
561 % (100, total, total))
562 return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default objcache factory for PackWriter: an index of all local packs."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
# --- PackWriter (class statement elided in this excerpt) ---
569 """Writes Git objects inside a pack file."""
570 def __init__(self, objcache_maker=_make_objcache, compression_level=1):
577 self.objcache_maker = objcache_maker
579 self.compression_level = compression_level

# (excerpt) _open(): lazily create the temporary pack file.
# NOTE(review): 'objdir = dir=...' also rebinds the builtin 'dir' here;
# presumably only so tempfile.mkstemp's dir= kwarg reads naturally.
586 objdir = dir=repo('objects')
587 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
589 self.file = os.fdopen(fd, 'w+b')
    # Keep the directory fd so we can fsync it when renaming the pack.
594 self.parentfd = os.open(objdir, os.O_RDONLY)
600 assert(name.endswith('.pack'))
601 self.filename = name[:-5]
    # Pack header: magic, version 2, object count 0 (patched in _end()).
602 self.file.write('PACK\0\0\0\2\0\0\0\0')
    # idx: per-first-byte buckets of (sha, crc, offset) for the .idx file.
603 self.idx = list(list() for i in xrange(256))

605 def _raw_write(self, datalist, sha):
608 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
609 # the file never has a *partial* blob. So let's make sure it's
610 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
611 # to our hashsplit algorithm.) f.write() does its own buffering,
612 # but that's okay because we'll flush it in _end().
613 oneblob = ''.join(datalist)
617 raise GitError, e, sys.exc_info()[2]
619 crc = zlib.crc32(oneblob) & 0xffffffff
620 self._update_idx(sha, crc, nw)

625 def _update_idx(self, sha, crc, size):
    # Record the object's offset (current position minus what we wrote).
628 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))

630 def _write(self, sha, type, content):
634 sha = calc_hash(type, content)
635 size, crc = self._raw_write(_encode_packobj(type, content,
636 self.compression_level),
    # Roll over to a fresh pack once either limit is reached.
638 if self.outbytes >= max_pack_size or self.count >= max_pack_objects:

642 def breakpoint(self):
643 """Clear byte and object counts and return the last processed id."""
645 self.outbytes = self.count = 0

648 def _require_objcache(self):
649 if self.objcache is None and self.objcache_maker:
650 self.objcache = self.objcache_maker()
651 if self.objcache is None:
653 "PackWriter not opened or can't check exists w/o objcache")

655 def exists(self, id, want_source=False):
656 """Return non-empty if an object is found in the object cache."""
657 self._require_objcache()
658 return self.objcache.exists(id, want_source=want_source)
660 def maybe_write(self, type, content):
661 """Write an object to the pack file if not present and return its id."""
662 sha = calc_hash(type, content)
663 if not self.exists(sha):
664 self._write(sha, type, content)
665 self._require_objcache()
666 self.objcache.add(sha)
669 def new_blob(self, blob):
670 """Create a blob object in the pack with the supplied content."""
671 return self.maybe_write('blob', blob)

673 def new_tree(self, shalist):
674 """Create a tree object in the pack."""
675 content = tree_encode(shalist)
676 return self.maybe_write('tree', content)

678 def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
    # Assemble the raw commit body line by line; every field is optional
    # so callers can build partial commits.
680 if tree: l.append('tree %s' % tree.encode('hex'))
681 if parent: l.append('parent %s' % parent.encode('hex'))
682 if author: l.append('author %s %s' % (author, _git_date(adate)))
683 if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
686 return self.maybe_write('commit', '\n'.join(l))

688 def new_commit(self, parent, tree, date, msg):
689 """Create a commit object in the pack."""
    # Use the local user's identity for both author and committer.
690 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
691 commit = self._new_commit(tree, parent,
692 userline, date, userline, date,

# (excerpt) abort(): discard the partially written pack.
697 """Remove the pack file from disk."""
706 os.unlink(self.filename + '.pack')

713 def _end(self, run_midx=True):
715 if not f: return None
722 # update object count
    # Patch the object count back into the pack header written in _open().
724 cp = struct.pack('!i', self.count)
728 # calculate the pack sha1sum
731 for b in chunkyreader(f):
733 packbin = sum.digest()
735 fdatasync(f.fileno())
    # Write the .idx, then rename both files to their content-addressed
    # names under objects/pack.
739 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
741 nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
742 if os.path.exists(self.filename + '.map'):
743 os.unlink(self.filename + '.map')
744 os.rename(self.filename + '.pack', nameprefix + '.pack')
745 os.rename(self.filename + '.idx', nameprefix + '.idx')
    # fsync the directory so the renames are durable before releasing it.
747 os.fsync(self.parentfd)
749 os.close(self.parentfd)
752 auto_midx(repo('objects/pack'))

755 def close(self, run_midx=True):
756 """Close the pack file and move it to its definitive path."""
757 return self._end(run_midx=run_midx)
759 def _write_pack_idx_v2(self, filename, idx, packbin):
    # Count entries whose offset won't fit in 31 bits; those need the
    # 64-bit overflow table of the v2 idx format.
762 for entry in section:
763 if entry[2] >= 2**31:
766 # Length: header + fan-out + shas-and-crcs + overflow-offsets
767 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
    # Pre-size the file and let the C helper fill it via mmap.
769 idx_f = open(filename, 'w+b')
771 idx_f.truncate(index_len)
772 fdatasync(idx_f.fileno())
773 idx_map = mmap_readwrite(idx_f, close=False)
775 count = _helpers.write_idx(filename, idx_map, idx, self.count)
776 assert(count == self.count)
    # Re-read the finished index to compute its trailing checksums.
783 idx_f = open(filename, 'a+b')
788 b = idx_f.read(8 + 4*256)
791 obj_list_sum = Sha1()
792 for b in chunkyreader(idx_f, 20*self.count):
794 obj_list_sum.update(b)
    # The pack gets named after the sha1 of its sorted object list.
795 namebase = obj_list_sum.hexdigest()
797 for b in chunkyreader(idx_f):
799 idx_f.write(idx_sum.digest())
800 fdatasync(idx_f.fileno())

# (excerpt) _git_date(): format an epoch date the way git wants it.
807 return '%d %s' % (date, utc_offset_str(date))

810 def _gitenv(repo_dir = None):
    # Returned callable is used as a subprocess preexec_fn: it points
    # GIT_DIR at our repository before git runs.
814 os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
818 def list_refs(refname=None, repo_dir=None,
819 limit_to_heads=False, limit_to_tags=False):
820 """Yield (refname, hash) tuples for all repository refs unless a ref
821 name is specified. Given a ref name, only include tuples for that
822 particular ref. The limits restrict the result items to
823 refs/heads or refs/tags. If both limits are specified, items from
824 both sources will be included.
827 argv = ['git', 'show-ref']
829 argv.append('--heads')
831 argv.append('--tags')
835 p = subprocess.Popen(argv,
836 preexec_fn = _gitenv(repo_dir),
837 stdout = subprocess.PIPE)
838 out = p.stdout.read().strip()
839 rv = p.wait() # not fatal
    # show-ref prints '<40-hex sha> <refname>' per line; return binary shas.
843 for d in out.split('\n'):
844 (sha, name) = d.split(' ', 1)
845 yield (name, sha.decode('hex'))

848 def read_ref(refname, repo_dir = None):
849 """Get the commit id of the most recent commit made on a given ref."""
850 refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
    # Take at most two matches so we can detect an ambiguous refname.
851 l = tuple(islice(refs, 2))

859 def rev_list(ref, count=None, repo_dir=None):
860 """Generate a list of reachable commits in reverse chronological order.
862 This generator walks through commits, from child to parent, that are
863 reachable via the specified ref and yields a series of tuples of the form
866 If count is a non-zero integer, limit the number of commits to "count"
    # Refuse refs that look like options to keep argv injection-safe.
869 assert(not ref.startswith('-'))
872 opts += ['-n', str(atoi(count))]
873 argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
874 p = subprocess.Popen(argv,
875 preexec_fn = _gitenv(repo_dir),
876 stdout = subprocess.PIPE)
    # Output alternates 'commit <sha>' lines with the %at date lines.
880 if s.startswith('commit '):
881 commit = s[7:].decode('hex')
885 rv = p.wait() # not fatal
887 raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
    string in refs must resolve to a different commit or this
    function will fail.

    Returns a list of author_sec (UTC epoch seconds) values, one per ref,
    in the same order as refs.
    """
    result = []  # the excerpt never accumulated or returned the dates
    for ref in refs:
        commit = get_commit_items(ref, cp(repo_dir))
        result.append(commit.author_sec)
    return result
901 def rev_parse(committish, repo_dir=None):
902 """Resolve the full hash for 'committish', if it exists.
904 Should be roughly equivalent to 'git rev-parse'.
906 Returns the hex value of the hash if it is found, None if 'committish' does
907 not correspond to anything.
    # First try it as a ref name ...
909 head = read_ref(committish, repo_dir=repo_dir)
911 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    # ... then as a raw 40-hex object id, checked against the pack indexes.
914 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
916 if len(committish) == 40:
918 hash = committish.decode('hex')

928 def update_ref(refname, newval, oldval, repo_dir=None):
929 """Update a repository reference."""
    # Only allow branch and tag refs; newval/oldval are binary shas.
932 assert(refname.startswith('refs/heads/') \
933 or refname.startswith('refs/tags/'))
934 p = subprocess.Popen(['git', 'update-ref', refname,
935 newval.encode('hex'), oldval.encode('hex')],
936 preexec_fn = _gitenv(repo_dir))
937 _git_wait('git update-ref', p)
def delete_ref(refname):
    """Delete a repository reference (refname must live under refs/)."""
    assert refname.startswith('refs/')
    argv = ['git', 'update-ref', '-d', refname]
    proc = subprocess.Popen(argv, preexec_fn=_gitenv())
    _git_wait('git update-ref', proc)
948 def guess_repo(path=None):
949 """Set the path value in the global variable "repodir".
950 This makes bup look for an existing bup repository, but not fail if a
951 repository doesn't exist. Usually, if you are interacting with a bup
952 repository, you would not be calling this function but using
    # Precedence: explicit path, then $BUP_DIR, then ~/.bup.
959 repodir = os.environ.get('BUP_DIR')
961 repodir = os.path.expanduser('~/.bup')

964 def init_repo(path=None):
965 """Create the Git bare repository for bup in a given path."""
967 d = repo() # appends a / to the path
968 parent = os.path.dirname(os.path.dirname(d))
969 if parent and not os.path.exists(parent):
970 raise GitError('parent directory "%s" does not exist\n' % parent)
971 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
972 raise GitError('"%s" exists but is not a directory\n' % d)
973 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
974 preexec_fn = _gitenv())
975 _git_wait('git init', p)
976 # Force the index version configuration in order to ensure bup works
977 # regardless of the version of the installed Git binary.
978 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
979 stdout=sys.stderr, preexec_fn = _gitenv())
980 _git_wait('git config', p)
982 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
983 stdout=sys.stderr, preexec_fn = _gitenv())
984 _git_wait('git config', p)

987 def check_repo_or_die(path=None):
988 """Make sure a bup repository exists, and abort if not.
989 If the path to a particular repository was not specified, this function
990 initializes the default repository automatically.
    # A stat of objects/pack/. doubles as the existence check.
994 os.stat(repo('objects/pack/.'))
996 if e.errno == errno.ENOENT:
997 log('error: %r is not a bup repository; run "bup init"\n'
1001 log('error: %s\n' % e)

# (excerpt) ver(): docstring and body of the git-version probe.
1007 """Get Git's version and ensure a usable version is installed.
1009 The returned version is formatted as an ordered tuple with each position
1010 representing a digit in the version tag. For example, the following tuple
1011 would represent version 1.6.6.9:
1013 ('1', '6', '6', '9')
1017 p = subprocess.Popen(['git', '--version'],
1018 stdout=subprocess.PIPE)
1019 gvs = p.stdout.read()
1020 _git_wait('git --version', p)
1021 m = re.match(r'git version (\S+.\S+)', gvs)
1023 raise GitError('git --version weird output: %r' % gvs)
1024 _ver = tuple(m.group(1).split('.'))
1025 needed = ('1','5', '3', '1')
1027 raise GitError('git version %s or higher is required; you have %s'
1028 % ('.'.join(needed), '.'.join(_ver)))

1032 def _git_wait(cmd, p):
    # Wait for subprocess p; raise if it exited nonzero.
1035 raise GitError('%s returned %d' % (cmd, rv))

1038 def _git_capture(argv):
    # Run a git command and return its stdout, raising on failure.
1039 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
1041 _git_wait(repr(argv), p)
1045 class _AbortableIter:
    # Wraps an iterator so that abandoning it mid-stream can notify the
    # owner (CatPipe uses this to restart its git subprocess safely).
1046 def __init__(self, it, onabort = None):
1048 self.onabort = onabort
1056 return self.it.next()
1057 except StopIteration as e:
1065 """Abort iteration and call the abortion callback, if needed."""

# --- CatPipe (class statement elided in this excerpt) ---
1077 """Link to 'git cat-file' that is used to retrieve blob data."""
1078 def __init__(self, repo_dir = None):
1080 self.repo_dir = repo_dir
    # git < 1.5.6 lacks usable 'cat-file --batch'; fall back to one
    # subprocess per object.
1081 wanted = ('1','5','6')
1084 log('warning: git version < %s; bup will be slow.\n'
1087 self.get = self._slow_get
1089 self.p = self.inprogress = None
1090 self.get = self._fast_get

# (excerpt) _abort/_restart: tear down the batch subprocess.
1094 self.p.stdout.close()
1095 self.p.stdin.close()
1097 self.inprogress = None
    # One long-lived 'git cat-file --batch' process serves all requests.
1101 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1102 stdin=subprocess.PIPE,
1103 stdout=subprocess.PIPE,
1106 preexec_fn = _gitenv(self.repo_dir))

1108 def _fast_get(self, id):
1109 if not self.p or self.p.poll() != None:
1112 poll_result = self.p.poll()
1113 assert(poll_result == None)
    # Only one object may be streamed at a time from the batch process.
1115 log('_fast_get: opening %r while %r is open\n'
1116 % (id, self.inprogress))
1117 assert(not self.inprogress)
1118 assert(id.find('\n') < 0)
1119 assert(id.find('\r') < 0)
1120 assert(not id.startswith('-'))
1121 self.inprogress = id
1122 self.p.stdin.write('%s\n' % id)
1123 self.p.stdin.flush()
    # Batch reply header: '<sha> <type> <size>' or '<id> missing'.
1124 hdr = self.p.stdout.readline()
1125 if hdr.endswith(' missing\n'):
1126 self.inprogress = None
1127 raise KeyError('blob %r is missing' % id)
1128 spl = hdr.split(' ')
1129 if len(spl) != 3 or len(spl[0]) != 40:
1130 raise GitError('expected blob, got %r' % spl)
1131 (hex, type, size) = spl
1133 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1134 onabort = self._abort)
    # The batch protocol terminates each object with a newline.
1139 readline_result = self.p.stdout.readline()
1140 assert(readline_result == '\n')
1141 self.inprogress = None
1142 except Exception as e:

1146 def _slow_get(self, id):
1147 assert(id.find('\n') < 0)
1148 assert(id.find('\r') < 0)
1149 assert(id[0] != '-')
    # Two subprocesses per object: one for the type, one for the content.
1150 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1153 p = subprocess.Popen(['git', 'cat-file', type, id],
1154 stdout=subprocess.PIPE,
1155 preexec_fn = _gitenv(self.repo_dir))
1156 for blob in chunkyreader(p.stdout):
1158 _git_wait('git cat-file', p)

1160 def _join(self, it):
    # Recursively expand trees and commits down to their blob contents.
1165 elif type == 'tree':
1166 treefile = ''.join(it)
1167 for (mode, name, sha) in tree_decode(treefile):
1168 for blob in self.join(sha.encode('hex')):
1170 elif type == 'commit':
1171 treeline = ''.join(it).split('\n')[0]
1172 assert(treeline.startswith('tree '))
1173 for blob in self.join(treeline[5:]):
1176 raise GitError('invalid object type %r: expected blob/tree/commit'

# (excerpt) join(): public wrapper around _join().
1180 """Generate a list of the content of all blobs that can be reached
1181 from an object. The hash given in 'id' must point to a blob, a tree
1182 or a commit. The content of all blobs that can be seen from trees or
1183 commits will be added to the list.
1186 for d in self._join(self.get(id)):
1188 except StopIteration:

1194 def cp(repo_dir=None):
1195 """Create a CatPipe object or reuse the already existing one."""
    # _cp caches one CatPipe per absolute repo path.
1199 repo_dir = os.path.abspath(repo_dir)
1200 cp = _cp.get(repo_dir)
1202 cp = CatPipe(repo_dir)

1207 def tags(repo_dir = None):
1208 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1210 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
1211 assert(n.startswith('refs/tags/'))
1215 tags[c].append(name) # more than one tag can point at 'c'