"""Git interaction library.
bup repositories are in Git format. This library allows us to
interact with the Git data structures.
"""
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
12 hostname, log, merge_iter, mmap_read, mmap_readwrite,
13 progress, qprogress, unlink, username, userfullname,
# Rollover limits for PackWriter: when either is exceeded, the current
# pack is finished and a new one is started (see PackWriter._write).
max_pack_size = 1000*1000*1000  # larger packs will slow down pruning
max_pack_objects = 200*1000  # cache memory usage is about 83 bytes per object
# Mapping between git object type names and their numeric codes as stored
# in packfile object headers (and the reverse mapping).
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
# Root exception type raised throughout this module for git-related failures.
class GitError(Exception):
def parse_tz_offset(s):
    """Return a git timezone string like '+0130' as a UTC offset in seconds."""
    # s[1:3] is the hours field, s[3:5] the minutes field; s[0] is the sign.
    tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
# Make sure that's authoritative.
# Characters allowed at the start/end vs. the interior of a git ident
# (name or email) string.
_start_end_char = r'[^ .,:;<>"\'\0\n]'
_content_char = r'[^\0\n<>]'
_safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
    (_start_end_char, _content_char, _start_end_char)
# Timezone field: sign, two hour digits, minutes 00-59.
_tz_rx = r'[-+]\d\d[0-5]\d'
_parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
# Matches a full commit object: tree, zero or more parents, author line,
# committer line, then the free-form message. Used by parse_commit().
_commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
(?P<message>(?:.|\n)*)''' % (_parent_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx,
                             _safe_str_rx, _safe_str_rx, _tz_rx))
# Extracts each individual parent hash from the 'parents' group above.
_parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
# Note that the author_sec and committer_sec values are (UTC) epoch seconds.
# Structured result of parse_commit(); *_offset fields are signed seconds
# east of UTC as produced by parse_tz_offset().
CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
                                       'author_name', 'author_mail',
                                       'author_sec', 'author_offset',
                                       'committer_name', 'committer_mail',
                                       'committer_sec', 'committer_offset',
def parse_commit(content):
    """Parse a git commit object's text and return a CommitInfo namedtuple.

    Raises Exception if 'content' does not match _commit_rx.
    """
    commit_match = re.match(_commit_rx, content)
    raise Exception('cannot parse commit %r' % content)
    matches = commit_match.groupdict()
    return CommitInfo(tree=matches['tree'],
                      parents=re.findall(_parent_hash_rx, matches['parents']),
                      author_name=matches['author_name'],
                      author_mail=matches['author_mail'],
                      author_sec=int(matches['asec']),
                      author_offset=parse_tz_offset(matches['atz']),
                      committer_name=matches['committer_name'],
                      committer_mail=matches['committer_mail'],
                      committer_sec=int(matches['csec']),
                      committer_offset=parse_tz_offset(matches['ctz']),
                      message=matches['message'])
def get_commit_items(id, cp):
    """Fetch commit 'id' through cat-pipe 'cp' and return it as a CommitInfo."""
    item_iter = cp.get(id)
    assert(item_iter.next() == 'commit')
    return parse_commit(''.join(item_iter))
def repo(sub = '', repo_dir=None):
    """Get the path to the git repository or one of its subdirectories."""
    # Fall back to the module-global repodir set by check_repo_or_die().
    repo_dir = repo_dir or repodir
    raise GitError('You should call check_repo_or_die()')
    # If there's a .git subdirectory, then the actual repo is in there.
    gd = os.path.join(repo_dir, '.git')
    if os.path.exists(gd):
    return os.path.join(repo_dir, sub)
111 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
116 full = os.path.abspath(path)
117 fullrepo = os.path.abspath(repo(''))
118 if not fullrepo.endswith('/'):
120 if full.startswith(fullrepo):
121 path = full[len(fullrepo):]
122 if path.startswith('index-cache/'):
123 path = path[len('index-cache/'):]
124 return shorten_hash(path)
128 paths = [repo('objects/pack')]
129 paths += glob.glob(repo('index-cache/*/.'))
def auto_midx(objdir):
    # Best-effort maintenance: run 'bup midx --auto' and 'bup bloom' over
    # objdir; failures are recorded via add_error() rather than raised.
    args = [path.exe(), 'midx', '--auto', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))
    args = [path.exe(), 'bloom', '--dir', objdir]
    rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
    # make sure 'args' gets printed to help with debugging
    add_error('%r: exception: %s' % (args, e))
    add_error('%r: returned %d' % (args, rv))
def mangle_name(name, mode, gitmode):
    """Mangle a file name to present an abstract name for segmented files.
    Mangled file names will have the ".bup" extension added to them. If a
    file's name already ends with ".bup", a ".bupl" extension is added to
    disambiguate normal files from segmented ones.
    """
    # A regular file stored with a directory gitmode means it was chunked
    # (stored as a git tree of segments).
    if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
        assert(stat.S_ISDIR(gitmode))
    elif name.endswith('.bup') or name[:-1].endswith('.bup'):
        return name + '.bupl'
# Demangle result codes: as-is file vs. chunked (segmented) file.
(BUP_NORMAL, BUP_CHUNKED) = (0,1)
def demangle_name(name, mode):
    """Remove name mangling from a file name, if necessary.

    The return value is a tuple (demangled_filename,mode), where mode is one of
    * BUP_NORMAL : files that should be read as-is from the repository
    * BUP_CHUNKED : files that were chunked and need to be reassembled
    For more information on the name mangling algorithm, see mangle_name()
    """
    if name.endswith('.bupl'):
        return (name[:-5], BUP_NORMAL)
    elif name.endswith('.bup'):
        return (name[:-4], BUP_CHUNKED)
    elif name.endswith('.bupm'):
        BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
    return (name, BUP_NORMAL)
def calc_hash(type, content):
    """Calculate some content's hash in the Git fashion."""
    # Git object ids hash "<type> <length>\0" followed by the content.
    header = '%s %d\0' % (type, len(content))
def shalist_item_sort_key(ent):
    # Sort key for git tree entries; git sorts directories as if their
    # names had a trailing '/'.
    (mode, name, id) = ent
    assert(mode+0 == mode)  # mode must be an integer
    if stat.S_ISDIR(mode):
def tree_encode(shalist):
    """Generate a git tree object from (mode,name,hash) tuples."""
    # Entries must be in git's tree sort order before encoding.
    shalist = sorted(shalist, key = shalist_item_sort_key)
    for (mode,name,bin) in shalist:
        assert(mode+0 == mode)
        assert(len(bin) == 20)  # raw (binary) sha1, not hex
        s = '%o %s\0%s' % (mode,name,bin)
        assert(s[0] != '0')  # 0-padded octal is not acceptable in a git tree
def tree_decode(buf):
    """Generate a list of (mode,name,hash) from the git tree object in buf."""
    while ofs < len(buf):
        # Each entry is "<octal mode> <name>\0" followed by a 20-byte sha.
        z = buf.find('\0', ofs)
        spl = buf[ofs:z].split(' ', 1)
        assert(len(spl) == 2)
        sha = buf[z+1:z+1+20]
        yield (int(mode, 8), name, sha)
def _encode_packobj(type, content, compression_level=1):
    # Yield a packfile-format object: a variable-length size header with
    # the type code in bits 4-6 of the first byte, then the zlib body.
    szbits = (sz & 0x0f) | (_typemap[type]<<4)
    if sz: szbits |= 0x80  # continuation bit: more size bytes follow
    # Clamp compression_level into zlib's valid 0..9 range.
    if compression_level > 9:
        compression_level = 9
    elif compression_level < 0:
        compression_level = 0
    z = zlib.compressobj(compression_level)
    yield z.compress(content)
def _encode_looseobj(type, content, compression_level=1):
    # A loose object is "<type> <length>\0" + content, zlib-compressed
    # as a single stream.
    z = zlib.compressobj(compression_level)
    yield z.compress('%s %d\0' % (type, len(content)))
    yield z.compress(content)
def _decode_looseobj(buf):
    # Inverse of _encode_looseobj(): inflate, then split the header from
    # the body and sanity-check both.
    s = zlib.decompress(buf)
    assert(type in _typemap)
    assert(sz == len(content))
    return (type, content)
def _decode_packobj(buf):
    # Decode the variable-length pack object header (type in the high bits
    # of the first byte, size accumulated 7 bits at a time), then inflate.
    type = _typermap[(c & 0x70) >> 4]
    sz |= (c & 0x7f) << shift
    return (type, zlib.decompress(buf[i+1:]))
    def find_offset(self, hash):
        """Get the offset of an object inside the index file."""
        idx = self._idx_from_hash(hash)
        return self._ofs_from_idx(idx)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in this index."""
        if hash and (self._idx_from_hash(hash) != None):
            # Callers may request the source index's filename instead of True.
            return want_source and os.path.basename(self.name) or True
317 return int(self.fanout[255])
    def _idx_from_hash(self, hash):
        # Binary-search this index for 'hash' (a raw 20-byte sha), using
        # the 256-entry fanout table to narrow the initial search range.
        global _total_searches, _total_steps
        assert(len(hash) == 20)
        start = self.fanout[b1-1] # range -1..254
        end = self.fanout[b1] # range 0..255
        _total_steps += 1 # lookup table is a step
        mid = start + (end-start)/2
        v = self._idx_to_hash(mid)
class PackIdxV1(PackIdx):
    """Object representation of a Git pack index (version 1) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # v1 layout: 256-entry fanout table of 4-byte counts, then one
        # 24-byte row (4-byte offset + 20-byte sha) per object.
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 0, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.shatable = buffer(self.map, self.sha_ofs, nsha*24)

    def _ofs_from_idx(self, idx):
        # The pack offset is the leading 4 bytes of the 24-byte row.
        return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]

    def _idx_to_hash(self, idx):
        # The sha is the trailing 20 bytes of the 24-byte row.
        return str(self.shatable[idx*24+4 : idx*24+24])

    for i in xrange(self.fanout[255]):
        yield buffer(self.map, 256*4 + 24*i + 4, 20)
class PackIdxV2(PackIdx):
    """Object representation of a Git pack index (version 2) file."""
    def __init__(self, filename, f):
        self.idxnames = [self.name]
        self.map = mmap_read(f)
        # Magic "\377tOc" followed by version number 2.
        assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
        # v2 layout: header, fanout, sha table, crc table, 4-byte offset
        # table, then a 64-bit offset table for entries >= 2**31.
        self.fanout = list(struct.unpack('!256I',
                                         str(buffer(self.map, 8, 256*4))))
        self.fanout.append(0) # entry "-1"
        nsha = self.fanout[255]
        self.sha_ofs = 8 + 256*4
        self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
        self.ofstable = buffer(self.map,
                               self.sha_ofs + nsha*20 + nsha*4,
        self.ofs64table = buffer(self.map,
                                 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)

    def _ofs_from_idx(self, idx):
        ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
        # High bit set => the low 31 bits index the 64-bit offset table.
        idx64 = ofs & 0x7fffffff
        ofs = struct.unpack('!Q',
                            str(buffer(self.ofs64table, idx64*8, 8)))[0]

    def _idx_to_hash(self, idx):
        return str(self.shatable[idx*20:(idx+1)*20])

    for i in xrange(self.fanout[255]):
        yield buffer(self.map, 8 + 256*4 + 20*i, 20)
    def __init__(self, dir):
        # Only one PackIdxList may be live at a time (mmap-heavy).
        assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
        self.do_bloom = False
416 assert(_mpi_count == 0)
419 return iter(idxmerge(self.packs))
422 return sum(len(pack) for pack in self.packs)
    def exists(self, hash, want_source=False):
        """Return nonempty if the object exists in the index files."""
        global _total_searches
        if hash in self.also:
        if self.do_bloom and self.bloom:
            # A bloom filter miss is definitive; a hit still has to be
            # confirmed against the real indexes below.
            if self.bloom.exists(hash):
                self.do_bloom = False
                _total_searches -= 1 # was counted by bloom
        for i in xrange(len(self.packs)):
            _total_searches -= 1 # will be incremented by sub-pack
            ix = p.exists(hash, want_source=want_source)
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i+1:]
    def refresh(self, skip_midx = False):
        """Refresh the index list.
        This method verifies if .midx files were superseded (e.g. all of its
        contents are in another, bigger .midx file) and removes the superseded
        If skip_midx is True, all work on .midx files will be skipped and .midx
        files will be removed from the list.
        The module-global variable 'ignore_midx' can force this function to
        always act as if skip_midx was True.
        """
        self.bloom = None # Always reopen the bloom as it may have been relaced
        self.do_bloom = False
        skip_midx = skip_midx or ignore_midx
        # Start from the already-open indexes, keyed by filename, so we
        # don't reopen anything unnecessarily.
        d = dict((p.name, p) for p in self.packs
                 if not skip_midx or not isinstance(p, midx.PackMidx))
        if os.path.exists(self.dir):
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir,'*.midx')):
                mx = midx.PackMidx(full)
                (mxd, mxf) = os.path.split(mx.name)
                for n in mx.idxnames:
                    # A midx that references a missing .idx is suspect.
                    if not os.path.exists(os.path.join(mxd, n)):
                        log(('warning: index %s missing\n' +
                             ' used by %s\n') % (n, mxf))
            # Prefer bigger, then newer, midx files.
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for sub in ix.idxnames:
                found = d.get(os.path.join(self.dir, sub))
                if not found or isinstance(found, PackIdx):
                    # doesn't exist, or exists but not in a midx
            for name in ix.idxnames:
                d[os.path.join(self.dir, name)] = ix
            elif not ix.force_keep:
                debug1('midx: removing redundant: %s\n'
                       % os.path.basename(ix.name))
            for full in glob.glob(os.path.join(self.dir,'*.idx')):
            except GitError as e:
        bfull = os.path.join(self.dir, 'bup.bloom')
        # Reopen the on-disk bloom filter if one exists.
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
        debug1('PackIdxList: using %d index%s.\n'
               % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
527 """Insert an additional object in the list."""
def open_idx(filename):
    # Open a pack index file by extension: '.idx' (v1 or v2) or '.midx'.
    if filename.endswith('.idx'):
        f = open(filename, 'rb')
        if header[0:4] == '\377tOc':
            version = struct.unpack('!I', header[4:8])[0]
            return PackIdxV2(filename, f)
            raise GitError('%s: expected idx file version 2, got %d'
                           % (filename, version))
        elif len(header) == 8 and header[0:4] < '\377tOc':
            # No magic prefix: a v1 index starts directly with the fanout.
            return PackIdxV1(filename, f)
        raise GitError('%s: unrecognized idx file header' % filename)
    elif filename.endswith('.midx'):
        return midx.PackMidx(filename)
    raise GitError('idx filenames must end with .idx or .midx')
def idxmerge(idxlist, final_progress=True):
    """Generate a list of all the objects reachable in a PackIdxList."""
    # Progress callbacks passed to merge_iter: per-step and final.
    def pfunc(count, total):
        qprogress('Reading indexes: %.2f%% (%d/%d)\r'
                  % (count*100.0/total, count, total))
    def pfinal(count, total):
        progress('Reading indexes: %.2f%% (%d/%d), done.\n'
                 % (100, total, total))
    return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default objcache factory for PackWriter: all local pack indexes."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
568 """Writes Git objects inside a pack file."""
    def __init__(self, objcache_maker=_make_objcache, compression_level=1):
        # objcache_maker lazily builds the existence-check index (see
        # _require_objcache); compression_level is handed to zlib.
        self.objcache_maker = objcache_maker
        self.compression_level = compression_level
        # Write into a temp file; _end() renames it to its final name.
        (fd,name) = tempfile.mkstemp(suffix='.pack', dir=repo('objects'))
        self.file = os.fdopen(fd, 'w+b')
        assert(name.endswith('.pack'))
        self.filename = name[:-5]
        # Pack header: "PACK", version 2, object count 0 (patched by _end).
        self.file.write('PACK\0\0\0\2\0\0\0\0')
        # One bucket of (sha, crc, offset) tuples per leading sha byte.
        self.idx = list(list() for i in xrange(256))
    def _raw_write(self, datalist, sha):
        # in case we get interrupted (eg. KeyboardInterrupt), it's best if
        # the file never has a *partial* blob. So let's make sure it's
        # all-or-nothing. (The blob shouldn't be very big anyway, thanks
        # to our hashsplit algorithm.) f.write() does its own buffering,
        # but that's okay because we'll flush it in _end().
        oneblob = ''.join(datalist)
        raise GitError, e, sys.exc_info()[2]
        # CRC over the encoded object, as required by the v2 index format.
        crc = zlib.crc32(oneblob) & 0xffffffff
        self._update_idx(sha, crc, nw)
    def _update_idx(self, sha, crc, size):
        # Record (sha, crc, start-offset) in the bucket for sha's first byte.
        self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
    def _write(self, sha, type, content):
        # Write one object into the pack, recording it for the index.
        sha = calc_hash(type, content)
        size, crc = self._raw_write(_encode_packobj(type, content,
                                                    self.compression_level),
        # Roll over to a fresh pack once either limit is exceeded.
        if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
    def breakpoint(self):
        """Clear byte and object counts and return the last processed id."""
        self.outbytes = self.count = 0
    def _require_objcache(self):
        # Lazily build the object cache on first use; fail loudly if we
        # have no way to construct one.
        if self.objcache is None and self.objcache_maker:
            self.objcache = self.objcache_maker()
        if self.objcache is None:
                "PackWriter not opened or can't check exists w/o objcache")
645 def exists(self, id, want_source=False):
646 """Return non-empty if an object is found in the object cache."""
647 self._require_objcache()
648 return self.objcache.exists(id, want_source=want_source)
    def maybe_write(self, type, content):
        """Write an object to the pack file if not present and return its id."""
        sha = calc_hash(type, content)
        if not self.exists(sha):
            self._write(sha, type, content)
            self._require_objcache()
            self.objcache.add(sha)  # remember it so we never write it twice
    def new_blob(self, blob):
        """Create a blob object in the pack with the supplied content.

        Returns the object id as reported by maybe_write().
        """
        return self.maybe_write('blob', blob)
663 def new_tree(self, shalist):
664 """Create a tree object in the pack."""
665 content = tree_encode(shalist)
666 return self.maybe_write('tree', content)
    def _new_commit(self, tree, parent, author, adate, committer, cdate, msg):
        # Assemble the commit object text line by line; each field is
        # optional and skipped when falsy.
        if tree: l.append('tree %s' % tree.encode('hex'))
        if parent: l.append('parent %s' % parent.encode('hex'))
        if author: l.append('author %s %s' % (author, _git_date(adate)))
        if committer: l.append('committer %s %s' % (committer, _git_date(cdate)))
        return self.maybe_write('commit', '\n'.join(l))
    def new_commit(self, parent, tree, date, msg):
        """Create a commit object in the pack."""
        # Use the current user's identity for both author and committer.
        userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = self._new_commit(tree, parent,
                                  userline, date, userline, date,
687 """Remove the pack file from disk."""
693 os.unlink(self.filename + '.pack')
    def _end(self, run_midx=True):
        # Finalize the pack: patch in the real object count, append the
        # pack checksum, write the .idx, then rename both files to their
        # content-derived 'pack-<sha>' names.
        if not f: return None
        # update object count
        cp = struct.pack('!i', self.count)
        # calculate the pack sha1sum
        for b in chunkyreader(f):
        packbin = sum.digest()
        obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
        nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
        if os.path.exists(self.filename + '.map'):
            os.unlink(self.filename + '.map')
        os.rename(self.filename + '.pack', nameprefix + '.pack')
        os.rename(self.filename + '.idx', nameprefix + '.idx')
        auto_midx(repo('objects/pack'))
    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path.

        run_midx is forwarded to _end(); returns whatever _end() returns.
        """
        return self._end(run_midx=run_midx)
    def _write_pack_idx_v2(self, filename, idx, packbin):
        # Write a version-2 pack index for this pack; the hex digest of
        # the sorted object list becomes the pack's on-disk name.
        for entry in section:
            # Offsets >= 2**31 need a slot in the 64-bit overflow table.
            if entry[2] >= 2**31:
        # Length: header + fan-out + shas-and-crcs + overflow-offsets
        index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
        idx_f = open(filename, 'w+b')
        idx_f.truncate(index_len)
        idx_map = mmap_readwrite(idx_f, close=False)
        # The C helper fills in the mmap'd index in one pass.
        count = _helpers.write_idx(filename, idx_map, idx, self.count)
        assert(count == self.count)
        if idx_map: idx_map.close()
        idx_f = open(filename, 'a+b')
        b = idx_f.read(8 + 4*256)
        # Hash the sorted sha table to derive the pack name.
        obj_list_sum = Sha1()
        for b in chunkyreader(idx_f, 20*self.count):
            obj_list_sum.update(b)
        namebase = obj_list_sum.hexdigest()
        for b in chunkyreader(idx_f):
        idx_f.write(idx_sum.digest())
781 return '%d %s' % (date, utc_offset_str(date))
def _gitenv(repo_dir = None):
    # Used as a Popen preexec_fn factory: points GIT_DIR at repo_dir so
    # child git commands operate on the bup repository.
    os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
def list_refs(refname=None, repo_dir=None,
              limit_to_heads=False, limit_to_tags=False):
    """Yield (refname, hash) tuples for all repository refs unless a ref
    name is specified. Given a ref name, only include tuples for that
    particular ref. The limits restrict the result items to
    refs/heads or refs/tags. If both limits are specified, items from
    both sources will be included.
    """
    argv = ['git', 'show-ref']
    argv.append('--heads')
    argv.append('--tags')
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    out = p.stdout.read().strip()
    rv = p.wait() # not fatal
    for d in out.split('\n'):
        # Each show-ref output line is "<hex-sha> <refname>".
        (sha, name) = d.split(' ', 1)
        yield (name, sha.decode('hex'))
def read_ref(refname, repo_dir = None):
    """Get the commit id of the most recent commit made on a given ref."""
    refs = list_refs(refname, repo_dir=repo_dir, limit_to_heads=True)
    l = tuple(islice(refs, 2))  # take at most two matches
def rev_list(ref, count=None, repo_dir=None):
    """Generate a list of reachable commits in reverse chronological order.

    This generator walks through commits, from child to parent, that are
    reachable via the specified ref and yields a series of tuples of the form
    If count is a non-zero integer, limit the number of commits to "count"
    """
    assert(not ref.startswith('-'))  # keep ref from being parsed as an option
    opts += ['-n', str(atoi(count))]
    argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
    p = subprocess.Popen(argv,
                         preexec_fn = _gitenv(repo_dir),
                         stdout = subprocess.PIPE)
    # "commit <hex>" lines carry the hash; other lines carry the date.
    if s.startswith('commit '):
        commit = s[7:].decode('hex')
    rv = p.wait() # not fatal
    raise GitError, 'git rev-list returned error %d' % rv
def get_commit_dates(refs, repo_dir=None):
    """Get the dates for the specified commit refs. For now, every unique
    string in refs must resolve to a different commit or this
    function will fail."""
    # Author epoch seconds, in the same order as the incoming refs.
    commit = get_commit_items(ref, cp(repo_dir))
    result.append(commit.author_sec)
def rev_parse(committish, repo_dir=None):
    """Resolve the full hash for 'committish', if it exists.

    Should be roughly equivalent to 'git rev-parse'.
    Returns the hex value of the hash if it is found, None if 'committish' does
    not correspond to anything.
    """
    # First try resolving committish as a ref name.
    head = read_ref(committish, repo_dir=repo_dir)
    debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
    pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
    # Then try it as a full 40-character hex hash.
    if len(committish) == 40:
        hash = committish.decode('hex')
def update_ref(refname, newval, oldval, repo_dir=None):
    """Update a repository reference."""
    # Only branch heads and tags may be updated through this helper.
    assert(refname.startswith('refs/heads/') \
           or refname.startswith('refs/tags/'))
    p = subprocess.Popen(['git', 'update-ref', refname,
                          newval.encode('hex'), oldval.encode('hex')],
                         preexec_fn = _gitenv(repo_dir))
    _git_wait('git update-ref', p)
def delete_ref(refname):
    """Delete a repository reference (refname must live under refs/)."""
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
def guess_repo(path=None):
    """Set the path value in the global variable "repodir".
    This makes bup look for an existing bup repository, but not fail if a
    repository doesn't exist. Usually, if you are interacting with a bup
    repository, you would not be calling this function but using
    """
    # Prefer $BUP_DIR; fall back to ~/.bup.
    repodir = os.environ.get('BUP_DIR')
    repodir = os.path.expanduser('~/.bup')
def init_repo(path=None):
    """Create the Git bare repository for bup in a given path."""
    d = repo() # appends a / to the path
    parent = os.path.dirname(os.path.dirname(d))
    if parent and not os.path.exists(parent):
        raise GitError('parent directory "%s" does not exist\n' % parent)
    if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
        raise GitError('"%s" exists but is not a directory\n' % d)
    p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
                         preexec_fn = _gitenv())
    _git_wait('git init', p)
    # Force the index version configuration in order to ensure bup works
    # regardless of the version of the installed Git binary.
    p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
    # Record all ref updates (git core.logAllRefUpdates).
    p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
                         stdout=sys.stderr, preexec_fn = _gitenv())
    _git_wait('git config', p)
def check_repo_or_die(path=None):
    """Make sure a bup repository exists, and abort if not.
    If the path to a particular repository was not specified, this function
    initializes the default repository automatically.
    """
    # A stat of objects/pack/. proves the repo directory structure exists.
    os.stat(repo('objects/pack/.'))
    if e.errno == errno.ENOENT:
        log('error: %r is not a bup repository; run "bup init"\n'
    log('error: %s\n' % e)
981 """Get Git's version and ensure a usable version is installed.
983 The returned version is formatted as an ordered tuple with each position
984 representing a digit in the version tag. For example, the following tuple
985 would represent version 1.6.6.9:
991 p = subprocess.Popen(['git', '--version'],
992 stdout=subprocess.PIPE)
993 gvs = p.stdout.read()
994 _git_wait('git --version', p)
995 m = re.match(r'git version (\S+.\S+)', gvs)
997 raise GitError('git --version weird output: %r' % gvs)
998 _ver = tuple(m.group(1).split('.'))
999 needed = ('1','5', '3', '1')
1001 raise GitError('git version %s or higher is required; you have %s'
1002 % ('.'.join(needed), '.'.join(_ver)))
def _git_wait(cmd, p):
    # Wait for subprocess p to finish; raise GitError on nonzero status.
    raise GitError('%s returned %d' % (cmd, rv))
def _git_capture(argv):
    # Run argv in the repository environment and capture its stdout.
    p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
    _git_wait(repr(argv), p)
class _AbortableIter:
    # Wraps an iterator so that stopping early triggers an 'onabort'
    # callback, letting the underlying data source clean up after itself.
    def __init__(self, it, onabort = None):
        self.onabort = onabort
        return self.it.next()
        except StopIteration as e:
        """Abort iteration and call the abortion callback, if needed."""
1051 """Link to 'git cat-file' that is used to retrieve blob data."""
    def __init__(self, repo_dir = None):
        self.repo_dir = repo_dir
        # git older than 1.5.6 lacks 'cat-file --batch'; use the slow
        # one-process-per-object path in that case.
        wanted = ('1','5','6')
        log('warning: git version < %s; bup will be slow.\n'
        self.get = self._slow_get
        self.p = self.inprogress = None
        self.get = self._fast_get
1068 self.p.stdout.close()
1069 self.p.stdin.close()
1071 self.inprogress = None
1075 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1076 stdin=subprocess.PIPE,
1077 stdout=subprocess.PIPE,
1080 preexec_fn = _gitenv(self.repo_dir))
    def _fast_get(self, id):
        # Fetch object 'id' via the long-running 'git cat-file --batch'
        # child process, restarting it if it has exited.
        if not self.p or self.p.poll() != None:
        poll_result = self.p.poll()
        assert(poll_result == None)
        log('_fast_get: opening %r while %r is open\n'
            % (id, self.inprogress))
        assert(not self.inprogress)
        # Refuse ids that could desynchronize the batch protocol stream.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(not id.startswith('-'))
        self.inprogress = id
        self.p.stdin.write('%s\n' % id)
        self.p.stdin.flush()
        hdr = self.p.stdout.readline()
        if hdr.endswith(' missing\n'):
            self.inprogress = None
            raise KeyError('blob %r is missing' % id)
        # Expected header: "<40-hex-sha> <type> <size>".
        spl = hdr.split(' ')
        if len(spl) != 3 or len(spl[0]) != 40:
            raise GitError('expected blob, got %r' % spl)
        (hex, type, size) = spl
        # Hand back an iterator over the body; aborting it resets the pipe
        # so a partial read can't corrupt the next request.
        it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                            onabort = self._abort)
        readline_result = self.p.stdout.readline()
        assert(readline_result == '\n')  # newline terminator after the body
        self.inprogress = None
        except Exception as e:
    def _slow_get(self, id):
        # One 'git cat-file' invocation per object: correct but slow.
        assert(id.find('\n') < 0)
        assert(id.find('\r') < 0)
        assert(id[0] != '-')
        type = _git_capture(['git', 'cat-file', '-t', id]).strip()
        p = subprocess.Popen(['git', 'cat-file', type, id],
                             stdout=subprocess.PIPE,
                             preexec_fn = _gitenv(self.repo_dir))
        for blob in chunkyreader(p.stdout):
        _git_wait('git cat-file', p)
    def _join(self, it):
        # Recursively yield the blob contents reachable from the object in
        # 'it': a blob directly, each entry of a tree, or a commit's tree.
        elif type == 'tree':
            treefile = ''.join(it)
            for (mode, name, sha) in tree_decode(treefile):
                for blob in self.join(sha.encode('hex')):
        elif type == 'commit':
            # The first line of a commit object is "tree <hex-sha>".
            treeline = ''.join(it).split('\n')[0]
            assert(treeline.startswith('tree '))
            for blob in self.join(treeline[5:]):
        raise GitError('invalid object type %r: expected blob/tree/commit'
1154 """Generate a list of the content of all blobs that can be reached
1155 from an object. The hash given in 'id' must point to a blob, a tree
1156 or a commit. The content of all blobs that can be seen from trees or
1157 commits will be added to the list.
1160 for d in self._join(self.get(id)):
1162 except StopIteration:
def cp(repo_dir=None):
    """Create a CatPipe object or reuse the already existing one."""
    # One CatPipe is cached per absolute repository path in module-global _cp.
    repo_dir = os.path.abspath(repo_dir)
    cp = _cp.get(repo_dir)
    cp = CatPipe(repo_dir)
def tags(repo_dir = None):
    """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
    for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
        assert(n.startswith('refs/tags/'))
        tags[c].append(name) # more than one tag can point at 'c'