1 """Git interaction library.
2 bup repositories are in Git format. This library allows us to
3 interact with the Git data structures.
6 import errno, os, sys, zlib, time, subprocess, struct, stat, re, tempfile, glob
7 from collections import namedtuple
8 from itertools import islice
10 from bup import _helpers, hashsplit, path, midx, bloom, xstat
11 from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
13 hostname, localtime, log, merge_iter,
14 mmap_read, mmap_readwrite,
15 progress, qprogress, stat_if_exists,
16 unlink, username, userfullname,
21 repodir = None # The default repository, once initialized
23 _typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
24 _typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
30 class GitError(Exception):
34 def _git_wait(cmd, p):
37 raise GitError('%s returned %d' % (cmd, rv))
39 def _git_capture(argv):
40 p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
42 _git_wait(repr(argv), p)
46 def parse_tz_offset(s):
47 """UTC offset in seconds."""
48 tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
54 # FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
55 # Make sure that's authoritative.
56 _start_end_char = r'[^ .,:;<>"\'\0\n]'
57 _content_char = r'[^\0\n<>]'
58 _safe_str_rx = '(?:%s{1,2}|(?:%s%s*%s))' \
60 _start_end_char, _content_char, _start_end_char)
61 _tz_rx = r'[-+]\d\d[0-5]\d'
62 _parent_rx = r'(?:parent [abcdefABCDEF0123456789]{40}\n)'
63 _commit_rx = re.compile(r'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
64 (?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
65 committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)
67 (?P<message>(?:.|\n)*)''' % (_parent_rx,
68 _safe_str_rx, _safe_str_rx, _tz_rx,
69 _safe_str_rx, _safe_str_rx, _tz_rx))
70 _parent_hash_rx = re.compile(r'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
73 # Note that the author_sec and committer_sec values are (UTC) epoch seconds.
74 CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
75 'author_name', 'author_mail',
76 'author_sec', 'author_offset',
77 'committer_name', 'committer_mail',
78 'committer_sec', 'committer_offset',
81 def parse_commit(content):
82 commit_match = re.match(_commit_rx, content)
84 raise Exception('cannot parse commit %r' % content)
85 matches = commit_match.groupdict()
86 return CommitInfo(tree=matches['tree'],
87 parents=re.findall(_parent_hash_rx, matches['parents']),
88 author_name=matches['author_name'],
89 author_mail=matches['author_mail'],
90 author_sec=int(matches['asec']),
91 author_offset=parse_tz_offset(matches['atz']),
92 committer_name=matches['committer_name'],
93 committer_mail=matches['committer_mail'],
94 committer_sec=int(matches['csec']),
95 committer_offset=parse_tz_offset(matches['ctz']),
96 message=matches['message'])
def get_commit_items(id, cp):
    """Read the commit named by id through cat-pipe cp and return it
    as a parsed CommitInfo namedtuple."""
    item_it = cp.get(id)
    # The first item yielded by the cat-pipe is the object type.
    assert(item_it.next() == 'commit')
    return parse_commit(''.join(item_it))
def _local_git_date_str(epoch_sec):
    """Format epoch_sec as a git date string using the local UTC offset."""
    offset = utc_offset_str(epoch_sec)
    return '%d %s' % (epoch_sec, offset)
110 def _git_date_str(epoch_sec, tz_offset_sec):
111 offs = tz_offset_sec // 60
112 return '%d %s%02d%02d' \
114 '+' if offs >= 0 else '-',
119 def repo(sub = '', repo_dir=None):
120 """Get the path to the git repository or one of its subdirectories."""
122 repo_dir = repo_dir or repodir
124 raise GitError('You should call check_repo_or_die()')
126 # If there's a .git subdirectory, then the actual repo is in there.
127 gd = os.path.join(repo_dir, '.git')
128 if os.path.exists(gd):
131 return os.path.join(repo_dir, sub)
135 return re.sub(r'([^0-9a-z]|\b)([0-9a-z]{7})[0-9a-z]{33}([^0-9a-z]|\b)',
140 full = os.path.abspath(path)
141 fullrepo = os.path.abspath(repo(''))
142 if not fullrepo.endswith('/'):
144 if full.startswith(fullrepo):
145 path = full[len(fullrepo):]
146 if path.startswith('index-cache/'):
147 path = path[len('index-cache/'):]
148 return shorten_hash(path)
152 paths = [repo('objects/pack')]
153 paths += glob.glob(repo('index-cache/*/.'))
157 def auto_midx(objdir):
158 args = [path.exe(), 'midx', '--auto', '--dir', objdir]
160 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
162 # make sure 'args' gets printed to help with debugging
163 add_error('%r: exception: %s' % (args, e))
166 add_error('%r: returned %d' % (args, rv))
168 args = [path.exe(), 'bloom', '--dir', objdir]
170 rv = subprocess.call(args, stdout=open('/dev/null', 'w'))
172 # make sure 'args' gets printed to help with debugging
173 add_error('%r: exception: %s' % (args, e))
176 add_error('%r: returned %d' % (args, rv))
179 def mangle_name(name, mode, gitmode):
180 """Mangle a file name to present an abstract name for segmented files.
181 Mangled file names will have the ".bup" extension added to them. If a
182 file's name already ends with ".bup", a ".bupl" extension is added to
183 disambiguate normal files from segmented ones.
185 if stat.S_ISREG(mode) and not stat.S_ISREG(gitmode):
186 assert(stat.S_ISDIR(gitmode))
188 elif name.endswith('.bup') or name[:-1].endswith('.bup'):
189 return name + '.bupl'
194 (BUP_NORMAL, BUP_CHUNKED) = (0,1)
195 def demangle_name(name, mode):
196 """Remove name mangling from a file name, if necessary.
198 The return value is a tuple (demangled_filename,mode), where mode is one of
201 * BUP_NORMAL : files that should be read as-is from the repository
202 * BUP_CHUNKED : files that were chunked and need to be reassembled
204 For more information on the name mangling algorithm, see mangle_name()
206 if name.endswith('.bupl'):
207 return (name[:-5], BUP_NORMAL)
208 elif name.endswith('.bup'):
209 return (name[:-4], BUP_CHUNKED)
210 elif name.endswith('.bupm'):
212 BUP_CHUNKED if stat.S_ISDIR(mode) else BUP_NORMAL)
214 return (name, BUP_NORMAL)
217 def calc_hash(type, content):
218 """Calculate some content's hash in the Git fashion."""
219 header = '%s %d\0' % (type, len(content))
225 def shalist_item_sort_key(ent):
226 (mode, name, id) = ent
227 assert(mode+0 == mode)
228 if stat.S_ISDIR(mode):
234 def tree_encode(shalist):
235 """Generate a git tree object from (mode,name,hash) tuples."""
236 shalist = sorted(shalist, key = shalist_item_sort_key)
238 for (mode,name,bin) in shalist:
240 assert(mode+0 == mode)
242 assert(len(bin) == 20)
243 s = '%o %s\0%s' % (mode,name,bin)
244 assert(s[0] != '0') # 0-padded octal is not acceptable in a git tree
249 def tree_decode(buf):
250 """Generate a list of (mode,name,hash) from the git tree object in buf."""
252 while ofs < len(buf):
253 z = buf.find('\0', ofs)
255 spl = buf[ofs:z].split(' ', 1)
256 assert(len(spl) == 2)
258 sha = buf[z+1:z+1+20]
260 yield (int(mode, 8), name, sha)
263 def _encode_packobj(type, content, compression_level=1):
264 if compression_level not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9):
265 raise ValueError('invalid compression level %s' % compression_level)
268 szbits = (sz & 0x0f) | (_typemap[type]<<4)
271 if sz: szbits |= 0x80
277 z = zlib.compressobj(compression_level)
279 yield z.compress(content)
283 def _encode_looseobj(type, content, compression_level=1):
284 z = zlib.compressobj(compression_level)
285 yield z.compress('%s %d\0' % (type, len(content)))
286 yield z.compress(content)
290 def _decode_looseobj(buf):
292 s = zlib.decompress(buf)
299 assert(type in _typemap)
300 assert(sz == len(content))
301 return (type, content)
304 def _decode_packobj(buf):
307 type = _typermap[(c & 0x70) >> 4]
314 sz |= (c & 0x7f) << shift
318 return (type, zlib.decompress(buf[i+1:]))
325 def find_offset(self, hash):
326 """Get the offset of an object inside the index file."""
327 idx = self._idx_from_hash(hash)
329 return self._ofs_from_idx(idx)
332 def exists(self, hash, want_source=False):
333 """Return nonempty if the object exists in this index."""
334 if hash and (self._idx_from_hash(hash) != None):
335 return want_source and os.path.basename(self.name) or True
339 return int(self.fanout[255])
341 def _idx_from_hash(self, hash):
342 global _total_searches, _total_steps
344 assert(len(hash) == 20)
346 start = self.fanout[b1-1] # range -1..254
347 end = self.fanout[b1] # range 0..255
349 _total_steps += 1 # lookup table is a step
352 mid = start + (end-start)/2
353 v = self._idx_to_hash(mid)
363 class PackIdxV1(PackIdx):
364 """Object representation of a Git pack index (version 1) file."""
365 def __init__(self, filename, f):
367 self.idxnames = [self.name]
368 self.map = mmap_read(f)
369 self.fanout = list(struct.unpack('!256I',
370 str(buffer(self.map, 0, 256*4))))
371 self.fanout.append(0) # entry "-1"
372 nsha = self.fanout[255]
374 self.shatable = buffer(self.map, self.sha_ofs, nsha*24)
376 def _ofs_from_idx(self, idx):
377 return struct.unpack('!I', str(self.shatable[idx*24 : idx*24+4]))[0]
379 def _idx_to_hash(self, idx):
380 return str(self.shatable[idx*24+4 : idx*24+24])
383 for i in xrange(self.fanout[255]):
384 yield buffer(self.map, 256*4 + 24*i + 4, 20)
387 class PackIdxV2(PackIdx):
388 """Object representation of a Git pack index (version 2) file."""
389 def __init__(self, filename, f):
391 self.idxnames = [self.name]
392 self.map = mmap_read(f)
393 assert(str(self.map[0:8]) == '\377tOc\0\0\0\2')
394 self.fanout = list(struct.unpack('!256I',
395 str(buffer(self.map, 8, 256*4))))
396 self.fanout.append(0) # entry "-1"
397 nsha = self.fanout[255]
398 self.sha_ofs = 8 + 256*4
399 self.shatable = buffer(self.map, self.sha_ofs, nsha*20)
400 self.ofstable = buffer(self.map,
401 self.sha_ofs + nsha*20 + nsha*4,
403 self.ofs64table = buffer(self.map,
404 8 + 256*4 + nsha*20 + nsha*4 + nsha*4)
406 def _ofs_from_idx(self, idx):
407 ofs = struct.unpack('!I', str(buffer(self.ofstable, idx*4, 4)))[0]
409 idx64 = ofs & 0x7fffffff
410 ofs = struct.unpack('!Q',
411 str(buffer(self.ofs64table, idx64*8, 8)))[0]
414 def _idx_to_hash(self, idx):
415 return str(self.shatable[idx*20:(idx+1)*20])
418 for i in xrange(self.fanout[255]):
419 yield buffer(self.map, 8 + 256*4 + 20*i, 20)
424 def __init__(self, dir):
426 assert(_mpi_count == 0) # these things suck tons of VM; don't waste it
431 self.do_bloom = False
438 assert(_mpi_count == 0)
441 return iter(idxmerge(self.packs))
444 return sum(len(pack) for pack in self.packs)
446 def exists(self, hash, want_source=False):
447 """Return nonempty if the object exists in the index files."""
448 global _total_searches
450 if hash in self.also:
452 if self.do_bloom and self.bloom:
453 if self.bloom.exists(hash):
454 self.do_bloom = False
456 _total_searches -= 1 # was counted by bloom
458 for i in xrange(len(self.packs)):
460 _total_searches -= 1 # will be incremented by sub-pack
461 ix = p.exists(hash, want_source=want_source)
463 # reorder so most recently used packs are searched first
464 self.packs = [p] + self.packs[:i] + self.packs[i+1:]
469 def refresh(self, skip_midx = False):
470 """Refresh the index list.
471 This method verifies if .midx files were superseded (e.g. all of its
472 contents are in another, bigger .midx file) and removes the superseded
475 If skip_midx is True, all work on .midx files will be skipped and .midx
476 files will be removed from the list.
478 The module-global variable 'ignore_midx' can force this function to
479 always act as if skip_midx was True.
        self.bloom = None # Always reopen the bloom as it may have been replaced
482 self.do_bloom = False
483 skip_midx = skip_midx or ignore_midx
484 d = dict((p.name, p) for p in self.packs
485 if not skip_midx or not isinstance(p, midx.PackMidx))
486 if os.path.exists(self.dir):
489 for ix in self.packs:
490 if isinstance(ix, midx.PackMidx):
491 for name in ix.idxnames:
492 d[os.path.join(self.dir, name)] = ix
493 for full in glob.glob(os.path.join(self.dir,'*.midx')):
495 mx = midx.PackMidx(full)
496 (mxd, mxf) = os.path.split(mx.name)
498 for n in mx.idxnames:
499 if not os.path.exists(os.path.join(mxd, n)):
500 log(('warning: index %s missing\n' +
501 ' used by %s\n') % (n, mxf))
509 midxl.sort(key=lambda ix:
510 (-len(ix), -xstat.stat(ix.name).st_mtime))
513 for sub in ix.idxnames:
514 found = d.get(os.path.join(self.dir, sub))
515 if not found or isinstance(found, PackIdx):
516 # doesn't exist, or exists but not in a midx
521 for name in ix.idxnames:
522 d[os.path.join(self.dir, name)] = ix
523 elif not ix.force_keep:
524 debug1('midx: removing redundant: %s\n'
525 % os.path.basename(ix.name))
528 for full in glob.glob(os.path.join(self.dir,'*.idx')):
532 except GitError as e:
536 bfull = os.path.join(self.dir, 'bup.bloom')
537 if self.bloom is None and os.path.exists(bfull):
538 self.bloom = bloom.ShaBloom(bfull)
539 self.packs = list(set(d.values()))
540 self.packs.sort(lambda x,y: -cmp(len(x),len(y)))
541 if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
545 debug1('PackIdxList: using %d index%s.\n'
546 % (len(self.packs), len(self.packs)!=1 and 'es' or ''))
549 """Insert an additional object in the list."""
553 def open_idx(filename):
554 if filename.endswith('.idx'):
555 f = open(filename, 'rb')
557 if header[0:4] == '\377tOc':
558 version = struct.unpack('!I', header[4:8])[0]
560 return PackIdxV2(filename, f)
562 raise GitError('%s: expected idx file version 2, got %d'
563 % (filename, version))
564 elif len(header) == 8 and header[0:4] < '\377tOc':
565 return PackIdxV1(filename, f)
567 raise GitError('%s: unrecognized idx file header' % filename)
568 elif filename.endswith('.midx'):
569 return midx.PackMidx(filename)
571 raise GitError('idx filenames must end with .idx or .midx')
574 def idxmerge(idxlist, final_progress=True):
575 """Generate a list of all the objects reachable in a PackIdxList."""
576 def pfunc(count, total):
577 qprogress('Reading indexes: %.2f%% (%d/%d)\r'
578 % (count*100.0/total, count, total))
579 def pfinal(count, total):
581 progress('Reading indexes: %.2f%% (%d/%d), done.\n'
582 % (100, total, total))
583 return merge_iter(idxlist, 10024, pfunc, pfinal)
def _make_objcache():
    """Default PackWriter objcache factory: an index list covering the
    current repository's pack directory."""
    pack_dir = repo('objects/pack')
    return PackIdxList(pack_dir)
589 # bup-gc assumes that it can disable all PackWriter activities
590 # (bloom/midx/cache) via the constructor and close() arguments.
593 """Writes Git objects inside a pack file."""
594 def __init__(self, objcache_maker=_make_objcache, compression_level=1,
595 run_midx=True, on_pack_finish=None,
596 max_pack_size=None, max_pack_objects=None):
597 self.repo_dir = repo()
604 self.objcache_maker = objcache_maker
606 self.compression_level = compression_level
607 self.run_midx=run_midx
608 self.on_pack_finish = on_pack_finish
609 # larger packs will slow down pruning
610 self.max_pack_size = max_pack_size if max_pack_size \
611 else 1000 * 1000 * 1000
612 # cache memory usage is about 83 bytes per object
613 self.max_pack_objects = max_pack_objects if max_pack_objects \
614 else max(1, self.max_pack_size // 5000)
621 objdir = dir = os.path.join(self.repo_dir, 'objects')
622 fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
624 self.file = os.fdopen(fd, 'w+b')
629 self.parentfd = os.open(objdir, os.O_RDONLY)
635 assert(name.endswith('.pack'))
636 self.filename = name[:-5]
637 self.file.write('PACK\0\0\0\2\0\0\0\0')
638 self.idx = list(list() for i in xrange(256))
640 def _raw_write(self, datalist, sha):
643 # in case we get interrupted (eg. KeyboardInterrupt), it's best if
644 # the file never has a *partial* blob. So let's make sure it's
645 # all-or-nothing. (The blob shouldn't be very big anyway, thanks
646 # to our hashsplit algorithm.) f.write() does its own buffering,
647 # but that's okay because we'll flush it in _end().
648 oneblob = ''.join(datalist)
652 raise GitError, e, sys.exc_info()[2]
654 crc = zlib.crc32(oneblob) & 0xffffffff
655 self._update_idx(sha, crc, nw)
660 def _update_idx(self, sha, crc, size):
663 self.idx[ord(sha[0])].append((sha, crc, self.file.tell() - size))
665 def _write(self, sha, type, content):
669 sha = calc_hash(type, content)
670 size, crc = self._raw_write(_encode_packobj(type, content,
671 self.compression_level),
673 if self.outbytes >= self.max_pack_size \
674 or self.count >= self.max_pack_objects:
678 def breakpoint(self):
679 """Clear byte and object counts and return the last processed id."""
680 id = self._end(self.run_midx)
681 self.outbytes = self.count = 0
684 def _require_objcache(self):
685 if self.objcache is None and self.objcache_maker:
686 self.objcache = self.objcache_maker()
687 if self.objcache is None:
689 "PackWriter not opened or can't check exists w/o objcache")
691 def exists(self, id, want_source=False):
692 """Return non-empty if an object is found in the object cache."""
693 self._require_objcache()
694 return self.objcache.exists(id, want_source=want_source)
696 def just_write(self, sha, type, content):
697 """Write an object to the pack file, bypassing the objcache. Fails if
699 self._write(sha, type, content)
701 def maybe_write(self, type, content):
702 """Write an object to the pack file if not present and return its id."""
703 sha = calc_hash(type, content)
704 if not self.exists(sha):
705 self.just_write(sha, type, content)
706 self._require_objcache()
707 self.objcache.add(sha)
710 def new_blob(self, blob):
711 """Create a blob object in the pack with the supplied content."""
712 return self.maybe_write('blob', blob)
714 def new_tree(self, shalist):
715 """Create a tree object in the pack."""
716 content = tree_encode(shalist)
717 return self.maybe_write('tree', content)
719 def new_commit(self, tree, parent,
720 author, adate_sec, adate_tz,
721 committer, cdate_sec, cdate_tz,
723 """Create a commit object in the pack. The date_sec values must be
724 epoch-seconds, and if a tz is None, the local timezone is assumed."""
726 adate_str = _git_date_str(adate_sec, adate_tz)
728 adate_str = _local_git_date_str(adate_sec)
730 cdate_str = _git_date_str(cdate_sec, cdate_tz)
732 cdate_str = _local_git_date_str(cdate_sec)
734 if tree: l.append('tree %s' % tree.encode('hex'))
735 if parent: l.append('parent %s' % parent.encode('hex'))
736 if author: l.append('author %s %s' % (author, adate_str))
737 if committer: l.append('committer %s %s' % (committer, cdate_str))
740 return self.maybe_write('commit', '\n'.join(l))
743 """Remove the pack file from disk."""
752 os.unlink(self.filename + '.pack')
759 def _end(self, run_midx=True):
761 if not f: return None
768 # update object count
770 cp = struct.pack('!i', self.count)
774 # calculate the pack sha1sum
777 for b in chunkyreader(f):
779 packbin = sum.digest()
781 fdatasync(f.fileno())
785 obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
786 nameprefix = os.path.join(self.repo_dir,
787 'objects/pack/pack-' + obj_list_sha)
788 if os.path.exists(self.filename + '.map'):
789 os.unlink(self.filename + '.map')
790 os.rename(self.filename + '.pack', nameprefix + '.pack')
791 os.rename(self.filename + '.idx', nameprefix + '.idx')
793 os.fsync(self.parentfd)
795 os.close(self.parentfd)
798 auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
800 if self.on_pack_finish:
801 self.on_pack_finish(nameprefix)
    def close(self, run_midx=True):
        """Close the pack file and move it to its definitive path."""
        # All finalization (idx write, rename into objects/pack, fsync,
        # and optional midx regeneration) happens in _end(); forward its
        # result to the caller.
        return self._end(run_midx=run_midx)
809 def _write_pack_idx_v2(self, filename, idx, packbin):
812 for entry in section:
813 if entry[2] >= 2**31:
816 # Length: header + fan-out + shas-and-crcs + overflow-offsets
817 index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count)
819 idx_f = open(filename, 'w+b')
821 idx_f.truncate(index_len)
822 fdatasync(idx_f.fileno())
823 idx_map = mmap_readwrite(idx_f, close=False)
825 count = _helpers.write_idx(filename, idx_map, idx, self.count)
826 assert(count == self.count)
833 idx_f = open(filename, 'a+b')
838 b = idx_f.read(8 + 4*256)
841 obj_list_sum = Sha1()
842 for b in chunkyreader(idx_f, 20*self.count):
844 obj_list_sum.update(b)
845 namebase = obj_list_sum.hexdigest()
847 for b in chunkyreader(idx_f):
849 idx_f.write(idx_sum.digest())
850 fdatasync(idx_f.fileno())
856 def _gitenv(repo_dir = None):
860 os.environ['GIT_DIR'] = os.path.abspath(repo_dir)
864 def list_refs(refnames=None, repo_dir=None,
865 limit_to_heads=False, limit_to_tags=False):
866 """Yield (refname, hash) tuples for all repository refs unless
867 refnames are specified. In that case, only include tuples for
868 those refs. The limits restrict the result items to refs/heads or
869 refs/tags. If both limits are specified, items from both sources
873 argv = ['git', 'show-ref']
875 argv.append('--heads')
877 argv.append('--tags')
881 p = subprocess.Popen(argv,
882 preexec_fn = _gitenv(repo_dir),
883 stdout = subprocess.PIPE)
884 out = p.stdout.read().strip()
885 rv = p.wait() # not fatal
889 for d in out.split('\n'):
890 (sha, name) = d.split(' ', 1)
891 yield (name, sha.decode('hex'))
894 def read_ref(refname, repo_dir = None):
895 """Get the commit id of the most recent commit made on a given ref."""
896 refs = list_refs(refnames=[refname], repo_dir=repo_dir, limit_to_heads=True)
897 l = tuple(islice(refs, 2))
905 def rev_list(ref, count=None, repo_dir=None):
906 """Generate a list of reachable commits in reverse chronological order.
908 This generator walks through commits, from child to parent, that are
909 reachable via the specified ref and yields a series of tuples of the form
912 If count is a non-zero integer, limit the number of commits to "count"
915 assert(not ref.startswith('-'))
918 opts += ['-n', str(atoi(count))]
919 argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--']
920 p = subprocess.Popen(argv,
921 preexec_fn = _gitenv(repo_dir),
922 stdout = subprocess.PIPE)
926 if s.startswith('commit '):
927 commit = s[7:].decode('hex')
931 rv = p.wait() # not fatal
933 raise GitError, 'git rev-list returned error %d' % rv
936 def get_commit_dates(refs, repo_dir=None):
937 """Get the dates for the specified commit refs. For now, every unique
938 string in refs must resolve to a different commit or this
939 function will fail."""
942 commit = get_commit_items(ref, cp(repo_dir))
943 result.append(commit.author_sec)
947 def rev_parse(committish, repo_dir=None):
948 """Resolve the full hash for 'committish', if it exists.
950 Should be roughly equivalent to 'git rev-parse'.
952 Returns the hex value of the hash if it is found, None if 'committish' does
953 not correspond to anything.
955 head = read_ref(committish, repo_dir=repo_dir)
957 debug2("resolved from ref: commit = %s\n" % head.encode('hex'))
960 pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir))
962 if len(committish) == 40:
964 hash = committish.decode('hex')
974 def update_ref(refname, newval, oldval, repo_dir=None):
975 """Update a repository reference."""
978 assert(refname.startswith('refs/heads/') \
979 or refname.startswith('refs/tags/'))
980 p = subprocess.Popen(['git', 'update-ref', refname,
981 newval.encode('hex'), oldval.encode('hex')],
982 preexec_fn = _gitenv(repo_dir))
983 _git_wait('git update-ref', p)
def delete_ref(refname, oldvalue=None):
    """Delete a repository reference (see git update-ref(1)).

    If oldvalue is given, it is passed through so git verifies the ref's
    current value before deleting it."""
    assert(refname.startswith('refs/'))
    argv = ['git', 'update-ref', '-d', refname]
    if oldvalue:
        argv.append(oldvalue)
    p = subprocess.Popen(argv, preexec_fn = _gitenv())
    _git_wait('git update-ref', p)
995 def guess_repo(path=None):
996 """Set the path value in the global variable "repodir".
997 This makes bup look for an existing bup repository, but not fail if a
998 repository doesn't exist. Usually, if you are interacting with a bup
999 repository, you would not be calling this function but using
1000 check_repo_or_die().
1006 repodir = os.environ.get('BUP_DIR')
1008 repodir = os.path.expanduser('~/.bup')
1011 def init_repo(path=None):
1012 """Create the Git bare repository for bup in a given path."""
1014 d = repo() # appends a / to the path
1015 parent = os.path.dirname(os.path.dirname(d))
1016 if parent and not os.path.exists(parent):
1017 raise GitError('parent directory "%s" does not exist\n' % parent)
1018 if os.path.exists(d) and not os.path.isdir(os.path.join(d, '.')):
1019 raise GitError('"%s" exists but is not a directory\n' % d)
1020 p = subprocess.Popen(['git', '--bare', 'init'], stdout=sys.stderr,
1021 preexec_fn = _gitenv())
1022 _git_wait('git init', p)
1023 # Force the index version configuration in order to ensure bup works
1024 # regardless of the version of the installed Git binary.
1025 p = subprocess.Popen(['git', 'config', 'pack.indexVersion', '2'],
1026 stdout=sys.stderr, preexec_fn = _gitenv())
1027 _git_wait('git config', p)
1029 p = subprocess.Popen(['git', 'config', 'core.logAllRefUpdates', 'true'],
1030 stdout=sys.stderr, preexec_fn = _gitenv())
1031 _git_wait('git config', p)
1034 def check_repo_or_die(path=None):
1035 """Check to see if a bup repository probably exists, and abort if not."""
1038 pst = stat_if_exists(top + '/objects/pack')
1039 if pst and stat.S_ISDIR(pst.st_mode):
1042 top_st = stat_if_exists(top)
1044 log('error: repository %r does not exist (see "bup help init")\n'
1047 log('error: %r is not a repository\n' % top)
1053 """Get Git's version and ensure a usable version is installed.
1055 The returned version is formatted as an ordered tuple with each position
1056 representing a digit in the version tag. For example, the following tuple
1057 would represent version 1.6.6.9:
1059 ('1', '6', '6', '9')
1063 p = subprocess.Popen(['git', '--version'],
1064 stdout=subprocess.PIPE)
1065 gvs = p.stdout.read()
1066 _git_wait('git --version', p)
1067 m = re.match(r'git version (\S+.\S+)', gvs)
1069 raise GitError('git --version weird output: %r' % gvs)
1070 _ver = tuple(m.group(1).split('.'))
1071 needed = ('1','5', '3', '1')
1073 raise GitError('git version %s or higher is required; you have %s'
1074 % ('.'.join(needed), '.'.join(_ver)))
1078 class _AbortableIter:
1079 def __init__(self, it, onabort = None):
1081 self.onabort = onabort
1089 return self.it.next()
1090 except StopIteration as e:
1098 """Abort iteration and call the abortion callback, if needed."""
1108 class MissingObject(KeyError):
1109 def __init__(self, id):
1111 KeyError.__init__(self, 'object %r is missing' % id.encode('hex'))
1116 """Link to 'git cat-file' that is used to retrieve blob data."""
1117 def __init__(self, repo_dir = None):
1119 self.repo_dir = repo_dir
1120 wanted = ('1','5','6')
1123 log('warning: git version < %s; bup will be slow.\n'
1126 self.get = self._slow_get
1128 self.p = self.inprogress = None
1129 self.get = self._fast_get
1133 self.p.stdout.close()
1134 self.p.stdin.close()
1136 self.inprogress = None
1140 self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
1141 stdin=subprocess.PIPE,
1142 stdout=subprocess.PIPE,
1145 preexec_fn = _gitenv(self.repo_dir))
1147 def _fast_get(self, id):
1148 if not self.p or self.p.poll() != None:
1151 poll_result = self.p.poll()
1152 assert(poll_result == None)
1154 log('_fast_get: opening %r while %r is open\n'
1155 % (id, self.inprogress))
1156 assert(not self.inprogress)
1157 assert(id.find('\n') < 0)
1158 assert(id.find('\r') < 0)
1159 assert(not id.startswith('-'))
1160 self.inprogress = id
1161 self.p.stdin.write('%s\n' % id)
1162 self.p.stdin.flush()
1163 hdr = self.p.stdout.readline()
1164 if hdr.endswith(' missing\n'):
1165 self.inprogress = None
1166 raise MissingObject(id.decode('hex'))
1167 spl = hdr.split(' ')
1168 if len(spl) != 3 or len(spl[0]) != 40:
1169 raise GitError('expected blob, got %r' % spl)
1170 (hex, type, size) = spl
1172 it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
1173 onabort = self._abort)
1178 readline_result = self.p.stdout.readline()
1179 assert(readline_result == '\n')
1180 self.inprogress = None
1181 except Exception as e:
1185 def _slow_get(self, id):
1186 assert(id.find('\n') < 0)
1187 assert(id.find('\r') < 0)
1188 assert(id[0] != '-')
1189 type = _git_capture(['git', 'cat-file', '-t', id]).strip()
1192 p = subprocess.Popen(['git', 'cat-file', type, id],
1193 stdout=subprocess.PIPE,
1194 preexec_fn = _gitenv(self.repo_dir))
1195 for blob in chunkyreader(p.stdout):
1197 _git_wait('git cat-file', p)
1199 def _join(self, it):
1204 elif type == 'tree':
1205 treefile = ''.join(it)
1206 for (mode, name, sha) in tree_decode(treefile):
1207 for blob in self.join(sha.encode('hex')):
1209 elif type == 'commit':
1210 treeline = ''.join(it).split('\n')[0]
1211 assert(treeline.startswith('tree '))
1212 for blob in self.join(treeline[5:]):
1215 raise GitError('invalid object type %r: expected blob/tree/commit'
1219 """Generate a list of the content of all blobs that can be reached
1220 from an object. The hash given in 'id' must point to a blob, a tree
1221 or a commit. The content of all blobs that can be seen from trees or
1222 commits will be added to the list.
1225 for d in self._join(self.get(id)):
1227 except StopIteration:
1233 def cp(repo_dir=None):
1234 """Create a CatPipe object or reuse the already existing one."""
1237 repo_dir = repodir or repo()
1238 repo_dir = os.path.abspath(repo_dir)
1239 cp = _cp.get(repo_dir)
1241 cp = CatPipe(repo_dir)
1246 def tags(repo_dir = None):
1247 """Return a dictionary of all tags in the form {hash: [tag_names, ...]}."""
1249 for n, c in list_refs(repo_dir = repo_dir, limit_to_tags=True):
1250 assert(n.startswith('refs/tags/'))
1254 tags[c].append(name) # more than one tag can point at 'c'
1258 WalkItem = namedtuple('WalkItem', ['id', 'type', 'mode',
1259 'path', 'chunk_path', 'data'])
1260 # The path is the mangled path, and if an item represents a fragment
1261 # of a chunked file, the chunk_path will be the chunked subtree path
1262 # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a
1263 # chunked file will have a chunk_path of ['']. So some chunk subtree
1264 # of the file '/foo/bar/baz' might look like this:
1266 # item.path = ['foo', 'bar', 'baz.bup']
1267 # item.chunk_path = ['', '2d3115e', '016b097']
1268 # item.type = 'tree'
1272 def walk_object(cat_pipe, id,
1275 """Yield everything reachable from id via cat_pipe as a WalkItem,
1276 stopping whenever stop_at(id) returns true. Throw MissingObject
1277 if a hash encountered is missing from the repository, and don't
1278 read or return blob content in the data field unless include_data
1281 # Maintain the pending stack on the heap to avoid stack overflow
1282 pending = [(id, [], [], None)]
1284 id, parent_path, chunk_path, mode = pending.pop()
1285 if stop_at and stop_at(id):
1288 if (not include_data) and mode and stat.S_ISREG(mode):
1289 # If the object is a "regular file", then it's a leaf in
1290 # the graph, so we can skip reading the data if the caller
1291 # hasn't requested it.
1292 yield WalkItem(id=id, type='blob',
1293 chunk_path=chunk_path, path=parent_path,
1298 item_it = cat_pipe.get(id)
1299 type = item_it.next()
1300 if type not in ('blob', 'commit', 'tree'):
1301 raise Exception('unexpected repository object type %r' % type)
1303 # FIXME: set the mode based on the type when the mode is None
1304 if type == 'blob' and not include_data:
1305 # Dump data until we can ask cat_pipe not to fetch it
1306 for ignored in item_it:
1310 data = ''.join(item_it)
1312 yield WalkItem(id=id, type=type,
1313 chunk_path=chunk_path, path=parent_path,
1315 data=(data if include_data else None))
1317 if type == 'commit':
1318 commit_items = parse_commit(data)
1319 for pid in commit_items.parents:
1320 pending.append((pid, parent_path, chunk_path, mode))
1321 pending.append((commit_items.tree, parent_path, chunk_path,
1322 hashsplit.GIT_MODE_TREE))
1323 elif type == 'tree':
1324 for mode, name, ent_id in tree_decode(data):
1325 demangled, bup_type = demangle_name(name, mode)
1327 sub_path = parent_path
1328 sub_chunk_path = chunk_path + [name]
1330 sub_path = parent_path + [name]
1331 if bup_type == BUP_CHUNKED:
1332 sub_chunk_path = ['']
1334 sub_chunk_path = chunk_path
1335 pending.append((ent_id.encode('hex'), sub_path, sub_chunk_path,