From abd1d110179268a0af899641e6264c035128ad06 Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Wed, 22 Jan 2014 12:55:16 -0600 Subject: [PATCH] Allow the specification of a repo_dir to some VFS and git operations Previously, these VFS and git operations would only operate on the default repository (git.repo()). Have vfs.cp() handle more than one repository (via an internal cache). Signed-off-by: Rob Browning Tested-by: Rob Browning --- lib/bup/git.py | 58 +++++++++++--------- lib/bup/vfs.py | 142 +++++++++++++++++++++++++++---------------------- 2 files changed, 109 insertions(+), 91 deletions(-) diff --git a/lib/bup/git.py b/lib/bup/git.py index a8f3729..543d55d 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -86,18 +86,19 @@ def get_commit_items(id, cp): return parse_commit(commit_content) -def repo(sub = ''): +def repo(sub = '', repo_dir=None): """Get the path to the git repository or one of its subdirectories.""" global repodir - if not repodir: + repo_dir = repo_dir or repodir + if not repo_dir: raise GitError('You should call check_repo_or_die()') # If there's a .git subdirectory, then the actual repo is in there. - gd = os.path.join(repodir, '.git') + gd = os.path.join(repo_dir, '.git') if os.path.exists(gd): repodir = gd - return os.path.join(repodir, sub) + return os.path.join(repo_dir, sub) def shorten_hash(s): @@ -771,14 +772,16 @@ def _gitenv(repo_dir = None): return env -def list_refs(refname = None): +def list_refs(refname = None, repo_dir = None): """Generate a list of tuples in the form (refname,hash). If a ref name is specified, list only this particular ref. """ argv = ['git', 'show-ref', '--'] if refname: argv += [refname] - p = subprocess.Popen(argv, preexec_fn = _gitenv(), stdout = subprocess.PIPE) + p = subprocess.Popen(argv, + preexec_fn = _gitenv(repo_dir), + stdout = subprocess.PIPE) out = p.stdout.read().strip() rv = p.wait() # not fatal if rv: @@ -789,9 +792,9 @@ def list_refs(refname = None): yield (name, sha.decode('hex')) -def read_ref(refname): +def read_ref(refname, repo_dir = None): """Get the commit id of the most recent commit made on a given ref.""" - l = list(list_refs(refname)) + l = list(list_refs(refname, repo_dir)) if l: assert(len(l) == 1) return l[0][1] @@ -799,7 +802,7 @@ def read_ref(refname): return None -def rev_list(ref, count=None): +def rev_list(ref, count=None, repo_dir=None): """Generate a list of reachable commits in reverse chronological order. This generator walks through commits, from child to parent, that are @@ -814,7 +817,9 @@ def rev_list(ref, count=None): if count: opts += ['-n', str(atoi(count))] argv = ['git', 'rev-list', '--pretty=format:%at'] + opts + [ref, '--'] - p = subprocess.Popen(argv, preexec_fn = _gitenv(), stdout = subprocess.PIPE) + p = subprocess.Popen(argv, + preexec_fn = _gitenv(repo_dir), + stdout = subprocess.PIPE) commit = None for row in p.stdout: s = row.strip() @@ -828,18 +833,18 @@ def rev_list(ref, count=None): raise GitError, 'git rev-list returned error %d' % rv -def get_commit_dates(refs): +def get_commit_dates(refs, repo_dir=None): """Get the dates for the specified commit refs. For now, every unique string in refs must resolve to a different commit or this function will fail.""" result = [] for ref in refs: - commit = get_commit_items(ref, cp()) + commit = get_commit_items(ref, cp(repo_dir)) result.append(commit.author_sec) return result -def rev_parse(committish): +def rev_parse(committish, repo_dir=None): """Resolve the full hash for 'committish', if it exists. Should be roughly equivalent to 'git rev-parse'. @@ -847,12 +852,12 @@ def rev_parse(committish): Returns the hex value of the hash if it is found, None if 'committish' does not correspond to anything. """ - head = read_ref(committish) + head = read_ref(committish, repo_dir=repo_dir) if head: debug2("resolved from ref: commit = %s\n" % head.encode('hex')) return head - pL = PackIdxList(repo('objects/pack')) + pL = PackIdxList(repo('objects/pack', repo_dir=repo_dir)) if len(committish) == 40: try: @@ -1121,28 +1126,29 @@ class CatPipe: log('booger!\n') -_cp = (None, None) +_cp = {} -def cp(): - """Create a CatPipe object or reuse an already existing one.""" +def cp(repo_dir=None): + """Create a CatPipe object or reuse the already existing one.""" global _cp - cp_dir, cp = _cp - cur_dir = os.path.realpath(repo()) - if cur_dir != cp_dir: - cp = CatPipe() - _cp = (cur_dir, cp) + if not repo_dir: + repo_dir = repo() + repo_dir = os.path.abspath(repo_dir) + cp = _cp.get(repo_dir) + if not cp: + cp = CatPipe(repo_dir) + _cp[repo_dir] = cp return cp -def tags(): +def tags(repo_dir = None): """Return a dictionary of all tags in the form {hash: [tag_names, ...]}.""" tags = {} - for (n,c) in list_refs(): + for (n,c) in list_refs(repo_dir = repo_dir): if n.startswith('refs/tags/'): name = n[10:] if not c in tags: tags[c] = [] tags[c].append(name) # more than one tag can point at 'c' - return tags diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py index d76f26f..3da55d7 100644 --- a/lib/bup/vfs.py +++ b/lib/bup/vfs.py @@ -33,44 +33,44 @@ class TooManySymlinks(NodeError): pass -def _treeget(hash): - it = cp().get(hash.encode('hex')) +def _treeget(hash, repo_dir=None): + it = cp(repo_dir).get(hash.encode('hex')) type = it.next() assert(type == 'tree') return git.tree_decode(''.join(it)) -def _tree_decode(hash): +def _tree_decode(hash, repo_dir=None): tree = [(int(name,16),stat.S_ISDIR(mode),sha) for (mode,name,sha) - in _treeget(hash)] + in _treeget(hash, repo_dir)] assert(tree == list(sorted(tree))) return tree -def _chunk_len(hash): - return sum(len(b) for b in cp().join(hash.encode('hex'))) +def _chunk_len(hash, repo_dir=None): + return sum(len(b) for b in cp(repo_dir).join(hash.encode('hex'))) -def _last_chunk_info(hash): - tree = _tree_decode(hash) +def _last_chunk_info(hash, repo_dir=None): + tree = _tree_decode(hash, repo_dir) assert(tree) (ofs,isdir,sha) = tree[-1] if isdir: - (subofs, sublen) = _last_chunk_info(sha) + (subofs, sublen) = _last_chunk_info(sha, repo_dir) return (ofs+subofs, sublen) else: return (ofs, _chunk_len(sha)) -def _total_size(hash): - (lastofs, lastsize) = _last_chunk_info(hash) +def _total_size(hash, repo_dir=None): + (lastofs, lastsize) = _last_chunk_info(hash, repo_dir) return lastofs + lastsize -def _chunkiter(hash, startofs): +def _chunkiter(hash, startofs, repo_dir=None): assert(startofs >= 0) - tree = _tree_decode(hash) + tree = _tree_decode(hash, repo_dir) # skip elements before startofs for i in xrange(len(tree)): @@ -85,20 +85,20 @@ def _chunkiter(hash, startofs): if skipmore < 0: skipmore = 0 if isdir: - for b in _chunkiter(sha, skipmore): + for b in _chunkiter(sha, skipmore, repo_dir): yield b else: - yield ''.join(cp().join(sha.encode('hex')))[skipmore:] + yield ''.join(cp(repo_dir).join(sha.encode('hex')))[skipmore:] class _ChunkReader: - def __init__(self, hash, isdir, startofs): + def __init__(self, hash, isdir, startofs, repo_dir=None): if isdir: - self.it = _chunkiter(hash, startofs) + self.it = _chunkiter(hash, startofs, repo_dir) self.blob = None else: self.it = None - self.blob = ''.join(cp().join(hash.encode('hex')))[startofs:] + self.blob = ''.join(cp(repo_dir).join(hash.encode('hex')))[startofs:] self.ofs = startofs def next(self, size): @@ -121,12 +121,13 @@ class _ChunkReader: class _FileReader(object): - def __init__(self, hash, size, isdir): + def __init__(self, hash, size, isdir, repo_dir=None): self.hash = hash self.ofs = 0 self.size = size self.isdir = isdir self.reader = None + self._repo_dir = repo_dir def seek(self, ofs): if ofs > self.size: @@ -143,7 +144,8 @@ class _FileReader(object): if count < 0: count = self.size - self.ofs if not self.reader or self.reader.ofs != self.ofs: - self.reader = _ChunkReader(self.hash, self.isdir, self.ofs) + self.reader = _ChunkReader(self.hash, self.isdir, self.ofs, + self._repo_dir) try: buf = self.reader.next(count) except: @@ -158,12 +160,13 @@ class _FileReader(object): class Node(object): """Base class for file representation.""" - def __init__(self, parent, name, mode, hash): + def __init__(self, parent, name, mode, hash, repo_dir=None): self.parent = parent self.name = name self.mode = mode self.hash = hash self.ctime = self.mtime = self.atime = 0 + self._repo_dir = repo_dir self._subs = None self._metadata = None @@ -309,8 +312,8 @@ class Node(object): class File(Node): """A normal file from bup's repository.""" - def __init__(self, parent, name, mode, hash, bupmode): - Node.__init__(self, parent, name, mode, hash) + def __init__(self, parent, name, mode, hash, bupmode, repo_dir=None): + Node.__init__(self, parent, name, mode, hash, repo_dir) self.bupmode = bupmode self._cached_size = None self._filereader = None @@ -323,7 +326,8 @@ class File(Node): # object here so we're not constantly re-seeking. if not self._filereader: self._filereader = _FileReader(self.hash, self.size(), - self.bupmode == git.BUP_CHUNKED) + self.bupmode == git.BUP_CHUNKED, + repo_dir = self._repo_dir) self._filereader.seek(0) return self._filereader @@ -332,9 +336,11 @@ class File(Node): if self._cached_size == None: debug1('<<<