fdatasync,
hostname, localtime, log, merge_iter,
mmap_read, mmap_readwrite,
- progress, qprogress, unlink, username, userfullname,
+ parse_num,
+ progress, qprogress, stat_if_exists,
+ unlink, username, userfullname,
utc_offset_str)
-
-max_pack_size = 1000*1000*1000 # larger packs will slow down pruning
-max_pack_objects = 200*1000 # cache memory usage is about 83 bytes per object
-
verbose = 0
ignore_midx = 0
-repodir = None
+repodir = None # The default repository, once initialized
_typemap = { 'blob':3, 'tree':2, 'commit':1, 'tag':4 }
_typermap = { 3:'blob', 2:'tree', 1:'commit', 4:'tag' }
pass
+def _git_wait(cmd, p):
+ rv = p.wait()
+ if rv != 0:
+ raise GitError('%s returned %d' % (cmd, rv))
+
def _git_capture(argv):
    """Run argv as a git subprocess and return its entire stdout.

    Raises GitError (via _git_wait) if the command exits nonzero.
    """
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=_gitenv())
    out = proc.stdout.read()
    _git_wait(repr(argv), proc)
    return out

def git_config_get(option, repo_dir=None):
    """Return the value of git config option for repo_dir (or the default
    repository), or None if the option is unset.

    Raises GitError if git config exits with any status other than 0
    (found) or 1 (not found).
    """
    cmd = ('git', 'config', '--get', option)
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         preexec_fn=_gitenv(repo_dir=repo_dir))
    # communicate() drains stdout and closes the pipe; the previous
    # read()/wait() pair leaked the stdout file descriptor.
    r = p.communicate()[0]
    rc = p.returncode
    if rc == 0:
        return r
    if rc != 1:
        raise GitError('%s returned %d' % (cmd, rc))
    return None


def parse_tz_offset(s):
"""UTC offset in seconds."""
tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
class PackWriter:
"""Writes Git objects inside a pack file."""
def __init__(self, objcache_maker=_make_objcache, compression_level=1,
- run_midx=True, on_pack_finish=None):
+ run_midx=True, on_pack_finish=None,
+ max_pack_size=None, max_pack_objects=None):
+ self.repo_dir = repo()
self.file = None
self.parentfd = None
self.count = 0
self.compression_level = compression_level
self.run_midx=run_midx
self.on_pack_finish = on_pack_finish
+ if not max_pack_size:
+ max_pack_size = git_config_get('pack.packSizeLimit',
+ repo_dir=self.repo_dir)
+ if max_pack_size is not None:
+ max_pack_size = parse_num(max_pack_size)
+ if not max_pack_size:
+ # larger packs slow down pruning
+ max_pack_size = 1000 * 1000 * 1000
+ self.max_pack_size = max_pack_size
+ # cache memory usage is about 83 bytes per object
+ self.max_pack_objects = max_pack_objects if max_pack_objects \
+ else max(1, self.max_pack_size // 5000)
    def __del__(self):
        # Finalize/clean up any in-progress pack if the writer is
        # garbage collected while still open.
        self.close()
def _open(self):
if not self.file:
- objdir = dir=repo('objects')
+ objdir = dir = os.path.join(self.repo_dir, 'objects')
fd, name = tempfile.mkstemp(suffix='.pack', dir=objdir)
try:
self.file = os.fdopen(fd, 'w+b')
size, crc = self._raw_write(_encode_packobj(type, content,
self.compression_level),
sha=sha)
- if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
+ if self.outbytes >= self.max_pack_size \
+ or self.count >= self.max_pack_objects:
self.breakpoint()
return sha
self._require_objcache()
return self.objcache.exists(id, want_source=want_source)
- def write(self, sha, type, content):
- """Write an object to the pack file. Fails if sha exists()."""
+ def just_write(self, sha, type, content):
+ """Write an object to the pack file, bypassing the objcache. Fails if
+ sha exists()."""
self._write(sha, type, content)
def maybe_write(self, type, content):
"""Write an object to the pack file if not present and return its id."""
sha = calc_hash(type, content)
if not self.exists(sha):
- self.write(sha, type, content)
+ self.just_write(sha, type, content)
self._require_objcache()
self.objcache.add(sha)
return sha
f.close()
obj_list_sha = self._write_pack_idx_v2(self.filename + '.idx', idx, packbin)
-
- nameprefix = repo('objects/pack/pack-%s' % obj_list_sha)
+ nameprefix = os.path.join(self.repo_dir,
+ 'objects/pack/pack-' + obj_list_sha)
if os.path.exists(self.filename + '.map'):
os.unlink(self.filename + '.map')
os.rename(self.filename + '.pack', nameprefix + '.pack')
os.close(self.parentfd)
if run_midx:
- auto_midx(repo('objects/pack'))
+ auto_midx(os.path.join(self.repo_dir, 'objects/pack'))
if self.on_pack_finish:
self.on_pack_finish(nameprefix)
def check_repo_or_die(path=None):
    """Check to see if a bup repository probably exists, and abort if not."""
    guess_repo(path)
    top = repo()
    pst = stat_if_exists(top + '/objects/pack')
    # A repository "probably exists" if objects/pack is a directory.
    if pst and stat.S_ISDIR(pst.st_mode):
        return
    # Distinguish "no repository at all" from "something else is there".
    if not pst and not stat_if_exists(top):
        log('error: repository %r does not exist (see "bup help init")\n'
            % top)
        sys.exit(15)
    log('error: %r is not a repository\n' % top)
    sys.exit(14)
_ver = None
return _ver
-def _git_wait(cmd, p):
- rv = p.wait()
- if rv != 0:
- raise GitError('%s returned %d' % (cmd, rv))
-
-
-def _git_capture(argv):
- p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn = _gitenv())
- r = p.stdout.read()
- _git_wait(repr(argv), p)
- return r
-
-
class _AbortableIter:
def __init__(self, it, onabort = None):
self.it = it
self.p = None
self.inprogress = None
- def _restart(self):
+ def restart(self):
self._abort()
self.p = subprocess.Popen(['git', 'cat-file', '--batch'],
stdin=subprocess.PIPE,
def _fast_get(self, id):
if not self.p or self.p.poll() != None:
- self._restart()
+ self.restart()
assert(self.p)
poll_result = self.p.poll()
assert(poll_result == None)
def cp(repo_dir=None):
"""Create a CatPipe object or reuse the already existing one."""
- global _cp
+ global _cp, repodir
if not repo_dir:
- repo_dir = repo()
+ repo_dir = repodir or repo()
repo_dir = os.path.abspath(repo_dir)
cp = _cp.get(repo_dir)
if not cp:
include_data=None):
"""Yield everything reachable from id via cat_pipe as a WalkItem,
stopping whenever stop_at(id) returns true. Throw MissingObject
- if a hash encountered is missing from the repository.
-
+ if a hash encountered is missing from the repository, and don't
+ read or return blob content in the data field unless include_data
+ is set.
"""
# Maintain the pending stack on the heap to avoid stack overflow
pending = [(id, [], [], None)]
if stop_at and stop_at(id):
continue
- item_it = cat_pipe.get(id) # FIXME: use include_data
+ if (not include_data) and mode and stat.S_ISREG(mode):
+ # If the object is a "regular file", then it's a leaf in
+ # the graph, so we can skip reading the data if the caller
+ # hasn't requested it.
+ yield WalkItem(id=id, type='blob',
+ chunk_path=chunk_path, path=parent_path,
+ mode=mode,
+ data=None)
+ continue
+
+ item_it = cat_pipe.get(id)
type = item_it.next()
if type not in ('blob', 'commit', 'tree'):
raise Exception('unexpected repository object type %r' % type)