from __future__ import absolute_import, print_function
from binascii import hexlify
-from errno import EACCES
+from errno import ENOENT
from io import BytesIO
import math, os, stat, sys, time
from bup import compat, hashsplit, git, options, index, client, metadata
from bup import hlinkdb
-from bup.compat import argv_bytes, environ
+from bup.compat import argv_bytes, environ, nullcontext
from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK
from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
hostname, istty2, log, parse_date_or_fatal, parse_num,
valid_save_name)
from bup.io import byte_stream, path_msg
from bup.pwdgrp import userfullname, username
+from bup.tree import StackDir
optspec = """
#,compress= set compression level to # (0-9, 9 is highest) [1]
"""
-def main(argv):
- # Hack around lack of nonlocal vars in python 2
- _nonlocal = {}
+### Test hooks
+
+after_nondir_metadata_stat = None  # passed as after_stat= to metadata.from_path() when save_tree builds metadata for a non-directory entry
+def before_saving_regular_file(name):  # default no-op; save_tree calls it with ent.name just before opening that file for hashsplitting
+ return
+
+
+def opts_from_cmdline(argv):  # parse argv[1:] with optspec, normalize the options in place and return opt; invalid usage goes through o.fatal()
o = options.Options(optspec)
opt, flags, extra = o.parse_bytes(argv[1:])
opt.remote = argv_bytes(opt.remote)
if opt.strip_path:
opt.strip_path = argv_bytes(opt.strip_path)
-
- git.check_repo_or_die()
if not (opt.tree or opt.commit or opt.name):
o.fatal("use one or more of -t, -c, -n")
if not extra:
o.fatal("no filenames given")
-
- extra = [argv_bytes(x) for x in extra]
+ if opt.date:
+ opt.date = parse_date_or_fatal(opt.date, o.fatal)
+ else:
+ opt.date = time.time()  # no date option given: use the current time
opt.progress = (istty2 and not opt.quiet)
opt.smaller = parse_num(opt.smaller or 0)
- if opt.bwlimit:
- client.bwlimit = parse_num(opt.bwlimit)
- if opt.date:
- date = parse_date_or_fatal(opt.date, o.fatal)
- else:
- date = time.time()
+ if opt.bwlimit:
+ opt.bwlimit = parse_num(opt.bwlimit)  # only stored on opt here; main() copies it to client.bwlimit
if opt.strip and opt.strip_path:
o.fatal("--strip is incompatible with --strip-path")
- graft_points = []
+ opt.sources = [argv_bytes(x) for x in extra]  # the filename arguments left over after option parsing
+
+ grafts = []
if opt.graft:
if opt.strip:
o.fatal("--strip is incompatible with --graft")
old_path, new_path = splitted_parameter
if not (old_path and new_path):
o.fatal("a graft point cannot be empty")
- graft_points.append((resolve_parent(old_path),
- resolve_parent(new_path)))
+ grafts.append((resolve_parent(old_path),
+ resolve_parent(new_path)))
+ opt.grafts = grafts
- is_reverse = environ.get(b'BUP_SERVER_REVERSE')
- if is_reverse and opt.remote:
+ opt.is_reverse = environ.get(b'BUP_SERVER_REVERSE')
+ if opt.is_reverse and opt.remote:
o.fatal("don't use -r in reverse mode; it's automatic")
- name = opt.name
- if name and not valid_save_name(name):
- o.fatal("'%s' is not a valid branch name" % path_msg(name))
- refname = name and b'refs/heads/%s' % name or None
- if opt.remote or is_reverse:
- try:
- cli = client.Client(opt.remote)
- except client.ClientError as e:
- log('error: %s' % e)
- sys.exit(1)
- oldref = refname and cli.read_ref(refname) or None
- w = cli.new_packwriter(compression_level=opt.compress)
- else:
- cli = None
- oldref = refname and git.read_ref(refname) or None
- w = git.PackWriter(compression_level=opt.compress)
-
- handle_ctrl_c()
+ if opt.name and not valid_save_name(opt.name):
+ o.fatal("'%s' is not a valid branch name" % path_msg(opt.name))
+ return opt
+def save_tree(opt, reader, hlink_db, msr, w):  # save the entries from reader.filter(opt.sources) through writer w; returns the root tree from the final _pop()
# Metadata is stored in a file named .bupm in each directory. The
# first metadata entry will be the metadata for the current directory.
# The remaining entries will be for each of the other directory
# Maintain a stack of information representing the current location in
# the archive being constructed. The current path is recorded in
- # parts, which will be something like ['', 'home', 'someuser'], and
- # the accumulated content and metadata for of the dirs in parts is
- # stored in parallel stacks in shalists and metalists.
-
- parts = [] # Current archive position (stack of dir names).
- shalists = [] # Hashes for each dir in paths.
- metalists = [] # Metadata for each dir in paths.
+ # stack, which will be something like
+ # [StackDir(name=''), StackDir(name='home'), StackDir(name='someuser')],
+ # and the accumulated content and metadata for the files in each dir are
+ # stored in the .items member of that dir's StackDir.
+ stack = []
def _push(part, metadata):
# Enter a new archive directory -- make it the current directory.
- parts.append(part)
- shalists.append([])
- metalists.append([(b'', metadata)]) # This dir's metadata (no name).
+ item = StackDir(part, metadata)
+ stack.append(item)
- def _pop(force_tree, dir_metadata=None):
+ def _pop(force_tree=None, dir_metadata=None):  # returns this dir's tree: force_tree if given, else one written via w.new_tree()
# Leave the current archive directory and add its tree to its parent.
- assert(len(parts) >= 1)
- part = parts.pop()
- shalist = shalists.pop()
- metalist = metalists.pop()
+ item = stack.pop()
# FIXME: only test if collision is possible (i.e. given --strip, etc.)?
if force_tree:
tree = force_tree
else:
names_seen = set()
clean_list = []
- metaidx = 1 # entry at 0 is for the dir
- for x in shalist:
- name = x[1]
+ for x in item.items:
+ name = x.name
if name in names_seen:
- parent_path = b'/'.join(parts) + b'/'
+ parent_path = b'/'.join(x.name for x in stack) + b'/'
add_error('error: ignoring duplicate path %s in %s'
% (path_msg(name), path_msg(parent_path)))
- if not stat.S_ISDIR(x[0]):
- del metalist[metaidx]
else:
names_seen.add(name)
clean_list.append(x)
- if not stat.S_ISDIR(x[0]):
- metaidx += 1
-
- if dir_metadata: # Override the original metadata pushed for this dir.
- metalist = [(b'', dir_metadata)] + metalist[1:]
- sorted_metalist = sorted(metalist, key = lambda x : x[0])
- metadata = b''.join([m[1].encode() for m in sorted_metalist])
- metadata_f = BytesIO(metadata)
+
+ # if set, overrides the original metadata pushed for this dir.
+ if dir_metadata is None:
+ dir_metadata = item.meta
+ metalist = [(b'', dir_metadata)]
+ metalist += [(git.shalist_item_sort_key((entry.mode, entry.name, None)),
+ entry.meta)
+ for entry in clean_list if entry.mode != GIT_MODE_TREE]
+ metalist.sort(key = lambda x: x[0])
+ metadata = BytesIO(b''.join(m[1].encode() for m in metalist))
mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
- [metadata_f],
+ [metadata],
keep_boundaries=False)
- clean_list.append((mode, b'.bupm', id))
-
- tree = w.new_tree(clean_list)
- if shalists:
- shalists[-1].append((GIT_MODE_TREE,
- git.mangle_name(part,
- GIT_MODE_TREE, GIT_MODE_TREE),
- tree))
+ shalist = [(mode, b'.bupm', id)]
+ shalist += [(entry.gitmode,
+ git.mangle_name(entry.name, entry.mode, entry.gitmode),
+ entry.oid)
+ for entry in clean_list]
+
+ tree = w.new_tree(shalist)
+ if stack:
+ stack[-1].append(item.name, GIT_MODE_TREE, GIT_MODE_TREE, tree, None)
return tree
+ # Hack around lack of nonlocal vars in python 2
+ _nonlocal = {}
_nonlocal['count'] = 0
_nonlocal['subcount'] = 0
_nonlocal['lastremain'] = None
remainstr, kpsstr))
- indexfile = opt.indexfile or git.repo(b'bupindex')
- r = index.Reader(indexfile)
- try:
- msr = index.MetaStoreReader(indexfile + b'.meta')
- except IOError as ex:
- if ex.errno != EACCES:
- raise
- log('error: cannot access %r; have you run bup index?'
- % path_msg(indexfile))
- sys.exit(1)
- hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')
-
def already_saved(ent):
return ent.is_valid() and w.exists(ent.sha) and ent.sha
link_paths = hlink_db.node_paths(ent.dev, ent.ino)
if link_paths:
return link_paths[0]
+ return None
total = ftotal = 0
if opt.progress:
- for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
+ for transname, ent in reader.filter(opt.sources,
+ wantrecurse=wantrecurse_pre):
if not (ftotal % 10024):
qprogress('Reading index: %d\r' % ftotal)
exists = ent.exists()
fcount = 0
lastskip_name = None
lastdir = b''
- for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during):
+ for transname, ent in reader.filter(opt.sources,
+ wantrecurse=wantrecurse_during):
(dir, file) = os.path.split(ent.name)
exists = (ent.flags & index.IX_EXISTS)
hashvalid = already_saved(ent)
assert(dir.startswith(b'/'))
if opt.strip:
- dirp = stripped_path_components(dir, extra)
+ dirp = stripped_path_components(dir, opt.sources)
elif opt.strip_path:
dirp = stripped_path_components(dir, [opt.strip_path])
- elif graft_points:
- dirp = grafted_path_components(graft_points, dir)
+ elif opt.grafts:
+ dirp = grafted_path_components(opt.grafts, dir)
else:
dirp = path_components(dir)
root_collision = True
# If switching to a new sub-tree, finish the current sub-tree.
- while parts > [x[0] for x in dirp]:
- _pop(force_tree = None)
+ while [x.name for x in stack] > [x[0] for x in dirp]:
+ _pop()
# If switching to a new sub-tree, start a new sub-tree.
- for path_component in dirp[len(parts):]:
+ for path_component in dirp[len(stack):]:
dir_name, fs_path = path_component
# Not indexed, so just grab the FS metadata or use empty metadata.
try:
_push(dir_name, meta)
if not file:
- if len(parts) == 1:
+ if len(stack) == 1:
continue # We're at the top level -- keep the current root dir
# Since there's no filename, this is a subdir -- finish it.
oldtree = already_saved(ent) # may be None
# it's not a directory
if hashvalid:
- id = ent.sha
- git_name = git.mangle_name(file, ent.mode, ent.gitmode)
- git_info = (ent.gitmode, git_name, id)
- shalists[-1].append(git_info)
- sort_key = git.shalist_item_sort_key((ent.mode, file, id))
meta = msr.metadata_at(ent.meta_ofs)
meta.hardlink_target = find_hardlink_target(hlink_db, ent)
# Restore the times that were cleared to 0 in the metastore.
(meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
- metalists[-1].append((sort_key, meta))
+ stack[-1].append(file, ent.mode, ent.gitmode, ent.sha, meta)
else:
id = None
hlink = find_hardlink_target(hlink_db, ent)
try:
meta = metadata.from_path(ent.name, hardlink_target=hlink,
- normalized=True)
+ normalized=True,
+ after_stat=after_nondir_metadata_stat)
except (OSError, IOError) as e:
add_error(e)
lastskip_name = ent.name
def new_blob(data):
meta.size += len(data)
return w.new_blob(data)
+ before_saving_regular_file(ent.name)
with hashsplit.open_noatime(ent.name) as f:
(mode, id) = hashsplit.split_to_blob_or_tree(
new_blob, w.new_tree, [f],
if id:
ent.validate(mode, id)
ent.repack()
- git_name = git.mangle_name(file, ent.mode, ent.gitmode)
- git_info = (mode, git_name, id)
- shalists[-1].append(git_info)
- sort_key = git.shalist_item_sort_key((ent.mode, file, id))
- metalists[-1].append((sort_key, meta))
+ stack[-1].append(file, ent.mode, ent.gitmode, id, meta)
if exists and wasmissing:
_nonlocal['count'] += oldsize
progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
% (pct, _nonlocal['count']/1024, total/1024, fcount, ftotal))
- while len(parts) > 1: # _pop() all the parts above the root
- _pop(force_tree = None)
- assert(len(shalists) == 1)
- assert(len(metalists) == 1)
+ while len(stack) > 1: # _pop() all the parts above the root
+ _pop()
# Finish the root directory.
- tree = _pop(force_tree = None,
- # When there's a collision, use empty metadata for the root.
- dir_metadata = metadata.Metadata() if root_collision else None)
-
- sys.stdout.flush()
- out = byte_stream(sys.stdout)
-
- if opt.tree:
- out.write(hexlify(tree))
- out.write(b'\n')
- if opt.commit or name:
- if compat.py_maj > 2:
- # Strip b prefix from python 3 bytes reprs to preserve previous format
- msgcmd = b'[%s]' % b', '.join([repr(argv_bytes(x))[1:].encode('ascii')
- for x in argv])
- else:
- msgcmd = repr(argv)
- msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
- userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
- commit = w.new_commit(tree, oldref, userline, date, None,
- userline, date, None, msg)
- if opt.commit:
- out.write(hexlify(commit))
- out.write(b'\n')
-
- msr.close()
- w.close() # must close before we can update the ref
+ # When there's a collision, use empty metadata for the root.
+ tree = _pop(dir_metadata = metadata.Metadata() if root_collision else None)
- if opt.name:
- if cli:
- cli.update_ref(refname, commit, oldref)
+ return tree
+
+
+def commit_tree(tree, parent, date, argv, writer):  # create a "bup save" commit for tree via writer.new_commit() and return its result
+ if compat.py_maj > 2:
+ # Strip the leading b from each python 3 bytes repr so the message keeps the previous, repr(argv)-style format
+ msgcmd = b'[%s]' % b', '.join([repr(argv_bytes(x))[1:].encode('ascii')
+ for x in argv])
+ else:
+ msgcmd = repr(argv)
+ msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
+ userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
+ return writer.new_commit(tree, parent, userline, date, None,  # NOTE(review): userline/date are passed twice -- presumably author then committer; confirm against new_commit
+ userline, date, None, msg)
+
+
+def main(argv):
+ handle_ctrl_c()
+ opt = opts_from_cmdline(argv)
+ client.bwlimit = opt.bwlimit
+ git.check_repo_or_die()
+
+ remote_dest = opt.remote or opt.is_reverse
+ if not remote_dest:
+ repo = git
+ cli = nullcontext()
+ else:
+ try:
+ cli = repo = client.Client(opt.remote)
+ except client.ClientError as e:
+ log('error: %s' % e)
+ sys.exit(1)
+
+ # cli creation must be last nontrivial command in each if clause above
+ with cli:
+ if not remote_dest:
+ w = git.PackWriter(compression_level=opt.compress)
else:
- git.update_ref(refname, commit, oldref)
+ w = cli.new_packwriter(compression_level=opt.compress)
+
+ with w:
+ sys.stdout.flush()
+ out = byte_stream(sys.stdout)
- if cli:
- cli.close()
+ if opt.name:
+ refname = b'refs/heads/%s' % opt.name
+ parent = repo.read_ref(refname)
+ else:
+ refname = parent = None
+
+ indexfile = opt.indexfile or git.repo(b'bupindex')
+ try:
+ msr = index.MetaStoreReader(indexfile + b'.meta')
+ except IOError as ex:
+ if ex.errno != ENOENT:
+ raise
+ log('error: cannot access %r; have you run bup index?'
+ % path_msg(indexfile))
+ sys.exit(1)
+ with msr, \
+ hlinkdb.HLinkDB(indexfile + b'.hlink') as hlink_db, \
+ index.Reader(indexfile) as reader:
+ tree = save_tree(opt, reader, hlink_db, msr, w)
+ if opt.tree:
+ out.write(hexlify(tree))
+ out.write(b'\n')
+ if opt.commit or opt.name:
+ commit = commit_tree(tree, parent, opt.date, argv, w)
+ if opt.commit:
+ out.write(hexlify(commit))
+ out.write(b'\n')
+
+ # packwriter must be closed before we can update the ref
+ if opt.name:
+ repo.update_ref(refname, commit, parent)
if saved_errors:
log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))