X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fsave-cmd.py;h=5140ac027ecff4cef3b93ff93cd7ab05a6295c00;hb=f0a4b3b6ef7d00e64b04dee51b8e15db71b693d3;hp=6b3959961052c1be3d6fc6b7a9bfc08de339f440;hpb=042eaac10b2650a71f7e8604cfac8213091ec1be;p=bup.git diff --git a/cmd/save-cmd.py b/cmd/save-cmd.py index 6b39599..5140ac0 100755 --- a/cmd/save-cmd.py +++ b/cmd/save-cmd.py @@ -1,8 +1,26 @@ -#!/usr/bin/env python -import sys, stat, time, math +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import, print_function +from binascii import hexlify +from errno import EACCES +from io import BytesIO +import os, sys, stat, time, math + from bup import hashsplit, git, options, index, client, metadata, hlinkdb -from bup.helpers import * +from bup.compat import argv_bytes, environ from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK +from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c, + hostname, istty2, log, parse_date_or_fatal, parse_num, + path_components, progress, qprogress, resolve_parent, + saved_errors, stripped_path_components, + valid_save_name) +from bup.io import byte_stream, path_msg +from bup.pwdgrp import userfullname, username optspec = """ @@ -26,12 +44,23 @@ graft= a graft point *old_path*=*new_path* (can be used more than once) o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) +if opt.indexfile: + opt.indexfile = argv_bytes(opt.indexfile) +if opt.name: + opt.name = argv_bytes(opt.name) +if opt.remote: + opt.remote = argv_bytes(opt.remote) +if opt.strip_path: + opt.strip_path = argv_bytes(opt.strip_path) + git.check_repo_or_die() if not (opt.tree or opt.commit or opt.name): o.fatal("use one or more of -t, -c, -n") if not extra: o.fatal("no filenames given") +extra = [argv_bytes(x) for x in extra] + opt.progress = (istty2 and not opt.quiet) opt.smaller = parse_num(opt.smaller or 0) if opt.bwlimit: @@ -55,25 +84,32 @@ if opt.graft: for (option, parameter) in flags: if option == "--graft": - splitted_parameter = parameter.split('=') + parameter = argv_bytes(parameter) + splitted_parameter = parameter.split(b'=') if len(splitted_parameter) != 2: o.fatal("a graft point must be of the form old_path=new_path") old_path, new_path = splitted_parameter if not (old_path and new_path): o.fatal("a graft point cannot be empty") - graft_points.append((realpath(old_path), realpath(new_path))) + graft_points.append((resolve_parent(old_path), + resolve_parent(new_path))) -is_reverse = os.environ.get('BUP_SERVER_REVERSE') +is_reverse = environ.get(b'BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") -if opt.name and opt.name.startswith('.'): - o.fatal("'%s' is not a valid branch name" % opt.name) -refname = opt.name and 'refs/heads/%s' % opt.name or None +name = opt.name +if name and not valid_save_name(name): + o.fatal("'%s' is not a valid branch name" % path_msg(name)) +refname = name and b'refs/heads/%s' % name or None if opt.remote or is_reverse: - cli = client.Client(opt.remote) + try: + cli = client.Client(opt.remote) + except client.ClientError as e: + log('error: %s' % e) + sys.exit(1) oldref = refname and cli.read_ref(refname) or None - w = cli.new_packwriter() + w = cli.new_packwriter(compression_level=opt.compress) else: cli = None oldref = refname and git.read_ref(refname) or None @@ -83,12 +119,11 @@ handle_ctrl_c() def eatslash(dir): - if dir.endswith('/'): + if dir.endswith(b'/'): return dir[:-1] else: return dir - # Metadata is stored in a file named .bupm in each directory. The # first metadata entry will be the metadata for the current directory. # The remaining entries will be for each of the other directory @@ -115,7 +150,7 @@ def _push(part, metadata): # Enter a new archive directory -- make it the current directory. parts.append(part) shalists.append([]) - metalists.append([('', metadata)]) # This dir's metadata (no name). + metalists.append([(b'', metadata)]) # This dir's metadata (no name). def _pop(force_tree, dir_metadata=None): @@ -124,13 +159,32 @@ def _pop(force_tree, dir_metadata=None): part = parts.pop() shalist = shalists.pop() metalist = metalists.pop() - if metalist: + if metalist and not force_tree: if dir_metadata: # Override the original metadata pushed for this dir. - metalist = [('', dir_metadata)] + metalist[1:] + metalist = [(b'', dir_metadata)] + metalist[1:] sorted_metalist = sorted(metalist, key = lambda x : x[0]) - metadata = ''.join([m[1].encode() for m in sorted_metalist]) - shalist.append((0100644, '.bupm', w.new_blob(metadata))) - tree = force_tree or w.new_tree(shalist) + metadata = b''.join([m[1].encode() for m in sorted_metalist]) + metadata_f = BytesIO(metadata) + mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree, + [metadata_f], + keep_boundaries=False) + shalist.append((mode, b'.bupm', id)) + # FIXME: only test if collision is possible (i.e. given --strip, etc.)? + if force_tree: + tree = force_tree + else: + names_seen = set() + clean_list = [] + for x in shalist: + name = x[1] + if name in names_seen: + parent_path = b'/'.join(parts) + b'/' + add_error('error: ignoring duplicate path %s in %s' + % (path_msg(name), path_msg(parent_path))) + else: + names_seen.add(name) + clean_list.append(x) + tree = w.new_tree(clean_list) if shalists: shalists[-1].append((GIT_MODE_TREE, git.mangle_name(part, @@ -178,9 +232,17 @@ def progress_report(n): remainstr, kpsstr)) -indexfile = opt.indexfile or git.repo('bupindex') +indexfile = opt.indexfile or git.repo(b'bupindex') r = index.Reader(indexfile) -hlink_db = hlinkdb.HLinkDB(indexfile + '.hlink') +try: + msr = index.MetaStoreReader(indexfile + b'.meta') +except IOError as ex: + if ex.errno != EACCES: + raise + log('error: cannot access %r; have you run bup index?' + % path_msg(indexfile)) + sys.exit(1) +hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink') def already_saved(ent): return ent.is_valid() and w.exists(ent.sha) and ent.sha @@ -228,7 +290,7 @@ root_collision = None tstart = time.time() count = subcount = fcount = 0 lastskip_name = None -lastdir = '' +lastdir = b'' for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): (dir, file) = os.path.split(ent.name) exists = (ent.flags & index.IX_EXISTS) @@ -246,10 +308,10 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): else: status = ' ' if opt.verbose >= 2: - log('%s %-70s\n' % (status, ent.name)) + log('%s %-70s\n' % (status, path_msg(ent.name))) elif not stat.S_ISDIR(ent.mode) and lastdir != dir: if not lastdir.startswith(dir): - log('%s %-70s\n' % (status, os.path.join(dir, ''))) + log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b'')))) lastdir = dir if opt.progress: @@ -260,11 +322,12 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): continue if opt.smaller and ent.size >= opt.smaller: if exists and not hashvalid: - add_error('skipping large file "%s"' % ent.name) + if opt.verbose: + log('skipping large file "%s"\n' % path_msg(ent.name)) lastskip_name = ent.name continue - assert(dir.startswith('/')) + assert(dir.startswith(b'/')) if opt.strip: dirp = stripped_path_components(dir, extra) elif opt.strip_path: @@ -289,10 +352,7 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): # ...]. if first_root == None: - dir_name, fs_path = dirp[0] first_root = dirp[0] - meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata() - _push(dir_name, meta) elif first_root != dirp[0]: root_collision = True @@ -303,7 +363,14 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): # If switching to a new sub-tree, start a new sub-tree. for path_component in dirp[len(parts):]: dir_name, fs_path = path_component - meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata() + # Not indexed, so just grab the FS metadata or use empty metadata. + try: + meta = metadata.from_path(fs_path, normalized=True) \ + if fs_path else metadata.Metadata() + except (OSError, IOError) as e: + add_error(e) + lastskip_name = dir_name + meta = metadata.Metadata() _push(dir_name, meta) if not file: @@ -330,15 +397,16 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): git_info = (ent.gitmode, git_name, id) shalists[-1].append(git_info) sort_key = git.shalist_item_sort_key((ent.mode, file, id)) - hlink = find_hardlink_target(hlink_db, ent) - metalists[-1].append((sort_key, - metadata.from_path(ent.name, - hardlink_target=hlink))) + meta = msr.metadata_at(ent.meta_ofs) + meta.hardlink_target = find_hardlink_target(hlink_db, ent) + # Restore the times that were cleared to 0 in the metastore. + (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime) + metalists[-1].append((sort_key, meta)) else: if stat.S_ISREG(ent.mode): try: f = hashsplit.open_noatime(ent.name) - except (IOError, OSError), e: + except (IOError, OSError) as e: add_error(e) lastskip_name = ent.name else: @@ -346,7 +414,7 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): (mode, id) = hashsplit.split_to_blob_or_tree( w.new_blob, w.new_tree, [f], keep_boundaries=False) - except (IOError, OSError), e: + except (IOError, OSError) as e: add_error('%s: %s' % (ent.name, e)) lastskip_name = ent.name else: @@ -355,7 +423,7 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): elif stat.S_ISLNK(ent.mode): try: rl = os.readlink(ent.name) - except (OSError, IOError), e: + except (OSError, IOError) as e: add_error(e) lastskip_name = ent.name else: @@ -364,7 +432,7 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): # Everything else should be fully described by its # metadata, so just record an empty blob, so the paths # in the tree and .bupm will match up. - (mode, id) = (GIT_MODE_FILE, w.new_blob("")) + (mode, id) = (GIT_MODE_FILE, w.new_blob(b'')) if id: ent.validate(mode, id) @@ -374,9 +442,15 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): shalists[-1].append(git_info) sort_key = git.shalist_item_sort_key((ent.mode, file, id)) hlink = find_hardlink_target(hlink_db, ent) - metalists[-1].append((sort_key, - metadata.from_path(ent.name, - hardlink_target=hlink))) + try: + meta = metadata.from_path(ent.name, hardlink_target=hlink, + normalized=True) + except (OSError, IOError) as e: + add_error(e) + lastskip_name = ent.name + else: + metalists[-1].append((sort_key, meta)) + if exists and wasmissing: count += oldsize subcount = 0 @@ -394,16 +468,26 @@ assert(len(metalists) == 1) # Finish the root directory. tree = _pop(force_tree = None, + # When there's a collision, use empty metadata for the root. dir_metadata = metadata.Metadata() if root_collision else None) +sys.stdout.flush() +out = byte_stream(sys.stdout) + if opt.tree: - print tree.encode('hex') -if opt.commit or opt.name: - msg = 'bup save\n\nGenerated by command:\n%r' % sys.argv - commit = w.new_commit(oldref, tree, date, msg) + out.write(hexlify(tree)) + out.write(b'\n') +if opt.commit or name: + msg = (b'bup save\n\nGenerated by command:\n%r\n' + % [argv_bytes(x) for x in sys.argv]) + userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname())) + commit = w.new_commit(tree, oldref, userline, date, None, + userline, date, None, msg) if opt.commit: - print commit.encode('hex') + out.write(hexlify(commit)) + out.write(b'\n') +msr.close() w.close() # must close before we can update the ref if opt.name: