X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cmd%2Fsave-cmd.py;h=b84d63e1c910eb3112e85c362b3fe1e55ba56889;hb=093752b42c5548028c6f84c67f7741b2321c512f;hp=8067facb872c38ffb40832d7442b7a81f3b43436;hpb=a866d6734917ab6f5fab5949f9e761dcc4b981d0;p=bup.git diff --git a/cmd/save-cmd.py b/cmd/save-cmd.py index 8067fac..b84d63e 100755 --- a/cmd/save-cmd.py +++ b/cmd/save-cmd.py @@ -1,11 +1,26 @@ -#!/usr/bin/env python -import sys, stat, time, math -from cStringIO import StringIO +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import, print_function +from binascii import hexlify from errno import EACCES +from io import BytesIO +import os, sys, stat, time, math from bup import hashsplit, git, options, index, client, metadata, hlinkdb -from bup.helpers import * +from bup.compat import argv_bytes, environ from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK +from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c, + hostname, istty2, log, parse_date_or_fatal, parse_num, + path_components, progress, qprogress, resolve_parent, + saved_errors, stripped_path_components, + valid_save_name) +from bup.io import byte_stream, path_msg +from bup.pwdgrp import userfullname, username optspec = """ @@ -29,12 +44,23 @@ graft= a graft point *old_path*=*new_path* (can be used more than once) o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) +if opt.indexfile: + opt.indexfile = argv_bytes(opt.indexfile) +if opt.name: + opt.name = argv_bytes(opt.name) +if opt.remote: + opt.remote = argv_bytes(opt.remote) +if opt.strip_path: + opt.strip_path = argv_bytes(opt.strip_path) + git.check_repo_or_die() if not (opt.tree or opt.commit or opt.name): o.fatal("use one or more of -t, -c, -n") if not extra: o.fatal("no filenames given") +extra = [argv_bytes(x) for x in extra] + opt.progress = (istty2 and not opt.quiet) opt.smaller = parse_num(opt.smaller or 0) if opt.bwlimit: @@ -58,23 +84,30 @@ if opt.graft: for (option, parameter) in flags: if option == "--graft": - splitted_parameter = parameter.split('=') + parameter = argv_bytes(parameter) + splitted_parameter = parameter.split(b'=') if len(splitted_parameter) != 2: o.fatal("a graft point must be of the form old_path=new_path") old_path, new_path = splitted_parameter if not (old_path and new_path): o.fatal("a graft point cannot be empty") - graft_points.append((realpath(old_path), realpath(new_path))) + graft_points.append((resolve_parent(old_path), + resolve_parent(new_path))) -is_reverse = os.environ.get('BUP_SERVER_REVERSE') +is_reverse = environ.get(b'BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") -if opt.name and opt.name.startswith('.'): - o.fatal("'%s' is not a valid branch name" % opt.name) -refname = opt.name and 'refs/heads/%s' % opt.name or None +name = opt.name +if name and not valid_save_name(name): + o.fatal("'%s' is not a valid branch name" % path_msg(name)) +refname = name and b'refs/heads/%s' % name or None if opt.remote or is_reverse: - cli = client.Client(opt.remote) + try: + cli = client.Client(opt.remote) + except client.ClientError as e: + log('error: %s' % e) + sys.exit(1) oldref = refname and cli.read_ref(refname) or None w = cli.new_packwriter(compression_level=opt.compress) else: @@ -85,13 +118,6 @@ else: handle_ctrl_c() -def eatslash(dir): - if dir.endswith('/'): - return dir[:-1] - else: - return dir - - # Metadata is stored in a file named .bupm in each directory. The # first metadata entry will be the metadata for the current directory. # The remaining entries will be for each of the other directory @@ -100,8 +126,11 @@ def eatslash(dir): # Since the git tree elements are sorted according to # git.shalist_item_sort_key, the metalist items are accumulated as # (sort_key, metadata) tuples, and then sorted when the .bupm file is -# created. The sort_key must be computed using the element's real -# name and mode rather than the git mode and (possibly mangled) name. +# created. The sort_key should have been computed using the element's +# mangled name and git mode (after hashsplitting), but the code isn't +# actually doing that but rather uses the element's real name and mode. +# This makes things a bit more difficult when reading it back, see +# vfs.ordered_tree_entries(). # Maintain a stack of information representing the current location in # the archive being constructed. The current path is recorded in @@ -118,7 +147,7 @@ def _push(part, metadata): # Enter a new archive directory -- make it the current directory. parts.append(part) shalists.append([]) - metalists.append([('', metadata)]) # This dir's metadata (no name). + metalists.append([(b'', metadata)]) # This dir's metadata (no name). def _pop(force_tree, dir_metadata=None): @@ -127,17 +156,38 @@ def _pop(force_tree, dir_metadata=None): part = parts.pop() shalist = shalists.pop() metalist = metalists.pop() - if metalist and not force_tree: + # FIXME: only test if collision is possible (i.e. given --strip, etc.)? + if force_tree: + tree = force_tree + else: + names_seen = set() + clean_list = [] + metaidx = 1 # entry at 0 is for the dir + for x in shalist: + name = x[1] + if name in names_seen: + parent_path = b'/'.join(parts) + b'/' + add_error('error: ignoring duplicate path %s in %s' + % (path_msg(name), path_msg(parent_path))) + if not stat.S_ISDIR(x[0]): + del metalist[metaidx] + else: + names_seen.add(name) + clean_list.append(x) + if not stat.S_ISDIR(x[0]): + metaidx += 1 + if dir_metadata: # Override the original metadata pushed for this dir. - metalist = [('', dir_metadata)] + metalist[1:] + metalist = [(b'', dir_metadata)] + metalist[1:] sorted_metalist = sorted(metalist, key = lambda x : x[0]) - metadata = ''.join([m[1].encode() for m in sorted_metalist]) - metadata_f = StringIO(metadata) + metadata = b''.join([m[1].encode() for m in sorted_metalist]) + metadata_f = BytesIO(metadata) mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree, [metadata_f], keep_boundaries=False) - shalist.append((mode, '.bupm', id)) - tree = force_tree or w.new_tree(shalist) + clean_list.append((mode, b'.bupm', id)) + + tree = w.new_tree(clean_list) if shalists: shalists[-1].append((GIT_MODE_TREE, git.mangle_name(part, @@ -185,16 +235,17 @@ def progress_report(n): remainstr, kpsstr)) -indexfile = opt.indexfile or git.repo('bupindex') +indexfile = opt.indexfile or git.repo(b'bupindex') r = index.Reader(indexfile) try: - msr = index.MetaStoreReader(indexfile + '.meta') -except IOError, ex: + msr = index.MetaStoreReader(indexfile + b'.meta') +except IOError as ex: if ex.errno != EACCES: raise - log('error: cannot access %r; have you run bup index?' % indexfile) + log('error: cannot access %r; have you run bup index?' + % path_msg(indexfile)) sys.exit(1) -hlink_db = hlinkdb.HLinkDB(indexfile + '.hlink') +hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink') def already_saved(ent): return ent.is_valid() and w.exists(ent.sha) and ent.sha @@ -242,7 +293,7 @@ root_collision = None tstart = time.time() count = subcount = fcount = 0 lastskip_name = None -lastdir = '' +lastdir = b'' for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): (dir, file) = os.path.split(ent.name) exists = (ent.flags & index.IX_EXISTS) @@ -260,10 +311,10 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): else: status = ' ' if opt.verbose >= 2: - log('%s %-70s\n' % (status, ent.name)) + log('%s %-70s\n' % (status, path_msg(ent.name))) elif not stat.S_ISDIR(ent.mode) and lastdir != dir: if not lastdir.startswith(dir): - log('%s %-70s\n' % (status, os.path.join(dir, ''))) + log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b'')))) lastdir = dir if opt.progress: @@ -274,11 +325,12 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): continue if opt.smaller and ent.size >= opt.smaller: if exists and not hashvalid: - add_error('skipping large file "%s"' % ent.name) + if opt.verbose: + log('skipping large file "%s"\n' % path_msg(ent.name)) lastskip_name = ent.name continue - assert(dir.startswith('/')) + assert(dir.startswith(b'/')) if opt.strip: dirp = stripped_path_components(dir, extra) elif opt.strip_path: @@ -303,16 +355,7 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): # ...]. if first_root == None: - dir_name, fs_path = dirp[0] first_root = dirp[0] - # Not indexed, so just grab the FS metadata or use empty metadata. - try: - meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata() - except (OSError, IOError), e: - add_error(e) - lastskip_name = dir_name - else: - _push(dir_name, meta) elif first_root != dirp[0]: root_collision = True @@ -325,12 +368,13 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): dir_name, fs_path = path_component # Not indexed, so just grab the FS metadata or use empty metadata. try: - meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata() - except (OSError, IOError), e: + meta = metadata.from_path(fs_path, normalized=True) \ + if fs_path else metadata.Metadata() + except (OSError, IOError) as e: add_error(e) lastskip_name = dir_name - else: - _push(dir_name, meta) + meta = metadata.Metadata() + _push(dir_name, meta) if not file: if len(parts) == 1: @@ -349,7 +393,6 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): continue # it's not a directory - id = None if hashvalid: id = ent.sha git_name = git.mangle_name(file, ent.mode, ent.gitmode) @@ -362,36 +405,31 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime) metalists[-1].append((sort_key, meta)) else: + id = None if stat.S_ISREG(ent.mode): try: - f = hashsplit.open_noatime(ent.name) - except (IOError, OSError), e: - add_error(e) - lastskip_name = ent.name - else: - try: + with hashsplit.open_noatime(ent.name) as f: (mode, id) = hashsplit.split_to_blob_or_tree( w.new_blob, w.new_tree, [f], keep_boundaries=False) - except (IOError, OSError), e: - add_error('%s: %s' % (ent.name, e)) - lastskip_name = ent.name - else: - if stat.S_ISDIR(ent.mode): - assert(0) # handled above - elif stat.S_ISLNK(ent.mode): - try: - rl = os.readlink(ent.name) - except (OSError, IOError), e: - add_error(e) - lastskip_name = ent.name - else: - (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl)) + except (IOError, OSError) as e: + add_error('%s: %s' % (ent.name, e)) + lastskip_name = ent.name + elif stat.S_ISDIR(ent.mode): + assert(0) # handled above + elif stat.S_ISLNK(ent.mode): + try: + rl = os.readlink(ent.name) + except (OSError, IOError) as e: + add_error(e) + lastskip_name = ent.name else: - # Everything else should be fully described by its - # metadata, so just record an empty blob, so the paths - # in the tree and .bupm will match up. - (mode, id) = (GIT_MODE_FILE, w.new_blob("")) + (mode, id) = (GIT_MODE_SYMLINK, w.new_blob(rl)) + else: + # Everything else should be fully described by its + # metadata, so just record an empty blob, so the paths + # in the tree and .bupm will match up. + (mode, id) = (GIT_MODE_FILE, w.new_blob(b'')) if id: ent.validate(mode, id) @@ -402,12 +440,13 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): sort_key = git.shalist_item_sort_key((ent.mode, file, id)) hlink = find_hardlink_target(hlink_db, ent) try: - meta = metadata.from_path(ent.name, hardlink_target=hlink) - except (OSError, IOError), e: + meta = metadata.from_path(ent.name, hardlink_target=hlink, + normalized=True) + except (OSError, IOError) as e: add_error(e) lastskip_name = ent.name - else: - metalists[-1].append((sort_key, meta)) + meta = metadata.Metadata() + metalists[-1].append((sort_key, meta)) if exists and wasmissing: count += oldsize @@ -429,13 +468,21 @@ tree = _pop(force_tree = None, # When there's a collision, use empty metadata for the root. dir_metadata = metadata.Metadata() if root_collision else None) +sys.stdout.flush() +out = byte_stream(sys.stdout) + if opt.tree: - print tree.encode('hex') -if opt.commit or opt.name: - msg = 'bup save\n\nGenerated by command:\n%r\n' % sys.argv - commit = w.new_commit(oldref, tree, date, msg) + out.write(hexlify(tree)) + out.write(b'\n') +if opt.commit or name: + msg = (b'bup save\n\nGenerated by command:\n%r\n' + % [argv_bytes(x) for x in sys.argv]) + userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname())) + commit = w.new_commit(tree, oldref, userline, date, None, + userline, date, None, msg) if opt.commit: - print commit.encode('hex') + out.write(hexlify(commit)) + out.write(b'\n') msr.close() w.close() # must close before we can update the ref