X-Git-Url: https://arthur.barton.de/gitweb/?a=blobdiff_plain;f=cmd%2Fsave-cmd.py;h=bf2877ba240a0ff90c79745c5104217c4c1849ea;hb=aeafe13a9330e7deca3dc6b9c0496937d904663a;hp=a8265bdc6815d318879a901631608548742a2de8;hpb=7ccf0a9f15b5fc4bc0e257cda7658b72bd04b8a1;p=bup.git diff --git a/cmd/save-cmd.py b/cmd/save-cmd.py index a8265bd..bf2877b 100755 --- a/cmd/save-cmd.py +++ b/cmd/save-cmd.py @@ -1,8 +1,22 @@ -#!/usr/bin/env python -import sys, stat, time, math +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" +# end of bup preamble + +from __future__ import absolute_import, print_function +from errno import EACCES +from io import BytesIO +import os, sys, stat, time, math + from bup import hashsplit, git, options, index, client, metadata, hlinkdb -from bup.helpers import * from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE, GIT_MODE_SYMLINK +from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c, + hostname, istty2, log, parse_date_or_fatal, parse_num, + path_components, progress, qprogress, resolve_parent, + saved_errors, stripped_path_components, + userfullname, username, valid_save_name) optspec = """ @@ -61,19 +75,24 @@ if opt.graft: old_path, new_path = splitted_parameter if not (old_path and new_path): o.fatal("a graft point cannot be empty") - graft_points.append((realpath(old_path), realpath(new_path))) + graft_points.append((resolve_parent(old_path), + resolve_parent(new_path))) is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") -if opt.name and opt.name.startswith('.'): +if opt.name and not valid_save_name(opt.name): o.fatal("'%s' is not a valid branch name" % opt.name) refname = opt.name and 'refs/heads/%s' % opt.name or None if opt.remote or is_reverse: - cli = client.Client(opt.remote) + try: + cli = client.Client(opt.remote) + except client.ClientError as e: + log('error: %s' % e) + sys.exit(1) oldref = refname and cli.read_ref(refname) or None - w = cli.new_packwriter() + w = cli.new_packwriter(compression_level=opt.compress) else: cli = None oldref = refname and git.read_ref(refname) or None @@ -100,38 +119,64 @@ def eatslash(dir): # created. The sort_key must be computed using the element's real # name and mode rather than the git mode and (possibly mangled) name. -parts = [''] -shalists = [[]] -metalists = [[]] +# Maintain a stack of information representing the current location in +# the archive being constructed. The current path is recorded in +# parts, which will be something like ['', 'home', 'someuser'], and +# the accumulated content and metadata for of the dirs in parts is +# stored in parallel stacks in shalists and metalists. + +parts = [] # Current archive position (stack of dir names). +shalists = [] # Hashes for each dir in paths. +metalists = [] # Metadata for each dir in paths. + def _push(part, metadata): - assert(part) + # Enter a new archive directory -- make it the current directory. parts.append(part) shalists.append([]) - # First entry is dir metadata, which is represented with an empty name. - metalists.append([('', metadata)]) + metalists.append([('', metadata)]) # This dir's metadata (no name). -def _pop(force_tree): + +def _pop(force_tree, dir_metadata=None): + # Leave the current archive directory and add its tree to its parent. assert(len(parts) >= 1) part = parts.pop() shalist = shalists.pop() metalist = metalists.pop() - if metalist: + if metalist and not force_tree: + if dir_metadata: # Override the original metadata pushed for this dir. + metalist = [('', dir_metadata)] + metalist[1:] sorted_metalist = sorted(metalist, key = lambda x : x[0]) metadata = ''.join([m[1].encode() for m in sorted_metalist]) - shalist.append((0100644, '.bupm', w.new_blob(metadata))) - tree = force_tree or w.new_tree(shalist) + metadata_f = BytesIO(metadata) + mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree, + [metadata_f], + keep_boundaries=False) + shalist.append((mode, '.bupm', id)) + # FIXME: only test if collision is possible (i.e. given --strip, etc.)? + if force_tree: + tree = force_tree + else: + names_seen = set() + clean_list = [] + for x in shalist: + name = x[1] + if name in names_seen: + parent_path = '/'.join(parts) + '/' + add_error('error: ignoring duplicate path %r in %r' + % (name, parent_path)) + else: + names_seen.add(name) + clean_list.append(x) + tree = w.new_tree(clean_list) if shalists: shalists[-1].append((GIT_MODE_TREE, git.mangle_name(part, GIT_MODE_TREE, GIT_MODE_TREE), tree)) - else: - # This was the toplevel, so put it back for sanity (i.e. cd .. from /). - shalists.append(shalist) - metalists.append(metalist) return tree + lastremain = None def progress_report(n): global count, subcount, lastremain @@ -173,6 +218,13 @@ def progress_report(n): indexfile = opt.indexfile or git.repo('bupindex') r = index.Reader(indexfile) +try: + msr = index.MetaStoreReader(indexfile + '.meta') +except IOError as ex: + if ex.errno != EACCES: + raise + log('error: cannot access %r; have you run bup index?' % indexfile) + sys.exit(1) hlink_db = hlinkdb.HLinkDB(indexfile + '.hlink') def already_saved(ent): @@ -205,6 +257,19 @@ if opt.progress: progress('Reading index: %d, done.\n' % ftotal) hashsplit.progress_callback = progress_report +# Root collisions occur when strip or graft options map more than one +# path to the same directory (paths which originally had separate +# parents). When that situation is detected, use empty metadata for +# the parent. Otherwise, use the metadata for the common parent. +# Collision example: "bup save ... --strip /foo /foo/bar /bar". + +# FIXME: Add collision tests, or handle collisions some other way. + +# FIXME: Detect/handle strip/graft name collisions (other than root), +# i.e. if '/foo/bar' and '/bar' both map to '/'. + +first_root = None +root_collision = None tstart = time.time() count = subcount = fcount = 0 lastskip_name = None @@ -240,7 +305,8 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): continue if opt.smaller and ent.size >= opt.smaller: if exists and not hashvalid: - add_error('skipping large file "%s"' % ent.name) + if opt.verbose: + log('skipping large file "%s"\n' % ent.name) lastskip_name = ent.name continue @@ -254,21 +320,45 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): else: dirp = path_components(dir) + # At this point, dirp contains a representation of the archive + # path that looks like [(archive_dir_name, real_fs_path), ...]. + # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp + # might look like this at some point: + # [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...]. + + # This dual representation supports stripping/grafting, where the + # archive path may not have a direct correspondence with the + # filesystem. The root directory is represented by an initial + # component named '', and any component that doesn't have a + # corresponding filesystem directory (due to grafting, for + # example) will have a real_fs_path of None, i.e. [('', None), + # ...]. + + if first_root == None: + first_root = dirp[0] + elif first_root != dirp[0]: + root_collision = True + + # If switching to a new sub-tree, finish the current sub-tree. while parts > [x[0] for x in dirp]: _pop(force_tree = None) - if dir != '/': - for path_component in dirp[len(parts):]: - dir_name, fs_path = path_component - if fs_path: - meta = metadata.from_path(fs_path) - else: - meta = metadata.Metadata() - _push(dir_name, meta) + # If switching to a new sub-tree, start a new sub-tree. + for path_component in dirp[len(parts):]: + dir_name, fs_path = path_component + # Not indexed, so just grab the FS metadata or use empty metadata. + try: + meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata() + except (OSError, IOError) as e: + add_error(e) + lastskip_name = dir_name + meta = metadata.Metadata() + _push(dir_name, meta) if not file: - # no filename portion means this is a subdir. But - # sub/parentdirectories already handled in the pop/push() part above. + if len(parts) == 1: + continue # We're at the top level -- keep the current root dir + # Since there's no filename, this is a subdir -- finish it. oldtree = already_saved(ent) # may be None newtree = _pop(force_tree = oldtree) if not oldtree: @@ -289,15 +379,16 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): git_info = (ent.gitmode, git_name, id) shalists[-1].append(git_info) sort_key = git.shalist_item_sort_key((ent.mode, file, id)) - hlink = find_hardlink_target(hlink_db, ent) - metalists[-1].append((sort_key, - metadata.from_path(ent.name, - hardlink_target=hlink))) + meta = msr.metadata_at(ent.meta_ofs) + meta.hardlink_target = find_hardlink_target(hlink_db, ent) + # Restore the times that were cleared to 0 in the metastore. + (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime) + metalists[-1].append((sort_key, meta)) else: if stat.S_ISREG(ent.mode): try: f = hashsplit.open_noatime(ent.name) - except (IOError, OSError), e: + except (IOError, OSError) as e: add_error(e) lastskip_name = ent.name else: @@ -305,7 +396,7 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): (mode, id) = hashsplit.split_to_blob_or_tree( w.new_blob, w.new_tree, [f], keep_boundaries=False) - except (IOError, OSError), e: + except (IOError, OSError) as e: add_error('%s: %s' % (ent.name, e)) lastskip_name = ent.name else: @@ -314,7 +405,7 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): elif stat.S_ISLNK(ent.mode): try: rl = os.readlink(ent.name) - except (OSError, IOError), e: + except (OSError, IOError) as e: add_error(e) lastskip_name = ent.name else: @@ -333,9 +424,14 @@ for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): shalists[-1].append(git_info) sort_key = git.shalist_item_sort_key((ent.mode, file, id)) hlink = find_hardlink_target(hlink_db, ent) - metalists[-1].append((sort_key, - metadata.from_path(ent.name, - hardlink_target=hlink))) + try: + meta = metadata.from_path(ent.name, hardlink_target=hlink) + except (OSError, IOError) as e: + add_error(e) + lastskip_name = ent.name + else: + metalists[-1].append((sort_key, meta)) + if exists and wasmissing: count += oldsize subcount = 0 @@ -346,28 +442,27 @@ if opt.progress: progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n' % (pct, count/1024, total/1024, fcount, ftotal)) -while len(parts) > 1: # _pop() all the parts above the indexed items. +while len(parts) > 1: # _pop() all the parts above the root _pop(force_tree = None) assert(len(shalists) == 1) assert(len(metalists) == 1) -if not (opt.strip or opt.strip_path or graft_points): - # For now, only save metadata for the root directory when there - # isn't any path grafting or stripping that might create multiple - # roots. - shalist = shalists[-1] - metadata = ''.join([metadata.from_path('/').encode()]) - shalist.append((0100644, '.bupm', w.new_blob(metadata))) -tree = w.new_tree(shalists[-1]) +# Finish the root directory. +tree = _pop(force_tree = None, + # When there's a collision, use empty metadata for the root. + dir_metadata = metadata.Metadata() if root_collision else None) if opt.tree: - print tree.encode('hex') + print(tree.encode('hex')) if opt.commit or opt.name: - msg = 'bup save\n\nGenerated by command:\n%r' % sys.argv - commit = w.new_commit(oldref, tree, date, msg) + msg = 'bup save\n\nGenerated by command:\n%r\n' % sys.argv + userline = '%s <%s@%s>' % (userfullname(), username(), hostname()) + commit = w.new_commit(tree, oldref, userline, date, None, + userline, date, None, msg) if opt.commit: - print commit.encode('hex') + print(commit.encode('hex')) +msr.close() w.close() # must close before we can update the ref if opt.name: