3 # https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4 export "BUP_ARGV_0"="$0"
7 export "BUP_ARGV_${arg_i}"="$arg"
11 # Here to end of preamble replaced during install
12 bup_python="$(dirname "$0")/bup-python" || exit $?
13 exec "$bup_python" "$0"
17 from __future__ import absolute_import, division, print_function
18 from binascii import hexlify
21 from bup import compat, hashsplit, git, options, client
22 from bup.compat import argv_bytes, environ
23 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
24 qprogress, reprogress, saved_errors,
27 from bup.io import byte_stream
28 from bup.pwdgrp import userfullname, username
32 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
33 bup split -b OPTIONS [--git-ids | filenames...]
34 bup split --copy OPTIONS [--git-ids | filenames...]
35 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
38 b,blobs output a series of blob ids. Implies --fanout=0.
39 t,tree output a tree id
40 c,commit output a commit id
41 n,name= save the result under the given name
42 noop split the input, but throw away the result
43 copy split the input, copy it to stdout, don't save to repo
45 r,remote= remote repository path
46 d,date= date for the commit (seconds since the epoch)
47 q,quiet don't print progress messages
48 v,verbose increase log output (can be used more than once)
49 git-ids read a list of git object ids from stdin and split their contents
50 keep-boundaries don't let one chunk span two input files
51 bench print benchmark timings to stderr
52 max-pack-size= maximum bytes in a single pack
53 max-pack-objects= maximum number of objects in a single pack
54 fanout= average number of blobs in a single tree
55 bwlimit= maximum bytes/sec to transmit to server
56 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line against optspec.  `extra` is what is left over
# and is treated as the list of input filenames further down.
60 o = options.Options(optspec)
61 opt, flags, extra = o.parse(compat.argv[1:])
# Run the -n / -r values through argv_bytes() when they were supplied.
62 if opt.name: opt.name = argv_bytes(opt.name)
63 if opt.remote: opt.remote = argv_bytes(opt.remote)
# No -v at all is treated as verbosity 0.
64 if opt.verbose is None: opt.verbose = 0
# At least one output/mode option has to be selected.
66 if not (opt.blobs or opt.tree or opt.commit or opt.name or
67 opt.noop or opt.copy):
68 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# Reject option combinations that contradict each other.
69 if opt.copy and (opt.blobs or opt.tree):
70 o.fatal('--copy is incompatible with -b, -t')
71 if (opt.noop or opt.copy) and (opt.commit or opt.name):
72 o.fatal('--noop and --copy are incompatible with -c, -n')
73 if opt.blobs and (opt.tree or opt.commit or opt.name):
74 o.fatal('-b is incompatible with -t, -c, -n')
# --git-ids takes its input from stdin, so filenames make no sense with it.
75 if extra and opt.git_ids:
76 o.fatal("don't provide filenames when using --git-ids")
# The git module's verbosity is kept one level below this command's.
79 git.verbose = opt.verbose - 1
# Numeric option values are converted with parse_num().
# NOTE(review): whatever guards surround the conversions of max_pack_size,
# fanout and bwlimit are not visible in this excerpt -- confirm before
# assuming they run unconditionally.
84 max_pack_size = parse_num(opt.max_pack_size)
# max_pack_objects stays None unless the option was given.
85 max_pack_objects = None
86 if opt.max_pack_objects:
87 max_pack_objects = parse_num(opt.max_pack_objects)
90 hashsplit.fanout = parse_num(opt.fanout)
94 client.bwlimit = parse_num(opt.bwlimit)
# -d/--date is resolved by parse_date_or_fatal(), which is handed o.fatal.
96 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback; it is passed as progress=prog to
# hashsplit.split_to_shalist() later in this file.  It adds nbytes to the
# running total_bytes counter and reports the total in kbytes through
# qprogress().  One of the two messages also names the file as filenum+1.
# NOTE(review): the declaration that makes total_bytes assignable from
# inside this function, and the condition choosing between the two
# messages, are not visible in this excerpt.
101 def prog(filenum, nbytes):
103 total_bytes += nbytes
105 qprogress('Splitting: file #%d, %d kbytes\r'
106 % (filenum+1, total_bytes // 1024))
108 qprogress('Splitting: %d kbytes\r' % (total_bytes // 1024))
# Reverse mode is signalled through the BUP_SERVER_REVERSE environment
# variable; an explicit -r is refused in that mode.
111 is_reverse = environ.get(b'BUP_SERVER_REVERSE')
112 if is_reverse and opt.remote:
113 o.fatal("don't use -r in reverse mode; it's automatic")
# Reference point for the elapsed-time figure logged at the end.
114 start_time = time.time()
# The -n value has to pass valid_save_name().
# NOTE(review): opt.name is formatted into bytes literals below, so under
# Python 3 the quoted '%r' here would print as 'b'name'' -- cosmetic only,
# confirm whether that matters.
116 if opt.name and not valid_save_name(opt.name):
117 o.fatal("'%r' is not a valid branch name." % opt.name)
# Ref to update: refs/heads/<name>, or None when -n was not given.
118 refname = opt.name and b'refs/heads/%s' % opt.name or None
# --noop / --copy: there is no client, pack writer or previous ref.
120 if opt.noop or opt.copy:
121 cli = pack_writer = oldref = None
# -r or reverse mode: read the old ref and create the pack writer through
# a client.Client.
122 elif opt.remote or is_reverse:
123 git.check_repo_or_die()
124 cli = client.Client(opt.remote)
125 oldref = refname and cli.read_ref(refname) or None
126 pack_writer = cli.new_packwriter(compression_level=opt.compress,
127 max_pack_size=max_pack_size,
128 max_pack_objects=max_pack_objects)
# The lines below read the old ref with git.read_ref() and write with
# git.PackWriter directly.
# NOTE(review): the branch keyword introducing this case is not visible
# in this excerpt.
130 git.check_repo_or_die()
132 oldref = refname and git.read_ref(refname) or None
133 pack_writer = git.PackWriter(compression_level=opt.compress,
134 max_pack_size=max_pack_size,
135 max_pack_objects=max_pack_objects)
# stdin wrapped by byte_stream(); note that this name shadows the builtin
# input().
137 input = byte_stream(sys.stdin)
140 # the input is actually a series of git object ids that we should retrieve
143 # This is a bit messy, but basically it converts from a series of
144 # CatPipe.get() iterators into a series of file-type objects.
145 # It would be less ugly if either CatPipe.get() returned a file-like object
146 # (not very efficient), or split_to_shalist() expected an iterator instead
# Adapter methods: read() pulls the next item from self.it and gets None
# once that iterator is exhausted.
# NOTE(review): the enclosing class statement, the use of `size`, and the
# remainder of both method bodies are not visible in this excerpt.
150 def __init__(self, it):
152 def read(self, size):
153 v = next(self.it, None)
# Ids are read from `input` one line at a time; the stripped line is
# looked up with cp.get().
157 line = input.readline()
163 it = cp.get(line.strip())
164 next(it, None) # skip the file info
# A KeyError from the lookup is recorded with add_error().
165 except KeyError as e:
166 add_error('error: %s' % e)
171 # the input either comes from a series of files or from stdin.
# Filenames in `extra` are opened lazily (generator expression) in 'rb'
# mode; with no filenames the wrapped stdin is the only input.
# NOTE(review): nothing in the visible code closes the files opened here.
172 files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]
# Object constructors.  One branch takes them from pack_writer (its
# condition is not visible in this excerpt); the -b/-t branch only computes
# ids with git.calc_hash().
175 new_blob = pack_writer.new_blob
176 new_tree = pack_writer.new_tree
177 elif opt.blobs or opt.tree:
179 new_blob = lambda content: git.calc_hash(b'blob', content)
180 new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist))
# Ids are written to stdout through byte_stream().
183 out = byte_stream(sys.stdout)
# Blob output: one hex id per line for every (sha, size, level) produced by
# split_to_blobs().  Presumably the -b case; its condition line is not
# visible in this excerpt.
186 shalist = hashsplit.split_to_blobs(new_blob, files,
187 keep_boundaries=opt.keep_boundaries,
189 for (sha, size, level) in shalist:
190 out.write(hexlify(sha) + b'\n')
# Tree / commit / name output.  With -n the split result becomes a single
# tree entry whose name comes from git.mangle_name(b'data', ...).  The other
# visible path builds the list with split_to_shalist().  The tree id comes
# from new_tree(shalist).
192 elif opt.tree or opt.commit or opt.name:
193 if opt.name: # insert dummy_name which may be used as a restore target
195 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
196 keep_boundaries=opt.keep_boundaries,
198 splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
199 shalist = [(mode, splitfile_name, sha)]
201 shalist = hashsplit.split_to_shalist(
202 new_blob, new_tree, files,
203 keep_boundaries=opt.keep_boundaries, progress=prog)
204 tree = new_tree(shalist)
# Remaining case: iterate the raw chunks from hashsplit_iter() and add each
# chunk's length to hashsplit.total_split.
207 it = hashsplit.hashsplit_iter(files,
208 keep_boundaries=opt.keep_boundaries,
210 for (blob, level) in it:
211 hashsplit.total_split += len(blob)
# NOTE(review): under Python 3, str() of a bytes-like object yields its
# repr (e.g. "b'...'" or "<memory at ...>"), not the data, so this would
# not copy the input faithfully.  Writing `blob` to the byte stream `out`
# looks like the intent -- confirm and fix.
213 sys.stdout.write(str(blob))
# Whole megabytes split so far, compared with `last` unless -q was given.
# `last` is set in lines not visible in this excerpt.
214 megs = hashsplit.total_split // 1024 // 1024
215 if not opt.quiet and last != megs:
# Print the tree id in hex (the guarding condition is not visible in this
# excerpt).
221 out.write(hexlify(tree) + b'\n')
# -c or -n: create a commit whose parent argument is oldref.  The message
# embeds the repr of compat.argvb.  The same userline and date are passed
# twice to new_commit() -- presumably author and committer; confirm against
# the pack writer API.
222 if opt.commit or opt.name:
223 msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.argvb
# NOTE(review): `ref` repeats the expression already stored in refname and
# is not used in any visible line -- candidate for removal.
224 ref = opt.name and (b'refs/heads/%s' % opt.name) or None
225 userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
226 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
227 userline, date, None, msg)
229 out.write(hexlify(commit) + b'\n')
232 pack_writer.close() # must close before we can update the ref
# Move refname from oldref to the new commit, through the client or through
# local git; the conditions selecting between the two calls are not visible
# in this excerpt.
236 cli.update_ref(refname, commit, oldref)
238 git.update_ref(refname, commit, oldref)
# Timing summary: elapsed seconds since start_time and the number of bytes
# counted in hashsplit.total_split.
# NOTE(review): `size / 1024 / secs` raises ZeroDivisionError if secs is 0;
# consider guarding it.
243 secs = time.time() - start_time
244 size = hashsplit.total_split
246 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
247 % (size / 1024, secs, size / 1024 / secs))
# Report how many errors were collected in saved_errors.
250 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))