3 # https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4 export "BUP_ARGV_0"="$0"
7 export "BUP_ARGV_${arg_i}"="$arg"
11 # Here to end of preamble replaced during install
12 bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
13 exec "$bup_python" "$0"
17 from __future__ import absolute_import, division, print_function
19 # Intentionally replace the dirname "$0" that python prepends
21 sys.path[0] = os.path.dirname(os.path.realpath(__file__)) + '/..'
23 from binascii import hexlify
26 from bup import compat, hashsplit, git, options, client
27 from bup.compat import argv_bytes, environ
28 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
29 qprogress, reprogress, saved_errors,
32 from bup.io import byte_stream
33 from bup.pwdgrp import userfullname, username
37 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
38 bup split -b OPTIONS [--git-ids | filenames...]
39 bup split --copy OPTIONS [--git-ids | filenames...]
40 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
43 b,blobs output a series of blob ids. Implies --fanout=0.
44 t,tree output a tree id
45 c,commit output a commit id
46 n,name= save the result under the given name
47 noop split the input, but throw away the result
48 copy split the input, copy it to stdout, don't save to repo
50 r,remote= remote repository path
51 d,date= date for the commit (seconds since the epoch)
52 q,quiet don't print progress messages
53 v,verbose increase log output (can be used more than once)
54 git-ids read a list of git object ids from stdin and split their contents
55 keep-boundaries don't let one chunk span two input files
56 bench print benchmark timings to stderr
57 max-pack-size= maximum bytes in a single pack
58 max-pack-objects= maximum number of objects in a single pack
59 fanout= average number of blobs in a single tree
60 bwlimit= maximum bytes/sec to transmit to server
61 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line, then normalise the parsed values: the branch
# name and remote path are converted with argv_bytes, and an unset
# verbosity is treated as 0.
o = options.Options(optspec)
opt, flags, extra = o.parse(compat.argv[1:])
if opt.name:
    opt.name = argv_bytes(opt.name)
if opt.remote:
    opt.remote = argv_bytes(opt.remote)
if opt.verbose is None:
    opt.verbose = 0
# Reject option combinations that cannot be honoured together.  Each
# failure is reported through o.fatal with the usage message below.
saves_result = opt.commit or opt.name        # -c / -n
discards_result = opt.noop or opt.copy       # --noop / --copy

# At least one output mode has to be selected.
if not (opt.blobs or opt.tree or saves_result or discards_result):
    o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
if opt.copy and (opt.blobs or opt.tree):
    o.fatal('--copy is incompatible with -b, -t')
if discards_result and saves_result:
    o.fatal('--noop and --copy are incompatible with -c, -n')
if opt.blobs and (opt.tree or saves_result):
    o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids the input list comes from stdin, so filenames are an error.
if extra and opt.git_ids:
    o.fatal("don't provide filenames when using --git-ids")
84 git.verbose = opt.verbose - 1
89 max_pack_size = parse_num(opt.max_pack_size)
90 max_pack_objects = None
91 if opt.max_pack_objects:
92 max_pack_objects = parse_num(opt.max_pack_objects)
95 hashsplit.fanout = parse_num(opt.fanout)
99 client.bwlimit = parse_num(opt.bwlimit)
101 date = parse_date_or_fatal(opt.date, o.fatal)
106 def prog(filenum, nbytes):
108 total_bytes += nbytes
110 qprogress('Splitting: file #%d, %d kbytes\r'
111 % (filenum+1, total_bytes // 1024))
113 qprogress('Splitting: %d kbytes\r' % (total_bytes // 1024))
# A set BUP_SERVER_REVERSE environment variable selects reverse mode,
# which may not be combined with an explicit -r.
is_reverse = environ.get(b'BUP_SERVER_REVERSE')
if opt.remote and is_reverse:
    o.fatal("don't use -r in reverse mode; it's automatic")
start_time = time.time()

# Derive the branch ref from -n when a name was given; otherwise there
# is no ref to work with.
if opt.name:
    if not valid_save_name(opt.name):
        o.fatal("'%r' is not a valid branch name." % opt.name)
    refname = b'refs/heads/%s' % opt.name
else:
    refname = None
125 if opt.noop or opt.copy:
126 cli = pack_writer = oldref = None
127 elif opt.remote or is_reverse:
128 git.check_repo_or_die()
129 cli = client.Client(opt.remote)
130 oldref = refname and cli.read_ref(refname) or None
131 pack_writer = cli.new_packwriter(compression_level=opt.compress,
132 max_pack_size=max_pack_size,
133 max_pack_objects=max_pack_objects)
135 git.check_repo_or_die()
137 oldref = refname and git.read_ref(refname) or None
138 pack_writer = git.PackWriter(compression_level=opt.compress,
139 max_pack_size=max_pack_size,
140 max_pack_objects=max_pack_objects)
142 input = byte_stream(sys.stdin)
145 # the input is actually a series of git object ids that we should retrieve
148 # This is a bit messy, but basically it converts from a series of
149 # CatPipe.get() iterators into a series of file-type objects.
150 # It would be less ugly if either CatPipe.get() returned a file-like object
151 # (not very efficient), or split_to_shalist() expected an iterator instead
155 def __init__(self, it):
157 def read(self, size):
158 v = next(self.it, None)
162 line = input.readline()
168 it = cp.get(line.strip())
169 next(it, None) # skip the file info
170 except KeyError as e:
171 add_error('error: %s' % e)
# Read from the files named on the command line when there are any
# (opened lazily, in binary mode); otherwise fall back to stdin.
if extra:
    files = (open(argv_bytes(fn), 'rb') for fn in extra)
else:
    files = [input]
180 new_blob = pack_writer.new_blob
181 new_tree = pack_writer.new_tree
182 elif opt.blobs or opt.tree:
184 new_blob = lambda content: git.calc_hash(b'blob', content)
185 new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist))
188 out = byte_stream(sys.stdout)
191 shalist = hashsplit.split_to_blobs(new_blob, files,
192 keep_boundaries=opt.keep_boundaries,
194 for (sha, size, level) in shalist:
195 out.write(hexlify(sha) + b'\n')
197 elif opt.tree or opt.commit or opt.name:
198 if opt.name: # insert dummy_name which may be used as a restore target
200 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
201 keep_boundaries=opt.keep_boundaries,
203 splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
204 shalist = [(mode, splitfile_name, sha)]
206 shalist = hashsplit.split_to_shalist(
207 new_blob, new_tree, files,
208 keep_boundaries=opt.keep_boundaries, progress=prog)
209 tree = new_tree(shalist)
212 it = hashsplit.hashsplit_iter(files,
213 keep_boundaries=opt.keep_boundaries,
215 for (blob, level) in it:
216 hashsplit.total_split += len(blob)
# Emit the chunk's raw bytes through the binary stdout stream.  Going
# through str() on Python 3 would write the object's textual form
# rather than its data (blob is presumably bytes-like, since len() is
# taken of it just above -- confirm against hashsplit_iter).
out.write(blob)
219 megs = hashsplit.total_split // 1024 // 1024
220 if not opt.quiet and last != megs:
226 out.write(hexlify(tree) + b'\n')
# Wrap the tree in a commit whenever -c or -n was requested.
if opt.commit or opt.name:
    # The commit message records the invoking command line.
    msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.argvb
    ref = (b'refs/heads/%s' % opt.name) if opt.name else None
    userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
    # userline and date are each passed twice (presumably once for the
    # author and once for the committer -- confirm against new_commit).
    commit = pack_writer.new_commit(tree, oldref, userline, date, None,
                                    userline, date, None, msg)
234 out.write(hexlify(commit) + b'\n')
237 pack_writer.close() # must close before we can update the ref
241 cli.update_ref(refname, commit, oldref)
243 git.update_ref(refname, commit, oldref)
248 secs = time.time() - start_time
249 size = hashsplit.total_split
251 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
252 % (size / 1024, secs, size / 1024 / secs))
255 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))