# Shell preamble: publishes the original command line through environment
# variables, then re-executes this same file under a Python interpreter.
# NOTE(review): the loop that defines arg_i and arg is not visible in this
# excerpt (the embedded line numbers skip).
3 # https://sourceware.org/bugzilla/show_bug.cgi?id=26034
# Export the invoked script path as BUP_ARGV_0.
4 export "BUP_ARGV_0"="$0"
# Export one argument as BUP_ARGV_<arg_i>.
7 export "BUP_ARGV_${arg_i}"="$arg"
11 # Here to end of preamble replaced during install
# Interpreter path, relative to this script's directory; exit with the
# failing status if the command substitution fails.
12 bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
# Replace the shell process with that interpreter running this file.
13 exec "$bup_python" "$0"
17 from __future__ import absolute_import, division, print_function
18 from binascii import hexlify
21 sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
23 from bup import compat, hashsplit, git, options, client
24 from bup.compat import argv_bytes, environ
25 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
26 qprogress, reprogress, saved_errors,
29 from bup.io import byte_stream
30 from bup.pwdgrp import userfullname, username
34 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
35 bup split -b OPTIONS [--git-ids | filenames...]
36 bup split --copy OPTIONS [--git-ids | filenames...]
37 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
40 b,blobs output a series of blob ids. Implies --fanout=0.
41 t,tree output a tree id
42 c,commit output a commit id
43 n,name= save the result under the given name
44 noop split the input, but throw away the result
45 copy split the input, copy it to stdout, don't save to repo
47 r,remote= remote repository path
48 d,date= date for the commit (seconds since the epoch)
49 q,quiet don't print progress messages
50 v,verbose increase log output (can be used more than once)
51 git-ids read a list of git object ids from stdin and split their contents
52 keep-boundaries don't let one chunk span two input files
53 bench print benchmark timings to stderr
54 max-pack-size= maximum bytes in a single pack
55 max-pack-objects= maximum number of objects in a single pack
56 fanout= average number of blobs in a single tree
57 bwlimit= maximum bytes/sec to transmit to server
58 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line into options (opt), flags and positional
# arguments (extra).
o = options.Options(optspec)
opt, flags, extra = o.parse(compat.argv[1:])

# Run the name/remote values through argv_bytes() when they were given, and
# default the verbosity to 0 when -v was not used.
if opt.name:
    opt.name = argv_bytes(opt.name)
if opt.remote:
    opt.remote = argv_bytes(opt.remote)
if opt.verbose is None:
    opt.verbose = 0

# At least one output mode has to be selected.
if not any((opt.blobs, opt.tree, opt.commit, opt.name, opt.noop, opt.copy)):
    o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")

# Reject mode combinations that contradict each other.
if opt.copy and any((opt.blobs, opt.tree)):
    o.fatal('--copy is incompatible with -b, -t')
if any((opt.noop, opt.copy)) and any((opt.commit, opt.name)):
    o.fatal('--noop and --copy are incompatible with -c, -n')
if opt.blobs and any((opt.tree, opt.commit, opt.name)):
    o.fatal('-b is incompatible with -t, -c, -n')

# --git-ids takes its input from stdin, so filenames are not accepted with it.
if opt.git_ids and extra:
    o.fatal("don't provide filenames when using --git-ids")
# Numeric option handling.
# NOTE(review): the embedded line numbers skip in this stretch, so some of the
# `if` lines guarding these assignments are not visible here; the conditions
# under which each one runs cannot be confirmed from this excerpt.
# git's verbosity is set one level below the requested verbosity.
81 git.verbose = opt.verbose - 1
# --max-pack-size, run through parse_num().
86 max_pack_size = parse_num(opt.max_pack_size)
# No object-count limit unless --max-pack-objects was given.
87 max_pack_objects = None
88 if opt.max_pack_objects:
89 max_pack_objects = parse_num(opt.max_pack_objects)
# --fanout and --bwlimit are stored as attributes on the hashsplit and client
# modules respectively.
92 hashsplit.fanout = parse_num(opt.fanout)
96 client.bwlimit = parse_num(opt.bwlimit)
# Commit date from --date; o.fatal is handed to the helper, presumably so it
# can report an unparsable value -- confirm against parse_date_or_fatal.
98 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback: adds nbytes to the running total_bytes counter and
# reports the total in kbytes through qprogress().
#   filenum -- file number; reported to the user as filenum+1
#   nbytes  -- number of bytes to add to the running total
# NOTE(review): two qprogress() calls appear back to back with no visible
# branch between them, and no declaration of total_bytes is visible; the
# lines selecting between the two messages appear to be missing from this
# excerpt.
103 def prog(filenum, nbytes):
105 total_bytes += nbytes
107 qprogress('Splitting: file #%d, %d kbytes\r'
108 % (filenum+1, total_bytes // 1024))
110 qprogress('Splitting: %d kbytes\r' % (total_bytes // 1024))
# Reverse mode is signalled through BUP_SERVER_REVERSE in the environment;
# an explicit -r is rejected in that mode.
is_reverse = environ.get(b'BUP_SERVER_REVERSE')
if opt.remote and is_reverse:
    o.fatal("don't use -r in reverse mode; it's automatic")
start_time = time.time()

# Validate the requested branch name and derive the full ref name from it;
# without -n there is no ref to work with.
if opt.name:
    if not valid_save_name(opt.name):
        o.fatal("'%r' is not a valid branch name." % opt.name)
    refname = b'refs/heads/%s' % opt.name
else:
    refname = None
# Choose where objects are written (pack_writer) and read the current value
# of the target ref (oldref) when a ref name is set.
122 if opt.noop or opt.copy:
# --noop/--copy: no client, no pack writer and no previous ref.
123 cli = pack_writer = oldref = None
124 elif opt.remote or is_reverse:
# Remote or reverse mode: refs are read and packs are written through a
# client.Client created from opt.remote.
125 git.check_repo_or_die()
126 cli = client.Client(opt.remote)
127 oldref = refname and cli.read_ref(refname) or None
128 pack_writer = cli.new_packwriter(compression_level=opt.compress,
129 max_pack_size=max_pack_size,
130 max_pack_objects=max_pack_objects)
# NOTE(review): the embedded numbering skips here. The lines below use the
# git module directly instead of a client, so presumably a final `else:`
# branch begins at this point -- confirm against the full file.
132 git.check_repo_or_die()
134 oldref = refname and git.read_ref(refname) or None
135 pack_writer = git.PackWriter(compression_level=opt.compress,
136 max_pack_size=max_pack_size,
137 max_pack_objects=max_pack_objects)
# stdin wrapped by byte_stream(); note that this name shadows the builtin
# input().
139 input = byte_stream(sys.stdin)
142 # the input is actually a series of git object ids that we should retrieve
145 # This is a bit messy, but basically it converts from a series of
146 # CatPipe.get() iterators into a series of file-type objects.
147 # It would be less ugly if either CatPipe.get() returned a file-like object
148 # (not very efficient), or split_to_shalist() expected an iterator instead
# Adapter giving an iterator a read() method.
# NOTE(review): the class header, the body of __init__ and the return of
# read() are not visible in this excerpt.
152 def __init__(self, it):
154 def read(self, size):
# Take the next item from the wrapped iterator; None once it is exhausted.
155 v = next(self.it, None)
# Read one line from the input; it is stripped and used as the lookup key
# below.
159 line = input.readline()
# cp is defined outside this excerpt (the comment above suggests a CatPipe).
165 it = cp.get(line.strip())
166 next(it, None) # skip the file info
# A KeyError is recorded with add_error() rather than propagated.
167 except KeyError as e:
168 add_error('error: %s' % e)
173 # the input either comes from a series of files or from stdin.
# With filenames: a generator that opens each one in binary mode as it is
# consumed. Without filenames: a one-element list holding the stdin stream.
# NOTE(review): nothing visible here closes the files the generator opens.
174 files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]
# Blob and tree creation goes through the pack writer.
# NOTE(review): the `if` line guarding the next two assignments is not
# visible in this excerpt.
177 new_blob = pack_writer.new_blob
178 new_tree = pack_writer.new_tree
179 elif opt.blobs or opt.tree:
# Otherwise only the object ids are computed, via git.calc_hash().
181 new_blob = lambda content: git.calc_hash(b'blob', content)
182 new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist))
# stdout wrapped by byte_stream(); the hex ids below are written to it as
# bytes.
185 out = byte_stream(sys.stdout)
# NOTE(review): several branch headers and the closing lines of some calls
# are missing from this excerpt (the embedded numbering skips), so the exact
# condition for each section below cannot be confirmed from here.
# Blob output: split the input into blobs and print one hex id per line.
188 shalist = hashsplit.split_to_blobs(new_blob, files,
189 keep_boundaries=opt.keep_boundaries,
191 for (sha, size, level) in shalist:
192 out.write(hexlify(sha) + b'\n')
# Tree/commit/name output: build a shalist and turn it into a tree.
194 elif opt.tree or opt.commit or opt.name:
195 if opt.name: # insert dummy_name which may be used as a restore target
197 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
198 keep_boundaries=opt.keep_boundaries,
# The split result is wrapped as a single entry named from b'data'.
200 splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
201 shalist = [(mode, splitfile_name, sha)]
203 shalist = hashsplit.split_to_shalist(
204 new_blob, new_tree, files,
205 keep_boundaries=opt.keep_boundaries, progress=prog)
206 tree = new_tree(shalist)
# Remaining case: iterate over the raw chunks without creating objects,
# keeping a running byte count in hashsplit.total_split.
209 it = hashsplit.hashsplit_iter(files,
210 keep_boundaries=opt.keep_boundaries,
212 for (blob, level) in it:
213 hashsplit.total_split += len(blob)
# NOTE(review): this bypasses `out` and writes str(blob) to the text stream.
# If blob is a bytes-like object under Python 3, str() yields its repr
# (e.g. "b'...'") rather than the data; out.write(blob) looks like the
# intended call -- confirm and fix in the full file.
215 sys.stdout.write(str(blob))
# Whole megabytes processed so far; compared with `last`, presumably so
# progress is only reported when the value changes (the body of this `if`
# is not visible here).
216 megs = hashsplit.total_split // 1024 // 1024
217 if not opt.quiet and last != megs:
# Print the tree id in hex.
# NOTE(review): the condition guarding this line is not visible here.
223 out.write(hexlify(tree) + b'\n')
224 if opt.commit or opt.name:
# The commit message embeds the repr of the invoking command line.
225 msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.argvb
# NOTE(review): `ref` is not read anywhere in the visible lines; the ref
# updates below use refname instead.
226 ref = opt.name and (b'refs/heads/%s' % opt.name) or None
# Identity line of the form "Full Name <user@host>"; it is passed twice to
# new_commit(), presumably as author and committer -- confirm against the
# pack writer API.
227 userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
228 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
229 userline, date, None, msg)
# Print the commit id in hex (the guarding condition is not visible here).
231 out.write(hexlify(commit) + b'\n')
234 pack_writer.close() # must close before we can update the ref
# Move refname from oldref to the new commit, either through the client or
# through the local git module.
# NOTE(review): the lines choosing between these two calls are not visible
# in this excerpt.
238 cli.update_ref(refname, commit, oldref)
240 git.update_ref(refname, commit, oldref)
# Final reporting: elapsed time, throughput and a warning about saved errors.
# Wall-clock seconds since start_time and total bytes split.
245 secs = time.time() - start_time
246 size = hashsplit.total_split
# NOTE(review): the conditions guarding the two log() calls are not visible
# in this excerpt. The rate divides by secs, which would raise
# ZeroDivisionError if the measured interval were exactly 0.
248 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
249 % (size / 1024, secs, size / 1024 / secs))
252 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))