3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, division, print_function
11 from bup import hashsplit, git, options, client
12 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
13 qprogress, reprogress, saved_errors,
16 from bup.pwdgrp import userfullname, username
20 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
21 bup split -b OPTIONS [--git-ids | filenames...]
22 bup split --copy OPTIONS [--git-ids | filenames...]
23 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
26 b,blobs output a series of blob ids. Implies --fanout=0.
27 t,tree output a tree id
28 c,commit output a commit id
29 n,name= save the result under the given name
30 noop split the input, but throw away the result
31 copy split the input, copy it to stdout, don't save to repo
33 r,remote= remote repository path
34 d,date= date for the commit (seconds since the epoch)
35 q,quiet don't print progress messages
36 v,verbose increase log output (can be used more than once)
37 git-ids read a list of git object ids from stdin and split their contents
38 keep-boundaries don't let one chunk span two input files
39 bench print benchmark timings to stderr
40 max-pack-size= maximum bytes in a single pack
41 max-pack-objects= maximum number of objects in a single pack
42 fanout= average number of blobs in a single tree
43 bwlimit= maximum bytes/sec to transmit to server
44 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line according to optspec.
o = options.Options(optspec)
opt, flags, extra = o.parse(sys.argv[1:])

# Sanity-check the requested combination of modes.  Each violated rule is
# reported through o.fatal() with its own message; the checks run in a fixed
# order so the first applicable complaint is the one the user sees.
if not any((opt.blobs, opt.tree, opt.commit, opt.name, opt.noop, opt.copy)):
    o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")

# --copy only streams data to stdout, so it cannot also emit blob/tree ids.
if opt.copy and (opt.tree or opt.blobs):
    o.fatal('--copy is incompatible with -b, -t')

# Neither --noop nor --copy saves anything, so there is nothing to commit
# or to name.
if (opt.copy or opt.noop) and (opt.name or opt.commit):
    o.fatal('--noop and --copy are incompatible with -c, -n')

# -b excludes every tree-based output.
if opt.blobs and any((opt.tree, opt.commit, opt.name)):
    o.fatal('-b is incompatible with -t, -c, -n')

# With --git-ids the input comes from stdin, not from named files.
if opt.git_ids and extra:
    o.fatal("don't provide filenames when using --git-ids")
# Push the parsed tuning options into the library modules / local settings.
# NOTE(review): this excerpt has gaps (see the embedded line numbers), so any
# guards that surround the assignments below are not visible here -- confirm
# against the complete file before relying on them being unconditional.
#
# git's verbosity is set one level below the command's own -v count.
64 git.verbose = opt.verbose - 1
# Pack limits are converted with parse_num(); max_pack_objects stays None
# when the option was not given.
69 max_pack_size = parse_num(opt.max_pack_size)
70 max_pack_objects = None
71 if opt.max_pack_objects:
72 max_pack_objects = parse_num(opt.max_pack_objects)
# Module-level knobs: tree fanout for hashsplit, bandwidth limit for client.
75 hashsplit.fanout = parse_num(opt.fanout)
79 client.bwlimit = parse_num(opt.bwlimit)
# o.fatal is handed over as the error reporter for an unparsable --date.
81 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback; it is passed as progress=prog to
# hashsplit.split_to_shalist() further down.
#   filenum -- zero-based index of the input being split (shown as filenum+1)
#   nbytes  -- not used in the visible lines; presumably the number of bytes
#              just processed -- TODO confirm
# Both messages report the running total in kbytes via qprogress().
# NOTE(review): the lines that update total_bytes and that choose between the
# per-file and the plain message are missing from this excerpt.
86 def prog(filenum, nbytes):
90 qprogress('Splitting: file #%d, %d kbytes\r'
91 % (filenum+1, total_bytes // 1024))
93 qprogress('Splitting: %d kbytes\r' % (total_bytes // 1024))
# Reverse mode is signalled through the BUP_SERVER_REVERSE environment
# variable; an explicit -r may not be combined with it.
is_reverse = os.environ.get('BUP_SERVER_REVERSE')
if opt.remote and is_reverse:
    o.fatal("don't use -r in reverse mode; it's automatic")

# Reference point for the elapsed-time figure reported at the end.
start_time = time.time()

# Work out the branch ref to save under (None when -n was not given),
# complaining first if the requested name is unusable.
if opt.name:
    if not valid_save_name(opt.name):
        o.fatal("'%s' is not a valid branch name." % opt.name)
    refname = 'refs/heads/%s' % opt.name
else:
    refname = None
# Decide where split objects are written and remember the ref's old value.
#   --noop / --copy : no client, no pack writer, no old ref.
#   -r or reverse   : go through client.Client and use its pack writer.
#   lines 115-120   : use the local git module directly.
# NOTE(review): the header of the last branch (and whatever it assigns to
# cli) is missing from this excerpt; presumably it is a plain `else:`.
# oldref is None both when no refname was requested and when read_ref()
# returns a false value.
105 if opt.noop or opt.copy:
106 cli = pack_writer = oldref = None
107 elif opt.remote or is_reverse:
108 git.check_repo_or_die()
109 cli = client.Client(opt.remote)
110 oldref = refname and cli.read_ref(refname) or None
111 pack_writer = cli.new_packwriter(compression_level=opt.compress,
112 max_pack_size=max_pack_size,
113 max_pack_objects=max_pack_objects)
115 git.check_repo_or_die()
117 oldref = refname and git.read_ref(refname) or None
118 pack_writer = git.PackWriter(compression_level=opt.compress,
119 max_pack_size=max_pack_size,
120 max_pack_objects=max_pack_objects)
# --git-ids input path: object ids are read line by line from stdin and each
# one is looked up with cp.get(); the resulting iterators are wrapped in a
# small adapter exposing read().
# NOTE(review): the adapter's class header, the enclosing loop/generator and
# the `try:` matching the `except KeyError` below are not part of this
# excerpt, so only the visible statements are described here.
123 # the input is actually a series of git object ids that we should retrieve
126 # This is a bit messy, but basically it converts from a series of
127 # CatPipe.get() iterators into a series of file-type objects.
128 # It would be less ugly if either CatPipe.get() returned a file-like object
129 # (not very efficient), or split_to_shalist() expected an iterator instead
# Adapter methods: __init__ receives the iterator; read() pulls the next item
# from self.it (None once the iterator is exhausted).  The `size` argument is
# not used in the visible line.
133 def __init__(self, it):
135 def read(self, size):
136 v = next(self.it, None)
# One id per input line; surrounding whitespace is stripped before lookup.
140 line = sys.stdin.readline()
146 it = cp.get(line.strip())
147 next(it, None) # skip the file info
# A KeyError here is recorded with add_error() rather than re-raised.
148 except KeyError as e:
149 add_error('error: %s' % e)
# `files` is a lazy generator of opened files when filenames were given on the
# command line, otherwise a one-element list holding sys.stdin.
# NOTE(review): open(fn) uses the default (text) mode and the handles are not
# closed in the visible code -- consider 'rb' if this ever runs where text and
# binary mode differ.
154 # the input either comes from a series of files or from stdin.
155 files = extra and (open(fn) for fn in extra) or [sys.stdin]
# Object factories: taken from the pack writer in the first branch (its guard
# is not visible in this excerpt); for -b/-t without one, ids are computed
# with git.calc_hash() instead.
158 new_blob = pack_writer.new_blob
159 new_tree = pack_writer.new_tree
160 elif opt.blobs or opt.tree:
162 new_blob = lambda content: git.calc_hash('blob', content)
163 new_tree = lambda shalist: git.calc_hash('tree', git.tree_encode(shalist))
# Blob output: split the input and print one hex id per resulting blob.
# NOTE(review): .encode('hex') is a Python 2-only codec.
166 shalist = hashsplit.split_to_blobs(new_blob, files,
167 keep_boundaries=opt.keep_boundaries,
169 for (sha, size, level) in shalist:
170 print(sha.encode('hex'))
# Tree-based output (-t, -c or -n).  With -n the split result is wrapped in a
# single entry whose name comes from git.mangle_name('data', ...); otherwise
# the shalist from split_to_shalist() is used as is.  Either way the final
# tree is created with new_tree().
# NOTE(review): several lines are missing from this excerpt (for example the
# assignment target of split_to_blob_or_tree() and the `else:` headers), so
# the branch structure is inferred from the embedded line numbers -- confirm.
172 elif opt.tree or opt.commit or opt.name:
173 if opt.name: # insert dummy_name which may be used as a restore target
175 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
176 keep_boundaries=opt.keep_boundaries,
178 splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
179 shalist = [(mode, splitfile_name, sha)]
181 shalist = hashsplit.split_to_shalist(
182 new_blob, new_tree, files,
183 keep_boundaries=opt.keep_boundaries, progress=prog)
184 tree = new_tree(shalist)
# Remaining path: iterate the raw chunks, add their sizes to
# hashsplit.total_split and write each chunk to stdout.  `megs` is the running
# total in whole MiB; it is compared with `last` when not in --quiet mode
# (the body of that check is not visible here).
187 it = hashsplit.hashsplit_iter(files,
188 keep_boundaries=opt.keep_boundaries,
190 for (blob, level) in it:
191 hashsplit.total_split += len(blob)
193 sys.stdout.write(str(blob))
194 megs = hashsplit.total_split // 1024 // 1024
195 if not opt.quiet and last != megs:
# Emit the tree id in hex (the guard for this print is not visible in this
# excerpt).  NOTE(review): .encode('hex') is a Python 2-only codec.
201 print(tree.encode('hex'))
# -c / -n: create a commit on top of oldref.  The same userline and date are
# passed twice to new_commit() -- presumably as author and committer; verify
# against PackWriter.new_commit().  The message records the invoking argv.
202 if opt.commit or opt.name:
203 msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` repeats the refname computation and is not used in any
# visible line; the update_ref() calls below use refname.
204 ref = opt.name and ('refs/heads/%s' % opt.name) or None
205 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
206 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
207 userline, date, None, msg)
209 print(commit.encode('hex'))
# The ref is moved from oldref to the new commit either through the client or
# through the local git module; the conditions selecting between the two
# calls are missing from this excerpt.
212 pack_writer.close() # must close before we can update the ref
216 cli.update_ref(refname, commit, oldref)
218 git.update_ref(refname, commit, oldref)
# Throughput summary: elapsed wall-clock time since start_time and the total
# number of bytes split.  `/` is true division here because the file imports
# division from __future__.
# NOTE(review): the guard around the log() call is not visible in this
# excerpt; also, secs can be 0.0 on a very short run, which would make the
# final division raise ZeroDivisionError.
223 secs = time.time() - start_time
224 size = hashsplit.total_split
226 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
227 % (size / 1024, secs, size / 1024 / secs))
# Final warning with the number of errors collected in saved_errors (its
# guard is likewise not visible here).
230 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))