3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, division, print_function
11 from bup import hashsplit, git, options, client
12 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
13 qprogress, reprogress, saved_errors,
14 userfullname, username, valid_save_name,
19 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
20 bup split -b OPTIONS [--git-ids | filenames...]
21 bup split --copy OPTIONS [--git-ids | filenames...]
22 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
25 b,blobs output a series of blob ids. Implies --fanout=0.
26 t,tree output a tree id
27 c,commit output a commit id
28 n,name= save the result under the given name
29 noop split the input, but throw away the result
30 copy split the input, copy it to stdout, don't save to repo
32 r,remote= remote repository path
33 d,date= date for the commit (seconds since the epoch)
34 q,quiet don't print progress messages
35 v,verbose increase log output (can be used more than once)
36 git-ids read a list of git object ids from stdin and split their contents
37 keep-boundaries don't let one chunk span two input files
38 bench print benchmark timings to stderr
39 max-pack-size= maximum bytes in a single pack
40 max-pack-objects= maximum number of objects in a single pack
41 fanout= average number of blobs in a single tree
42 bwlimit= maximum bytes/sec to transmit to server
43 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line against optspec, then reject unsupported flag
# combinations.  Each failed check reports its message through o.fatal().
# NOTE(review): presumably o.fatal() terminates the process, so later checks
# never run after a failure -- confirm in bup.options.
47 o = options.Options(optspec)
48 (opt, flags, extra) = o.parse(sys.argv[1:])
# At least one output mode has to be requested.
50 if not (opt.blobs or opt.tree or opt.commit or opt.name or
51 opt.noop or opt.copy):
52 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --copy cannot be combined with blob-id or tree-id output.
53 if opt.copy and (opt.blobs or opt.tree):
54 o.fatal('--copy is incompatible with -b, -t')
# --noop and --copy are rejected together with -c / -n (the usage text
# describes them as not keeping / not saving the result).
55 if (opt.noop or opt.copy) and (opt.commit or opt.name):
56 o.fatal('--noop and --copy are incompatible with -c, -n')
# -b excludes tree, commit and named output.
57 if opt.blobs and (opt.tree or opt.commit or opt.name):
58 o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids the input list comes from stdin (per the usage text), so
# positional filenames are refused.
59 if extra and opt.git_ids:
60 o.fatal("don't provide filenames when using --git-ids")
# Turn the parsed options into module-level settings and local limits.
# NOTE(review): this excerpt omits lines between several of these statements
# (the embedded numbering jumps), so any conditions guarding them are not
# visible here -- confirm against the full file.
63 git.verbose = opt.verbose - 1
# Pack limits are passed to the pack writer created further down;
# max_pack_objects stays None unless the option was supplied.
68 max_pack_size = parse_num(opt.max_pack_size)
69 max_pack_objects = None
70 if opt.max_pack_objects:
71 max_pack_objects = parse_num(opt.max_pack_objects)
# fanout and bwlimit are stored as attributes on the hashsplit and client
# modules rather than passed as arguments.
74 hashsplit.fanout = parse_num(opt.fanout)
78 client.bwlimit = parse_num(opt.bwlimit)
# o.fatal is handed to the date parser, presumably so a bad --date is
# reported like other option errors -- confirm in bup.helpers.
# NOTE(review): parse_date_or_fatal is not in the visible import lines;
# presumably it is imported on an omitted line.
80 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback; it is passed as progress=prog to
# hashsplit.split_to_shalist() later in this script.  It reports the kbytes
# split so far through qprogress(), and one message variant also shows the
# 1-based file number (filenum+1).
# NOTE(review): total_bytes is read here, but its definition, the code that
# adds nbytes to it, and the condition choosing between the two messages are
# on lines omitted from this excerpt -- confirm against the full file.
85 def prog(filenum, nbytes):
89 qprogress('Splitting: file #%d, %d kbytes\r'
90 % (filenum+1, total_bytes // 1024))
92 qprogress('Splitting: %d kbytes\r' % (total_bytes // 1024))
# Decide where objects are written: nowhere (--noop / --copy), through a
# client.Client connection (-r or reverse mode), or through git.PackWriter.
# Reverse mode is detected from the BUP_SERVER_REVERSE environment variable
# and may not be combined with an explicit -r.
95 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
96 if is_reverse and opt.remote:
97 o.fatal("don't use -r in reverse mode; it's automatic")
# Start of the wall-clock measurement used by the timing report at the end.
98 start_time = time.time()
# Validate the save name and derive the branch ref.  refname is None when
# -n was not given (and/or idiom).
100 if opt.name and not valid_save_name(opt.name):
101 o.fatal("'%s' is not a valid branch name." % opt.name)
102 refname = opt.name and 'refs/heads/%s' % opt.name or None
# No connection, writer or previous ref is set up for --noop / --copy.
104 if opt.noop or opt.copy:
105 cli = pack_writer = oldref = None
# Remote case: the current ref value is read through the client and the
# pack writer is obtained from it.  oldref is passed later to new_commit()
# and update_ref().
106 elif opt.remote or is_reverse:
107 git.check_repo_or_die()
108 cli = client.Client(opt.remote)
109 oldref = refname and cli.read_ref(refname) or None
110 pack_writer = cli.new_packwriter(compression_level=opt.compress,
111 max_pack_size=max_pack_size,
112 max_pack_objects=max_pack_objects)
# NOTE(review): lines are omitted from the excerpt around here.  The
# statements below are presumably the local-repository branch (an else: with
# cli set to None) -- confirm against the full file.
114 git.check_repo_or_die()
116 oldref = refname and git.read_ref(refname) or None
117 pack_writer = git.PackWriter(compression_level=opt.compress,
118 max_pack_size=max_pack_size,
119 max_pack_objects=max_pack_objects)
# --git-ids input path (fragmentary in this excerpt): ids are read from stdin
# with readline(), looked up through cp.get(), and wrapped in a small adapter
# whose read() pulls the next item from the underlying iterator.
# NOTE(review): the adapter's class header, the body of __init__, the loop
# around readline(), and the construction of cp are all on omitted lines --
# confirm against the full file before relying on these notes.
122 # the input is actually a series of git object ids that we should retrieve
125 # This is a bit messy, but basically it converts from a series of
126 # CatPipe.get() iterators into a series of file-type objects.
127 # It would be less ugly if either CatPipe.get() returned a file-like object
128 # (not very efficient), or split_to_shalist() expected an iterator instead
132 def __init__(self, it):
# read() ignores its size argument in the visible line and takes whatever the
# iterator yields next; None signals exhaustion of the iterator.
134 def read(self, size):
135 v = next(self.it, None)
139 line = sys.stdin.readline()
# Surrounding whitespace (including the newline) is stripped from the id
# before lookup; the first item yielded by cp.get() is discarded.
145 it = cp.get(line.strip())
146 next(it, None) # skip the file info
# A KeyError here (presumably an unknown object id) is recorded through
# add_error() so it shows up in the final error summary.
147 except KeyError as e:
148 add_error('error: %s' % e)
# Default input path plus selection of the blob/tree factory callables.
# NOTE(review): the files are opened lazily by a generator expression, in the
# default (text) mode, and are never explicitly closed in the visible code.
# On Python 3 the splitter would presumably need binary mode -- confirm.
153 # the input either comes from a series of files or from stdin.
154 files = extra and (open(fn) for fn in extra) or [sys.stdin]
# When a pack writer is in use, objects are created through its bound
# methods.
# NOTE(review): the `if` header that this pairs with the elif below is on an
# omitted line.
157 new_blob = pack_writer.new_blob
158 new_tree = pack_writer.new_tree
# Without a writer, -b / -t only compute object hashes via git.calc_hash();
# nothing is stored by these lambdas.
159 elif opt.blobs or opt.tree:
161 new_blob = lambda content: git.calc_hash('blob', content)
162 new_tree = lambda shalist: git.calc_hash('tree', git.tree_encode(shalist))
# Output-mode dispatch for blob and tree/commit/name modes.
# NOTE(review): the leading `if` for the blob branch and several continuation
# lines are omitted from this excerpt; the structure described here is
# inferred from the visible elif -- confirm against the full file.
# Blob mode: split_to_blobs() yields (sha, size, level) tuples and each sha
# is printed as hex.
# NOTE(review): str.encode('hex') only exists on Python 2; a Python 3 port
# would need something like binascii.hexlify -- confirm target version.
165 shalist = hashsplit.split_to_blobs(new_blob, files,
166 keep_boundaries=opt.keep_boundaries,
168 for (sha, size, level) in shalist:
169 print(sha.encode('hex'))
# Tree / commit / name modes build a shalist and turn it into a tree id.
171 elif opt.tree or opt.commit or opt.name:
# With -n, the split result is wrapped as a single entry whose name is derived
# from 'data' via git.mangle_name().
# NOTE(review): `mode` and `sha` presumably come from the
# split_to_blob_or_tree() return value assigned on an omitted line.
172 if opt.name: # insert dummy_name which may be used as a restore target
174 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
175 keep_boundaries=opt.keep_boundaries,
177 splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
178 shalist = [(mode, splitfile_name, sha)]
# Otherwise (presumably an else: on an omitted line) the shalist comes
# straight from split_to_shalist(), with prog as the progress callback.
180 shalist = hashsplit.split_to_shalist(
181 new_blob, new_tree, files,
182 keep_boundaries=opt.keep_boundaries, progress=prog)
183 tree = new_tree(shalist)
# Raw splitting loop: iterate over the chunks from hashsplit_iter(), add each
# chunk length to hashsplit.total_split, and track whole megabytes processed.
# NOTE(review): this is presumably the --noop / --copy branch, with the
# stdout write guarded by opt.copy; the branch headers, the initialisation of
# `last`, and the progress call after the final `if` are on omitted lines.
186 it = hashsplit.hashsplit_iter(files,
187 keep_boundaries=opt.keep_boundaries,
189 for (blob, level) in it:
190 hashsplit.total_split += len(blob)
# The chunk is converted with str() before being written to stdout.
192 sys.stdout.write(str(blob))
# Integer megabytes so far; compared with `last` so the check below only
# passes when the value changed and --quiet is off.
193 megs = hashsplit.total_split // 1024 // 1024
194 if not opt.quiet and last != megs:
# Emit the tree id, optionally create a commit, then update the branch ref.
# NOTE(review): the tree print is presumably guarded by opt.tree on an
# omitted line.  .encode('hex') is Python 2 only.
200 print(tree.encode('hex'))
201 if opt.commit or opt.name:
# The commit message records the full argv that produced this split.
202 msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` repeats the refname computation and is not used in any
# visible line; the ref updates below use refname.
203 ref = opt.name and ('refs/heads/%s' % opt.name) or None
# The same identity line and date are passed twice to new_commit(),
# presumably as author and committer -- confirm against the new_commit()
# signature.  oldref is passed as the second argument.
204 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
205 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
206 userline, date, None, msg)
208 print(commit.encode('hex'))
211 pack_writer.close() # must close before we can update the ref
# The ref is moved from oldref to the new commit, through the client or
# through the git module.
# NOTE(review): the conditions selecting between these two calls (and whether
# they run only with -n) are on omitted lines.
215 cli.update_ref(refname, commit, oldref)
217 git.update_ref(refname, commit, oldref)
# Final reporting: elapsed time and throughput, then a warning with the
# number of saved errors.
# NOTE(review): the conditions around these statements (presumably the
# --bench option and a non-empty saved_errors check) are on omitted lines.
222 secs = time.time() - start_time
223 size = hashsplit.total_split
# `/` is true division here because of the __future__ division import at
# the top of the file.
# NOTE(review): secs can be 0.0 on a very fast run or a coarse clock, which
# would raise ZeroDivisionError; no guard is visible -- confirm.
225 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
226 % (size / 1024, secs, size / 1024 / secs))
229 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))