3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, division, print_function
9 from binascii import hexlify
12 from bup import hashsplit, git, options, client
13 from bup.compat import argv_bytes, environ
14 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
15 qprogress, reprogress, saved_errors,
18 from bup.io import byte_stream
19 from bup.pwdgrp import userfullname, username
23 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
24 bup split -b OPTIONS [--git-ids | filenames...]
25 bup split --copy OPTIONS [--git-ids | filenames...]
26 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
29 b,blobs output a series of blob ids. Implies --fanout=0.
30 t,tree output a tree id
31 c,commit output a commit id
32 n,name= save the result under the given name
33 noop split the input, but throw away the result
34 copy split the input, copy it to stdout, don't save to repo
36 r,remote= remote repository path
37 d,date= date for the commit (seconds since the epoch)
38 q,quiet don't print progress messages
39 v,verbose increase log output (can be used more than once)
40 git-ids read a list of git object ids from stdin and split their contents
41 keep-boundaries don't let one chunk span two input files
42 bench print benchmark timings to stderr
43 max-pack-size= maximum bytes in a single pack
44 max-pack-objects= maximum number of objects in a single pack
45 fanout= average number of blobs in a single tree
46 bwlimit= maximum bytes/sec to transmit to server
47 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line against optspec, validate the combination of mode
# flags, and derive the numeric settings used by the rest of the script.
# NOTE(review): the embedded line numbers skip in this excerpt, so some of
# the guards around the assignments below are not visible here.
51 o = options.Options(optspec)
52 (opt, flags, extra) = o.parse(sys.argv[1:])
# Name and remote are passed through argv_bytes when given (presumably to
# get a bytes value -- confirm against bup.compat); verbosity defaults to 0.
53 if opt.name: opt.name = argv_bytes(opt.name)
54 if opt.remote: opt.remote = argv_bytes(opt.remote)
55 if opt.verbose is None: opt.verbose = 0
# At least one output mode is required, and the checks that follow reject
# the incompatible combinations spelled out in their messages.
57 if not (opt.blobs or opt.tree or opt.commit or opt.name or
58 opt.noop or opt.copy):
59 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
60 if opt.copy and (opt.blobs or opt.tree):
61 o.fatal('--copy is incompatible with -b, -t')
62 if (opt.noop or opt.copy) and (opt.commit or opt.name):
63 o.fatal('--noop and --copy are incompatible with -c, -n')
64 if opt.blobs and (opt.tree or opt.commit or opt.name):
65 o.fatal('-b is incompatible with -t, -c, -n')
# --git-ids takes its input from stdin, so positional filenames are an error.
66 if extra and opt.git_ids:
67 o.fatal("don't provide filenames when using --git-ids")
# The git module's verbosity is set one level below this command's.
70 git.verbose = opt.verbose - 1
# Pack limits: max_pack_objects stays None unless the option was given.
75 max_pack_size = parse_num(opt.max_pack_size)
76 max_pack_objects = None
77 if opt.max_pack_objects:
78 max_pack_objects = parse_num(opt.max_pack_objects)
# These two assignments set module-level values on hashsplit and client.
81 hashsplit.fanout = parse_num(opt.fanout)
85 client.bwlimit = parse_num(opt.bwlimit)
# parse_date_or_fatal receives o.fatal as its error reporter.
87 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback (passed as progress=prog to the hashsplit call further
# down): reports the running total through qprogress.
#   filenum -- zero-based index of the current input; shown as filenum+1.
#   nbytes  -- not used in the visible lines (the statement that presumably
#              adds it to total_bytes is not visible in this excerpt).
# total_bytes is shown in kbytes (integer division by 1024). Two message
# forms exist, with and without the file number; the condition choosing
# between them is not visible here.
92 def prog(filenum, nbytes):
96 qprogress('Splitting: file #%d, %d kbytes\r'
97 % (filenum+1, total_bytes // 1024))
99 qprogress('Splitting: %d kbytes\r' % (total_bytes // 1024))
# Decide where objects go: nowhere (--noop/--copy), a remote via
# client.Client, or the local repository via git.PackWriter.
# Reverse mode is signalled by BUP_SERVER_REVERSE in the environment; -r is
# rejected there.
102 is_reverse = environ.get(b'BUP_SERVER_REVERSE')
103 if is_reverse and opt.remote:
104 o.fatal("don't use -r in reverse mode; it's automatic")
# Start of the interval reported by the timing summary at the end.
105 start_time = time.time()
# NOTE(review): the message wraps %r in single quotes, so the repr's own
# quotes are doubled in the output.
107 if opt.name and not valid_save_name(opt.name):
108 o.fatal("'%r' is not a valid branch name." % opt.name)
# refname is refs/heads/<name> when a name was given, otherwise None.
109 refname = opt.name and b'refs/heads/%s' % opt.name or None
# No repository access at all for --noop/--copy.
111 if opt.noop or opt.copy:
112 cli = pack_writer = oldref = None
# Remote (or reverse) mode: the current value of the ref is read through
# the client, and the pack writer comes from the client too.
113 elif opt.remote or is_reverse:
114 git.check_repo_or_die()
115 cli = client.Client(opt.remote)
116 oldref = refname and cli.read_ref(refname) or None
117 pack_writer = cli.new_packwriter(compression_level=opt.compress,
118 max_pack_size=max_pack_size,
119 max_pack_objects=max_pack_objects)
# Remaining case (its branch header is not visible in this excerpt): use
# the local repository directly.
121 git.check_repo_or_die()
123 oldref = refname and git.read_ref(refname) or None
124 pack_writer = git.PackWriter(compression_level=opt.compress,
125 max_pack_size=max_pack_size,
126 max_pack_objects=max_pack_objects)
# Build `files`, the sequence of file-like inputs handed to hashsplit.
# NOTE(review): the name `input` shadows the builtin input().
128 input = byte_stream(sys.stdin)
131 # the input is actually a series of git object ids that we should retrieve
134 # This is a bit messy, but basically it converts from a series of
135 # CatPipe.get() iterators into a series of file-type objects.
136 # It would be less ugly if either CatPipe.get() returned a file-like object
137 # (not very efficient), or split_to_shalist() expected an iterator instead
# Adapter methods; the enclosing class header and the rest of each body are
# not visible in this excerpt.
141 def __init__(self, it):
# read() pulls the next item from self.it, with None as the default once
# the iterator is exhausted; `size` is not used in the visible lines.
143 def read(self, size):
144 v = next(self.it, None)
# Object ids are read from stdin one line at a time.
148 line = input.readline()
# The stripped line is looked up through cp.get(); the first item the
# iterator yields is discarded.
154 it = cp.get(line.strip())
155 next(it, None) # skip the file info
# A KeyError from the lookup is reported through add_error.
156 except KeyError as e:
157 add_error('error: %s' % e)
162 # the input either comes from a series of files or from stdin.
# With filenames, `files` is a lazy generator that opens each one in binary
# mode as it is consumed; without, it is a one-element list holding stdin.
# NOTE(review): no explicit close of the opened files is visible here.
163 files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]
# Choose the object constructors, run the split for the selected mode, and
# write the resulting ids to stdout. Several branch headers and call tails
# are not visible in this excerpt.
# With a pack writer, blobs and trees are created through it.
166 new_blob = pack_writer.new_blob
167 new_tree = pack_writer.new_tree
# Otherwise, for -b/-t, ids are obtained from git.calc_hash (presumably
# hash-only, nothing stored -- confirm against bup.git).
168 elif opt.blobs or opt.tree:
170 new_blob = lambda content: git.calc_hash(b'blob', content)
171 new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist))
# `out` wraps sys.stdout via byte_stream; ids below are written to it as
# bytes.
174 out = byte_stream(sys.stdout)
# Blob mode: one hex-encoded id per line for each (sha, size, level) tuple.
177 shalist = hashsplit.split_to_blobs(new_blob, files,
178 keep_boundaries=opt.keep_boundaries,
180 for (sha, size, level) in shalist:
181 out.write(hexlify(sha) + b'\n')
# Tree/commit/name mode: build a shalist and turn it into a tree id.
183 elif opt.tree or opt.commit or opt.name:
184 if opt.name: # insert dummy_name which may be used as a restore target
186 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
187 keep_boundaries=opt.keep_boundaries,
# With a name, the shalist holds a single entry whose name is derived from
# b'data' through git.mangle_name.
189 splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
190 shalist = [(mode, splitfile_name, sha)]
192 shalist = hashsplit.split_to_shalist(
193 new_blob, new_tree, files,
194 keep_boundaries=opt.keep_boundaries, progress=prog)
195 tree = new_tree(shalist)
# Remaining mode: iterate the raw chunks and add each chunk's length to
# hashsplit.total_split.
198 it = hashsplit.hashsplit_iter(files,
199 keep_boundaries=opt.keep_boundaries,
201 for (blob, level) in it:
202 hashsplit.total_split += len(blob)
# NOTE(review): this writes str(blob) to the text-mode sys.stdout instead of
# using `out`. On Python 3, str() of a bytes-like object gives its repr, not
# the raw data -- confirm whether out.write(blob) was intended.
204 sys.stdout.write(str(blob))
# Whole megabytes split so far, compared with `last` to decide whether to
# report (the reporting statement is not visible here).
205 megs = hashsplit.total_split // 1024 // 1024
206 if not opt.quiet and last != megs:
# Emit the tree id as hex followed by a newline.
212 out.write(hexlify(tree) + b'\n')
# Create the commit for -c/-n, close the pack, move the ref, then report
# timing and any saved errors. Some branch headers are not visible in this
# excerpt.
213 if opt.commit or opt.name:
# The commit message embeds the repr of sys.argv.
214 msg = b'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` repeats the value already held by refname and is not
# used in the visible lines.
215 ref = opt.name and (b'refs/heads/%s' % opt.name) or None
# The same userline and date are passed twice to new_commit (presumably
# once as author and once as committer -- confirm against new_commit).
216 userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
217 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
218 userline, date, None, msg)
220 out.write(hexlify(commit) + b'\n')
223 pack_writer.close() # must close before we can update the ref
# The ref is moved from oldref to the new commit, either through the client
# or through the local git module (the selecting conditions are not visible
# here).
227 cli.update_ref(refname, commit, oldref)
229 git.update_ref(refname, commit, oldref)
# Timing summary: elapsed time since start_time and hashsplit.total_split
# bytes. `/` is true division here because the file imports division from
# __future__.
# NOTE(review): a secs value of 0 would raise ZeroDivisionError.
234 secs = time.time() - start_time
235 size = hashsplit.total_split
237 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
238 % (size / 1024, secs, size / 1024 / secs))
# Reports how many entries saved_errors holds.
241 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))