2 from __future__ import absolute_import, division, print_function
3 from binascii import hexlify
6 from bup import compat, hashsplit, git, options, client
7 from bup.compat import argv_bytes, environ, nullcontext
8 from bup.helpers import (add_error, hostname, log, parse_num,
9 qprogress, reprogress, saved_errors,
12 from bup.io import byte_stream
13 from bup.pwdgrp import userfullname, username
17 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
18 bup split -b OPTIONS [--git-ids | filenames...]
19 bup split --copy OPTIONS [--git-ids | filenames...]
20 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
23 b,blobs output a series of blob ids. Implies --fanout=0.
24 t,tree output a tree id
25 c,commit output a commit id
26 n,name= save the result under the given name
27 noop split the input, but throw away the result
28 copy split the input, copy it to stdout, don't save to repo
30 r,remote= remote repository path
31 d,date= date for the commit (seconds since the epoch)
32 q,quiet don't print progress messages
33 v,verbose increase log output (can be used more than once)
34 git-ids read a list of git object ids from stdin and split their contents
35 keep-boundaries don't let one chunk span two input files
36 bench print benchmark timings to stderr
37 max-pack-size= maximum bytes in a single pack
38 max-pack-objects= maximum number of objects in a single pack
39 fanout= average number of blobs in a single tree
40 bwlimit= maximum bytes/sec to transmit to server
41 #,compress= set compression level to # (0-9, 9 is highest) [1]
def __exit__(self, type, value, traceback):
    """Leave the ``with`` block without performing any cleanup.

    The three arguments describe an exception raised inside the block
    (or are all None); they are ignored.  Returning None is falsy, so a
    pending exception is not suppressed and keeps propagating.
    """
    # since close() does nothing
    return None
def new_blob(self, content):
    """Return the id ``git.calc_hash`` computes for *content* as a blob.

    This method only computes and returns the hash; it performs no
    write of its own.
    """
    blob_id = git.calc_hash(b'blob', content)
    return blob_id
def new_tree(self, shalist):
    """Return the id ``git.calc_hash`` computes for *shalist* as a tree.

    *shalist* is first serialized with ``git.tree_encode``; the encoded
    bytes are then hashed with the ``b'tree'`` object type.  This method
    performs no write of its own.
    """
    encoded_tree = git.tree_encode(shalist)
    return git.calc_hash(b'tree', encoded_tree)
59 def opts_from_cmdline(argv):
60 o = options.Options(optspec)
61 opt, flags, extra = o.parse_bytes(argv[1:])
64 if opt.name: opt.name = argv_bytes(opt.name)
65 if opt.remote: opt.remote = argv_bytes(opt.remote)
66 if opt.verbose is None: opt.verbose = 0
68 if not (opt.blobs or opt.tree or opt.commit or opt.name or
69 opt.noop or opt.copy):
70 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
71 if opt.copy and (opt.blobs or opt.tree):
72 o.fatal('--copy is incompatible with -b, -t')
73 if (opt.noop or opt.copy) and (opt.commit or opt.name):
74 o.fatal('--noop and --copy are incompatible with -c, -n')
75 if opt.blobs and (opt.tree or opt.commit or opt.name):
76 o.fatal('-b is incompatible with -t, -c, -n')
77 if extra and opt.git_ids:
78 o.fatal("don't provide filenames when using --git-ids")
80 git.verbose = opt.verbose - 1
83 opt.max_pack_size = parse_num(opt.max_pack_size)
84 if opt.max_pack_objects:
85 opt.max_pack_objects = parse_num(opt.max_pack_objects)
87 opt.fanout = parse_num(opt.fanout)
89 opt.bwlimit = parse_num(opt.bwlimit)
91 opt.date = parse_date_or_fatal(opt.date, o.fatal)
93 opt.date = time.time()
95 opt.is_reverse = environ.get(b'BUP_SERVER_REVERSE')
96 if opt.is_reverse and opt.remote:
97 o.fatal("don't use -r in reverse mode; it's automatic")
99 if opt.name and not valid_save_name(opt.name):
100 o.fatal("'%r' is not a valid branch name." % opt.name)
104 def split(opt, files, parent, out, pack_writer):
105 # Hack around lack of nonlocal vars in python 2
107 def prog(filenum, nbytes):
108 total_bytes[0] += nbytes
110 qprogress('Splitting: file #%d, %d kbytes\r'
111 % (filenum+1, total_bytes[0] // 1024))
113 qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))
115 new_blob = pack_writer.new_blob
116 new_tree = pack_writer.new_tree
118 shalist = hashsplit.split_to_blobs(new_blob, files,
119 keep_boundaries=opt.keep_boundaries,
121 for sha, size, level in shalist:
122 out.write(hexlify(sha) + b'\n')
124 elif opt.tree or opt.commit or opt.name:
125 if opt.name: # insert dummy_name which may be used as a restore target
127 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
128 keep_boundaries=opt.keep_boundaries,
130 splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
131 shalist = [(mode, splitfile_name, sha)]
134 hashsplit.split_to_shalist(new_blob, new_tree, files,
135 keep_boundaries=opt.keep_boundaries,
137 tree = new_tree(shalist)
140 it = hashsplit.hashsplit_iter(files,
141 keep_boundaries=opt.keep_boundaries,
143 for blob, level in it:
144 hashsplit.total_split += len(blob)
146 sys.stdout.write(str(blob))
147 megs = hashsplit.total_split // 1024 // 1024
148 if not opt.quiet and last != megs:
154 out.write(hexlify(tree) + b'\n')
157 if opt.commit or opt.name:
158 msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
159 userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
160 commit = pack_writer.new_commit(tree, parent, userline, opt.date,
161 None, userline, opt.date, None, msg)
163 out.write(hexlify(commit) + b'\n')
168 opt = opts_from_cmdline(argv)
170 git.verbose = opt.verbose - 1
172 hashsplit.fanout = opt.fanout
176 client.bwlimit = opt.bwlimit
178 start_time = time.time()
181 out = byte_stream(sys.stdout)
182 stdin = byte_stream(sys.stdin)
185 # the input is actually a series of git object ids that we should retrieve
188 # This is a bit messy, but basically it converts from a series of
189 # CatPipe.get() iterators into a series of file-type objects.
190 # It would be less ugly if either CatPipe.get() returned a file-like object
191 # (not very efficient), or split_to_shalist() expected an iterator instead
195 def __init__(self, it):
197 def read(self, size):
198 v = next(self.it, None)
202 line = stdin.readline()
208 it = cp.get(line.strip())
209 next(it, None) # skip the file info
210 except KeyError as e:
211 add_error('error: %s' % e)
216 # the input either comes from a series of files or from stdin.
218 files = (open(argv_bytes(fn), 'rb') for fn in opt.sources)
222 writing = not (opt.noop or opt.copy)
223 remote_dest = opt.remote or opt.is_reverse
226 git.check_repo_or_die()
228 if remote_dest and writing:
229 cli = repo = client.Client(opt.remote)
234 # cli creation must be last nontrivial command in each if clause above
236 if opt.name and writing:
237 refname = opt.name and b'refs/heads/%s' % opt.name
238 oldref = repo.read_ref(refname)
240 refname = oldref = None
243 pack_writer = NoOpPackWriter()
244 elif not remote_dest:
245 pack_writer = git.PackWriter(compression_level=opt.compress,
246 max_pack_size=opt.max_pack_size,
247 max_pack_objects=opt.max_pack_objects)
249 pack_writer = cli.new_packwriter(compression_level=opt.compress,
250 max_pack_size=opt.max_pack_size,
251 max_pack_objects=opt.max_pack_objects)
253 commit = split(opt, files, oldref, out, pack_writer)
258 # pack_writer must be closed before we can update the ref
260 repo.update_ref(refname, commit, oldref)
262 secs = time.time() - start_time
263 size = hashsplit.total_split
265 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
266 % (size / 1024, secs, size / 1024 / secs))
269 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))