2 from __future__ import absolute_import, division, print_function
3 from binascii import hexlify
6 from bup import compat, hashsplit, git, options, client
7 from bup.compat import argv_bytes, environ, nullcontext
8 from bup.helpers import (add_error, hostname, log, parse_num,
9 qprogress, reprogress, saved_errors,
12 from bup.io import byte_stream
13 from bup.pwdgrp import userfullname, username
17 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
18 bup split -b OPTIONS [--git-ids | filenames...]
19 bup split --copy OPTIONS [--git-ids | filenames...]
20 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
23 b,blobs output a series of blob ids. Implies --fanout=0.
24 t,tree output a tree id
25 c,commit output a commit id
26 n,name= save the result under the given name
27 noop split the input, but throw away the result
28 copy split the input, copy it to stdout, don't save to repo
30 r,remote= remote repository path
31 d,date= date for the commit (seconds since the epoch)
32 q,quiet don't print progress messages
33 v,verbose increase log output (can be used more than once)
34 git-ids read a list of git object ids from stdin and split their contents
35 keep-boundaries don't let one chunk span two input files
36 bench print benchmark timings to stderr
37 max-pack-size= maximum bytes in a single pack
38 max-pack-objects= maximum number of objects in a single pack
39 fanout= average number of blobs in a single tree
40 bwlimit= maximum bytes/sec to transmit to server
41 #,compress= set compression level to # (0-9, 9 is highest) [1]
50 def __exit__(self, type, value, traceback):
51 return None # since close() does nothing
def new_blob(self, content):
    """Return the id git.calc_hash() computes for content as a b'blob'.

    This method only computes and returns the hash; it does not itself
    store content anywhere.
    """
    blob_id = git.calc_hash(b'blob', content)
    return blob_id
def new_tree(self, shalist):
    """Return the id git.calc_hash() computes for shalist as a b'tree'.

    shalist is first serialized with git.tree_encode(); the encoded form
    is then hashed.  This method itself stores nothing.
    """
    encoded_tree = git.tree_encode(shalist)
    tree_id = git.calc_hash(b'tree', encoded_tree)
    return tree_id
61 def opts_from_cmdline(argv):
62 o = options.Options(optspec)
63 opt, flags, extra = o.parse_bytes(argv[1:])
66 if opt.name: opt.name = argv_bytes(opt.name)
67 if opt.remote: opt.remote = argv_bytes(opt.remote)
68 if opt.verbose is None: opt.verbose = 0
70 if not (opt.blobs or opt.tree or opt.commit or opt.name or
71 opt.noop or opt.copy):
72 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
73 if opt.copy and (opt.blobs or opt.tree):
74 o.fatal('--copy is incompatible with -b, -t')
75 if (opt.noop or opt.copy) and (opt.commit or opt.name):
76 o.fatal('--noop and --copy are incompatible with -c, -n')
77 if opt.blobs and (opt.tree or opt.commit or opt.name):
78 o.fatal('-b is incompatible with -t, -c, -n')
79 if extra and opt.git_ids:
80 o.fatal("don't provide filenames when using --git-ids")
82 git.verbose = opt.verbose - 1
85 opt.max_pack_size = parse_num(opt.max_pack_size)
86 if opt.max_pack_objects:
87 opt.max_pack_objects = parse_num(opt.max_pack_objects)
89 opt.fanout = parse_num(opt.fanout)
91 opt.bwlimit = parse_num(opt.bwlimit)
93 opt.date = parse_date_or_fatal(opt.date, o.fatal)
95 opt.date = time.time()
97 opt.is_reverse = environ.get(b'BUP_SERVER_REVERSE')
98 if opt.is_reverse and opt.remote:
99 o.fatal("don't use -r in reverse mode; it's automatic")
101 if opt.name and not valid_save_name(opt.name):
102 o.fatal("'%r' is not a valid branch name." % opt.name)
106 def split(opt, files, parent, out, pack_writer):
107 # Hack around lack of nonlocal vars in python 2
109 def prog(filenum, nbytes):
110 total_bytes[0] += nbytes
112 qprogress('Splitting: file #%d, %d kbytes\r'
113 % (filenum+1, total_bytes[0] // 1024))
115 qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))
117 new_blob = pack_writer.new_blob
118 new_tree = pack_writer.new_tree
120 shalist = hashsplit.split_to_blobs(new_blob, files,
121 keep_boundaries=opt.keep_boundaries,
123 for sha, size, level in shalist:
124 out.write(hexlify(sha) + b'\n')
126 elif opt.tree or opt.commit or opt.name:
127 if opt.name: # insert dummy_name which may be used as a restore target
129 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
130 keep_boundaries=opt.keep_boundaries,
132 splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
133 shalist = [(mode, splitfile_name, sha)]
136 hashsplit.split_to_shalist(new_blob, new_tree, files,
137 keep_boundaries=opt.keep_boundaries,
139 tree = new_tree(shalist)
142 it = hashsplit.hashsplit_iter(files,
143 keep_boundaries=opt.keep_boundaries,
145 for blob, level in it:
146 hashsplit.total_split += len(blob)
148 sys.stdout.write(str(blob))
149 megs = hashsplit.total_split // 1024 // 1024
150 if not opt.quiet and last != megs:
156 out.write(hexlify(tree) + b'\n')
159 if opt.commit or opt.name:
160 msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
161 userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
162 commit = pack_writer.new_commit(tree, parent, userline, opt.date,
163 None, userline, opt.date, None, msg)
165 out.write(hexlify(commit) + b'\n')
170 opt = opts_from_cmdline(argv)
172 git.verbose = opt.verbose - 1
174 hashsplit.fanout = opt.fanout
178 client.bwlimit = opt.bwlimit
180 start_time = time.time()
183 out = byte_stream(sys.stdout)
184 stdin = byte_stream(sys.stdin)
187 # the input is actually a series of git object ids that we should retrieve
190 # This is a bit messy, but basically it converts from a series of
191 # CatPipe.get() iterators into a series of file-type objects.
192 # It would be less ugly if either CatPipe.get() returned a file-like object
193 # (not very efficient), or split_to_shalist() expected an iterator instead
197 def __init__(self, it):
199 def read(self, size):
200 v = next(self.it, None)
204 line = stdin.readline()
210 it = cp.get(line.strip())
211 next(it, None) # skip the file info
212 except KeyError as e:
213 add_error('error: %s' % e)
218 # the input either comes from a series of files or from stdin.
220 files = (open(argv_bytes(fn), 'rb') for fn in opt.sources)
224 writing = not (opt.noop or opt.copy)
225 remote_dest = opt.remote or opt.is_reverse
228 git.check_repo_or_die()
230 if remote_dest and writing:
231 cli = repo = client.Client(opt.remote)
236 # cli creation must be last nontrivial command in each if clause above
238 if opt.name and writing:
239 refname = opt.name and b'refs/heads/%s' % opt.name
240 oldref = repo.read_ref(refname)
242 refname = oldref = None
245 pack_writer = NoOpPackWriter()
246 elif not remote_dest:
247 pack_writer = git.PackWriter(compression_level=opt.compress,
248 max_pack_size=opt.max_pack_size,
249 max_pack_objects=opt.max_pack_objects)
251 pack_writer = cli.new_packwriter(compression_level=opt.compress,
252 max_pack_size=opt.max_pack_size,
253 max_pack_objects=opt.max_pack_objects)
255 # packwriter creation must be last command in each if clause above
257 commit = split(opt, files, oldref, out, pack_writer)
259 # pack_writer must be closed before we can update the ref
261 repo.update_ref(refname, commit, oldref)
263 secs = time.time() - start_time
264 size = hashsplit.total_split
266 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
267 % (size / 1024, secs, size / 1024 / secs))
270 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))