2 from __future__ import absolute_import, division, print_function
3 from binascii import hexlify
6 from bup import compat, hashsplit, git, options, client
7 from bup.compat import argv_bytes, environ
8 from bup.helpers import (add_error, hostname, log, parse_num,
9 qprogress, reprogress, saved_errors,
12 from bup.io import byte_stream
13 from bup.pwdgrp import userfullname, username
17 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
18 bup split -b OPTIONS [--git-ids | filenames...]
19 bup split --copy OPTIONS [--git-ids | filenames...]
20 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
23 b,blobs output a series of blob ids. Implies --fanout=0.
24 t,tree output a tree id
25 c,commit output a commit id
26 n,name= save the result under the given name
27 noop split the input, but throw away the result
28 copy split the input, copy it to stdout, don't save to repo
30 r,remote= remote repository path
31 d,date= date for the commit (seconds since the epoch)
32 q,quiet don't print progress messages
33 v,verbose increase log output (can be used more than once)
34 git-ids read a list of git object ids from stdin and split their contents
35 keep-boundaries don't let one chunk span two input files
36 bench print benchmark timings to stderr
37 max-pack-size= maximum bytes in a single pack
38 max-pack-objects= maximum number of objects in a single pack
39 fanout= average number of blobs in a single tree
40 bwlimit= maximum bytes/sec to transmit to server
41 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Build the option parser from the optspec text and parse the arguments
# that follow argv[0].
45 o = options.Options(optspec)
46 opt, flags, extra = o.parse_bytes(argv[1:])
# The branch name and remote are passed through argv_bytes() when given.
47 if opt.name: opt.name = argv_bytes(opt.name)
48 if opt.remote: opt.remote = argv_bytes(opt.remote)
# Treat an unset verbosity as level 0 so later code can do arithmetic on it.
49 if opt.verbose is None: opt.verbose = 0
# Validate the mode flags; every violation is reported through o.fatal().
# At least one of -b, -t, -c, -n, --noop or --copy must be selected.
51 if not (opt.blobs or opt.tree or opt.commit or opt.name or
52 opt.noop or opt.copy):
53 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --copy cannot be combined with blob-id or tree-id output.
54 if opt.copy and (opt.blobs or opt.tree):
55 o.fatal('--copy is incompatible with -b, -t')
# Neither --noop nor --copy may be combined with -c or -n.
56 if (opt.noop or opt.copy) and (opt.commit or opt.name):
57 o.fatal('--noop and --copy are incompatible with -c, -n')
# -b excludes -t, -c and -n.
58 if opt.blobs and (opt.tree or opt.commit or opt.name):
59 o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids no filename arguments are accepted.
60 if extra and opt.git_ids:
61 o.fatal("don't provide filenames when using --git-ids")
# The git module's verbosity is set one level below the command's own.
64 git.verbose = opt.verbose - 1
# Pack limits handed to the pack writer further down. parse_num() converts
# the option text to a number (presumably accepting unit suffixes -- confirm
# against bup.helpers).
69 max_pack_size = parse_num(opt.max_pack_size)
# No object-count limit unless --max-pack-objects was given.
70 max_pack_objects = None
71 if opt.max_pack_objects:
72 max_pack_objects = parse_num(opt.max_pack_objects)
# --fanout: overrides the module-level hashsplit.fanout.
75 hashsplit.fanout = parse_num(opt.fanout)
# --bwlimit: sets the module-level client.bwlimit.
79 client.bwlimit = parse_num(opt.bwlimit)
# Commit date from --date; o.fatal is handed over as the error reporter
# (presumably invoked on unparsable input -- confirm).
81 date = parse_date_or_fatal(opt.date, o.fatal)
85 # Hack around lack of nonlocal vars in python 2
# The running byte count is kept in total_bytes[0] so that the nested
# function can update it in place without a nonlocal declaration.
# prog is the progress callback passed to the hashsplit routines as
# progress=prog; it receives a file index and a byte count.
87 def prog(filenum, nbytes):
# Accumulate the bytes reported by this call.
88 total_bytes[0] += nbytes
# Message form that includes the file number (shown 1-based) and the
# running total in whole kbytes.
90 qprogress('Splitting: file #%d, %d kbytes\r'
91 % (filenum+1, total_bytes[0] // 1024))
# Message form that reports only the running total in whole kbytes.
93 qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))
# BUP_SERVER_REVERSE in the environment marks reverse mode; an explicit -r
# is rejected there because, per the error text, the remote is automatic.
96 is_reverse = environ.get(b'BUP_SERVER_REVERSE')
97 if is_reverse and opt.remote:
98 o.fatal("don't use -r in reverse mode; it's automatic")
# Reference point for the elapsed-time report at the end of the run.
99 start_time = time.time()
# Reject branch names that valid_save_name() does not accept.
# NOTE(review): %r already quotes the value, so wrapping it in '...' shows
# doubled quoting in the message -- cosmetic only.
101 if opt.name and not valid_save_name(opt.name):
102 o.fatal("'%r' is not a valid branch name." % opt.name)
# Full ref under refs/heads/ for the branch, or None when -n was not given.
103 refname = opt.name and b'refs/heads/%s' % opt.name or None
# Select where objects go.
# --noop / --copy: nothing is saved, so there is no client, no pack writer
# and no previous ref value.
105 if opt.noop or opt.copy:
106 cli = pack_writer = oldref = None
# -r or reverse mode: go through client.Client.
107 elif opt.remote or is_reverse:
108 git.check_repo_or_die()
109 cli = client.Client(opt.remote)
# Remember the branch's current value; it is later passed to new_commit()
# and update_ref() as oldref.
110 oldref = refname and cli.read_ref(refname) or None
111 pack_writer = cli.new_packwriter(compression_level=opt.compress,
112 max_pack_size=max_pack_size,
113 max_pack_objects=max_pack_objects)
# Local-repository variant: the same steps through git.read_ref() and
# git.PackWriter, with the same compression level and pack limits.
115 git.check_repo_or_die()
117 oldref = refname and git.read_ref(refname) or None
118 pack_writer = git.PackWriter(compression_level=opt.compress,
119 max_pack_size=max_pack_size,
120 max_pack_objects=max_pack_objects)
# Binary-stream wrapper around stdin; used for reading ids line by line and
# as the fallback data source.
122 input = byte_stream(sys.stdin)
125 # the input is actually a series of git object ids that we should retrieve
128 # This is a bit messy, but basically it converts from a series of
129 # CatPipe.get() iterators into a series of file-type objects.
130 # It would be less ugly if either CatPipe.get() returned a file-like object
131 # (not very efficient), or split_to_shalist() expected an iterator instead
# of a file-like object.
# Adapter methods: the constructor receives the iterator to wrap.
135 def __init__(self, it):
# read() pulls the next item from self.it; next(..., None) yields None
# rather than raising StopIteration once the iterator is exhausted.
# NOTE(review): `size` is not consulted on the line shown -- presumably one
# whole item is returned per call regardless of the requested size; confirm.
137 def read(self, size):
138 v = next(self.it, None)
# One object id is read per line of stdin.
142 line = input.readline()
# Look the id up with cp.get(); the first item it yields describes the
# object and is discarded so that only content remains.
148 it = cp.get(line.strip())
149 next(it, None) # skip the file info
# A KeyError from the lookup is reported through add_error().
150 except KeyError as e:
151 add_error('error: %s' % e)
156 # the input either comes from a series of files or from stdin.
# Filenames from the command line are opened in binary mode by a generator,
# i.e. one at a time as the consumer advances; with no filenames the stdin
# stream is the single input.
# NOTE(review): nothing on this line closes the opened files explicitly --
# confirm that relying on garbage collection is intended.
157 files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]
# Object constructors used by the splitter: bound to the pack writer's
# new_blob / new_tree methods.
160 new_blob = pack_writer.new_blob
161 new_tree = pack_writer.new_tree
# Alternative when -b or -t is requested: only compute the object ids with
# git.calc_hash() (trees are encoded with git.tree_encode() first);
# presumably nothing is stored in this case -- confirm.
162 elif opt.blobs or opt.tree:
164 new_blob = lambda content: git.calc_hash(b'blob', content)
165 new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist))
# Binary-stream wrapper around stdout; all object ids are written through it.
168 out = byte_stream(sys.stdout)
# Blob mode: split the input into blobs and print each blob id as hex, one
# per line. Each item yielded is a (sha, size, level) triple.
171 shalist = hashsplit.split_to_blobs(new_blob, files,
172 keep_boundaries=opt.keep_boundaries,
174 for (sha, size, level) in shalist:
175 out.write(hexlify(sha) + b'\n')
# Tree mode (-t, -c or -n): build a shalist and turn it into a tree.
# With -n the split result becomes a single entry whose name is derived from
# b'data' via git.mangle_name(), giving a one-element shalist.
177 elif opt.tree or opt.commit or opt.name:
178 if opt.name: # insert dummy_name which may be used as a restore target
180 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
181 keep_boundaries=opt.keep_boundaries,
183 splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
184 shalist = [(mode, splitfile_name, sha)]
# Variant that takes the shalist straight from split_to_shalist(), with prog
# as the progress callback.
186 shalist = hashsplit.split_to_shalist(
187 new_blob, new_tree, files,
188 keep_boundaries=opt.keep_boundaries, progress=prog)
# The shalist is turned into the top-level tree object.
189 tree = new_tree(shalist)
192 it = hashsplit.hashsplit_iter(files,
193 keep_boundaries=opt.keep_boundaries,
195 for (blob, level) in it:
196 hashsplit.total_split += len(blob)
198 sys.stdout.write(str(blob))
199 megs = hashsplit.total_split // 1024 // 1024
200 if not opt.quiet and last != megs:
# Print the tree id as hex, followed by a newline.
206 out.write(hexlify(tree) + b'\n')
207 if opt.commit or opt.name:
# The commit message records the command line that produced it.
208 msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
# NOTE(review): `ref` repeats the expression already stored in `refname`
# and appears unused -- the update_ref() calls pass `refname`.
209 ref = opt.name and (b'refs/heads/%s' % opt.name) or None
# Identity line in the form b'Full Name <user@host>'.
210 userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
# oldref is passed right after the tree; userline and date are each passed
# twice (presumably author and committer -- confirm against new_commit()).
211 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
212 userline, date, None, msg)
# Print the commit id as hex, followed by a newline.
214 out.write(hexlify(commit) + b'\n')
217 pack_writer.close() # must close before we can update the ref
# Point the branch at the new commit, passing the previously read value as
# oldref. There is one call per backend: cli.update_ref() for the client,
# git.update_ref() for the local repository.
221 cli.update_ref(refname, commit, oldref)
223 git.update_ref(refname, commit, oldref)
# Elapsed wall-clock time since start_time and total bytes split, for the
# throughput report.
228 secs = time.time() - start_time
229 size = hashsplit.total_split
# `/` is true division here (division is imported from __future__), so
# fractional kbytes are kept.
# NOTE(review): secs can be 0.0 for a very short run on a coarse clock,
# which would raise ZeroDivisionError in the rate -- consider guarding.
231 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
232 % (size / 1024, secs, size / 1024 / secs))
# Report how many errors were collected in saved_errors (presumably filled
# by add_error() -- confirm against bup.helpers).
235 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))