2 from __future__ import absolute_import, division, print_function
3 from binascii import hexlify
6 from bup import compat, hashsplit, git, options, client
7 from bup.compat import argv_bytes, environ
8 from bup.helpers import (add_error, hostname, log, parse_num,
9 qprogress, reprogress, saved_errors,
12 from bup.io import byte_stream
13 from bup.pwdgrp import userfullname, username
17 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
18 bup split -b OPTIONS [--git-ids | filenames...]
19 bup split --copy OPTIONS [--git-ids | filenames...]
20 bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...]
23 b,blobs output a series of blob ids. Implies --fanout=0.
24 t,tree output a tree id
25 c,commit output a commit id
26 n,name= save the result under the given name
27 noop split the input, but throw away the result
28 copy split the input, copy it to stdout, don't save to repo
30 r,remote= remote repository path
31 d,date= date for the commit (seconds since the epoch)
32 q,quiet don't print progress messages
33 v,verbose increase log output (can be used more than once)
34 git-ids read a list of git object ids from stdin and split their contents
35 keep-boundaries don't let one chunk span two input files
36 bench print benchmark timings to stderr
37 max-pack-size= maximum bytes in a single pack
38 max-pack-objects= maximum number of objects in a single pack
39 fanout= average number of blobs in a single tree
40 bwlimit= maximum bytes/sec to transmit to server
41 #,compress= set compression level to # (0-9, 9 is highest) [1]
50 def __exit__(self, type, value, traceback):
51 return None # since close() does nothing
def new_blob(self, content):
    """Return the id git.calc_hash() computes for `content` as a blob."""
    blob_id = git.calc_hash(b'blob', content)
    return blob_id
def new_tree(self, shalist):
    """Return the id git.calc_hash() computes for the encoded `shalist`.

    `shalist` is first serialized with git.tree_encode(); the result is
    hashed as a tree object.
    """
    encoded_tree = git.tree_encode(shalist)
    return git.calc_hash(b'tree', encoded_tree)
# Parse the command line against optspec and reject flag combinations
# that cannot be used together.  Every rejection goes through o.fatal().
61 o = options.Options(optspec)
62 opt, flags, extra = o.parse_bytes(argv[1:])
# Pass the -n / -r values through argv_bytes() when they were supplied.
63 if opt.name: opt.name = argv_bytes(opt.name)
64 if opt.remote: opt.remote = argv_bytes(opt.remote)
# An unset -v is treated as verbosity 0.
65 if opt.verbose is None: opt.verbose = 0
# At least one mode (-b, -t, -c, -n, --noop, --copy) has to be selected.
67 if not (opt.blobs or opt.tree or opt.commit or opt.name or
68 opt.noop or opt.copy):
69 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --copy cannot be combined with the id-printing modes -b / -t.
70 if opt.copy and (opt.blobs or opt.tree):
71 o.fatal('--copy is incompatible with -b, -t')
# --noop and --copy cannot be combined with -c / -n.
72 if (opt.noop or opt.copy) and (opt.commit or opt.name):
73 o.fatal('--noop and --copy are incompatible with -c, -n')
# -b excludes the tree/commit/name modes.
74 if opt.blobs and (opt.tree or opt.commit or opt.name):
75 o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids the input is read from stdin, so positional filenames
# are rejected.
76 if extra and opt.git_ids:
77 o.fatal("don't provide filenames when using --git-ids")
# Push the parsed options into module-level settings and local limits.
# NOTE(review): several of these assignments are probably guarded by
# conditions on lines that are not visible in this excerpt -- confirm
# against the full file before relying on the order shown here.
# git's verbosity is set one below this command's own -v level.
80 git.verbose = opt.verbose - 1
# Pack limits; parse_num() converts the option text to a number.
85 max_pack_size = parse_num(opt.max_pack_size)
86 max_pack_objects = None
87 if opt.max_pack_objects:
88 max_pack_objects = parse_num(opt.max_pack_objects)
# --fanout and --bwlimit are stored as attributes of the hashsplit and
# client modules respectively.
91 hashsplit.fanout = parse_num(opt.fanout)
95 client.bwlimit = parse_num(opt.bwlimit)
# o.fatal is handed over as the error reporter -- presumably invoked when
# the date cannot be parsed; TODO confirm in parse_date_or_fatal.
97 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback: adds `nbytes` to the running total kept in
# total_bytes[0] and reports the total (in kbytes) through qprogress().
# The total lives inside a mutable container so the nested function can
# update it, as the comment below explains.
101 # Hack around lack of nonlocal vars in python 2
103 def prog(filenum, nbytes):
104 total_bytes[0] += nbytes
# Message variant that includes the 1-based file number.
106 qprogress('Splitting: file #%d, %d kbytes\r'
107 % (filenum+1, total_bytes[0] // 1024))
# Message variant without a file number.
# NOTE(review): the condition choosing between the two variants is not
# visible in this excerpt.
109 qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))
# Work out where results go: reverse mode is read from the
# BUP_SERVER_REVERSE environment variable, the target ref is derived from
# -n, and the ref's previous value is read from the client or the local
# repository.
112 opt.is_reverse = environ.get(b'BUP_SERVER_REVERSE')
113 if opt.is_reverse and opt.remote:
114 o.fatal("don't use -r in reverse mode; it's automatic")
# Start of the interval later reported as elapsed seconds.
115 start_time = time.time()
# NOTE(review): opt.name has been passed through argv_bytes(); if that
# yields bytes, '%r' renders it as b'...' inside the quotes on Python 3.
117 if opt.name and not valid_save_name(opt.name):
118 o.fatal("'%r' is not a valid branch name." % opt.name)
# Full ref under refs/heads/, or None when no name was given.
119 refname = opt.name and b'refs/heads/%s' % opt.name or None
# Nothing is written for --noop or --copy.
121 writing = not (opt.noop or opt.copy)
# Truthy when a remote was named with -r or reverse mode is active.
122 remote_dest = opt.remote or opt.is_reverse
125 cli = pack_writer = oldref = None
# NOTE(review): two alternative setups follow -- one reading the old ref
# through a client.Client, one through the local git module.  The
# conditions selecting between them are not visible in this excerpt.
127 git.check_repo_or_die()
128 cli = client.Client(opt.remote)
129 oldref = refname and cli.read_ref(refname) or None
131 git.check_repo_or_die()
133 oldref = refname and git.read_ref(refname) or None
# Choose the input: either object ids read line by line from stdin and
# looked up through cp.get(), or the named files / stdin itself.
# NOTE: the local name `input` shadows the Python builtin of that name.
135 input = byte_stream(sys.stdin)
138 # the input is actually a series of git object ids that we should retrieve
141 # This is a bit messy, but basically it converts from a series of
142 # CatPipe.get() iterators into a series of file-type objects.
143 # It would be less ugly if either CatPipe.get() returned a file-like object
144 # (not very efficient), or split_to_shalist() expected an iterator instead
# NOTE(review): the class these two methods belong to, and most of their
# bodies, are not visible in this excerpt.
148 def __init__(self, it):
150 def read(self, size):
151 v = next(self.it, None)
# One id per input line; surrounding whitespace is stripped before lookup.
155 line = input.readline()
161 it = cp.get(line.strip())
162 next(it, None) # skip the file info
# A KeyError from the lookup is recorded through add_error() instead of
# being raised.
163 except KeyError as e:
164 add_error('error: %s' % e)
169 # the input either comes from a series of files or from stdin.
# NOTE(review): the files opened by this generator are not closed by any
# line visible here -- confirm whether the consumer closes them.
170 files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]
# Pick the pack writer: a NoOpPackWriter, a local git.PackWriter when
# there is no remote destination, or one obtained from the client.
# NOTE(review): the conditions guarding the first and last alternatives
# are not visible in this excerpt.
173 pack_writer = NoOpPackWriter()
174 elif not remote_dest:
175 pack_writer = git.PackWriter(compression_level=opt.compress,
176 max_pack_size=max_pack_size,
177 max_pack_objects=max_pack_objects)
179 pack_writer = cli.new_packwriter(compression_level=opt.compress,
180 max_pack_size=max_pack_size,
181 max_pack_objects=max_pack_objects)
# byte_stream() wrapper around stdout, used for the ids written later.
184 out = byte_stream(sys.stdout)
# Bind the writer's object-creation methods for the hashsplit calls below.
186 new_blob = pack_writer.new_blob
187 new_tree = pack_writer.new_tree
# Run the split.  The visible lines show three alternatives: a list of
# blobs whose ids are printed, a tree built from a shalist, and a plain
# iteration over the chunks.
# NOTE(review): the condition lines for the first and last alternatives
# are not visible in this excerpt.
189 shalist = hashsplit.split_to_blobs(new_blob, files,
190 keep_boundaries=opt.keep_boundaries,
# Print one hex blob id per line.
192 for (sha, size, level) in shalist:
193 out.write(hexlify(sha) + b'\n')
# Tree-producing modes (-t, -c, -n).
195 elif opt.tree or opt.commit or opt.name:
196 if opt.name: # insert dummy_name which may be used as a restore target
198 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
199 keep_boundaries=opt.keep_boundaries,
201 splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode)
202 shalist = [(mode, splitfile_name, sha)]
205 hashsplit.split_to_shalist(new_blob, new_tree, files,
206 keep_boundaries=opt.keep_boundaries,
# Turn the collected entries into a tree id via the writer's new_tree().
208 tree = new_tree(shalist)
211 it = hashsplit.hashsplit_iter(files,
212 keep_boundaries=opt.keep_boundaries,
# Walk the chunks, accumulating their total length in
# hashsplit.total_split.
214 for (blob, level) in it:
215 hashsplit.total_split += len(blob)
# NOTE(review): on Python 3, str() of a bytes-like object gives its repr
# (e.g. "b'...'"), not the raw data, and sys.stdout is a text stream.
# The other writes here go through `out` (byte_stream(sys.stdout)), so
# out.write(blob) looks like the intended call -- confirm the type of
# `blob` and fix.
217 sys.stdout.write(str(blob))
# Whole megabytes split so far; compared against `last` unless -q is set.
218 megs = hashsplit.total_split // 1024 // 1024
219 if not opt.quiet and last != megs:
# Print the hex tree id.
225 out.write(hexlify(tree) + b'\n')
# For -c / -n: build a commit on top of `tree` with `oldref` as parent
# argument, print its id, close the pack writer, then update the ref from
# oldref to the new commit.
226 if opt.commit or opt.name:
# The commit message embeds the repr of the original command line.
227 msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
# NOTE(review): `ref` repeats the value already held in `refname` and is
# not used by any line visible in this excerpt.
228 ref = opt.name and (b'refs/heads/%s' % opt.name) or None
# Identity line built from the current user and host; it is passed twice
# to new_commit() -- presumably as author and committer, TODO confirm.
229 userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
230 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
231 userline, date, None, msg)
# Print the hex commit id.
233 out.write(hexlify(commit) + b'\n')
236 pack_writer.close() # must close before we can update the ref
# NOTE(review): remote (cli) and local (git) ref updates; the conditions
# selecting between them are not visible in this excerpt.
240 cli.update_ref(refname, commit, oldref)
242 git.update_ref(refname, commit, oldref)
# Final reporting: elapsed time and throughput since start_time, plus a
# warning with the number of saved errors.
# NOTE(review): the conditions guarding the two log() calls are not
# visible in this excerpt.
247 secs = time.time() - start_time
248 size = hashsplit.total_split
# NOTE(review): `secs` can be 0.0 on a very fast run or a coarse clock,
# in which case `size / 1024 / secs` raises ZeroDivisionError -- consider
# guarding the rate computation.
250 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
251 % (size / 1024, secs, size / 1024 / secs))
254 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))