3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import
11 from bup import hashsplit, git, options, client
12 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
13 qprogress, reprogress, saved_errors,
14 userfullname, username, valid_save_name,
19 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
20 bup split -b OPTIONS [--git-ids | filenames...]
21 bup split <--noop [--copy]|--copy> OPTIONS [--git-ids | filenames...]
24 b,blobs output a series of blob ids. Implies --fanout=0.
25 t,tree output a tree id
26 c,commit output a commit id
27 n,name= save the result under the given name
28 noop split the input, but throw away the result
29 copy split the input, copy it to stdout, don't save to repo
31 r,remote= remote repository path
32 d,date= date for the commit (seconds since the epoch)
33 q,quiet don't print progress messages
34 v,verbose increase log output (can be used more than once)
35 git-ids read a list of git object ids from stdin and split their contents
36 keep-boundaries don't let one chunk span two input files
37 bench print benchmark timings to stderr
38 max-pack-size= maximum bytes in a single pack
39 max-pack-objects= maximum number of objects in a single pack
40 fanout= average number of blobs in a single tree
41 bwlimit= maximum bytes/sec to transmit to server
42 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line.  `opt` carries the option values and `extra` the
# leftover positional arguments, which are treated as input filenames.
o = options.Options(optspec)
opt, flags, extra = o.parse(sys.argv[1:])

git.check_repo_or_die()

# At least one mode has to be selected.
if not any((opt.blobs, opt.tree, opt.commit, opt.name,
            opt.noop, opt.copy)):
    o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")

# --noop/--copy cannot be combined with any of the id-producing modes.
if any((opt.noop, opt.copy)) and any((opt.blobs, opt.tree,
                                      opt.commit, opt.name)):
    o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n')

# Blob output excludes the tree/commit/name modes.
if opt.blobs and any((opt.tree, opt.commit, opt.name)):
    o.fatal('-b is incompatible with -t, -c, -n')

# With --git-ids the input is a list of ids on stdin, so filenames make
# no sense.
if opt.git_ids and extra:
    o.fatal("don't provide filenames when using --git-ids")
# Turn the parsed option values into the settings used by the rest of the
# script (git verbosity, pack limits, fanout, bandwidth limit, commit date).
# NOTE(review): the embedded original line numbers jump here (61 -> 66,
# 69 -> 72 -> 76 -> 78), so guarding statements appear to be missing from
# this view -- confirm against the complete file before relying on the flow.
# git's verbosity is set one below opt.verbose (presumably the -v count).
61 git.verbose = opt.verbose - 1
# Pack limits are converted with parse_num(); the object limit stays None
# when --max-pack-objects was not given.
66 max_pack_size = parse_num(opt.max_pack_size)
67 max_pack_objects = None
68 if opt.max_pack_objects:
69 max_pack_objects = parse_num(opt.max_pack_objects)
# --fanout ("average number of blobs in a single tree") is stored on the
# hashsplit module, --bwlimit ("maximum bytes/sec to transmit to server") on
# the client module.
72 hashsplit.fanout = parse_num(opt.fanout)
76 client.bwlimit = parse_num(opt.bwlimit)
# Commit date from --date; o.fatal is handed over, presumably so that an
# unparsable value is reported as a usage error -- TODO confirm.
78 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback; it is handed to the hashsplit functions as
# `progress=prog`.  It reports the running total (total_bytes, shown in
# kbytes) through qprogress().
# NOTE(review): the statements that update total_bytes and that choose
# between the two messages are not visible in this view; presumably nbytes is
# added to total_bytes and the choice depends on filenum -- confirm.
83 def prog(filenum, nbytes):
# Variant that also shows the file number, counted from 1 (filenum+1).
87 qprogress('Splitting: file #%d, %d kbytes\r'
88 % (filenum+1, total_bytes/1024))
# Variant that shows only the running total.
90 qprogress('Splitting: %d kbytes\r' % (total_bytes/1024))
# Reverse mode is announced through the BUP_SERVER_REVERSE environment
# variable; an explicit -r is rejected in that case.
is_reverse = os.environ.get('BUP_SERVER_REVERSE')
if opt.remote and is_reverse:
    o.fatal("don't use -r in reverse mode; it's automatic")

# Reference point for the elapsed-time figure reported at the end.
start_time = time.time()

# A save name, when given, must be valid; it is stored as a branch under
# refs/heads/.  Without a name there is no ref to update.
if opt.name:
    if not valid_save_name(opt.name):
        o.fatal("'%s' is not a valid branch name." % opt.name)
    refname = 'refs/heads/%s' % opt.name
else:
    refname = None
# Decide where split objects are written and look up the current value of
# the target ref (oldref is None when no name was given or the lookup
# returns nothing).
# --noop/--copy: nothing is stored, so there is no client, writer or old ref.
101 if opt.noop or opt.copy:
102 cli = pack_writer = oldref = None
# -r or reverse mode: read the ref and create the pack writer through a
# client.Client connection.
103 elif opt.remote or is_reverse:
104 cli = client.Client(opt.remote)
105 oldref = refname and cli.read_ref(refname) or None
106 pack_writer = cli.new_packwriter(compression_level=opt.compress,
107 max_pack_size=max_pack_size,
108 max_pack_objects=max_pack_objects)
# Remaining case: read the ref and create the writer with the local git
# module.
# NOTE(review): the branch header for this case (presumably an `else:`) and
# any assignment to `cli` in it are not visible in this view -- confirm.
111 oldref = refname and git.read_ref(refname) or None
112 pack_writer = git.PackWriter(compression_level=opt.compress,
113 max_pack_size=max_pack_size,
114 max_pack_objects=max_pack_objects)
# --git-ids mode: object ids are read from stdin, one per line, and the
# contents of each object become one input for the splitter.
# NOTE(review): the class header, the read loop and the `try:` that belong to
# the fragments below are not visible in this view; the comments describe
# only the lines that are shown.
117 # the input is actually a series of git object ids that we should retrieve
120 # This is a bit messy, but basically it converts from a series of
121 # CatPipe.get() iterators into a series of file-type objects.
122 # It would be less ugly if either CatPipe.get() returned a file-like object
123 # (not very efficient), or split_to_shalist() expected an iterator instead
# Adapter methods: wrap an iterator and expose it through a file-style
# read() call.
127 def __init__(self, it):
# read() fetches the next item from the wrapped iterator, or None once it
# is exhausted; `size` is not used on the visible line.
129 def read(self, size):
130 v = next(self.it, None)
# Read one id per line from stdin.
134 line = sys.stdin.readline()
# Look the object up (cp is presumably a git.CatPipe, per the comment above
# -- confirm) and drop the first item the iterator yields.
140 it = cp.get(line.strip())
141 next(it, None) # skip the file info
# A KeyError from the lookup is recorded with add_error() rather than being
# allowed to propagate.
142 except KeyError as e:
143 add_error('error: %s' % e)
# The input comes either from the files named on the command line or, when
# none were given, from stdin.  Named files are opened lazily, one at a time
# as the generator is consumed, and in binary mode: the data is split and
# stored byte-for-byte, and text mode can alter it on platforms that
# translate line endings.
files = (open(fn, 'rb') for fn in extra) if extra else [sys.stdin]
# Run the splitter over `files` in the way the selected mode requires.
# NOTE(review): several continuation lines and branch headers are missing
# from this view (the embedded line numbers skip 154, 160, 163, 165, 168,
# 173-174, 177, 180 and 184 onward), so only the visible lines are described.
# Blob mode: store each chunk as a blob and print its id in hex.
151 if pack_writer and opt.blobs:
152 shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
153 keep_boundaries=opt.keep_boundaries,
155 for (sha, size, level) in shalist:
156 print sha.encode('hex')
# Tree/commit/name modes: build the list of entries for the top-level tree.
158 elif pack_writer: # tree or commit or name
# With a name, the split result becomes a single entry whose name is derived
# from 'data' by git.mangle_name(); `mode` and `sha` are presumably unpacked
# from the call's result on a line that is not shown.
159 if opt.name: # insert dummy_name which may be used as a restore target
161 hashsplit.split_to_blob_or_tree(pack_writer.new_blob,
162 pack_writer.new_tree,
164 keep_boundaries=opt.keep_boundaries,
166 splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
167 shalist = [(mode, splitfile_name, sha)]
# Otherwise the entry list comes straight from split_to_shalist().
169 shalist = hashsplit.split_to_shalist(
170 pack_writer.new_blob, pack_writer.new_tree, files,
171 keep_boundaries=opt.keep_boundaries, progress=prog)
# Either way the entries are written as the top-level tree.
172 tree = pack_writer.new_tree(shalist)
# No pack writer (--noop/--copy): just iterate over the chunks.
175 it = hashsplit.hashsplit_iter(files,
176 keep_boundaries=opt.keep_boundaries,
# Each chunk's length is added to hashsplit.total_split.
178 for (blob, level) in it:
179 hashsplit.total_split += len(blob)
# The chunk is copied to stdout (presumably only for --copy; the guard is not
# visible -- confirm).
181 sys.stdout.write(str(blob))
# Megabytes split so far, compared with `last`; the body of this test is not
# visible in this view.
182 megs = hashsplit.total_split/1024/1024
183 if not opt.quiet and last != megs:
# Print the requested ids and create the commit.
# NOTE(review): the guards around the two print statements (presumably tests
# on opt.tree and opt.commit) are not visible in this view -- confirm.
# Tree id, in hex.
189 print tree.encode('hex')
# A commit object is written for -c and for -n.
190 if opt.commit or opt.name:
# The commit message records the command line that produced it.
191 msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` is not referenced again in the visible code (the ref
# update uses `refname`); it may be dead -- verify before removing.
192 ref = opt.name and ('refs/heads/%s' % opt.name) or None
# The same identity line and date are passed twice, presumably once for the
# author and once for the committer -- confirm against new_commit().
193 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
194 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
195 userline, date, None, msg)
# Commit id, in hex.
197 print commit.encode('hex')
# Finish writing the pack, then point the named ref at the new commit.
# NOTE(review): the guards around these statements (presumably tests on
# pack_writer, opt.name and cli) are not visible in this view -- confirm.
200 pack_writer.close() # must close before we can update the ref
# The update goes through the remote client when one is in use...
204 cli.update_ref(refname, commit, oldref)
# ...and through the local git module otherwise.  Both calls also receive
# the previously read value of the ref (oldref).
206 git.update_ref(refname, commit, oldref)
# Final reporting: throughput figures and a warning about recorded errors.
# NOTE(review): the guards for the two log() calls (presumably opt.bench and
# saved_errors) are not visible in this view -- confirm.
# Elapsed wall-clock time since start_time and the total number of bytes
# split.
211 secs = time.time() - start_time
212 size = hashsplit.total_split
# NOTE(review): size/1024./secs raises ZeroDivisionError if secs is 0.0
# (e.g. a coarse clock on a tiny input); consider guarding the rate.
214 log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
215 % (size/1024., secs, size/1024./secs))
# Number of errors collected in saved_errors during the run.
218 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))