3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
10 from bup import hashsplit, git, options, client
11 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
12 qprogress, reprogress, saved_errors,
13 userfullname, username, valid_save_name,
18 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
19 bup split -b OPTIONS [--git-ids | filenames...]
20 bup split <--noop [--copy]|--copy> OPTIONS [--git-ids | filenames...]
23 b,blobs output a series of blob ids. Implies --fanout=0.
24 t,tree output a tree id
25 c,commit output a commit id
26 n,name= save the result under the given name
27 noop split the input, but throw away the result
28 copy split the input, copy it to stdout, don't save to repo
30 r,remote= remote repository path
31 d,date= date for the commit (seconds since the epoch)
32 q,quiet don't print progress messages
33 v,verbose increase log output (can be used more than once)
34 git-ids read a list of git object ids from stdin and split their contents
35 keep-boundaries don't let one chunk span two input files
36 bench print benchmark timings to stderr
37 max-pack-size= maximum bytes in a single pack
38 max-pack-objects= maximum number of objects in a single pack
39 fanout= average number of blobs in a single tree
40 bwlimit= maximum bytes/sec to transmit to server
41 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Build the option parser from optspec and parse the command line.
# `extra` holds the leftover positional arguments, which are treated as
# input filenames further down.
43 o = options.Options(optspec)
44 (opt, flags, extra) = o.parse(sys.argv[1:])
# Presumably aborts when no usable repository is available -- confirm in
# bup.git.
47 git.check_repo_or_die()
# At least one output mode has to be requested.
48 if not (opt.blobs or opt.tree or opt.commit or opt.name or
49 opt.noop or opt.copy):
50 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --noop/--copy do not save anything to the repository, so they cannot be
# combined with the options that emit ids or save under a name.
51 if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
52 opt.commit or opt.name):
53 o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n')
# -b cannot be combined with -t, -c or -n.
54 if opt.blobs and (opt.tree or opt.commit or opt.name):
55 o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids the input is a list of object ids on stdin, so filenames
# on the command line are rejected.
56 if extra and opt.git_ids:
57 o.fatal("don't provide filenames when using --git-ids")
# git.verbose is set to one less than the requested verbosity.
60 git.verbose = opt.verbose - 1
# Convert the numeric options with parse_num() and hand them to the code
# that consumes them.
65 max_pack_size = parse_num(opt.max_pack_size)
# max_pack_objects stays None unless --max-pack-objects was given.
66 max_pack_objects = None
67 if opt.max_pack_objects:
68 max_pack_objects = parse_num(opt.max_pack_objects)
# The fanout is stored as a module-level setting on hashsplit.
71 hashsplit.fanout = parse_num(opt.fanout)
# The bandwidth limit is stored as a module-level setting on client.
75 client.bwlimit = parse_num(opt.bwlimit)
# o.fatal is passed in, presumably so an unparsable --date is reported
# through the option parser -- confirm in parse_date_or_fatal.
77 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback (passed as progress=prog to split_to_shalist below).
# Reports the amount of data split so far, in kbytes, through qprogress().
82 def prog(filenum, nbytes):
# Variant that also shows the file number, displayed as filenum + 1.
86 qprogress('Splitting: file #%d, %d kbytes\r'
87 % (filenum+1, total_bytes/1024))
# Variant without a file number.
89 qprogress('Splitting: %d kbytes\r' % (total_bytes/1024))
# Reverse mode is signalled through the BUP_SERVER_REVERSE environment
# variable; -r is rejected in that mode.
92 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
93 if is_reverse and opt.remote:
94 o.fatal("don't use -r in reverse mode; it's automatic")
# Reference point for the elapsed-time figure computed near the end of the
# script.
95 start_time = time.time()
# Validate the save name and derive the full ref under refs/heads/.
# refname is None when -n was not given.
97 if opt.name and not valid_save_name(opt.name):
98 o.fatal("'%s' is not a valid branch name." % opt.name)
99 refname = opt.name and 'refs/heads/%s' % opt.name or None
# --noop/--copy: no client, no pack writer and no previous ref value.
100 if opt.noop or opt.copy:
101 cli = pack_writer = oldref = None
# Remote or reverse mode: go through client.Client. oldref is the current
# value of the ref, or None when there is no refname or the lookup returns
# a false value.
102 elif opt.remote or is_reverse:
103 cli = client.Client(opt.remote)
104 oldref = refname and cli.read_ref(refname) or None
105 pack_writer = cli.new_packwriter(compression_level=opt.compress,
106 max_pack_size=max_pack_size,
107 max_pack_objects=max_pack_objects)
# Counterpart using git.read_ref() and git.PackWriter() directly, with the
# same compression level and pack limits.
110 oldref = refname and git.read_ref(refname) or None
111 pack_writer = git.PackWriter(compression_level=opt.compress,
112 max_pack_size=max_pack_size,
113 max_pack_objects=max_pack_objects)
116 # the input is actually a series of git object ids that we should retrieve
119 # This is a bit messy, but basically it converts from a series of
120 # CatPipe.get() iterators into a series of file-type objects.
121 # It would be less ugly if either CatPipe.get() returned a file-like object
122 # (not very efficient), or split_to_shalist() expected an iterator instead
# Wrapper constructor: takes the iterator `it`, which read() accesses as
# self.it.
126 def __init__(self, it):
# Each read() pulls the next item from the wrapped iterator. The None
# default makes next() return None instead of raising StopIteration once
# the iterator is exhausted.
128 def read(self, size):
129 v = next(self.it, None)
# Object ids are read from stdin one line at a time.
133 line = sys.stdin.readline()
# Look up the object named on the line (surrounding whitespace stripped)
# and discard the first item the iterator yields.
139 it = cp.get(line.strip())
140 next(it, None) # skip the file info
# A KeyError is recorded through add_error() instead of propagating.
141 except KeyError as e:
142 add_error('error: %s' % e)
147 # the input either comes from a series of files or from stdin.
# When filenames were given, `files` is a generator that opens each one as
# it is consumed; otherwise it is a one-element list holding sys.stdin.
# NOTE(review): open(fn) uses the default (text) mode and the handles are
# never explicitly closed here -- consider binary mode for the data being
# split; confirm the intended platforms.
148 files = extra and (open(fn) for fn in extra) or [sys.stdin]
# -b: split into blobs written through the pack writer and print each
# blob id in hex.
150 if pack_writer and opt.blobs:
151 shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
152 keep_boundaries=opt.keep_boundaries,
154 for (sha, size, level) in shalist:
155 print sha.encode('hex')
157 elif pack_writer: # tree or commit or name
158 if opt.name: # insert dummy_name which may be used as a restore target
# The split result is wrapped in a single-entry shalist whose entry name
# is derived from 'data' through git.mangle_name().
160 hashsplit.split_to_blob_or_tree(pack_writer.new_blob,
161 pack_writer.new_tree,
163 keep_boundaries=opt.keep_boundaries,
165 splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
166 shalist = [(mode, splitfile_name, sha)]
# Build the shalist with split_to_shalist(), reporting progress via prog.
168 shalist = hashsplit.split_to_shalist(
169 pack_writer.new_blob, pack_writer.new_tree, files,
170 keep_boundaries=opt.keep_boundaries, progress=prog)
# Store the shalist as a tree object.
171 tree = pack_writer.new_tree(shalist)
# Iterate over the raw chunks, keeping a running byte count in
# hashsplit.total_split.
174 it = hashsplit.hashsplit_iter(files,
175 keep_boundaries=opt.keep_boundaries,
177 for (blob, level) in it:
178 hashsplit.total_split += len(blob)
# The chunk itself is written to stdout.
180 sys.stdout.write(str(blob))
# Whole megabytes split so far, compared with `last` so that the check
# below only passes when the figure has changed and --quiet is not set.
181 megs = hashsplit.total_split/1024/1024
182 if not opt.quiet and last != megs:
# Print the tree id in hex.
188 print tree.encode('hex')
# -c/-n: wrap the tree in a commit. The commit message records the
# invoking command line as the repr of sys.argv.
189 if opt.commit or opt.name:
190 msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` is computed the same way as `refname` above and is
# not used in the visible code -- candidate for removal; confirm.
191 ref = opt.name and ('refs/heads/%s' % opt.name) or None
# Identity string in the form 'Full Name <user@host>'.
192 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
# The same userline and date are passed twice (presumably author and
# committer -- confirm against new_commit's signature). oldref is passed
# right after the tree, presumably as the parent commit.
193 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
194 userline, date, None, msg)
# Print the commit id in hex.
196 print commit.encode('hex')
199 pack_writer.close() # must close before we can update the ref
# Move the ref from oldref to the new commit, through the client or
# through git directly.
203 cli.update_ref(refname, commit, oldref)
205 git.update_ref(refname, commit, oldref)
# Elapsed wall-clock time since start_time and total bytes split, used for
# the throughput line below.
210 secs = time.time() - start_time
211 size = hashsplit.total_split
# NOTE(review): size/1024./secs raises ZeroDivisionError if secs is 0.0 --
# confirm whether that can occur in practice.
213 log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
214 % (size/1024., secs, size/1024./secs))
# Report how many errors were collected in saved_errors.
217 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))