3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
10 from bup import hashsplit, git, options, client
11 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
12 qprogress, reprogress, saved_errors,
13 userfullname, username, valid_save_name)
17 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
18 bup split -b OPTIONS [--git-ids | filenames...]
19 bup split <--noop [--copy]|--copy> OPTIONS [--git-ids | filenames...]
22 b,blobs output a series of blob ids. Implies --fanout=0.
23 t,tree output a tree id
24 c,commit output a commit id
25 n,name= save the result under the given name
26 noop split the input, but throw away the result
27 copy split the input, copy it to stdout, don't save to repo
29 r,remote= remote repository path
30 d,date= date for the commit (seconds since the epoch)
31 q,quiet don't print progress messages
32 v,verbose increase log output (can be used more than once)
33 git-ids read a list of git object ids from stdin and split their contents
34 keep-boundaries don't let one chunk span two input files
35 bench print benchmark timings to stderr
36 max-pack-size= maximum bytes in a single pack
37 max-pack-objects= maximum number of objects in a single pack
38 fanout= average number of blobs in a single tree
39 bwlimit= maximum bytes/sec to transmit to server
40 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Build the option parser from the optspec text and parse everything after
# the program name. `extra` receives the leftover positional arguments,
# which the rest of the script treats as input filenames.
42 o = options.Options(optspec)
43 (opt, flags, extra) = o.parse(sys.argv[1:])
# Check the repository before doing anything else (judging by its name,
# check_repo_or_die aborts when no usable repository is found -- confirm).
46 git.check_repo_or_die()
# At least one output/mode option must be selected.
47 if not (opt.blobs or opt.tree or opt.commit or opt.name or
48 opt.noop or opt.copy):
49 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --noop and --copy do not save anything to the repository, so they cannot
# be mixed with the options that emit ids or save under a name.
50 if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
51 opt.commit or opt.name):
52 o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n')
# Blob output is exclusive with tree, commit and name output.
53 if opt.blobs and (opt.tree or opt.commit or opt.name):
54 o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids the input is a list of object ids read from stdin, so
# positional filenames are rejected.
55 if extra and opt.git_ids:
56 o.fatal("don't provide filenames when using --git-ids")
# Copy option values into module-level settings and local variables.
# NOTE(review): the embedded line numbering skips in this region, so any
# `if` guards around these assignments are not visible in this listing.
# git's verbosity is set one level below the requested -v count.
59 git.verbose = opt.verbose - 1
# Size/count style options are converted with parse_num.
64 max_pack_size = parse_num(opt.max_pack_size)
# max_pack_objects stays None unless the option was supplied.
65 max_pack_objects = None
66 if opt.max_pack_objects:
67 max_pack_objects = parse_num(opt.max_pack_objects)
70 hashsplit.fanout = parse_num(opt.fanout)
74 client.bwlimit = parse_num(opt.bwlimit)
# The commit date comes from --date; o.fatal is handed over as the error
# reporter.
# NOTE(review): parse_date_or_fatal is not among the names imported from
# bup.helpers in the visible import lines -- confirm it is in scope.
76 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback handed to the hashsplit routines as `progress=prog`.
# It reports the running total (in kbytes, integer division) through
# qprogress, using either a message that names the current file (1-based)
# or one with the total alone.
# NOTE(review): the lines that update total_bytes and that choose between
# the two messages are missing from this listing.
81 def prog(filenum, nbytes):
85 qprogress('Splitting: file #%d, %d kbytes\r'
86 % (filenum+1, total_bytes/1024))
88 qprogress('Splitting: %d kbytes\r' % (total_bytes/1024))
# Reverse mode is signalled through the BUP_SERVER_REVERSE environment
# variable; an explicit -r is rejected in that case.
91 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
92 if is_reverse and opt.remote:
93 o.fatal("don't use -r in reverse mode; it's automatic")
# Remember when work started; the elapsed time is computed from this later.
94 start_time = time.time()
# Validate the save name and derive the ref it maps to (refs/heads/<name>);
# refname is None when no name was given.
96 if opt.name and not valid_save_name(opt.name):
97 o.fatal("'%s' is not a valid branch name." % opt.name)
98 refname = opt.name and 'refs/heads/%s' % opt.name or None
# Decide where objects are written:
#  - --noop/--copy: nothing is saved, so there is no client, writer or old ref;
#  - -r or reverse mode: a client connection and its pack writer;
#  - the last pair of statements uses git directly (presumably the `else`
#    branch -- the branch header is missing from this listing).
# oldref is the ref's current value as returned by read_ref, or None when
# there is no refname or the lookup result is falsy.
99 if opt.noop or opt.copy:
100 cli = pack_writer = oldref = None
101 elif opt.remote or is_reverse:
102 cli = client.Client(opt.remote)
103 oldref = refname and cli.read_ref(refname) or None
104 pack_writer = cli.new_packwriter(compression_level=opt.compress,
105 max_pack_size=max_pack_size,
106 max_pack_objects=max_pack_objects)
109 oldref = refname and git.read_ref(refname) or None
110 pack_writer = git.PackWriter(compression_level=opt.compress,
111 max_pack_size=max_pack_size,
112 max_pack_objects=max_pack_objects)
# NOTE(review): the embedded numbering skips throughout this region, so the
# enclosing condition, the class header, the method bodies and the loop
# around the stdin reads are only partly visible in this listing.
115 # the input is actually a series of git object ids that we should retrieve
118 # This is a bit messy, but basically it converts from a series of
119 # CatPipe.get() iterators into a series of file-type objects.
120 # It would be less ugly if either CatPipe.get() returned a file-like object
121 # (not very efficient), or split_to_shalist() expected an iterator instead
# Adapter methods: the constructor takes the iterator to wrap, and read()
# pulls the next item from it (None once the iterator is exhausted).
125 def __init__(self, it):
127 def read(self, size):
128 v = next(self.it, None)
# Each line read from stdin is stripped and looked up through cp.get().
132 line = sys.stdin.readline()
138 it = cp.get(line.strip())
139 next(it, None) # skip the file type
# A failed lookup is recorded with add_error instead of aborting here.
140 except KeyError as e:
141 add_error('error: %s' % e)
146 # the input either comes from a series of files or from stdin.
# When filenames were given, a generator opens each one (default mode) only
# as it is consumed; with no filenames the single input is sys.stdin.
147 files = extra and (open(fn) for fn in extra) or [sys.stdin]
# NOTE(review): several lines are missing from this listing in this region
# (continuation lines, `else:` headers and guards), so the comments below
# describe only what the visible statements do.
# Blob mode with a pack writer: split the input into blobs written through
# pack_writer.new_blob and print each resulting id in hex.
149 if pack_writer and opt.blobs:
150 shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
151 keep_boundaries=opt.keep_boundaries,
153 for (sha, size, level) in shalist:
154 print sha.encode('hex')
# Remaining saving modes. With a name, the split result is wrapped in a
# one-entry shalist whose entry name is produced by git.mangle_name('data',
# ...); the other visible path builds the shalist with split_to_shalist.
# A tree object is then created from the shalist.
156 elif pack_writer: # tree or commit or name
157 if opt.name: # insert dummy_name which may be used as a restore target
159 hashsplit.split_to_blob_or_tree(pack_writer.new_blob,
160 pack_writer.new_tree,
162 keep_boundaries=opt.keep_boundaries,
164 splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
165 shalist = [(mode, splitfile_name, sha)]
167 shalist = hashsplit.split_to_shalist(
168 pack_writer.new_blob, pack_writer.new_tree, files,
169 keep_boundaries=opt.keep_boundaries, progress=prog)
170 tree = pack_writer.new_tree(shalist)
# Chunk-iteration path (presumably the no-pack-writer case, i.e. --noop or
# --copy): every chunk's length is added to hashsplit.total_split and the
# chunk is written to stdout; any guard on that write is not visible here.
173 it = hashsplit.hashsplit_iter(files,
174 keep_boundaries=opt.keep_boundaries,
176 for (blob, level) in it:
177 hashsplit.total_split += len(blob)
179 sys.stdout.write(str(blob))
# Whole megabytes processed so far; compared with `last` unless --quiet
# (the action taken on a change is not visible here).
180 megs = hashsplit.total_split/1024/1024
181 if not opt.quiet and last != megs:
# Print the tree id in hex (its guarding condition is not visible here).
187 print tree.encode('hex')
# For -c or -n, wrap the tree in a commit. The message records the full
# command line, and the same userline/date pair is passed twice to
# new_commit (presumably as author and committer -- confirm against
# PackWriter.new_commit). oldref is passed along with the tree.
188 if opt.commit or opt.name:
189 msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` repeats the refname computation and is not used in
# any line visible in this listing.
190 ref = opt.name and ('refs/heads/%s' % opt.name) or None
191 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
192 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
193 userline, date, None, msg)
# Print the commit id in hex (its guarding condition is not visible here).
195 print commit.encode('hex')
198 pack_writer.close() # must close before we can update the ref
# Move the ref from oldref to the new commit, through the client or through
# git directly; the conditions selecting between them are not visible here.
202 cli.update_ref(refname, commit, oldref)
204 git.update_ref(refname, commit, oldref)
# Final reporting: elapsed wall-clock seconds since start_time and the total
# number of bytes split, logged as kbytes and kbytes/sec.
# NOTE(review): the guards around these statements are not visible in this
# listing. size/1024./secs divides by the elapsed time, so a zero `secs`
# would raise ZeroDivisionError -- confirm whether that can occur.
209 secs = time.time() - start_time
210 size = hashsplit.total_split
212 log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
213 % (size/1024., secs, size/1024./secs))
# Report how many errors were collected in saved_errors during the run.
216 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))