3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
10 from bup import hashsplit, git, options, client
11 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
12 qprogress, reprogress, saved_errors,
13 userfullname, username, valid_save_name)
17 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
18 bup split -b OPTIONS [--git-ids | filenames...]
19 bup split <--noop [--copy]|--copy> OPTIONS [--git-ids | filenames...]
22 b,blobs output a series of blob ids. Implies --fanout=0.
23 t,tree output a tree id
24 c,commit output a commit id
25 n,name= save the result under the given name
26 noop split the input, but throw away the result
27 copy split the input, copy it to stdout, don't save to repo
29 r,remote= remote repository path
30 d,date= date for the commit (seconds since the epoch)
31 q,quiet don't print progress messages
32 v,verbose increase log output (can be used more than once)
33 git-ids read a list of git object ids from stdin and split their contents
34 keep-boundaries don't let one chunk span two input files
35 bench print benchmark timings to stderr
36 max-pack-size= maximum bytes in a single pack
37 max-pack-objects= maximum number of objects in a single pack
38 fanout= average number of blobs in a single tree
39 bwlimit= maximum bytes/sec to transmit to server
40 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line against the option spec. `extra` receives the
# leftover positional arguments, which are treated as input filenames below.
42 o = options.Options(optspec)
43 (opt, flags, extra) = o.parse(sys.argv[1:])
# Presumably aborts when no usable repository is available (per the name);
# the helper's behavior is not visible here -- confirm.
46 git.check_repo_or_die()
# At least one mode flag must be given.
47 if not (opt.blobs or opt.tree or opt.commit or opt.name or
48 opt.noop or opt.copy):
49 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --noop/--copy cannot be combined with any of the id-producing or saving
# modes (-b, -t, -c, -n).
50 if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
51 opt.commit or opt.name):
52 o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n')
# -b (blob ids only) cannot be combined with tree/commit/name output.
53 if opt.blobs and (opt.tree or opt.commit or opt.name):
54 o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids the input comes from stdin, so filenames are rejected.
55 if extra and opt.git_ids:
56 o.fatal("don't provide filenames when using --git-ids")
# Push tunables into the git, hashsplit and client modules as module-level
# settings; sizes and counts go through parse_num first.
# NOTE(review): the embedded source line numbers skip around these
# assignments, so any guards on them (other than the one on
# max_pack_objects) are not visible in this extract.
59 git.verbose = opt.verbose - 1
62 git.max_pack_size = parse_num(opt.max_pack_size)
63 if opt.max_pack_objects:
64 git.max_pack_objects = parse_num(opt.max_pack_objects)
66 hashsplit.fanout = parse_num(opt.fanout)
70 client.bwlimit = parse_num(opt.bwlimit)
# Date later passed to new_commit(). o.fatal is handed to the parser,
# presumably so an unparseable -d value is reported as a usage error --
# confirm. NOTE(review): parse_date_or_fatal is not among the imports
# visible in this extract.
72 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback; it is handed to hashsplit.split_to_shalist() as
# progress=prog further down. It reports the running total in kbytes via
# qprogress(), either with the 1-based file number or without it.
# NOTE(review): the lines that update total_bytes from nbytes and that choose
# between the two messages are missing from this extract.
77 def prog(filenum, nbytes):
81 qprogress('Splitting: file #%d, %d kbytes\r'
82 % (filenum+1, total_bytes/1024))
84 qprogress('Splitting: %d kbytes\r' % (total_bytes/1024))
# Reverse mode is signalled through the BUP_SERVER_REVERSE environment
# variable; -r is rejected there, so opt.remote is unset whenever
# is_reverse is true.
87 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
88 if is_reverse and opt.remote:
89 o.fatal("don't use -r in reverse mode; it's automatic")
# Reference point for the elapsed-time figure logged at the end of the script.
90 start_time = time.time()
# Validate the branch name and build the full ref path; refname is None
# when -n was not given.
92 if opt.name and not valid_save_name(opt.name):
93 o.fatal("'%s' is not a valid branch name." % opt.name)
94 refname = opt.name and 'refs/heads/%s' % opt.name or None
# Choose where objects are written:
#   --noop/--copy     -> nothing is stored (no client, writer, or old ref)
#   remote or reverse -> objects go through a client.Client connection
#   otherwise         -> objects go to the local repository
# oldref is the ref's current value; it is later passed both to new_commit()
# and to update_ref().
95 if opt.noop or opt.copy:
96 cli = pack_writer = oldref = None
97 elif opt.remote or is_reverse:
98 cli = client.Client(opt.remote)
99 oldref = refname and cli.read_ref(refname) or None
100 pack_writer = cli.new_packwriter(compression_level=opt.compress)
# NOTE(review): the `else:` that introduces this local-repository branch is
# not visible in this extract.
103 oldref = refname and git.read_ref(refname) or None
104 pack_writer = git.PackWriter(compression_level=opt.compress)
# --git-ids input handling.
# NOTE(review): the enclosing `if`, the adapter's class header, most method
# bodies and the loop header are missing from this extract; the notes below
# cover only the visible lines.
107 # the input is actually a series of git object ids that we should retrieve
110 # This is a bit messy, but basically it converts from a series of
111 # CatPipe.get() iterators into a series of file-type objects.
112 # It would be less ugly if either CatPipe.get() returned a file-like object
113 # (not very efficient), or split_to_shalist() expected an iterator instead
# Adapter methods: read() pulls the next item from the wrapped iterator and
# gets None once it is exhausted. How `size` and `v` are used afterwards is
# not visible here.
117 def __init__(self, it):
119 def read(self, size):
120 v = next(self.it, None)
# One object id is read per stdin line.
124 line = sys.stdin.readline()
# Look the id up (whitespace stripped) and discard the first item the
# iterator yields, which is the object's type. A KeyError from the lookup is
# recorded with add_error() rather than raised.
130 it = cp.get(line.strip())
131 next(it, None) # skip the file type
132 except KeyError as e:
133 add_error('error: %s' % e)
138 # the input either comes from a series of files or from stdin.
# `extra and (...) or [...]` is the pre-ternary conditional idiom: a lazy
# generator that opens each named file when filenames were given, otherwise a
# one-element list holding stdin.
# NOTE(review): files are opened in the default (text) mode and are not
# closed in the visible code -- confirm this is intended for binary input.
139 files = extra and (open(fn) for fn in extra) or [sys.stdin]
# Dispatch on the selected mode.
# NOTE(review): several continuation lines, `else:` lines and guards are
# missing from this extract (the embedded line numbers skip).
# -b: store each chunk as a blob and print one hex blob id per chunk.
141 if pack_writer and opt.blobs:
142 shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
143 keep_boundaries=opt.keep_boundaries,
145 for (sha, size, level) in shalist:
146 print sha.encode('hex')
# -t/-c/-n: build a tree from the split input.
148 elif pack_writer: # tree or commit or name
149 if opt.name: # insert dummy_name which may be used as a restore target
# With -n, the split result is wrapped as a single tree entry whose name is
# the mangled form of 'data'. `mode` and `sha` presumably come from the
# split_to_blob_or_tree() call, whose assignment line is not visible.
151 hashsplit.split_to_blob_or_tree(pack_writer.new_blob,
152 pack_writer.new_tree,
154 keep_boundaries=opt.keep_boundaries,
156 splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
157 shalist = [(mode, splitfile_name, sha)]
# Without -n (the `else:` is not visible), the entries returned by
# split_to_shalist() are used directly; prog is the progress callback.
159 shalist = hashsplit.split_to_shalist(
160 pack_writer.new_blob, pack_writer.new_tree, files,
161 keep_boundaries=opt.keep_boundaries, progress=prog)
162 tree = pack_writer.new_tree(shalist)
# No pack writer (--noop/--copy): iterate the chunks without storing them,
# adding each chunk's length to hashsplit.total_split. The chunk is written
# to stdout (presumably only under --copy; the guard is not visible), and
# `last != megs` limits progress output to changes in the whole-megabyte
# count unless --quiet is set.
165 it = hashsplit.hashsplit_iter(files,
166 keep_boundaries=opt.keep_boundaries,
168 for (blob, level) in it:
169 hashsplit.total_split += len(blob)
171 sys.stdout.write(str(blob))
172 megs = hashsplit.total_split/1024/1024
173 if not opt.quiet and last != megs:
# Print the tree id as hex (presumably guarded by opt.tree; the guard is not
# visible).
179 print tree.encode('hex')
# -c/-n: wrap the tree in a commit whose message records the invoking
# command line (repr of sys.argv).
180 if opt.commit or opt.name:
181 msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` duplicates `refname` computed earlier and is not used
# in the visible lines.
182 ref = opt.name and ('refs/heads/%s' % opt.name) or None
# Identity line of the form "Full Name <user@host>"; the same userline and
# date are passed twice to new_commit(), with oldref as the parent argument.
183 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
184 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
185 userline, date, None, msg)
# Print the commit id as hex (presumably guarded by opt.commit; the guard is
# not visible).
187 print commit.encode('hex')
190 pack_writer.close() # must close before we can update the ref
# Move the ref from oldref to the new commit, through the client connection
# or in the local repository. NOTE(review): the conditions that select
# between these two calls are not visible in this extract.
194 cli.update_ref(refname, commit, oldref)
196 git.update_ref(refname, commit, oldref)
# Benchmark summary: elapsed wall-clock time since start_time and the total
# number of bytes split, logged as kbytes and kbytes/sec.
# NOTE(review): the guard around this report (presumably --bench) is not
# visible; the division by secs assumes a non-zero elapsed time -- confirm.
201 secs = time.time() - start_time
202 size = hashsplit.total_split
204 log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
205 % (size/1024., secs, size/1024./secs))
# Report how many errors were collected during the run (e.g. via
# add_error()). NOTE(review): the guard, presumably `if saved_errors:`, and
# any non-zero exit are not visible in this extract.
208 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))