# Launcher: build the path of the bup-python program that sits in the same
# directory as this script, and exit with the failing status if that fails.
3 bup_python="$(dirname "$0")/bup-python" || exit $?
# Replace the shell with bup-python, handing it this same file plus the
# original arguments.  ${1+"$@"} expands to "$@" only when a first argument
# is set, so an empty argument list is forwarded as nothing at all.
4 exec "$bup_python" "$0" ${1+"$@"}
10 from bup import hashsplit, git, options, client
11 from bup.helpers import (handle_ctrl_c, hostname, log, parse_num, qprogress,
12 reprogress, saved_errors, userfullname, username)
16 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
17 bup split -b OPTIONS [--git-ids | filenames...]
18 bup split <--noop [--copy]|--copy> OPTIONS [--git-ids | filenames...]
21 b,blobs output a series of blob ids. Implies --fanout=0.
22 t,tree output a tree id
23 c,commit output a commit id
24 n,name= save the result under the given name
25 noop split the input, but throw away the result
26 copy split the input, copy it to stdout, don't save to repo
28 r,remote= remote repository path
29 d,date= date for the commit (seconds since the epoch)
30 q,quiet don't print progress messages
31 v,verbose increase log output (can be used more than once)
32 git-ids read a list of git object ids from stdin and split their contents
33 keep-boundaries don't let one chunk span two input files
34 bench print benchmark timings to stderr
35 max-pack-size= maximum bytes in a single pack
36 max-pack-objects= maximum number of objects in a single pack
37 fanout= average number of blobs in a single tree
38 bwlimit= maximum bytes/sec to transmit to server
39 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Parse the command line against optspec.  `extra` receives the positional
# arguments, which the rest of the script treats as input filenames.
41 o = options.Options(optspec)
42 (opt, flags, extra) = o.parse(sys.argv[1:])
# NOTE(review): presumably aborts when no usable repository exists -- this is
# judged from the function name only; confirm in bup.git.
45 git.check_repo_or_die()
# At least one output mode has to be selected.
46 if not (opt.blobs or opt.tree or opt.commit or opt.name or
47 opt.noop or opt.copy):
48 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --noop / --copy cannot be combined with -b, -t, -c or -n.
49 if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
50 opt.commit or opt.name):
51 o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n')
# Blob-id output cannot be combined with tree, commit or named output.
52 if opt.blobs and (opt.tree or opt.commit or opt.name):
53 o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids the list of inputs is read from stdin, so positional
# filenames are rejected.
54 if extra and opt.git_ids:
55 o.fatal("don't provide filenames when using --git-ids")
# Copy option values into module-level settings of git, hashsplit and client.
# Size-like values go through parse_num() first.
# NOTE(review): the embedded line numbers skip several lines in this stretch
# (58 -> 61, 63 -> 65, 65 -> 69 -> 71), so most of these assignments probably
# sit under conditions that are not visible here; only the guard on
# max_pack_objects can be seen.
58 git.verbose = opt.verbose - 1
61 git.max_pack_size = parse_num(opt.max_pack_size)
62 if opt.max_pack_objects:
63 git.max_pack_objects = parse_num(opt.max_pack_objects)
65 hashsplit.fanout = parse_num(opt.fanout)
69 client.bwlimit = parse_num(opt.bwlimit)
# `date` is later passed to pack_writer.new_commit().  The second argument
# hands o.fatal to the parser, presumably as its error reporter.
# NOTE(review): parse_date_or_fatal is not among the names imported on the
# visible import lines -- confirm it is in scope, otherwise this raises
# NameError when reached.
71 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback (handed to hashsplit.split_to_shalist as progress=prog).
# It reports the running total in kbytes through qprogress(); one message
# form also shows a 1-based file number (filenum+1).
# NOTE(review): the statements that update total_bytes from nbytes and that
# choose between the two messages are on lines not visible in this view.
76 def prog(filenum, nbytes):
80 qprogress('Splitting: file #%d, %d kbytes\r'
81 % (filenum+1, total_bytes/1024))
83 qprogress('Splitting: %d kbytes\r' % (total_bytes/1024))
# A set BUP_SERVER_REVERSE environment variable selects reverse mode, in
# which an explicit -r is refused.
86 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
87 if is_reverse and opt.remote:
88 o.fatal("don't use -r in reverse mode; it's automatic")
# Reference point for the elapsed-time figure computed at the end.
89 start_time = time.time()
# Names starting with '.' are rejected as branch names.
91 if opt.name and opt.name.startswith('.'):
92 o.fatal("'%s' is not a valid branch name." % opt.name)
# refname is 'refs/heads/<name>' when -n was given, otherwise None.
93 refname = opt.name and 'refs/heads/%s' % opt.name or None
# --noop / --copy: no client, no pack writer and no previous ref are set up.
94 if opt.noop or opt.copy:
95 cli = pack_writer = oldref = None
# Remote or reverse mode: work through a client connection.  oldref is the
# value read for refname, or None when there is no refname or the read
# returns a falsy value.
96 elif opt.remote or is_reverse:
97 cli = client.Client(opt.remote)
98 oldref = refname and cli.read_ref(refname) or None
99 pack_writer = cli.new_packwriter(compression_level=opt.compress)
# Same setup done through the git module directly instead of a client.
# NOTE(review): the embedded numbering skips from 99 to 102, so the branch
# header for this path (and whatever else is on the skipped lines) is not
# visible here.
102 oldref = refname and git.read_ref(refname) or None
103 pack_writer = git.PackWriter(compression_level=opt.compress)
106 # the input is actually a series of git object ids that we should retrieve
109 # This is a bit messy, but basically it converts from a series of
110 # CatPipe.get() iterators into a series of file-type objects.
111 # It would be less ugly if either CatPipe.get() returned a file-like object
112 # (not very efficient), or split_to_shalist() expected an iterator instead
# NOTE(review): the class statement enclosing the two methods below, the body
# of __init__, and the rest of read() are on lines not visible in this view.
116 def __init__(self, it):
# read() takes the next item from self.it, using None as the default once the
# iterator is exhausted; `size` is not referenced on the visible line.
118 def read(self, size):
119 v = next(self.it, None)
# One object id is taken per stdin line; it is stripped of surrounding
# whitespace and looked up through cp.get().  The surrounding loop and the
# definition of cp are not visible here.
123 line = sys.stdin.readline()
129 it = cp.get(line.strip())
130 next(it, None) # skip the file type
# A KeyError from the lookup is passed to add_error() as a formatted message.
# NOTE(review): add_error is not among the names imported on the visible
# import lines -- confirm it is in scope, otherwise this handler itself
# raises NameError.
131 except KeyError as e:
132 add_error('error: %s' % e)
137 # the input either comes from a series of files or from stdin.
# A generator object is always truthy, so when filenames were given `files`
# is a lazy stream of file objects, each opened only when the consumer
# reaches it; with no filenames it is [sys.stdin].
# NOTE(review): open(fn) uses the default text mode, and no visible line
# closes these files explicitly.
138 files = extra and (open(fn) for fn in extra) or [sys.stdin]
# Three paths follow: blob output, tree/commit/name output, and a path that
# has no pack writer (--noop / --copy).
# NOTE(review): the embedded line numbers skip lines throughout this stretch
# (call closers, else headers, some assignments), so the exact branch
# structure cannot be read from this view.
# The print statement form and str.encode('hex') are Python 2 only.
#
# Blob mode: split the input into blobs written through the pack writer and
# print one hex id per blob.
140 if pack_writer and opt.blobs:
141 shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
142 keep_boundaries=opt.keep_boundaries,
144 for (sha, size, level) in shalist:
145 print sha.encode('hex')
147 elif pack_writer: # tree or commit or name
148 if opt.name: # insert dummy_name which may be used as a restore target
150 hashsplit.split_to_blob_or_tree(pack_writer.new_blob,
151 pack_writer.new_tree,
153 keep_boundaries=opt.keep_boundaries,
# With -n the split result is wrapped as a single entry named after
# git.mangle_name('data', ...).  The assignment that binds mode and sha from
# the call above is not visible here.
155 splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
156 shalist = [(mode, splitfile_name, sha)]
# Otherwise the entry list comes straight from split_to_shalist, with prog as
# the progress callback.
158 shalist = hashsplit.split_to_shalist(
159 pack_writer.new_blob, pack_writer.new_tree, files,
160 keep_boundaries=opt.keep_boundaries, progress=prog)
# Either way the entries are written out as one tree object.
161 tree = pack_writer.new_tree(shalist)
# Path without a pack writer: iterate over the raw chunks.
164 it = hashsplit.hashsplit_iter(files,
165 keep_boundaries=opt.keep_boundaries,
# Each chunk is added to hashsplit.total_split; megs is that total divided by
# 1024 twice, and is compared with `last` to decide whether to report.
# NOTE(review): the numbering skips 169, so the stdout write is probably
# guarded by a condition not visible here.  The initial value of `last` and
# the body of the final `if` are not visible either.
167 for (blob, level) in it:
168 hashsplit.total_split += len(blob)
170 sys.stdout.write(str(blob))
171 megs = hashsplit.total_split/1024/1024
172 if not opt.quiet and last != megs:
# Print the tree id as hex.
# NOTE(review): the numbering skips the line just before this one, so the
# print is probably conditional on something not visible here.
178 print tree.encode('hex')
# A commit is created when -c or -n was given.  The message records the full
# sys.argv of this invocation.
179 if opt.commit or opt.name:
180 msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` repeats the refname computation done earlier and is not
# used on any visible line.
181 ref = opt.name and ('refs/heads/%s' % opt.name) or None
182 userline = '%s <%s@%s>' % (userfullname(), username(), hostname())
# The same userline/date pair is passed twice (presumably author and
# committer), and oldref is the second argument (presumably the parent) --
# confirm both against new_commit().
183 commit = pack_writer.new_commit(tree, oldref, userline, date, None,
184 userline, date, None, msg)
186 print commit.encode('hex')
189 pack_writer.close() # must close before we can update the ref
# Move refname from oldref to the new commit, through the client or through
# the git module.  The conditions that select between these two calls are
# not visible in this view.
193 cli.update_ref(refname, commit, oldref)
195 git.update_ref(refname, commit, oldref)
# Throughput summary: elapsed seconds since start_time and the byte count
# accumulated in hashsplit.total_split, logged as kbytes and kbytes/sec.
# NOTE(review): size/1024./secs raises ZeroDivisionError if both time.time()
# calls return the same value (secs == 0.0).
# NOTE(review): the numbering skips the lines before each log() call, so
# both messages are probably conditional on something not visible here.
200 secs = time.time() - start_time
201 size = hashsplit.total_split
203 log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
204 % (size/1024., secs, size/1024./secs))
# Reports how many entries are held in saved_errors.
207 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))