# Shell bootstrap: find the bup-python executable in this script's own
# directory and re-exec the script under it, forwarding every argument as-is.
3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from bup import hashsplit, git, options, client
9 from bup.helpers import *
13 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
14 bup split -b OPTIONS [--git-ids | filenames...]
15 bup split <--noop [--copy]|--copy> OPTIONS [--git-ids | filenames...]
18 b,blobs output a series of blob ids. Implies --fanout=0.
19 t,tree output a tree id
20 c,commit output a commit id
21 n,name= save the result under the given name
22 noop split the input, but throw away the result
23 copy split the input, copy it to stdout, don't save to repo
25 r,remote= remote repository path
26 d,date= date for the commit (seconds since the epoch)
27 q,quiet don't print progress messages
28 v,verbose increase log output (can be used more than once)
29 git-ids read a list of git object ids from stdin and split their contents
30 keep-boundaries don't let one chunk span two input files
31 bench print benchmark timings to stderr
32 max-pack-size= maximum bytes in a single pack
33 max-pack-objects= maximum number of objects in a single pack
34 fanout= average number of blobs in a single tree
35 bwlimit= maximum bytes/sec to transmit to server
36 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Build the option parser from optspec and parse the command line; `extra`
# holds the leftover positional arguments (used below as input filenames).
38 o = options.Options(optspec)
39 (opt, flags, extra) = o.parse(sys.argv[1:])
# NOTE(review): presumably aborts when no usable repository is found --
# confirm against git.check_repo_or_die().
42 git.check_repo_or_die()
# At least one output mode has to be selected.
43 if not (opt.blobs or opt.tree or opt.commit or opt.name or
44 opt.noop or opt.copy):
45 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --noop/--copy cannot be combined with any of the id-producing modes.
46 if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
47 opt.commit or opt.name):
48 o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n')
# Blob output excludes tree/commit/name output.
49 if opt.blobs and (opt.tree or opt.commit or opt.name):
50 o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids the input list comes from stdin, so positional filenames
# are rejected.
51 if extra and opt.git_ids:
52 o.fatal("don't provide filenames when using --git-ids")
# Push parsed option values into module-level settings of git, hashsplit and
# client.
# NOTE(review): only max_pack_objects has a visible guard here; whether the
# other assignments are guarded by "if opt.<name>:" cannot be told from the
# visible lines -- confirm before relying on parse_num() accepting None.
# The git module runs one verbosity level below the -v count.
55 git.verbose = opt.verbose - 1
58 git.max_pack_size = parse_num(opt.max_pack_size)
59 if opt.max_pack_objects:
60 git.max_pack_objects = parse_num(opt.max_pack_objects)
62 hashsplit.fanout = parse_num(opt.fanout)
66 client.bwlimit = parse_num(opt.bwlimit)
# Parse --date, reporting a bad value through o.fatal.
68 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress callback handed to the hashsplit routines (see progress=prog
# below). Reports the running total in kbytes through qprogress(); the first
# message form also shows the 1-based index of the current file.
# NOTE(review): total_bytes is not defined in the visible lines -- presumably
# a module-level counter that this function advances by nbytes; confirm. The
# condition choosing between the two message forms is not visible either.
73 def prog(filenum, nbytes):
77 qprogress('Splitting: file #%d, %d kbytes\r'
78 % (filenum+1, total_bytes/1024))
80 qprogress('Splitting: %d kbytes\r' % (total_bytes/1024))
# Reverse mode is signalled through the BUP_SERVER_REVERSE environment
# variable; it cannot be combined with an explicit -r.
83 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
84 if is_reverse and opt.remote:
85 o.fatal("don't use -r in reverse mode; it's automatic")
# Reference point for the timing report at the end of the script.
86 start_time = time.time()
# Names beginning with '.' are rejected as branch names.
88 if opt.name and opt.name.startswith('.'):
89 o.fatal("'%s' is not a valid branch name." % opt.name)
# Full ref path for -n, or None when no name was given.
90 refname = opt.name and 'refs/heads/%s' % opt.name or None
# Choose where objects go:
#  - --noop/--copy: nothing is stored, so no client, writer or old ref;
#  - remote or reverse mode: read the old ref and write packs via the client;
#  - remaining statements: read the old ref and write packs with the local
#    git module.
# NOTE(review): the branch keyword introducing the local case is not visible
# in these lines -- presumably an "else:"; confirm.
91 if opt.noop or opt.copy:
92 cli = pack_writer = oldref = None
93 elif opt.remote or is_reverse:
94 cli = client.Client(opt.remote)
95 oldref = refname and cli.read_ref(refname) or None
96 pack_writer = cli.new_packwriter(compression_level=opt.compress)
99 oldref = refname and git.read_ref(refname) or None
100 pack_writer = git.PackWriter(compression_level=opt.compress)
103 # the input is actually a series of git object ids that we should retrieve
106 # This is a bit messy, but basically it converts from a series of
107 # CatPipe.get() iterators into a series of file-type objects.
108 # It would be less ugly if either CatPipe.get() returned a file-like object
109 # (not very efficient), or split_to_shalist() expected an iterator instead
# NOTE(review): the class header for the two methods below, and the loop and
# try/except around the stdin handling, are not visible in these lines.
# Adapter methods: wrap an iterator so it can be consumed through read().
113 def __init__(self, it):
# `size` is not referenced in the visible line: one whole item is taken from
# the iterator per call, with None as the result once it is exhausted.
115 def read(self, size):
116 v = next(self.it, None)
# One object id is read per line of stdin.
120 line = sys.stdin.readline()
# NOTE(review): `cp` is not defined in the visible lines -- per the comment
# above it is presumably a CatPipe instance; confirm.
126 it = cp.get(line.strip())
127 next(it, None) # skip the file type
# A failure is recorded with add_error() instead of being raised here.
129 add_error('error: %s' % e)
134 # the input either comes from a series of files or from stdin.
# Named files are opened lazily, one at a time, by the generator expression;
# with no filenames the single input is stdin.
135 files = extra and (open(fn) for fn in extra) or [sys.stdin]
# Mode 1 (-b): store each chunk as a blob and print one hex id per blob.
137 if pack_writer and opt.blobs:
138 shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
139 keep_boundaries=opt.keep_boundaries,
141 for (sha, size, level) in shalist:
142 print sha.encode('hex')
# Mode 2: build a tree out of the split data.
144 elif pack_writer: # tree or commit or name
145 if opt.name: # insert dummy_name which may be used as a restore target
# NOTE(review): the assignment target of this call is not visible; `mode` and
# `sha` used just below presumably come from its return value -- confirm.
147 hashsplit.split_to_blob_or_tree(pack_writer.new_blob,
148 pack_writer.new_tree,
150 keep_boundaries=opt.keep_boundaries,
# The whole split result becomes a single entry whose name is derived from
# 'data' via git.mangle_name().
152 splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
153 shalist = [(mode, splitfile_name, sha)]
# Without -n the split result's entries are used directly as the tree content.
155 shalist = hashsplit.split_to_shalist(
156 pack_writer.new_blob, pack_writer.new_tree, files,
157 keep_boundaries=opt.keep_boundaries, progress=prog)
158 tree = pack_writer.new_tree(shalist)
# Mode 3 (no pack writer, i.e. --noop/--copy): iterate over the chunks
# without storing them, keeping hashsplit.total_split up to date.
# NOTE(review): the guard deciding whether chunks are echoed to stdout, and
# the body of the progress check on `megs`, are not visible in these lines.
161 it = hashsplit.hashsplit_iter(files,
162 keep_boundaries=opt.keep_boundaries,
164 for (blob, level) in it:
165 hashsplit.total_split += len(blob)
167 sys.stdout.write(str(blob))
168 megs = hashsplit.total_split/1024/1024
169 if not opt.quiet and last != megs:
# Print the tree id in hex.
# NOTE(review): the condition guarding this print is not visible here.
175 print tree.encode('hex')
# -c or -n: wrap the tree in a commit whose parent is the previous ref value
# (oldref) and whose message records the invoking command line.
176 if opt.commit or opt.name:
177 msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` repeats the expression already stored in `refname` and
# is not referenced in the visible lines below -- candidate for removal once
# the full file has been checked.
178 ref = opt.name and ('refs/heads/%s' % opt.name) or None
179 commit = pack_writer.new_commit(oldref, tree, date, msg)
181 print commit.encode('hex')
184 pack_writer.close() # must close before we can update the ref
# Move the branch to the new commit, through the client or the local git
# module; oldref is passed along as the previous value.
# NOTE(review): the conditions selecting between these two calls are not
# visible in these lines.
188 cli.update_ref(refname, commit, oldref)
190 git.update_ref(refname, commit, oldref)
# Timing summary: elapsed wall-clock seconds since start_time and the total
# number of bytes split, logged as kbytes and kbytes/sec.
# NOTE(review): secs is used as a divisor below; the guards around both log
# calls are not visible in these lines.
195 secs = time.time() - start_time
196 size = hashsplit.total_split
198 log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
199 % (size/1024., secs, size/1024./secs))
# Report how many errors were collected in saved_errors during the run.
202 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))