3 from bup import hashsplit, git, options, client
4 from bup.helpers import *
8 bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...]
9 bup split -b OPTIONS [--git-ids | filenames...]
10 bup split <--noop [--copy]|--copy> OPTIONS [--git-ids | filenames...]
13 b,blobs output a series of blob ids. Implies --fanout=0.
14 t,tree output a tree id
15 c,commit output a commit id
16 n,name= save the result under the given name
17 noop split the input, but throw away the result
18 copy split the input, copy it to stdout, don't save to repo
20 r,remote= remote repository path
21 d,date= date for the commit (seconds since the epoch)
22 q,quiet don't print progress messages
23 v,verbose increase log output (can be used more than once)
24 git-ids read a list of git object ids from stdin and split their contents
25 keep-boundaries don't let one chunk span two input files
26 bench print benchmark timings to stderr
27 max-pack-size= maximum bytes in a single pack
28 max-pack-objects= maximum number of objects in a single pack
29 fanout= average number of blobs in a single tree
30 bwlimit= maximum bytes/sec to transmit to server
31 #,compress= set compression level to # (0-9, 9 is highest) [1]
# Build the option parser from optspec and parse the command line.
# NOTE(review): the numeric prefixes in this excerpt skip values, so some
# statements (including guards around the assignments further down) are
# not visible here -- confirm against the full file.
33 o = options.Options(optspec)
34 (opt, flags, extra) = o.parse(sys.argv[1:])
# Repository check; judging by the name this aborts when no repository is
# usable -- the behaviour itself lives in bup.git, not in this file.
37 git.check_repo_or_die()
# At least one mode must be selected.
38 if not (opt.blobs or opt.tree or opt.commit or opt.name or
39 opt.noop or opt.copy):
40 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --noop/--copy cannot be combined with any of -b, -t, -c, -n.
41 if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
42 opt.commit or opt.name):
43 o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n')
# -b cannot be combined with -t, -c or -n.
44 if opt.blobs and (opt.tree or opt.commit or opt.name):
45 o.fatal('-b is incompatible with -t, -c, -n')
# With --git-ids no positional filenames are accepted.
46 if extra and opt.git_ids:
47 o.fatal("don't provide filenames when using --git-ids")
# Copy parsed options into module-level settings of git, hashsplit and client.
# git.verbose is set one below the requested verbosity.
# NOTE(review): whether these assignments are conditional is not visible.
50 git.verbose = opt.verbose - 1
53 git.max_pack_size = parse_num(opt.max_pack_size)
54 if opt.max_pack_objects:
55 git.max_pack_objects = parse_num(opt.max_pack_objects)
57 hashsplit.fanout = parse_num(opt.fanout)
61 client.bwlimit = parse_num(opt.bwlimit)
# The commit date is parsed from --date; o.fatal is handed over as the
# error reporter for parse_date_or_fatal.
63 date = parse_date_or_fatal(opt.date, o.fatal)
# Progress reporter taking a file number and a byte count. The visible lines
# emit a status message through qprogress() with total_bytes shown in kbytes.
# NOTE(review): how total_bytes is updated from nbytes, and which condition
# selects between the two messages, is not visible in this excerpt.
68 def prog(filenum, nbytes):
# Message variant that includes the file number, displayed as filenum+1.
72 qprogress('Splitting: file #%d, %d kbytes\r'
73 % (filenum+1, total_bytes/1024))
# Message variant without a file number.
75 qprogress('Splitting: %d kbytes\r' % (total_bytes/1024))
# Reverse mode is signalled through the BUP_SERVER_REVERSE environment
# variable; -r/--remote is rejected in that mode.
78 is_reverse = os.environ.get('BUP_SERVER_REVERSE')
79 if is_reverse and opt.remote:
80 o.fatal("don't use -r in reverse mode; it's automatic")
# Reference point for the elapsed-seconds figure computed near the end.
81 start_time = time.time()
# A name beginning with '.' is refused as a branch name.
83 if opt.name and opt.name.startswith('.'):
84 o.fatal("'%s' is not a valid branch name." % opt.name)
# 'refs/heads/<name>' when a name was given, otherwise None.
85 refname = opt.name and 'refs/heads/%s' % opt.name or None
# Choose the destination. For --noop/--copy there is no client, pack writer
# or old ref at all.
86 if opt.noop or opt.copy:
87 cli = pack_writer = oldref = None
# Remote (or reverse) case: read the old ref and obtain the pack writer
# through a client.Client instance.
88 elif opt.remote or is_reverse:
89 cli = client.Client(opt.remote)
90 oldref = refname and cli.read_ref(refname) or None
91 pack_writer = cli.new_packwriter(compression_level=opt.compress)
# Same two steps done through the git module directly.
# NOTE(review): the line introducing this branch (presumably an else for the
# local repository) is missing from this excerpt.
94 oldref = refname and git.read_ref(refname) or None
95 pack_writer = git.PackWriter(compression_level=opt.compress)
98 # the input is actually a series of git object ids that we should retrieve
# NOTE(review): the statements that belong between these comments and the
# method definitions below are missing from this excerpt.
101 # This is a bit messy, but basically it converts from a series of
102 # CatPipe.get() iterators into a series of file-type objects.
103 # It would be less ugly if either CatPipe.get() returned a file-like object
104 # (not very efficient), or split_to_shalist() expected an iterator instead
# Methods of the adapter described above: built from an iterator `it` and
# offering read(size).
# NOTE(review): the class header and both method bodies are not visible.
108 def __init__(self, it):
110 def read(self, size):
# One line is read from stdin at a time.
115 line = sys.stdin.readline()
# The stripped line is passed to cp.get(); the first item of the returned
# iterator (the file type, per the inline comment) is consumed and dropped.
121 it = cp.get(line.strip())
122 next(it) # skip the file type
# An error `e` is reported through add_error().
# NOTE(review): the try/except that binds `e` is not visible -- confirm
# which exceptions are caught.
124 add_error('error: %s' % e)
129 # the input either comes from a series of files or from stdin.
# With filenames in `extra`, a generator opens each one as it is consumed;
# otherwise the single input is sys.stdin.
# NOTE(review): files opened by the generator are not explicitly closed in
# the visible code.
130 files = extra and (open(fn) for fn in extra) or [sys.stdin]
# -b with a pack writer: split the input through pack_writer.new_blob and
# print each resulting id in hex.
# NOTE(review): the closing argument line of this call is missing here.
132 if pack_writer and opt.blobs:
133 shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
134 keep_boundaries=opt.keep_boundaries,
136 for (sha, size, level) in shalist:
137 print sha.encode('hex')
139 elif pack_writer: # tree or commit or name
# Build a shalist using the pack writer's new_blob/new_tree, then wrap it in
# a tree object.
# NOTE(review): two argument lines of this call are missing from this excerpt.
140 shalist = hashsplit.split_to_shalist(pack_writer.new_blob,
141 pack_writer.new_tree,
143 keep_boundaries=opt.keep_boundaries,
145 tree = pack_writer.new_tree(shalist)
# Remaining case (pack_writer is None for --noop/--copy): iterate over the
# split chunks and add each chunk length to hashsplit.total_split.
# NOTE(review): the branch header for this case is not visible.
148 it = hashsplit.hashsplit_iter(files,
149 keep_boundaries=opt.keep_boundaries,
151 for (blob, level) in it:
152 hashsplit.total_split += len(blob)
# The chunk is written to stdout.
# NOTE(review): presumably only for --copy; the guard is not visible.
154 sys.stdout.write(str(blob))
# Running total in whole megabytes, compared with `last` unless --quiet.
# NOTE(review): the body of this condition and the update of `last` are
# missing from this excerpt.
155 megs = hashsplit.total_split/1024/1024
156 if not opt.quiet and last != megs:
# Tree id printed in hex.
# NOTE(review): presumably guarded by -t; the guard is not visible.
162 print tree.encode('hex')
# -c or -n: create a commit for `tree` with `oldref` as its predecessor.
163 if opt.commit or opt.name:
# The commit message embeds the repr of the full argv of this invocation.
164 msg = 'bup split\n\nGenerated by command:\n%r\n' % sys.argv
# NOTE(review): `ref` is computed the same way as `refname` earlier and is
# not used in the visible lines -- possibly redundant.
165 ref = opt.name and ('refs/heads/%s' % opt.name) or None
166 commit = pack_writer.new_commit(oldref, tree, date, msg)
# Commit id printed in hex.
# NOTE(review): presumably guarded by -c; the guard is not visible.
168 print commit.encode('hex')
171 pack_writer.close() # must close before we can update the ref
# Point refname at the new commit, passing oldref as the previous value,
# either through the client or through the git module.
# NOTE(review): the conditions choosing between these calls are not visible.
175 cli.update_ref(refname, commit, oldref)
177 git.update_ref(refname, commit, oldref)
# Benchmark figures: elapsed seconds since start_time and the number of
# bytes accumulated in hashsplit.total_split, logged as kbytes and kbytes/sec.
# NOTE(review): the guard for this report (presumably --bench) is not visible,
# and the rate divides by secs -- confirm secs cannot be zero.
182 secs = time.time() - start_time
183 size = hashsplit.total_split
185 log('bup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
186 % (size/1024., secs, size/1024./secs))
# Final warning with the number of entries in saved_errors.
# NOTE(review): presumably emitted only when saved_errors is non-empty; the
# guard is not visible.
189 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))