3 from bup import hashsplit, git, options, client
4 from bup.helpers import *
8 bup split [-tcb] [-n name] [--bench] [filenames...]
10 r,remote= remote repository path
11 b,blobs output a series of blob ids
12 t,tree output a tree id
13 c,commit output a commit id
14 n,name= name of backup set to update (if any)
15 d,date= date for the commit (seconds since the epoch)
16 q,quiet don't print progress messages
17 v,verbose increase log output (can be used more than once)
18 git-ids read a list of git object ids from stdin and split their contents
19 keep-boundaries don't let one chunk span two input files
20 noop don't actually save the data anywhere
21 copy just copy input to output, hashsplitting along the way
22 bench print benchmark timings to stderr
23 max-pack-size= maximum bytes in a single pack
24 max-pack-objects= maximum number of objects in a single pack
25 fanout= maximum number of blobs in a single tree
26 bwlimit= maximum bytes/sec to transmit to server
# Parse the command line against the option spec defined above.
o = options.Options('bup split', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

git.check_repo_or_die()

# At least one mode has to be selected, otherwise there is nothing to do.
# The option spec defines `noop` without a short form, so the messages refer
# to it as --noop (they previously advertised a non-existent -N flag).
if not (opt.blobs or opt.tree or opt.commit or opt.name or
        opt.noop or opt.copy):
    o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
# --noop and --copy cannot be mixed with the modes that output ids or
# update a named backup set.
if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
                               opt.commit or opt.name):
    o.fatal('--noop and --copy are incompatible with -b, -t, -c, -n')
# With --git-ids the object ids are read from stdin, so filename arguments
# are rejected.
if extra and opt.git_ids:
    o.fatal("don't provide filenames when using --git-ids")
# Push command-line tuning options into module-level settings of git,
# hashsplit and client.
# NOTE(review): this listing skips lines (the embedded numbers are not
# consecutive) and has lost its indentation, so some of these assignments may
# sit under guards that are not visible here -- confirm against the full file.
# git.verbose is set to one less than opt.verbose.
43 git.verbose = opt.verbose - 1
# parse_num is not defined in this view (presumably it comes from the
# bup.helpers star import); each option value is passed through it before
# the result is stored.
46 hashsplit.max_pack_size = parse_num(opt.max_pack_size)
# The next assignment is presumably the body of this `if`, i.e. the setting
# is only overridden when the option has a truthy value.
47 if opt.max_pack_objects:
48 hashsplit.max_pack_objects = parse_num(opt.max_pack_objects)
50 hashsplit.fanout = parse_num(opt.fanout)
54 client.bwlimit = parse_num(opt.bwlimit)
# o.fatal is handed to parse_date_or_fatal as its second argument --
# presumably the callback used when opt.date cannot be parsed; confirm.
56 date = parse_date_or_fatal(opt.date, o.fatal)
# Module-level progress state: both counters start at zero and are declared
# global inside prog().
61 last_prog = total_bytes = 0
# Progress callback taking a file index and a byte count.
# NOTE(review): several lines of this function are missing from the listing
# (including the assignment of `now` and whatever updates the two globals),
# so the comments below describe only what is visible.
62 def prog(filenum, nbytes):
63 global last_prog, total_bytes
# Rate limiting: compares the time since the previous report with 0.2; the
# action taken when it is below 0.2 is not visible (presumably an early
# return -- confirm).
66 if now - last_prog < 0.2:
# Two message forms exist: one that includes a 1-based file number
# (filenum+1) and one without it. The condition choosing between them is not
# visible. Sizes are reported as total_bytes/1024.
69 progress('Splitting: file #%d, %d kbytes\r'
70 % (filenum+1, total_bytes/1024))
72 progress('Splitting: %d kbytes\r' % (total_bytes/1024))
# Reverse mode is signalled through the BUP_SERVER_REVERSE environment
# variable; an explicit -r is rejected in that mode.
is_reverse = os.getenv('BUP_SERVER_REVERSE')
if opt.remote and is_reverse:
    o.fatal("don't use -r in reverse mode; it's automatic")

# Reference point for measuring how long the run takes.
start_time = time.time()
# Choose where objects are written: nowhere, a remote client, or the local
# repository. Sets cli, pack_writer and oldref accordingly.
# NOTE(review): the listing has lost indentation and some lines (the `try:`
# matching the `except` below and the final `else:` branch are not visible).
# refname is 'refs/heads/<name>' when -n was given, otherwise None.
81 refname = opt.name and 'refs/heads/%s' % opt.name or None
# --noop / --copy: no client, no pack writer, no previous ref.
82 if opt.noop or opt.copy:
83 cli = pack_writer = oldref = None
# Remote (or reverse) mode: objects go through a client.Client connection.
84 elif opt.remote or is_reverse:
# An explicit remote must contain a ':' somewhere in the string.
85 if opt.remote and opt.remote.find(":") == -1:
86 o.fatal("--remote argument must contain a colon")
88 cli = client.Client(opt.remote)
# A ClientError from constructing the client is reported as a fatal error.
89 except client.ClientError:
90 o.fatal("server exited unexpectedly; see errors above")
# Current value of the target ref as read through the client, or None when
# no name was given (or the read returns a falsy value).
91 oldref = refname and cli.read_ref(refname) or None
92 pack_writer = cli.new_packwriter()
# Presumably the local-repository branch (`else:` not visible): the ref is
# read via git.read_ref and objects are written with git.PackWriter.
95 oldref = refname and git.read_ref(refname) or None
96 pack_writer = git.PackWriter()
# NOTE(review): this region is heavily truncated in the listing -- the class
# header, both method bodies and the surrounding loop/try structure are
# missing, and the existing comment below is cut off mid-sentence. Only the
# visible statements are described.
99 # the input is actually a series of git object ids that we should retrieve
102 # This is a bit messy, but basically it converts from a series of
103 # CatPipe.get() iterators into a series of file-type objects.
104 # It would be less ugly if either CatPipe.get() returned a file-like object
105 # (not very efficient), or split_to_shalist() expected an iterator instead
# Adapter methods: the constructor takes an iterator `it`, and read() takes a
# size argument. Their bodies are not visible here.
109 def __init__(self, it):
111 def read(self, size):
# Object ids are read from stdin one line at a time.
116 line = sys.stdin.readline()
# The stripped line is passed to cp.get() (`cp` is created on a line not
# shown); the first item of the returned iterator is consumed and discarded.
122 it = cp.get(line.strip())
123 next(it) # skip the file type
# An exception `e` (its type and the matching try/except are not visible) is
# recorded through add_error.
125 add_error('error: %s' % e)
# NOTE(review): indentation and several lines are missing from this listing
# (both calls below are cut off before their closing arguments, and the
# conditions selecting each path are not visible).
130 # the input either comes from a series of files or from stdin.
# With filename arguments, `files` is a generator that opens each name as it
# is consumed; otherwise it is a one-element list holding sys.stdin.
131 files = extra and (open(fn) for fn in extra) or [sys.stdin]
# Path 1: split the input through the pack writer into a shalist, then build
# a tree object from that shalist.
134 shalist = hashsplit.split_to_shalist(pack_writer, files,
135 keep_boundaries=opt.keep_boundaries,
137 tree = pack_writer.new_tree(shalist)
# Path 2: iterate over (blob, bits) pairs from hashsplit_iter, add each
# blob's length to hashsplit.total_split and write the blob to stdout
# (presumably the --copy path -- confirm).
140 for (blob, bits) in hashsplit.hashsplit_iter(files,
141 keep_boundaries=opt.keep_boundaries,
143 hashsplit.total_split += len(blob)
145 sys.stdout.write(str(blob))
# Progress in Mbytes (total_split/1024/1024); a new message is printed only
# when not quiet and the value differs from `last` (assigned on a line not
# shown).
146 megs = hashsplit.total_split/1024/1024
147 if not opt.quiet and last != megs:
148 progress('%d Mbytes read\r' % megs)
150 progress('%d Mbytes read, done.\n' % megs)
# Print the requested ids, create the commit, close the pack and update the
# ref.
# NOTE(review): the conditions guarding the prints and the two update_ref
# calls are missing from this listing (presumably the -b / -t / -c options
# and the remote-vs-local choice -- confirm).
# One hex-encoded id per shalist entry.
155 for (mode,name,bin) in shalist:
156 print bin.encode('hex')
# Hex-encoded tree id.
158 print tree.encode('hex')
# A commit is created when -c was given or a backup-set name is set.
159 if opt.commit or opt.name:
# The commit message embeds the repr of the full command line.
160 msg = 'bup split\n\nGenerated by command:\n%r' % sys.argv
# NOTE(review): `ref` is computed the same way as `refname`, and the visible
# statements below use `refname`, not `ref`; it may be unused.
161 ref = opt.name and ('refs/heads/%s' % opt.name) or None
# oldref is passed as the first argument of new_commit, ahead of the tree,
# the date and the message.
162 commit = pack_writer.new_commit(oldref, tree, date, msg)
164 print commit.encode('hex')
167 pack_writer.close() # must close before we can update the ref
# The ref is updated either through the client or through git directly,
# passing the new commit and the previously read oldref.
171 cli.update_ref(refname, commit, oldref)
173 git.update_ref(refname, commit, oldref)
# Final reporting: throughput figures and a warning about recorded errors.
# NOTE(review): the guards around the two log calls are not visible in this
# listing (presumably --bench and a non-empty saved_errors -- confirm).
# Elapsed time since start_time and the byte total held in
# hashsplit.total_split.
178 secs = time.time() - start_time
179 size = hashsplit.total_split
# NOTE(review): size/1024./secs divides by the elapsed time; a zero `secs`
# would raise ZeroDivisionError -- check whether that can occur in practice.
181 log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
182 % (size/1024., secs, size/1024./secs))
# saved_errors is not defined in this view (presumably provided by
# bup.helpers and filled by add_error -- confirm).
185 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))