#!/usr/bin/env python from bup import options, drecurse from bup.helpers import * optspec = """ bup drecurse -- x,xdev,one-file-system don't cross filesystem boundaries q,quiet don't actually print filenames profile run under the python profiler """ o = options.Options('bup drecurse', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if len(extra) != 1: o.fatal("exactly one filename expected") it = drecurse.recursive_dirlist(extra, opt.xdev) if opt.profile: import cProfile def do_it(): for i in it: pass cProfile.run('do_it()') else: if opt.quiet: for i in it: pass else: for (name,st) in it: print name if saved_errors: log('WARNING: %d errors encountered.\n' % len(saved_errors)) sys.exit(1) #!/usr/bin/env python import sys, time, struct from bup import hashsplit, git, options, client from bup.helpers import * from subprocess import PIPE optspec = """ bup split [-tcb] [-n name] [--bench] [filenames...] -- r,remote= remote repository path b,blobs output a series of blob ids t,tree output a tree id c,commit output a commit id n,name= name of backup set to update (if any) N,noop don't actually save the data anywhere q,quiet don't print progress messages v,verbose increase log output (can be used more than once) copy just copy input to output, hashsplitting along the way bench print benchmark timings to stderr max-pack-size= maximum bytes in a single pack max-pack-objects= maximum number of objects in a single pack fanout= maximum number of blobs in a single tree """ o = options.Options('bup split', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop or opt.copy): o.fatal("use one or more of -b, -t, -c, -n, -N, --copy") if (opt.noop or opt.copy) and (opt.blobs or opt.tree or opt.commit or opt.name): o.fatal('-N is incompatible with -b, -t, -c, -n') if opt.verbose >= 2: git.verbose = opt.verbose - 1 opt.bench = 1 if opt.max_pack_size: hashsplit.max_pack_size = 
parse_num(opt.max_pack_size) if opt.max_pack_objects: hashsplit.max_pack_objects = parse_num(opt.max_pack_objects) if opt.fanout: hashsplit.fanout = parse_num(opt.fanout) if opt.blobs: hashsplit.fanout = 0 is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") start_time = time.time() refname = opt.name and 'refs/heads/%s' % opt.name or None if opt.noop or opt.copy: cli = w = oldref = None elif opt.remote or is_reverse: cli = client.Client(opt.remote) oldref = refname and cli.read_ref(refname) or None w = cli.new_packwriter() else: cli = None oldref = refname and git.read_ref(refname) or None w = git.PackWriter() files = extra and (open(fn) for fn in extra) or [sys.stdin] if w: shalist = hashsplit.split_to_shalist(w, files) tree = w.new_tree(shalist) else: last = 0 for (blob, bits) in hashsplit.hashsplit_iter(files): hashsplit.total_split += len(blob) if opt.copy: sys.stdout.write(str(blob)) megs = hashsplit.total_split/1024/1024 if not opt.quiet and last != megs: progress('%d Mbytes read\r' % megs) last = megs progress('%d Mbytes read, done.\n' % megs) if opt.verbose: log('\n') if opt.blobs: for (mode,name,bin) in shalist: print bin.encode('hex') if opt.tree: print tree.encode('hex') if opt.commit or opt.name: msg = 'bup split\n\nGenerated by command:\n%r' % sys.argv ref = opt.name and ('refs/heads/%s' % opt.name) or None commit = w.new_commit(oldref, tree, msg) if opt.commit: print commit.encode('hex') if w: w.close() # must close before we can update the ref if opt.name: if cli: cli.update_ref(refname, commit, oldref) else: git.update_ref(refname, commit, oldref) if cli: cli.close() secs = time.time() - start_time size = hashsplit.total_split if opt.bench: log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n' % (size/1024., secs, size/1024./secs)) #!/usr/bin/env python import sys, re, struct, mmap from bup import git, options from bup.helpers import * def s_from_bytes(bytes): 
clist = [chr(b) for b in bytes] return ''.join(clist) def report(count): fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk'] d = {} for line in open('/proc/self/status').readlines(): l = re.split(r':\s*', line.strip(), 1) d[l[0]] = l[1] if count >= 0: e1 = count fields = [d[k] for k in fields] else: e1 = '' print ('%9s ' + ('%10s ' * len(fields))) % tuple([e1] + fields) sys.stdout.flush() optspec = """ bup memtest [-n elements] [-c cycles] -- n,number= number of objects per cycle c,cycles= number of cycles to run ignore-midx ignore .midx files, use only .idx files """ o = options.Options('bup memtest', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal('no arguments expected') git.ignore_midx = opt.ignore_midx git.check_repo_or_die() m = git.PackIdxList(git.repo('objects/pack')) cycles = opt.cycles or 100 number = opt.number or 10000 report(-1) f = open('/dev/urandom') a = mmap.mmap(-1, 20) report(0) for c in xrange(cycles): for n in xrange(number): b = f.read(3) if 0: bytes = list(struct.unpack('!BBB', b)) + [0]*17 bytes[2] &= 0xf0 bin = struct.pack('!20s', s_from_bytes(bytes)) else: a[0:2] = b[0:2] a[2] = chr(ord(b[2]) & 0xf0) bin = str(a[0:20]) #print bin.encode('hex') m.exists(bin) report((c+1)*number) #!/usr/bin/env python import sys, os, stat from bup import options, git, vfs from bup.helpers import * def print_node(text, n): prefix = '' if opt.hash: prefix += "%s " % n.hash.encode('hex') if stat.S_ISDIR(n.mode): print '%s%s/' % (prefix, text) elif stat.S_ISLNK(n.mode): print '%s%s@' % (prefix, text) else: print '%s%s' % (prefix, text) optspec = """ bup ls -- s,hash show hash for each file """ o = options.Options('bup ls', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() top = vfs.RefList(None) if not extra: extra = ['/'] ret = 0 for d in extra: try: n = top.lresolve(d) if stat.S_ISDIR(n.mode): for sub in n: print_node(sub.name, sub) else: print_node(d, n) except vfs.NodeError, e: log('error: %s\n' % e) ret = 1 
sys.exit(ret) #!/usr/bin/env python import sys, os, re, stat, readline, fnmatch from bup import options, git, shquote, vfs from bup.helpers import * def node_name(text, n): if stat.S_ISDIR(n.mode): return '%s/' % text elif stat.S_ISLNK(n.mode): return '%s@' % text else: return '%s' % text def do_ls(path, n): l = [] if stat.S_ISDIR(n.mode): for sub in n: l.append(node_name(sub.name, sub)) else: l.append(node_name(path, n)) print columnate(l, '') def write_to_file(inf, outf): for blob in chunkyreader(inf): outf.write(blob) def inputiter(): if os.isatty(sys.stdin.fileno()): while 1: try: yield raw_input('bup> ') except EOFError: break else: for line in sys.stdin: yield line def _completer_get_subs(line): (qtype, lastword) = shquote.unfinished_word(line) (dir,name) = os.path.split(lastword) #log('\ncompleter: %r %r %r\n' % (qtype, lastword, text)) n = pwd.resolve(dir) subs = list(filter(lambda x: x.name.startswith(name), n.subs())) return (dir, name, qtype, lastword, subs) _last_line = None _last_res = None def completer(text, state): global _last_line global _last_res try: line = readline.get_line_buffer()[:readline.get_endidx()] if _last_line != line: _last_res = _completer_get_subs(line) _last_line = line (dir, name, qtype, lastword, subs) = _last_res if state < len(subs): sn = subs[state] sn1 = sn.resolve('') # deref symlinks fullname = os.path.join(dir, sn.name) if stat.S_ISDIR(sn1.mode): ret = shquote.what_to_add(qtype, lastword, fullname+'/', terminate=False) else: ret = shquote.what_to_add(qtype, lastword, fullname, terminate=True) + ' ' return text + ret except Exception, e: log('\nerror in completion: %s\n' % e) optspec = """ bup ftp """ o = options.Options('bup ftp', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() top = vfs.RefList(None) pwd = top if extra: lines = extra else: readline.set_completer_delims(' \t\n\r/') readline.set_completer(completer) readline.parse_and_bind("tab: complete") lines = inputiter() for line in lines: 
if not line.strip(): continue words = [word for (wordstart,word) in shquote.quotesplit(line)] cmd = words[0].lower() #log('execute: %r %r\n' % (cmd, parm)) try: if cmd == 'ls': for parm in (words[1:] or ['.']): do_ls(parm, pwd.resolve(parm)) elif cmd == 'cd': for parm in words[1:]: pwd = pwd.resolve(parm) elif cmd == 'pwd': print pwd.fullname() elif cmd == 'cat': for parm in words[1:]: write_to_file(pwd.resolve(parm).open(), sys.stdout) elif cmd == 'get': if len(words) not in [2,3]: raise Exception('Usage: get [localname]') rname = words[1] (dir,base) = os.path.split(rname) lname = len(words)>2 and words[2] or base inf = pwd.resolve(rname).open() log('Saving %r\n' % lname) write_to_file(inf, open(lname, 'wb')) elif cmd == 'mget': for parm in words[1:]: (dir,base) = os.path.split(parm) for n in pwd.resolve(dir).subs(): if fnmatch.fnmatch(n.name, base): try: log('Saving %r\n' % n.name) inf = n.open() outf = open(n.name, 'wb') write_to_file(inf, outf) outf.close() except Exception, e: log(' error: %s\n' % e) elif cmd == 'help' or cmd == '?': log('Commands: ls cd pwd cat get mget help quit\n') elif cmd == 'quit' or cmd == 'exit' or cmd == 'bye': break else: raise Exception('no such command %r' % cmd) except Exception, e: log('error: %s\n' % e) #raise #!/usr/bin/env python import sys, mmap from bup import options, _hashsplit from bup.helpers import * optspec = """ bup random [-S seed] -- S,seed= optional random number seed (default 1) f,force print random data to stdout even if it's a tty """ o = options.Options('bup random', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if len(extra) != 1: o.fatal("exactly one argument expected") total = parse_num(extra[0]) if opt.force or (not os.isatty(1) and not atoi(os.environ.get('BUP_FORCE_TTY')) & 1): _hashsplit.write_random(sys.stdout.fileno(), total, opt.seed or 0) else: log('error: not writing binary data to a terminal. 
Use -f to force.\n') sys.exit(1) #!/usr/bin/env python import sys, os, glob from bup import options optspec = """ bup help """ o = options.Options('bup help', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if len(extra) == 0: # the wrapper program provides the default usage string os.execvp(os.environ['BUP_MAIN_EXE'], ['bup']) elif len(extra) == 1: docname = (extra[0]=='bup' and 'bup' or ('bup-%s' % extra[0])) exe = sys.argv[0] (exepath, exefile) = os.path.split(exe) manpath = os.path.join(exepath, '../Documentation/' + docname + '.[1-9]') g = glob.glob(manpath) if g: os.execvp('man', ['man', '-l', g[0]]) else: os.execvp('man', ['man', docname]) else: o.fatal("exactly one command name expected") #!/usr/bin/env python import sys, os, stat, errno, fuse, re, time, tempfile from bup import options, git, vfs from bup.helpers import * class Stat(fuse.Stat): def __init__(self): self.st_mode = 0 self.st_ino = 0 self.st_dev = 0 self.st_nlink = 0 self.st_uid = 0 self.st_gid = 0 self.st_size = 0 self.st_atime = 0 self.st_mtime = 0 self.st_ctime = 0 self.st_blocks = 0 self.st_blksize = 0 self.st_rdev = 0 cache = {} def cache_get(top, path): parts = path.split('/') cache[('',)] = top c = None max = len(parts) #log('cache: %r\n' % cache.keys()) for i in range(max): pre = parts[:max-i] #log('cache trying: %r\n' % pre) c = cache.get(tuple(pre)) if c: rest = parts[max-i:] for r in rest: #log('resolving %r from %r\n' % (r, c.fullname())) c = c.lresolve(r) key = tuple(pre + [r]) #log('saving: %r\n' % (key,)) cache[key] = c break assert(c) return c class BupFs(fuse.Fuse): def __init__(self, top): fuse.Fuse.__init__(self) self.top = top def getattr(self, path): log('--getattr(%r)\n' % path) try: node = cache_get(self.top, path) st = Stat() st.st_mode = node.mode st.st_nlink = node.nlinks() st.st_size = node.size() st.st_mtime = node.mtime st.st_ctime = node.ctime st.st_atime = node.atime return st except vfs.NoSuchFile: return -errno.ENOENT def readdir(self, path, offset): 
log('--readdir(%r)\n' % path) node = cache_get(self.top, path) yield fuse.Direntry('.') yield fuse.Direntry('..') for sub in node.subs(): yield fuse.Direntry(sub.name) def readlink(self, path): log('--readlink(%r)\n' % path) node = cache_get(self.top, path) return node.readlink() def open(self, path, flags): log('--open(%r)\n' % path) node = cache_get(self.top, path) accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR if (flags & accmode) != os.O_RDONLY: return -errno.EACCES node.open() def release(self, path, flags): log('--release(%r)\n' % path) def read(self, path, size, offset): log('--read(%r)\n' % path) n = cache_get(self.top, path) o = n.open() o.seek(offset) return o.read(size) if not hasattr(fuse, '__version__'): raise RuntimeError, "your fuse module is too old for fuse.__version__" fuse.fuse_python_api = (0, 2) optspec = """ bup fuse [-d] [-f] -- d,debug increase debug level f,foreground run in foreground """ o = options.Options('bup fuse', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if len(extra) != 1: o.fatal("exactly one argument expected") git.check_repo_or_die() top = vfs.RefList(None) f = BupFs(top) f.fuse_args.mountpoint = extra[0] if opt.debug: f.fuse_args.add('debug') if opt.foreground: f.fuse_args.setmod('foreground') print f.multithreaded f.multithreaded = False f.main() #!/usr/bin/env python from bup import git, options, client from bup.helpers import * optspec = """ [BUP_DIR=...] bup init [-r host:path] -- r,remote= remote repository path """ o = options.Options('bup init', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal("no arguments expected") if opt.remote: git.init_repo() # local repo git.check_repo_or_die() cli = client.Client(opt.remote, create=True) cli.close() else: git.init_repo() #!/usr/bin/env python import sys, math, struct, glob from bup import options, git from bup.helpers import * PAGE_SIZE=4096 SHA_PER_PAGE=PAGE_SIZE/200. 
def merge(idxlist, bits, table): count = 0 for e in git.idxmerge(idxlist): count += 1 prefix = git.extract_bits(e, bits) table[prefix] = count yield e def do_midx(outdir, outfilename, infilenames): if not outfilename: assert(outdir) sum = Sha1('\0'.join(infilenames)).hexdigest() outfilename = '%s/midx-%s.midx' % (outdir, sum) inp = [] total = 0 for name in infilenames: ix = git.PackIdx(name) inp.append(ix) total += len(ix) log('Merging %d indexes (%d objects).\n' % (len(infilenames), total)) if (not opt.force and (total < 1024 and len(infilenames) < 3)) \ or (opt.force and not total): log('midx: nothing to do.\n') return pages = int(total/SHA_PER_PAGE) or 1 bits = int(math.ceil(math.log(pages, 2))) entries = 2**bits log('Table size: %d (%d bits)\n' % (entries*4, bits)) table = [0]*entries try: os.unlink(outfilename) except OSError: pass f = open(outfilename + '.tmp', 'w+') f.write('MIDX\0\0\0\2') f.write(struct.pack('!I', bits)) assert(f.tell() == 12) f.write('\0'*4*entries) for e in merge(inp, bits, table): f.write(e) f.write('\0'.join(os.path.basename(p) for p in infilenames)) f.seek(12) f.write(struct.pack('!%dI' % entries, *table)) f.close() os.rename(outfilename + '.tmp', outfilename) # this is just for testing if 0: p = git.PackMidx(outfilename) assert(len(p.idxnames) == len(infilenames)) print p.idxnames assert(len(p) == total) pi = iter(p) for i in merge(inp, total, bits, table): assert(i == pi.next()) assert(p.exists(i)) print outfilename optspec = """ bup midx [options...] 
-- o,output= output midx filename (default: auto-generated) a,auto automatically create .midx from any unindexed .idx files f,force automatically create .midx from *all* .idx files """ o = options.Options('bup midx', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra and (opt.auto or opt.force): o.fatal("you can't use -f/-a and also provide filenames") git.check_repo_or_die() if extra: do_midx(git.repo('objects/pack'), opt.output, extra) elif opt.auto or opt.force: paths = [git.repo('objects/pack')] paths += glob.glob(git.repo('index-cache/*/.')) for path in paths: log('midx: scanning %s\n' % path) if opt.force: do_midx(path, opt.output, glob.glob('%s/*.idx' % path)) elif opt.auto: m = git.PackIdxList(path) needed = {} for pack in m.packs: # only .idx files without a .midx are open if pack.name.endswith('.idx'): needed[pack.name] = 1 del m do_midx(path, opt.output, needed.keys()) log('\n') else: o.fatal("you must use -f or -a or provide input filenames") #!/usr/bin/env python import sys, os, random from bup import options from bup.helpers import * def randblock(n): l = [] for i in xrange(n): l.append(chr(random.randrange(0,256))) return ''.join(l) optspec = """ bup damage [-n count] [-s maxsize] [-S seed] -- WARNING: THIS COMMAND IS EXTREMELY DANGEROUS n,num= number of blocks to damage s,size= maximum size of each damaged block percent= maximum size of each damaged block (as a percent of entire file) equal spread damage evenly throughout the file S,seed= random number seed (for repeatable tests) """ o = options.Options('bup damage', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if not extra: o.fatal('filenames expected') if opt.seed != None: random.seed(opt.seed) for name in extra: log('Damaging "%s"...\n' % name) f = open(name, 'r+b') st = os.fstat(f.fileno()) size = st.st_size if opt.percent or opt.size: ms1 = int(float(opt.percent or 0)/100.0*size) or size ms2 = opt.size or size maxsize = min(ms1, ms2) else: maxsize = 1 chunks = opt.num or 10 
chunksize = size/chunks for r in range(chunks): sz = random.randrange(1, maxsize+1) if sz > size: sz = size if opt.equal: ofs = r*chunksize else: ofs = random.randrange(0, size - sz + 1) log(' %6d bytes at %d\n' % (sz, ofs)) f.seek(ofs) f.write(randblock(sz)) f.close() #!/usr/bin/env python import sys, struct, mmap from bup import options, git from bup.helpers import * suspended_w = None def init_dir(conn, arg): git.init_repo(arg) log('bup server: bupdir initialized: %r\n' % git.repodir) conn.ok() def set_dir(conn, arg): git.check_repo_or_die(arg) log('bup server: bupdir is %r\n' % git.repodir) conn.ok() def list_indexes(conn, junk): git.check_repo_or_die() for f in os.listdir(git.repo('objects/pack')): if f.endswith('.idx'): conn.write('%s\n' % f) conn.ok() def send_index(conn, name): git.check_repo_or_die() assert(name.find('/') < 0) assert(name.endswith('.idx')) idx = git.PackIdx(git.repo('objects/pack/%s' % name)) conn.write(struct.pack('!I', len(idx.map))) conn.write(idx.map) conn.ok() def receive_objects(conn, junk): global suspended_w git.check_repo_or_die() suggested = {} if suspended_w: w = suspended_w suspended_w = None else: w = git.PackWriter() while 1: ns = conn.read(4) if not ns: w.abort() raise Exception('object read: expected length header, got EOF\n') n = struct.unpack('!I', ns)[0] #log('expecting %d bytes\n' % n) if not n: log('bup server: received %d object%s.\n' % (w.count, w.count!=1 and "s" or '')) fullpath = w.close() if fullpath: (dir, name) = os.path.split(fullpath) conn.write('%s.idx\n' % name) conn.ok() return elif n == 0xffffffff: log('bup server: receive-objects suspended.\n') suspended_w = w conn.ok() return buf = conn.read(n) # object sizes in bup are reasonably small #log('read %d bytes\n' % n) if len(buf) < n: w.abort() raise Exception('object read: expected %d bytes, got %d\n' % (n, len(buf))) (type, content) = git._decode_packobj(buf) sha = git.calc_hash(type, content) oldpack = w.exists(sha) # FIXME: we only suggest a single 
index per cycle, because the client # is currently dumb to download more than one per cycle anyway. # Actually we should fix the client, but this is a minor optimization # on the server side. if not suggested and \ oldpack and (oldpack == True or oldpack.endswith('.midx')): # FIXME: we shouldn't really have to know about midx files # at this layer. But exists() on a midx doesn't return the # packname (since it doesn't know)... probably we should just # fix that deficiency of midx files eventually, although it'll # make the files bigger. This method is certainly not very # efficient. w.objcache.refresh(skip_midx = True) oldpack = w.objcache.exists(sha) log('new suggestion: %r\n' % oldpack) assert(oldpack) assert(oldpack != True) assert(not oldpack.endswith('.midx')) w.objcache.refresh(skip_midx = False) if not suggested and oldpack: assert(oldpack.endswith('.idx')) (dir,name) = os.path.split(oldpack) if not (name in suggested): log("bup server: suggesting index %s\n" % name) conn.write('index %s\n' % name) suggested[name] = 1 else: w._raw_write([buf]) # NOTREACHED def read_ref(conn, refname): git.check_repo_or_die() r = git.read_ref(refname) conn.write('%s\n' % (r or '').encode('hex')) conn.ok() def update_ref(conn, refname): git.check_repo_or_die() newval = conn.readline().strip() oldval = conn.readline().strip() git.update_ref(refname, newval.decode('hex'), oldval.decode('hex')) conn.ok() def cat(conn, id): git.check_repo_or_die() try: for blob in git.cat(id): conn.write(struct.pack('!I', len(blob))) conn.write(blob) except KeyError, e: log('server: error: %s\n' % e) conn.write('\0\0\0\0') conn.error(e) else: conn.write('\0\0\0\0') conn.ok() optspec = """ bup server """ o = options.Options('bup server', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal('no arguments expected') log('bup server: reading from stdin.\n') commands = { 'init-dir': init_dir, 'set-dir': set_dir, 'list-indexes': list_indexes, 'send-index': send_index, 
'receive-objects': receive_objects, 'read-ref': read_ref, 'update-ref': update_ref, 'cat': cat, } # FIXME: this protocol is totally lame and not at all future-proof. # (Especially since we abort completely as soon as *anything* bad happens) conn = Conn(sys.stdin, sys.stdout) lr = linereader(conn) for _line in lr: line = _line.strip() if not line: continue log('bup server: command: %r\n' % line) words = line.split(' ', 1) cmd = words[0] rest = len(words)>1 and words[1] or '' if cmd == 'quit': break else: cmd = commands.get(cmd) if cmd: cmd(conn, rest) else: raise Exception('unknown server command: %r\n' % line) log('bup server: done\n') #!/usr/bin/env python import sys, time, struct from bup import hashsplit, git, options, client from bup.helpers import * from subprocess import PIPE optspec = """ bup join [-r host:path] [refs or hashes...] -- r,remote= remote repository path """ o = options.Options('bup join', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() if not extra: extra = linereader(sys.stdin) ret = 0 if opt.remote: cli = client.Client(opt.remote) cat = cli.cat else: cp = git.CatPipe() cat = cp.join for id in extra: try: for blob in cat(id): sys.stdout.write(blob) except KeyError, e: sys.stdout.flush() log('error: %s\n' % e) ret = 1 sys.exit(ret) #!/usr/bin/env python import sys, re, errno, stat, time, math from bup import hashsplit, git, options, index, client from bup.helpers import * optspec = """ bup save [-tc] [-n name] -- r,remote= remote repository path t,tree output a tree id c,commit output a commit id n,name= name of backup set to update (if any) v,verbose increase log output (can be used more than once) q,quiet don't show progress meter smaller= only back up files smaller than n bytes """ o = options.Options('bup save', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() if not (opt.tree or opt.commit or opt.name): o.fatal("use one or more of -t, -c, -n") if not extra: o.fatal("no filenames 
given") opt.progress = (istty and not opt.quiet) opt.smaller = parse_num(opt.smaller or 0) is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") refname = opt.name and 'refs/heads/%s' % opt.name or None if opt.remote or is_reverse: cli = client.Client(opt.remote) oldref = refname and cli.read_ref(refname) or None w = cli.new_packwriter() else: cli = None oldref = refname and git.read_ref(refname) or None w = git.PackWriter() handle_ctrl_c() def eatslash(dir): if dir.endswith('/'): return dir[:-1] else: return dir parts = [''] shalists = [[]] def _push(part): assert(part) parts.append(part) shalists.append([]) def _pop(force_tree): assert(len(parts) >= 1) part = parts.pop() shalist = shalists.pop() tree = force_tree or w.new_tree(shalist) if shalists: shalists[-1].append(('40000', part, tree)) else: # this was the toplevel, so put it back for sanity shalists.append(shalist) return tree lastremain = None def progress_report(n): global count, subcount, lastremain subcount += n cc = count + subcount pct = total and (cc*100.0/total) or 0 now = time.time() elapsed = now - tstart kps = elapsed and int(cc/1024./elapsed) kps_frac = 10 ** int(math.log(kps+1, 10) - 1) kps = int(kps/kps_frac)*kps_frac if cc: remain = elapsed*1.0/cc * (total-cc) else: remain = 0.0 if (lastremain and (remain > lastremain) and ((remain - lastremain)/lastremain < 0.05)): remain = lastremain else: lastremain = remain hours = int(remain/60/60) mins = int(remain/60 - hours*60) secs = int(remain - hours*60*60 - mins*60) if elapsed < 30: remainstr = '' kpsstr = '' else: kpsstr = '%dk/s' % kps if hours: remainstr = '%dh%dm' % (hours, mins) elif mins: remainstr = '%dm%d' % (mins, secs) else: remainstr = '%ds' % secs progress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' % (pct, cc/1024, total/1024, fcount, ftotal, remainstr, kpsstr)) r = index.Reader(git.repo('bupindex')) def already_saved(ent): return ent.is_valid() and 
w.exists(ent.sha) and ent.sha def wantrecurse_pre(ent): return not already_saved(ent) def wantrecurse_during(ent): return not already_saved(ent) or ent.sha_missing() total = ftotal = 0 if opt.progress: for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre): if not (ftotal % 10024): progress('Reading index: %d\r' % ftotal) exists = ent.exists() hashvalid = already_saved(ent) ent.set_sha_missing(not hashvalid) if not opt.smaller or ent.size < opt.smaller: if exists and not hashvalid: total += ent.size ftotal += 1 progress('Reading index: %d, done.\n' % ftotal) hashsplit.progress_callback = progress_report tstart = time.time() count = subcount = fcount = 0 lastskip_name = None lastdir = '' for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): (dir, file) = os.path.split(ent.name) exists = (ent.flags & index.IX_EXISTS) hashvalid = already_saved(ent) wasmissing = ent.sha_missing() oldsize = ent.size if opt.verbose: if not exists: status = 'D' elif not hashvalid: if ent.sha == index.EMPTY_SHA: status = 'A' else: status = 'M' else: status = ' ' if opt.verbose >= 2: log('%s %-70s\n' % (status, ent.name)) elif not stat.S_ISDIR(ent.mode) and lastdir != dir: if not lastdir.startswith(dir): log('%s %-70s\n' % (status, os.path.join(dir, ''))) lastdir = dir if opt.progress: progress_report(0) fcount += 1 if not exists: continue if opt.smaller and ent.size >= opt.smaller: if exists and not hashvalid: add_error('skipping large file "%s"' % ent.name) lastskip_name = ent.name continue assert(dir.startswith('/')) dirp = dir.split('/') while parts > dirp: _pop(force_tree = None) if dir != '/': for part in dirp[len(parts):]: _push(part) if not file: # no filename portion means this is a subdir. But # sub/parentdirectories already handled in the pop/push() part above. 
oldtree = already_saved(ent) # may be None newtree = _pop(force_tree = oldtree) if not oldtree: if lastskip_name and lastskip_name.startswith(ent.name): ent.invalidate() else: ent.validate(040000, newtree) ent.repack() if exists and wasmissing: count += oldsize continue # it's not a directory id = None if hashvalid: mode = '%o' % ent.gitmode id = ent.sha shalists[-1].append((mode, git.mangle_name(file, ent.mode, ent.gitmode), id)) else: if stat.S_ISREG(ent.mode): try: f = hashsplit.open_noatime(ent.name) except IOError, e: add_error(e) lastskip_name = ent.name except OSError, e: add_error(e) lastskip_name = ent.name else: (mode, id) = hashsplit.split_to_blob_or_tree(w, [f]) else: if stat.S_ISDIR(ent.mode): assert(0) # handled above elif stat.S_ISLNK(ent.mode): try: rl = os.readlink(ent.name) except OSError, e: add_error(e) lastskip_name = ent.name except IOError, e: add_error(e) lastskip_name = ent.name else: (mode, id) = ('120000', w.new_blob(rl)) else: add_error(Exception('skipping special file "%s"' % ent.name)) lastskip_name = ent.name if id: ent.validate(int(mode, 8), id) ent.repack() shalists[-1].append((mode, git.mangle_name(file, ent.mode, ent.gitmode), id)) if exists and wasmissing: count += oldsize subcount = 0 if opt.progress: pct = total and count*100.0/total or 100 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. 
\n' % (pct, count/1024, total/1024, fcount, ftotal)) while len(parts) > 1: _pop(force_tree = None) assert(len(shalists) == 1) tree = w.new_tree(shalists[-1]) if opt.tree: print tree.encode('hex') if opt.commit or opt.name: msg = 'bup save\n\nGenerated by command:\n%r' % sys.argv ref = opt.name and ('refs/heads/%s' % opt.name) or None commit = w.new_commit(oldref, tree, msg) if opt.commit: print commit.encode('hex') w.close() # must close before we can update the ref if opt.name: if cli: cli.update_ref(refname, commit, oldref) else: git.update_ref(refname, commit, oldref) if cli: cli.close() if saved_errors: log('WARNING: %d errors encountered while saving.\n' % len(saved_errors)) sys.exit(1) #!/usr/bin/env python import sys, time from bup import options optspec = """ bup tick """ o = options.Options('bup tick', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal("no arguments expected") t = time.time() tleft = 1 - (t - int(t)) time.sleep(tleft) #!/usr/bin/env python import os, sys, stat, time from bup import options, git, index, drecurse from bup.helpers import * def merge_indexes(out, r1, r2): for e in index.MergeIter([r1, r2]): # FIXME: shouldn't we remove deleted entries eventually? When? 
out.add_ixentry(e) class IterHelper: def __init__(self, l): self.i = iter(l) self.cur = None self.next() def next(self): try: self.cur = self.i.next() except StopIteration: self.cur = None return self.cur def check_index(reader): try: log('check: checking forward iteration...\n') e = None d = {} for e in reader.forward_iter(): if e.children_n: if opt.verbose: log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n, e.name)) assert(e.children_ofs) assert(e.name.endswith('/')) assert(not d.get(e.children_ofs)) d[e.children_ofs] = 1 if e.flags & index.IX_HASHVALID: assert(e.sha != index.EMPTY_SHA) assert(e.gitmode) assert(not e or e.name == '/') # last entry is *always* / log('check: checking normal iteration...\n') last = None for e in reader: if last: assert(last > e.name) last = e.name except: log('index error! at %r\n' % e) raise log('check: passed.\n') def update_index(top): ri = index.Reader(indexfile) wi = index.Writer(indexfile) rig = IterHelper(ri.iter(name=top)) tstart = int(time.time()) hashgen = None if opt.fake_valid: def hashgen(name): return (0100644, index.FAKE_SHA) total = 0 for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev): if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)): sys.stdout.write('%s\n' % path) sys.stdout.flush() progress('Indexing: %d\r' % total) elif not (total % 128): progress('Indexing: %d\r' % total) total += 1 while rig.cur and rig.cur.name > path: # deleted paths if rig.cur.exists(): rig.cur.set_deleted() rig.cur.repack() rig.next() if rig.cur and rig.cur.name == path: # paths that already existed if pst: rig.cur.from_stat(pst, tstart) if not (rig.cur.flags & index.IX_HASHVALID): if hashgen: (rig.cur.gitmode, rig.cur.sha) = hashgen(path) rig.cur.flags |= index.IX_HASHVALID if opt.fake_invalid: rig.cur.invalidate() rig.cur.repack() rig.next() else: # new paths wi.add(path, pst, hashgen = hashgen) progress('Indexing: %d, done.\n' % total) if ri.exists(): ri.save() wi.flush() if wi.count: wr = 
wi.new_reader() if opt.check: log('check: before merging: oldfile\n') check_index(ri) log('check: before merging: newfile\n') check_index(wr) mi = index.Writer(indexfile) merge_indexes(mi, ri, wr) ri.close() mi.close() wr.close() wi.abort() else: wi.close() optspec = """ bup index <-p|m|u> [options...] -- p,print print the index entries for the given names (also works with -u) m,modified print only added/deleted/modified files (implies -p) s,status print each filename with a status char (A/M/D) (implies -p) H,hash print the hash for each object next to its name (implies -p) l,long print more information about each file u,update (recursively) update the index entries for the given filenames x,xdev,one-file-system don't cross filesystem boundaries fake-valid mark all index entries as up-to-date even if they aren't fake-invalid mark all index entries as invalid check carefully check index file integrity f,indexfile= the name of the index file (default 'index') v,verbose increase log output (can be used more than once) """ o = options.Options('bup index', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if not (opt.modified or opt['print'] or opt.status or opt.update or opt.check): o.fatal('supply one or more of -p, -s, -m, -u, or --check') if (opt.fake_valid or opt.fake_invalid) and not opt.update: o.fatal('--fake-{in,}valid are meaningless without -u') if opt.fake_valid and opt.fake_invalid: o.fatal('--fake-valid is incompatible with --fake-invalid') git.check_repo_or_die() indexfile = opt.indexfile or git.repo('bupindex') handle_ctrl_c() if opt.check: log('check: starting initial check.\n') check_index(index.Reader(indexfile)) paths = index.reduce_paths(extra) if opt.update: if not paths: o.fatal('update (-u) requested but no paths given') for (rp,path) in paths: update_index(rp) if opt['print'] or opt.status or opt.modified: for (name, ent) in index.Reader(indexfile).filter(extra or ['']): if (opt.modified and (ent.is_valid() or ent.is_deleted() or not 
ent.mode)): continue line = '' if opt.status: if ent.is_deleted(): line += 'D ' elif not ent.is_valid(): if ent.sha == index.EMPTY_SHA: line += 'A ' else: line += 'M ' else: line += ' ' if opt.hash: line += ent.sha.encode('hex') + ' ' if opt.long: line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode)) print line + (name or './') if opt.check and (opt['print'] or opt.status or opt.modified or opt.update): log('check: starting final check.\n') check_index(index.Reader(indexfile)) if saved_errors: log('WARNING: %d errors encountered.\n' % len(saved_errors)) sys.exit(1) #!/usr/bin/env python import sys, os, struct from bup import options, helpers optspec = """ bup rbackup-server -- This command is not intended to be run manually. """ o = options.Options('bup rbackup-server', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal('no arguments expected') # get the subcommand's argv. # Normally we could just pass this on the command line, but since we'll often # be getting called on the other end of an ssh pipe, which tends to mangle # argv (by sending it via the shell), this way is much safer. buf = sys.stdin.read(4) sz = struct.unpack('!I', buf)[0] assert(sz > 0) assert(sz < 1000000) buf = sys.stdin.read(sz) assert(len(buf) == sz) argv = buf.split('\0') # stdin/stdout are supposedly connected to 'bup server' that the caller # started for us (often on the other end of an ssh tunnel), so we don't want # to misuse them. Move them out of the way, then replace stdout with # a pointer to stderr in case our subcommand wants to do something with it. # # It might be nice to do the same with stdin, but my experiments showed that # ssh seems to make its child's stderr a readable-but-never-reads-anything # socket. They really should have used shutdown(SHUT_WR) on the other end # of it, but probably didn't. Anyway, it's too messy, so let's just make sure # anyone reading from stdin is disappointed. 
def quick_verify(base):
    """Cheap pack integrity check.

    A git packfile ends with the sha1 of all preceding bytes; recompute
    that digest over the body and compare it with the stored trailer.
    Raises ValueError on mismatch.
    """
    f = open(base + '.pack', 'rb')
    try:
        # the trailing 20 bytes are the expected sha1 of the rest
        f.seek(-20, 2)
        wantsum = f.read(20)
        assert(len(wantsum) == 20)
        f.seek(0)
        sum = Sha1()
        for b in chunkyreader(f, os.fstat(f.fileno()).st_size - 20):
            sum.update(b)
        if sum.digest() != wantsum:
            raise ValueError('expected %r, got %r'
                             % (wantsum.encode('hex'), sum.hexdigest()))
    finally:
        # bugfix: the file handle was previously leaked on every call
        # (and on every exception path)
        f.close()
debug('error: %s\n' % e) return 1 return 0 else: return run(['git', 'verify-pack', '--', base]) def do_pack(base, last): code = 0 if par2_ok and par2_exists and (opt.repair or not opt.generate): vresult = par2_verify(base) if vresult != 0: if opt.repair: rresult = par2_repair(base) if rresult != 0: print '%s par2 repair: failed (%d)' % (last, rresult) code = rresult else: print '%s par2 repair: succeeded (0)' % last code = 100 else: print '%s par2 verify: failed (%d)' % (last, vresult) code = vresult else: print '%s ok' % last elif not opt.generate or (par2_ok and not par2_exists): gresult = git_verify(base) if gresult != 0: print '%s git verify: failed (%d)' % (last, gresult) code = gresult else: if par2_ok and opt.generate: presult = par2_generate(base) if presult != 0: print '%s par2 create: failed (%d)' % (last, presult) code = presult else: print '%s ok' % last else: print '%s ok' % last else: assert(opt.generate and (not par2_ok or par2_exists)) debug(' skipped: par2 file already generated.\n') return code optspec = """ bup fsck [options...] [filenames...] -- r,repair attempt to repair errors using par2 (dangerous!) 
g,generate generate auto-repair information using par2 v,verbose increase verbosity (can be used more than once) quick just check pack sha1sum, don't use git verify-pack j,jobs= run 'n' jobs in parallel par2-ok immediately return 0 if par2 is ok, 1 if not disable-par2 ignore par2 even if it is available """ o = options.Options('bup fsck', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) par2_setup() if opt.par2_ok: if par2_ok: sys.exit(0) # 'true' in sh else: sys.exit(1) if opt.disable_par2: par2_ok = 0 git.check_repo_or_die() if not extra: debug('fsck: No filenames given: checking all packs.\n') extra = glob.glob(git.repo('objects/pack/*.pack')) code = 0 count = 0 outstanding = {} for name in extra: if name.endswith('.pack'): base = name[:-5] elif name.endswith('.idx'): base = name[:-4] elif name.endswith('.par2'): base = name[:-5] elif os.path.exists(name + '.pack'): base = name else: raise Exception('%s is not a pack file!' % name) (dir,last) = os.path.split(base) par2_exists = os.path.exists(base + '.par2') if par2_exists and os.stat(base + '.par2').st_size == 0: par2_exists = 0 sys.stdout.flush() debug('fsck: checking %s (%s)\n' % (last, par2_ok and par2_exists and 'par2' or 'git')) if not opt.verbose: progress('fsck (%d/%d)\r' % (count, len(extra))) if not opt.jobs: nc = do_pack(base, last) code = code or nc count += 1 else: while len(outstanding) >= opt.jobs: (pid,nc) = os.wait() nc >>= 8 if pid in outstanding: del outstanding[pid] code = code or nc count += 1 pid = os.fork() if pid: # parent outstanding[pid] = 1 else: # child try: sys.exit(do_pack(base, last)) except Exception, e: log('exception: %r\n' % e) sys.exit(99) while len(outstanding): (pid,nc) = os.wait() nc >>= 8 if pid in outstanding: del outstanding[pid] code = code or nc count += 1 if not opt.verbose: progress('fsck (%d/%d)\r' % (count, len(extra))) if not opt.verbose and istty: log('fsck done. 
\n') sys.exit(code) #!/usr/bin/env python import sys, os, struct, getopt, subprocess, signal from bup import options, ssh from bup.helpers import * optspec = """ bup rbackup index ... bup rbackup save ... bup rbackup split ... """ o = options.Options('bup rbackup', optspec, optfunc=getopt.getopt) (opt, flags, extra) = o.parse(sys.argv[1:]) if len(extra) < 2: o.fatal('arguments expected') class SigException(Exception): def __init__(self, signum): self.signum = signum Exception.__init__(self, 'signal %d received' % signum) def handler(signum, frame): raise SigException(signum) signal.signal(signal.SIGTERM, handler) signal.signal(signal.SIGINT, handler) sp = None p = None ret = 99 try: hostname = extra[0] argv = extra[1:] p = ssh.connect(hostname, 'rbackup-server') argvs = '\0'.join(['bup'] + argv) p.stdin.write(struct.pack('!I', len(argvs)) + argvs) p.stdin.flush() main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0] sp = subprocess.Popen([main_exe, 'server'], stdin=p.stdout, stdout=p.stdin) p.stdin.close() p.stdout.close() finally: while 1: # if we get a signal while waiting, we have to keep waiting, just # in case our child doesn't die. 
try: ret = p.wait() sp.wait() break except SigException, e: log('\nbup rbackup: %s\n' % e) os.kill(p.pid, e.signum) ret = 84 sys.exit(ret) #!/usr/bin/env python import sys, os, re from bup import options optspec = """ bup newliner """ o = options.Options('bup newliner', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal("no arguments expected") r = re.compile(r'([\r\n])') lastlen = 0 all = '' while 1: l = r.split(all, 1) if len(l) <= 1: try: b = os.read(sys.stdin.fileno(), 4096) except KeyboardInterrupt: break if not b: break all += b else: assert(len(l) == 3) (line, splitchar, all) = l #splitchar = '\n' sys.stdout.write('%-*s%s' % (lastlen, line, splitchar)) if splitchar == '\r': lastlen = len(line) else: lastlen = 0 sys.stdout.flush() if lastlen or all: sys.stdout.write('%-*s\n' % (lastlen, all)) #!/usr/bin/env python import sys from bup import options, git, _hashsplit from bup.helpers import * optspec = """ bup margin """ o = options.Options('bup margin', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal("no arguments expected") git.check_repo_or_die() #git.ignore_midx = 1 mi = git.PackIdxList(git.repo('objects/pack')) last = '\0'*20 longmatch = 0 for i in mi: if i == last: continue #assert(str(i) >= last) pm = _hashsplit.bitmatch(last, i) longmatch = max(longmatch, pm) last = i print longmatch #!/usr/bin/env python from bup import options, drecurse from bup.helpers import * optspec = """ bup drecurse -- x,xdev,one-file-system don't cross filesystem boundaries q,quiet don't actually print filenames profile run under the python profiler """ o = options.Options('bup drecurse', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if len(extra) != 1: o.fatal("exactly one filename expected") it = drecurse.recursive_dirlist(extra, opt.xdev) if opt.profile: import cProfile def do_it(): for i in it: pass cProfile.run('do_it()') else: if opt.quiet: for i in it: pass else: for (name,st) in it: print name if saved_errors: 
log('WARNING: %d errors encountered.\n' % len(saved_errors)) sys.exit(1) #!/usr/bin/env python import sys, time, struct from bup import hashsplit, git, options, client from bup.helpers import * from subprocess import PIPE optspec = """ bup split [-tcb] [-n name] [--bench] [filenames...] -- r,remote= remote repository path b,blobs output a series of blob ids t,tree output a tree id c,commit output a commit id n,name= name of backup set to update (if any) N,noop don't actually save the data anywhere q,quiet don't print progress messages v,verbose increase log output (can be used more than once) copy just copy input to output, hashsplitting along the way bench print benchmark timings to stderr max-pack-size= maximum bytes in a single pack max-pack-objects= maximum number of objects in a single pack fanout= maximum number of blobs in a single tree """ o = options.Options('bup split', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop or opt.copy): o.fatal("use one or more of -b, -t, -c, -n, -N, --copy") if (opt.noop or opt.copy) and (opt.blobs or opt.tree or opt.commit or opt.name): o.fatal('-N is incompatible with -b, -t, -c, -n') if opt.verbose >= 2: git.verbose = opt.verbose - 1 opt.bench = 1 if opt.max_pack_size: hashsplit.max_pack_size = parse_num(opt.max_pack_size) if opt.max_pack_objects: hashsplit.max_pack_objects = parse_num(opt.max_pack_objects) if opt.fanout: hashsplit.fanout = parse_num(opt.fanout) if opt.blobs: hashsplit.fanout = 0 is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") start_time = time.time() refname = opt.name and 'refs/heads/%s' % opt.name or None if opt.noop or opt.copy: cli = w = oldref = None elif opt.remote or is_reverse: cli = client.Client(opt.remote) oldref = refname and cli.read_ref(refname) or None w = cli.new_packwriter() else: cli = None oldref = refname 
def s_from_bytes(bytes):
    """Join a sequence of byte values (small ints) into a string."""
    return ''.join(chr(b) for b in bytes)
def node_name(text, n):
    """Decorate 'text' ls -F style: directories get a trailing '/',
    symlinks a trailing '@', everything else is unchanged."""
    if stat.S_ISDIR(n.mode):
        suffix = '/'
    elif stat.S_ISLNK(n.mode):
        suffix = '@'
    else:
        suffix = ''
    return '%s%s' % (text, suffix)
else: for line in sys.stdin: yield line def _completer_get_subs(line): (qtype, lastword) = shquote.unfinished_word(line) (dir,name) = os.path.split(lastword) #log('\ncompleter: %r %r %r\n' % (qtype, lastword, text)) n = pwd.resolve(dir) subs = list(filter(lambda x: x.name.startswith(name), n.subs())) return (dir, name, qtype, lastword, subs) _last_line = None _last_res = None def completer(text, state): global _last_line global _last_res try: line = readline.get_line_buffer()[:readline.get_endidx()] if _last_line != line: _last_res = _completer_get_subs(line) _last_line = line (dir, name, qtype, lastword, subs) = _last_res if state < len(subs): sn = subs[state] sn1 = sn.resolve('') # deref symlinks fullname = os.path.join(dir, sn.name) if stat.S_ISDIR(sn1.mode): ret = shquote.what_to_add(qtype, lastword, fullname+'/', terminate=False) else: ret = shquote.what_to_add(qtype, lastword, fullname, terminate=True) + ' ' return text + ret except Exception, e: log('\nerror in completion: %s\n' % e) optspec = """ bup ftp """ o = options.Options('bup ftp', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() top = vfs.RefList(None) pwd = top if extra: lines = extra else: readline.set_completer_delims(' \t\n\r/') readline.set_completer(completer) readline.parse_and_bind("tab: complete") lines = inputiter() for line in lines: if not line.strip(): continue words = [word for (wordstart,word) in shquote.quotesplit(line)] cmd = words[0].lower() #log('execute: %r %r\n' % (cmd, parm)) try: if cmd == 'ls': for parm in (words[1:] or ['.']): do_ls(parm, pwd.resolve(parm)) elif cmd == 'cd': for parm in words[1:]: pwd = pwd.resolve(parm) elif cmd == 'pwd': print pwd.fullname() elif cmd == 'cat': for parm in words[1:]: write_to_file(pwd.resolve(parm).open(), sys.stdout) elif cmd == 'get': if len(words) not in [2,3]: raise Exception('Usage: get [localname]') rname = words[1] (dir,base) = os.path.split(rname) lname = len(words)>2 and words[2] or base inf = 
pwd.resolve(rname).open() log('Saving %r\n' % lname) write_to_file(inf, open(lname, 'wb')) elif cmd == 'mget': for parm in words[1:]: (dir,base) = os.path.split(parm) for n in pwd.resolve(dir).subs(): if fnmatch.fnmatch(n.name, base): try: log('Saving %r\n' % n.name) inf = n.open() outf = open(n.name, 'wb') write_to_file(inf, outf) outf.close() except Exception, e: log(' error: %s\n' % e) elif cmd == 'help' or cmd == '?': log('Commands: ls cd pwd cat get mget help quit\n') elif cmd == 'quit' or cmd == 'exit' or cmd == 'bye': break else: raise Exception('no such command %r' % cmd) except Exception, e: log('error: %s\n' % e) #raise #!/usr/bin/env python import sys, mmap from bup import options, _hashsplit from bup.helpers import * optspec = """ bup random [-S seed] -- S,seed= optional random number seed (default 1) f,force print random data to stdout even if it's a tty """ o = options.Options('bup random', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if len(extra) != 1: o.fatal("exactly one argument expected") total = parse_num(extra[0]) if opt.force or (not os.isatty(1) and not atoi(os.environ.get('BUP_FORCE_TTY')) & 1): _hashsplit.write_random(sys.stdout.fileno(), total, opt.seed or 0) else: log('error: not writing binary data to a terminal. 
cache = {}   # maps tuple-of-path-components -> resolved vfs node


def cache_get(top, path):
    """Resolve 'path' relative to 'top', memoizing every node visited.

    Finds the longest already-cached prefix of the path, then resolves
    (and caches) each remaining component in turn.
    """
    parts = path.split('/')
    cache[('',)] = top
    c = None
    nparts = len(parts)
    for i in range(nparts):
        pre = parts[:nparts - i]
        c = cache.get(tuple(pre))
        if c:
            rest = parts[nparts - i:]
            for r in rest:
                c = c.lresolve(r)
                # bugfix: extend the prefix as we descend so the cache
                # key is the *full* path of c.  The old code used
                # tuple(pre + [r]) with a fixed 'pre', so resolving
                # '/a/b' cached the /a/b node under ('', 'b'), and a
                # later lookup of '/b' returned the wrong node.
                pre.append(r)
                cache[tuple(pre)] = c
            break
    assert(c)
    return c
log('--readdir(%r)\n' % path) node = cache_get(self.top, path) yield fuse.Direntry('.') yield fuse.Direntry('..') for sub in node.subs(): yield fuse.Direntry(sub.name) def readlink(self, path): log('--readlink(%r)\n' % path) node = cache_get(self.top, path) return node.readlink() def open(self, path, flags): log('--open(%r)\n' % path) node = cache_get(self.top, path) accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR if (flags & accmode) != os.O_RDONLY: return -errno.EACCES node.open() def release(self, path, flags): log('--release(%r)\n' % path) def read(self, path, size, offset): log('--read(%r)\n' % path) n = cache_get(self.top, path) o = n.open() o.seek(offset) return o.read(size) if not hasattr(fuse, '__version__'): raise RuntimeError, "your fuse module is too old for fuse.__version__" fuse.fuse_python_api = (0, 2) optspec = """ bup fuse [-d] [-f] -- d,debug increase debug level f,foreground run in foreground """ o = options.Options('bup fuse', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if len(extra) != 1: o.fatal("exactly one argument expected") git.check_repo_or_die() top = vfs.RefList(None) f = BupFs(top) f.fuse_args.mountpoint = extra[0] if opt.debug: f.fuse_args.add('debug') if opt.foreground: f.fuse_args.setmod('foreground') print f.multithreaded f.multithreaded = False f.main() #!/usr/bin/env python from bup import git, options, client from bup.helpers import * optspec = """ [BUP_DIR=...] bup init [-r host:path] -- r,remote= remote repository path """ o = options.Options('bup init', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal("no arguments expected") if opt.remote: git.init_repo() # local repo git.check_repo_or_die() cli = client.Client(opt.remote, create=True) cli.close() else: git.init_repo() #!/usr/bin/env python import sys, math, struct, glob from bup import options, git from bup.helpers import * PAGE_SIZE=4096 SHA_PER_PAGE=PAGE_SIZE/200. 
def merge(idxlist, bits, table):
    """Yield every idx entry from 'idxlist' in merged (sorted) order.

    Side effect: fills in the midx fanout 'table' -- table[prefix] ends
    up holding the 1-based count of entries seen up to and including
    the last entry whose leading 'bits' bits equal that prefix.
    """
    n = 0
    for entry in git.idxmerge(idxlist):
        n += 1
        table[git.extract_bits(entry, bits)] = n
        yield entry
def randblock(n):
    """Return a string of n pseudo-random bytes (chr() values 0..255).

    Uses the module-level 'random' state, so results are repeatable
    after random.seed() (which 'bup damage -S' relies on).
    """
    # range instead of py2-only xrange (identical iteration behavior),
    # and a single join over a generator instead of append-in-a-loop
    return ''.join(chr(random.randrange(0, 256)) for i in range(n))
chunksize = size/chunks for r in range(chunks): sz = random.randrange(1, maxsize+1) if sz > size: sz = size if opt.equal: ofs = r*chunksize else: ofs = random.randrange(0, size - sz + 1) log(' %6d bytes at %d\n' % (sz, ofs)) f.seek(ofs) f.write(randblock(sz)) f.close() #!/usr/bin/env python import sys, struct, mmap from bup import options, git from bup.helpers import * suspended_w = None def init_dir(conn, arg): git.init_repo(arg) log('bup server: bupdir initialized: %r\n' % git.repodir) conn.ok() def set_dir(conn, arg): git.check_repo_or_die(arg) log('bup server: bupdir is %r\n' % git.repodir) conn.ok() def list_indexes(conn, junk): git.check_repo_or_die() for f in os.listdir(git.repo('objects/pack')): if f.endswith('.idx'): conn.write('%s\n' % f) conn.ok() def send_index(conn, name): git.check_repo_or_die() assert(name.find('/') < 0) assert(name.endswith('.idx')) idx = git.PackIdx(git.repo('objects/pack/%s' % name)) conn.write(struct.pack('!I', len(idx.map))) conn.write(idx.map) conn.ok() def receive_objects(conn, junk): global suspended_w git.check_repo_or_die() suggested = {} if suspended_w: w = suspended_w suspended_w = None else: w = git.PackWriter() while 1: ns = conn.read(4) if not ns: w.abort() raise Exception('object read: expected length header, got EOF\n') n = struct.unpack('!I', ns)[0] #log('expecting %d bytes\n' % n) if not n: log('bup server: received %d object%s.\n' % (w.count, w.count!=1 and "s" or '')) fullpath = w.close() if fullpath: (dir, name) = os.path.split(fullpath) conn.write('%s.idx\n' % name) conn.ok() return elif n == 0xffffffff: log('bup server: receive-objects suspended.\n') suspended_w = w conn.ok() return buf = conn.read(n) # object sizes in bup are reasonably small #log('read %d bytes\n' % n) if len(buf) < n: w.abort() raise Exception('object read: expected %d bytes, got %d\n' % (n, len(buf))) (type, content) = git._decode_packobj(buf) sha = git.calc_hash(type, content) oldpack = w.exists(sha) # FIXME: we only suggest a single 
def read_ref(conn, refname):
    """Protocol handler: reply with the hex sha of 'refname' on one
    line (an empty line if the ref doesn't exist), then ok."""
    git.check_repo_or_die()
    sha = git.read_ref(refname) or ''
    conn.write('%s\n' % sha.encode('hex'))
    conn.ok()
'receive-objects': receive_objects, 'read-ref': read_ref, 'update-ref': update_ref, 'cat': cat, } # FIXME: this protocol is totally lame and not at all future-proof. # (Especially since we abort completely as soon as *anything* bad happens) conn = Conn(sys.stdin, sys.stdout) lr = linereader(conn) for _line in lr: line = _line.strip() if not line: continue log('bup server: command: %r\n' % line) words = line.split(' ', 1) cmd = words[0] rest = len(words)>1 and words[1] or '' if cmd == 'quit': break else: cmd = commands.get(cmd) if cmd: cmd(conn, rest) else: raise Exception('unknown server command: %r\n' % line) log('bup server: done\n') #!/usr/bin/env python import sys, time, struct from bup import hashsplit, git, options, client from bup.helpers import * from subprocess import PIPE optspec = """ bup join [-r host:path] [refs or hashes...] -- r,remote= remote repository path """ o = options.Options('bup join', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() if not extra: extra = linereader(sys.stdin) ret = 0 if opt.remote: cli = client.Client(opt.remote) cat = cli.cat else: cp = git.CatPipe() cat = cp.join for id in extra: try: for blob in cat(id): sys.stdout.write(blob) except KeyError, e: sys.stdout.flush() log('error: %s\n' % e) ret = 1 sys.exit(ret) #!/usr/bin/env python import sys, re, errno, stat, time, math from bup import hashsplit, git, options, index, client from bup.helpers import * optspec = """ bup save [-tc] [-n name] -- r,remote= remote repository path t,tree output a tree id c,commit output a commit id n,name= name of backup set to update (if any) v,verbose increase log output (can be used more than once) q,quiet don't show progress meter smaller= only back up files smaller than n bytes """ o = options.Options('bup save', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() if not (opt.tree or opt.commit or opt.name): o.fatal("use one or more of -t, -c, -n") if not extra: o.fatal("no filenames 
given") opt.progress = (istty and not opt.quiet) opt.smaller = parse_num(opt.smaller or 0) is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") refname = opt.name and 'refs/heads/%s' % opt.name or None if opt.remote or is_reverse: cli = client.Client(opt.remote) oldref = refname and cli.read_ref(refname) or None w = cli.new_packwriter() else: cli = None oldref = refname and git.read_ref(refname) or None w = git.PackWriter() handle_ctrl_c() def eatslash(dir): if dir.endswith('/'): return dir[:-1] else: return dir parts = [''] shalists = [[]] def _push(part): assert(part) parts.append(part) shalists.append([]) def _pop(force_tree): assert(len(parts) >= 1) part = parts.pop() shalist = shalists.pop() tree = force_tree or w.new_tree(shalist) if shalists: shalists[-1].append(('40000', part, tree)) else: # this was the toplevel, so put it back for sanity shalists.append(shalist) return tree lastremain = None def progress_report(n): global count, subcount, lastremain subcount += n cc = count + subcount pct = total and (cc*100.0/total) or 0 now = time.time() elapsed = now - tstart kps = elapsed and int(cc/1024./elapsed) kps_frac = 10 ** int(math.log(kps+1, 10) - 1) kps = int(kps/kps_frac)*kps_frac if cc: remain = elapsed*1.0/cc * (total-cc) else: remain = 0.0 if (lastremain and (remain > lastremain) and ((remain - lastremain)/lastremain < 0.05)): remain = lastremain else: lastremain = remain hours = int(remain/60/60) mins = int(remain/60 - hours*60) secs = int(remain - hours*60*60 - mins*60) if elapsed < 30: remainstr = '' kpsstr = '' else: kpsstr = '%dk/s' % kps if hours: remainstr = '%dh%dm' % (hours, mins) elif mins: remainstr = '%dm%d' % (mins, secs) else: remainstr = '%ds' % secs progress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' % (pct, cc/1024, total/1024, fcount, ftotal, remainstr, kpsstr)) r = index.Reader(git.repo('bupindex')) def already_saved(ent): return ent.is_valid() and 
w.exists(ent.sha) and ent.sha def wantrecurse_pre(ent): return not already_saved(ent) def wantrecurse_during(ent): return not already_saved(ent) or ent.sha_missing() total = ftotal = 0 if opt.progress: for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre): if not (ftotal % 10024): progress('Reading index: %d\r' % ftotal) exists = ent.exists() hashvalid = already_saved(ent) ent.set_sha_missing(not hashvalid) if not opt.smaller or ent.size < opt.smaller: if exists and not hashvalid: total += ent.size ftotal += 1 progress('Reading index: %d, done.\n' % ftotal) hashsplit.progress_callback = progress_report tstart = time.time() count = subcount = fcount = 0 lastskip_name = None lastdir = '' for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): (dir, file) = os.path.split(ent.name) exists = (ent.flags & index.IX_EXISTS) hashvalid = already_saved(ent) wasmissing = ent.sha_missing() oldsize = ent.size if opt.verbose: if not exists: status = 'D' elif not hashvalid: if ent.sha == index.EMPTY_SHA: status = 'A' else: status = 'M' else: status = ' ' if opt.verbose >= 2: log('%s %-70s\n' % (status, ent.name)) elif not stat.S_ISDIR(ent.mode) and lastdir != dir: if not lastdir.startswith(dir): log('%s %-70s\n' % (status, os.path.join(dir, ''))) lastdir = dir if opt.progress: progress_report(0) fcount += 1 if not exists: continue if opt.smaller and ent.size >= opt.smaller: if exists and not hashvalid: add_error('skipping large file "%s"' % ent.name) lastskip_name = ent.name continue assert(dir.startswith('/')) dirp = dir.split('/') while parts > dirp: _pop(force_tree = None) if dir != '/': for part in dirp[len(parts):]: _push(part) if not file: # no filename portion means this is a subdir. But # sub/parentdirectories already handled in the pop/push() part above. 
        oldtree = already_saved(ent)  # may be None
        newtree = _pop(force_tree = oldtree)
        if not oldtree:
            if lastskip_name and lastskip_name.startswith(ent.name):
                # something inside this dir was skipped; don't mark it valid
                ent.invalidate()
            else:
                ent.validate(040000, newtree)
            ent.repack()
        if exists and wasmissing:
            count += oldsize
        continue

    # it's not a directory
    id = None
    if hashvalid:
        # already in the repo: just record the existing sha in the tree
        mode = '%o' % ent.gitmode
        id = ent.sha
        shalists[-1].append((mode,
                             git.mangle_name(file, ent.mode, ent.gitmode),
                             id))
    else:
        if stat.S_ISREG(ent.mode):
            try:
                f = hashsplit.open_noatime(ent.name)
            except IOError, e:
                add_error(e)
                lastskip_name = ent.name
            except OSError, e:
                add_error(e)
                lastskip_name = ent.name
            else:
                (mode, id) = hashsplit.split_to_blob_or_tree(w, [f])
        else:
            if stat.S_ISDIR(ent.mode):
                assert(0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                try:
                    rl = os.readlink(ent.name)
                except OSError, e:
                    add_error(e)
                    lastskip_name = ent.name
                except IOError, e:
                    add_error(e)
                    lastskip_name = ent.name
                else:
                    # 120000 is git's symlink mode; content is the target
                    (mode, id) = ('120000', w.new_blob(rl))
            else:
                add_error(Exception('skipping special file "%s"' % ent.name))
                lastskip_name = ent.name
        if id:
            ent.validate(int(mode, 8), id)
            ent.repack()
            shalists[-1].append((mode,
                                 git.mangle_name(file, ent.mode, ent.gitmode),
                                 id))
    if exists and wasmissing:
        count += oldsize
        subcount = 0

if opt.progress:
    pct = total and count*100.0/total or 100
    progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
             % (pct, count/1024, total/1024, fcount, ftotal))

# close any directories still open on the stack, then write the root tree
while len(parts) > 1:
    _pop(force_tree = None)
assert(len(shalists) == 1)
tree = w.new_tree(shalists[-1])
if opt.tree:
    print tree.encode('hex')
if opt.commit or opt.name:
    msg = 'bup save\n\nGenerated by command:\n%r' % sys.argv
    ref = opt.name and ('refs/heads/%s' % opt.name) or None
    commit = w.new_commit(oldref, tree, msg)
    if opt.commit:
        print commit.encode('hex')

w.close()  # must close before we can update the ref

if opt.name:
    if cli:
        cli.update_ref(refname, commit, oldref)
    else:
        git.update_ref(refname, commit, oldref)

if cli:
    cli.close()

if saved_errors:
    log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
    sys.exit(1)


#!/usr/bin/env python
# --- bup tick: sleep until the start of the next wall-clock second ----------
import sys, time
from bup import options

optspec = """
bup tick
"""
o = options.Options('bup tick', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal("no arguments expected")

t = time.time()
tleft = 1 - (t - int(t))
time.sleep(tleft)


#!/usr/bin/env python
# --- bup index: maintain the filesystem index used by 'bup save' ------------
import os, sys, stat, time
from bup import options, git, index, drecurse
from bup.helpers import *


def merge_indexes(out, r1, r2):
    # Write the merged entries of readers r1 and r2 into writer 'out'.
    for e in index.MergeIter([r1, r2]):
        # FIXME: shouldn't we remove deleted entries eventually?  When?
        out.add_ixentry(e)


class IterHelper:
    # Wraps an iterator with a one-entry lookahead slot (self.cur), so the
    # merge loop below can peek at the current entry without consuming it.
    def __init__(self, l):
        self.i = iter(l)
        self.cur = None
        self.next()

    def next(self):
        # advance; self.cur becomes None when the iterator is exhausted
        try:
            self.cur = self.i.next()
        except StopIteration:
            self.cur = None
        return self.cur


def check_index(reader):
    # Sanity-check an index file: forward iteration must yield consistent
    # child offsets/flags, and normal (reverse-sorted) iteration must be
    # strictly ordered.  Raises on failure.
    try:
        log('check: checking forward iteration...\n')
        e = None
        d = {}
        for e in reader.forward_iter():
            if e.children_n:
                if opt.verbose:
                    log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
                                            e.name))
                assert(e.children_ofs)
                assert(e.name.endswith('/'))
                # child offsets must be unique across the file
                assert(not d.get(e.children_ofs))
                d[e.children_ofs] = 1
            if e.flags & index.IX_HASHVALID:
                assert(e.sha != index.EMPTY_SHA)
                assert(e.gitmode)
        assert(not e or e.name == '/')  # last entry is *always* /
        log('check: checking normal iteration...\n')
        last = None
        for e in reader:
            if last:
                assert(last > e.name)
            last = e.name
    except:
        log('index error! at %r\n' % e)
        raise
    log('check: passed.\n')


def update_index(top):
    # Re-scan the filesystem under 'top' and merge the results with the
    # existing index, marking deleted paths and adding new ones.
    ri = index.Reader(indexfile)
    wi = index.Writer(indexfile)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time())

    hashgen = None
    if opt.fake_valid:
        def hashgen(name):
            return (0100644, index.FAKE_SHA)

    total = 0
    for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev):
        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            progress('Indexing: %d\r' % total)
        elif not (total % 128):
            progress('Indexing: %d\r' % total)
        total += 1
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
            rig.next()
        if rig.cur and rig.cur.name == path:    # paths that already existed
            if pst:
                rig.cur.from_stat(pst, tstart)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            wi.add(path, pst, hashgen = hashgen)
    progress('Indexing: %d, done.\n' % total)

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile)
            merge_indexes(mi, ri, wr)
            ri.close()
            mi.close()
            wr.close()
        # the temporary writer's entries were merged; discard its file
        wi.abort()
    else:
        wi.close()


optspec = """
bup index <-p|m|u> [options...]
--
p,print    print the index entries for the given names (also works with -u)
m,modified  print only added/deleted/modified files (implies -p)
s,status   print each filename with a status char (A/M/D) (implies -p)
H,hash     print the hash for each object next to its name (implies -p)
l,long     print more information about each file
u,update   (recursively) update the index entries for the given filenames
x,xdev,one-file-system  don't cross filesystem boundaries
fake-valid  mark all index entries as up-to-date even if they aren't
fake-invalid  mark all index entries as invalid
check      carefully check index file integrity
f,indexfile=  the name of the index file (default 'index')
v,verbose  increase log output (can be used more than once)
"""
o = options.Options('bup index', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if not (opt.modified or opt['print'] or opt.status or opt.update or opt.check):
    o.fatal('supply one or more of -p, -s, -m, -u, or --check')
if (opt.fake_valid or opt.fake_invalid) and not opt.update:
    o.fatal('--fake-{in,}valid are meaningless without -u')
if opt.fake_valid and opt.fake_invalid:
    o.fatal('--fake-valid is incompatible with --fake-invalid')

git.check_repo_or_die()
indexfile = opt.indexfile or git.repo('bupindex')

handle_ctrl_c()

if opt.check:
    log('check: starting initial check.\n')
    check_index(index.Reader(indexfile))

paths = index.reduce_paths(extra)

if opt.update:
    if not paths:
        o.fatal('update (-u) requested but no paths given')
    for (rp,path) in paths:
        update_index(rp)

if opt['print'] or opt.status or opt.modified:
    for (name, ent) in index.Reader(indexfile).filter(extra or ['']):
        if (opt.modified
            and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
            continue
        line = ''
        if opt.status:
            if ent.is_deleted():
                line += 'D '
            elif not ent.is_valid():
                if ent.sha == index.EMPTY_SHA:
                    line += 'A '
                else:
                    line += 'M '
            else:
                line += ' '
        if opt.hash:
            line += ent.sha.encode('hex') + ' '
        if opt.long:
            line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode))
        print line + (name or './')

if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
    log('check: starting final check.\n')
    check_index(index.Reader(indexfile))

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)


#!/usr/bin/env python
# --- bup rbackup-server: far end of 'bup rbackup' (not run manually) --------
import sys, os, struct
from bup import options, helpers

optspec = """
bup rbackup-server
--
This command is not intended to be run manually.
"""
o = options.Options('bup rbackup-server', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])
if extra:
    o.fatal('no arguments expected')

# get the subcommand's argv.
# Normally we could just pass this on the command line, but since we'll often
# be getting called on the other end of an ssh pipe, which tends to mangle
# argv (by sending it via the shell), this way is much safer.
buf = sys.stdin.read(4)
sz = struct.unpack('!I', buf)[0]
assert(sz > 0)
assert(sz < 1000000)
buf = sys.stdin.read(sz)
assert(len(buf) == sz)
argv = buf.split('\0')

# stdin/stdout are supposedly connected to 'bup server' that the caller
# started for us (often on the other end of an ssh tunnel), so we don't want
# to misuse them.  Move them out of the way, then replace stdout with
# a pointer to stderr in case our subcommand wants to do something with it.
#
# It might be nice to do the same with stdin, but my experiments showed that
# ssh seems to make its child's stderr a readable-but-never-reads-anything
# socket.  They really should have used shutdown(SHUT_WR) on the other end
# of it, but probably didn't.  Anyway, it's too messy, so let's just make sure
# anyone reading from stdin is disappointed.
#
# (You can't just leave stdin/stdout "not open" by closing the file
# descriptors.  Then the next file that opens is automatically assigned 0 or 1,
# and people *trying* to read/write stdin/stdout get screwed.)
os.dup2(0, 3)
os.dup2(1, 4)
os.dup2(2, 1)
fd = os.open('/dev/null', os.O_RDONLY)
os.dup2(fd, 0)
os.close(fd)

os.environ['BUP_SERVER_REVERSE'] = helpers.hostname()
os.execvp(argv[0], argv)
sys.exit(99)


#!/usr/bin/env python
# --- bup fsck: verify (and optionally repair, via par2) pack files ----------
import sys, os, glob, subprocess, time
from bup import options, git
from bup.helpers import *

par2_ok = 0
nullf = open('/dev/null')


def debug(s):
    if opt.verbose:
        log(s)


def run(argv):
    # Run argv with its stdout sent to our stderr; returns the exit code.
    # at least in python 2.5, using "stdout=2" or "stdout=sys.stderr" below
    # doesn't actually work, because subprocess closes fd #2 right before
    # execing for some reason.  So we work around it by duplicating the fd
    # first.
    fd = os.dup(2)  # copy stderr
    try:
        p = subprocess.Popen(argv, stdout=fd, close_fds=False)
        return p.wait()
    finally:
        os.close(fd)


def par2_setup():
    # Probe for a working 'par2' binary; sets the global par2_ok flag.
    global par2_ok
    rv = 1
    try:
        p = subprocess.Popen(['par2', '--help'],
                             stdout=nullf, stderr=nullf, stdin=nullf)
        rv = p.wait()
    except OSError:
        log('fsck: warning: par2 not found; disabling recovery features.\n')
    else:
        par2_ok = 1


def parv(lvl):
    # par2 verbosity flags appropriate for the current -v level
    if opt.verbose >= lvl:
        if istty:
            return []
        else:
            return ['-q']
    else:
        return ['-qq']


def par2_generate(base):
    return run(['par2', 'create', '-n1', '-c200'] + parv(2)
               + ['--', base, base+'.pack', base+'.idx'])


def par2_verify(base):
    return run(['par2', 'verify'] + parv(3) + ['--', base])


def par2_repair(base):
    return run(['par2', 'repair'] + parv(2) + ['--', base])


def quick_verify(base):
    # Check only the pack's trailing sha1 (last 20 bytes) against a sha1 of
    # the rest of the file; much faster than 'git verify-pack'.
    f = open(base + '.pack', 'rb')
    f.seek(-20, 2)
    wantsum = f.read(20)
    assert(len(wantsum) == 20)
    f.seek(0)
    sum = Sha1()
    for b in chunkyreader(f, os.fstat(f.fileno()).st_size - 20):
        sum.update(b)
    if sum.digest() != wantsum:
        raise ValueError('expected %r, got %r' % (wantsum.encode('hex'),
                                                  sum.hexdigest()))


def git_verify(base):
    # returns 0 on success, nonzero on failure (like a shell exit code)
    if opt.quick:
        try:
            quick_verify(base)
        except Exception, e:
            debug('error: %s\n' % e)
            return 1
        return 0
    else:
        return run(['git', 'verify-pack', '--', base])


def do_pack(base, last):
    # Verify/generate/repair one pack (pathname prefix 'base'; 'last' is the
    # short name used in messages).  Returns 0 if ok, nonzero error code
    # otherwise (100 means "repaired successfully", still an error).
    code = 0
    if par2_ok and par2_exists and (opt.repair or not opt.generate):
        vresult = par2_verify(base)
        if vresult != 0:
            if opt.repair:
                rresult = par2_repair(base)
                if rresult != 0:
                    print '%s par2 repair: failed (%d)' % (last, rresult)
                    code = rresult
                else:
                    print '%s par2 repair: succeeded (0)' % last
                    code = 100
            else:
                print '%s par2 verify: failed (%d)' % (last, vresult)
                code = vresult
        else:
            print '%s ok' % last
    elif not opt.generate or (par2_ok and not par2_exists):
        gresult = git_verify(base)
        if gresult != 0:
            print '%s git verify: failed (%d)' % (last, gresult)
            code = gresult
        else:
            if par2_ok and opt.generate:
                presult = par2_generate(base)
                if presult != 0:
                    print '%s par2 create: failed (%d)' % (last, presult)
                    code = presult
                else:
                    print '%s ok' % last
            else:
                print '%s ok' % last
    else:
        assert(opt.generate and (not par2_ok or par2_exists))
        debug(' skipped: par2 file already generated.\n')
    return code


optspec = """
bup fsck [options...] [filenames...]
--
r,repair    attempt to repair errors using par2 (dangerous!)
g,generate  generate auto-repair information using par2
v,verbose   increase verbosity (can be used more than once)
quick       just check pack sha1sum, don't use git verify-pack
j,jobs=     run 'n' jobs in parallel
par2-ok     immediately return 0 if par2 is ok, 1 if not
disable-par2  ignore par2 even if it is available
"""
o = options.Options('bup fsck', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

par2_setup()
if opt.par2_ok:
    if par2_ok:
        sys.exit(0)  # 'true' in sh
    else:
        sys.exit(1)
if opt.disable_par2:
    par2_ok = 0

git.check_repo_or_die()

if not extra:
    debug('fsck: No filenames given: checking all packs.\n')
    extra = glob.glob(git.repo('objects/pack/*.pack'))

code = 0
count = 0
outstanding = {}  # pid -> 1, for -j parallel children
for name in extra:
    # accept .pack/.idx/.par2 names or the bare base name
    if name.endswith('.pack'):
        base = name[:-5]
    elif name.endswith('.idx'):
        base = name[:-4]
    elif name.endswith('.par2'):
        base = name[:-5]
    elif os.path.exists(name + '.pack'):
        base = name
    else:
        raise Exception('%s is not a pack file!' % name)
    (dir,last) = os.path.split(base)
    par2_exists = os.path.exists(base + '.par2')
    if par2_exists and os.stat(base + '.par2').st_size == 0:
        par2_exists = 0
    sys.stdout.flush()
    debug('fsck: checking %s (%s)\n'
          % (last, par2_ok and par2_exists and 'par2' or 'git'))
    if not opt.verbose:
        progress('fsck (%d/%d)\r' % (count, len(extra)))
    if not opt.jobs:
        nc = do_pack(base, last)
        code = code or nc
        count += 1
    else:
        # cap the number of concurrent children at opt.jobs
        while len(outstanding) >= opt.jobs:
            (pid,nc) = os.wait()
            nc >>= 8
            if pid in outstanding:
                del outstanding[pid]
                code = code or nc
                count += 1
        pid = os.fork()
        if pid:  # parent
            outstanding[pid] = 1
        else:  # child
            try:
                sys.exit(do_pack(base, last))
            except Exception, e:
                log('exception: %r\n' % e)
                sys.exit(99)

# reap any remaining children
while len(outstanding):
    (pid,nc) = os.wait()
    nc >>= 8
    if pid in outstanding:
        del outstanding[pid]
        code = code or nc
        count += 1
    if not opt.verbose:
        progress('fsck (%d/%d)\r' % (count, len(extra)))

if not opt.verbose and istty:
    log('fsck done. \n')
sys.exit(code)


#!/usr/bin/env python
# --- bup rbackup: run index/save/split with the repository on a remote host -
import sys, os, struct, getopt, subprocess, signal
from bup import options, ssh
from bup.helpers import *

optspec = """
bup rbackup <hostname> index ...
bup rbackup <hostname> save ...
bup rbackup <hostname> split ...
"""
o = options.Options('bup rbackup', optspec, optfunc=getopt.getopt)
(opt, flags, extra) = o.parse(sys.argv[1:])
if len(extra) < 2:
    o.fatal('arguments expected')


class SigException(Exception):
    def __init__(self, signum):
        self.signum = signum
        Exception.__init__(self, 'signal %d received' % signum)


def handler(signum, frame):
    raise SigException(signum)


signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGINT, handler)

sp = None
p = None
ret = 99

try:
    hostname = extra[0]
    argv = extra[1:]
    p = ssh.connect(hostname, 'rbackup-server')

    # send the length-prefixed, NUL-separated argv to the remote end
    # (see bup rbackup-server for why it isn't on the command line)
    argvs = '\0'.join(['bup'] + argv)
    p.stdin.write(struct.pack('!I', len(argvs)) + argvs)
    p.stdin.flush()

    main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0]
    sp = subprocess.Popen([main_exe, 'server'], stdin=p.stdout, stdout=p.stdin)

    p.stdin.close()
    p.stdout.close()
finally:
    while 1:
        # if we get a signal while waiting, we have to keep waiting, just
        # in case our child doesn't die.
        try:
            ret = p.wait()
            sp.wait()
            break
        except SigException, e:
            log('\nbup rbackup: %s\n' % e)
            # forward the signal to the ssh child and report failure
            os.kill(p.pid, e.signum)
            ret = 84
sys.exit(ret)


#!/usr/bin/env python
# --- bup newliner: reformat \r-progress output so old text is erased --------
import sys, os, re
from bup import options

optspec = """
bup newliner
"""
o = options.Options('bup newliner', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal("no arguments expected")

r = re.compile(r'([\r\n])')
lastlen = 0   # length of the previous \r-terminated line, for padding
all = ''      # pending input not yet terminated by \r or \n
while 1:
    l = r.split(all, 1)
    if len(l) <= 1:
        # no complete line buffered yet; read more input
        try:
            b = os.read(sys.stdin.fileno(), 4096)
        except KeyboardInterrupt:
            break
        if not b:
            break
        all += b
    else:
        assert(len(l) == 3)
        (line, splitchar, all) = l
        #splitchar = '\n'
        # pad to the previous line's length so leftovers get overwritten
        sys.stdout.write('%-*s%s' % (lastlen, line, splitchar))
        if splitchar == '\r':
            lastlen = len(line)
        else:
            lastlen = 0
        sys.stdout.flush()

if lastlen or all:
    sys.stdout.write('%-*s\n' % (lastlen, all))


#!/usr/bin/env python
# --- bup margin: longest matching sha1 bit prefix between adjacent objects --
import sys
from bup import options, git, _hashsplit
from bup.helpers import *

optspec = """
bup margin
"""
o = options.Options('bup margin', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal("no arguments expected")

git.check_repo_or_die()
#git.ignore_midx = 1

mi = git.PackIdxList(git.repo('objects/pack'))
last = '\0'*20
longmatch = 0
for i in mi:
    if i == last:
        continue
    #assert(str(i) >= last)
    pm = _hashsplit.bitmatch(last, i)
    longmatch = max(longmatch, pm)
    last = i
print longmatch


#!/usr/bin/env python
# --- bup drecurse: recursively list files (mostly for testing/profiling) ----
from bup import options, drecurse
from bup.helpers import *

optspec = """
bup drecurse <path>
--
x,xdev,one-file-system   don't cross filesystem boundaries
q,quiet  don't actually print filenames
profile  run under the python profiler
"""
o = options.Options('bup drecurse', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if len(extra) != 1:
    o.fatal("exactly one filename expected")

it = drecurse.recursive_dirlist(extra, opt.xdev)
if opt.profile:
    import cProfile
    def do_it():
        for i in it:
            pass
    cProfile.run('do_it()')
else:
    if opt.quiet:
        for i in it:
            pass
    else:
        for (name,st) in it:
            print name

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)


#!/usr/bin/env python
# --- bup split: hashsplit stdin/files into blobs/trees/commits --------------
import sys, time, struct
from bup import hashsplit, git, options, client
from bup.helpers import *
from subprocess import PIPE

optspec = """
bup split [-tcb] [-n name] [--bench] [filenames...]
--
r,remote=  remote repository path
b,blobs    output a series of blob ids
t,tree     output a tree id
c,commit   output a commit id
n,name=    name of backup set to update (if any)
N,noop     don't actually save the data anywhere
q,quiet    don't print progress messages
v,verbose  increase log output (can be used more than once)
copy       just copy input to output, hashsplitting along the way
bench      print benchmark timings to stderr
max-pack-size=  maximum bytes in a single pack
max-pack-objects=  maximum number of objects in a single pack
fanout=    maximum number of blobs in a single tree
"""
o = options.Options('bup split', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

git.check_repo_or_die()
if not (opt.blobs or opt.tree or opt.commit or opt.name or
        opt.noop or opt.copy):
    o.fatal("use one or more of -b, -t, -c, -n, -N, --copy")
if (opt.noop or opt.copy) and (opt.blobs or opt.tree or
                               opt.commit or opt.name):
    o.fatal('-N is incompatible with -b, -t, -c, -n')
if opt.verbose >= 2:
    git.verbose = opt.verbose - 1
    opt.bench = 1
if opt.max_pack_size:
    hashsplit.max_pack_size = parse_num(opt.max_pack_size)
if opt.max_pack_objects:
    hashsplit.max_pack_objects = parse_num(opt.max_pack_objects)
if opt.fanout:
    hashsplit.fanout = parse_num(opt.fanout)
if opt.blobs:
    hashsplit.fanout = 0

is_reverse = os.environ.get('BUP_SERVER_REVERSE')
if is_reverse and opt.remote:
    o.fatal("don't use -r in reverse mode; it's automatic")

start_time = time.time()

refname = opt.name and 'refs/heads/%s' % opt.name or None
if opt.noop or opt.copy:
    # no repository writes at all
    cli = w = oldref = None
elif opt.remote or is_reverse:
    cli = client.Client(opt.remote)
    oldref = refname and cli.read_ref(refname) or None
    w = cli.new_packwriter()
else:
    cli = None
    oldref = refname and git.read_ref(refname) or None
    w = git.PackWriter()

files = extra and (open(fn) for fn in extra) or [sys.stdin]
if w:
    shalist = hashsplit.split_to_shalist(w, files)
    tree = w.new_tree(shalist)
else:
    # -N/--copy: just run the splitter, optionally echoing the data
    last = 0
    for (blob, bits) in hashsplit.hashsplit_iter(files):
        hashsplit.total_split += len(blob)
        if opt.copy:
            sys.stdout.write(str(blob))
        megs = hashsplit.total_split/1024/1024
        if not opt.quiet and last != megs:
            progress('%d Mbytes read\r' % megs)
            last = megs
    progress('%d Mbytes read, done.\n' % megs)

if opt.verbose:
    log('\n')
if opt.blobs:
    for (mode,name,bin) in shalist:
        print bin.encode('hex')
if opt.tree:
    print tree.encode('hex')
if opt.commit or opt.name:
    msg = 'bup split\n\nGenerated by command:\n%r' % sys.argv
    ref = opt.name and ('refs/heads/%s' % opt.name) or None
    commit = w.new_commit(oldref, tree, msg)
    if opt.commit:
        print commit.encode('hex')

if w:
    w.close()  # must close before we can update the ref

if opt.name:
    if cli:
        cli.update_ref(refname, commit, oldref)
    else:
        git.update_ref(refname, commit, oldref)

if cli:
    cli.close()

secs = time.time() - start_time
size = hashsplit.total_split
if opt.bench:
    log('\nbup: %.2fkbytes in %.2f secs = %.2f kbytes/sec\n'
        % (size/1024., secs, size/1024./secs))


#!/usr/bin/env python
# --- bup memtest: measure memory used by PackIdxList lookups ----------------
import sys, re, struct, mmap
from bup import git, options
from bup.helpers import *


def s_from_bytes(bytes):
    # build a binary string from a list of byte values
    clist = [chr(b) for b in bytes]
    return ''.join(clist)


def report(count):
    # Print selected memory stats from /proc/self/status; count < 0 prints
    # the header row instead of values.
    fields = ['VmSize', 'VmRSS', 'VmData', 'VmStk']
    d = {}
    for line in open('/proc/self/status').readlines():
        l = re.split(r':\s*', line.strip(), 1)
        d[l[0]] = l[1]
    if count >= 0:
        e1 = count
        fields = [d[k] for k in fields]
    else:
        e1 = ''
    print ('%9s ' + ('%10s ' * len(fields))) % tuple([e1] + fields)
    sys.stdout.flush()


optspec = """
bup memtest [-n elements] [-c cycles]
--
n,number=  number of objects per cycle
c,cycles=  number of cycles to run
ignore-midx  ignore .midx files, use only .idx files
"""
o = options.Options('bup memtest', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal('no arguments expected')

git.ignore_midx = opt.ignore_midx

git.check_repo_or_die()
m = git.PackIdxList(git.repo('objects/pack'))

cycles = opt.cycles or 100
number = opt.number or 10000

report(-1)
f = open('/dev/urandom')
a = mmap.mmap(-1, 20)
report(0)
for c in xrange(cycles):
    for n in xrange(number):
        b = f.read(3)
        if 0:
            # slower struct-based variant, kept for reference
            bytes = list(struct.unpack('!BBB', b)) + [0]*17
            bytes[2] &= 0xf0
            bin = struct.pack('!20s', s_from_bytes(bytes))
        else:
            # build a 20-byte sha with ~random 20-bit prefix via the mmap buf
            a[0:2] = b[0:2]
            a[2] = chr(ord(b[2]) & 0xf0)
            bin = str(a[0:20])
        #print bin.encode('hex')
        m.exists(bin)
    report((c+1)*number)


#!/usr/bin/env python
# --- bup ls: list the contents of a repository path -------------------------
import sys, os, stat
from bup import options, git, vfs
from bup.helpers import *


def print_node(text, n):
    # ls-style line: optional hash prefix, then name with /@ type suffix
    prefix = ''
    if opt.hash:
        prefix += "%s " % n.hash.encode('hex')
    if stat.S_ISDIR(n.mode):
        print '%s%s/' % (prefix, text)
    elif stat.S_ISLNK(n.mode):
        print '%s%s@' % (prefix, text)
    else:
        print '%s%s' % (prefix, text)


optspec = """
bup ls
--
s,hash   show hash for each file
"""
o = options.Options('bup ls', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

git.check_repo_or_die()
top = vfs.RefList(None)

if not extra:
    extra = ['/']

ret = 0
for d in extra:
    try:
        n = top.lresolve(d)
        if stat.S_ISDIR(n.mode):
            for sub in n:
                print_node(sub.name, sub)
        else:
            print_node(d, n)
    except vfs.NodeError, e:
        log('error: %s\n' % e)
        ret = 1

sys.exit(ret)


#!/usr/bin/env python
# --- bup ftp: interactive ftp-like shell over the repository vfs ------------
import sys, os, re, stat, readline, fnmatch
from bup import options, git, shquote, vfs
from bup.helpers import *


def node_name(text, n):
    # name with a trailing / for dirs or @ for symlinks
    if stat.S_ISDIR(n.mode):
        return '%s/' % text
    elif stat.S_ISLNK(n.mode):
        return '%s@' % text
    else:
        return '%s' % text


def do_ls(path, n):
    l = []
    if stat.S_ISDIR(n.mode):
        for sub in n:
            l.append(node_name(sub.name, sub))
    else:
        l.append(node_name(path, n))
    print columnate(l, '')


def write_to_file(inf, outf):
    for blob in chunkyreader(inf):
        outf.write(blob)


def inputiter():
    # yield command lines: with a prompt on a tty, plainly otherwise
    if os.isatty(sys.stdin.fileno()):
        while 1:
            try:
                yield raw_input('bup> ')
            except EOFError:
                break
    else:
        for line in sys.stdin:
            yield line


def _completer_get_subs(line):
    # resolve the partially-typed last word of 'line' into candidate nodes
    (qtype, lastword) = shquote.unfinished_word(line)
    (dir,name) = os.path.split(lastword)
    #log('\ncompleter: %r %r %r\n' % (qtype, lastword, text))
    n = pwd.resolve(dir)
    subs = list(filter(lambda x: x.name.startswith(name),
                       n.subs()))
    return (dir, name, qtype, lastword, subs)


_last_line = None
_last_res = None
def completer(text, state):
    # readline completion entry point; caches the last line's candidate list
    global _last_line
    global _last_res
    try:
        line = readline.get_line_buffer()[:readline.get_endidx()]
        if _last_line != line:
            _last_res = _completer_get_subs(line)
            _last_line = line
        (dir, name, qtype, lastword, subs) = _last_res
        if state < len(subs):
            sn = subs[state]
            sn1 = sn.resolve('')  # deref symlinks
            fullname = os.path.join(dir, sn.name)
            if stat.S_ISDIR(sn1.mode):
                ret = shquote.what_to_add(qtype, lastword, fullname+'/',
                                          terminate=False)
            else:
                ret = shquote.what_to_add(qtype, lastword, fullname,
                                          terminate=True) + ' '
            return text + ret
    except Exception, e:
        log('\nerror in completion: %s\n' % e)


optspec = """
bup ftp
"""
o = options.Options('bup ftp', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

git.check_repo_or_die()

top = vfs.RefList(None)
pwd = top

if extra:
    lines = extra
else:
    readline.set_completer_delims(' \t\n\r/')
    readline.set_completer(completer)
    readline.parse_and_bind("tab: complete")
    lines = inputiter()

for line in lines:
    if not line.strip():
        continue
    words = [word for (wordstart,word) in shquote.quotesplit(line)]
    cmd = words[0].lower()
    #log('execute: %r %r\n' % (cmd, parm))
    try:
        if cmd == 'ls':
            for parm in (words[1:] or ['.']):
                do_ls(parm, pwd.resolve(parm))
        elif cmd == 'cd':
            for parm in words[1:]:
                pwd = pwd.resolve(parm)
        elif cmd == 'pwd':
            print pwd.fullname()
        elif cmd == 'cat':
            for parm in words[1:]:
                write_to_file(pwd.resolve(parm).open(), sys.stdout)
        elif cmd == 'get':
            if len(words) not in [2,3]:
                # NOTE(review): this usage text looks truncated — it likely
                # lost a '<filename>' token; confirm against upstream.
                raise Exception('Usage: get [localname]')
            rname = words[1]
            (dir,base) = os.path.split(rname)
            lname = len(words)>2 and words[2] or base
            inf = pwd.resolve(rname).open()
            log('Saving %r\n' % lname)
            write_to_file(inf, open(lname, 'wb'))
        elif cmd == 'mget':
            for parm in words[1:]:
                (dir,base) = os.path.split(parm)
                for n in pwd.resolve(dir).subs():
                    if fnmatch.fnmatch(n.name, base):
                        try:
                            log('Saving %r\n' % n.name)
                            inf = n.open()
                            outf = open(n.name, 'wb')
                            write_to_file(inf, outf)
                            outf.close()
                        except Exception, e:
                            log(' error: %s\n' % e)
        elif cmd == 'help' or cmd == '?':
            log('Commands: ls cd pwd cat get mget help quit\n')
        elif cmd == 'quit' or cmd == 'exit' or cmd == 'bye':
            break
        else:
            raise Exception('no such command %r' % cmd)
    except Exception, e:
        log('error: %s\n' % e)
        #raise


#!/usr/bin/env python
# --- bup random: write deterministic pseudo-random bytes to stdout ----------
import sys, mmap
from bup import options, _hashsplit
from bup.helpers import *

optspec = """
bup random [-S seed] <numbytes>
--
S,seed=   optional random number seed (default 1)
f,force   print random data to stdout even if it's a tty
"""
o = options.Options('bup random', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if len(extra) != 1:
    o.fatal("exactly one argument expected")

total = parse_num(extra[0])
# refuse to spew binary at a terminal unless forced (BUP_FORCE_TTY bit 1
# pretends stdout is a tty, for tests)
if opt.force or (not os.isatty(1) and
                 not atoi(os.environ.get('BUP_FORCE_TTY')) & 1):
    _hashsplit.write_random(sys.stdout.fileno(), total, opt.seed or 0)
else:
    log('error: not writing binary data to a terminal. Use -f to force.\n')
    sys.exit(1)


#!/usr/bin/env python
# --- bup help: show the man page for a subcommand ---------------------------
import sys, os, glob
from bup import options

optspec = """
bup help <command>
"""
o = options.Options('bup help', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if len(extra) == 0:
    # the wrapper program provides the default usage string
    os.execvp(os.environ['BUP_MAIN_EXE'], ['bup'])
elif len(extra) == 1:
    docname = (extra[0]=='bup' and 'bup' or ('bup-%s' % extra[0]))
    exe = sys.argv[0]
    (exepath, exefile) = os.path.split(exe)
    manpath = os.path.join(exepath, '../Documentation/' + docname + '.[1-9]')
    g = glob.glob(manpath)
    if g:
        # prefer the locally built man page, if present
        os.execvp('man', ['man', '-l', g[0]])
    else:
        os.execvp('man', ['man', docname])
else:
    o.fatal("exactly one command name expected")


#!/usr/bin/env python
# --- bup fuse: mount the repository as a read-only filesystem ---------------
import sys, os, stat, errno, fuse, re, time, tempfile
from bup import options, git, vfs
from bup.helpers import *


class Stat(fuse.Stat):
    # fuse.Stat with every field zeroed, ready to be filled in by getattr()
    def __init__(self):
        self.st_mode = 0
        self.st_ino = 0
        self.st_dev = 0
        self.st_nlink = 0
        self.st_uid = 0
        self.st_gid = 0
        self.st_size = 0
        self.st_atime = 0
        self.st_mtime = 0
        self.st_ctime = 0
        self.st_blocks = 0
        self.st_blksize = 0
        self.st_rdev = 0


# path-tuple -> vfs node cache shared by all filesystem operations
cache = {}
def cache_get(top, path):
    # Resolve 'path' to a vfs node, starting from the longest cached prefix
    # and caching every intermediate node along the way.
    parts = path.split('/')
    cache[('',)] = top
    c = None
    max = len(parts)
    #log('cache: %r\n' % cache.keys())
    for i in range(max):
        pre = parts[:max-i]
        #log('cache trying: %r\n' % pre)
        c = cache.get(tuple(pre))
        if c:
            rest = parts[max-i:]
            for r in rest:
                #log('resolving %r from %r\n' % (r, c.fullname()))
                c = c.lresolve(r)
                key = tuple(pre + [r])
                #log('saving: %r\n' % (key,))
                cache[key] = c
            break
    assert(c)
    return c


class BupFs(fuse.Fuse):
    # read-only FUSE filesystem view of the bup vfs rooted at 'top'
    def __init__(self, top):
        fuse.Fuse.__init__(self)
        self.top = top

    def getattr(self, path):
        log('--getattr(%r)\n' % path)
        try:
            node = cache_get(self.top, path)
            st = Stat()
            st.st_mode = node.mode
            st.st_nlink = node.nlinks()
            st.st_size = node.size()
            st.st_mtime = node.mtime
            st.st_ctime = node.ctime
            st.st_atime = node.atime
            return st
        except vfs.NoSuchFile:
            return -errno.ENOENT

    def readdir(self, path, offset):
        log('--readdir(%r)\n' % path)
        node = cache_get(self.top, path)
        yield fuse.Direntry('.')
        yield fuse.Direntry('..')
        for sub in node.subs():
            yield fuse.Direntry(sub.name)

    def readlink(self, path):
        log('--readlink(%r)\n' % path)
        node = cache_get(self.top, path)
        return node.readlink()

    def open(self, path, flags):
        log('--open(%r)\n' % path)
        node = cache_get(self.top, path)
        # read-only filesystem: reject any write access mode
        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
        if (flags & accmode) != os.O_RDONLY:
            return -errno.EACCES
        node.open()

    def release(self, path, flags):
        log('--release(%r)\n' % path)

    def read(self, path, size, offset):
        log('--read(%r)\n' % path)
        n = cache_get(self.top, path)
        o = n.open()
        o.seek(offset)
        return o.read(size)


if not hasattr(fuse, '__version__'):
    raise RuntimeError, "your fuse module is too old for fuse.__version__"
fuse.fuse_python_api = (0, 2)

optspec = """
bup fuse [-d] [-f] <mountpoint>
--
d,debug   increase debug level
f,foreground  run in foreground
"""
o = options.Options('bup fuse', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if len(extra) != 1:
    o.fatal("exactly one argument expected")

git.check_repo_or_die()
top = vfs.RefList(None)
f = BupFs(top)
f.fuse_args.mountpoint = extra[0]
if opt.debug:
    f.fuse_args.add('debug')
if opt.foreground:
    f.fuse_args.setmod('foreground')
print f.multithreaded
f.multithreaded = False
f.main()


#!/usr/bin/env python
# --- bup init: initialize a (possibly remote) repository --------------------
from bup import git, options, client
from bup.helpers import *

optspec = """
[BUP_DIR=...] bup init [-r host:path]
--
r,remote=  remote repository path
"""
o = options.Options('bup init', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal("no arguments expected")

if opt.remote:
    git.init_repo()  # local repo
    git.check_repo_or_die()
    cli = client.Client(opt.remote, create=True)
    cli.close()
else:
    git.init_repo()


#!/usr/bin/env python
# --- bup midx: merge multiple .idx files into a single .midx ----------------
import sys, math, struct, glob
from bup import options, git
from bup.helpers import *

PAGE_SIZE=4096
SHA_PER_PAGE=PAGE_SIZE/200.
def merge(idxlist, bits, table): count = 0 for e in git.idxmerge(idxlist): count += 1 prefix = git.extract_bits(e, bits) table[prefix] = count yield e def do_midx(outdir, outfilename, infilenames): if not outfilename: assert(outdir) sum = Sha1('\0'.join(infilenames)).hexdigest() outfilename = '%s/midx-%s.midx' % (outdir, sum) inp = [] total = 0 for name in infilenames: ix = git.PackIdx(name) inp.append(ix) total += len(ix) log('Merging %d indexes (%d objects).\n' % (len(infilenames), total)) if (not opt.force and (total < 1024 and len(infilenames) < 3)) \ or (opt.force and not total): log('midx: nothing to do.\n') return pages = int(total/SHA_PER_PAGE) or 1 bits = int(math.ceil(math.log(pages, 2))) entries = 2**bits log('Table size: %d (%d bits)\n' % (entries*4, bits)) table = [0]*entries try: os.unlink(outfilename) except OSError: pass f = open(outfilename + '.tmp', 'w+') f.write('MIDX\0\0\0\2') f.write(struct.pack('!I', bits)) assert(f.tell() == 12) f.write('\0'*4*entries) for e in merge(inp, bits, table): f.write(e) f.write('\0'.join(os.path.basename(p) for p in infilenames)) f.seek(12) f.write(struct.pack('!%dI' % entries, *table)) f.close() os.rename(outfilename + '.tmp', outfilename) # this is just for testing if 0: p = git.PackMidx(outfilename) assert(len(p.idxnames) == len(infilenames)) print p.idxnames assert(len(p) == total) pi = iter(p) for i in merge(inp, total, bits, table): assert(i == pi.next()) assert(p.exists(i)) print outfilename optspec = """ bup midx [options...] 
-- o,output= output midx filename (default: auto-generated) a,auto automatically create .midx from any unindexed .idx files f,force automatically create .midx from *all* .idx files """ o = options.Options('bup midx', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra and (opt.auto or opt.force): o.fatal("you can't use -f/-a and also provide filenames") git.check_repo_or_die() if extra: do_midx(git.repo('objects/pack'), opt.output, extra) elif opt.auto or opt.force: paths = [git.repo('objects/pack')] paths += glob.glob(git.repo('index-cache/*/.')) for path in paths: log('midx: scanning %s\n' % path) if opt.force: do_midx(path, opt.output, glob.glob('%s/*.idx' % path)) elif opt.auto: m = git.PackIdxList(path) needed = {} for pack in m.packs: # only .idx files without a .midx are open if pack.name.endswith('.idx'): needed[pack.name] = 1 del m do_midx(path, opt.output, needed.keys()) log('\n') else: o.fatal("you must use -f or -a or provide input filenames") #!/usr/bin/env python import sys, os, random from bup import options from bup.helpers import * def randblock(n): l = [] for i in xrange(n): l.append(chr(random.randrange(0,256))) return ''.join(l) optspec = """ bup damage [-n count] [-s maxsize] [-S seed] -- WARNING: THIS COMMAND IS EXTREMELY DANGEROUS n,num= number of blocks to damage s,size= maximum size of each damaged block percent= maximum size of each damaged block (as a percent of entire file) equal spread damage evenly throughout the file S,seed= random number seed (for repeatable tests) """ o = options.Options('bup damage', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if not extra: o.fatal('filenames expected') if opt.seed != None: random.seed(opt.seed) for name in extra: log('Damaging "%s"...\n' % name) f = open(name, 'r+b') st = os.fstat(f.fileno()) size = st.st_size if opt.percent or opt.size: ms1 = int(float(opt.percent or 0)/100.0*size) or size ms2 = opt.size or size maxsize = min(ms1, ms2) else: maxsize = 1 chunks = opt.num or 10 
chunksize = size/chunks for r in range(chunks): sz = random.randrange(1, maxsize+1) if sz > size: sz = size if opt.equal: ofs = r*chunksize else: ofs = random.randrange(0, size - sz + 1) log(' %6d bytes at %d\n' % (sz, ofs)) f.seek(ofs) f.write(randblock(sz)) f.close() #!/usr/bin/env python import sys, struct, mmap from bup import options, git from bup.helpers import * suspended_w = None def init_dir(conn, arg): git.init_repo(arg) log('bup server: bupdir initialized: %r\n' % git.repodir) conn.ok() def set_dir(conn, arg): git.check_repo_or_die(arg) log('bup server: bupdir is %r\n' % git.repodir) conn.ok() def list_indexes(conn, junk): git.check_repo_or_die() for f in os.listdir(git.repo('objects/pack')): if f.endswith('.idx'): conn.write('%s\n' % f) conn.ok() def send_index(conn, name): git.check_repo_or_die() assert(name.find('/') < 0) assert(name.endswith('.idx')) idx = git.PackIdx(git.repo('objects/pack/%s' % name)) conn.write(struct.pack('!I', len(idx.map))) conn.write(idx.map) conn.ok() def receive_objects(conn, junk): global suspended_w git.check_repo_or_die() suggested = {} if suspended_w: w = suspended_w suspended_w = None else: w = git.PackWriter() while 1: ns = conn.read(4) if not ns: w.abort() raise Exception('object read: expected length header, got EOF\n') n = struct.unpack('!I', ns)[0] #log('expecting %d bytes\n' % n) if not n: log('bup server: received %d object%s.\n' % (w.count, w.count!=1 and "s" or '')) fullpath = w.close() if fullpath: (dir, name) = os.path.split(fullpath) conn.write('%s.idx\n' % name) conn.ok() return elif n == 0xffffffff: log('bup server: receive-objects suspended.\n') suspended_w = w conn.ok() return buf = conn.read(n) # object sizes in bup are reasonably small #log('read %d bytes\n' % n) if len(buf) < n: w.abort() raise Exception('object read: expected %d bytes, got %d\n' % (n, len(buf))) (type, content) = git._decode_packobj(buf) sha = git.calc_hash(type, content) oldpack = w.exists(sha) # FIXME: we only suggest a single 
index per cycle, because the client # is currently dumb to download more than one per cycle anyway. # Actually we should fix the client, but this is a minor optimization # on the server side. if not suggested and \ oldpack and (oldpack == True or oldpack.endswith('.midx')): # FIXME: we shouldn't really have to know about midx files # at this layer. But exists() on a midx doesn't return the # packname (since it doesn't know)... probably we should just # fix that deficiency of midx files eventually, although it'll # make the files bigger. This method is certainly not very # efficient. w.objcache.refresh(skip_midx = True) oldpack = w.objcache.exists(sha) log('new suggestion: %r\n' % oldpack) assert(oldpack) assert(oldpack != True) assert(not oldpack.endswith('.midx')) w.objcache.refresh(skip_midx = False) if not suggested and oldpack: assert(oldpack.endswith('.idx')) (dir,name) = os.path.split(oldpack) if not (name in suggested): log("bup server: suggesting index %s\n" % name) conn.write('index %s\n' % name) suggested[name] = 1 else: w._raw_write([buf]) # NOTREACHED def read_ref(conn, refname): git.check_repo_or_die() r = git.read_ref(refname) conn.write('%s\n' % (r or '').encode('hex')) conn.ok() def update_ref(conn, refname): git.check_repo_or_die() newval = conn.readline().strip() oldval = conn.readline().strip() git.update_ref(refname, newval.decode('hex'), oldval.decode('hex')) conn.ok() def cat(conn, id): git.check_repo_or_die() try: for blob in git.cat(id): conn.write(struct.pack('!I', len(blob))) conn.write(blob) except KeyError, e: log('server: error: %s\n' % e) conn.write('\0\0\0\0') conn.error(e) else: conn.write('\0\0\0\0') conn.ok() optspec = """ bup server """ o = options.Options('bup server', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal('no arguments expected') log('bup server: reading from stdin.\n') commands = { 'init-dir': init_dir, 'set-dir': set_dir, 'list-indexes': list_indexes, 'send-index': send_index, 
'receive-objects': receive_objects, 'read-ref': read_ref, 'update-ref': update_ref, 'cat': cat, } # FIXME: this protocol is totally lame and not at all future-proof. # (Especially since we abort completely as soon as *anything* bad happens) conn = Conn(sys.stdin, sys.stdout) lr = linereader(conn) for _line in lr: line = _line.strip() if not line: continue log('bup server: command: %r\n' % line) words = line.split(' ', 1) cmd = words[0] rest = len(words)>1 and words[1] or '' if cmd == 'quit': break else: cmd = commands.get(cmd) if cmd: cmd(conn, rest) else: raise Exception('unknown server command: %r\n' % line) log('bup server: done\n') #!/usr/bin/env python import sys, time, struct from bup import hashsplit, git, options, client from bup.helpers import * from subprocess import PIPE optspec = """ bup join [-r host:path] [refs or hashes...] -- r,remote= remote repository path """ o = options.Options('bup join', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() if not extra: extra = linereader(sys.stdin) ret = 0 if opt.remote: cli = client.Client(opt.remote) cat = cli.cat else: cp = git.CatPipe() cat = cp.join for id in extra: try: for blob in cat(id): sys.stdout.write(blob) except KeyError, e: sys.stdout.flush() log('error: %s\n' % e) ret = 1 sys.exit(ret) #!/usr/bin/env python import sys, re, errno, stat, time, math from bup import hashsplit, git, options, index, client from bup.helpers import * optspec = """ bup save [-tc] [-n name] -- r,remote= remote repository path t,tree output a tree id c,commit output a commit id n,name= name of backup set to update (if any) v,verbose increase log output (can be used more than once) q,quiet don't show progress meter smaller= only back up files smaller than n bytes """ o = options.Options('bup save', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() if not (opt.tree or opt.commit or opt.name): o.fatal("use one or more of -t, -c, -n") if not extra: o.fatal("no filenames 
given") opt.progress = (istty and not opt.quiet) opt.smaller = parse_num(opt.smaller or 0) is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") refname = opt.name and 'refs/heads/%s' % opt.name or None if opt.remote or is_reverse: cli = client.Client(opt.remote) oldref = refname and cli.read_ref(refname) or None w = cli.new_packwriter() else: cli = None oldref = refname and git.read_ref(refname) or None w = git.PackWriter() handle_ctrl_c() def eatslash(dir): if dir.endswith('/'): return dir[:-1] else: return dir parts = [''] shalists = [[]] def _push(part): assert(part) parts.append(part) shalists.append([]) def _pop(force_tree): assert(len(parts) >= 1) part = parts.pop() shalist = shalists.pop() tree = force_tree or w.new_tree(shalist) if shalists: shalists[-1].append(('40000', part, tree)) else: # this was the toplevel, so put it back for sanity shalists.append(shalist) return tree lastremain = None def progress_report(n): global count, subcount, lastremain subcount += n cc = count + subcount pct = total and (cc*100.0/total) or 0 now = time.time() elapsed = now - tstart kps = elapsed and int(cc/1024./elapsed) kps_frac = 10 ** int(math.log(kps+1, 10) - 1) kps = int(kps/kps_frac)*kps_frac if cc: remain = elapsed*1.0/cc * (total-cc) else: remain = 0.0 if (lastremain and (remain > lastremain) and ((remain - lastremain)/lastremain < 0.05)): remain = lastremain else: lastremain = remain hours = int(remain/60/60) mins = int(remain/60 - hours*60) secs = int(remain - hours*60*60 - mins*60) if elapsed < 30: remainstr = '' kpsstr = '' else: kpsstr = '%dk/s' % kps if hours: remainstr = '%dh%dm' % (hours, mins) elif mins: remainstr = '%dm%d' % (mins, secs) else: remainstr = '%ds' % secs progress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' % (pct, cc/1024, total/1024, fcount, ftotal, remainstr, kpsstr)) r = index.Reader(git.repo('bupindex')) def already_saved(ent): return ent.is_valid() and 
w.exists(ent.sha) and ent.sha def wantrecurse_pre(ent): return not already_saved(ent) def wantrecurse_during(ent): return not already_saved(ent) or ent.sha_missing() total = ftotal = 0 if opt.progress: for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre): if not (ftotal % 10024): progress('Reading index: %d\r' % ftotal) exists = ent.exists() hashvalid = already_saved(ent) ent.set_sha_missing(not hashvalid) if not opt.smaller or ent.size < opt.smaller: if exists and not hashvalid: total += ent.size ftotal += 1 progress('Reading index: %d, done.\n' % ftotal) hashsplit.progress_callback = progress_report tstart = time.time() count = subcount = fcount = 0 lastskip_name = None lastdir = '' for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_during): (dir, file) = os.path.split(ent.name) exists = (ent.flags & index.IX_EXISTS) hashvalid = already_saved(ent) wasmissing = ent.sha_missing() oldsize = ent.size if opt.verbose: if not exists: status = 'D' elif not hashvalid: if ent.sha == index.EMPTY_SHA: status = 'A' else: status = 'M' else: status = ' ' if opt.verbose >= 2: log('%s %-70s\n' % (status, ent.name)) elif not stat.S_ISDIR(ent.mode) and lastdir != dir: if not lastdir.startswith(dir): log('%s %-70s\n' % (status, os.path.join(dir, ''))) lastdir = dir if opt.progress: progress_report(0) fcount += 1 if not exists: continue if opt.smaller and ent.size >= opt.smaller: if exists and not hashvalid: add_error('skipping large file "%s"' % ent.name) lastskip_name = ent.name continue assert(dir.startswith('/')) dirp = dir.split('/') while parts > dirp: _pop(force_tree = None) if dir != '/': for part in dirp[len(parts):]: _push(part) if not file: # no filename portion means this is a subdir. But # sub/parentdirectories already handled in the pop/push() part above. 
oldtree = already_saved(ent) # may be None newtree = _pop(force_tree = oldtree) if not oldtree: if lastskip_name and lastskip_name.startswith(ent.name): ent.invalidate() else: ent.validate(040000, newtree) ent.repack() if exists and wasmissing: count += oldsize continue # it's not a directory id = None if hashvalid: mode = '%o' % ent.gitmode id = ent.sha shalists[-1].append((mode, git.mangle_name(file, ent.mode, ent.gitmode), id)) else: if stat.S_ISREG(ent.mode): try: f = hashsplit.open_noatime(ent.name) except IOError, e: add_error(e) lastskip_name = ent.name except OSError, e: add_error(e) lastskip_name = ent.name else: (mode, id) = hashsplit.split_to_blob_or_tree(w, [f]) else: if stat.S_ISDIR(ent.mode): assert(0) # handled above elif stat.S_ISLNK(ent.mode): try: rl = os.readlink(ent.name) except OSError, e: add_error(e) lastskip_name = ent.name except IOError, e: add_error(e) lastskip_name = ent.name else: (mode, id) = ('120000', w.new_blob(rl)) else: add_error(Exception('skipping special file "%s"' % ent.name)) lastskip_name = ent.name if id: ent.validate(int(mode, 8), id) ent.repack() shalists[-1].append((mode, git.mangle_name(file, ent.mode, ent.gitmode), id)) if exists and wasmissing: count += oldsize subcount = 0 if opt.progress: pct = total and count*100.0/total or 100 progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. 
\n' % (pct, count/1024, total/1024, fcount, ftotal)) while len(parts) > 1: _pop(force_tree = None) assert(len(shalists) == 1) tree = w.new_tree(shalists[-1]) if opt.tree: print tree.encode('hex') if opt.commit or opt.name: msg = 'bup save\n\nGenerated by command:\n%r' % sys.argv ref = opt.name and ('refs/heads/%s' % opt.name) or None commit = w.new_commit(oldref, tree, msg) if opt.commit: print commit.encode('hex') w.close() # must close before we can update the ref if opt.name: if cli: cli.update_ref(refname, commit, oldref) else: git.update_ref(refname, commit, oldref) if cli: cli.close() if saved_errors: log('WARNING: %d errors encountered while saving.\n' % len(saved_errors)) sys.exit(1) #!/usr/bin/env python import sys, time from bup import options optspec = """ bup tick """ o = options.Options('bup tick', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal("no arguments expected") t = time.time() tleft = 1 - (t - int(t)) time.sleep(tleft) #!/usr/bin/env python import os, sys, stat, time from bup import options, git, index, drecurse from bup.helpers import * def merge_indexes(out, r1, r2): for e in index.MergeIter([r1, r2]): # FIXME: shouldn't we remove deleted entries eventually? When? 
out.add_ixentry(e) class IterHelper: def __init__(self, l): self.i = iter(l) self.cur = None self.next() def next(self): try: self.cur = self.i.next() except StopIteration: self.cur = None return self.cur def check_index(reader): try: log('check: checking forward iteration...\n') e = None d = {} for e in reader.forward_iter(): if e.children_n: if opt.verbose: log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n, e.name)) assert(e.children_ofs) assert(e.name.endswith('/')) assert(not d.get(e.children_ofs)) d[e.children_ofs] = 1 if e.flags & index.IX_HASHVALID: assert(e.sha != index.EMPTY_SHA) assert(e.gitmode) assert(not e or e.name == '/') # last entry is *always* / log('check: checking normal iteration...\n') last = None for e in reader: if last: assert(last > e.name) last = e.name except: log('index error! at %r\n' % e) raise log('check: passed.\n') def update_index(top): ri = index.Reader(indexfile) wi = index.Writer(indexfile) rig = IterHelper(ri.iter(name=top)) tstart = int(time.time()) hashgen = None if opt.fake_valid: def hashgen(name): return (0100644, index.FAKE_SHA) total = 0 for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev): if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)): sys.stdout.write('%s\n' % path) sys.stdout.flush() progress('Indexing: %d\r' % total) elif not (total % 128): progress('Indexing: %d\r' % total) total += 1 while rig.cur and rig.cur.name > path: # deleted paths if rig.cur.exists(): rig.cur.set_deleted() rig.cur.repack() rig.next() if rig.cur and rig.cur.name == path: # paths that already existed if pst: rig.cur.from_stat(pst, tstart) if not (rig.cur.flags & index.IX_HASHVALID): if hashgen: (rig.cur.gitmode, rig.cur.sha) = hashgen(path) rig.cur.flags |= index.IX_HASHVALID if opt.fake_invalid: rig.cur.invalidate() rig.cur.repack() rig.next() else: # new paths wi.add(path, pst, hashgen = hashgen) progress('Indexing: %d, done.\n' % total) if ri.exists(): ri.save() wi.flush() if wi.count: wr = 
wi.new_reader() if opt.check: log('check: before merging: oldfile\n') check_index(ri) log('check: before merging: newfile\n') check_index(wr) mi = index.Writer(indexfile) merge_indexes(mi, ri, wr) ri.close() mi.close() wr.close() wi.abort() else: wi.close() optspec = """ bup index <-p|m|u> [options...] -- p,print print the index entries for the given names (also works with -u) m,modified print only added/deleted/modified files (implies -p) s,status print each filename with a status char (A/M/D) (implies -p) H,hash print the hash for each object next to its name (implies -p) l,long print more information about each file u,update (recursively) update the index entries for the given filenames x,xdev,one-file-system don't cross filesystem boundaries fake-valid mark all index entries as up-to-date even if they aren't fake-invalid mark all index entries as invalid check carefully check index file integrity f,indexfile= the name of the index file (default 'index') v,verbose increase log output (can be used more than once) """ o = options.Options('bup index', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if not (opt.modified or opt['print'] or opt.status or opt.update or opt.check): o.fatal('supply one or more of -p, -s, -m, -u, or --check') if (opt.fake_valid or opt.fake_invalid) and not opt.update: o.fatal('--fake-{in,}valid are meaningless without -u') if opt.fake_valid and opt.fake_invalid: o.fatal('--fake-valid is incompatible with --fake-invalid') git.check_repo_or_die() indexfile = opt.indexfile or git.repo('bupindex') handle_ctrl_c() if opt.check: log('check: starting initial check.\n') check_index(index.Reader(indexfile)) paths = index.reduce_paths(extra) if opt.update: if not paths: o.fatal('update (-u) requested but no paths given') for (rp,path) in paths: update_index(rp) if opt['print'] or opt.status or opt.modified: for (name, ent) in index.Reader(indexfile).filter(extra or ['']): if (opt.modified and (ent.is_valid() or ent.is_deleted() or not 
ent.mode)): continue line = '' if opt.status: if ent.is_deleted(): line += 'D ' elif not ent.is_valid(): if ent.sha == index.EMPTY_SHA: line += 'A ' else: line += 'M ' else: line += ' ' if opt.hash: line += ent.sha.encode('hex') + ' ' if opt.long: line += "%7s %7s " % (oct(ent.mode), oct(ent.gitmode)) print line + (name or './') if opt.check and (opt['print'] or opt.status or opt.modified or opt.update): log('check: starting final check.\n') check_index(index.Reader(indexfile)) if saved_errors: log('WARNING: %d errors encountered.\n' % len(saved_errors)) sys.exit(1) #!/usr/bin/env python import sys, os, struct from bup import options, helpers optspec = """ bup rbackup-server -- This command is not intended to be run manually. """ o = options.Options('bup rbackup-server', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal('no arguments expected') # get the subcommand's argv. # Normally we could just pass this on the command line, but since we'll often # be getting called on the other end of an ssh pipe, which tends to mangle # argv (by sending it via the shell), this way is much safer. buf = sys.stdin.read(4) sz = struct.unpack('!I', buf)[0] assert(sz > 0) assert(sz < 1000000) buf = sys.stdin.read(sz) assert(len(buf) == sz) argv = buf.split('\0') # stdin/stdout are supposedly connected to 'bup server' that the caller # started for us (often on the other end of an ssh tunnel), so we don't want # to misuse them. Move them out of the way, then replace stdout with # a pointer to stderr in case our subcommand wants to do something with it. # # It might be nice to do the same with stdin, but my experiments showed that # ssh seems to make its child's stderr a readable-but-never-reads-anything # socket. They really should have used shutdown(SHUT_WR) on the other end # of it, but probably didn't. Anyway, it's too messy, so let's just make sure # anyone reading from stdin is disappointed. 
# # (You can't just leave stdin/stdout "not open" by closing the file # descriptors. Then the next file that opens is automatically assigned 0 or 1, # and people *trying* to read/write stdin/stdout get screwed.) os.dup2(0, 3) os.dup2(1, 4) os.dup2(2, 1) fd = os.open('/dev/null', os.O_RDONLY) os.dup2(fd, 0) os.close(fd) os.environ['BUP_SERVER_REVERSE'] = helpers.hostname() os.execvp(argv[0], argv) sys.exit(99) #!/usr/bin/env python import sys, os, glob, subprocess, time from bup import options, git from bup.helpers import * par2_ok = 0 nullf = open('/dev/null') def debug(s): if opt.verbose: log(s) def run(argv): # at least in python 2.5, using "stdout=2" or "stdout=sys.stderr" below # doesn't actually work, because subprocess closes fd #2 right before # execing for some reason. So we work around it by duplicating the fd # first. fd = os.dup(2) # copy stderr try: p = subprocess.Popen(argv, stdout=fd, close_fds=False) return p.wait() finally: os.close(fd) def par2_setup(): global par2_ok rv = 1 try: p = subprocess.Popen(['par2', '--help'], stdout=nullf, stderr=nullf, stdin=nullf) rv = p.wait() except OSError: log('fsck: warning: par2 not found; disabling recovery features.\n') else: par2_ok = 1 def parv(lvl): if opt.verbose >= lvl: if istty: return [] else: return ['-q'] else: return ['-qq'] def par2_generate(base): return run(['par2', 'create', '-n1', '-c200'] + parv(2) + ['--', base, base+'.pack', base+'.idx']) def par2_verify(base): return run(['par2', 'verify'] + parv(3) + ['--', base]) def par2_repair(base): return run(['par2', 'repair'] + parv(2) + ['--', base]) def quick_verify(base): f = open(base + '.pack', 'rb') f.seek(-20, 2) wantsum = f.read(20) assert(len(wantsum) == 20) f.seek(0) sum = Sha1() for b in chunkyreader(f, os.fstat(f.fileno()).st_size - 20): sum.update(b) if sum.digest() != wantsum: raise ValueError('expected %r, got %r' % (wantsum.encode('hex'), sum.hexdigest())) def git_verify(base): if opt.quick: try: quick_verify(base) except Exception, e: 
debug('error: %s\n' % e) return 1 return 0 else: return run(['git', 'verify-pack', '--', base]) def do_pack(base, last): code = 0 if par2_ok and par2_exists and (opt.repair or not opt.generate): vresult = par2_verify(base) if vresult != 0: if opt.repair: rresult = par2_repair(base) if rresult != 0: print '%s par2 repair: failed (%d)' % (last, rresult) code = rresult else: print '%s par2 repair: succeeded (0)' % last code = 100 else: print '%s par2 verify: failed (%d)' % (last, vresult) code = vresult else: print '%s ok' % last elif not opt.generate or (par2_ok and not par2_exists): gresult = git_verify(base) if gresult != 0: print '%s git verify: failed (%d)' % (last, gresult) code = gresult else: if par2_ok and opt.generate: presult = par2_generate(base) if presult != 0: print '%s par2 create: failed (%d)' % (last, presult) code = presult else: print '%s ok' % last else: print '%s ok' % last else: assert(opt.generate and (not par2_ok or par2_exists)) debug(' skipped: par2 file already generated.\n') return code optspec = """ bup fsck [options...] [filenames...] -- r,repair attempt to repair errors using par2 (dangerous!) 
g,generate generate auto-repair information using par2 v,verbose increase verbosity (can be used more than once) quick just check pack sha1sum, don't use git verify-pack j,jobs= run 'n' jobs in parallel par2-ok immediately return 0 if par2 is ok, 1 if not disable-par2 ignore par2 even if it is available """ o = options.Options('bup fsck', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) par2_setup() if opt.par2_ok: if par2_ok: sys.exit(0) # 'true' in sh else: sys.exit(1) if opt.disable_par2: par2_ok = 0 git.check_repo_or_die() if not extra: debug('fsck: No filenames given: checking all packs.\n') extra = glob.glob(git.repo('objects/pack/*.pack')) code = 0 count = 0 outstanding = {} for name in extra: if name.endswith('.pack'): base = name[:-5] elif name.endswith('.idx'): base = name[:-4] elif name.endswith('.par2'): base = name[:-5] elif os.path.exists(name + '.pack'): base = name else: raise Exception('%s is not a pack file!' % name) (dir,last) = os.path.split(base) par2_exists = os.path.exists(base + '.par2') if par2_exists and os.stat(base + '.par2').st_size == 0: par2_exists = 0 sys.stdout.flush() debug('fsck: checking %s (%s)\n' % (last, par2_ok and par2_exists and 'par2' or 'git')) if not opt.verbose: progress('fsck (%d/%d)\r' % (count, len(extra))) if not opt.jobs: nc = do_pack(base, last) code = code or nc count += 1 else: while len(outstanding) >= opt.jobs: (pid,nc) = os.wait() nc >>= 8 if pid in outstanding: del outstanding[pid] code = code or nc count += 1 pid = os.fork() if pid: # parent outstanding[pid] = 1 else: # child try: sys.exit(do_pack(base, last)) except Exception, e: log('exception: %r\n' % e) sys.exit(99) while len(outstanding): (pid,nc) = os.wait() nc >>= 8 if pid in outstanding: del outstanding[pid] code = code or nc count += 1 if not opt.verbose: progress('fsck (%d/%d)\r' % (count, len(extra))) if not opt.verbose and istty: log('fsck done. 
\n') sys.exit(code) #!/usr/bin/env python import sys, os, struct, getopt, subprocess, signal from bup import options, ssh from bup.helpers import * optspec = """ bup rbackup index ... bup rbackup save ... bup rbackup split ... """ o = options.Options('bup rbackup', optspec, optfunc=getopt.getopt) (opt, flags, extra) = o.parse(sys.argv[1:]) if len(extra) < 2: o.fatal('arguments expected') class SigException(Exception): def __init__(self, signum): self.signum = signum Exception.__init__(self, 'signal %d received' % signum) def handler(signum, frame): raise SigException(signum) signal.signal(signal.SIGTERM, handler) signal.signal(signal.SIGINT, handler) sp = None p = None ret = 99 try: hostname = extra[0] argv = extra[1:] p = ssh.connect(hostname, 'rbackup-server') argvs = '\0'.join(['bup'] + argv) p.stdin.write(struct.pack('!I', len(argvs)) + argvs) p.stdin.flush() main_exe = os.environ.get('BUP_MAIN_EXE') or sys.argv[0] sp = subprocess.Popen([main_exe, 'server'], stdin=p.stdout, stdout=p.stdin) p.stdin.close() p.stdout.close() finally: while 1: # if we get a signal while waiting, we have to keep waiting, just # in case our child doesn't die. 
try: ret = p.wait() sp.wait() break except SigException, e: log('\nbup rbackup: %s\n' % e) os.kill(p.pid, e.signum) ret = 84 sys.exit(ret) #!/usr/bin/env python import sys, os, re from bup import options optspec = """ bup newliner """ o = options.Options('bup newliner', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal("no arguments expected") r = re.compile(r'([\r\n])') lastlen = 0 all = '' while 1: l = r.split(all, 1) if len(l) <= 1: try: b = os.read(sys.stdin.fileno(), 4096) except KeyboardInterrupt: break if not b: break all += b else: assert(len(l) == 3) (line, splitchar, all) = l #splitchar = '\n' sys.stdout.write('%-*s%s' % (lastlen, line, splitchar)) if splitchar == '\r': lastlen = len(line) else: lastlen = 0 sys.stdout.flush() if lastlen or all: sys.stdout.write('%-*s\n' % (lastlen, all)) #!/usr/bin/env python import sys from bup import options, git, _hashsplit from bup.helpers import * optspec = """ bup margin """ o = options.Options('bup margin', optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal("no arguments expected") git.check_repo_or_die() #git.ignore_midx = 1 mi = git.PackIdxList(git.repo('objects/pack')) last = '\0'*20 longmatch = 0 for i in mi: if i == last: continue #assert(str(i) >= last) pm = _hashsplit.bitmatch(last, i) longmatch = max(longmatch, pm) last = i print longmatch