#!/usr/bin/env python
-import sys, math, struct
-import options, git
-from helpers import *
+import sys, math, struct, glob, sha
+from bup import options, git
+from bup.helpers import *
PAGE_SIZE=4096
SHA_PER_PAGE=PAGE_SIZE/200.
-def next(it):
- try:
- return it.next()
- except StopIteration:
- return None
-
+def merge(idxlist, bits, table):
+    """Yield every entry from git.idxmerge(idxlist), filling table on the way.
+
+    For each entry, the slot table[git.extract_bits(entry, bits)] is set to
+    the one-based number of entries produced so far, so a slot ends up
+    holding the count as of the last entry that mapped to it.  Slots that
+    no entry maps to are left untouched.
+
+    NOTE(review): presumably git.idxmerge yields entries in sorted order and
+    extract_bits returns the leading 'bits' bits -- confirm in bup.git.
+    """
+    for seen, entry in enumerate(git.idxmerge(idxlist)):
+        # 'seen' is zero-based; the table stores the running one-based count.
+        table[git.extract_bits(entry, bits)] = seen + 1
+        yield entry
+
+
+def do_midx(outdir, outfilename, infilenames):
+    """Merge the pack indexes named by infilenames into one .midx file.
+
+    outdir: directory used to build a default output name; only consulted
+        (and asserted to be non-empty) when outfilename is empty.
+    outfilename: path of the .midx to write.  When empty, the name becomes
+        '<outdir>/midx-<sha1>.midx', where the SHA-1 is taken over the
+        NUL-joined input filenames.
+    infilenames: paths that are each opened with git.PackIndex.
+
+    Reads the module-level 'opt.force' flag.  Returns None.  Returns early,
+    after logging 'nothing to do', when not forced and there are fewer than
+    1024 objects in fewer than 3 indexes, or when forced and there are no
+    objects at all.
+
+    Layout written below: 'MIDX' followed by the bytes 0, 0, 0, 2; then
+    'bits' as a network-order 32-bit unsigned integer; then 2**bits
+    network-order 32-bit table slots; then the entries yielded by merge();
+    then the NUL-joined basenames of the input files.  Everything is
+    written to '<outfilename>.tmp', which is renamed to outfilename once
+    complete, and the final name is printed to stdout.
+    """
+    if not outfilename:
+        assert(outdir)
+        # 'digest' rather than 'sum', which would shadow the builtin.
+        digest = sha.sha('\0'.join(infilenames)).hexdigest()
+        outfilename = '%s/midx-%s.midx' % (outdir, digest)
-optspec = """
-bup midx -o outfile.midx <idxnames...>
---
-o,output= output midx file name
-"""
-o = options.Options('bup midx', optspec)
-(opt, flags, extra) = o.parse(sys.argv[1:])
+    inp = []
+    total = 0
+    for name in infilenames:
+        ix = git.PackIndex(name)
+        inp.append(ix)
+        total += len(ix)
-if not extra:
- log("bup midx: no input filenames given\n")
- o.usage()
-if not opt.output:
- log("bup midx: no output filename given\n")
- o.usage()
+    log('Merging %d indexes (%d objects).\n' % (len(infilenames), total))
+    too_small = total < 1024 and len(infilenames) < 3
+    if (not opt.force and too_small) or (opt.force and not total):
+        log('midx: nothing to do.\n')
+        return
+
+    # SHA_PER_PAGE is a float, so nothing is truncated until int(); the
+    # 'or 1' keeps math.log() away from zero for very small inputs.
+    pages = int(total/SHA_PER_PAGE) or 1
+    bits = int(math.ceil(math.log(pages, 2)))
+    entries = 2**bits
+    log('Table size: %d (%d bits)\n' % (entries*4, bits))
-inp = []
-total = 0
-for name in extra:
- ix = git.PackIndex(name)
- inp.append(ix)
- total += len(ix)
+    table = [0]*entries
+
+    # Remove any previous output; failure to remove it (for instance
+    # because it does not exist) is deliberately ignored.
+    try:
+        os.unlink(outfilename)
+    except OSError:
+        pass
+    # Binary mode: everything written here is raw bytes.
+    f = open(outfilename + '.tmp', 'w+b')
+    try:
+        f.write('MIDX\0\0\0\2')
+        f.write(struct.pack('!I', bits))
+        assert(f.tell() == 12)
+        # Reserve room for the table; the real values are only known once
+        # merge() has run, so they are filled in by the seek() below.
+        f.write('\0'*4*entries)
-log('total objects expected: %d\n' % total)
-pages = total/SHA_PER_PAGE
-log('pages: %d\n' % pages)
-bits = int(math.ceil(math.log(pages, 2)))
-log('table bits: %d\n' % bits)
-entries = 2**bits
-log('table entries: %d\n' % entries)
-log('table size: %d\n' % (entries*8))
+        for e in merge(inp, bits, table):
+            f.write(e)
+
+        f.write('\0'.join(os.path.basename(p) for p in infilenames))
-table = [0]*entries
+        f.seek(12)
+        f.write(struct.pack('!%dI' % entries, *table))
+    finally:
+        # Close the handle even when a write fails part-way through.
+        f.close()
+    os.rename(outfilename + '.tmp', outfilename)
-def merge(idxlist):
- iters = [iter(i) for i in inp]
- iters = [[next(it), it] for it in iters]
- count = 0
- while iters:
- if (count % 10000) == 0:
- log('\rMerging: %d/%d' % (count, total))
- e = min(iters) # FIXME: very slow for long lists
- assert(e[0])
- yield e[0]
- count += 1
- prefix = git.extract_bits(e[0], bits)
- table[prefix] = count
- e[0] = next(e[1])
- iters = filter(lambda x: x[0], iters)
- log('\rMerging: done. \n')
+    # this is just for testing
+    if 0:
+        p = git.PackMidx(outfilename)
+        assert(len(p.idxnames) == len(infilenames))
+        print p.idxnames
+        assert(len(p) == total)
+        pi = iter(p)
+        # merge() takes (idxlist, bits, table); it has no 'total' argument.
+        for i in merge(inp, bits, table):
+            assert(i == pi.next())
+            assert(p.exists(i))
-f = open(opt.output, 'w+')
-f.write('MIDX\0\0\0\1')
-f.write(struct.pack('!I', bits))
-assert(f.tell() == 12)
-f.write('\0'*8*entries)
+    print outfilename
-for e in merge(inp):
- f.write(e)
+# Command-line definition handed to options.Options below.
+optspec = """
+bup midx [options...] <idxnames...>
+--
+o,output= output midx filename (default: auto-generated)
+a,auto automatically create .midx from any unindexed .idx files
+f,force automatically create .midx from *all* .idx files
+"""
+o = options.Options('bup midx', optspec)
+opt, flags, extra = o.parse(sys.argv[1:])
-f.write('\0'.join([os.path.basename(p) for p in extra]))
+# Explicit filenames cannot be combined with either scanning mode.
+if extra and (opt.auto or opt.force):
+    o.fatal("you can't use -f/-a and also provide filenames")
-f.seek(12)
-f.write(struct.pack('!%dQ' % entries, *table))
-f.close()
+git.check_repo_or_die()
-# this is just for testing
-if 0:
- p = git.PackMidx(opt.output)
- assert(len(p.idxnames) == len(extra))
- print p.idxnames
- assert(len(p) == total)
- pi = iter(p)
- for i in merge(inp):
- assert(i == pi.next())
- assert(p.exists(i))
+if extra:
+    # Explicit list of .idx files: build a single midx from exactly those.
+    do_midx(git.repo('objects/pack'), opt.output, extra)
+elif not (opt.auto or opt.force):
+    o.fatal("you must use -f or -a or provide input filenames")
+else:
+    # Scanning mode: handle objects/pack plus every index-cache directory.
+    scan_dirs = [git.repo('objects/pack')] + glob.glob(git.repo('index-cache/*/.'))
+    for scan_dir in scan_dirs:
+        log('midx: scanning %s\n' % scan_dir)
+        if opt.force:
+            idxnames = glob.glob('%s/*.idx' % scan_dir)
+        else:
+            # Not forced, so this branch is the -a (auto) case.
+            mpi = git.MultiPackIndex(scan_dir)
+            needed = {}
+            # only .idx files without a .midx are open
+            for idx in mpi.packs:
+                if idx.name.endswith('.idx'):
+                    needed[idx.name] = 1
+            # Drop the index collection before do_midx() starts its work.
+            del mpi
+            idxnames = needed.keys()
+        do_midx(scan_dir, opt.output, idxnames)
+        log('\n')