]> arthur.barton.de Git - bup.git/blob - cmd-midx.py
Update README.md to reflect recent developments.
[bup.git] / cmd-midx.py
1 #!/usr/bin/env python
2 import sys, math, struct, glob, sha
3 import options, git
4 from helpers import *
5
# Tuning constants used to size the midx lookup table.
PAGE_SIZE = 4096
# Float on purpose, so later divisions by it stay fractional.
# NOTE(review): the meaning of the 200 divisor is not visible here -- confirm.
SHA_PER_PAGE = PAGE_SIZE / 200.0
8
9
def next(it):
    """Return the next item from iterator `it`, or None once it is exhausted.

    Works with both the Python 2 (`.next()`) and the Python 3
    (`.__next__()`) iterator protocols.  Note that this deliberately
    shadows the builtin of the same name within this file.
    """
    try:
        advance = it.next          # Python 2 iterator protocol
    except AttributeError:
        advance = it.__next__      # Python 3 iterator protocol
    try:
        return advance()
    except StopIteration:
        return None
15     
16     
def merge(idxlist, total, bits, table):
    """Yield the entries of every input in `idxlist` in ascending order.

    This is a generator performing an N-way merge.  Each input is assumed
    to yield its own entries already sorted ascending (the merge is only
    correct under that assumption -- confirm against the index classes).
    Entries present in more than one input are yielded once per input.

    idxlist -- iterables to merge; empty ones are skipped.
    total   -- expected number of entries overall; only used for the
               progress percentage written through log().
    bits    -- passed to git.extract_bits() to derive each entry's table
               slot (presumably the leading `bits` bits -- TODO confirm).
    table   -- list mutated in place: after an entry is yielded,
               table[slot] is set to the number of entries yielded so far.
    """
    # Prime every iterator with its first entry.  Inputs that are already
    # exhausted are dropped here; otherwise their None placeholder would
    # sort to the front and be yielded as if it were a real entry.
    iters = []
    for idx in idxlist:
        it = iter(idx)
        first = next(it)
        if first is not None:
            iters.append([first, it])
    # Order by current entry only, so ties never compare iterator objects.
    iters.sort(key=lambda pair: pair[0])

    count = 0
    while iters:
        if (count % 10000) == 0:
            log('\rMerging: %.2f%% (%d/%d)'
                % (count*100.0/total, count, total))
        # iters is kept sorted, so the head holds the smallest entry.
        e = iters[0][0]
        yield e
        count += 1
        prefix = git.extract_bits(e, bits)
        table[prefix] = count
        # Advance the head iterator and store its next entry in place.
        e = iters[0][0] = next(iters[0][1])
        if e is None:
            # Head input is exhausted: drop it.
            iters = iters[1:]
        else:
            # Re-insert the head after every input whose current entry
            # is <= e, keeping the list sorted.
            i = 1
            while i < len(iters):
                if iters[i][0] > e:
                    break
                i += 1
            iters = iters[1:i] + [iters[0]] + iters[i:]
    log('\rMerging: done.                                    \n')
42
43
def do_midx(outdir, outfilename, infilenames):
    """Merge the pack indexes in `infilenames` into a single .midx file.

    outdir      -- directory used for the auto-generated output name and
                   in log messages; must be truthy when `outfilename`
                   is not given.
    outfilename -- output path, or a false value to derive
                   '<outdir>/midx-<sha1 of the NUL-joined input names>.midx'.
    infilenames -- paths of the index files to merge (opened with
                   git.PackIndex).

    File layout as written below: the 8 bytes 'MIDX\\0\\0\\0\\1', a 4-byte
    big-endian `bits`, a table of 2**bits big-endian 8-byte integers, the
    merged entries, then the NUL-joined basenames of the inputs.

    Returns None.  Prints the output filename on success; logs and returns
    early when the inputs contain no objects at all.
    """
    if not outfilename:
        assert(outdir)
        digest = sha.sha('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, digest)

    inp = []
    total = 0
    for name in infilenames:
        ix = git.PackIndex(name)
        inp.append(ix)
        total += len(ix)

    if not total:
        log('%s: no new .idx files: nothing to do.\n' % outdir)
        return

    log('Merging %d indexes (%d objects).\n' % (len(infilenames), total))
    pages = total/SHA_PER_PAGE
    # For small inputs pages < 1 and the logarithm is negative; a negative
    # exponent would make 2**bits a float and break the table allocation,
    # so never go below a single-entry (0-bit) table.
    bits = max(0, int(math.ceil(math.log(pages, 2))))
    entries = 2**bits
    log('table size: %d (%d bits)\n' % (entries*8, bits))

    table = [0]*entries

    # Remove any previous output up front; a missing file is fine.
    try:
        os.unlink(outfilename)
    except OSError:
        pass

    tmpname = outfilename + '.tmp'
    # Binary mode: the file holds packed integers and raw entries.
    f = open(tmpname, 'w+b')
    try:
        f.write('MIDX\0\0\0\1')
        f.write(struct.pack('!I', bits))
        assert(f.tell() == 12)
        # Reserve space for the table; it is only complete after the merge.
        f.write('\0'*8*entries)

        for e in merge(inp, total, bits, table):
            f.write(e)

        f.write('\0'.join([os.path.basename(p) for p in infilenames]))

        # Go back and fill in the now-complete table.
        f.seek(12)
        f.write(struct.pack('!%dQ' % entries, *table))
    finally:
        # Always release the handle, even if merging or writing failed.
        f.close()
    # Only publish the result under its final name once fully written.
    os.rename(tmpname, outfilename)

    # this is just for testing
    if 0:
        p = git.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print(p.idxnames)
        assert(len(p) == total)
        pi = iter(p)
        for i in merge(inp, total, bits, table):
            assert(i == pi.next())
            assert(p.exists(i))

    print(outfilename)
101
# Command-line driver: parse the options, then build .midx files either
# from the filenames given or from the .idx files found in the repository.
optspec = """
bup midx [options...] <idxnames...>
--
o,output=  output midx filename (default: auto-generated)
a,auto     automatically create .midx from any unindexed .idx files
f,force    automatically create .midx from *all* .idx files
"""
o = options.Options('bup midx', optspec)
opt, flags, extra = o.parse(sys.argv[1:])

# Explicit filenames cannot be combined with the automatic modes.
if extra and (opt.auto or opt.force):
    log("bup midx: you can't use -f/-a and also provide filenames\n")
    o.usage()

git.check_repo_or_die()

if extra:
    # Explicit inputs: a single midx, placed in the pack directory by default.
    do_midx(git.repo('objects/pack'), opt.output, extra)
elif not (opt.auto or opt.force):
    log("bup midx: you must use -f or -a or provide input filenames\n")
    o.usage()
else:
    # Automatic modes handle the pack directory and each index-cache
    # subdirectory separately.
    paths = [git.repo('objects/pack')] + glob.glob(git.repo('index-cache/*/.'))
    for path in paths:
        if opt.force:
            # -f (takes precedence over -a): every .idx in the directory.
            do_midx(path, opt.output, glob.glob('%s/*.idx' % path))
        else:
            # -a: only the .idx files not yet covered by a .midx.
            m = git.MultiPackIndex(path)
            needed = {}
            for pack in m.packs:  # only .idx files without a .midx are open
                if not pack.name.endswith('.idx'):
                    continue
                needed[pack.name] = 1
            del m
            do_midx(path, opt.output, needed.keys())