3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
7 import sys, math, struct, glob, resource
9 from bup import options, git, midx, _helpers, xstat
10 from bup.helpers import *
# Number of 20-byte entries that fit in one PAGE_SIZE-sized page. The trailing
# dot on `20.` forces float division, so this is a float; the code that sizes
# the lookup table wraps the quotient derived from it in int().
13 SHA_PER_PAGE=PAGE_SIZE/20.
16 bup midx [options...] <idxnames...>
18 o,output= output midx filename (default: auto-generated)
19 a,auto automatically use all existing .midx/.idx files as input
20 f,force merge produce exactly one .midx containing all objects
21 p,print print names of generated midx files
22 check validate contents of the given midx files (with -a, all midx files)
23 max-files= maximum number of idx files to open at once [-1]
24 d,dir= directory containing idx/midx files
# Module-level alias for _helpers.merge_into; it is called with the mmap'd
# output file when a midx is written.
27 merge_into = _helpers.merge_into
# NOTE(review): the enclosing `def` and the loop body are not visible in this
# extract. The loop steps `i` through `l` in strides of `count`; presumably
# this is the body of _group(), which is called elsewhere in this file as
# _group(infiles, opt.max_files) -- confirm against the full source.
31 for i in xrange(0, len(l), count):
# NOTE(review): the `def` line is not visible in this extract; presumably this
# is max_files(), which the main script calls when --max-files is negative.
# getrlimit() returns a (soft, hard) pair; start from the smaller of the two.
36 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
# Two different margins are subtracted below. NOTE(review): the condition that
# selects between them is not visible here.
38 mf -= 20 # just a safety margin
40 mf -= 6 # minimum safety margin
# NOTE(review): the `def` line for this block is not visible in this extract.
# Validates the midx file `name`; every problem found is recorded through
# add_error().
45 nicename = git.repo_rel(name)
46 log('Checking %s.\n' % nicename)
# A git.GitError raised while opening the file is caught and reported
# against `name`.
48 ix = git.open_idx(name)
49 except git.GitError as e:
50 add_error('%s: %s' % (name, e))
# Walk every idx listed in the midx (resolved relative to the midx file's own
# directory) and visit each of its entries.
52 for count,subname in enumerate(ix.idxnames):
53 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
54 for ecount,e in enumerate(sub):
# Refresh the progress line only once every 1234 entries.
55 if not (ecount % 1234):
56 qprogress(' %d/%d: %s %d/%d\r'
57 % (count, len(ix.idxnames),
58 git.shorten_hash(subname), ecount, len(sub)))
# NOTE(review): the conditions guarding the two reports below are not visible
# in this extract; going by their messages they flag an entry missing from the
# idx and from the midx respectively.
60 add_error("%s: %s: %s missing from idx"
61 % (nicename, git.shorten_hash(subname),
62 str(e).encode('hex')))
64 add_error("%s: %s: %s missing from midx"
65 % (nicename, git.shorten_hash(subname),
66 str(e).encode('hex')))
# Second pass, over the midx's own entries: reports an "ordering error" naming
# the current and the previous entry (the comparison itself is not visible
# here).
68 for ecount,e in enumerate(ix):
69 if not (ecount % 1234):
70 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
72 add_error('%s: ordering error: %s < %s'
74 str(e).encode('hex'), str(prev).encode('hex')))
# Build a single .midx file out of the idx/midx files in `infilenames`.
# The visible return path yields (total, outfilename).
# NOTE(review): several lines of this function are missing from this extract
# (for example where `inp`, `total`, `allfilenames` and `entries` are set up),
# so the notes below cover only what is shown.
79 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Derive an output name inside `outdir` from the hex SHA-1 of the NUL-joined
# input names.
83 sum = Sha1('\0'.join(infilenames)).hexdigest()
84 outfilename = '%s/midx-%s.midx' % (outdir, sum)
# Open each input and collect the basenames of the idx files it covers.
91 for name in infilenames:
92 ix = git.open_idx(name)
98 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
101 for n in ix.idxnames:
102 allfilenames.append(os.path.basename(n))
# Python 2 cmp-style sort. Each element is keyed by the 20 bytes of element[0]
# starting at offset element[2]; x and y are swapped inside cmp(), so the
# resulting order is descending.
104 inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))
# _first keeps the first outdir seen; debug output for any other outdir is
# prefixed with that directory's repo-relative path.
106 if not _first: _first = outdir
107 dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
108 debug1('midx: %s%screating from %d files (%d objects).\n'
109 % (dirprefix, prefixstr, len(infilenames), total))
# "Nothing to do" cases: auto mode with fewer than 1024 objects in fewer than
# 3 files; auto or force mode with fewer than 2 files; force mode with no
# objects at all.
110 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
111 or ((opt.auto or opt.force) and len(infilenames) < 2) \
112 or (opt.force and not total):
113 debug1('midx: nothing to do.\n')
# Size the lookup table: one page per SHA_PER_PAGE objects (never fewer than
# one page), and the number of bits needed to index that many pages.
116 pages = int(total/SHA_PER_PAGE) or 1
117 bits = int(math.ceil(math.log(pages, 2)))
119 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
# The output is written through atomically_replaced_file().
122 with atomically_replaced_file(outfilename, 'wb') as f:
# Two big-endian unsigned 32-bit ints: the format version and the table bits.
# The assert expects 12 bytes written so far, i.e. 4 bytes precede these 8
# (that write is not visible in this extract).
124 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
125 assert(f.tell() == 12)
# Grow the file to its final size before mapping it: 12 header bytes, 4 bytes
# per table entry, then 20 + 4 bytes per object.
127 f.truncate(12 + 4*entries + 20*total + 4*total)
129 fdatasync(f.fileno())
# merge_into() receives the mapping, the table bits, the object count and the
# sorted inputs; presumably it writes the merged table into the mapping --
# confirm against _helpers.
131 fmap = mmap_readwrite(f, close=False)
133 count = merge_into(fmap, bits, total, inp)
134 del fmap # Assume this calls msync() now.
# Append the NUL-separated idx basenames after the pre-sized region.
135 f.seek(0, os.SEEK_END)
136 f.write('\0'.join(allfilenames))
# NOTE(review): the body of this check is not visible in this extract.
139 if isinstance(ix, midx.PackMidx):
145 # This is just for testing (if you enable this, don't clear inp above)
147 p = midx.PackMidx(outfilename)
148 assert(len(p.idxnames) == len(infilenames))
150 assert(len(p) == total)
# NOTE(review): this iterates over the two-element tuple
# (p, git.idxmerge(...)) and unpacks each element into (pe, e); it does not
# walk the two sequences in parallel. zip() was probably intended -- confirm
# before ever enabling this test code.
151 for pe, e in p, git.idxmerge(inp, final_progress=False):
156 return total, outfilename
# Thin wrapper around _do_midx() that takes the same four arguments.
# NOTE(review): what happens when a result comes back and the `print` option
# is set is not visible in this extract -- presumably the generated filename
# is printed; confirm against the full source.
159 def do_midx(outdir, outfilename, infilenames, prefixstr):
160 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
161 if rv and opt['print']:
# Merge the idx/midx files found in directory `path`, repeating merge rounds
# for as long as more than DESIRED_HWM index files remain.
# NOTE(review): parts of this function are missing from this extract, so the
# notes below cover only what is shown.
165 def do_midx_dir(path, outfilename):
# With force but not auto, existing midx files are not used as inputs;
# the other visible assignment globs every *.midx in `path`.
168 if opt.force and not opt.auto:
169 midxs = [] # don't use existing midx files
171 midxs = glob.glob('%s/*.midx' % path)
# For each midx, record the idx paths it covers and its entry count.
174 m = git.open_idx(mname)
175 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
176 sizes[mname] = len(m)
178 # sort the biggest+newest midxes first, so that we can eliminate
179 # smaller (or older) redundant ones that come later in the list
180 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
# `already` is consulted per covered idx name. NOTE(review): the logic that
# decides a midx is redundant is not fully visible here.
184 for iname in contents[mname]:
185 if not already.get(iname):
189 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files that are not marked in `already`.
193 midxs = [k for k in midxs if not already.get(k)]
194 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
197 i = git.open_idx(iname)
198 sizes[iname] = len(i)
# (size, name) pairs for every remaining candidate.
200 all = [(sizes[n],n) for n in (midxs + idxs)]
202 # FIXME: what are the optimal values? Does this make sense?
# High and low water marks: both 1 when forcing, otherwise 5 and 2.
203 DESIRED_HWM = opt.force and 1 or 5
204 DESIRED_LWM = opt.force and 1 or 2
# Remember which names were present before any merging.
205 existed = dict((name,1) for sz,name in all)
206 debug1('midx: %d indexes; want no more than %d.\n'
207 % (len(all), DESIRED_HWM))
208 if len(all) <= DESIRED_HWM:
209 debug1('midx: nothing to do.\n')
# Each round hands all but the last DESIRED_LWM-1 entries of `all` to
# do_midx_group() and carries the remaining entries over unchanged.
210 while len(all) > DESIRED_HWM:
212 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
213 part2 = all[len(all)-DESIRED_LWM+1:]
214 all = list(do_midx_group(path, outfilename, part1)) + part2
215 if len(all) > DESIRED_HWM:
216 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
217 % (len(all), DESIRED_HWM))
# NOTE(review): the action taken for names that did not exist before merging
# is not visible in this extract.
221 if not existed.get(name):
# Split `infiles` into groups via _group(infiles, opt.max_files) and call
# _do_midx() once per group, labelling each with a "Group N: " prefix where N
# starts at 1.
# NOTE(review): how each result is handed back is not visible in this
# extract; the caller wraps this function's return value in list().
225 def do_midx_group(outdir, outfilename, infiles):
226 groups = list(_group(infiles, opt.max_files))
228 for n,sublist in enumerate(groups):
230 gprefix = 'Group %d: ' % (n+1)
231 rv = _do_midx(outdir, outfilename, sublist, gprefix)
# Script body: parse the command line, validate option combinations, then
# dispatch. NOTE(review): some branch headers are missing from this extract,
# so the exact if/elif structure cannot be read off the lines below.
238 o = options.Options(optspec)
239 (opt, flags, extra) = o.parse(sys.argv[1:])
# Explicit filenames cannot be combined with -a/-f, and --check needs either
# filenames or -a.
241 if extra and (opt.auto or opt.force):
242 o.fatal("you can't use -f/-a and also provide filenames")
243 if opt.check and (not extra and not opt.auto):
244 o.fatal("if using --check, you must provide filenames or -a")
246 git.check_repo_or_die()
# A negative max_files is replaced by the value computed by max_files();
# the result must be at least 5.
248 if opt.max_files < 0:
249 opt.max_files = max_files()
250 assert(opt.max_files >= 5)
253 # check existing midx files
# Directories to scan: the one given with --dir if any, otherwise whatever
# git.all_packdirs() returns.
258 paths = opt.dir and [opt.dir] or git.all_packdirs()
260 debug1('midx: scanning %s\n' % path)
261 midxes += glob.glob(os.path.join(path, '*.midx'))
265 log('All tests passed.\n')
# Explicit input filenames are merged into the repository's objects/pack
# directory.
268 do_midx(git.repo('objects/pack'), opt.output, extra, '')
# With -a or -f, every scanned directory is processed by do_midx_dir().
269 elif opt.auto or opt.force:
270 paths = opt.dir and [opt.dir] or git.all_packdirs()
272 debug1('midx: scanning %s\n' % path)
273 do_midx_dir(path, opt.output)
275 o.fatal("you must use -f or -a or provide input filenames")
# Reports the number of entries in saved_errors as a warning.
278 log('WARNING: %d errors encountered.\n' % len(saved_errors))