3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import
9 import glob, math, os, resource, struct, sys, tempfile
11 from bup import options, git, midx, _helpers, xstat
12 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
13 handle_ctrl_c, log, mmap_readwrite, qprogress,
# Number of 20-byte entries that fit into one PAGE_SIZE-sized page.
# The trailing dot in "20." makes this a float, so the later
# total/SHA_PER_PAGE in _do_midx() is a floating-point division.
# NOTE(review): the 20 is presumably the length of a binary SHA-1 -- confirm.
18 SHA_PER_PAGE=PAGE_SIZE/20.
21 bup midx [options...] <idxnames...>
23 o,output= output midx filename (default: auto-generated)
24 a,auto automatically use all existing .midx/.idx files as input
25 f,force merge produce exactly one .midx containing all objects
26 p,print print names of generated midx files
27 check validate contents of the given midx files (with -a, all midx files)
28 max-files= maximum number of idx files to open at once [-1]
29 d,dir= directory containing idx/midx files
# Module-level alias for _helpers.merge_into; _do_midx() calls it with the
# memory-mapped output file, the table bit count, the object total and the
# list of opened inputs.
32 merge_into = _helpers.merge_into
# Walk the start offsets 0, count, 2*count, ... that are below len(l).
# NOTE(review): the enclosing def and the loop body are outside this view;
# presumably this is the _group() helper that do_midx_group() uses to split
# its input list into chunks -- confirm against the full file.
36 for i in xrange(0, len(l), count):
# resource.getrlimit() returns a (soft, hard) pair for RLIMIT_NOFILE; the
# smaller of the two is taken as the starting value.
# NOTE(review): an unlimited limit is reported as resource.RLIM_INFINITY,
# which may be a negative number on some platforms and would then win the
# min() -- confirm that case is handled.
41 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
# Two alternative safety margins are subtracted from the limit.
# NOTE(review): the condition that selects between them is not visible here.
43 mf -= 20 # just a safety margin
45 mf -= 6 # minimum safety margin
# Validation of a single midx file whose path is `name`.
# `nicename` is git.repo_rel(name) and is what the log and most error
# messages below show.
50 nicename = git.repo_rel(name)
51 log('Checking %s.\n' % nicename)
# Open the file under test; a git.GitError is recorded through add_error()
# (with the original `name`, not `nicename`).
53 ix = git.open_idx(name)
54 except git.GitError as e:
55 add_error('%s: %s' % (name, e))
# Pass 1: open every idx named by the midx (looked up in the same directory
# as the midx itself) and walk all of its entries.
57 for count,subname in enumerate(ix.idxnames):
58 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
59 for ecount,e in enumerate(sub):
# Only refresh the progress line on every 1234th entry.
60 if not (ecount % 1234):
61 qprogress(' %d/%d: %s %d/%d\r'
62 % (count, len(ix.idxnames),
63 git.shorten_hash(subname), ecount, len(sub)))
# Error reports for an entry missing from the idx and from the midx.
# The entry is printed hex-encoded (Python 2 str.encode('hex')).
# NOTE(review): the tests guarding these two reports are not visible here.
65 add_error("%s: %s: %s missing from idx"
66 % (nicename, git.shorten_hash(subname),
67 str(e).encode('hex')))
69 add_error("%s: %s: %s missing from midx"
70 % (nicename, git.shorten_hash(subname),
71 str(e).encode('hex')))
# Pass 2: walk the midx's own entries; per the message below this checks
# that each entry is not smaller than the previous one (`prev`).
# NOTE(review): the comparison and the update of `prev` are not visible here.
73 for ecount,e in enumerate(ix):
74 if not (ecount % 1234):
75 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
77 add_error('%s: ordering error: %s < %s'
79 str(e).encode('hex'), str(prev).encode('hex')))
# Build one midx file from the given input index files.
#   outdir      -- directory used for the auto-generated output name and
#                  for the directory prefix in the debug message
#   outfilename -- path of the midx to write
#   infilenames -- paths of the idx/midx files to merge
#   prefixstr   -- text inserted into the "creating from" debug message
# The return statement visible at the end yields (total, outfilename).
# NOTE(review): what is returned on the "nothing to do" path is not visible.
84 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Auto-generated name: midx-<hex SHA-1 of the NUL-joined input names>.midx
# inside outdir.  NOTE(review): the condition guarding this is not visible.
88 sum = Sha1('\0'.join(infilenames)).hexdigest()
89 outfilename = '%s/midx-%s.midx' % (outdir, sum)
# Open every input.  The "and ... or 0" expression further down evaluates to
# ix.which_ofs for a midx.PackMidx input (when that value is truthy) and to
# 0 for anything else.
96 for name in infilenames:
97 ix = git.open_idx(name)
103 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
# Collect the base names of the idx files this input covers.
106 for n in ix.idxnames:
107 allfilenames.append(os.path.basename(n))
# Sort the inputs in descending order of the 20 bytes found in x[0] at
# offset x[2].
109 inp.sort(reverse=True, key=lambda x: str(x[0][x[2]:x[2]+20]))
# _first keeps the first outdir seen; the debug message only carries a
# directory prefix when the current outdir differs from it.
111 if not _first: _first = outdir
112 dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
113 debug1('midx: %s%screating from %d files (%d objects).\n'
114 % (dirprefix, prefixstr, len(infilenames), total))
# Skip the merge when it would be pointless:
#   - with --auto: fewer than 1024 objects and fewer than 3 inputs
#   - with --auto or --force: fewer than 2 inputs
#   - with --force: no objects at all
115 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
116 or ((opt.auto or opt.force) and len(infilenames) < 2) \
117 or (opt.force and not total):
118 debug1('midx: nothing to do.\n')
# Size the lookup table: at least one page, and `bits` is the base-2
# logarithm of the page count rounded up.
121 pages = int(total/SHA_PER_PAGE) or 1
122 bits = int(math.ceil(math.log(pages, 2)))
# NOTE(review): `entries` is assigned on a line not visible here.
124 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
127 with atomically_replaced_file(outfilename, 'wb') as f:
# Two unsigned 32-bit integers in network byte order: format version and
# bits.  The assert implies 4 bytes were already written before these 8.
129 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
130 assert(f.tell() == 12)
# Pre-size the file: 12 header bytes, 4 bytes per table entry, then
# 20 bytes plus 4 bytes for each object.
132 f.truncate(12 + 4*entries + 20*total + 4*total)
134 fdatasync(f.fileno())
# Map the file read/write (keeping f open) and let merge_into() fill it.
136 fmap = mmap_readwrite(f, close=False)
138 count = merge_into(fmap, bits, total, inp)
139 del fmap # Assume this calls msync() now.
# Append the NUL-separated idx base names after the pre-sized region.
140 f.seek(0, os.SEEK_END)
141 f.write('\0'.join(allfilenames))
# NOTE(review): the body of this test is not visible here.
144 if isinstance(ix, midx.PackMidx):
150 # This is just for testing (if you enable this, don't clear inp above)
152 p = midx.PackMidx(outfilename)
153 assert(len(p.idxnames) == len(infilenames))
155 assert(len(p) == total)
# NOTE(review): this iterates over the 2-tuple (p, git.idxmerge(...)) and
# unpacks each of its two members into (pe, e); it does not pair up the
# elements of the two sequences.  A zip-style pairing was probably intended.
156 for pe, e in p, git.idxmerge(inp, final_progress=False):
161 return total, outfilename
# Thin wrapper around _do_midx() with the same arguments, which additionally
# honours the 'print' option when _do_midx() returned a truthy result.
# NOTE(review): the statement executed under the `if` is not visible here.
164 def do_midx(outdir, outfilename, infilenames, prefixstr):
165 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
# The option is read by subscript because `print` is a reserved word in
# Python 2, so opt.print would not parse.
166 if rv and opt['print']:
# Merge the index files found in directory `path`, repeating until the
# number of remaining indexes is no larger than DESIRED_HWM.
#   path        -- directory that is globbed for *.midx and *.idx files
#   outfilename -- passed through unchanged to do_midx_group()
170 def do_midx_dir(path, outfilename):
# --force without --auto starts from the idx files only.
173 if opt.force and not opt.auto:
174 midxs = [] # don't use existing midx files
# Otherwise every *.midx in the directory is a candidate.
176 midxs = glob.glob('%s/*.midx' % path)
# For each candidate midx, remember the paths of the idx files it covers
# (joined onto `path`) and its number of entries.
179 m = git.open_idx(mname)
180 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
181 sizes[mname] = len(m)
183 # sort the biggest+newest midxes first, so that we can eliminate
184 # smaller (or older) redundant ones that come later in the list
# Key is (-size, -mtime): largest first, ties broken by most recent mtime.
185 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
# `already` is consulted per covered idx file and a midx can be reported as
# redundant.  NOTE(review): the lines that fill `already` and decide
# redundancy are not visible here.
189 for iname in contents[mname]:
190 if not already.get(iname):
194 debug1('%r is redundant\n' % mname)
# Keep only the midx files and the *.idx files that have no truthy entry in
# `already`.
198 midxs = [k for k in midxs if not already.get(k)]
199 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
# Record the entry count of an idx file as well.
202 i = git.open_idx(iname)
203 sizes[iname] = len(i)
# (size, name) pairs for everything that is left.  Note that the local name
# `all` shadows the builtin of the same name inside this function.
205 all = [(sizes[n],n) for n in (midxs + idxs)]
207 # FIXME: what are the optimal values? Does this make sense?
# High and low water marks: both 1 with --force, otherwise 5 and 2.
208 DESIRED_HWM = opt.force and 1 or 5
209 DESIRED_LWM = opt.force and 1 or 2
# Names that were present before any merging started.
210 existed = dict((name,1) for sz,name in all)
211 debug1('midx: %d indexes; want no more than %d.\n'
212 % (len(all), DESIRED_HWM))
213 if len(all) <= DESIRED_HWM:
214 debug1('midx: nothing to do.\n')
215 while len(all) > DESIRED_HWM:
# part1: names of all entries except the last DESIRED_LWM-1; these get
# merged.  part2: the remaining (size, name) pairs, carried over unchanged.
217 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
218 part2 = all[len(all)-DESIRED_LWM+1:]
219 all = list(do_midx_group(path, outfilename, part1)) + part2
220 if len(all) > DESIRED_HWM:
221 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
222 % (len(all), DESIRED_HWM))
# Selects names that were not present before merging.
# NOTE(review): the surrounding loop and the action taken are not visible.
226 if not existed.get(name):
# Split `infiles` into groups via _group(infiles, opt.max_files) and run
# _do_midx() once per group.
#   outdir, outfilename -- passed through unchanged to _do_midx()
#   infiles             -- names of the index files to merge
# NOTE(review): how each result `rv` is handed back to the caller is not
# visible here; do_midx_dir() wraps the call in list(), so presumably the
# results are yielded -- confirm.
230 def do_midx_group(outdir, outfilename, infiles):
231 groups = list(_group(infiles, opt.max_files))
233 for n,sublist in enumerate(groups):
# Groups are numbered starting at 1 in the message prefix.
235 gprefix = 'Group %d: ' % (n+1)
236 rv = _do_midx(outdir, outfilename, sublist, gprefix)
# Script entry: parse the command line against optspec.  `extra` holds the
# arguments left over after option parsing (the input file names).
243 o = options.Options(optspec)
244 (opt, flags, extra) = o.parse(sys.argv[1:])
# Explicit file names cannot be combined with -a/-f.
246 if extra and (opt.auto or opt.force):
247 o.fatal("you can't use -f/-a and also provide filenames")
# --check needs something to check: explicit files or -a.
248 if opt.check and (not extra and not opt.auto):
249 o.fatal("if using --check, you must provide filenames or -a")
251 git.check_repo_or_die()
# A negative --max-files (the option spec's default is -1) means "derive the
# value from max_files()".
253 if opt.max_files < 0:
254 opt.max_files = max_files()
# NOTE(review): this is an assert, so the check disappears under python -O.
255 assert(opt.max_files >= 5)
258 # check existing midx files
# With --dir only that directory is scanned, otherwise every directory
# returned by git.all_packdirs().
263 paths = opt.dir and [opt.dir] or git.all_packdirs()
265 debug1('midx: scanning %s\n' % path)
266 midxes += glob.glob(os.path.join(path, '*.midx'))
# NOTE(review): the condition under which this is logged is not visible.
270 log('All tests passed.\n')
# Explicit input files: the output goes to the repository's objects/pack
# directory, with an empty message prefix.
273 do_midx(git.repo('objects/pack'), opt.output, extra, '')
# -a / -f: process each pack directory (or just --dir) in turn.
274 elif opt.auto or opt.force:
275 paths = opt.dir and [opt.dir] or git.all_packdirs()
277 debug1('midx: scanning %s\n' % path)
278 do_midx_dir(path, opt.output)
280 o.fatal("you must use -f or -a or provide input filenames")
# Summary of the errors collected in saved_errors.
283 log('WARNING: %d errors encountered.\n' % len(saved_errors))