3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 import glob, math, os, resource, struct, sys, tempfile
10 from bup import options, git, midx, _helpers, xstat
11 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
12 handle_ctrl_c, log, mmap_readwrite, qprogress,
17 SHA_PER_PAGE=PAGE_SIZE/20.
20 bup midx [options...] <idxnames...>
22 o,output= output midx filename (default: auto-generated)
23 a,auto automatically use all existing .midx/.idx files as input
24 f,force merge produce exactly one .midx containing all objects
25 p,print print names of generated midx files
26 check validate contents of the given midx files (with -a, all midx files)
27 max-files= maximum number of idx files to open at once [-1]
28 d,dir= directory containing idx/midx files
# Module-level alias for the merge routine exported by the _helpers module.
31 merge_into = _helpers.merge_into
# Lone line of a chunking loop (its enclosing definition is not part of this
# excerpt): i advances through the indices of l in steps of count.
35 for i in xrange(0, len(l), count):
# Fragment that derives a limit on simultaneously open files (the enclosing
# def line is not part of this excerpt).
# getrlimit() returns the (soft, hard) pair for RLIMIT_NOFILE; start from the
# smaller of the two.
40 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
# Two different margins are subtracted below; the conditions that select
# between them are not visible here.
42 mf -= 20 # just a safety margin
44 mf -= 6 # minimum safety margin
# Fragment of the routine that validates one midx file `name` (its def line
# and several statements are not part of this excerpt). Problems are recorded
# with add_error() rather than raised.
49 nicename = git.repo_rel(name)
50 log('Checking %s.\n' % nicename)
52 ix = git.open_idx(name)
# A git.GitError is turned into a recorded error (the matching try: line is
# not shown; presumably it wraps the open_idx() call above).
53 except git.GitError as e:
54 add_error('%s: %s' % (name, e))
# Visit every idx listed in the opened index; each name is resolved relative
# to the directory that holds `name`.
56 for count,subname in enumerate(ix.idxnames):
57 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
58 for ecount,e in enumerate(sub):
# Only refresh the progress line once every 1234 entries.
59 if not (ecount % 1234):
60 qprogress(' %d/%d: %s %d/%d\r'
61 % (count, len(ix.idxnames),
62 git.shorten_hash(subname), ecount, len(sub)))
# The tests guarding the two reports below are not shown in this excerpt.
# str(e).encode('hex') relies on the Python 2 'hex' codec.
64 add_error("%s: %s: %s missing from idx"
65 % (nicename, git.shorten_hash(subname),
66 str(e).encode('hex')))
68 add_error("%s: %s: %s missing from midx"
69 % (nicename, git.shorten_hash(subname),
70 str(e).encode('hex')))
# Second pass over the opened index itself; the visible report is an
# "ordering error" naming the current entry e and a previous entry prev
# (the comparison and the update of prev are not shown).
72 for ecount,e in enumerate(ix):
73 if not (ecount % 1234):
74 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
76 add_error('%s: ordering error: %s < %s'
78 str(e).encode('hex'), str(prev).encode('hex')))
# Write one .midx file that combines the index files in infilenames.
#   outdir      - directory used for the generated output name and for the
#                 log prefix.
#   outfilename - output path; an auto-generated name is assigned below (the
#                 condition guarding that assignment is not in this excerpt).
#   infilenames - paths of the idx/midx files to merge.
#   prefixstr   - text inserted into the "creating from" debug message.
# The visible return yields (total, outfilename). Other exit paths (e.g.
# after "nothing to do") are not shown; callers test the result for truth.
83 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Generated name: midx-<SHA-1 hex of the NUL-joined input names>.midx inside
# outdir. Note that `sum` shadows the builtin of the same name.
87 sum = Sha1('\0'.join(infilenames)).hexdigest()
88 outfilename = '%s/midx-%s.midx' % (outdir, sum)
95 for name in infilenames:
96 ix = git.open_idx(name)
# Element of a tuple whose other members are not shown: which_ofs when the
# input is itself a midx, otherwise 0 (and/or conditional idiom).
102 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
# Collect the basenames of every idx this input names.
105 for n in ix.idxnames:
106 allfilenames.append(os.path.basename(n))
# Python 2 cmp-style sort. Each entry is compared by the 20 bytes of x[0]
# starting at offset x[2]; the arguments are passed as (y, x), so the order
# is descending.
108 inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))
# _first keeps the first outdir seen (it is defined outside this excerpt);
# the repo-relative directory prefix is only added for a different outdir.
110 if not _first: _first = outdir
111 dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
112 debug1('midx: %s%screating from %d files (%d objects).\n'
113 % (dirprefix, prefixstr, len(infilenames), total))
# Skip when merging is pointless: with --auto, fewer than 1024 objects in
# fewer than 3 files; with --auto/--force, fewer than 2 inputs; with --force,
# no objects at all.
114 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
115 or ((opt.auto or opt.force) and len(infilenames) < 2) \
116 or (opt.force and not total):
117 debug1('midx: nothing to do.\n')
# pages is total/SHA_PER_PAGE truncated to an int, but never less than 1;
# bits is ceil(log2(pages)).
120 pages = int(total/SHA_PER_PAGE) or 1
121 bits = int(math.ceil(math.log(pages, 2)))
123 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
126 with atomically_replaced_file(outfilename, 'wb') as f:
# '!II' packs two big-endian 32-bit unsigned ints (8 bytes). The assert
# expects offset 12, so 4 bytes were written before this line (that write is
# not shown; presumably a magic marker).
128 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
129 assert(f.tell() == 12)
# Size the file up front: 12 header bytes, 4 bytes per table entry, then
# 20 + 4 bytes per object (presumably a SHA-1 plus an index number; confirm
# against the midx format).
131 f.truncate(12 + 4*entries + 20*total + 4*total)
133 fdatasync(f.fileno())
# The merge writes through a read/write mmap of the file.
135 fmap = mmap_readwrite(f, close=False)
137 count = merge_into(fmap, bits, total, inp)
138 del fmap # Assume this calls msync() now.
# Append the NUL-joined idx basenames after the preallocated region.
139 f.seek(0, os.SEEK_END)
140 f.write('\0'.join(allfilenames))
143 if isinstance(ix, midx.PackMidx):
149 # This is just for testing (if you enable this, don't clear inp above)
151 p = midx.PackMidx(outfilename)
152 assert(len(p.idxnames) == len(infilenames))
154 assert(len(p) == total)
# NOTE(review): this iterates over the 2-tuple (p, git.idxmerge(...)) and
# unpacks each of those two objects into (pe, e); it does not pair up their
# elements. A zip-style pairing looks like the intent - confirm before
# enabling this test code.
155 for pe, e in p, git.idxmerge(inp, final_progress=False):
160 return total, outfilename
# Thin wrapper around _do_midx() taking the same four arguments. When the
# result is truthy and the 'print' option is set, it performs an extra step
# that is not part of this excerpt.
163 def do_midx(outdir, outfilename, infilenames, prefixstr):
164 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
165 if rv and opt['print']:
# Consolidate the .midx/.idx files found in directory `path`, merging until
# no more than DESIRED_HWM index files remain. outfilename is passed through
# to do_midx_group(). Several statements of this function are not part of
# this excerpt.
169 def do_midx_dir(path, outfilename):
172 if opt.force and not opt.auto:
173 midxs = [] # don't use existing midx files
175 midxs = glob.glob('%s/*.midx' % path)
# For each existing midx, record the idx paths it names (rebuilt relative to
# path) and its length.
178 m = git.open_idx(mname)
179 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
180 sizes[mname] = len(m)
182 # sort the biggest+newest midxes first, so that we can eliminate
183 # smaller (or older) redundant ones that come later in the list
184 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
# `already` is consulted per idx name; the statements that fill it in are
# not shown here.
188 for iname in contents[mname]:
189 if not already.get(iname):
193 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files that have no entry in `already`.
197 midxs = [k for k in midxs if not already.get(k)]
198 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
201 i = git.open_idx(iname)
202 sizes[iname] = len(i)
# List of (size, name) pairs. Note that `all` shadows the builtin.
204 all = [(sizes[n],n) for n in (midxs + idxs)]
206 # FIXME: what are the optimal values? Does this make sense?
# With --force both marks are 1; otherwise the high-water mark is 5 and the
# low-water mark is 2.
207 DESIRED_HWM = opt.force and 1 or 5
208 DESIRED_LWM = opt.force and 1 or 2
# Names that were present before any merging in this call.
209 existed = dict((name,1) for sz,name in all)
210 debug1('midx: %d indexes; want no more than %d.\n'
211 % (len(all), DESIRED_HWM))
212 if len(all) <= DESIRED_HWM:
213 debug1('midx: nothing to do.\n')
214 while len(all) > DESIRED_HWM:
# Merge every entry except the last DESIRED_LWM-1, which are carried over
# unchanged in part2.
216 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
217 part2 = all[len(all)-DESIRED_LWM+1:]
218 all = list(do_midx_group(path, outfilename, part1)) + part2
219 if len(all) > DESIRED_HWM:
220 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
221 % (len(all), DESIRED_HWM))
# Selects names that did not exist before this call; the action taken for
# them is not part of this excerpt.
225 if not existed.get(name):
# Split infiles into sublists via _group(infiles, opt.max_files) and run
# _do_midx() on each one, labelling each with a 1-based "Group N: " prefix.
# What is done with each rv is not part of this excerpt.
229 def do_midx_group(outdir, outfilename, infiles):
230 groups = list(_group(infiles, opt.max_files))
232 for n,sublist in enumerate(groups):
234 gprefix = 'Group %d: ' % (n+1)
235 rv = _do_midx(outdir, outfilename, sublist, gprefix)
# Command entry point: parse the options, reject invalid combinations, then
# dispatch to checking, an explicit merge, or a per-directory automatic
# merge. Some branch headers are not part of this excerpt.
242 o = options.Options(optspec)
243 (opt, flags, extra) = o.parse(sys.argv[1:])
# Explicit filenames cannot be combined with -a/-f, and --check needs either
# filenames or -a.
245 if extra and (opt.auto or opt.force):
246 o.fatal("you can't use -f/-a and also provide filenames")
247 if opt.check and (not extra and not opt.auto):
248 o.fatal("if using --check, you must provide filenames or -a")
250 git.check_repo_or_die()
# A negative --max-files asks for the limit to be computed by max_files().
252 if opt.max_files < 0:
253 opt.max_files = max_files()
# NOTE(review): this check is an assert, so it is skipped when Python runs
# with -O.
254 assert(opt.max_files >= 5)
257 # check existing midx files
# Use the directory given with --dir if there is one, otherwise every pack
# directory reported by git.all_packdirs().
262 paths = opt.dir and [opt.dir] or git.all_packdirs()
264 debug1('midx: scanning %s\n' % path)
265 midxes += glob.glob(os.path.join(path, '*.midx'))
269 log('All tests passed.\n')
# Explicit merge into the repository's objects/pack directory (the branch
# header selecting this path is not shown).
272 do_midx(git.repo('objects/pack'), opt.output, extra, '')
273 elif opt.auto or opt.force:
274 paths = opt.dir and [opt.dir] or git.all_packdirs()
276 debug1('midx: scanning %s\n' % path)
277 do_midx_dir(path, opt.output)
279 o.fatal("you must use -f or -a or provide input filenames")
# Summary of errors collected in saved_errors (the guard and any exit status
# handling are not shown).
282 log('WARNING: %d errors encountered.\n' % len(saved_errors))