3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import
9 import glob, math, os, resource, struct, sys, tempfile
11 from bup import options, git, midx, _helpers, xstat
12 from bup.compat import hexstr, range
13 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
14 handle_ctrl_c, log, mmap_readwrite, qprogress,
# Entries per page, kept fractional by dividing by the float literal 20.
# (presumably 20 is the byte length of one SHA-1 -- confirm).
19 SHA_PER_PAGE=PAGE_SIZE/20.
22 bup midx [options...] <idxnames...>
24 o,output= output midx filename (default: auto-generated)
25 a,auto automatically use all existing .midx/.idx files as input
26 f,force merge produce exactly one .midx containing all objects
27 p,print print names of generated midx files
28 check validate contents of the given midx files (with -a, all midx files)
29 max-files= maximum number of idx files to open at once [-1]
30 d,dir= directory containing idx/midx files
# Module-level alias for _helpers.merge_into; _do_midx calls it with the
# read/write mapping of the output file.
33 merge_into = _helpers.merge_into
# Step through the start offsets 0, count, 2*count, ... below len(l).
37 for i in range(0, len(l), count):
# getrlimit returns a (soft, hard) pair for RLIMIT_NOFILE; take the smaller
# of the two as the starting budget of open files.
42 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
# Hold some descriptors in reserve: either a margin of 20 or a minimal
# margin of 6 is subtracted from the budget.
44 mf -= 20 # just a safety margin
46 mf -= 6 # minimum safety margin
# Validate one midx file. nicename (from git.repo_rel) is the form of the
# path used in log and error messages.
51 nicename = git.repo_rel(name)
52 log('Checking %s.\n' % nicename)
54 ix = git.open_idx(name)
# A GitError while opening is recorded via add_error rather than raised.
55 except git.GitError as e:
56 add_error('%s: %s' % (name, e))
# First pass: open every idx listed in the midx (paths are resolved against
# the directory that holds the midx) and walk all of its entries.
58 for count,subname in enumerate(ix.idxnames):
59 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
60 for ecount,e in enumerate(sub):
# Only refresh the progress line once every 1234 entries.
61 if not (ecount % 1234):
62 qprogress(' %d/%d: %s %d/%d\r'
63 % (count, len(ix.idxnames),
64 git.shorten_hash(subname), ecount, len(sub)))
# Two separate error reports: the entry is missing from the idx it was read
# from, or it is missing from the midx being checked.
66 add_error("%s: %s: %s missing from idx"
67 % (nicename, git.shorten_hash(subname), hexstr(e)))
69 add_error("%s: %s: %s missing from midx"
70 % (nicename, git.shorten_hash(subname), hexstr(e)))
# Second pass: walk the midx itself and report ordering errors between an
# entry and the previously seen one (prev).
72 for ecount,e in enumerate(ix):
73 if not (ecount % 1234):
74 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
76 add_error('%s: ordering error: %s < %s'
77 % (nicename, hexstr(e), hexstr(prev)))
# Merge the idx/midx files named in infilenames into one midx file.
#   outdir      -- directory used for the auto-generated output name
#   outfilename -- explicit output path
#   infilenames -- paths of the input index files
#   prefixstr   -- text inserted into the "creating from" debug message
# The final statement returns the tuple (total, outfilename).
82 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Auto-generated name: midx-<SHA-1 hex of the NUL-joined input names>.midx
# inside outdir.
86 sum = Sha1('\0'.join(infilenames)).hexdigest()
87 outfilename = '%s/midx-%s.midx' % (outdir, sum)
# Open every input index.
94 for name in infilenames:
95 ix = git.open_idx(name)
# midx inputs contribute their which_ofs here; any other index contributes 0.
101 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
# Collect the base name of every idx an input covers; the joined list is
# written at the end of the output file below.
104 for n in ix.idxnames:
105 allfilenames.append(os.path.basename(n))
# Order the inputs descending by the 20 bytes found at offset x[2] in x[0].
107 inp.sort(reverse=True, key=lambda x: str(x[0][x[2]:x[2]+20]))
# _first remembers the first outdir seen; messages about any other
# directory are prefixed with git.repo_rel(outdir) + ': '.
109 if not _first: _first = outdir
110 dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
111 debug1('midx: %s%screating from %d files (%d objects).\n'
112 % (dirprefix, prefixstr, len(infilenames), total))
# Cases logged as "nothing to do": with auto, fewer than 3 inputs holding
# fewer than 1024 objects in total; with auto or force, fewer than 2 inputs;
# with force, zero objects.
113 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
114 or ((opt.auto or opt.force) and len(infilenames) < 2) \
115 or (opt.force and not total):
116 debug1('midx: nothing to do.\n')
# pages is total/SHA_PER_PAGE truncated to an int (minimum 1); bits is
# ceil(log2(pages)). The debug line reports the table as entries*4 bytes.
119 pages = int(total/SHA_PER_PAGE) or 1
120 bits = int(math.ceil(math.log(pages, 2)))
122 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
# NOTE(review): judging by the helper's name the output only replaces
# outfilename once the block completes -- confirm in bup.helpers.
125 with atomically_replaced_file(outfilename, 'wb') as f:
# Version and bits are packed as two big-endian unsigned 32-bit ints; the
# header must be exactly 12 bytes long at this point.
127 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
128 assert(f.tell() == 12)
# Pre-size the file: 12 header bytes, 4 bytes per table entry, then
# 20 + 4 bytes per object.
130 f.truncate(12 + 4*entries + 20*total + 4*total)
# Sync the resized file (fdatasync on its descriptor) before mapping it
# read/write and handing the mapping to merge_into.
132 fdatasync(f.fileno())
134 fmap = mmap_readwrite(f, close=False)
136 count = merge_into(fmap, bits, total, inp)
137 del fmap # Assume this calls msync() now.
# Append the NUL-separated idx base names at the end of the file.
138 f.seek(0, os.SEEK_END)
139 f.write('\0'.join(allfilenames))
142 if isinstance(ix, midx.PackMidx):
148 # This is just for testing (if you enable this, don't clear inp above)
# Re-open the freshly written midx and compare it against the inputs.
150 p = midx.PackMidx(outfilename)
151 assert(len(p.idxnames) == len(infilenames))
153 assert(len(p) == total)
# NOTE(review): this iterates over the 2-tuple (p, git.idxmerge(...)) and
# unpacks each member into pe, e instead of pairing their elements; zip()
# looks intended -- confirm before enabling this test code.
154 for pe, e in p, git.idxmerge(inp, final_progress=False):
159 return total, outfilename
# Wrapper around _do_midx taking the same four arguments. When _do_midx
# returns a truthy result and the 'print' option is set (help text: "print
# names of generated midx files"), the guarded follow-up runs.
162 def do_midx(outdir, outfilename, infilenames, prefixstr):
163 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
164 if rv and opt['print']:
# Consolidate the idx/midx files found in one directory.
#   path        -- directory that is globbed for *.midx and *.idx files
#   outfilename -- forwarded to do_midx_group
168 def do_midx_dir(path, outfilename):
# With force but without auto, existing midx files are ignored as inputs;
# otherwise every *.midx in path is a candidate.
171 if opt.force and not opt.auto:
172 midxs = [] # don't use existing midx files
174 midxs = glob.glob('%s/*.midx' % path)
# For each midx record the idx paths it covers (contents) and its number
# of entries (sizes).
177 m = git.open_idx(mname)
178 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
179 sizes[mname] = len(m)
181 # sort the biggest+newest midxes first, so that we can eliminate
182 # smaller (or older) redundant ones that come later in the list
# Both key components are negated, so larger sizes and later mtimes sort
# first.
183 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
# 'already' is consulted per covered idx name; a midx found redundant is
# reported through debug1.
187 for iname in contents[mname]:
188 if not already.get(iname):
192 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files that are not marked in 'already'.
196 midxs = [k for k in midxs if not already.get(k)]
197 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
# Record the entry count of each remaining idx as well.
200 i = git.open_idx(iname)
201 sizes[iname] = len(i)
# (size, name) pairs for every remaining midx and idx.
203 all = [(sizes[n],n) for n in (midxs + idxs)]
205 # FIXME: what are the optimal values? Does this make sense?
# High/low water marks: both are 1 under force, otherwise 5 and 2.
206 DESIRED_HWM = opt.force and 1 or 5
207 DESIRED_LWM = opt.force and 1 or 2
# Names that existed before any merging in this call.
208 existed = dict((name,1) for sz,name in all)
209 debug1('midx: %d indexes; want no more than %d.\n'
210 % (len(all), DESIRED_HWM))
211 if len(all) <= DESIRED_HWM:
212 debug1('midx: nothing to do.\n')
# Merge repeatedly until at most DESIRED_HWM indexes remain. Each round
# merges everything except the last DESIRED_LWM-1 entries (part1) and keeps
# those trailing entries as they are (part2).
213 while len(all) > DESIRED_HWM:
215 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
216 part2 = all[len(all)-DESIRED_LWM+1:]
217 all = list(do_midx_group(path, outfilename, part1)) + part2
218 if len(all) > DESIRED_HWM:
219 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
220 % (len(all), DESIRED_HWM))
# Names absent from 'existed' were produced by the merging above.
224 if not existed.get(name):
# Merge infiles in batches: _group is called with opt.max_files as its
# count (presumably yielding chunks of at most that many names -- confirm),
# and each batch goes through _do_midx with a 1-based 'Group N: ' prefix.
228 def do_midx_group(outdir, outfilename, infiles):
229 groups = list(_group(infiles, opt.max_files))
231 for n,sublist in enumerate(groups):
233 gprefix = 'Group %d: ' % (n+1)
234 rv = _do_midx(outdir, outfilename, sublist, gprefix)
# Command entry point: parse the command line against optspec.
241 o = options.Options(optspec)
242 (opt, flags, extra) = o.parse(sys.argv[1:])
# Reject option combinations that make no sense: explicit filenames
# together with -f/-a, or --check with neither filenames nor -a.
244 if extra and (opt.auto or opt.force):
245 o.fatal("you can't use -f/-a and also provide filenames")
246 if opt.check and (not extra and not opt.auto):
247 o.fatal("if using --check, you must provide filenames or -a")
249 git.check_repo_or_die()
# A negative max-files value (the option spec lists [-1]) means the limit
# is computed by max_files(); the result must be at least 5.
251 if opt.max_files < 0:
252 opt.max_files = max_files()
253 assert(opt.max_files >= 5)
256 # check existing midx files
# Scan the directory given with --dir, or else every pack directory, and
# gather the *.midx files found there.
261 paths = opt.dir and [opt.dir] or git.all_packdirs()
263 debug1('midx: scanning %s\n' % path)
264 midxes += glob.glob(os.path.join(path, '*.midx'))
268 log('All tests passed.\n')
# Explicit input filenames: build one midx under the repo's objects/pack.
271 do_midx(git.repo('objects/pack'), opt.output, extra, '')
# -a / -f: process each directory (same --dir / all_packdirs choice as
# above) with do_midx_dir.
272 elif opt.auto or opt.force:
273 paths = opt.dir and [opt.dir] or git.all_packdirs()
275 debug1('midx: scanning %s\n' % path)
276 do_midx_dir(path, opt.output)
# No -f, no -a and no filenames: nothing to act on.
278 o.fatal("you must use -f or -a or provide input filenames")
# Summarise the errors collected in saved_errors.
281 log('WARNING: %d errors encountered.\n' % len(saved_errors))