3 # https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4 export "BUP_ARGV_0"="$0"
7 export "BUP_ARGV_${arg_i}"="$arg"
11 # Here to end of preamble replaced during install
12 bup_python="$(dirname "$0")/../../../config/bin/python" || exit $?
13 exec "$bup_python" "$0"
17 from __future__ import absolute_import, print_function
19 # Intentionally replace the dirname "$0" that python prepends
21 sys.path[0] = os.path.dirname(os.path.realpath(__file__)) + '/../..'
23 from binascii import hexlify
24 import glob, math, resource, struct, tempfile
26 from bup import compat, options, git, midx, _helpers, xstat
27 from bup.compat import argv_bytes, hexstr, range
28 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
29 handle_ctrl_c, log, mmap_readwrite, qprogress,
31 from bup.io import byte_stream, path_msg
35 SHA_PER_PAGE=PAGE_SIZE/20.
38 bup midx [options...] <idxnames...>
40 o,output= output midx filename (default: auto-generated)
41 a,auto automatically use all existing .midx/.idx files as input
42 f,force merge produce exactly one .midx containing all objects
43 p,print print names of generated midx files
44 check validate contents of the given midx files (with -a, all midx files)
45 max-files= maximum number of idx files to open at once [-1]
46 d,dir= directory containing idx/midx files
49 merge_into = _helpers.merge_into
53 for i in range(0, len(l), count):
58 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
60 mf -= 20 # just a safety margin
62 mf -= 6 # minimum safety margin
67 nicename = git.repo_rel(name)
68 log('Checking %s.\n' % path_msg(nicename))
70 ix = git.open_idx(name)
71 except git.GitError as e:
# Report the failure to open the index; path_msg() is the helper imported
# from bup.io (the previous spelling, pathmsg, is not defined anywhere).
add_error('%s: %s' % (path_msg(name), e))
74 for count,subname in enumerate(ix.idxnames):
75 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
76 for ecount,e in enumerate(sub):
77 if not (ecount % 1234):
78 qprogress(' %d/%d: %s %d/%d\r'
79 % (count, len(ix.idxnames),
80 git.shorten_hash(subname).decode('ascii'),
83 add_error("%s: %s: %s missing from idx"
84 % (path_msg(nicename),
85 git.shorten_hash(subname).decode('ascii'),
88 add_error("%s: %s: %s missing from midx"
89 % (path_msg(nicename),
90 git.shorten_hash(subname).decode('ascii'),
93 for ecount,e in enumerate(ix):
94 if not (ecount % 1234):
95 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
96 if e and prev and not e >= prev:
# nicename comes from git.repo_rel(); render it with path_msg() like the
# other messages in this check so it is not shown as a bytes repr.
add_error('%s: ordering error: %s < %s'
          % (path_msg(nicename), hexstr(e), hexstr(prev)))
103 def _do_midx(outdir, outfilename, infilenames, prefixstr):
107 sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
108 outfilename = b'%s/midx-%s.midx' % (outdir, sum)
115 for name in infilenames:
116 ix = git.open_idx(name)
122 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
125 for n in ix.idxnames:
126 allfilenames.append(os.path.basename(n))
128 inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])
130 if not _first: _first = outdir
# Build both message prefixes as text.  git.repo_rel() yields bytes (it was
# previously concatenated with b': '), and callers pass prefixstr either as
# bytes (b'') or as str ('Group N: '); interpolating bytes with '%s' would
# show up as "b'...'" in the debug output on Python 3.
dirprefix = (path_msg(git.repo_rel(outdir)) + ': ') if _first != outdir else ''
prefix_text = path_msg(prefixstr) if isinstance(prefixstr, bytes) else prefixstr
debug1('midx: %s%screating from %d files (%d objects).\n'
       % (dirprefix, prefix_text, len(infilenames), total))
134 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
135 or ((opt.auto or opt.force) and len(infilenames) < 2) \
136 or (opt.force and not total):
137 debug1('midx: nothing to do.\n')
140 pages = int(total/SHA_PER_PAGE) or 1
141 bits = int(math.ceil(math.log(pages, 2)))
143 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
146 with atomically_replaced_file(outfilename, 'wb') as f:
148 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
149 assert(f.tell() == 12)
151 f.truncate(12 + 4*entries + 20*total + 4*total)
153 fdatasync(f.fileno())
155 fmap = mmap_readwrite(f, close=False)
156 count = merge_into(fmap, bits, total, inp)
157 del fmap # Assume this calls msync() now.
158 f.seek(0, os.SEEK_END)
159 f.write(b'\0'.join(allfilenames))
162 if isinstance(ix, midx.PackMidx):
168 # This is just for testing (if you enable this, don't clear inp above)
170 p = midx.PackMidx(outfilename)
171 assert(len(p.idxnames) == len(infilenames))
172 log(repr(p.idxnames) + '\n')
173 assert(len(p) == total)
174 for pe, e in p, git.idxmerge(inp, final_progress=False):
179 return total, outfilename
def do_midx(outdir, outfilename, infilenames, prefixstr, prout):
    """Create a midx via _do_midx() and, if requested, print its name.

    All arguments except prout are passed straight through to _do_midx().
    When _do_midx() returns a truthy result and the 'print' option is set,
    the second element of that result (the generated midx filename) is
    written to prout followed by a newline.  prout must accept bytes.
    """
    result = _do_midx(outdir, outfilename, infilenames, prefixstr)
    if not result:
        # Nothing was generated, so there is nothing to report.
        return
    if opt['print']:
        prout.write(result[1] + b'\n')
188 def do_midx_dir(path, outfilename, prout):
191 if opt.force and not opt.auto:
192 midxs = [] # don't use existing midx files
194 midxs = glob.glob(b'%s/*.midx' % path)
197 m = git.open_idx(mname)
198 contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
199 sizes[mname] = len(m)
201 # sort the biggest+newest midxes first, so that we can eliminate
202 # smaller (or older) redundant ones that come later in the list
203 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
207 for iname in contents[mname]:
208 if not already.get(iname):
212 debug1('%r is redundant\n' % mname)
216 midxs = [k for k in midxs if not already.get(k)]
217 idxs = [k for k in glob.glob(b'%s/*.idx' % path) if not already.get(k)]
220 i = git.open_idx(iname)
221 sizes[iname] = len(i)
223 all = [(sizes[n],n) for n in (midxs + idxs)]
225 # FIXME: what are the optimal values? Does this make sense?
226 DESIRED_HWM = opt.force and 1 or 5
227 DESIRED_LWM = opt.force and 1 or 2
228 existed = dict((name,1) for sz,name in all)
229 debug1('midx: %d indexes; want no more than %d.\n'
230 % (len(all), DESIRED_HWM))
231 if len(all) <= DESIRED_HWM:
232 debug1('midx: nothing to do.\n')
233 while len(all) > DESIRED_HWM:
235 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
236 part2 = all[len(all)-DESIRED_LWM+1:]
237 all = list(do_midx_group(path, outfilename, part1)) + part2
238 if len(all) > DESIRED_HWM:
239 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
240 % (len(all), DESIRED_HWM))
244 if not existed.get(name):
245 prout.write(name + b'\n')
248 def do_midx_group(outdir, outfilename, infiles):
249 groups = list(_group(infiles, opt.max_files))
251 for n,sublist in enumerate(groups):
253 gprefix = 'Group %d: ' % (n+1)
254 rv = _do_midx(outdir, outfilename, sublist, gprefix)
# Parse the command line.  Directory/output names are converted with
# argv_bytes() once here so the rest of the script works on one path type.
o = options.Options(optspec)
opt, flags, extra = o.parse(compat.argv[1:])
opt.dir = argv_bytes(opt.dir) if opt.dir else None
opt.output = argv_bytes(opt.output) if opt.output else None

# Explicit filenames are mutually exclusive with the automatic modes.
if extra and (opt.auto or opt.force):
    o.fatal("you can't use -f/-a and also provide filenames")
if opt.check and (not extra and not opt.auto):
    o.fatal("if using --check, you must provide filenames or -a")

git.check_repo_or_die()

# A negative --max-files (the default is -1) means "derive the limit from
# the process's open-file limit".
if opt.max_files < 0:
    opt.max_files = max_files()
# Validate explicitly rather than with assert, which is stripped under -O.
# NOTE(review): relies on o.fatal() terminating the program, as the checks
# above already do.
if opt.max_files < 5:
    o.fatal('max-files is too small (%d); at least 5 are required'
            % opt.max_files)

extra = [argv_bytes(x) for x in extra]
280 # check existing midx files
285 paths = opt.dir and [opt.dir] or git.all_packdirs()
# Format the directory with path_msg(), matching the identical message in
# the -a/-f branch, so a bytes path is not printed as its repr.
debug1('midx: scanning %s\n' % path_msg(path))
288 midxes += glob.glob(os.path.join(path, b'*.midx'))
292 log('All tests passed.\n')
296 do_midx(git.repo(b'objects/pack'), opt.output, extra, b'',
297 byte_stream(sys.stdout))
298 elif opt.auto or opt.force:
300 paths = opt.dir and [opt.dir] or git.all_packdirs()
302 debug1('midx: scanning %s\n' % path_msg(path))
303 do_midx_dir(path, opt.output, byte_stream(sys.stdout))
305 o.fatal("you must use -f or -a or provide input filenames")
308 log('WARNING: %d errors encountered.\n' % len(saved_errors))