3 # https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4 export "BUP_ARGV_0"="$0"
7 export "BUP_ARGV_${arg_i}"="$arg"
11 # Here to end of preamble replaced during install
12 bup_python="$(dirname "$0")/bup-python" || exit $?
13 exec "$bup_python" "$0"
17 from __future__ import absolute_import, print_function
18 from binascii import hexlify
19 import glob, math, os, resource, struct, sys, tempfile
21 from bup import compat, options, git, midx, _helpers, xstat
22 from bup.compat import argv_bytes, hexstr, range
23 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
24 handle_ctrl_c, log, mmap_readwrite, qprogress,
26 from bup.io import byte_stream, path_msg
30 SHA_PER_PAGE=PAGE_SIZE/20.
33 bup midx [options...] <idxnames...>
35 o,output= output midx filename (default: auto-generated)
36 a,auto automatically use all existing .midx/.idx files as input
37 f,force merge produce exactly one .midx containing all objects
38 p,print print names of generated midx files
39 check validate contents of the given midx files (with -a, all midx files)
40 max-files= maximum number of idx files to open at once [-1]
41 d,dir= directory containing idx/midx files
44 merge_into = _helpers.merge_into
48 for i in range(0, len(l), count):
53 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
55 mf -= 20 # just a safety margin
57 mf -= 6 # minimum safety margin
# Body of the midx consistency check; the enclosing `def` line and several
# interior lines are not part of this excerpt.  `name` is the path of the
# midx file under test.  From the visible lines the routine:
#   1. logs the name returned by git.repo_rel(name),
#   2. opens the file with git.open_idx(), recording a GitError through
#      add_error(),
#   3. opens every idx listed in ix.idxnames (resolved relative to the
#      directory of `name`) and reports objects "missing from idx" or
#      "missing from midx",
#   4. iterates the midx itself and reports an "ordering error" when an
#      entry compares lower than `prev`.
# Problems are accumulated with add_error() rather than raised.
62 nicename = git.repo_rel(name)
63 log('Checking %s.\n' % path_msg(nicename))
65 ix = git.open_idx(name)
66 except git.GitError as e:
# NOTE(review): `pathmsg` is not among the names imported in the visible
# part of this file, while `path_msg` is imported from bup.io and used on
# the neighbouring lines.  This looks like a typo that would raise
# NameError exactly when an index fails to open -- confirm and fix.
67 add_error('%s: %s' % (pathmsg(name), e))
# Walk each constituent idx; `count` is its position in ix.idxnames.
69 for count,subname in enumerate(ix.idxnames):
70 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
71 for ecount,e in enumerate(sub):
# Only refresh the progress line once every 1234 entries.
72 if not (ecount % 1234):
73 qprogress(' %d/%d: %s %d/%d\r'
74 % (count, len(ix.idxnames),
75 git.shorten_hash(subname).decode('ascii'),
78 add_error("%s: %s: %s missing from idx"
79 % (path_msg(nicename),
80 git.shorten_hash(subname).decode('ascii'),
83 add_error("%s: %s: %s missing from midx"
84 % (path_msg(nicename),
85 git.shorten_hash(subname).decode('ascii'),
# Second pass: iterate the midx's own entries and compare each to `prev`.
88 for ecount,e in enumerate(ix):
89 if not (ecount % 1234):
90 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
91 if e and prev and not e >= prev:
# NOTE(review): the two messages above wrap `nicename` in path_msg(), this
# one formats it directly -- confirm whether that difference is intended.
92 add_error('%s: ordering error: %s < %s'
93 % (nicename, hexstr(e), hexstr(prev)))
# Build a single midx file from the index files in `infilenames`.
#
# Parameters (as used in the visible lines):
#   outdir       -- directory used when an output name is generated and
#                   when composing the debug prefix
#   outfilename  -- path of the midx to write; the visible code can also
#                   assign a generated name to it
#   infilenames  -- paths of the input idx/midx files (bytes; they are
#                   joined with b'\0')
#   prefixstr    -- text inserted into the "creating from ..." debug line
#
# Returns the tuple (total, outfilename) on the path that reaches the final
# return.  Several lines of this function are missing from this excerpt, so
# the comments below describe only what the visible statements do.
98 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Generated output name: <outdir>/midx-<hex SHA-1 of the NUL-joined input
# names>.midx.  Presumably only used when no outfilename was supplied --
# the guarding condition is not visible here.
# NOTE(review): the local name `sum` shadows the builtin.
102 sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
103 outfilename = b'%s/midx-%s.midx' % (outdir, sum)
110 for name in infilenames:
111 ix = git.open_idx(name)
117 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
# Collect the base names of the idx files referenced by this input.
120 for n in ix.idxnames:
121 allfilenames.append(os.path.basename(n))
# Order the inputs in descending order of the 20-byte slice of x[0] that
# starts at offset x[2].
123 inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])
# `_first` keeps the first outdir seen; the directory prefix is only added
# to the debug message for a different outdir.
125 if not _first: _first = outdir
126 dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
127 debug1('midx: %s%screating from %d files (%d objects).\n'
128 % (dirprefix, prefixstr, len(infilenames), total))
# "Nothing to do" cases: with --auto, fewer than 1024 objects in fewer than
# 3 inputs; with --auto or --force, fewer than 2 inputs; with --force, no
# objects at all.
129 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
130 or ((opt.auto or opt.force) and len(infilenames) < 2) \
131 or (opt.force and not total):
132 debug1('midx: nothing to do.\n')
# Table sizing: at least one page, then the number of bits needed to index
# that many pages (ceil(log2(pages))).
135 pages = int(total/SHA_PER_PAGE) or 1
136 bits = int(math.ceil(math.log(pages, 2)))
138 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
141 with atomically_replaced_file(outfilename, 'wb') as f:
# '!II' packs two big-endian 32-bit values (8 bytes); the assertion on 12
# implies 4 bytes were already written by a line not shown in this excerpt.
143 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
144 assert(f.tell() == 12)
# Pre-size the file: 12-byte header + 4 bytes per table entry + 20 bytes
# per object + 4 bytes per object.  (Presumably the 20-byte records are the
# object hashes and the 4-byte records a per-object index -- confirm
# against the midx format.)
146 f.truncate(12 + 4*entries + 20*total + 4*total)
148 fdatasync(f.fileno())
# The body is filled in through a read/write mmap by the C helper
# merge_into; the file object stays open (close=False).
150 fmap = mmap_readwrite(f, close=False)
151 count = merge_into(fmap, bits, total, inp)
152 del fmap # Assume this calls msync() now.
# Trailer: the NUL-separated list of constituent idx base names.
153 f.seek(0, os.SEEK_END)
154 f.write(b'\0'.join(allfilenames))
157 if isinstance(ix, midx.PackMidx):
163 # This is just for testing (if you enable this, don't clear inp above)
165 p = midx.PackMidx(outfilename)
166 assert(len(p.idxnames) == len(infilenames))
167 log(repr(p.idxnames) + '\n')
168 assert(len(p) == total)
# NOTE(review): `p, git.idxmerge(...)` is a 2-tuple, so this loop iterates
# over the two objects themselves rather than over paired entries; a
# zip(...) was probably intended.  Only matters if the test code is enabled.
169 for pe, e in p, git.idxmerge(inp, final_progress=False):
174 return total, outfilename
def do_midx(outdir, outfilename, infilenames, prefixstr, prout):
    """Create one midx via _do_midx() and optionally print its name.

    All arguments except `prout` are passed straight through to
    _do_midx().  When that call yields a truthy result and the `print`
    option is set, the second element of the result (the midx file name,
    as bytes) is written to `prout` followed by a newline.
    """
    result = _do_midx(outdir, outfilename, infilenames, prefixstr)
    if not result:
        # Nothing was produced, so there is no name to report.
        return
    if opt['print']:
        prout.write(result[1] + b'\n')
# Merge the indexes found in directory `path` until no more than
# DESIRED_HWM remain.
#
# Parameters (as used in the visible lines):
#   path         -- directory (bytes) globbed for *.midx and *.idx files
#   outfilename  -- passed through to do_midx_group()
#   prout        -- binary stream; names that were not present before the
#                   merge are written to it, one per line
#
# Several lines of this function are missing from this excerpt; the comments
# describe only the visible statements.
183 def do_midx_dir(path, outfilename, prout):
186 if opt.force and not opt.auto:
187 midxs = [] # don't use existing midx files
189 midxs = glob.glob(b'%s/*.midx' % path)
# For every midx: remember which idx paths it covers and how many objects
# it holds.
192 m = git.open_idx(mname)
193 contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
194 sizes[mname] = len(m)
196 # sort the biggest+newest midxes first, so that we can eliminate
197 # smaller (or older) redundant ones that come later in the list
198 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
202 for iname in contents[mname]:
203 if not already.get(iname):
207 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files that are not flagged in `already`.
211 midxs = [k for k in midxs if not already.get(k)]
212 idxs = [k for k in glob.glob(b'%s/*.idx' % path) if not already.get(k)]
215 i = git.open_idx(iname)
216 sizes[iname] = len(i)
# Work list of (object count, file name) pairs.
# NOTE(review): the local name `all` shadows the builtin.
218 all = [(sizes[n],n) for n in (midxs + idxs)]
220 # FIXME: what are the optimal values? Does this make sense?
# With --force both marks are 1 (merge everything into one file);
# otherwise merging starts above 5 indexes and keeps the last LWM-1 = 1
# entry of the list out of the merge.
221 DESIRED_HWM = opt.force and 1 or 5
222 DESIRED_LWM = opt.force and 1 or 2
# Names that existed before merging, so new ones can be told apart later.
223 existed = dict((name,1) for sz,name in all)
224 debug1('midx: %d indexes; want no more than %d.\n'
225 % (len(all), DESIRED_HWM))
226 if len(all) <= DESIRED_HWM:
227 debug1('midx: nothing to do.\n')
228 while len(all) > DESIRED_HWM:
# part1: names of every entry except the last DESIRED_LWM-1, handed to
# do_midx_group(); part2: those last entries, carried over unchanged.
230 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
231 part2 = all[len(all)-DESIRED_LWM+1:]
232 all = list(do_midx_group(path, outfilename, part1)) + part2
233 if len(all) > DESIRED_HWM:
234 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
235 % (len(all), DESIRED_HWM))
# Report only files that did not exist before this run.
239 if not existed.get(name):
240 prout.write(name + b'\n')
# Merge `infiles` in batches of at most opt.max_files, calling _do_midx()
# once per batch with a "Group N: " message prefix (N starts at 1).
# The caller wraps the result in list(), so this is expected to produce an
# iterable; the lines that do so are not part of this excerpt.
#   outdir, outfilename -- passed through to _do_midx()
#   infiles             -- input index file names to be split into batches
243 def do_midx_group(outdir, outfilename, infiles):
244 groups = list(_group(infiles, opt.max_files))
246 for n,sublist in enumerate(groups):
248 gprefix = 'Group %d: ' % (n+1)
249 rv = _do_midx(outdir, outfilename, sublist, gprefix)
# Command entry point: parse options, validate the combination, then either
# check existing midx files, build a midx from the named inputs, or merge
# per pack directory.  Some branch headers are missing from this excerpt.
256 o = options.Options(optspec)
257 opt, flags, extra = o.parse(compat.argv[1:])
# Path-like options are converted to bytes; unset ones become None.
258 opt.dir = argv_bytes(opt.dir) if opt.dir else None
259 opt.output = argv_bytes(opt.output) if opt.output else None
# Explicit file names cannot be combined with -a/-f, and --check needs
# either file names or -a.
261 if extra and (opt.auto or opt.force):
262 o.fatal("you can't use -f/-a and also provide filenames")
263 if opt.check and (not extra and not opt.auto):
264 o.fatal("if using --check, you must provide filenames or -a")
266 git.check_repo_or_die()
# A negative --max-files means "work it out" via max_files().
268 if opt.max_files < 0:
269 opt.max_files = max_files()
270 assert(opt.max_files >= 5)
272 extra = [argv_bytes(x) for x in extra]
275 # check existing midx files
280 paths = opt.dir and [opt.dir] or git.all_packdirs()
# NOTE(review): this message formats `path` directly, while the equivalent
# message in the merge branch further down uses path_msg(path) -- confirm
# which is intended and make the two consistent.
282 debug1('midx: scanning %s\n' % path)
283 midxes += glob.glob(os.path.join(path, b'*.midx'))
287 log('All tests passed.\n')
# Explicit inputs: build one midx in the repository's objects/pack directory.
291 do_midx(git.repo(b'objects/pack'), opt.output, extra, b'',
292 byte_stream(sys.stdout))
293 elif opt.auto or opt.force:
# -a / -f: process --dir if given, otherwise every pack directory.
295 paths = opt.dir and [opt.dir] or git.all_packdirs()
297 debug1('midx: scanning %s\n' % path_msg(path))
298 do_midx_dir(path, opt.output, byte_stream(sys.stdout))
300 o.fatal("you must use -f or -a or provide input filenames")
303 log('WARNING: %d errors encountered.\n' % len(saved_errors))