3 # https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4 export "BUP_ARGV_0"="$0"
7 export "BUP_ARGV_${arg_i}"="$arg"
11 # Here to end of preamble replaced during install
12 bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
13 exec "$bup_python" "$0"
17 from __future__ import absolute_import, print_function
18 from binascii import hexlify
19 import glob, math, os, resource, struct, sys, tempfile
21 sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
23 from bup import compat, options, git, midx, _helpers, xstat
24 from bup.compat import argv_bytes, hexstr, range
25 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
26 handle_ctrl_c, log, mmap_readwrite, qprogress,
28 from bup.io import byte_stream, path_msg
# How many 20-byte entries fit in PAGE_SIZE bytes (20 is presumably the SHA-1
# digest length -- confirm).  The trailing dot makes this a float, so
# `total/SHA_PER_PAGE` in _do_midx is a float division.
32 SHA_PER_PAGE=PAGE_SIZE/20.
35 bup midx [options...] <idxnames...>
37 o,output= output midx filename (default: auto-generated)
38 a,auto automatically use all existing .midx/.idx files as input
39 f,force merge produce exactly one .midx containing all objects
40 p,print print names of generated midx files
41 check validate contents of the given midx files (with -a, all midx files)
42 max-files= maximum number of idx files to open at once [-1]
43 d,dir= directory containing idx/midx files
# Module-level alias for the _helpers merge routine; _do_midx calls it as
# merge_into(fmap, bits, total, inp) to fill the memory-mapped output file.
46 merge_into = _helpers.merge_into
# Step through `l` in strides of `count`: `i` takes the values
# 0, count, 2*count, ... below len(l).
50 for i in range(0, len(l), count):
# resource.getrlimit() returns a (soft, hard) pair; start from the smaller of
# the two RLIMIT_NOFILE values, then subtract a safety margin.
55 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
57 mf -= 20 # just a safety margin
59 mf -= 6 # minimum safety margin
64 nicename = git.repo_rel(name)
65 log('Checking %s.\n' % path_msg(nicename))
67 ix = git.open_idx(name)
68 except git.GitError as e:
69 add_error('%s: %s' % (pathmsg(name), e))
71 for count,subname in enumerate(ix.idxnames):
72 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
73 for ecount,e in enumerate(sub):
74 if not (ecount % 1234):
75 qprogress(' %d/%d: %s %d/%d\r'
76 % (count, len(ix.idxnames),
77 git.shorten_hash(subname).decode('ascii'),
80 add_error("%s: %s: %s missing from idx"
81 % (path_msg(nicename),
82 git.shorten_hash(subname).decode('ascii'),
85 add_error("%s: %s: %s missing from midx"
86 % (path_msg(nicename),
87 git.shorten_hash(subname).decode('ascii'),
90 for ecount,e in enumerate(ix):
91 if not (ecount % 1234):
92 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
93 if e and prev and not e >= prev:
94 add_error('%s: ordering error: %s < %s'
95 % (nicename, hexstr(e), hexstr(prev)))
# Merge the idx/midx files named in `infilenames` into a single .midx file.
#
# outdir:      directory used when the output name is auto-generated; also
#              used to build the log prefix.
# outfilename: explicit output path (bytes); reassigned below when a name is
#              auto-generated.
# infilenames: input index paths (bytes).
# prefixstr:   text inserted into the "creating from ..." debug message.
#
# The visible return path yields (total, outfilename), where `total` is the
# object count reported in the debug message.
100 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Auto-generated name: midx-<hex SHA-1 of the NUL-joined input names>.midx
# inside outdir.
104 sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
105 outfilename = b'%s/midx-%s.midx' % (outdir, sum)
112 for name in infilenames:
113 ix = git.open_idx(name)
# Only PackMidx inputs contribute ix.which_ofs; anything else (or a falsy
# which_ofs) contributes 0.
119 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
# Collect the base names of every idx covered by this input.
122 for n in ix.idxnames:
123 allfilenames.append(os.path.basename(n))
# Sort inputs in descending order of the 20 bytes at offset x[2] within x[0]
# (presumably each index's first SHA -- confirm against the tuple layout).
125 inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])
# _first remembers the first outdir seen; any other outdir gets its
# repo-relative path as a prefix in the debug message.
127 if not _first: _first = outdir
128 dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
# NOTE(review): dirprefix is bytes while this format string is str, so under
# Python 3 the %s renders as b'...'.  prefixstr is also passed as str by
# do_midx_group ('Group %d: ') but as b'' by the top-level call -- confirm
# the intended type and convert for display.
129 debug1('midx: %s%screating from %d files (%d objects).\n'
130 % (dirprefix, prefixstr, len(infilenames), total))
# Treated as "nothing to do" when: in auto mode with fewer than 1024 objects
# and fewer than 3 inputs; in auto/force mode with fewer than 2 inputs; or in
# force mode with no objects at all.
131 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
132 or ((opt.auto or opt.force) and len(infilenames) < 2) \
133 or (opt.force and not total):
134 debug1('midx: nothing to do.\n')
# At least one page, so the logarithm below is always defined;
# bits = ceil(log2(pages)).
137 pages = int(total/SHA_PER_PAGE) or 1
138 bits = int(math.ceil(math.log(pages, 2)))
140 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
143 with atomically_replaced_file(outfilename, 'wb') as f:
# Two big-endian unsigned 32-bit fields: format version and bits.  The assert
# expects 12 bytes written so far, i.e. 4 bytes precede these 8.
145 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
146 assert(f.tell() == 12)
# Size the file up front: 12-byte header, 4 bytes per table entry, then
# 20 + 4 bytes per object.
148 f.truncate(12 + 4*entries + 20*total + 4*total)
150 fdatasync(f.fileno())
# Map the file read/write and let merge_into() fill it in.
152 fmap = mmap_readwrite(f, close=False)
153 count = merge_into(fmap, bits, total, inp)
154 del fmap # Assume this calls msync() now.
# The covered idx names go after the mapped region, NUL-separated.
155 f.seek(0, os.SEEK_END)
156 f.write(b'\0'.join(allfilenames))
159 if isinstance(ix, midx.PackMidx):
165 # This is just for testing (if you enable this, don't clear inp above)
167 p = midx.PackMidx(outfilename)
168 assert(len(p.idxnames) == len(infilenames))
169 log(repr(p.idxnames) + '\n')
170 assert(len(p) == total)
# NOTE(review): this iterates over the 2-tuple (p, git.idxmerge(...)) and
# tries to unpack each element into (pe, e); pairing the two sequences
# element-wise would need zip().  Fix before enabling this test code.
171 for pe, e in p, git.idxmerge(inp, final_progress=False):
176 return total, outfilename
def do_midx(outdir, outfilename, infilenames, prefixstr, prout):
    """Build one midx via _do_midx() and optionally report its name.

    The first four arguments are forwarded to _do_midx() unchanged.  If
    that call returns a truthy result and the 'print' option is set, the
    second element of the result (the output filename, as bytes) is
    written to prout followed by a newline.
    """
    result = _do_midx(outdir, outfilename, infilenames, prefixstr)
    if not result:
        # Nothing was produced, so there is nothing to report.
        return
    if opt['print']:
        prout.write(result[1] + b'\n')
# Consolidate the index files found in one directory.
#
# path:        directory (bytes) searched for *.midx and *.idx files.
# outfilename: passed through to do_midx_group() for each merge pass.
# prout:       binary stream; names that did not exist before merging are
#              written to it, one per line.
#
# Merging repeats until no more than DESIRED_HWM indexes remain.
185 def do_midx_dir(path, outfilename, prout):
188 if opt.force and not opt.auto:
189 midxs = [] # don't use existing midx files
191 midxs = glob.glob(b'%s/*.midx' % path)
# For each midx, record the full paths of the idx files it covers and its
# object count.
194 m = git.open_idx(mname)
195 contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
196 sizes[mname] = len(m)
198 # sort the biggest+newest midxes first, so that we can eliminate
199 # smaller (or older) redundant ones that come later in the list
200 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
204 for iname in contents[mname]:
205 if not already.get(iname):
209 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files that are not flagged in `already`.
213 midxs = [k for k in midxs if not already.get(k)]
214 idxs = [k for k in glob.glob(b'%s/*.idx' % path) if not already.get(k)]
217 i = git.open_idx(iname)
218 sizes[iname] = len(i)
# (object count, name) pairs for every index still in play.
220 all = [(sizes[n],n) for n in (midxs + idxs)]
222 # FIXME: what are the optimal values? Does this make sense?
# With --force both marks are 1; otherwise merging starts once more than 5
# indexes exist.
223 DESIRED_HWM = opt.force and 1 or 5
224 DESIRED_LWM = opt.force and 1 or 2
# Remember which names were present before any merging.
225 existed = dict((name,1) for sz,name in all)
226 debug1('midx: %d indexes; want no more than %d.\n'
227 % (len(all), DESIRED_HWM))
228 if len(all) <= DESIRED_HWM:
229 debug1('midx: nothing to do.\n')
230 while len(all) > DESIRED_HWM:
# Each pass merges everything except the last DESIRED_LWM-1 entries of `all`
# (part1) and carries those trailing entries over unchanged (part2).
232 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
233 part2 = all[len(all)-DESIRED_LWM+1:]
234 all = list(do_midx_group(path, outfilename, part1)) + part2
235 if len(all) > DESIRED_HWM:
236 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
237 % (len(all), DESIRED_HWM))
# Report only names that were not there before merging.
241 if not existed.get(name):
242 prout.write(name + b'\n')
# Run _do_midx() once per group of input files.
#
# infiles is split by _group() using opt.max_files (presumably so that no
# single merge opens more than that many files -- confirm), and each group is
# merged with a 1-based 'Group N: ' log prefix.  The caller wraps the result
# in list() and concatenates it with (size, name) pairs.
245 def do_midx_group(outdir, outfilename, infiles):
246 groups = list(_group(infiles, opt.max_files))
248 for n,sublist in enumerate(groups):
# NOTE(review): this prefix is str, while the top-level call to do_midx()
# passes b'' for the same parameter -- confirm which type _do_midx expects.
250 gprefix = 'Group %d: ' % (n+1)
251 rv = _do_midx(outdir, outfilename, sublist, gprefix)
258 o = options.Options(optspec)
259 opt, flags, extra = o.parse(compat.argv[1:])
260 opt.dir = argv_bytes(opt.dir) if opt.dir else None
261 opt.output = argv_bytes(opt.output) if opt.output else None
263 if extra and (opt.auto or opt.force):
264 o.fatal("you can't use -f/-a and also provide filenames")
265 if opt.check and (not extra and not opt.auto):
266 o.fatal("if using --check, you must provide filenames or -a")
268 git.check_repo_or_die()
270 if opt.max_files < 0:
271 opt.max_files = max_files()
272 assert(opt.max_files >= 5)
274 extra = [argv_bytes(x) for x in extra]
277 # check existing midx files
282 paths = opt.dir and [opt.dir] or git.all_packdirs()
284 debug1('midx: scanning %s\n' % path)
285 midxes += glob.glob(os.path.join(path, b'*.midx'))
289 log('All tests passed.\n')
293 do_midx(git.repo(b'objects/pack'), opt.output, extra, b'',
294 byte_stream(sys.stdout))
295 elif opt.auto or opt.force:
297 paths = opt.dir and [opt.dir] or git.all_packdirs()
299 debug1('midx: scanning %s\n' % path_msg(path))
300 do_midx_dir(path, opt.output, byte_stream(sys.stdout))
302 o.fatal("you must use -f or -a or provide input filenames")
305 log('WARNING: %d errors encountered.\n' % len(saved_errors))