3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import, print_function
9 from binascii import hexlify
10 import glob, math, os, resource, struct, sys, tempfile
12 from bup import options, git, midx, _helpers, xstat
13 from bup.compat import argv_bytes, hexstr, range
14 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
15 handle_ctrl_c, log, mmap_readwrite, qprogress,
17 from bup.io import byte_stream, path_msg
21 SHA_PER_PAGE=PAGE_SIZE/20.
24 bup midx [options...] <idxnames...>
26 o,output= output midx filename (default: auto-generated)
27 a,auto automatically use all existing .midx/.idx files as input
28 f,force merge produce exactly one .midx containing all objects
29 p,print print names of generated midx files
30 check validate contents of the given midx files (with -a, all midx files)
31 max-files= maximum number of idx files to open at once [-1]
32 d,dir= directory containing idx/midx files
35 merge_into = _helpers.merge_into
39 for i in range(0, len(l), count):
44 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
46 mf -= 20 # just a safety margin
48 mf -= 6 # minimum safety margin
53 nicename = git.repo_rel(name)
54 log('Checking %s.\n' % path_msg(nicename))
56 ix = git.open_idx(name)
57 except git.GitError as e:
# Record the open failure; path_msg (imported from bup.io) renders the
# path for display in the error message.
add_error('%s: %s' % (path_msg(name), e))
60 for count,subname in enumerate(ix.idxnames):
61 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
62 for ecount,e in enumerate(sub):
63 if not (ecount % 1234):
64 qprogress(' %d/%d: %s %d/%d\r'
65 % (count, len(ix.idxnames),
66 git.shorten_hash(subname).decode('ascii'),
69 add_error("%s: %s: %s missing from idx"
70 % (path_msg(nicename),
71 git.shorten_hash(subname).decode('ascii'),
74 add_error("%s: %s: %s missing from midx"
75 % (path_msg(nicename),
76 git.shorten_hash(subname).decode('ascii'),
79 for ecount,e in enumerate(ix):
80 if not (ecount % 1234):
81 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
82 if e and prev and not e >= prev:
# Report an out-of-order entry; the name is passed through path_msg like
# the other error messages that mention nicename.
add_error('%s: ordering error: %s < %s'
          % (path_msg(nicename), hexstr(e), hexstr(prev)))
89 def _do_midx(outdir, outfilename, infilenames, prefixstr):
93 sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
94 outfilename = b'%s/midx-%s.midx' % (outdir, sum)
101 for name in infilenames:
102 ix = git.open_idx(name)
108 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
111 for n in ix.idxnames:
112 allfilenames.append(os.path.basename(n))
114 inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])
116 if not _first: _first = outdir
117 dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
118 debug1('midx: %s%screating from %d files (%d objects).\n'
119 % (dirprefix, prefixstr, len(infilenames), total))
120 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
121 or ((opt.auto or opt.force) and len(infilenames) < 2) \
122 or (opt.force and not total):
123 debug1('midx: nothing to do.\n')
126 pages = int(total/SHA_PER_PAGE) or 1
127 bits = int(math.ceil(math.log(pages, 2)))
129 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
132 with atomically_replaced_file(outfilename, 'wb') as f:
134 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
135 assert(f.tell() == 12)
137 f.truncate(12 + 4*entries + 20*total + 4*total)
139 fdatasync(f.fileno())
141 fmap = mmap_readwrite(f, close=False)
142 count = merge_into(fmap, bits, total, inp)
143 del fmap # Assume this calls msync() now.
144 f.seek(0, os.SEEK_END)
145 f.write(b'\0'.join(allfilenames))
148 if isinstance(ix, midx.PackMidx):
154 # This is just for testing (if you enable this, don't clear inp above)
156 p = midx.PackMidx(outfilename)
157 assert(len(p.idxnames) == len(infilenames))
158 log(repr(p.idxnames) + '\n')
159 assert(len(p) == total)
160 for pe, e in p, git.idxmerge(inp, final_progress=False):
165 return total, outfilename
def do_midx(outdir, outfilename, infilenames, prefixstr, prout):
    """Run _do_midx() and, if requested, report the file it produced.

    outdir, outfilename, infilenames and prefixstr are handed to
    _do_midx() unchanged.  When that call returns a truthy result and the
    'print' option is set, the second element of the result followed by
    a newline is written to prout.
    """
    result = _do_midx(outdir, outfilename, infilenames, prefixstr)
    # Nothing was produced, so there is nothing to report.
    if not result:
        return
    # Only announce the generated name when the user asked for it.
    if not opt['print']:
        return
    generated_name = result[1]
    prout.write(generated_name + b'\n')
174 def do_midx_dir(path, outfilename, prout):
177 if opt.force and not opt.auto:
178 midxs = [] # don't use existing midx files
180 midxs = glob.glob(b'%s/*.midx' % path)
183 m = git.open_idx(mname)
184 contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
185 sizes[mname] = len(m)
187 # sort the biggest+newest midxes first, so that we can eliminate
188 # smaller (or older) redundant ones that come later in the list
189 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
193 for iname in contents[mname]:
194 if not already.get(iname):
198 debug1('%r is redundant\n' % mname)
202 midxs = [k for k in midxs if not already.get(k)]
203 idxs = [k for k in glob.glob(b'%s/*.idx' % path) if not already.get(k)]
206 i = git.open_idx(iname)
207 sizes[iname] = len(i)
209 all = [(sizes[n],n) for n in (midxs + idxs)]
211 # FIXME: what are the optimal values? Does this make sense?
212 DESIRED_HWM = opt.force and 1 or 5
213 DESIRED_LWM = opt.force and 1 or 2
214 existed = dict((name,1) for sz,name in all)
215 debug1('midx: %d indexes; want no more than %d.\n'
216 % (len(all), DESIRED_HWM))
217 if len(all) <= DESIRED_HWM:
218 debug1('midx: nothing to do.\n')
219 while len(all) > DESIRED_HWM:
221 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
222 part2 = all[len(all)-DESIRED_LWM+1:]
223 all = list(do_midx_group(path, outfilename, part1)) + part2
224 if len(all) > DESIRED_HWM:
225 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
226 % (len(all), DESIRED_HWM))
230 if not existed.get(name):
231 prout.write(name + b'\n')
234 def do_midx_group(outdir, outfilename, infiles):
235 groups = list(_group(infiles, opt.max_files))
237 for n,sublist in enumerate(groups):
239 gprefix = 'Group %d: ' % (n+1)
240 rv = _do_midx(outdir, outfilename, sublist, gprefix)
247 o = options.Options(optspec)
248 (opt, flags, extra) = o.parse(sys.argv[1:])
249 opt.dir = argv_bytes(opt.dir) if opt.dir else None
250 opt.output = argv_bytes(opt.output) if opt.output else None
252 if extra and (opt.auto or opt.force):
253 o.fatal("you can't use -f/-a and also provide filenames")
254 if opt.check and (not extra and not opt.auto):
255 o.fatal("if using --check, you must provide filenames or -a")
257 git.check_repo_or_die()
259 if opt.max_files < 0:
260 opt.max_files = max_files()
261 assert(opt.max_files >= 5)
263 extra = [argv_bytes(x) for x in extra]
266 # check existing midx files
271 paths = opt.dir and [opt.dir] or git.all_packdirs()
# path is a bytes path here; convert it with path_msg for display, as the
# matching message in the auto/force branch does.
debug1('midx: scanning %s\n' % path_msg(path))
274 midxes += glob.glob(os.path.join(path, b'*.midx'))
278 log('All tests passed.\n')
282 do_midx(git.repo(b'objects/pack'), opt.output, extra, b'',
283 byte_stream(sys.stdout))
284 elif opt.auto or opt.force:
286 paths = opt.dir and [opt.dir] or git.all_packdirs()
288 debug1('midx: scanning %s\n' % path_msg(path))
289 do_midx_dir(path, opt.output, byte_stream(sys.stdout))
291 o.fatal("you must use -f or -a or provide input filenames")
294 log('WARNING: %d errors encountered.\n' % len(saved_errors))