3 bup_python="$(dirname "$0")/bup-python" || exit $?
4 exec "$bup_python" "$0" ${1+"$@"}
8 from __future__ import absolute_import
9 import glob, math, os, resource, struct, sys, tempfile
11 from bup import options, git, midx, _helpers, xstat
12 from bup.compat import range
13 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
14 handle_ctrl_c, log, mmap_readwrite, qprogress,
# PAGE_SIZE divided by 20 (presumably the 20-byte SHA-1 size, matching the
# name).  The trailing dot makes this a float division, which is why the
# page count computed from it is wrapped in int().
19 SHA_PER_PAGE=PAGE_SIZE/20.
22 bup midx [options...] <idxnames...>
24 o,output= output midx filename (default: auto-generated)
25 a,auto automatically use all existing .midx/.idx files as input
26 f,force merge produce exactly one .midx containing all objects
27 p,print print names of generated midx files
28 check validate contents of the given midx files (with -a, all midx files)
29 max-files= maximum number of idx files to open at once [-1]
30 d,dir= directory containing idx/midx files
33 merge_into = _helpers.merge_into
37 for i in range(0, len(l), count):
42 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
44 mf -= 20 # just a safety margin
46 mf -= 6 # minimum safety margin
51 nicename = git.repo_rel(name)
52 log('Checking %s.\n' % nicename)
54 ix = git.open_idx(name)
55 except git.GitError as e:
56 add_error('%s: %s' % (name, e))
58 for count,subname in enumerate(ix.idxnames):
59 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
60 for ecount,e in enumerate(sub):
61 if not (ecount % 1234):
62 qprogress(' %d/%d: %s %d/%d\r'
63 % (count, len(ix.idxnames),
64 git.shorten_hash(subname), ecount, len(sub)))
66 add_error("%s: %s: %s missing from idx"
67 % (nicename, git.shorten_hash(subname),
68 str(e).encode('hex')))
70 add_error("%s: %s: %s missing from midx"
71 % (nicename, git.shorten_hash(subname),
72 str(e).encode('hex')))
74 for ecount,e in enumerate(ix):
75 if not (ecount % 1234):
76 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
78 add_error('%s: ordering error: %s < %s'
80 str(e).encode('hex'), str(prev).encode('hex')))
# Build one .midx file from the index files listed in infilenames.
#
# Parameters (as used by the statements visible here):
#   outdir      - directory used for the auto-generated output name and
#                 for the repo-relative prefix in the debug message.
#   outfilename - output path; it is reassigned below to
#                 '<outdir>/midx-<sha1>.midx' (the guard deciding when is
#                 not visible here -- presumably only when none was given).
#   infilenames - names of the index files to open and merge.
#   prefixstr   - text inserted into the "creating from" debug message.
# Returns the tuple (total, outfilename) from the final statement.
#
# NOTE(review): the embedded source numbering jumps in several places, so
# some statements of this function are not shown in this view; the
# comments below describe only what is visible.
85 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Auto-generated name: SHA-1 hex digest of the NUL-joined input names.
89 sum = Sha1('\0'.join(infilenames)).hexdigest()
90 outfilename = '%s/midx-%s.midx' % (outdir, sum)
# Open every input index.  The visible fragments pick ix.which_ofs for
# PackMidx inputs (0 otherwise, or when which_ofs is falsy) and collect
# the base name of every entry of ix.idxnames into allfilenames.
97 for name in infilenames:
98 ix = git.open_idx(name)
104 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
107 for n in ix.idxnames:
108 allfilenames.append(os.path.basename(n))
# Order the inputs descending by the 20 bytes of x[0] starting at
# offset x[2].
110 inp.sort(reverse=True, key=lambda x: str(x[0][x[2]:x[2]+20]))
# Remember the first outdir seen; later directories get a repo-relative
# prefix in the debug output, the first one gets none.
112 if not _first: _first = outdir
113 dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
114 debug1('midx: %s%screating from %d files (%d objects).\n'
115 % (dirprefix, prefixstr, len(infilenames), total))
# "Nothing to do" cases: with auto, fewer than 1024 objects in fewer than
# 3 inputs; with auto or force, fewer than 2 inputs; with force, zero
# objects.  What follows the message is not visible in this view.
116 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
117 or ((opt.auto or opt.force) and len(infilenames) < 2) \
118 or (opt.force and not total):
119 debug1('midx: nothing to do.\n')
# Page count is total/SHA_PER_PAGE truncated, but at least 1; bits is the
# ceiling of log2(pages).
122 pages = int(total/SHA_PER_PAGE) or 1
123 bits = int(math.ceil(math.log(pages, 2)))
125 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
# NOTE(review): plain str values are written to a file opened 'wb' (here
# and for the name list below); that matches Python 2 str semantics --
# confirm before running under Python 3.
128 with atomically_replaced_file(outfilename, 'wb') as f:
# Two network-order unsigned 32-bit ints: format version, then bits.  The
# assertion expects 12 bytes in total, so 4 bytes must already have been
# written by a statement not visible here.
130 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
131 assert(f.tell() == 12)
# Size the file up front: 12 header bytes, 4 bytes per table entry, and
# 20 + 4 bytes per object.
133 f.truncate(12 + 4*entries + 20*total + 4*total)
135 fdatasync(f.fileno())
# Map the file read/write and let merge_into fill it from the inputs; its
# result is kept in count.
137 fmap = mmap_readwrite(f, close=False)
139 count = merge_into(fmap, bits, total, inp)
140 del fmap # Assume this calls msync() now.
# Append the NUL-joined list of covered idx base names at the end of file.
141 f.seek(0, os.SEEK_END)
142 f.write('\0'.join(allfilenames))
145 if isinstance(ix, midx.PackMidx):
151 # This is just for testing (if you enable this, don't clear inp above)
153 p = midx.PackMidx(outfilename)
154 assert(len(p.idxnames) == len(infilenames))
156 assert(len(p) == total)
# NOTE(review): this iterates over the 2-tuple (p, <idxmerge iterator>)
# and unpacks each of those two objects into (pe, e); it does not walk
# both sequences in step.  Looks like zip() was intended -- confirm before
# enabling this testing code.
157 for pe, e in p, git.idxmerge(inp, final_progress=False):
162 return total, outfilename
# Wrapper around _do_midx: passes all four arguments through unchanged and
# keeps the result in rv.  When rv is truthy and the 'print' option is
# set, a further action is taken (that statement is not visible in this
# view; the -p help text says it prints names of generated midx files).
165 def do_midx(outdir, outfilename, infilenames, prefixstr):
166 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
167 if rv and opt['print']:
# Merge the .midx/.idx files found in directory `path` until no more than
# DESIRED_HWM indexes remain; `outfilename` is handed on to
# do_midx_group().
#
# NOTE(review): the embedded source numbering jumps in several places, so
# some statements of this function are not shown in this view; the
# comments below describe only what is visible.
# NOTE(review): the local name `all` shadows the builtin inside this
# function.
171 def do_midx_dir(path, outfilename):
# With force but not auto, start from an empty midx list; the other
# visible assignment globs every *.midx in `path`.
174 if opt.force and not opt.auto:
175 midxs = [] # don't use existing midx files
177 midxs = glob.glob('%s/*.midx' % path)
# Per midx: record the path-qualified idx names it lists and its length.
180 m = git.open_idx(mname)
181 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
182 sizes[mname] = len(m)
184 # sort the biggest+newest midxes first, so that we can eliminate
185 # smaller (or older) redundant ones that come later in the list
186 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
# `already` is consulted per idx name listed by a midx; a midx can be
# reported as redundant (the bookkeeping in between is not visible).
190 for iname in contents[mname]:
191 if not already.get(iname):
195 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files that have no truthy entry in `already`.
199 midxs = [k for k in midxs if not already.get(k)]
200 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
203 i = git.open_idx(iname)
204 sizes[iname] = len(i)
# Candidate list of (size, name) pairs for the remaining midx + idx files.
206 all = [(sizes[n],n) for n in (midxs + idxs)]
208 # FIXME: what are the optimal values? Does this make sense?
# High/low water marks: both 1 with force, otherwise 5 and 2.
209 DESIRED_HWM = opt.force and 1 or 5
210 DESIRED_LWM = opt.force and 1 or 2
# Names that were present before any merging started.
211 existed = dict((name,1) for sz,name in all)
212 debug1('midx: %d indexes; want no more than %d.\n'
213 % (len(all), DESIRED_HWM))
214 if len(all) <= DESIRED_HWM:
215 debug1('midx: nothing to do.\n')
# Each pass merges everything except the last DESIRED_LWM-1 entries
# (part1) and carries those last entries over unchanged (part2); it
# repeats while the list is still longer than DESIRED_HWM.
216 while len(all) > DESIRED_HWM:
218 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
219 part2 = all[len(all)-DESIRED_LWM+1:]
220 all = list(do_midx_group(path, outfilename, part1)) + part2
221 if len(all) > DESIRED_HWM:
222 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
223 % (len(all), DESIRED_HWM))
# Tests for names that did not exist before merging (the surrounding loop
# and the action taken are not visible in this view).
227 if not existed.get(name):
# Split `infiles` into groups via _group(infiles, opt.max_files) and call
# _do_midx once per group with a 1-based 'Group N: ' message prefix.
# The caller wraps the result in list(), so this presumably yields the
# _do_midx results -- the statement that hands rv back is not visible in
# this view.
231 def do_midx_group(outdir, outfilename, infiles):
232 groups = list(_group(infiles, opt.max_files))
234 for n,sublist in enumerate(groups):
236 gprefix = 'Group %d: ' % (n+1)
237 rv = _do_midx(outdir, outfilename, sublist, gprefix)
# Command entry point: parse the options, validate the combination, then
# either check existing midx files or create new ones.
# NOTE(review): the embedded source numbering jumps in several places, so
# some statements (loop headers, branch conditions) are not shown in this
# view; the comments below describe only what is visible.
244 o = options.Options(optspec)
245 (opt, flags, extra) = o.parse(sys.argv[1:])
# Explicit filenames cannot be combined with -a/-f, and --check needs
# either filenames or -a.
247 if extra and (opt.auto or opt.force):
248 o.fatal("you can't use -f/-a and also provide filenames")
249 if opt.check and (not extra and not opt.auto):
250 o.fatal("if using --check, you must provide filenames or -a")
252 git.check_repo_or_die()
# A negative --max-files (the option's default is -1) means: take the
# limit from max_files().  At least 5 is required either way.
254 if opt.max_files < 0:
255 opt.max_files = max_files()
256 assert(opt.max_files >= 5)
259 # check existing midx files
# Use the directory given with -d if any, otherwise every pack directory
# reported by git.all_packdirs(); collect the *.midx files found there.
264 paths = opt.dir and [opt.dir] or git.all_packdirs()
266 debug1('midx: scanning %s\n' % path)
267 midxes += glob.glob(os.path.join(path, '*.midx'))
271 log('All tests passed.\n')
# Creation with explicitly named inputs: output goes to the repository's
# objects/pack directory with an empty message prefix.
274 do_midx(git.repo('objects/pack'), opt.output, extra, '')
# With -a or -f: process each selected pack directory as a whole.
275 elif opt.auto or opt.force:
276 paths = opt.dir and [opt.dir] or git.all_packdirs()
278 debug1('midx: scanning %s\n' % path)
279 do_midx_dir(path, opt.output)
281 o.fatal("you must use -f or -a or provide input filenames")
# Reports the number of entries in saved_errors (the condition guarding
# this line is not visible in this view).
284 log('WARNING: %d errors encountered.\n' % len(saved_errors))