2 import sys, math, struct, glob, resource
4 from bup import options, git, midx, _helpers, xstat
5 from bup.helpers import *
8 SHA_PER_PAGE=PAGE_SIZE/20.
11 bup midx [options...] <idxnames...>
13 o,output= output midx filename (default: auto-generated)
14 a,auto automatically use all existing .midx/.idx files as input
15 f,force merge produce exactly one .midx containing all objects
16 p,print print names of generated midx files
17 check validate contents of the given midx files (with -a, all midx files)
18 max-files= maximum number of idx files to open at once [-1]
19 d,dir= directory containing idx/midx files
22 merge_into = _helpers.merge_into
26 for i in xrange(0, len(l), count):
31 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
33 mf -= 20 # just a safety margin
35 mf -= 6 # minimum safety margin
40 nicename = git.repo_rel(name)
41 log('Checking %s.\n' % nicename)
43 ix = git.open_idx(name)
44 except git.GitError, e:
45 add_error('%s: %s' % (name, e))
47 for count,subname in enumerate(ix.idxnames):
48 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
49 for ecount,e in enumerate(sub):
50 if not (ecount % 1234):
51 qprogress(' %d/%d: %s %d/%d\r'
52 % (count, len(ix.idxnames),
53 git.shorten_hash(subname), ecount, len(sub)))
55 add_error("%s: %s: %s missing from idx"
56 % (nicename, git.shorten_hash(subname),
57 str(e).encode('hex')))
59 add_error("%s: %s: %s missing from midx"
60 % (nicename, git.shorten_hash(subname),
61 str(e).encode('hex')))
63 for ecount,e in enumerate(ix):
64 if not (ecount % 1234):
65 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
67 add_error('%s: ordering error: %s < %s'
69 str(e).encode('hex'), str(prev).encode('hex')))
# Build a single .midx file out of the index files named in `infilenames`.
# The last visible statement returns the pair (total, outfilename).
# NOTE(review): this listing has gaps (the embedded line numbers skip), so the
# comments below describe only the statements that are actually visible.
74 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Derive an output name from a Sha1 hex digest of the NUL-joined input names.
# Presumably only done when no outfilename was supplied -- the guarding
# condition is not visible here.  Note that `sum` shadows the builtin.
78 sum = Sha1('\0'.join(infilenames)).hexdigest()
79 outfilename = '%s/midx-%s.midx' % (outdir, sum)
# Open every input index.  The visible tuple element is ix.which_ofs for
# PackMidx inputs and 0 otherwise (old-style `and ... or` conditional, so a
# falsy which_ofs also yields 0).
86 for name in infilenames:
87 ix = git.open_idx(name)
93 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
# Only the basename of each contained index name is recorded.
97 allfilenames.append(os.path.basename(n))
# Python 2 cmp-style sort.  Each entry is compared by the 20 bytes starting at
# offset entry[2] inside entry[0]; the arguments are swapped (y before x), so
# the resulting order is descending.
# presumably those 20 bytes are the first SHA-1 in each index -- TODO confirm
99 inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))
# `_first` keeps the first outdir seen; the directory prefix is added to the
# debug message only when the current outdir differs from it.
101 if not _first: _first = outdir
102 dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
103 debug1('midx: %s%screating from %d files (%d objects).\n'
104 % (dirprefix, prefixstr, len(infilenames), total))
# "Nothing to do" when any of these holds:
#   - auto mode with fewer than 1024 objects and fewer than 3 inputs,
#   - auto or force mode with fewer than 2 inputs,
#   - force mode with no objects at all.
105 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
106 or ((opt.auto or opt.force) and len(infilenames) < 2) \
107 or (opt.force and not total):
108 debug1('midx: nothing to do.\n')
# SHA_PER_PAGE is a float, so total/SHA_PER_PAGE is true division; `or 1`
# keeps pages at least 1 so that math.log() never receives 0.
111 pages = int(total/SHA_PER_PAGE) or 1
112 bits = int(math.ceil(math.log(pages, 2)))
# `entries` is assigned on a line that is not part of this listing.
114 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
117 with atomically_replaced_file(outfilename, 'wb') as f:
# '!II' packs two big-endian unsigned 32-bit integers (8 bytes).  The assert
# expects offset 12, so 4 bytes must already have been written by a statement
# that is not visible here.
119 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
120 assert(f.tell() == 12)
# Size the file up front: 12 header bytes, 4 bytes per table entry, then
# 20 + 4 bytes per object.
# presumably a 20-byte SHA-1 plus a 4-byte source-index number per object --
# TODO confirm against the midx format
122 f.truncate(12 + 4*entries + 20*total + 4*total)
124 fdatasync(f.fileno())
# The merge itself is done by _helpers.merge_into (aliased at module level),
# writing through the read/write mapping of the file.
126 fmap = mmap_readwrite(f, close=False)
128 count = merge_into(fmap, bits, total, inp)
129 del fmap # Assume this calls msync() now.
# Append the NUL-joined list of source index names after the mapped region.
130 f.seek(0, os.SEEK_END)
131 f.write('\0'.join(allfilenames))
134 if isinstance(ix, midx.PackMidx):
140 # This is just for testing (if you enable this, don't clear inp above)
142 p = midx.PackMidx(outfilename)
143 assert(len(p.idxnames) == len(infilenames))
145 assert(len(p) == total)
# NOTE(review): this iterates over the 2-tuple (p, git.idxmerge(...)) itself
# and tries to unpack each of the two objects into (pe, e); it does not walk
# the two sequences in lockstep.  zip()/izip() was presumably intended.
146 for pe, e in p, git.idxmerge(inp, final_progress=False):
151 return total, outfilename
# Thin wrapper around _do_midx() that additionally honours the 'print' option.
# The option is read as opt['print'] rather than opt.print, presumably because
# `print` is a keyword in Python 2 and cannot be used as an attribute name.
154 def do_midx(outdir, outfilename, infilenames, prefixstr):
155 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
# Only acts when _do_midx() returned a truthy value; the body of this `if`
# is not part of this listing.
156 if rv and opt['print']:
# Merge the .midx/.idx files found in directory `path` until no more than
# DESIRED_HWM index files remain.
# NOTE(review): this listing has gaps (several loop headers and bodies are
# missing); the comments describe only the visible statements.
160 def do_midx_dir(path):
# The empty list and the glob below are presumably the two arms of an
# if/else -- the `else:` line is not visible here.
163 if opt.force and not opt.auto:
164 midxs = [] # don't use existing midx files
166 midxs = glob.glob('%s/*.midx' % path)
# For each midx, remember the full paths of the idx files it names and the
# number of objects it holds.
169 m = git.open_idx(mname)
170 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
171 sizes[mname] = len(m)
173 # sort the biggest+newest midxes first, so that we can eliminate
174 # smaller (or older) redundant ones that come later in the list
# Both key components are negated, giving descending size and then
# descending mtime.
175 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
179 for iname in contents[mname]:
180 if not already.get(iname):
184 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files that are not flagged in `already`.
188 midxs = [k for k in midxs if not already.get(k)]
189 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
192 i = git.open_idx(iname)
193 sizes[iname] = len(i)
# List of (size, name) pairs.  Note that `all` shadows the builtin.
195 all = [(sizes[n],n) for n in (midxs + idxs)]
197 # FIXME: what are the optimal values? Does this make sense?
# With --force both values are 1; otherwise they are 5 and 2.
# presumably HWM/LWM stand for high/low water mark
198 DESIRED_HWM = opt.force and 1 or 5
199 DESIRED_LWM = opt.force and 1 or 2
# Names that existed before any merging, for the lookup at the end.
200 existed = dict((name,1) for sz,name in all)
201 debug1('midx: %d indexes; want no more than %d.\n'
202 % (len(all), DESIRED_HWM))
203 if len(all) <= DESIRED_HWM:
204 debug1('midx: nothing to do.\n')
205 while len(all) > DESIRED_HWM:
# part1: names of all but the last DESIRED_LWM-1 entries, handed over to
# do_midx_group(); part2: those last DESIRED_LWM-1 entries, kept untouched.
207 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
208 part2 = all[len(all)-DESIRED_LWM+1:]
# presumably do_midx_group() yields (total, outfilename) pairs, which would
# match the (size, name) shape of part2 -- TODO confirm
209 all = list(do_midx_group(path, part1)) + part2
210 if len(all) > DESIRED_HWM:
211 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
212 % (len(all), DESIRED_HWM))
# True for names that were not present before merging; the body of this `if`
# is not part of this listing.
216 if not existed.get(name):
# Split `infiles` into chunks of at most opt.max_files names (via _group) and
# run _do_midx() on each chunk with no explicit output filename.
220 def do_midx_group(outdir, infiles):
221 groups = list(_group(infiles, opt.max_files))
223 for n,sublist in enumerate(groups):
# Group numbers in the message prefix are 1-based.
225 gprefix = 'Group %d: ' % (n+1)
# NOTE(review): `path` is not a parameter or visible local of this function,
# so it presumably resolves to a module-level name; `outdir` looks like the
# intended argument -- confirm.
226 rv = _do_midx(path, None, sublist, gprefix)
# Script entry point: parse the command line and reject inconsistent option
# combinations before touching the repository.
# NOTE(review): this listing has gaps (several branch and loop headers are
# missing); the comments describe only the visible statements.
233 o = options.Options(optspec)
234 (opt, flags, extra) = o.parse(sys.argv[1:])
236 if extra and (opt.auto or opt.force):
237 o.fatal("you can't use -f/-a and also provide filenames")
238 if opt.check and (not extra and not opt.auto):
239 o.fatal("if using --check, you must provide filenames or -a")
241 git.check_repo_or_die()
# A negative --max-files (the option spec lists -1 as the default) means
# "derive the limit via max_files()".
# NOTE(review): this assert is skipped when Python runs with -O.
243 if opt.max_files < 0:
244 opt.max_files = max_files()
245 assert(opt.max_files >= 5)
248 # check existing midx files
# Use the directory given with --dir if there is one, otherwise every pack
# directory reported by git.all_packdirs().
253 paths = opt.dir and [opt.dir] or git.all_packdirs()
255 debug1('midx: scanning %s\n' % path)
256 midxes += glob.glob(os.path.join(path, '*.midx'))
260 log('All tests passed.\n')
# Merge the filenames from the command line into the repository's
# objects/pack directory, using --output as the file name if given.
263 do_midx(git.repo('objects/pack'), opt.output, extra, '')
264 elif opt.auto or opt.force:
265 paths = opt.dir and [opt.dir] or git.all_packdirs()
267 debug1('midx: scanning %s\n' % path)
270 o.fatal("you must use -f or -a or provide input filenames")
# presumably saved_errors is the list filled by add_error() -- TODO confirm
273 log('WARNING: %d errors encountered.\n' % len(saved_errors))