2 import sys, math, struct, glob, resource
4 from bup import options, git, midx, _helpers, xstat
5 from bup.helpers import *
# Number of 20-byte entries per PAGE_SIZE bytes, kept as a float (note `20.`).
# Presumably 20 is the SHA-1 digest length -- confirm.  PAGE_SIZE is not
# defined in the visible lines (presumably provided by `from bup.helpers
# import *` -- verify).
8 SHA_PER_PAGE=PAGE_SIZE/20.
11 bup midx [options...] <idxnames...>
13 o,output= output midx filename (default: auto-generated)
14 a,auto automatically use all existing .midx/.idx files as input
15 f,force merge produce exactly one .midx containing all objects
16 p,print print names of generated midx files
17 check validate contents of the given midx files (with -a, all midx files)
18 max-files= maximum number of idx files to open at once [-1]
19 d,dir= directory containing idx/midx files
# Module-level alias for _helpers.merge_into; it is called on the mmap'd
# output file when the midx is written.
22 merge_into = _helpers.merge_into
26 for i in xrange(0, len(l), count):
31 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
33 mf -= 20 # just a safety margin
35 mf -= 6 # minimum safety margin
40 nicename = git.repo_rel(name)
41 log('Checking %s.\n' % nicename)
43 ix = git.open_idx(name)
44 except git.GitError, e:
45 add_error('%s: %s' % (name, e))
47 for count,subname in enumerate(ix.idxnames):
48 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
49 for ecount,e in enumerate(sub):
50 if not (ecount % 1234):
51 qprogress(' %d/%d: %s %d/%d\r'
52 % (count, len(ix.idxnames),
53 git.shorten_hash(subname), ecount, len(sub)))
55 add_error("%s: %s: %s missing from idx"
56 % (nicename, git.shorten_hash(subname),
57 str(e).encode('hex')))
59 add_error("%s: %s: %s missing from midx"
60 % (nicename, git.shorten_hash(subname),
61 str(e).encode('hex')))
63 for ecount,e in enumerate(ix):
64 if not (ecount % 1234):
65 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
67 add_error('%s: ordering error: %s < %s'
69 str(e).encode('hex'), str(prev).encode('hex')))
# Merge the idx/midx files listed in `infilenames` into one .midx file.
# On the path that reaches the end, returns the tuple (total, outfilename).
#
# NOTE(review): this listing is a sparse excerpt -- the embedded original line
# numbers skip, so statements between the lines shown here are not visible.
# The comments below describe only what the visible lines establish.
74 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Auto-generated output name: hex digest of the NUL-joined input names, placed
# in `outdir`.  The condition guarding these two lines is not shown
# (presumably "no outfilename was given" -- confirm).
# NOTE(review): `sum` shadows the builtin of the same name.
78 sum = Sha1('\0'.join(infilenames)).hexdigest()
79 outfilename = '%s/midx-%s.midx' % (outdir, sum)
# Open every input index file.
86 for name in infilenames:
87 ix = git.open_idx(name)
# and/or conditional: ix.which_ofs when ix is a PackMidx, otherwise 0
# (also 0 if which_ofs itself is falsy).
93 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
# Base names collected here are written at the end of the output file.
97 allfilenames.append(os.path.basename(n))
# Python 2 cmp-style sort: orders `inp` by the 20-byte slice
# x[0][x[2]:x[2]+20] of each entry, descending (y is compared against x).
99 inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))
# Remember the first outdir seen; any later, different outdir gets a
# repo-relative 'dir: ' prefix in the debug message.  `_first` is not assigned
# elsewhere in the visible lines (presumably a module-level global -- verify).
101 if not _first: _first = outdir
102 dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
103 debug1('midx: %s%screating from %d files (%d objects).\n'
104 % (dirprefix, prefixstr, len(infilenames), total))
# "Nothing to do" when: auto mode with fewer than 1024 objects and fewer than
# 3 inputs; or auto/force mode with fewer than 2 inputs; or force mode with
# zero objects.  What follows the debug message is not shown.
105 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
106 or ((opt.auto or opt.force) and len(infilenames) < 2) \
107 or (opt.force and not total):
108 debug1('midx: nothing to do.\n')
# One page per SHA_PER_PAGE objects (at least 1); bits = ceil(log2(pages)).
111 pages = int(total/SHA_PER_PAGE) or 1
112 bits = int(math.ceil(math.log(pages, 2)))
# `entries` is computed on a line not shown; the table takes 4 bytes per entry.
114 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
# The output is built under a '.tmp' name and renamed into place at the end.
117 f = open(outfilename + '.tmp', 'w+b')
# '!II' packs two unsigned 32-bit big-endian ints (8 bytes).  The assert on
# offset 12 implies 4 more bytes were written by a line not shown.
119 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
120 assert(f.tell() == 12)
# Pre-size the file: 12 header bytes + 4 bytes per table entry + 20 bytes per
# object + 4 more bytes per object.
122 f.truncate(12 + 4*entries + 20*total + 4*total)
124 fdatasync(f.fileno())
# Map the file read/write and let merge_into fill it from the sorted inputs.
126 fmap = mmap_readwrite(f, close=False)
128 count = merge_into(fmap, bits, total, inp)
129 del fmap # Assume this calls msync() now.
132 if isinstance(ix, midx.PackMidx):
# Append the NUL-joined input base names after the pre-sized region.
137 f.seek(0, os.SEEK_END)
138 f.write('\0'.join(allfilenames))
# Publish the finished file under its final name.
140 os.rename(outfilename + '.tmp', outfilename)
142 # This is just for testing (if you enable this, don't clear inp above)
144 p = midx.PackMidx(outfilename)
145 assert(len(p.idxnames) == len(infilenames))
147 assert(len(p) == total)
# NOTE(review): `for pe, e in p, git.idxmerge(...)` iterates over a 2-tuple
# (p, <idxmerge result>) and tries to unpack each element into (pe, e); it
# does not walk the two sequences in parallel.  If pairwise comparison was
# intended this likely needs zip()/izip() -- confirm before enabling.
148 for pe, e in p, git.idxmerge(inp, final_progress=False):
153 return total, outfilename
# Wrapper around _do_midx(): forwards all four arguments unchanged.
#
# NOTE(review): sparse excerpt -- the statement guarded by the `if` below is
# not shown.  opt['print'] uses item access, presumably because `print` is a
# reserved word in Python 2 -- confirm.
156 def do_midx(outdir, outfilename, infilenames, prefixstr):
157 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
# Only act when _do_midx returned a truthy result and the print option is set.
158 if rv and opt['print']:
# Consolidate the .midx/.idx files found in directory `path`, merging until no
# more than DESIRED_HWM indexes remain.
#
# NOTE(review): this listing is a sparse excerpt -- the embedded original line
# numbers skip, so statements between the lines shown here are not visible.
162 def do_midx_dir(path):
# With force but not auto, existing midx files are not used as inputs; the
# other branch (its `else` line is not shown) globs path/*.midx.
165 if opt.force and not opt.auto:
166 midxs = [] # don't use existing midx files
168 midxs = glob.glob('%s/*.midx' % path)
# For each midx: record the path-qualified idx names it lists and its length.
171 m = git.open_idx(mname)
172 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
173 sizes[mname] = len(m)
175 # sort the biggest+newest midxes first, so that we can eliminate
176 # smaller (or older) redundant ones that come later in the list
177 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
# `already` is looked up by idx name here and by file name below; how it is
# filled is on lines not shown.
181 for iname in contents[mname]:
182 if not already.get(iname):
186 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files not marked in `already`.
190 midxs = [k for k in midxs if not already.get(k)]
191 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
194 i = git.open_idx(iname)
195 sizes[iname] = len(i)
# (size, name) pairs for every remaining input.
# NOTE(review): `all` shadows the builtin of the same name.
197 all = [(sizes[n],n) for n in (midxs + idxs)]
199 # FIXME: what are the optimal values? Does this make sense?
# High/low water marks: 1 and 1 under force, otherwise 5 and 2.
200 DESIRED_HWM = opt.force and 1 or 5
201 DESIRED_LWM = opt.force and 1 or 2
# Names present before any merging; consulted at the end of the function.
202 existed = dict((name,1) for sz,name in all)
203 debug1('midx: %d indexes; want no more than %d.\n'
204 % (len(all), DESIRED_HWM))
205 if len(all) <= DESIRED_HWM:
206 debug1('midx: nothing to do.\n')
207 while len(all) > DESIRED_HWM:
# part1: names of everything except the last DESIRED_LWM-1 entries (these are
# merged); part2: that untouched tail.  How `all` is ordered before slicing is
# decided on a line not shown.  The result of do_midx_group is concatenated
# with part2's (size, name) tuples, so it is presumably the same shape.
209 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
210 part2 = all[len(all)-DESIRED_LWM+1:]
211 all = list(do_midx_group(path, part1)) + part2
212 if len(all) > DESIRED_HWM:
213 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
214 % (len(all), DESIRED_HWM))
# Selects names that were not in the original set; the action taken for them
# is not shown.
218 if not existed.get(name):
222 def do_midx_group(outdir, infiles):
223 groups = list(_group(infiles, opt.max_files))
225 for n,sublist in enumerate(groups):
227 gprefix = 'Group %d: ' % (n+1)
228 rv = _do_midx(path, None, sublist, gprefix)
# Command entry point: parse options, validate flag combinations, then run
# the selected mode.
#
# NOTE(review): this listing is a sparse excerpt -- the embedded original line
# numbers skip, so several branch headers and loop lines are not visible.
235 o = options.Options(optspec)
236 (opt, flags, extra) = o.parse(sys.argv[1:])
# Explicit filenames cannot be combined with -a/-f, and --check needs either
# filenames or -a.
238 if extra and (opt.auto or opt.force):
239 o.fatal("you can't use -f/-a and also provide filenames")
240 if opt.check and (not extra and not opt.auto):
241 o.fatal("if using --check, you must provide filenames or -a")
243 git.check_repo_or_die()
# A negative max_files is replaced by the value computed by max_files().
# NOTE(review): the minimum of 5 is enforced with `assert`, which is stripped
# when Python runs with -O.
245 if opt.max_files < 0:
246 opt.max_files = max_files()
247 assert(opt.max_files >= 5)
250 # check existing midx files
# Scan only opt.dir when it is given, otherwise every directory returned by
# git.all_packdirs(), collecting *.midx files.
255 paths = opt.dir and [opt.dir] or git.all_packdirs()
257 debug1('midx: scanning %s\n' % path)
258 midxes += glob.glob(os.path.join(path, '*.midx'))
262 log('All tests passed.\n')
# Build one midx from the extra arguments in the repo's objects/pack
# directory, named by opt.output.  The condition guarding this call is not
# shown (presumably "filenames were given" -- confirm).
265 do_midx(git.repo('objects/pack'), opt.output, extra, '')
266 elif opt.auto or opt.force:
267 paths = opt.dir and [opt.dir] or git.all_packdirs()
269 debug1('midx: scanning %s\n' % path)
272 o.fatal("you must use -f or -a or provide input filenames")
# saved_errors is not defined in the visible lines (presumably filled by
# add_error -- verify).
275 log('WARNING: %d errors encountered.\n' % len(saved_errors))