2 import sys, math, struct, glob, resource
4 from bup import options, git, midx, _helpers, xstat
5 from bup.helpers import *
# A SHA1 is 20 bytes, so this is how many SHAs fit in one memory page of
# the midx fanout/lookup table.  (PAGE_SIZE comes from bup.helpers.)
SHA_PER_PAGE=PAGE_SIZE/20.

# Command-line specification consumed by bup's options parser.
optspec = """
bup midx [options...] <idxnames...>
--
o,output=  output midx filename (default: auto-generated)
a,auto     automatically use all existing .midx/.idx files as input
f,force    merge produce exactly one .midx containing all objects
p,print    print names of generated midx files
check      validate contents of the given midx files (with -a, all midx files)
max-files= maximum number of idx files to open at once [-1]
d,dir=     directory containing idx/midx files
"""

# Hoist the C-accelerated merge routine to a module global.
merge_into = _helpers.merge_into
26 for i in xrange(0, len(l), count):
def max_files():
    """Return how many index files it is safe to hold open at once.

    Derived from the process RLIMIT_NOFILE (the smaller of the soft and
    hard limits), minus a margin for the other descriptors bup needs.
    """
    # NOTE(review): on platforms where RLIM_INFINITY == -1 an unlimited
    # rlimit would make this negative -- confirm callers guard against it
    # (the main script asserts opt.max_files >= 5).
    limit = min(resource.getrlimit(resource.RLIMIT_NOFILE))
    if limit > 32:
        margin = 20  # just a safety margin
    else:
        margin = 6   # minimum safety margin
    return limit - margin
40 nicename = git.repo_rel(name)
41 log('Checking %s.\n' % nicename)
43 ix = git.open_idx(name)
44 except git.GitError, e:
45 add_error('%s: %s' % (name, e))
47 for count,subname in enumerate(ix.idxnames):
48 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
49 for ecount,e in enumerate(sub):
50 if not (ecount % 1234):
51 qprogress(' %d/%d: %s %d/%d\r'
52 % (count, len(ix.idxnames),
53 git.shorten_hash(subname), ecount, len(sub)))
55 add_error("%s: %s: %s missing from idx"
56 % (nicename, git.shorten_hash(subname),
57 str(e).encode('hex')))
59 add_error("%s: %s: %s missing from midx"
60 % (nicename, git.shorten_hash(subname),
61 str(e).encode('hex')))
63 for ecount,e in enumerate(ix):
64 if not (ecount % 1234):
65 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
67 add_error('%s: ordering error: %s < %s'
69 str(e).encode('hex'), str(prev).encode('hex')))
# First output directory we wrote to this run; later, different directories
# get a repo-relative prefix in their progress messages.
_first = None
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    """Merge the given .idx/.midx files into a single .midx.

    If *outfilename* is None, an auto-generated name (hash of the input
    filenames) inside *outdir* is used.  *prefixstr* is prepended to
    progress messages.  Returns (total_objects, outfilename), or None
    when there is nothing worth merging.
    """
    global _first
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    for name in infilenames:
        ix = git.open_idx(name)
        inp.append((
            ix.map,
            len(ix),
            ix.sha_ofs,
            # midx inputs carry their own "which idx" table; plain idx
            # files contribute a single source, so 0.
            isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
            len(allfilenames),
        ))
        for n in ix.idxnames:
            allfilenames.append(os.path.basename(n))
        total += len(ix)
    # Sort inputs by their first SHA (descending) so merge_into can pick
    # the next source cheaply.
    inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))

    if not _first: _first = outdir
    dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
    debug1('midx: %s%screating from %d files (%d objects).\n'
           % (dirprefix, prefixstr, len(infilenames), total))
    # Skip trivial merges: too few inputs/objects to be worth a rewrite.
    if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
       or ((opt.auto or opt.force) and len(infilenames) < 2) \
       or (opt.force and not total):
        debug1('midx: nothing to do.\n')
        return

    # Size the fanout table: one page's worth of SHAs per bucket, and a
    # power-of-two bucket count.
    pages = int(total/SHA_PER_PAGE) or 1
    bits = int(math.ceil(math.log(pages, 2)))
    entries = 2**bits
    debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

    unlink(outfilename)
    f = open(outfilename + '.tmp', 'w+b')
    f.write('MIDX')
    f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
    assert(f.tell() == 12)  # 4-byte magic + two 4-byte ints

    # Pre-extend the file: header + fanout + 20 bytes/SHA + 4 bytes/SHA
    # (which-idx table), then let merge_into fill it via mmap.
    f.truncate(12 + 4*entries + 20*total + 4*total)
    f.flush()
    fdatasync(f.fileno())

    fmap = mmap_readwrite(f, close=False)

    count = merge_into(fmap, bits, total, inp)
    del fmap # Assume this calls msync() now.

    # The list of source idx names goes at the very end, NUL-separated.
    f.seek(0, os.SEEK_END)
    f.write('\0'.join(allfilenames))
    f.close()
    os.rename(outfilename + '.tmp', outfilename)

    # this is just for testing (dead code)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        assert(len(p) == total)
        # BUG FIX (dead code): previously "for pe, e in p, git.idxmerge(...)"
        # iterated over a 2-tuple of iterables instead of pairing them.
        for pe, e in zip(p, git.idxmerge(inp, final_progress=False)):
            assert(pe == e)
            assert(p.exists(pe))

    return total, outfilename
147 def do_midx(outdir, outfilename, infilenames, prefixstr):
148 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
149 if rv and opt['print']:
153 def do_midx_dir(path):
156 if opt.force and not opt.auto:
157 midxs = [] # don't use existing midx files
159 midxs = glob.glob('%s/*.midx' % path)
162 m = git.open_idx(mname)
163 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
164 sizes[mname] = len(m)
166 # sort the biggest+newest midxes first, so that we can eliminate
167 # smaller (or older) redundant ones that come later in the list
168 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
172 for iname in contents[mname]:
173 if not already.get(iname):
177 debug1('%r is redundant\n' % mname)
181 midxs = [k for k in midxs if not already.get(k)]
182 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
185 i = git.open_idx(iname)
186 sizes[iname] = len(i)
188 all = [(sizes[n],n) for n in (midxs + idxs)]
190 # FIXME: what are the optimal values? Does this make sense?
191 DESIRED_HWM = opt.force and 1 or 5
192 DESIRED_LWM = opt.force and 1 or 2
193 existed = dict((name,1) for sz,name in all)
194 debug1('midx: %d indexes; want no more than %d.\n'
195 % (len(all), DESIRED_HWM))
196 if len(all) <= DESIRED_HWM:
197 debug1('midx: nothing to do.\n')
198 while len(all) > DESIRED_HWM:
200 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
201 part2 = all[len(all)-DESIRED_LWM+1:]
202 all = list(do_midx_group(path, part1)) + part2
203 if len(all) > DESIRED_HWM:
204 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
205 % (len(all), DESIRED_HWM))
209 if not existed.get(name):
def do_midx_group(outdir, infiles):
    """Merge *infiles* into midx files, at most opt.max_files inputs per
    merge, yielding each successful (total, outfilename) result."""
    groups = list(_group(infiles, opt.max_files))
    gprefix = ''
    for n,sublist in enumerate(groups):
        if len(groups) != 1:
            gprefix = 'Group %d: ' % (n+1)
        # BUG FIX: this previously called _do_midx(path, ...), silently
        # depending on a global 'path' leaked from the caller's loop
        # instead of our own 'outdir' parameter.
        rv = _do_midx(outdir, None, sublist, gprefix)
        if rv:
            yield rv
# Main script body: parse options, validate the combination of flags,
# then either check existing midx files or (re)generate them.
handle_ctrl_c()

o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra and (opt.auto or opt.force):
    o.fatal("you can't use -f/-a and also provide filenames")
if opt.check and (not extra and not opt.auto):
    o.fatal("if using --check, you must provide filenames or -a")

git.check_repo_or_die()

if opt.max_files < 0:
    opt.max_files = max_files()
assert(opt.max_files >= 5)  # need a few handles beyond the inputs

if opt.check:
    # check existing midx files
    if extra:
        midxes = extra
    else:
        midxes = []
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes += glob.glob(os.path.join(path, '*.midx'))
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
else:
    if extra:
        # Explicit input filenames: write into the repo's pack dir.
        do_midx(git.repo('objects/pack'), opt.output, extra, '')
    elif opt.auto or opt.force:
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            do_midx_dir(path)
    else:
        o.fatal("you must use -f or -a or provide input filenames")

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)