2 import sys, math, struct, glob, resource
4 from bup import options, git, midx, _helpers, xstat
5 from bup.helpers import *
# Number of hash entries that fit in one page. The trailing dot makes this a
# float division. NOTE(review): 20 is presumably the per-hash byte size,
# matching the 20-byte slices and the 20*total sizing used further down;
# PAGE_SIZE itself is defined outside this excerpt -- confirm.
8 SHA_PER_PAGE=PAGE_SIZE/20.
11 bup midx [options...] <idxnames...>
13 o,output= output midx filename (default: auto-generated)
14 a,auto automatically use all existing .midx/.idx files as input
15 f,force merge produce exactly one .midx containing all objects
16 p,print print names of generated midx files
17 check validate contents of the given midx files (with -a, all midx files)
18 max-files= maximum number of idx files to open at once [-1]
19 d,dir= directory containing idx/midx files
# Module-level alias for the merge routine provided by _helpers; it is called
# below with the memory-mapped output file, the bit count, the object total
# and the list of inputs.
22 merge_into = _helpers.merge_into
# Step through `l` in strides of `count` items (start indexes 0, count,
# 2*count, ...). NOTE(review): the enclosing function header and the loop
# body are not visible in this excerpt; presumably this belongs to the
# `_group` helper that is called further down -- confirm.
26 for i in xrange(0, len(l), count):
# Derive a cap on the number of files to keep open from the process's
# RLIMIT_NOFILE: take the smaller of the two values getrlimit() returns, then
# subtract a safety margin.
# NOTE(review): the enclosing function header (presumably `max_files`, which
# is called further down) and the condition that selects between the two
# margins are not visible in this excerpt -- confirm.
31 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
33 mf -= 20 # just a safety margin
35 mf -= 6 # minimum safety margin
# Validation of a single index file named by `name`: log what is being
# checked, open it, compare it against each idx it lists, then walk its own
# entries looking for ordering errors. Problems are recorded via add_error().
# NOTE(review): the enclosing function header and several guarding
# statements (try/if/return) are not visible in this excerpt.
40 nicename = git.repo_rel(name)
41 log('Checking %s.\n' % nicename)
43 ix = git.open_idx(name)
# A git.GitError raised while opening is recorded rather than propagated.
44 except git.GitError, e:
45 add_error('%s: %s' % (name, e))
# Visit every idx listed in ix.idxnames; each one is opened relative to the
# directory that contains `name`.
47 for count,subname in enumerate(ix.idxnames):
48 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
49 for ecount,e in enumerate(sub):
# Update the progress line only on every 1234th entry.
50 if not (ecount % 1234):
51 qprogress(' %d/%d: %s %d/%d\r'
52 % (count, len(ix.idxnames),
53 git.shorten_hash(subname), ecount, len(sub)))
# The two reports below name an entry (hex-encoded) as missing from the idx
# and from the midx respectively; the conditions guarding them are not
# visible in this excerpt.
55 add_error("%s: %s: %s missing from idx"
56 % (nicename, git.shorten_hash(subname),
57 str(e).encode('hex')))
59 add_error("%s: %s: %s missing from midx"
60 % (nicename, git.shorten_hash(subname),
61 str(e).encode('hex')))
# Second pass: iterate over the opened index itself and report ordering
# errors, naming the entry and `prev` (where `prev` is maintained is not
# visible in this excerpt).
63 for ecount,e in enumerate(ix):
64 if not (ecount % 1234):
65 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
67 add_error('%s: ordering error: %s < %s'
69 str(e).encode('hex'), str(prev).encode('hex')))
# Build one .midx file out of the given input index files.
#   outdir       directory used when the output name is auto-generated
#   outfilename  output path; the auto-naming step below replaces it with
#                '<outdir>/midx-<sha1>.midx' (the guard for that step is not
#                visible in this excerpt)
#   infilenames  paths of the input index files, each opened via
#                git.open_idx()
#   prefixstr    text inserted into the "creating from" debug message
# The visible return statement yields (total, outfilename).
# NOTE(review): several lines of this function are missing from the excerpt,
# including where `total`, `inp`, `allfilenames`, `entries` and `_first` are
# set up.
74 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Auto-generated name: SHA-1 over the NUL-joined input names. (`sum` shadows
# the builtin of the same name within this function.)
78 sum = Sha1('\0'.join(infilenames)).hexdigest()
79 outfilename = '%s/midx-%s.midx' % (outdir, sum)
# Open every input; for a PackMidx the expression on line 90 yields its
# which_ofs value, and 0 otherwise.
84 for name in infilenames:
85 ix = git.open_idx(name)
90 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
94 allfilenames.append(os.path.basename(n))
# Order the inputs by the 20 bytes found at offset item[2] inside item[0];
# the comparator swaps its arguments (y before x), so the order is descending.
96 inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))
# Remember the first outdir seen; the debug message gets a directory prefix
# only when the current outdir differs from it.
98 if not _first: _first = outdir
99 dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
100 debug1('midx: %s%screating from %d files (%d objects).\n'
101 % (dirprefix, prefixstr, len(infilenames), total))
# "Nothing to do" cases: auto mode with fewer than 1024 objects in fewer than
# 3 inputs; auto or force mode with fewer than 2 inputs; force mode with no
# objects at all. (What follows the debug message is not visible here.)
102 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
103 or ((opt.auto or opt.force) and len(infilenames) < 2) \
104 or (opt.force and not total):
105 debug1('midx: nothing to do.\n')
# Size the lookup table: at least one page, then the number of bits is the
# base-2 logarithm of the page count, rounded up.
108 pages = int(total/SHA_PER_PAGE) or 1
109 bits = int(math.ceil(math.log(pages, 2)))
111 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
# Write into a '.tmp' sibling first; it is renamed over the final name below.
114 f = open(outfilename + '.tmp', 'w+b')
# Version and bit count are packed as two big-endian unsigned 32-bit ints
# (8 bytes). The assert expects offset 12, so 4 bytes must already have been
# written by a line not visible in this excerpt.
116 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
117 assert(f.tell() == 12)
# Pre-size the file: 12 header bytes, 4 bytes per table entry, then 20 bytes
# plus 4 bytes per object.
119 f.truncate(12 + 4*entries + 20*total + 4*total)
# Map the file read/write and let merge_into() fill it in.
121 fmap = mmap_readwrite(f, close=False)
123 count = merge_into(fmap, bits, total, inp)
# Append the NUL-joined input base names at the end of the file.
126 f.seek(0, os.SEEK_END)
127 f.write('\0'.join(allfilenames))
129 os.rename(outfilename + '.tmp', outfilename)
131 # this is just for testing
133 p = midx.PackMidx(outfilename)
134 assert(len(p.idxnames) == len(infilenames))
136 assert(len(p) == total)
# NOTE(review): this loops over the two-element tuple
# (p, git.idxmerge(...)) and unpacks each element into (pe, e); it does not
# pair up the entries of the two sequences. A zip-style pairing may have been
# intended -- confirm.
137 for pe, e in p, git.idxmerge(inp, final_progress=False):
142 return total, outfilename
# Wrapper around _do_midx() taking the same four arguments. When _do_midx()
# returned a truthy result and the 'print' option is set, it acts on that
# result (the action itself is not visible in this excerpt). The option is
# read as opt['print'] because `print` is a keyword in Python 2 and cannot be
# used as an attribute name.
145 def do_midx(outdir, outfilename, infilenames, prefixstr):
146 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
147 if rv and opt['print']:
# Reduce the number of index files in directory `path`: collect the existing
# .midx and .idx files, drop the ones already covered by another midx, and
# merge groups of the remainder until no more than DESIRED_HWM are left.
# NOTE(review): several lines of this function are missing from the excerpt,
# including the initialisation of `contents`, `sizes` and `already` and the
# headers of some loops.
151 def do_midx_dir(path):
# With force but without auto, existing midx files are not used as inputs.
154 if opt.force and not opt.auto:
155 midxs = [] # don't use existing midx files
157 midxs = glob.glob('%s/*.midx' % path)
# For each midx, record the paths of the idx files it lists and its length.
160 m = git.open_idx(mname)
161 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
162 sizes[mname] = len(m)
164 # sort the biggest+newest midxes first, so that we can eliminate
165 # smaller (or older) redundant ones that come later in the list
166 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
# Look for idx files not yet recorded in `already`; a midx is reported as
# redundant in the debug message below (the exact condition is not visible
# in this excerpt).
170 for iname in contents[mname]:
171 if not already.get(iname):
175 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files that are not marked in `already`.
179 midxs = [k for k in midxs if not already.get(k)]
180 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
183 i = git.open_idx(iname)
184 sizes[iname] = len(i)
# (size, name) pairs for every remaining candidate. (`all` shadows the
# builtin of the same name within this function.)
186 all = [(sizes[n],n) for n in (midxs + idxs)]
188 # FIXME: what are the optimal values? Does this make sense?
# High/low water marks: both are 1 in force mode, otherwise 5 and 2.
189 DESIRED_HWM = opt.force and 1 or 5
190 DESIRED_LWM = opt.force and 1 or 2
# Names that were present before any merging; consulted again at the end.
191 existed = dict((name,1) for sz,name in all)
192 debug1('midx: %d indexes; want no more than %d.\n'
193 % (len(all), DESIRED_HWM))
194 if len(all) <= DESIRED_HWM:
195 debug1('midx: nothing to do.\n')
# Merge repeatedly: everything except the last DESIRED_LWM-1 entries is
# handed to do_midx_group(), and its results replace those entries.
196 while len(all) > DESIRED_HWM:
198 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
199 part2 = all[len(all)-DESIRED_LWM+1:]
200 all = list(do_midx_group(path, part1)) + part2
201 if len(all) > DESIRED_HWM:
202 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
203 % (len(all), DESIRED_HWM))
# Selects names that were not among the original candidates (what is done
# with them is not visible in this excerpt).
207 if not existed.get(name):
# Split `infiles` into groups of at most opt.max_files names and run
# _do_midx() on each group with an auto-generated output name (None is
# passed as outfilename). In the visible call, each group's messages are
# prefixed with 'Group N: ', counting from 1.
# NOTE(review): the lines that decide when the prefix is used and what is
# done with `rv` are not visible in this excerpt.
211 def do_midx_group(outdir, infiles):
212 groups = list(_group(infiles, opt.max_files))
214 for n,sublist in enumerate(groups):
216 gprefix = 'Group %d: ' % (n+1)
# NOTE(review): `path` is not a parameter of this function (the parameter is
# `outdir`), so it must resolve from an outer scope at call time; `outdir`
# may have been intended -- confirm.
217 rv = _do_midx(path, None, sublist, gprefix)
# Script entry: parse the command line against optspec, validate the option
# combination, then run one of three modes (check, explicit file list, or
# auto/force directory scan).
# NOTE(review): several lines of this section (loop headers and some branch
# bodies) are not visible in this excerpt.
224 o = options.Options(optspec)
225 (opt, flags, extra) = o.parse(sys.argv[1:])
# Explicit file names cannot be combined with -f/-a, and --check needs either
# file names or -a.
227 if extra and (opt.auto or opt.force):
228 o.fatal("you can't use -f/-a and also provide filenames")
229 if opt.check and (not extra and not opt.auto):
230 o.fatal("if using --check, you must provide filenames or -a")
232 git.check_repo_or_die()
# A negative max-files value means "work it out": use max_files() instead.
234 if opt.max_files < 0:
235 opt.max_files = max_files()
236 assert(opt.max_files >= 5)
239 # check existing midx files
# Directories to scan: the one given with --dir, else all pack directories.
244 paths = opt.dir and [opt.dir] or git.all_packdirs()
246 debug1('midx: scanning %s\n' % path)
247 midxes += glob.glob(os.path.join(path, '*.midx'))
251 log('All tests passed.\n')
# Explicit input names: build one midx in the repository's objects/pack
# directory, using the output name given with --output.
254 do_midx(git.repo('objects/pack'), opt.output, extra, '')
255 elif opt.auto or opt.force:
256 paths = opt.dir and [opt.dir] or git.all_packdirs()
258 debug1('midx: scanning %s\n' % path)
261 o.fatal("you must use -f or -a or provide input filenames")
# Final summary of the errors collected in saved_errors.
264 log('WARNING: %d errors encountered.\n' % len(saved_errors))