2 import sys, math, struct, glob, resource
4 from bup import options, git, midx, _helpers, xstat
5 from bup.helpers import *
# How many 20-byte object ids fit in one page of PAGE_SIZE bytes.  The
# trailing "20." is a float literal, so this is a float division.  It is used
# further down to derive the number of lookup-table pages for a new midx.
# (PAGE_SIZE is not defined in this excerpt -- presumably it arrives through
# the star import from bup.helpers; confirm.)
8 SHA_PER_PAGE=PAGE_SIZE/20.
11 bup midx [options...] <idxnames...>
13 o,output= output midx filename (default: auto-generated)
14 a,auto automatically use all existing .midx/.idx files as input
15 f,force merge produce exactly one .midx containing all objects
16 p,print print names of generated midx files
17 check validate contents of the given midx files (with -a, all midx files)
18 max-files= maximum number of idx files to open at once [-1]
19 d,dir= directory containing idx/midx files
# Module-level alias for the merge routine exposed by _helpers; _do_midx()
# calls it as merge_into(fmap, bits, total, inp) to fill the mmap'd output.
22 merge_into = _helpers.merge_into
# Steps through l in strides of `count` items.  Both the enclosing def and
# the loop body are missing from this excerpt -- presumably this is the
# _group() helper that do_midx_group() calls; confirm against the full file.
26 for i in xrange(0, len(l), count):
# Derives a file-count budget from the process's open-file limit.  The
# enclosing def is not visible here -- presumably max_files(), which the
# main script calls when --max-files is negative; confirm.
# resource.getrlimit() returns a (soft, hard) pair; take the smaller value.
31 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
# Headroom is subtracted so that not every descriptor is consumed by idx
# files.  The conditions choosing between these two adjustments are on lines
# that are not visible in this excerpt.
33 mf -= 20 # just a safety margin
35 mf -= 6 # minimum safety margin
# Fragment of the --check validation routine.  Its def line and several
# statements (the try:, the membership tests, the `prev` bookkeeping) are not
# visible in this excerpt.  `name` is the path of the index being validated;
# every problem found is reported through add_error().
40 nicename = git.repo_rel(name)
41 log('Checking %s.\n' % nicename)
# A GitError raised while opening the index is caught and recorded as an
# error rather than being allowed to propagate.
43 ix = git.open_idx(name)
44 except git.GitError, e:
45 add_error('%s: %s' % (name, e))
# Pass 1: open every idx listed in ix.idxnames (resolved relative to the
# directory that contains `name`) and walk its entries.
47 for count,subname in enumerate(ix.idxnames):
48 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
49 for ecount,e in enumerate(sub):
# Refresh the progress line only once every 1234 entries.
50 if not (ecount % 1234):
51 qprogress(' %d/%d: %s %d/%d\r'
52 % (count, len(ix.idxnames),
53 git.shorten_hash(subname), ecount, len(sub)))
# Two distinct failures are reported below, each with the hex form of entry
# e: "missing from idx" and "missing from midx".  The conditions that select
# each message are on lines not visible in this excerpt.
55 add_error("%s: %s: %s missing from idx"
56 % (nicename, git.shorten_hash(subname),
57 str(e).encode('hex')))
59 add_error("%s: %s: %s missing from midx"
60 % (nicename, git.shorten_hash(subname),
61 str(e).encode('hex')))
# Pass 2: walk the index itself and report an "ordering error" naming the
# current entry e and the previous entry `prev` (the comparison and the
# assignment of `prev` are not visible here).
63 for ecount,e in enumerate(ix):
64 if not (ecount % 1234):
65 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
67 add_error('%s: ordering error: %s < %s'
69 str(e).encode('hex'), str(prev).encode('hex')))
# Build a single .midx file out of the index files in `infilenames`.
#   outdir      -- directory used when the output name is auto-generated
#   outfilename -- explicit output path (do_midx_group() passes None)
#   infilenames -- paths of the .idx/.midx files to merge
#   prefixstr   -- text inserted into the debug message (e.g. 'Group 1: ')
# Returns (total, outfilename) on success.  Several lines of this function
# (guards, counters, the early exit) are not visible in this excerpt.
74 def _do_midx(outdir, outfilename, infilenames, prefixstr):
# Auto-generated name: hex SHA-1 of the NUL-joined input names, placed in
# outdir.  (The guard deciding when this applies is not visible; presumably
# only when no outfilename was supplied -- confirm.)
78 sum = Sha1('\0'.join(infilenames)).hexdigest()
79 outfilename = '%s/midx-%s.midx' % (outdir, sum)
# Open every input.  For an existing PackMidx its which_ofs is passed along,
# otherwise 0; the base names of the constituent files are collected in
# allfilenames so they can be written at the end of the new midx.
84 for name in infilenames:
85 ix = git.open_idx(name)
90 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
94 allfilenames.append(os.path.basename(n))
# Order the inputs by the 20 bytes found at offset x[2] inside x[0]; the
# cmp() arguments are swapped (y before x), so the order is descending.
96 inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))
# _first remembers the first outdir seen, so that only other directories get
# a "<dir>: " prefix in the debug output.  (_first is not assigned anywhere
# else in the visible lines -- presumably a module-level global; confirm.)
98 if not _first: _first = outdir
99 dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
100 debug1('midx: %s%screating from %d files (%d objects).\n'
101 % (dirprefix, prefixstr, len(infilenames), total))
# Skip the merge when it would not be worthwhile: in auto mode with fewer
# than 1024 objects and fewer than 3 inputs; in auto/force mode with fewer
# than 2 inputs; or in force mode with no objects at all.
102 if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
103 or ((opt.auto or opt.force) and len(infilenames) < 2) \
104 or (opt.force and not total):
105 debug1('midx: nothing to do.\n')
# Lookup-table sizing: number of pages of ids (at least 1) and the number of
# bits needed to index them.  `entries` is assigned on a line that is not
# visible here (presumably 2**bits -- confirm).
108 pages = int(total/SHA_PER_PAGE) or 1
109 bits = int(math.ceil(math.log(pages, 2)))
111 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
# The output is assembled in '<outfilename>.tmp' and renamed into place only
# once complete.
114 f = open(outfilename + '.tmp', 'w+b')
# '!II' packs two big-endian 32-bit values (8 bytes); the assert expects a
# 12-byte header, so 4 bytes must have been written on a line not shown.
116 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
117 assert(f.tell() == 12)
# Pre-size the file: 12-byte header + 4 bytes per table entry + 20 bytes per
# object + 4 more bytes per object.
119 f.truncate(12 + 4*entries + 20*total + 4*total)
# Map the file read/write and let merge_into() fill it from the inputs.
121 fmap = mmap_readwrite(f, close=False)
123 count = merge_into(fmap, bits, total, inp)
# Append the NUL-joined list of constituent file names after the tables,
# then move the finished file into place.
126 f.seek(0, git.SEEK_END)
127 f.write('\0'.join(allfilenames))
129 os.rename(outfilename + '.tmp', outfilename)
131 # this is just for testing
# NOTE(review): the guard for this testing section is not visible in this
# excerpt.  As written, the for statement iterates over the 2-tuple
# (p, git.idxmerge(...)) instead of pairing their entries, and the assert
# uses `i` and `pi`, which are not defined in any visible line -- confirm
# that this section is disabled, or repair it.
133 p = midx.PackMidx(outfilename)
134 assert(len(p.idxnames) == len(infilenames))
136 assert(len(p) == total)
137 for pe, e in p, git.idxmerge(inp, final_progress=False):
138 assert(i == pi.next())
141 return total, outfilename
# Thin wrapper around _do_midx() taking the same four arguments.  When
# _do_midx() returns a truthy result and the -p/--print option is set, a
# follow-up action is taken (its body is not visible in this excerpt).
144 def do_midx(outdir, outfilename, infilenames, prefixstr):
145 rv = _do_midx(outdir, outfilename, infilenames, prefixstr)
# Item access is used because `print` is a keyword in Python 2, so the
# attribute form opt.print would not parse.
146 if rv and opt['print']:
# Consolidate the index files found in directory `path`: drop redundant
# midx files from consideration, then repeatedly merge indexes until no more
# than DESIRED_HWM remain.  Several statements of this function (loop
# headers, initialisations, the final clean-up body) are not visible in this
# excerpt.
150 def do_midx_dir(path):
# With --force but without --auto, existing midx files are ignored;
# otherwise every *.midx in the directory is a candidate.
153 if opt.force and not opt.auto:
154 midxs = [] # don't use existing midx files
156 midxs = glob.glob('%s/*.midx' % path)
# For each midx, record the full paths of the idx files it covers and its
# object count.
159 m = git.open_idx(mname)
160 contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
161 sizes[mname] = len(m)
163 # sort the biggest+newest midxes first, so that we can eliminate
164 # smaller (or older) redundant ones that come later in the list
165 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
# `already` tracks idx files that have been seen in an earlier midx; a midx
# is reported as redundant by the debug message below (the exact condition
# is on lines not visible here).
169 for iname in contents[mname]:
170 if not already.get(iname):
174 debug1('%r is redundant\n' % mname)
# Keep only the midx files and loose idx files not marked in `already`.
178 midxs = [k for k in midxs if not already.get(k)]
179 idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
182 i = git.open_idx(iname)
183 sizes[iname] = len(i)
# (size, name) pairs for every remaining index.  Note that the local name
# `all` shadows the builtin of the same name inside this function.
185 all = [(sizes[n],n) for n in (midxs + idxs)]
187 # FIXME: what are the optimal values? Does this make sense?
# Both watermarks are 1 under --force (merge everything into one file);
# otherwise the high-water mark is 5 and the low-water mark is 2.
188 DESIRED_HWM = opt.force and 1 or 5
189 DESIRED_LWM = opt.force and 1 or 2
# Names that were present before any merging started.
190 existed = dict((name,1) for sz,name in all)
191 debug1('midx: %d indexes; want no more than %d.\n'
192 % (len(all), DESIRED_HWM))
193 if len(all) <= DESIRED_HWM:
194 debug1('midx: nothing to do.\n')
195 while len(all) > DESIRED_HWM:
# Merge everything except the last DESIRED_LWM-1 entries (part1); those
# trailing entries (part2) are carried over unchanged next to the results
# produced by do_midx_group().
197 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
198 part2 = all[len(all)-DESIRED_LWM+1:]
199 all = list(do_midx_group(path, part1)) + part2
200 if len(all) > DESIRED_HWM:
201 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
202 % (len(all), DESIRED_HWM))
# Selects names that were not present before merging, i.e. files created by
# this run; what is done with them is on lines not visible in this excerpt.
206 if not existed.get(name):
# Split `infiles` into groups of at most opt.max_files via _group() and run
# _do_midx() on each group.  do_midx_dir() wraps the result in list(), so
# this yields or returns a sequence (that part is not visible in this
# excerpt).
210 def do_midx_group(outdir, infiles):
211 groups = list(_group(infiles, opt.max_files))
213 for n,sublist in enumerate(groups):
# Groups are numbered from 1 in the debug prefix.
215 gprefix = 'Group %d: ' % (n+1)
# NOTE(review): `path` is not a parameter or local of this function -- the
# parameter is named `outdir`.  As written this resolves to whatever
# module-level `path` happens to hold at call time; it looks like `outdir`
# was intended.  Confirm and fix against the full file.
216 rv = _do_midx(path, None, sublist, gprefix)
# Script entry: parse the command line and dispatch to check mode, an
# explicit merge of the given files, or an automatic/forced merge per pack
# directory.  Several branch and loop headers are not visible in this
# excerpt.
223 o = options.Options(optspec)
224 (opt, flags, extra) = o.parse(sys.argv[1:])
# Argument validation: explicit filenames cannot be combined with -a/-f, and
# --check needs either filenames or -a.
226 if extra and (opt.auto or opt.force):
227 o.fatal("you can't use -f/-a and also provide filenames")
228 if opt.check and (not extra and not opt.auto):
229 o.fatal("if using --check, you must provide filenames or -a")
231 git.check_repo_or_die()
# A negative --max-files (the option's default is -1) means "detect the
# limit automatically"; fewer than 5 usable files is treated as fatal.
233 if opt.max_files < 0:
234 opt.max_files = max_files()
235 assert(opt.max_files >= 5)
238 # check existing midx files
# Directories to scan: the one given with -d/--dir, or else all pack
# directories known to git.all_packdirs().
243 paths = opt.dir and [opt.dir] or git.all_packdirs()
245 debug1('midx: scanning %s\n' % path)
246 midxes += glob.glob(os.path.join(path, '*.midx'))
250 log('All tests passed.\n')
# Explicit input files: merge them into the repository's objects/pack
# directory, honouring -o/--output if it was given.
253 do_midx(git.repo('objects/pack'), opt.output, extra, '')
254 elif opt.auto or opt.force:
255 paths = opt.dir and [opt.dir] or git.all_packdirs()
257 debug1('midx: scanning %s\n' % path)
260 o.fatal("you must use -f or -a or provide input filenames")
# Summarise any errors collected in saved_errors (the guard for this line is
# not visible in this excerpt).
263 log('WARNING: %d errors encountered.\n' % len(saved_errors))