2 from __future__ import absolute_import, print_function
3 from binascii import hexlify
4 import glob, os, math, resource, struct, sys
6 from bup import options, git, midx, _helpers, xstat
7 from bup.compat import argv_bytes, hexstr, range
8 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
9 log, mmap_readwrite, qprogress,
11 from bup.io import byte_stream, path_msg
# PAGE_SIZE divided by 20 (presumably the byte length of one SHA-1 -- confirm).
# The trailing "." keeps this a float division, so the result is a float.
15 SHA_PER_PAGE=PAGE_SIZE/20.
18 bup midx [options...] <idxnames...>
20 o,output= output midx filename (default: auto-generated)
21 a,auto automatically use all existing .midx/.idx files as input
22 f,force merge produce exactly one .midx containing all objects
23 p,print print names of generated midx files
24 check validate contents of the given midx files (with -a, all midx files)
25 max-files= maximum number of idx files to open at once [-1]
26 d,dir= directory containing idx/midx files
# Module-level alias for the merge_into routine provided by _helpers.
29 merge_into = _helpers.merge_into
# Step through l in strides of `count`; i is the start index of each stride.
33 for i in range(0, len(l), count):
# Start from the smaller of the two RLIMIT_NOFILE values returned by
# getrlimit(), then subtract a safety margin (20 or 6).
# NOTE(review): the condition choosing between the two margins is not
# visible in this excerpt.
38 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
40 mf -= 20 # just a safety margin
42 mf -= 6 # minimum safety margin
# Validate one midx file `name`: announce it, open it, compare it against
# each idx it lists, then verify that its own entries are in order.
# Problems are recorded with add_error().
47 nicename = git.repo_rel(name)
48 log('Checking %s.\n' % path_msg(nicename))
# A GitError raised while opening the midx is recorded as an error.
50 ix = git.open_idx(name)
51 except git.GitError as e:
52 add_error('%s: %s' % (path_msg(name), e))
# Open every idx named by the midx (looked up in the midx's own directory)
# and walk its entries.
54 for count,subname in enumerate(ix.idxnames):
55 sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
56 for ecount,e in enumerate(sub):
# Progress output is emitted only once every 1234 entries.
57 if not (ecount % 1234):
58 qprogress(' %d/%d: %s %d/%d\r'
59 % (count, len(ix.idxnames),
60 git.shorten_hash(subname).decode('ascii'),
# Error for an entry missing from the idx.
# NOTE(review): the guarding condition is not visible in this excerpt.
63 add_error("%s: %s: %s missing from idx"
64 % (path_msg(nicename),
65 git.shorten_hash(subname).decode('ascii'),
# Error for an entry missing from the midx.
# NOTE(review): the guarding condition is not visible in this excerpt.
68 add_error("%s: %s: %s missing from midx"
69 % (path_msg(nicename),
70 git.shorten_hash(subname).decode('ascii'),
# Ordering pass over the midx itself: each entry must be >= the previous one.
73 for ecount,e in enumerate(ix):
74 if not (ecount % 1234):
75 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
76 if e and prev and not e >= prev:
# NOTE(review): nicename is formatted directly here, whereas the other
# add_error calls above wrap it in path_msg() -- confirm this is intended.
77 add_error('%s: ordering error: %s < %s'
78 % (nicename, hexstr(e), hexstr(prev)))
# Build a single midx file from the idx/midx files named in infilenames.
#
# Returns the tuple (total, outfilename) on the path that reaches the final
# return statement.
# NOTE(review): several lines of this function are not visible in this
# excerpt (e.g. the initialisation of inp, allfilenames, total and entries).
83 def _do_midx(outdir, outfilename, infilenames, prefixstr,
84 auto=False, force=False):
# Auto-generated name: <outdir>/midx-<hexlified Sha1 digest of the
# NUL-joined input names>.midx.
# NOTE(review): presumably this applies only when no outfilename was
# supplied -- the guard is not visible here.
88 sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
89 outfilename = b'%s/midx-%s.midx' % (outdir, sum)
# Open each input index and collect the basenames of the idx files it covers.
96 for name in infilenames:
97 ix = git.open_idx(name)
103 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
106 for n in ix.idxnames:
107 allfilenames.append(os.path.basename(n))
# Order the inputs descending by the 20 bytes found at offset x[2] in x[0].
109 inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])
# The directory prefix is shown only when outdir differs from the first
# outdir that was recorded in _first.
111 if not _first: _first = outdir
112 dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
# NOTE(review): dirprefix is bytes (and callers pass prefixstr as either b''
# or a str) while the template is a str; under Python 3 a bytes argument to
# %s renders as b'...' -- confirm whether path_msg()/decoding is wanted.
113 debug1('midx: %s%screating from %d files (%d objects).\n'
114 % (dirprefix, prefixstr, len(infilenames), total))
# Conditions under which there is nothing worth merging.
115 if (auto and (total < 1024 and len(infilenames) < 3)) \
116 or ((auto or force) and len(infilenames) < 2) \
117 or (force and not total):
118 debug1('midx: nothing to do.\n')
# pages is at least 1; bits is ceil(log2(pages)).
121 pages = int(total/SHA_PER_PAGE) or 1
122 bits = int(math.ceil(math.log(pages, 2)))
124 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
# Write the output through atomically_replaced_file().
127 with atomically_replaced_file(outfilename, 'wb') as f:
# '!II' packs the version and bit count as two network-order unsigned 32-bit
# ints (8 bytes).  The assert expects 12 bytes in total, so 4 bytes must have
# been written before this point (that write is not visible in this excerpt).
129 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
130 assert(f.tell() == 12)
# Size the file up front: the 12 bytes already written, plus 4*entries,
# plus 20*total, plus 4*total bytes.
132 f.truncate(12 + 4*entries + 20*total + 4*total)
134 fdatasync(f.fileno())
# Map the file read/write and let merge_into() fill it from the inputs.
136 fmap = mmap_readwrite(f, close=False)
137 count = merge_into(fmap, bits, total, inp)
138 del fmap # Assume this calls msync() now.
# Append the NUL-joined idx basenames after the area sized above.
139 f.seek(0, os.SEEK_END)
140 f.write(b'\0'.join(allfilenames))
143 if isinstance(ix, midx.PackMidx):
149 # This is just for testing (if you enable this, don't clear inp above)
151 # p = midx.PackMidx(outfilename)
152 # assert(len(p.idxnames) == len(infilenames))
153 # log(repr(p.idxnames) + '\n')
154 # assert(len(p) == total)
155 # for pe, e in p, git.idxmerge(inp, final_progress=False):
158 # assert(p.exists(i))
160 return total, outfilename
def do_midx(outdir, outfilename, infilenames, prefixstr, prout,
            auto=False, force=False, print_names=False):
    """Create a midx via _do_midx() and optionally report its filename.

    outdir, outfilename, infilenames and prefixstr, together with the
    auto and force flags, are forwarded unchanged to _do_midx().  When
    that call yields a truthy result and print_names is set, element 1
    of the result followed by a newline (as bytes) is written to prout.
    Nothing is returned.
    """
    outcome = _do_midx(outdir, outfilename, infilenames, prefixstr,
                       auto=auto, force=force)
    # Nothing to report when no midx was produced or names were not requested.
    if not outcome or not print_names:
        return
    prout.write(outcome[1] + b'\n')
# Consolidate the idx/midx files found in directory `path`, merging groups
# of them until no more than DESIRED_HWM indexes remain.
# NOTE(review): several lines of this function are not visible in this
# excerpt (e.g. the initialisation of contents, sizes and already).
171 def do_midx_dir(path, outfilename, prout, auto=False, force=False,
172 max_files=-1, print_names=False):
# With force and without auto, existing midx files are ignored; the other
# visible assignment globs every *.midx in `path`.
175 if force and not auto:
176 midxs = [] # don't use existing midx files
178 midxs = glob.glob(b'%s/*.midx' % path)
# For each midx, record the idx paths it covers and its length.
181 m = git.open_idx(mname)
182 contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
183 sizes[mname] = len(m)
185 # sort the biggest+newest midxes first, so that we can eliminate
186 # smaller (or older) redundant ones that come later in the list
187 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
191 for iname in contents[mname]:
192 if not already.get(iname):
196 debug1('%r is redundant\n' % mname)
# Keep only the midx and idx files that are not marked in `already`.
200 midxs = [k for k in midxs if not already.get(k)]
201 idxs = [k for k in glob.glob(b'%s/*.idx' % path) if not already.get(k)]
204 i = git.open_idx(iname)
205 sizes[iname] = len(i)
# (size, name) pairs for every remaining index; note that this local name
# shadows the builtin all().
207 all = [(sizes[n],n) for n in (midxs + idxs)]
209 # FIXME: what are the optimal values? Does this make sense?
# Both thresholds are 1 when force is truthy; otherwise 5 (high) and 2 (low).
210 DESIRED_HWM = force and 1 or 5
211 DESIRED_LWM = force and 1 or 2
# Remember which names were present before any merging.
212 existed = dict((name,1) for sz,name in all)
213 debug1('midx: %d indexes; want no more than %d.\n'
214 % (len(all), DESIRED_HWM))
215 if len(all) <= DESIRED_HWM:
216 debug1('midx: nothing to do.\n')
# Merge repeatedly while there are more than DESIRED_HWM indexes.
217 while len(all) > DESIRED_HWM:
# part1: the names of all but the last DESIRED_LWM-1 entries, which are
# handed to do_midx_group(); part2: the remaining (size, name) pairs.
219 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
220 part2 = all[len(all)-DESIRED_LWM+1:]
221 all = list(do_midx_group(path, outfilename, part1,
222 auto=auto, force=force, max_files=max_files)) \
224 if len(all) > DESIRED_HWM:
225 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
226 % (len(all), DESIRED_HWM))
# Write out names that were not present before merging.
# NOTE(review): the enclosing loop and any print_names guard are not
# visible in this excerpt.
230 if not existed.get(name):
231 prout.write(name + b'\n')
# Split infiles into groups with _group() and run _do_midx() once per group.
# NOTE(review): the end of the signature and the handling of each rv are
# not visible in this excerpt.
234 def do_midx_group(outdir, outfilename, infiles, auto=False, force=False,
# The list is cut into chunks using max_files as the stride.
236 groups = list(_group(infiles, max_files))
238 for n,sublist in enumerate(groups):
# The prefix passed to _do_midx() is 1-based: "Group 1: ", "Group 2: ", ...
240 gprefix = 'Group %d: ' % (n+1)
241 rv = _do_midx(outdir, outfilename, sublist, gprefix,
242 auto=auto, force=force)
# Command entry: parse the options, validate the combination, then either
# check midx files, build a midx from explicit filenames, or consolidate a
# directory.
# NOTE(review): the enclosing header and several branch lines are not
# visible in this excerpt.
248 o = options.Options(optspec)
249 opt, flags, extra = o.parse_bytes(argv[1:])
250 opt.output = argv_bytes(opt.output) if opt.output else None
# Explicit filenames cannot be combined with -a/-f, and --check needs either
# filenames or -a.
252 if extra and (opt.auto or opt.force):
253 o.fatal("you can't use -f/-a and also provide filenames")
254 if opt.check and (not extra and not opt.auto):
255 o.fatal("if using --check, you must provide filenames or -a")
257 git.check_repo_or_die()
# A negative max_files (the option's default is -1) is replaced by the value
# computed by max_files().
259 if opt.max_files < 0:
260 opt.max_files = max_files()
261 assert(opt.max_files >= 5)
# Use the directory given with -d/--dir, else the repository's objects/pack.
263 path = opt.dir and argv_bytes(opt.dir) or git.repo(b'objects/pack')
265 extra = [argv_bytes(x) for x in extra]
268 # check existing midx files
# NOTE(review): path is formatted directly here, whereas the later
# "scanning" message wraps it in path_msg() -- confirm which is intended.
272 debug1('midx: scanning %s\n' % path)
273 midxes = glob.glob(os.path.join(path, b'*.midx'))
277 log('All tests passed.\n')
# Explicit input filenames: build one midx from them.
281 do_midx(path, opt.output, extra, b'',
282 byte_stream(sys.stdout), auto=opt.auto, force=opt.force,
283 print_names=opt.print)
284 elif opt.auto or opt.force:
286 debug1('midx: scanning %s\n' % path_msg(path))
# NOTE(review): print_names is not forwarded in this call, so do_midx_dir
# keeps its default of False -- confirm whether -p should apply here too.
287 do_midx_dir(path, opt.output, byte_stream(sys.stdout),
288 auto=opt.auto, force=opt.force,
289 max_files=opt.max_files)
291 o.fatal("you must use -f or -a or provide input filenames")
# Report how many errors were accumulated in saved_errors.
294 log('WARNING: %d errors encountered.\n' % len(saved_errors))