2 from __future__ import absolute_import, print_function
3 from binascii import hexlify
4 import glob, os, math, resource, struct, sys
6 from bup import options, git, midx, _helpers, xstat
7 from bup.compat import ExitStack, argv_bytes, hexstr, range
8 from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
9 log, mmap_readwrite, qprogress,
11 from bup.io import byte_stream, path_msg
15 SHA_PER_PAGE=PAGE_SIZE/20.
18 bup midx [options...] <idxnames...>
20 o,output= output midx filename (default: auto-generated)
21 a,auto automatically use all existing .midx/.idx files as input
22 f,force merge produce exactly one .midx containing all objects
23 p,print print names of generated midx files
24 check validate contents of the given midx files (with -a, all midx files)
25 max-files= maximum number of idx files to open at once [-1]
26 d,dir= directory containing idx/midx files
29 merge_into = _helpers.merge_into
33 for i in range(0, len(l), count):
38 mf = min(resource.getrlimit(resource.RLIMIT_NOFILE))
40 mf -= 20 # just a safety margin
42 mf -= 6 # minimum safety margin
47 nicename = git.repo_rel(name)
48 log('Checking %s.\n' % path_msg(nicename))
50 ix = git.open_idx(name)
51 except git.GitError as e:
52 add_error('%s: %s' % (path_msg(name), e))
55 for count,subname in enumerate(ix.idxnames):
56 with git.open_idx(os.path.join(os.path.dirname(name), subname)) \
58 for ecount,e in enumerate(sub):
59 if not (ecount % 1234):
60 qprogress(' %d/%d: %s %d/%d\r'
61 % (count, len(ix.idxnames),
62 git.shorten_hash(subname).decode('ascii'),
65 add_error("%s: %s: %s missing from idx"
66 % (path_msg(nicename),
67 git.shorten_hash(subname).decode('ascii'),
70 add_error("%s: %s: %s missing from midx"
71 % (path_msg(nicename),
72 git.shorten_hash(subname).decode('ascii'),
75 for ecount,e in enumerate(ix):
76 if not (ecount % 1234):
77 qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
78 if e and prev and not e >= prev:
79 add_error('%s: ordering error: %s < %s'
80 % (nicename, hexstr(e), hexstr(prev)))
85 def _do_midx(outdir, outfilename, infilenames, prefixstr,
86 auto=False, force=False):
90 sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
91 outfilename = b'%s/midx-%s.midx' % (outdir, sum)
96 with ExitStack() as contexts:
97 for name in infilenames:
98 ix = git.open_idx(name)
99 contexts.enter_context(ix)
104 isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
107 for n in ix.idxnames:
108 allfilenames.append(os.path.basename(n))
110 inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])
112 if not _first: _first = outdir
113 dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
114 debug1('midx: %s%screating from %d files (%d objects).\n'
115 % (dirprefix, prefixstr, len(infilenames), total))
116 if (auto and (total < 1024 and len(infilenames) < 3)) \
117 or ((auto or force) and len(infilenames) < 2) \
118 or (force and not total):
119 debug1('midx: nothing to do.\n')
122 pages = int(total/SHA_PER_PAGE) or 1
123 bits = int(math.ceil(math.log(pages, 2)))
125 debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
128 with atomically_replaced_file(outfilename, 'wb') as f:
130 f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
131 assert(f.tell() == 12)
133 f.truncate(12 + 4*entries + 20*total + 4*total)
135 fdatasync(f.fileno())
137 with mmap_readwrite(f, close=False) as fmap:
138 count = merge_into(fmap, bits, total, inp)
139 f.seek(0, os.SEEK_END)
140 f.write(b'\0'.join(allfilenames))
142 # This is just for testing (if you enable this, don't clear inp above)
144 # p = midx.PackMidx(outfilename)
145 # assert(len(p.idxnames) == len(infilenames))
146 # log(repr(p.idxnames) + '\n')
147 # assert(len(p) == total)
148 # for pe, e in p, git.idxmerge(inp, final_progress=False):
151 # assert(p.exists(i))
153 return total, outfilename
def do_midx(outdir, outfilename, infilenames, prefixstr, prout,
            auto=False, force=False, print_names=False):
    """Run a single midx merge and optionally report the file it produced.

    outdir, outfilename, infilenames, prefixstr, auto and force are
    forwarded unchanged to _do_midx().  prout must accept bytes via
    write(); it is only touched when print_names is true and
    _do_midx() returned a truthy result, in which case the second
    element of that result (the output file name) is written followed
    by a newline.

    Returns None.
    """
    result = _do_midx(outdir, outfilename, infilenames, prefixstr,
                      auto=auto, force=force)
    # Nothing to report when no midx was produced or names weren't requested.
    if not result or not print_names:
        return
    generated_name = result[1]
    prout.write(generated_name + b'\n')
164 def do_midx_dir(path, outfilename, prout, auto=False, force=False,
165 max_files=-1, print_names=False):
168 if force and not auto:
169 midxs = [] # don't use existing midx files
171 midxs = glob.glob(b'%s/*.midx' % path)
174 with git.open_idx(mname) as m:
175 contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
176 sizes[mname] = len(m)
178 # sort the biggest+newest midxes first, so that we can eliminate
179 # smaller (or older) redundant ones that come later in the list
180 midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
184 for iname in contents[mname]:
185 if not already.get(iname):
189 debug1('%r is redundant\n' % mname)
193 midxs = [k for k in midxs if not already.get(k)]
194 idxs = [k for k in glob.glob(b'%s/*.idx' % path) if not already.get(k)]
197 with git.open_idx(iname) as i:
198 sizes[iname] = len(i)
200 all = [(sizes[n],n) for n in (midxs + idxs)]
202 # FIXME: what are the optimal values? Does this make sense?
203 DESIRED_HWM = force and 1 or 5
204 DESIRED_LWM = force and 1 or 2
205 existed = dict((name,1) for sz,name in all)
206 debug1('midx: %d indexes; want no more than %d.\n'
207 % (len(all), DESIRED_HWM))
208 if len(all) <= DESIRED_HWM:
209 debug1('midx: nothing to do.\n')
210 while len(all) > DESIRED_HWM:
212 part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
213 part2 = all[len(all)-DESIRED_LWM+1:]
214 all = list(do_midx_group(path, outfilename, part1,
215 auto=auto, force=force, max_files=max_files)) \
217 if len(all) > DESIRED_HWM:
218 debug1('\nStill too many indexes (%d > %d). Merging again.\n'
219 % (len(all), DESIRED_HWM))
223 if not existed.get(name):
224 prout.write(name + b'\n')
227 def do_midx_group(outdir, outfilename, infiles, auto=False, force=False,
229 groups = list(_group(infiles, max_files))
231 for n,sublist in enumerate(groups):
233 gprefix = 'Group %d: ' % (n+1)
234 rv = _do_midx(outdir, outfilename, sublist, gprefix,
235 auto=auto, force=force)
241 o = options.Options(optspec)
242 opt, flags, extra = o.parse_bytes(argv[1:])
243 opt.output = argv_bytes(opt.output) if opt.output else None
245 if extra and (opt.auto or opt.force):
246 o.fatal("you can't use -f/-a and also provide filenames")
247 if opt.check and (not extra and not opt.auto):
248 o.fatal("if using --check, you must provide filenames or -a")
250 git.check_repo_or_die()
252 if opt.max_files < 0:
253 opt.max_files = max_files()
254 assert(opt.max_files >= 5)
256 path = opt.dir and argv_bytes(opt.dir) or git.repo(b'objects/pack')
258 extra = [argv_bytes(x) for x in extra]
261 # check existing midx files
265 debug1('midx: scanning %s\n' % path)
266 midxes = glob.glob(os.path.join(path, b'*.midx'))
270 log('All tests passed.\n')
274 do_midx(path, opt.output, extra, b'',
275 byte_stream(sys.stdout), auto=opt.auto, force=opt.force,
276 print_names=opt.print)
277 elif opt.auto or opt.force:
279 debug1('midx: scanning %s\n' % path_msg(path))
280 do_midx_dir(path, opt.output, byte_stream(sys.stdout),
281 auto=opt.auto, force=opt.force,
282 max_files=opt.max_files)
284 o.fatal("you must use -f or -a or provide input filenames")
287 log('WARNING: %d errors encountered.\n' % len(saved_errors))