2 from __future__ import absolute_import
3 import math, struct, sys
5 from bup import options, git, _helpers
6 from bup.helpers import log
7 from bup.io import byte_stream
9 POPULATION_OF_EARTH=6.7e9 # as of September, 2010
14 predict Guess object offsets and report the maximum deviation
15 ignore-midx Don't use midx files; use only plain pack idx files.
19 o = options.Options(optspec)
20 opt, flags, extra = o.parse_bytes(argv[1:])
23 o.fatal("no arguments expected")
25 git.check_repo_or_die()
27 with git.PackIdxList(git.repo(b'objects/pack'),
28 ignore_midx=opt.ignore_midx) as mi:
def do_predict(ix, out):
    """Report how far entry positions stray from a prefix-based guess.

    The first 8 bytes of every entry are read as a big-endian unsigned
    64-bit prefix and scaled by the number of entries to guess the
    entry's position.  The largest absolute difference between the
    guessed and the actual position is written to *out*.

    Args:
        ix: sized iterable whose items can be sliced to their first
            8 bytes (bytes-like).
        out: binary stream; receives b'<maxdiff> of <total> (<pct>%) '.

    Raises:
        struct.error: if an entry is shorter than 8 bytes.
    """
    # NOTE(review): the excerpt under review does not show where `total`
    # and `maxdiff` are initialised; len(ix) and 0 are assumed -- confirm.
    total = len(ix)
    maxdiff = 0
    seen = 0
    # Count from 1 so `seen` is defined (as 0) even when ix is empty.
    for seen, oid in enumerate(ix, 1):
        prefix = struct.unpack('!Q', oid[:8])[0]
        # Scale the 64-bit prefix into the range [0, total).
        expected = prefix * total // (1 << 64)
        maxdiff = max(maxdiff, abs((seen - 1) - expected))
    # An empty index has no deviation; avoid dividing by zero.
    percent = maxdiff * 100.0 / total if total else 0.0
    out.write(b'%d of %d (%.3f%%) ' % (maxdiff, total, percent))
    # Sanity check: iteration must yield exactly len(ix) entries.
    assert seen == total
44 out = byte_stream(sys.stdout)
53 # default mode: find longest matching prefix
59 #assert(str(i) >= last)
60 pm = _helpers.bitmatch(last, i)
61 longmatch = max(longmatch, pm)
63 out.write(b'%d\n' % longmatch)
64 log('%d matching prefix bits\n' % longmatch)
65 doublings = math.log(len(mi), 2)
67 bpd = longmatch / doublings
68 log('%.2f bits per doubling\n' % bpd)
69 remain = 160 - longmatch
70 rdoublings = remain / bpd
71 log('%d bits (%.2f doublings) remaining\n' % (remain, rdoublings))
72 larger = 2**rdoublings
73 log('%g times larger is possible\n' % larger)
74 perperson = larger/POPULATION_OF_EARTH
75 log('\nEveryone on earth could have %d data sets like yours, all in one\n'
76 'repository, and we would expect 1 object collision.\n'