from __future__ import absolute_import, print_function
from collections import defaultdict
from itertools import chain, dropwhile, groupby, takewhile
from os import chdir
from random import choice, randint
from shutil import copytree, rmtree
from subprocess import PIPE
from sys import stderr
from time import localtime, strftime, time, tzset
import random
import sys

if sys.version_info[:2] >= (3, 5):
    from difflib import diff_bytes, unified_diff
else:
    from difflib import unified_diff

from bup import compat
from bup.compat import environ
from bup.helpers import partition, period_as_secs, readpipe
from bup.io import byte_stream
from buptest import ex, exo
from wvpytest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
import bup.path
if sys.version_info[:2] < (3, 5):
    def diff_bytes(_, *args, **kwargs):
        """Stand-in for difflib.diff_bytes on Pythons that lack it.

        The first argument (the diff function the real diff_bytes would
        wrap) is ignored.  All remaining positional and keyword
        arguments are handed to unified_diff unchanged; accepting
        keywords matters because check_prune_result passes fromfile=
        and tofile= by name.
        """
        return unified_diff(*args, **kwargs)
def create_older_random_saves(n, start_utc, end_utc):
    """Commit n versions of the file 'foo' with distinct random dates.

    Must be run from inside the git work tree.  Picks n distinct
    integer timestamps in [start_utc, end_utc], then, oldest first,
    writes each timestamp into 'foo' and commits it with that timestamp
    as both the --date and the commit message.

    Returns the timestamps as a list sorted in ascending order.
    Raises ValueError if the range cannot supply n distinct values.
    """
    if n > end_utc - start_utc + 1:
        # Without this the sampling loop below would never terminate.
        raise ValueError('cannot pick %d distinct timestamps in [%d, %d]'
                         % (n, start_utc, end_utc))
    # Create an empty 'foo' and track it so that the 'commit -a' calls
    # below pick up every rewrite.
    with open(b'foo', 'wb'):
        pass
    ex([b'git', b'add', b'foo'])
    # A set guarantees the timestamps are distinct.
    utcs = set()
    while len(utcs) != n:
        utcs.add(randint(start_utc, end_utc))
    # Commit in chronological order so history matches the dates.
    utcs = sorted(utcs)
    for utc in utcs:
        with open(b'foo', 'wb') as f:
            f.write(b'%d\n' % utc)
        ex([b'git', b'commit', b'--date', b'%d' % utc, b'-qam', b'%d' % utc])
    ex([b'git', b'gc', b'--aggressive'])
    return utcs
# There is corresponding code in bup for some of this, but the
# computation method is different here, in part so that the test can
# provide a more effective cross-check.

# The kinds of --keep-KIND-for option the test generates.
period_kinds = [b'all', b'dailies', b'monthlies', b'yearlies']
# Number of seconds in one unit of each period suffix.  Months and
# years use their longest possible length (31 and 366 days).
period_scale = {b's': 1,
                b'min': 60,
                b'h': 60 * 60,
                b'd': 60 * 60 * 24,
                b'w': 60 * 60 * 24 * 7,
                b'm': 60 * 60 * 24 * 31,
                b'y': 60 * 60 * 24 * 366}
period_scale_kinds = list(period_scale.keys())
def expected_retentions(utcs, utc_start, spec):
    """Compute which save timestamps a prune with spec should retain.

    utcs is an iterable of save timestamps, utc_start is the reference
    time the periods are measured back from, and spec is a sequence of
    (kind, period) pairs where kind is one of period_kinds.  If a kind
    appears more than once, the last period given for it wins.

    The saves are walked from newest to oldest.  Everything inside the
    b'all' period is kept; of what follows, the newest save per
    day-of-year is kept while inside the b'dailies' period, then the
    newest per month inside b'monthlies', then the newest per year
    inside b'yearlies'.  A kind missing from spec matches nothing.

    Returns utcs itself when spec is empty (nothing is pruned without
    keep arguments); otherwise an iterator over the retained
    timestamps, in no particular overall order.
    """
    if not spec:
        return utcs
    utcs = sorted(utcs, reverse=True)
    period_start = dict(spec)
    for kind, duration in period_start.items():
        # NOTE(review): assumes period_as_secs turns a period such as
        # b'3d' into a number of seconds -- confirm in bup.helpers.
        period_start[kind] = utc_start - period_as_secs(duration)
    # Kinds that were not requested start at +inf, so no save is ever
    # recent enough to fall inside them.
    period_start = defaultdict(lambda: float('inf'), period_start)

    keep_all = list(takewhile(lambda x: x >= period_start[b'all'], utcs))
    utcs = list(dropwhile(lambda x: x >= period_start[b'all'], utcs))

    matches = takewhile(lambda x: x >= period_start[b'dailies'], utcs)
    dailies = [max(day_utcs) for yday, day_utcs
               in groupby(matches, lambda x: localtime(x).tm_yday)]
    utcs = list(dropwhile(lambda x: x >= period_start[b'dailies'], utcs))

    matches = takewhile(lambda x: x >= period_start[b'monthlies'], utcs)
    monthlies = [max(month_utcs) for month, month_utcs
                 in groupby(matches, lambda x: localtime(x).tm_mon)]
    utcs = list(dropwhile(lambda x: x >= period_start[b'monthlies'], utcs))

    matches = takewhile(lambda x: x >= period_start[b'yearlies'], utcs)
    yearlies = [max(year_utcs) for year, year_utcs
                in groupby(matches, lambda x: localtime(x).tm_year)]

    return chain(keep_all, dailies, monthlies, yearlies)
def period_spec(start_utc, end_utc):
    """Generate a random, non-empty prune specification.

    Returns a tuple of (kind, period) pairs, where kind is drawn from
    period_kinds and period is a bytes value such as b'12d'.  The
    tuple has between 1 and 2 * len(period_kinds) entries, so a kind
    may occur more than once.  Roughly one period in twenty is the
    special value b'forever'; the rest are a random span of at most
    end_utc - start_utc seconds expressed in a randomly chosen unit.

    A tuple is returned (rather than a list) so that the result is
    hashable and can be collected in a set by unique_period_specs.
    """
    result = []
    desired_specs = randint(1, 2 * len(period_kinds))
    assert desired_specs >= 1  # At least one --keep argument is required
    while len(result) < desired_specs:
        period = None
        if randint(1, 100) <= 5:
            # NOTE(review): b'forever' is taken from bup's documented
            # period syntax -- confirm against bup-prune-older(1).
            period = b'forever'
        else:
            assert end_utc > start_utc
            period_secs = randint(1, end_utc - start_utc)
            scale = choice(period_scale_kinds)
            mag = int(float(period_secs) / period_scale[scale])
            # A span shorter than one unit rounds down to zero; skip
            # it and draw again rather than emit a zero-length period.
            if mag != 0:
                period = (b'%d' % mag) + scale
        if period:
            result.append((choice(period_kinds), period))
    return tuple(result)
def unique_period_specs(n, start_utc, end_utc):
    """Return a tuple of n distinct random specs from period_spec().

    Specs are drawn until n different ones have been collected, so no
    spec is tested twice.  The order of the result is that of the
    underlying set, i.e. unspecified.
    """
    invocations = set()
    while len(invocations) < n:
        invocations.add(period_spec(start_utc, end_utc))
    return tuple(invocations)
def period_spec_to_period_args(spec):
    """Flatten a spec into command-line arguments.

    Each (kind, period) pair in spec becomes the two consecutive
    arguments b'--keep-<kind>-for' and period.  Returns a tuple, in
    spec order, which is empty for an empty spec.
    """
    return tuple(chain.from_iterable((b'--keep-' + kind + b'-for', period)
                                     for kind, period in spec))
def result_diffline(x):
    """Format timestamp x as one newline-terminated bytes line for a diff.

    The line holds the raw integer followed by the same instant
    rendered as local time, so mismatches are readable in the diff
    that check_prune_result prints.
    """
    # NOTE(review): the exact format string was reconstructed; only
    # the two substituted values are certain.  The output is purely
    # diagnostic.
    return (b'%d %s\n'
            % (x, strftime(' %Y-%m-%d-%H%M%S', localtime(x)).encode('ascii')))
def check_prune_result(expected):
    """Check that the repository holds exactly the expected saves.

    expected must be a list of timestamps sorted in ascending order.
    It is compared with the sorted %at timestamps that 'git log'
    reports for the current repository.  On a mismatch, the expected
    values and a unified diff of expected vs. actual are written to
    stderr before the outcome is recorded via wvpass.
    """
    actual = sorted(int(x)
                    for x in exo([b'git', b'log',
                                  b'--pretty=format:%at']).out.splitlines())

    if expected != actual:
        for x in expected:
            print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
                  file=sys.stderr)
        # The diff lines are bytes and go to the underlying byte
        # stream, so flush the text layer first to keep the output in
        # order.
        sys.stderr.flush()
        for line in diff_bytes(unified_diff,
                               [result_diffline(x) for x in expected],
                               [result_diffline(x) for x in actual],
                               fromfile=b'expected', tofile=b'actual'):
            byte_stream(sys.stderr).write(line)
    wvpass(expected == actual)
def test_prune_older(tmpdir):
    """Cross-check 'bup prune-older' against expected_retentions().

    tmpdir is a bytes path to a scratch directory.  The test builds a
    repository of randomly dated saves under tmpdir/work, then:

    1. checks that prune-older with no keep arguments fails and
       removes nothing;
    2. runs a number of random keep specifications with --no-gc,
       resetting the branch between runs;
    3. runs further random specifications with gc enabled, restoring
       a pristine copy of the repository before each run.

    Environment variables:
      BUP_TEST_SEED                   random seed (default: current time)
      BUP_TEST_PRUNE_OLDER_SAVES      number of saves (default 2000)
      BUP_TEST_PRUNE_OLDER_CYCLES     no-gc runs (default 20)
      BUP_TEST_PRUNE_OLDER_GC_CYCLES  gc runs (default 10)
    """
    environ[b'GIT_AUTHOR_NAME'] = b'bup test'
    environ[b'GIT_COMMITTER_NAME'] = b'bup test'
    environ[b'GIT_AUTHOR_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
    environ[b'GIT_COMMITTER_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'

    # Seed the generator and report the seed, so that a failing run
    # can be reproduced by exporting BUP_TEST_SEED.
    seed = int(environ.get(b'BUP_TEST_SEED', time()))
    random.seed(seed)
    print('random seed:', seed, file=stderr)

    save_population = int(environ.get(b'BUP_TEST_PRUNE_OLDER_SAVES', 2000))
    prune_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_CYCLES', 20))
    prune_gc_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))

    bup_cmd = bup.path.exe()

    environ[b'BUP_DIR'] = tmpdir + b'/work/.git'
    environ[b'GIT_DIR'] = tmpdir + b'/work/.git'
    now = int(time())
    three_years_ago = now - (60 * 60 * 24 * 366 * 3)
    # The relative paths used below ('work', 'clean-test-repo') are
    # resolved against tmpdir.
    chdir(tmpdir)
    ex([b'git', b'init', b'work'])
    ex([b'git', b'symbolic-ref', b'HEAD', b'refs/heads/main'])
    ex([b'git', b'config', b'gc.autoDetach', b'false'])

    wvstart('generating ' + str(save_population) + ' random saves')
    chdir(tmpdir + b'/work')
    save_utcs = create_older_random_saves(save_population, three_years_ago, now)
    chdir(tmpdir)
    test_set_hash = exo([b'git', b'show-ref', b'-s', b'main']).out.rstrip()
    ls_saves = exo((bup_cmd, b'ls', b'main')).out.splitlines()
    # NOTE(review): the extra entry is presumably bup's 'latest'
    # link -- confirm.
    wvpasseq(save_population + 1, len(ls_saves))

    wvstart('ensure everything kept, if no keep arguments')
    ex([b'git', b'reset', b'--hard', test_set_hash])
    proc = ex((bup_cmd,
               b'prune-older', b'-v', b'--unsafe', b'--no-gc',
               b'--wrt', b'%d' % now)
              + (b'main',),
              stdout=None, stderr=PIPE, check=False)
    wvpassne(proc.rc, 0)
    wvpass(b'at least one keep argument is required' in proc.err)
    check_prune_result(save_utcs)

    wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
                                                                save_population))
    for spec in unique_period_specs(prune_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale[b'm'],
                                    now):
        # With --no-gc the pruned commits still exist, so resetting
        # the branch is enough to restore the full set of saves.
        ex([b'git', b'reset', b'--hard', test_set_hash])
        expected = sorted(expected_retentions(save_utcs, now, spec))
        ex((bup_cmd,
            b'prune-older', b'-v', b'--unsafe', b'--no-gc', b'--wrt',
            b'%d' % now)
           + period_spec_to_period_args(spec)
           + (b'main',))
        check_prune_result(expected)

    # More expensive because we have to recreate the repo each time
    wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
                                                             save_population))
    ex([b'git', b'reset', b'--hard', test_set_hash])
    copytree(b'work/.git', b'clean-test-repo', symlinks=True)
    for spec in unique_period_specs(prune_gc_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale[b'm'],
                                    now):
        # copytree() requires that the destination not exist yet.
        rmtree(b'work/.git')
        copytree(b'clean-test-repo', b'work/.git')
        expected = sorted(expected_retentions(save_utcs, now, spec))
        ex((bup_cmd,
            b'prune-older', b'-v', b'--unsafe', b'--wrt', b'%d' % now)
           + period_spec_to_period_args(spec)
           + (b'main',))
        check_prune_result(expected)