test/ext/test_prune_older.py

   1
   2 from __future__ import absolute_import, print_function
   3 from collections import defaultdict
   4 from itertools import chain, dropwhile, groupby, takewhile
   5 from os import chdir
   6 from random import choice, randint
   7 from shutil import copytree, rmtree
   8 from subprocess import PIPE
   9 from sys import stderr
  10 from time import localtime, strftime, time, tzset
  11 import random, sys
  12
  13 if sys.version_info[:2] >= (3, 5):
  14     from difflib import diff_bytes, unified_diff
  15 else:
  16     from difflib import unified_diff
  17
  18 from bup import compat
  19 from bup.compat import environ
  20 from bup.helpers import partition, period_as_secs, readpipe
  21 from bup.io import byte_stream
  22 from buptest import ex, exo
  23 from wvpytest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  24 import bup.path
  25
  26 if sys.version_info[:2] < (3, 5):
  27     def diff_bytes(_, *args):
  28         return unified_diff(*args)
  29
  30 def create_older_random_saves(n, start_utc, end_utc):
  31     with open(b'foo', 'wb') as f:
  32         pass
  33     ex([b'git', b'add', b'foo'])
  34     utcs = set()
  35     while len(utcs) != n:
  36         utcs.add(randint(start_utc, end_utc))
  37     utcs = sorted(utcs)
  38     for utc in utcs:
  39         with open(b'foo', 'wb') as f:
  40             f.write(b'%d\n' % utc)
  41         ex([b'git', b'commit', b'--date', b'%d' % utc, b'-qam', b'%d' % utc])
  42     ex([b'git', b'gc', b'--aggressive'])
  43     return utcs
  44
  45 # There is corresponding code in bup for some of this, but the
  46 # computation method is different here, in part so that the test can
  47 # provide a more effective cross-check.
  48
  49 period_kinds = [b'all', b'dailies', b'monthlies', b'yearlies']
  50 period_scale = {b's': 1,
  51                 b'min': 60,
  52                 b'h': 60 * 60,
  53                 b'd': 60 * 60 * 24,
  54                 b'w': 60 * 60 * 24 * 7,
  55                 b'm': 60 * 60 * 24 * 31,
  56                 b'y': 60 * 60 * 24 * 366}
  57 period_scale_kinds = list(period_scale.keys())
  58
  59 def expected_retentions(utcs, utc_start, spec):
  60     if not spec:
  61         return utcs
  62     utcs = sorted(utcs, reverse=True)
  63     period_start = dict(spec)
  64     for kind, duration in period_start.items():
  65         period_start[kind] = utc_start - period_as_secs(duration)
  66     period_start = defaultdict(lambda: float('inf'), period_start)
  67
  68     all = list(takewhile(lambda x: x >= period_start[b'all'], utcs))
  69     utcs = list(dropwhile(lambda x: x >= period_start[b'all'], utcs))
  70
  71     matches = takewhile(lambda x: x >= period_start[b'dailies'], utcs)
  72     dailies = [max(day_utcs) for yday, day_utcs
  73                in groupby(matches, lambda x: localtime(x).tm_yday)]
  74     utcs = list(dropwhile(lambda x: x >= period_start[b'dailies'], utcs))
  75
  76     matches = takewhile(lambda x: x >= period_start[b'monthlies'], utcs)
  77     monthlies = [max(month_utcs) for month, month_utcs
  78                  in groupby(matches, lambda x: localtime(x).tm_mon)]
  79     utcs = dropwhile(lambda x: x >= period_start[b'monthlies'], utcs)
  80
  81     matches = takewhile(lambda x: x >= period_start[b'yearlies'], utcs)
  82     yearlies = [max(year_utcs) for year, year_utcs
  83                 in groupby(matches, lambda x: localtime(x).tm_year)]
  84
  85     return chain(all, dailies, monthlies, yearlies)
  86
  87 def period_spec(start_utc, end_utc):
  88     global period_kinds, period_scale, period_scale_kinds
  89     result = []
  90     desired_specs = randint(1, 2 * len(period_kinds))
  91     assert(desired_specs >= 1)  # At least one --keep argument is required
  92     while len(result) < desired_specs:
  93         period = None
  94         if randint(1, 100) <= 5:
  95             period = b'forever'
  96         else:
  97             assert(end_utc > start_utc)
  98             period_secs = randint(1, end_utc - start_utc)
  99             scale = choice(period_scale_kinds)
 100             mag = int(float(period_secs) / period_scale[scale])
 101             if mag != 0:
 102                 period = (b'%d' % mag) + scale
 103         if period:
 104             result += [(choice(period_kinds), period)]
 105     return tuple(result)
 106
 107 def unique_period_specs(n, start_utc, end_utc):
 108     invocations = set()
 109     while len(invocations) < n:
 110         invocations.add(period_spec(start_utc, end_utc))
 111     return tuple(invocations)
 112
 113 def period_spec_to_period_args(spec):
 114     return tuple(chain(*((b'--keep-' + kind + b'-for', period)
 115                          for kind, period in spec)))
 116
 117 def result_diffline(x):
 118     return (b'%d %s\n'
 119             % (x, strftime(' %Y-%m-%d-%H%M%S', localtime(x)).encode('ascii')))
 120
 121 def check_prune_result(expected):
 122     actual = sorted([int(x)
 123                      for x in exo([b'git', b'log',
 124                                    b'--pretty=format:%at']).out.splitlines()])
 125
 126     if expected != actual:
 127         for x in expected:
 128             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 129                   file=stderr)
 130         for line in diff_bytes(unified_diff,
 131                                [result_diffline(x) for x in expected],
 132                                [result_diffline(x) for x in actual],
 133                                fromfile=b'expected', tofile=b'actual'):
 134             sys.stderr.flush()
 135             byte_stream(sys.stderr).write(line)
 136     wvpass(expected == actual)
 137
 138
 139 def test_prune_older(tmpdir):
 140     environ[b'GIT_AUTHOR_NAME'] = b'bup test'
 141     environ[b'GIT_COMMITTER_NAME'] = b'bup test'
 142     environ[b'GIT_AUTHOR_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
 143     environ[b'GIT_COMMITTER_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
 144
 145     seed = int(environ.get(b'BUP_TEST_SEED', time()))
 146     random.seed(seed)
 147     print('random seed:', seed, file=stderr)
 148
 149     save_population = int(environ.get(b'BUP_TEST_PRUNE_OLDER_SAVES', 2000))
 150     prune_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_CYCLES', 20))
 151     prune_gc_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 152
 153     bup_cmd = bup.path.exe()
 154
 155     environ[b'BUP_DIR'] = tmpdir + b'/work/.git'
 156     environ[b'GIT_DIR'] = tmpdir + b'/work/.git'
 157     now = int(time())
 158     three_years_ago = now - (60 * 60 * 24 * 366 * 3)
 159     chdir(tmpdir)
 160     ex([b'git', b'init', b'work'])
 161     ex([b'git', b'symbolic-ref', b'HEAD', b'refs/heads/main'])
 162     ex([b'git', b'config', b'gc.autoDetach', b'false'])
 163
 164     wvstart('generating ' + str(save_population) + ' random saves')
 165     chdir(tmpdir + b'/work')
 166     save_utcs = create_older_random_saves(save_population, three_years_ago, now)
 167     chdir(tmpdir)
 168     test_set_hash = exo([b'git', b'show-ref', b'-s', b'main']).out.rstrip()
 169     ls_saves = exo((bup_cmd, b'ls', b'main')).out.splitlines()
 170     wvpasseq(save_population + 1, len(ls_saves))
 171
 172     wvstart('ensure everything kept, if no keep arguments')
 173     ex([b'git', b'reset', b'--hard', test_set_hash])
 174     proc = ex((bup_cmd,
 175                b'prune-older', b'-v', b'--unsafe', b'--no-gc',
 176                b'--wrt', b'%d' % now) \
 177               + (b'main',),
 178               stdout=None, stderr=PIPE, check=False)
 179     wvpassne(proc.rc, 0)
 180     wvpass(b'at least one keep argument is required' in proc.err)
 181     check_prune_result(save_utcs)
 182
 183
 184     wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
 185                                                                save_population))
 186     for spec in unique_period_specs(prune_cycles,
 187                                     # Make it more likely we'll have
 188                                     # some outside the save range.
 189                                     three_years_ago - period_scale[b'm'],
 190                                     now):
 191         ex([b'git', b'reset', b'--hard', test_set_hash])
 192         expected = sorted(expected_retentions(save_utcs, now, spec))
 193         ex((bup_cmd,
 194             b'prune-older', b'-v', b'--unsafe', b'--no-gc', b'--wrt',
 195             b'%d' % now) \
 196            + period_spec_to_period_args(spec) \
 197            + (b'main',))
 198         check_prune_result(expected)
 199
 200
 201     # More expensive because we have to recreate the repo each time
 202     wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
 203                                                             save_population))
 204     ex([b'git', b'reset', b'--hard', test_set_hash])
 205     copytree(b'work/.git', b'clean-test-repo', symlinks=True)
 206     for spec in unique_period_specs(prune_gc_cycles,
 207                                     # Make it more likely we'll have
 208                                     # some outside the save range.
 209                                     three_years_ago - period_scale[b'm'],
 210                                     now):
 211         rmtree(b'work/.git')
 212         copytree(b'clean-test-repo', b'work/.git')
 213         expected = sorted(expected_retentions(save_utcs, now, spec))
 214         ex((bup_cmd,
 215             b'prune-older', b'-v', b'--unsafe', b'--wrt', b'%d' % now) \
 216            + period_spec_to_period_args(spec) \
 217            + (b'main',))
 218         check_prune_result(expected)