t/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../cmd/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import environ, chdir
  13 from os.path import abspath, dirname
  14 from random import choice, randint
  15 from shutil import copytree, rmtree
  16 from subprocess import PIPE
  17 from sys import stderr
  18 from time import localtime, strftime, time
  19 import os, random, sys
  20
  21 script_home = abspath(dirname(sys.argv[0] or '.'))
  22 sys.path[:0] = [abspath(script_home + '/../lib'), abspath(script_home + '/..')]
  23 top = os.getcwd()
  24 bup_cmd = top + '/bup'
  25
  26 from buptest import exc, exo, test_tempdir
  27 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  28
  29 from bup.helpers import partition, period_as_secs, readpipe
  30
  31
  32 def bup(*args):
  33     return exo((bup_cmd,) + args).out
  34
  35 def bupc(*args):
  36     return exc((bup_cmd,) + args)
  37
  38 def create_older_random_saves(n, start_utc, end_utc):
  39     with open('foo', 'w') as f:
  40         pass
  41     exc(['git', 'add', 'foo'])
  42     utcs = set()
  43     while len(utcs) != n:
  44         utcs.add(randint(start_utc, end_utc))
  45     utcs = sorted(utcs)
  46     for utc in utcs:
  47         with open('foo', 'w') as f:
  48             f.write(str(utc) + '\n')
  49         exc(['git', 'commit', '--date', str(utc), '-qam', str(utc)])
  50     exc(['git', 'gc', '--aggressive'])
  51     return utcs
  52
  53 # There is corresponding code in bup for some of this, but the
  54 # computation method is different here, in part so that the test can
  55 # provide a more effective cross-check.
  56
  57 period_kinds = ['all', 'dailies', 'monthlies', 'yearlies']
  58 period_scale = {'s': 1,
  59                 'min': 60,
  60                 'h': 60 * 60,
  61                 'd': 60 * 60 * 24,
  62                 'w': 60 * 60 * 24 * 7,
  63                 'm': 60 * 60 * 24 * 31,
  64                 'y': 60 * 60 * 24 * 366}
  65 period_scale_kinds = period_scale.keys()
  66
  67 def expected_retentions(utcs, utc_start, spec):
  68     if not spec:
  69         return utcs
  70     utcs = sorted(utcs, reverse=True)
  71     period_start = dict(spec)
  72     for kind, duration in period_start.iteritems():
  73         period_start[kind] = utc_start - period_as_secs(duration)
  74     period_start = defaultdict(lambda: float('inf'), period_start)
  75
  76     all = list(takewhile(lambda x: x >= period_start['all'], utcs))
  77     utcs = list(dropwhile(lambda x: x >= period_start['all'], utcs))
  78
  79     matches = takewhile(lambda x: x >= period_start['dailies'], utcs)
  80     dailies = [max(day_utcs) for yday, day_utcs
  81                in groupby(matches, lambda x: localtime(x).tm_yday)]
  82     utcs = list(dropwhile(lambda x: x >= period_start['dailies'], utcs))
  83
  84     matches = takewhile(lambda x: x >= period_start['monthlies'], utcs)
  85     monthlies = [max(month_utcs) for month, month_utcs
  86                  in groupby(matches, lambda x: localtime(x).tm_mon)]
  87     utcs = dropwhile(lambda x: x >= period_start['monthlies'], utcs)
  88
  89     matches = takewhile(lambda x: x >= period_start['yearlies'], utcs)
  90     yearlies = [max(year_utcs) for year, year_utcs
  91                 in groupby(matches, lambda x: localtime(x).tm_year)]
  92
  93     return chain(all, dailies, monthlies, yearlies)
  94
  95 def period_spec(start_utc, end_utc):
  96     global period_kinds, period_scale, period_scale_kinds
  97     result = []
  98     desired_specs = randint(1, 2 * len(period_kinds))
  99     assert(desired_specs >= 1)  # At least one --keep argument is required
 100     while len(result) < desired_specs:
 101         period = None
 102         if randint(1, 100) <= 5:
 103             period = 'forever'
 104         else:
 105             assert(end_utc > start_utc)
 106             period_secs = randint(1, end_utc - start_utc)
 107             scale = choice(period_scale_kinds)
 108             mag = int(float(period_secs) / period_scale[scale])
 109             if mag != 0:
 110                 period = str(mag) + scale
 111         if period:
 112             result += [(choice(period_kinds), period)]
 113     return tuple(result)
 114
 115 def unique_period_specs(n, start_utc, end_utc):
 116     invocations = set()
 117     while len(invocations) < n:
 118         invocations.add(period_spec(start_utc, end_utc))
 119     return tuple(invocations)
 120
 121 def period_spec_to_period_args(spec):
 122     return tuple(chain(*(('--keep-' + kind + '-for', period)
 123                          for kind, period in spec)))
 124
 125 def result_diffline(x):
 126     return str(x) + strftime(' %Y-%m-%d-%H%M%S', localtime(x)) + '\n'
 127
 128 def check_prune_result(expected):
 129     actual = sorted([int(x)
 130                      for x in exo(['git', 'log',
 131                                    '--pretty=format:%at']).out.splitlines()])
 132     if expected != actual:
 133         for x in expected:
 134             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 135                   file=stderr)
 136         for line in unified_diff([result_diffline(x) for x in expected],
 137                                  [result_diffline(x) for x in actual],
 138                                  fromfile='expected', tofile='actual'):
 139             sys.stderr.write(line)
 140     wvpass(expected == actual)
 141
 142
 143 environ['GIT_AUTHOR_NAME'] = 'bup test'
 144 environ['GIT_COMMITTER_NAME'] = 'bup test'
 145 environ['GIT_AUTHOR_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 146 environ['GIT_COMMITTER_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 147
 148 seed = int(environ.get('BUP_TEST_SEED', time()))
 149 random.seed(seed)
 150 print('random seed:', seed, file=stderr)
 151
 152 save_population = int(environ.get('BUP_TEST_PRUNE_OLDER_SAVES', 2000))
 153 prune_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_CYCLES', 20))
 154 prune_gc_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 155
 156 with test_tempdir('prune-older-') as tmpdir:
 157     environ['BUP_DIR'] = tmpdir + '/work/.git'
 158     environ['GIT_DIR'] = tmpdir + '/work/.git'
 159     now = int(time())
 160     three_years_ago = now - (60 * 60 * 24 * 366 * 3)
 161     chdir(tmpdir)
 162     exc(['git', 'init', 'work'])
 163
 164     wvstart('generating ' + str(save_population) + ' random saves')
 165     chdir(tmpdir + '/work')
 166     save_utcs = create_older_random_saves(save_population, three_years_ago, now)
 167     chdir(tmpdir)
 168     test_set_hash = exo(['git', 'show-ref', '-s', 'master']).out.rstrip()
 169     ls_saves = bup('ls', 'master').splitlines()
 170     wvpasseq(save_population + 1, len(ls_saves))
 171
 172     wvstart('ensure everything kept, if no keep arguments')
 173     exc(['git', 'reset', '--hard', test_set_hash])
 174     proc = exo((bup_cmd,
 175                 'prune-older', '-v', '--unsafe', '--no-gc',
 176                 '--wrt', str(now)) \
 177                + ('master',),
 178                stdout=None, stderr=PIPE, check=False)
 179     wvpassne(proc.rc, 0)
 180     wvpass('at least one keep argument is required' in proc.err)
 181     check_prune_result(save_utcs)
 182
 183
 184     wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
 185                                                                save_population))
 186     for spec in unique_period_specs(prune_cycles,
 187                                     # Make it more likely we'll have
 188                                     # some outside the save range.
 189                                     three_years_ago - period_scale['m'],
 190                                     now):
 191         exc(['git', 'reset', '--hard', test_set_hash])
 192         expected = sorted(expected_retentions(save_utcs, now, spec))
 193         exc((bup_cmd,
 194              'prune-older', '-v', '--unsafe', '--no-gc', '--wrt', str(now)) \
 195             + period_spec_to_period_args(spec) \
 196             + ('master',))
 197         check_prune_result(expected)
 198
 199
 200     # More expensive because we have to recreate the repo each time
 201     wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
 202                                                             save_population))
 203     exc(['git', 'reset', '--hard', test_set_hash])
 204     copytree('work/.git', 'clean-test-repo', symlinks=True)
 205     for spec in unique_period_specs(prune_gc_cycles,
 206                                     # Make it more likely we'll have
 207                                     # some outside the save range.
 208                                     three_years_ago - period_scale['m'],
 209                                     now):
 210         rmtree('work/.git')
 211         copytree('clean-test-repo', 'work/.git')
 212         expected = sorted(expected_retentions(save_utcs, now, spec))
 213         exc((bup_cmd,
 214              'prune-older', '-v', '--unsafe', '--wrt', str(now)) \
 215             + period_spec_to_period_args(spec) \
 216             + ('master',))
 217         check_prune_result(expected)