t/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../cmd/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import environ, chdir
  13 from os.path import abspath, dirname
  14 from random import choice, randint
  15 from shutil import copytree, rmtree
  16 from sys import stderr
  17 from time import localtime, strftime, time
  18 import os, random, sys
  19
  20 script_home = abspath(dirname(sys.argv[0] or '.'))
  21 sys.path[:0] = [abspath(script_home + '/../lib'), abspath(script_home + '/..')]
  22 top = os.getcwd()
  23 bup_cmd = top + '/bup'
  24
  25 from buptest import exc, exo, test_tempdir
  26 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  27
  28 from bup.helpers import partition, period_as_secs, readpipe
  29
  30
  31 def bup(*args):
  32     return exo((bup_cmd,) + args)[0]
  33
  34 def bupc(*args):
  35     return exc((bup_cmd,) + args)
  36
  37 def create_older_random_saves(n, start_utc, end_utc):
  38     with open('foo', 'w') as f:
  39         pass
  40     exc(['git', 'add', 'foo'])
  41     utcs = set()
  42     while len(utcs) != n:
  43         utcs.add(randint(start_utc, end_utc))
  44     utcs = sorted(utcs)
  45     for utc in utcs:
  46         with open('foo', 'w') as f:
  47             f.write(str(utc) + '\n')
  48         exc(['git', 'commit', '--date', str(utc), '-qam', str(utc)])
  49     exc(['git', 'gc', '--aggressive'])
  50     return utcs
  51
  52 # There is corresponding code in bup for some of this, but the
  53 # computation method is different here, in part so that the test can
  54 # provide a more effective cross-check.
  55
  56 period_kinds = ['all', 'dailies', 'monthlies', 'yearlies']
  57 period_scale = {'s': 1,
  58                 'min': 60,
  59                 'h': 60 * 60,
  60                 'd': 60 * 60 * 24,
  61                 'w': 60 * 60 * 24 * 7,
  62                 'm': 60 * 60 * 24 * 31,
  63                 'y': 60 * 60 * 24 * 366}
  64 period_scale_kinds = period_scale.keys()
  65
  66 def expected_retentions(utcs, utc_start, spec):
  67     if not spec:
  68         return utcs
  69     utcs = sorted(utcs, reverse=True)
  70     period_start = dict(spec)
  71     for kind, duration in period_start.iteritems():
  72         period_start[kind] = utc_start - period_as_secs(duration)
  73     period_start = defaultdict(lambda: float('inf'), period_start)
  74
  75     all = list(takewhile(lambda x: x >= period_start['all'], utcs))
  76     utcs = list(dropwhile(lambda x: x >= period_start['all'], utcs))
  77
  78     matches = takewhile(lambda x: x >= period_start['dailies'], utcs)
  79     dailies = [max(day_utcs) for yday, day_utcs
  80                in groupby(matches, lambda x: localtime(x).tm_yday)]
  81     utcs = list(dropwhile(lambda x: x >= period_start['dailies'], utcs))
  82
  83     matches = takewhile(lambda x: x >= period_start['monthlies'], utcs)
  84     monthlies = [max(month_utcs) for month, month_utcs
  85                  in groupby(matches, lambda x: localtime(x).tm_mon)]
  86     utcs = dropwhile(lambda x: x >= period_start['monthlies'], utcs)
  87
  88     matches = takewhile(lambda x: x >= period_start['yearlies'], utcs)
  89     yearlies = [max(year_utcs) for year, year_utcs
  90                 in groupby(matches, lambda x: localtime(x).tm_year)]
  91
  92     return chain(all, dailies, monthlies, yearlies)
  93
  94 def period_spec(start_utc, end_utc):
  95     global period_kinds, period_scale, period_scale_kinds
  96     result = []
  97     desired_specs = randint(1, 2 * len(period_kinds))
  98     assert(desired_specs >= 1)  # At least one --keep argument is required
  99     while len(result) < desired_specs:
 100         period = None
 101         if randint(1, 100) <= 5:
 102             period = 'forever'
 103         else:
 104             assert(end_utc > start_utc)
 105             period_secs = randint(1, end_utc - start_utc)
 106             scale = choice(period_scale_kinds)
 107             mag = int(float(period_secs) / period_scale[scale])
 108             if mag != 0:
 109                 period = str(mag) + scale
 110         if period:
 111             result += [(choice(period_kinds), period)]
 112     return tuple(result)
 113
 114 def unique_period_specs(n, start_utc, end_utc):
 115     invocations = set()
 116     while len(invocations) < n:
 117         invocations.add(period_spec(start_utc, end_utc))
 118     return tuple(invocations)
 119
 120 def period_spec_to_period_args(spec):
 121     return tuple(chain(*(('--keep-' + kind + '-for', period)
 122                          for kind, period in spec)))
 123
 124 def result_diffline(x):
 125     return str(x) + strftime(' %Y-%m-%d-%H%M%S', localtime(x)) + '\n'
 126
 127 def check_prune_result(expected):
 128     actual = sorted([int(x)
 129                      for x in exo(['git', 'log',
 130                                    '--pretty=format:%at'])[0].splitlines()])
 131     if expected != actual:
 132         for x in expected:
 133             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 134                   file=stderr)
 135         for line in unified_diff([result_diffline(x) for x in expected],
 136                                  [result_diffline(x) for x in actual],
 137                                  fromfile='expected', tofile='actual'):
 138             sys.stderr.write(line)
 139     wvpass(expected == actual)
 140
 141
 142 environ['GIT_AUTHOR_NAME'] = 'bup test'
 143 environ['GIT_COMMITTER_NAME'] = 'bup test'
 144 environ['GIT_AUTHOR_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 145 environ['GIT_COMMITTER_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 146
 147 seed = int(environ.get('BUP_TEST_SEED', time()))
 148 random.seed(seed)
 149 print('random seed:', seed, file=stderr)
 150
 151 save_population = int(environ.get('BUP_TEST_PRUNE_OLDER_SAVES', 2000))
 152 prune_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_CYCLES', 20))
 153 prune_gc_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 154
 155 with test_tempdir('prune-older-') as tmpdir:
 156     environ['BUP_DIR'] = tmpdir + '/work/.git'
 157     environ['GIT_DIR'] = tmpdir + '/work/.git'
 158     now = int(time())
 159     three_years_ago = now - (60 * 60 * 24 * 366 * 3)
 160     chdir(tmpdir)
 161     exc(['git', 'init', 'work'])
 162
 163     wvstart('generating ' + str(save_population) + ' random saves')
 164     chdir(tmpdir + '/work')
 165     save_utcs = create_older_random_saves(save_population, three_years_ago, now)
 166     chdir(tmpdir)
 167     test_set_hash = exo(['git', 'show-ref', '-s', 'master'])[0].rstrip()
 168     ls_saves = bup('ls', 'master').splitlines()
 169     wvpasseq(save_population + 1, len(ls_saves))
 170
 171     wvstart('ensure everything kept, if no keep arguments')
 172     exc(['git', 'reset', '--hard', test_set_hash])
 173     _, errmsg, proc = exo((bup_cmd,
 174                            'prune-older', '-v', '--unsafe', '--no-gc',
 175                            '--wrt', str(now)) \
 176                           + ('master',),
 177                           stdout=False, stderr=True, check=False)
 178     wvpassne(proc.returncode, 0)
 179     wvpass('at least one keep argument is required' in errmsg)
 180     check_prune_result(save_utcs)
 181
 182
 183     wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
 184                                                                save_population))
 185     for spec in unique_period_specs(prune_cycles,
 186                                     # Make it more likely we'll have
 187                                     # some outside the save range.
 188                                     three_years_ago - period_scale['m'],
 189                                     now):
 190         exc(['git', 'reset', '--hard', test_set_hash])
 191         expected = sorted(expected_retentions(save_utcs, now, spec))
 192         exc((bup_cmd,
 193              'prune-older', '-v', '--unsafe', '--no-gc', '--wrt', str(now)) \
 194             + period_spec_to_period_args(spec) \
 195             + ('master',))
 196         check_prune_result(expected)
 197
 198
 199     # More expensive because we have to recreate the repo each time
 200     wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
 201                                                             save_population))
 202     exc(['git', 'reset', '--hard', test_set_hash])
 203     copytree('work/.git', 'clean-test-repo', symlinks=True)
 204     for spec in unique_period_specs(prune_gc_cycles,
 205                                     # Make it more likely we'll have
 206                                     # some outside the save range.
 207                                     three_years_ago - period_scale['m'],
 208                                     now):
 209         rmtree('work/.git')
 210         copytree('clean-test-repo', 'work/.git')
 211         expected = sorted(expected_retentions(save_utcs, now, spec))
 212         exc((bup_cmd,
 213              'prune-older', '-v', '--unsafe', '--wrt', str(now)) \
 214             + period_spec_to_period_args(spec) \
 215             + ('master',))
 216         check_prune_result(expected)