t/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../cmd/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import environ, chdir
  13 from os.path import abspath, dirname
  14 from random import choice, randint
  15 from shutil import copytree, rmtree
  16 from subprocess import PIPE
  17 from sys import stderr
  18 from time import localtime, strftime, time
  19 import os, random, sys
  20
  21 script_home = abspath(dirname(sys.argv[0] or '.'))
  22 sys.path[:0] = [abspath(script_home + '/../lib'), abspath(script_home + '/..')]
  23 top = os.getcwd()
  24 bup_cmd = top + '/bup'
  25
  26 from buptest import exc, exo, test_tempdir
  27 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  28
  29 from bup import compat
  30 from bup.helpers import partition, period_as_secs, readpipe
  31
  32
  33 def bup(*args):
  34     return exo((bup_cmd,) + args).out
  35
  36 def bupc(*args):
  37     return exc((bup_cmd,) + args)
  38
  39 def create_older_random_saves(n, start_utc, end_utc):
  40     with open('foo', 'w') as f:
  41         pass
  42     exc(['git', 'add', 'foo'])
  43     utcs = set()
  44     while len(utcs) != n:
  45         utcs.add(randint(start_utc, end_utc))
  46     utcs = sorted(utcs)
  47     for utc in utcs:
  48         with open('foo', 'w') as f:
  49             f.write(str(utc) + '\n')
  50         exc(['git', 'commit', '--date', str(utc), '-qam', str(utc)])
  51     exc(['git', 'gc', '--aggressive'])
  52     return utcs
  53
  54 # There is corresponding code in bup for some of this, but the
  55 # computation method is different here, in part so that the test can
  56 # provide a more effective cross-check.
  57
  58 period_kinds = ['all', 'dailies', 'monthlies', 'yearlies']
  59 period_scale = {'s': 1,
  60                 'min': 60,
  61                 'h': 60 * 60,
  62                 'd': 60 * 60 * 24,
  63                 'w': 60 * 60 * 24 * 7,
  64                 'm': 60 * 60 * 24 * 31,
  65                 'y': 60 * 60 * 24 * 366}
  66 period_scale_kinds = period_scale.keys()
  67
  68 def expected_retentions(utcs, utc_start, spec):
  69     if not spec:
  70         return utcs
  71     utcs = sorted(utcs, reverse=True)
  72     period_start = dict(spec)
  73     for kind, duration in compat.items(period_start):
  74         period_start[kind] = utc_start - period_as_secs(duration)
  75     period_start = defaultdict(lambda: float('inf'), period_start)
  76
  77     all = list(takewhile(lambda x: x >= period_start['all'], utcs))
  78     utcs = list(dropwhile(lambda x: x >= period_start['all'], utcs))
  79
  80     matches = takewhile(lambda x: x >= period_start['dailies'], utcs)
  81     dailies = [max(day_utcs) for yday, day_utcs
  82                in groupby(matches, lambda x: localtime(x).tm_yday)]
  83     utcs = list(dropwhile(lambda x: x >= period_start['dailies'], utcs))
  84
  85     matches = takewhile(lambda x: x >= period_start['monthlies'], utcs)
  86     monthlies = [max(month_utcs) for month, month_utcs
  87                  in groupby(matches, lambda x: localtime(x).tm_mon)]
  88     utcs = dropwhile(lambda x: x >= period_start['monthlies'], utcs)
  89
  90     matches = takewhile(lambda x: x >= period_start['yearlies'], utcs)
  91     yearlies = [max(year_utcs) for year, year_utcs
  92                 in groupby(matches, lambda x: localtime(x).tm_year)]
  93
  94     return chain(all, dailies, monthlies, yearlies)
  95
  96 def period_spec(start_utc, end_utc):
  97     global period_kinds, period_scale, period_scale_kinds
  98     result = []
  99     desired_specs = randint(1, 2 * len(period_kinds))
 100     assert(desired_specs >= 1)  # At least one --keep argument is required
 101     while len(result) < desired_specs:
 102         period = None
 103         if randint(1, 100) <= 5:
 104             period = 'forever'
 105         else:
 106             assert(end_utc > start_utc)
 107             period_secs = randint(1, end_utc - start_utc)
 108             scale = choice(period_scale_kinds)
 109             mag = int(float(period_secs) / period_scale[scale])
 110             if mag != 0:
 111                 period = str(mag) + scale
 112         if period:
 113             result += [(choice(period_kinds), period)]
 114     return tuple(result)
 115
 116 def unique_period_specs(n, start_utc, end_utc):
 117     invocations = set()
 118     while len(invocations) < n:
 119         invocations.add(period_spec(start_utc, end_utc))
 120     return tuple(invocations)
 121
 122 def period_spec_to_period_args(spec):
 123     return tuple(chain(*(('--keep-' + kind + '-for', period)
 124                          for kind, period in spec)))
 125
 126 def result_diffline(x):
 127     return str(x) + strftime(' %Y-%m-%d-%H%M%S', localtime(x)) + '\n'
 128
 129 def check_prune_result(expected):
 130     actual = sorted([int(x)
 131                      for x in exo(['git', 'log',
 132                                    '--pretty=format:%at']).out.splitlines()])
 133     if expected != actual:
 134         for x in expected:
 135             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 136                   file=stderr)
 137         for line in unified_diff([result_diffline(x) for x in expected],
 138                                  [result_diffline(x) for x in actual],
 139                                  fromfile='expected', tofile='actual'):
 140             sys.stderr.write(line)
 141     wvpass(expected == actual)
 142
 143
 144 environ['GIT_AUTHOR_NAME'] = 'bup test'
 145 environ['GIT_COMMITTER_NAME'] = 'bup test'
 146 environ['GIT_AUTHOR_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 147 environ['GIT_COMMITTER_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 148
 149 seed = int(environ.get('BUP_TEST_SEED', time()))
 150 random.seed(seed)
 151 print('random seed:', seed, file=stderr)
 152
 153 save_population = int(environ.get('BUP_TEST_PRUNE_OLDER_SAVES', 2000))
 154 prune_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_CYCLES', 20))
 155 prune_gc_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 156
 157 with test_tempdir('prune-older-') as tmpdir:
 158     environ['BUP_DIR'] = tmpdir + '/work/.git'
 159     environ['GIT_DIR'] = tmpdir + '/work/.git'
 160     now = int(time())
 161     three_years_ago = now - (60 * 60 * 24 * 366 * 3)
 162     chdir(tmpdir)
 163     exc(['git', 'init', 'work'])
 164
 165     wvstart('generating ' + str(save_population) + ' random saves')
 166     chdir(tmpdir + '/work')
 167     save_utcs = create_older_random_saves(save_population, three_years_ago, now)
 168     chdir(tmpdir)
 169     test_set_hash = exo(['git', 'show-ref', '-s', 'master']).out.rstrip()
 170     ls_saves = bup('ls', 'master').splitlines()
 171     wvpasseq(save_population + 1, len(ls_saves))
 172
 173     wvstart('ensure everything kept, if no keep arguments')
 174     exc(['git', 'reset', '--hard', test_set_hash])
 175     proc = exo((bup_cmd,
 176                 'prune-older', '-v', '--unsafe', '--no-gc',
 177                 '--wrt', str(now)) \
 178                + ('master',),
 179                stdout=None, stderr=PIPE, check=False)
 180     wvpassne(proc.rc, 0)
 181     wvpass('at least one keep argument is required' in proc.err)
 182     check_prune_result(save_utcs)
 183
 184
 185     wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
 186                                                                save_population))
 187     for spec in unique_period_specs(prune_cycles,
 188                                     # Make it more likely we'll have
 189                                     # some outside the save range.
 190                                     three_years_ago - period_scale['m'],
 191                                     now):
 192         exc(['git', 'reset', '--hard', test_set_hash])
 193         expected = sorted(expected_retentions(save_utcs, now, spec))
 194         exc((bup_cmd,
 195              'prune-older', '-v', '--unsafe', '--no-gc', '--wrt', str(now)) \
 196             + period_spec_to_period_args(spec) \
 197             + ('master',))
 198         check_prune_result(expected)
 199
 200
 201     # More expensive because we have to recreate the repo each time
 202     wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
 203                                                             save_population))
 204     exc(['git', 'reset', '--hard', test_set_hash])
 205     copytree('work/.git', 'clean-test-repo', symlinks=True)
 206     for spec in unique_period_specs(prune_gc_cycles,
 207                                     # Make it more likely we'll have
 208                                     # some outside the save range.
 209                                     three_years_ago - period_scale['m'],
 210                                     now):
 211         rmtree('work/.git')
 212         copytree('clean-test-repo', 'work/.git')
 213         expected = sorted(expected_retentions(save_utcs, now, spec))
 214         exc((bup_cmd,
 215              'prune-older', '-v', '--unsafe', '--wrt', str(now)) \
 216             + period_spec_to_period_args(spec) \
 217             + ('master',))
 218         check_prune_result(expected)