t/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../cmd/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import environ, chdir
  13 from os.path import abspath, dirname
  14 from pipes import quote
  15 from random import choice, randint
  16 from shutil import copytree, rmtree
  17 from subprocess import PIPE, Popen, check_call
  18 from sys import stderr
  19 from time import localtime, strftime, time
  20 import os, random, sys
  21
  22 script_home = abspath(dirname(sys.argv[0] or '.'))
  23 sys.path[:0] = [abspath(script_home + '/../lib'), abspath(script_home + '/..')]
  24 top = os.getcwd()
  25 bup_cmd = top + '/bup'
  26
  27 from buptest import test_tempdir
  28 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  29
  30 from bup.helpers import partition, period_as_secs, readpipe
  31
  32
  33 def logcmd(cmd):
  34     if isinstance(cmd, basestring):
  35         print(cmd, file=stderr)
  36     else:
  37         print(' '.join(map(quote, cmd)), file=stderr)
  38
  39 def exc(cmd, shell=False):
  40     logcmd(cmd)
  41     check_call(cmd, shell=shell)
  42
  43 def exo(cmd, stdin=None, stdout=True, stderr=False, shell=False, check=True):
  44     logcmd(cmd)
  45     p = Popen(cmd,
  46               stdin=None,
  47               stdout=(PIPE if stdout else None),
  48               stderr=PIPE,
  49               shell=shell)
  50     out, err = p.communicate()
  51     if check and p.returncode != 0:
  52         raise Exception('subprocess %r failed with status %d, stderr: %r'
  53                         % (' '.join(argv), p.returncode, err))
  54     return out, err, p
  55
  56 def bup(*args):
  57     return exo((bup_cmd,) + args)[0]
  58
  59 def bupc(*args):
  60     return exc((bup_cmd,) + args)
  61
  62 def create_older_random_saves(n, start_utc, end_utc):
  63     with open('foo', 'w') as f:
  64         pass
  65     exc(['git', 'add', 'foo'])
  66     utcs = set()
  67     while len(utcs) != n:
  68         utcs.add(randint(start_utc, end_utc))
  69     utcs = sorted(utcs)
  70     for utc in utcs:
  71         with open('foo', 'w') as f:
  72             f.write(str(utc) + '\n')
  73         exc(['git', 'commit', '--date', str(utc), '-qam', str(utc)])
  74     exc(['git', 'gc', '--aggressive'])
  75     return utcs
  76
  77 # There is corresponding code in bup for some of this, but the
  78 # computation method is different here, in part so that the test can
  79 # provide a more effective cross-check.
  80
  81 period_kinds = ['all', 'dailies', 'monthlies', 'yearlies']
  82 period_scale = {'s': 1,
  83                 'min': 60,
  84                 'h': 60 * 60,
  85                 'd': 60 * 60 * 24,
  86                 'w': 60 * 60 * 24 * 7,
  87                 'm': 60 * 60 * 24 * 31,
  88                 'y': 60 * 60 * 24 * 366}
  89 period_scale_kinds = period_scale.keys()
  90
  91 def expected_retentions(utcs, utc_start, spec):
  92     if not spec:
  93         return utcs
  94     utcs = sorted(utcs, reverse=True)
  95     period_start = dict(spec)
  96     for kind, duration in period_start.iteritems():
  97         period_start[kind] = utc_start - period_as_secs(duration)
  98     period_start = defaultdict(lambda: float('inf'), period_start)
  99
 100     all = list(takewhile(lambda x: x >= period_start['all'], utcs))
 101     utcs = list(dropwhile(lambda x: x >= period_start['all'], utcs))
 102
 103     matches = takewhile(lambda x: x >= period_start['dailies'], utcs)
 104     dailies = [min(day_utcs) for yday, day_utcs
 105                in groupby(matches, lambda x: localtime(x).tm_yday)]
 106     utcs = list(dropwhile(lambda x: x >= period_start['dailies'], utcs))
 107
 108     matches = takewhile(lambda x: x >= period_start['monthlies'], utcs)
 109     monthlies = [min(month_utcs) for month, month_utcs
 110                  in groupby(matches, lambda x: localtime(x).tm_mon)]
 111     utcs = dropwhile(lambda x: x >= period_start['monthlies'], utcs)
 112
 113     matches = takewhile(lambda x: x >= period_start['yearlies'], utcs)
 114     yearlies = [min(year_utcs) for year, year_utcs
 115                 in groupby(matches, lambda x: localtime(x).tm_year)]
 116
 117     return chain(all, dailies, monthlies, yearlies)
 118
 119 def period_spec(start_utc, end_utc):
 120     global period_kinds, period_scale, period_scale_kinds
 121     result = []
 122     desired_specs = randint(1, 2 * len(period_kinds))
 123     assert(desired_specs >= 1)  # At least one --keep argument is required
 124     while len(result) < desired_specs:
 125         period = None
 126         if randint(1, 100) <= 5:
 127             period = 'forever'
 128         else:
 129             assert(end_utc > start_utc)
 130             period_secs = randint(1, end_utc - start_utc)
 131             scale = choice(period_scale_kinds)
 132             mag = int(float(period_secs) / period_scale[scale])
 133             if mag != 0:
 134                 period = str(mag) + scale
 135         if period:
 136             result += [(choice(period_kinds), period)]
 137     return tuple(result)
 138
 139 def unique_period_specs(n, start_utc, end_utc):
 140     invocations = set()
 141     while len(invocations) < n:
 142         invocations.add(period_spec(start_utc, end_utc))
 143     return tuple(invocations)
 144
 145 def period_spec_to_period_args(spec):
 146     return tuple(chain(*(('--keep-' + kind + '-for', period)
 147                          for kind, period in spec)))
 148
 149 def result_diffline(x):
 150     return str(x) + strftime(' %Y-%m-%d-%H%M%S', localtime(x)) + '\n'
 151
 152 def check_prune_result(expected):
 153     actual = sorted([int(x)
 154                      for x in exo(['git', 'log',
 155                                    '--pretty=format:%at'])[0].splitlines()])
 156     if expected != actual:
 157         for x in expected:
 158             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 159                   file=stderr)
 160         for line in unified_diff([result_diffline(x) for x in expected],
 161                                  [result_diffline(x) for x in actual],
 162                                  fromfile='expected', tofile='actual'):
 163             sys.stderr.write(line)
 164     wvpass(expected == actual)
 165
 166
 167 seed = int(environ.get('BUP_TEST_SEED', time()))
 168 random.seed(seed)
 169 print('random seed:', seed, file=stderr)
 170
 171 save_population = int(environ.get('BUP_TEST_PRUNE_OLDER_SAVES', 2000))
 172 prune_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_CYCLES', 20))
 173 prune_gc_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 174
 175 with test_tempdir('prune-older-') as tmpdir:
 176     environ['BUP_DIR'] = tmpdir + '/work/.git'
 177     environ['GIT_DIR'] = tmpdir + '/work/.git'
 178     now = int(time())
 179     three_years_ago = now - (60 * 60 * 24 * 366 * 3)
 180     chdir(tmpdir)
 181     exc(['git', 'init', 'work'])
 182
 183     wvstart('generating ' + str(save_population) + ' random saves')
 184     chdir(tmpdir + '/work')
 185     save_utcs = create_older_random_saves(save_population, three_years_ago, now)
 186     chdir(tmpdir)
 187     test_set_hash = exo(['git', 'show-ref', '-s', 'master'])[0].rstrip()
 188     ls_saves = bup('ls', 'master').splitlines()
 189     wvpasseq(save_population + 1, len(ls_saves))
 190
 191     wvstart('ensure everything kept, if no keep arguments')
 192     exc(['git', 'reset', '--hard', test_set_hash])
 193     _, errmsg, proc = exo((bup_cmd,
 194                            'prune-older', '-v', '--unsafe', '--no-gc',
 195                            '--wrt', str(now)) \
 196                           + ('master',),
 197                           stdout=False, stderr=True, check=False)
 198     wvpassne(proc.returncode, 0)
 199     wvpass('at least one keep argument is required' in errmsg)
 200     check_prune_result(save_utcs)
 201
 202
 203     wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
 204                                                                save_population))
 205     for spec in unique_period_specs(prune_cycles,
 206                                     # Make it more likely we'll have
 207                                     # some outside the save range.
 208                                     three_years_ago - period_scale['m'],
 209                                     now):
 210         exc(['git', 'reset', '--hard', test_set_hash])
 211         expected = sorted(expected_retentions(save_utcs, now, spec))
 212         exc((bup_cmd,
 213              'prune-older', '-v', '--unsafe', '--no-gc', '--wrt', str(now)) \
 214             + period_spec_to_period_args(spec) \
 215             + ('master',))
 216         check_prune_result(expected)
 217
 218
 219     # More expensive because we have to recreate the repo each time
 220     wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
 221                                                             save_population))
 222     exc(['git', 'reset', '--hard', test_set_hash])
 223     copytree('work/.git', 'clean-test-repo', symlinks=True)
 224     for spec in unique_period_specs(prune_gc_cycles,
 225                                     # Make it more likely we'll have
 226                                     # some outside the save range.
 227                                     three_years_ago - period_scale['m'],
 228                                     now):
 229         rmtree('work/.git')
 230         copytree('clean-test-repo', 'work/.git')
 231         expected = sorted(expected_retentions(save_utcs, now, spec))
 232         exc((bup_cmd,
 233              'prune-older', '-v', '--unsafe', '--wrt', str(now)) \
 234             + period_spec_to_period_args(spec) \
 235             + ('master',))
 236         check_prune_result(expected)