t/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../cmd/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import environ, chdir
  13 from os.path import abspath, dirname
  14 from pipes import quote
  15 from random import choice, randint
  16 from shutil import copytree, rmtree
  17 from subprocess import PIPE, Popen, check_call
  18 from sys import stderr
  19 from time import localtime, strftime, time
  20 import os, random, sys
  21
# Directory containing this script ('.' is used if sys.argv[0] is empty).
script_home = abspath(dirname(sys.argv[0] or '.'))
# Put ../lib and .. (relative to the script) at the front of the module
# search path; presumably this is where the buptest, wvtest, and bup
# modules imported further down live.
sys.path[:0] = [abspath(script_home + '/../lib'), abspath(script_home + '/..')]
# The bup command is looked up in the directory the test was started
# from, not relative to the script.
top = os.getcwd()
bup_cmd = top + '/bup'
  26
  27 from buptest import test_tempdir
  28 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  29
  30 from bup.helpers import partition, period_as_secs, readpipe
  31
  32
  33 def logcmd(cmd):
  34     if isinstance(cmd, basestring):
  35         print(cmd, file=stderr)
  36     else:
  37         print(' '.join(map(quote, cmd)), file=stderr)
  38
  39 def exc(cmd, shell=False):
  40     logcmd(cmd)
  41     check_call(cmd, shell=shell)
  42
  43 def exo(cmd, stdin=None, stdout=True, stderr=False, shell=False, check=True):
  44     logcmd(cmd)
  45     p = Popen(cmd,
  46               stdin=None,
  47               stdout=(PIPE if stdout else None),
  48               stderr=PIPE,
  49               shell=shell)
  50     out, err = p.communicate()
  51     if check and p.returncode != 0:
  52         raise Exception('subprocess %r failed with status %d, stderr: %r'
  53                         % (' '.join(map(quote, cmd)), p.returncode, err))
  54     return out, err, p
  55
  56 def bup(*args):
  57     return exo((bup_cmd,) + args)[0]
  58
  59 def bupc(*args):
  60     return exc((bup_cmd,) + args)
  61
  62 def create_older_random_saves(n, start_utc, end_utc):
  63     with open('foo', 'w') as f:
  64         pass
  65     exc(['git', 'add', 'foo'])
  66     utcs = set()
  67     while len(utcs) != n:
  68         utcs.add(randint(start_utc, end_utc))
  69     utcs = sorted(utcs)
  70     for utc in utcs:
  71         with open('foo', 'w') as f:
  72             f.write(str(utc) + '\n')
  73         exc(['git', 'commit', '--date', str(utc), '-qam', str(utc)])
  74     exc(['git', 'gc', '--aggressive'])
  75     return utcs
  76
  77 # There is corresponding code in bup for some of this, but the
  78 # computation method is different here, in part so that the test can
  79 # provide a more effective cross-check.
  80
  81 period_kinds = ['all', 'dailies', 'monthlies', 'yearlies']
  82 period_scale = {'s': 1,
  83                 'min': 60,
  84                 'h': 60 * 60,
  85                 'd': 60 * 60 * 24,
  86                 'w': 60 * 60 * 24 * 7,
  87                 'm': 60 * 60 * 24 * 31,
  88                 'y': 60 * 60 * 24 * 366}
  89 period_scale_kinds = period_scale.keys()
  90
  91 def expected_retentions(utcs, utc_start, spec):
  92     if not spec:
  93         return utcs
  94     utcs = sorted(utcs, reverse=True)
  95     period_start = dict(spec)
  96     for kind, duration in period_start.iteritems():
  97         period_start[kind] = utc_start - period_as_secs(duration)
  98     period_start = defaultdict(lambda: float('inf'), period_start)
  99
 100     all = list(takewhile(lambda x: x >= period_start['all'], utcs))
 101     utcs = list(dropwhile(lambda x: x >= period_start['all'], utcs))
 102
 103     matches = takewhile(lambda x: x >= period_start['dailies'], utcs)
 104     dailies = [max(day_utcs) for yday, day_utcs
 105                in groupby(matches, lambda x: localtime(x).tm_yday)]
 106     utcs = list(dropwhile(lambda x: x >= period_start['dailies'], utcs))
 107
 108     matches = takewhile(lambda x: x >= period_start['monthlies'], utcs)
 109     monthlies = [max(month_utcs) for month, month_utcs
 110                  in groupby(matches, lambda x: localtime(x).tm_mon)]
 111     utcs = dropwhile(lambda x: x >= period_start['monthlies'], utcs)
 112
 113     matches = takewhile(lambda x: x >= period_start['yearlies'], utcs)
 114     yearlies = [max(year_utcs) for year, year_utcs
 115                 in groupby(matches, lambda x: localtime(x).tm_year)]
 116
 117     return chain(all, dailies, monthlies, yearlies)
 118
 119 def period_spec(start_utc, end_utc):
 120     global period_kinds, period_scale, period_scale_kinds
 121     result = []
 122     desired_specs = randint(1, 2 * len(period_kinds))
 123     assert(desired_specs >= 1)  # At least one --keep argument is required
 124     while len(result) < desired_specs:
 125         period = None
 126         if randint(1, 100) <= 5:
 127             period = 'forever'
 128         else:
 129             assert(end_utc > start_utc)
 130             period_secs = randint(1, end_utc - start_utc)
 131             scale = choice(period_scale_kinds)
 132             mag = int(float(period_secs) / period_scale[scale])
 133             if mag != 0:
 134                 period = str(mag) + scale
 135         if period:
 136             result += [(choice(period_kinds), period)]
 137     return tuple(result)
 138
 139 def unique_period_specs(n, start_utc, end_utc):
 140     invocations = set()
 141     while len(invocations) < n:
 142         invocations.add(period_spec(start_utc, end_utc))
 143     return tuple(invocations)
 144
 145 def period_spec_to_period_args(spec):
 146     return tuple(chain(*(('--keep-' + kind + '-for', period)
 147                          for kind, period in spec)))
 148
 149 def result_diffline(x):
 150     return str(x) + strftime(' %Y-%m-%d-%H%M%S', localtime(x)) + '\n'
 151
 152 def check_prune_result(expected):
 153     actual = sorted([int(x)
 154                      for x in exo(['git', 'log',
 155                                    '--pretty=format:%at'])[0].splitlines()])
 156     if expected != actual:
 157         for x in expected:
 158             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 159                   file=stderr)
 160         for line in unified_diff([result_diffline(x) for x in expected],
 161                                  [result_diffline(x) for x in actual],
 162                                  fromfile='expected', tofile='actual'):
 163             sys.stderr.write(line)
 164     wvpass(expected == actual)
 165
 166
 167 environ['GIT_AUTHOR_NAME'] = 'bup test'
 168 environ['GIT_COMMITTER_NAME'] = 'bup test'
 169 environ['GIT_AUTHOR_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 170 environ['GIT_COMMITTER_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 171
 172 seed = int(environ.get('BUP_TEST_SEED', time()))
 173 random.seed(seed)
 174 print('random seed:', seed, file=stderr)
 175
 176 save_population = int(environ.get('BUP_TEST_PRUNE_OLDER_SAVES', 2000))
 177 prune_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_CYCLES', 20))
 178 prune_gc_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 179
# Main test body: build a repository of randomly dated saves in a
# scratch directory, then check 'bup prune-older' against the
# independently computed expected_retentions() for random keep specs.
with test_tempdir('prune-older-') as tmpdir:
    # Point both bup and git at the scratch repository.
    environ['BUP_DIR'] = tmpdir + '/work/.git'
    environ['GIT_DIR'] = tmpdir + '/work/.git'
    # 'now' is the fixed reference time passed to prune-older via --wrt;
    # saves are spread over the three 366-day years before it.
    now = int(time())
    three_years_ago = now - (60 * 60 * 24 * 366 * 3)
    chdir(tmpdir)
    exc(['git', 'init', 'work'])

    wvstart('generating ' + str(save_population) + ' random saves')
    chdir(tmpdir + '/work')
    save_utcs = create_older_random_saves(save_population, three_years_ago, now)
    chdir(tmpdir)
    # Remember the full history's tip so each test can start from it.
    test_set_hash = exo(['git', 'show-ref', '-s', 'master'])[0].rstrip()
    ls_saves = bup('ls', 'master').splitlines()
    # One more entry than there are saves is expected -- presumably the
    # extra one is bup's 'latest' link; TODO confirm.
    wvpasseq(save_population + 1, len(ls_saves))

    # Without any --keep-* option prune-older must fail with the message
    # checked below, and the repository must still hold every save.
    wvstart('ensure everything kept, if no keep arguments')
    exc(['git', 'reset', '--hard', test_set_hash])
    _, errmsg, proc = exo((bup_cmd,
                           'prune-older', '-v', '--unsafe', '--no-gc',
                           '--wrt', str(now)) \
                          + ('master',),
                          stdout=False, stderr=True, check=False)
    wvpassne(proc.returncode, 0)
    wvpass('at least one keep argument is required' in errmsg)
    check_prune_result(save_utcs)


    # With --no-gc, the branch is simply reset to the full history
    # before each prune.
    wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
                                                               save_population))
    for spec in unique_period_specs(prune_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale['m'],
                                    now):
        exc(['git', 'reset', '--hard', test_set_hash])
        expected = sorted(expected_retentions(save_utcs, now, spec))
        exc((bup_cmd,
             'prune-older', '-v', '--unsafe', '--no-gc', '--wrt', str(now)) \
            + period_spec_to_period_args(spec) \
            + ('master',))
        check_prune_result(expected)


    # More expensive because we have to recreate the repo each time
    wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
                                                            save_population))
    exc(['git', 'reset', '--hard', test_set_hash])
    # Keep a pristine copy of the repository to restore from before each
    # prune, since these runs do not pass --no-gc.
    copytree('work/.git', 'clean-test-repo', symlinks=True)
    for spec in unique_period_specs(prune_gc_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale['m'],
                                    now):
        rmtree('work/.git')
        copytree('clean-test-repo', 'work/.git')
        expected = sorted(expected_retentions(save_utcs, now, spec))
        exc((bup_cmd,
             'prune-older', '-v', '--unsafe', '--wrt', str(now)) \
            + period_spec_to_period_args(spec) \
            + ('master',))
        check_prune_result(expected)