t/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../cmd/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import environ, chdir
  13 from os.path import abspath, dirname
  14 from pipes import quote
  15 from random import choice, randint
  16 from shutil import copytree, rmtree
  17 from subprocess import PIPE, Popen, check_call
  18 from sys import stderr
  19 from time import localtime, strftime, time
  20 import os, random, sys
  21
  22 script_home = abspath(dirname(sys.argv[0] or '.'))
  23 sys.path[:0] = [abspath(script_home + '/../lib'), abspath(script_home + '/..')]
  24 top = os.getcwd()
  25 bup_cmd = top + '/bup'
  26
  27 from buptest import test_tempdir
  28 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  29
  30 from bup.helpers import partition, period_as_secs, readpipe
  31
  32
  33 def logcmd(cmd):
  34     if isinstance(cmd, basestring):
  35         print(cmd, file=stderr)
  36     else:
  37         print(' '.join(map(quote, cmd)), file=stderr)
  38
def exc(cmd, shell=False):
    """Log cmd via logcmd(), then run it with subprocess.check_call().

    check_call raises CalledProcessError when the command exits with a
    non-zero status.  Its return value is discarded, so exc() itself
    returns None.
    """
    logcmd(cmd)
    check_call(cmd, shell=shell)
  42
  43 def exo(cmd, stdin=None, stdout=True, stderr=False, shell=False, check=True):
  44     logcmd(cmd)
  45     p = Popen(cmd,
  46               stdin=None,
  47               stdout=(PIPE if stdout else None),
  48               stderr=PIPE,
  49               shell=shell)
  50     out, err = p.communicate()
  51     if check and p.returncode != 0:
  52         raise Exception('subprocess %r failed with status %d, stderr: %r'
  53                         % (' '.join(argv), p.returncode, err))
  54     return out, err, p
  55
  56 def bup(*args):
  57     return exo((bup_cmd,) + args)[0]
  58
  59 def bupc(*args):
  60     return exc((bup_cmd,) + args)
  61
  62 def create_older_random_saves(n, start_utc, end_utc):
  63     with open('foo', 'w') as f:
  64         pass
  65     exc(['git', 'add', 'foo'])
  66     utcs = sorted(randint(start_utc, end_utc) for x in xrange(n))
  67     for utc in utcs:
  68         with open('foo', 'w') as f:
  69             f.write(str(utc) + '\n')
  70         exc(['git', 'commit', '--date', str(utc), '-qam', str(utc)])
  71     exc(['git', 'gc', '--aggressive'])
  72     return utcs
  73
  74 # There is corresponding code in bup for some of this, but the
  75 # computation method is different here, in part so that the test can
  76 # provide a more effective cross-check.
  77
# Retention classes exercised by the test; each one is turned into a
# '--keep-<kind>-for' argument by period_spec_to_period_args().
period_kinds = ['all', 'dailies', 'monthlies', 'yearlies']
# Seconds per unit for each period suffix.  Note that 'm' is sized as
# 31 days and 'y' as 366 days.
period_scale = {'s': 1,
                'min': 60,
                'h': 60 * 60,
                'd': 60 * 60 * 24,
                'w': 60 * 60 * 24 * 7,
                'm': 60 * 60 * 24 * 31,
                'y': 60 * 60 * 24 * 366}
# The unit suffixes that period_spec() picks from at random.
period_scale_kinds = period_scale.keys()
  87
  88 def expected_retentions(utcs, utc_start, spec):
  89     if not spec:
  90         return utcs
  91     utcs = sorted(utcs, reverse=True)
  92     period_start = dict(spec)
  93     for kind, duration in period_start.iteritems():
  94         period_start[kind] = utc_start - period_as_secs(duration)
  95     period_start = defaultdict(lambda: float('inf'), period_start)
  96
  97     all = list(takewhile(lambda x: x >= period_start['all'], utcs))
  98     utcs = list(dropwhile(lambda x: x >= period_start['all'], utcs))
  99
 100     matches = takewhile(lambda x: x >= period_start['dailies'], utcs)
 101     dailies = [min(day_utcs) for yday, day_utcs
 102                in groupby(matches, lambda x: localtime(x).tm_yday)]
 103     utcs = list(dropwhile(lambda x: x >= period_start['dailies'], utcs))
 104
 105     matches = takewhile(lambda x: x >= period_start['monthlies'], utcs)
 106     monthlies = [min(month_utcs) for month, month_utcs
 107                  in groupby(matches, lambda x: localtime(x).tm_mon)]
 108     utcs = dropwhile(lambda x: x >= period_start['monthlies'], utcs)
 109
 110     matches = takewhile(lambda x: x >= period_start['yearlies'], utcs)
 111     yearlies = [min(year_utcs) for year, year_utcs
 112                 in groupby(matches, lambda x: localtime(x).tm_year)]
 113
 114     return chain(all, dailies, monthlies, yearlies)
 115
 116 def period_spec(start_utc, end_utc):
 117     global period_kinds, period_scale, period_scale_kinds
 118     result = []
 119     desired_specs = randint(1, 2 * len(period_kinds))
 120     assert(desired_specs >= 1)  # At least one --keep argument is required
 121     while len(result) < desired_specs:
 122         period = None
 123         if randint(1, 100) <= 5:
 124             period = 'forever'
 125         else:
 126             assert(end_utc > start_utc)
 127             period_secs = randint(1, end_utc - start_utc)
 128             scale = choice(period_scale_kinds)
 129             mag = int(float(period_secs) / period_scale[scale])
 130             if mag != 0:
 131                 period = str(mag) + scale
 132         if period:
 133             result += [(choice(period_kinds), period)]
 134     return tuple(result)
 135
 136 def unique_period_specs(n, start_utc, end_utc):
 137     invocations = set()
 138     while len(invocations) < n:
 139         invocations.add(period_spec(start_utc, end_utc))
 140     return tuple(invocations)
 141
 142 def period_spec_to_period_args(spec):
 143     return tuple(chain(*(('--keep-' + kind + '-for', period)
 144                          for kind, period in spec)))
 145
 146 def result_diffline(x):
 147     return str(x) + strftime(' %Y-%m-%d-%H%M%S', localtime(x)) + '\n'
 148
 149 def check_prune_result(expected):
 150     actual = sorted([int(x)
 151                      for x in exo(['git', 'log',
 152                                    '--pretty=format:%at'])[0].splitlines()])
 153     if expected != actual:
 154         for x in expected:
 155             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 156                   file=stderr)
 157         for line in unified_diff([result_diffline(x) for x in expected],
 158                                  [result_diffline(x) for x in actual],
 159                                  fromfile='expected', tofile='actual'):
 160             sys.stderr.write(line)
 161     wvpass(expected == actual)
 162
 163
# Seed the random number generator from BUP_TEST_SEED when it is set,
# otherwise from the current time, and report the seed so that a
# failing run can be repeated.
seed = int(environ.get('BUP_TEST_SEED', time()))
random.seed(seed)
print('random seed:', seed, file=stderr)

# Test sizes, each overridable through the environment: the number of
# saves to generate, and the number of random retention specs to try
# without and with garbage collection.
save_population = int(environ.get('BUP_TEST_PRUNE_OLDER_SAVES', 2000))
prune_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_CYCLES', 20))
prune_gc_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 171
# Main test body: build a repository of randomly dated saves inside a
# temporary directory, then check "bup prune-older" against
# expected_retentions() for a number of random retention specs.
with test_tempdir('prune-older-') as tmpdir:
    # Point both bup and git at the same repository.
    environ['BUP_DIR'] = tmpdir + '/work/.git'
    environ['GIT_DIR'] = tmpdir + '/work/.git'
    now = int(time())
    # Three 366-day years before now.
    three_years_ago = now - (60 * 60 * 24 * 366 * 3)
    chdir(tmpdir)
    exc(['git', 'init', 'work'])

    wvstart('generating ' + str(save_population) + ' random saves')
    chdir(tmpdir + '/work')
    save_utcs = create_older_random_saves(save_population, three_years_ago, now)
    chdir(tmpdir)
    # Remember the full branch so every cycle can start from it.
    test_set_hash = exo(['git', 'show-ref', '-s', 'master'])[0].rstrip()
    ls_saves = bup('ls', 'master').splitlines()
    # NOTE(review): the one extra entry is presumably bup's "latest"
    # link -- confirm.
    wvpasseq(save_population + 1, len(ls_saves))

    wvstart('ensure everything kept, if no keep arguments')
    exc(['git', 'reset', '--hard', test_set_hash])
    # check=False because prune-older is expected to fail here.
    _, errmsg, proc = exo((bup_cmd,
                           'prune-older', '-v', '--unsafe', '--no-gc',
                           '--wrt', str(now)) \
                          + ('master',),
                          stdout=False, stderr=True, check=False)
    wvpassne(proc.returncode, 0)
    wvpass('at least one keep argument is required' in errmsg)
    # The failed invocation must not have removed anything.
    check_prune_result(save_utcs)


    wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
                                                               save_population))
    for spec in unique_period_specs(prune_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale['m'],
                                    now):
        # Restore the full set of saves before each prune.
        exc(['git', 'reset', '--hard', test_set_hash])
        expected = sorted(expected_retentions(save_utcs, now, spec))
        exc((bup_cmd,
             'prune-older', '-v', '--unsafe', '--no-gc', '--wrt', str(now)) \
            + period_spec_to_period_args(spec) \
            + ('master',))
        check_prune_result(expected)


    # More expensive because we have to recreate the repo each time
    wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
                                                            save_population))
    exc(['git', 'reset', '--hard', test_set_hash])
    # Keep a pristine copy of the repository to restore from.
    copytree('work/.git', 'clean-test-repo', symlinks=True)
    for spec in unique_period_specs(prune_gc_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale['m'],
                                    now):
        rmtree('work/.git')
        # NOTE(review): unlike the snapshot copy above, this restore
        # does not pass symlinks=True -- confirm that is intended.
        copytree('clean-test-repo', 'work/.git')
        expected = sorted(expected_retentions(save_utcs, now, spec))
        exc((bup_cmd,
             'prune-older', '-v', '--unsafe', '--wrt', str(now)) \
            + period_spec_to_period_args(spec) \
            + ('master',))
        check_prune_result(expected)