t/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../cmd/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import absolute_import, print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import environ, chdir
  13 from os.path import abspath, dirname
  14 from random import choice, randint
  15 from shutil import copytree, rmtree
  16 from subprocess import PIPE
  17 from sys import stderr
  18 from time import localtime, strftime, time
  19 import os, random, sys
  20
  21 script_home = abspath(dirname(sys.argv[0] or '.'))
  22 sys.path[:0] = [abspath(script_home + '/../lib'), abspath(script_home + '/..')]
  23 top = os.getcwd()
  24 bup_cmd = top + '/bup'
  25
  26 from buptest import ex, exo, test_tempdir
  27 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  28
  29 from bup import compat
  30 from bup.helpers import partition, period_as_secs, readpipe
  31
  32
  33 def create_older_random_saves(n, start_utc, end_utc):
  34     with open('foo', 'w') as f:
  35         pass
  36     ex(['git', 'add', 'foo'])
  37     utcs = set()
  38     while len(utcs) != n:
  39         utcs.add(randint(start_utc, end_utc))
  40     utcs = sorted(utcs)
  41     for utc in utcs:
  42         with open('foo', 'w') as f:
  43             f.write(str(utc) + '\n')
  44         ex(['git', 'commit', '--date', str(utc), '-qam', str(utc)])
  45     ex(['git', 'gc', '--aggressive'])
  46     return utcs
  47
  48 # There is corresponding code in bup for some of this, but the
  49 # computation method is different here, in part so that the test can
  50 # provide a more effective cross-check.
  51
  52 period_kinds = ['all', 'dailies', 'monthlies', 'yearlies']
  53 period_scale = {'s': 1,
  54                 'min': 60,
  55                 'h': 60 * 60,
  56                 'd': 60 * 60 * 24,
  57                 'w': 60 * 60 * 24 * 7,
  58                 'm': 60 * 60 * 24 * 31,
  59                 'y': 60 * 60 * 24 * 366}
  60 period_scale_kinds = period_scale.keys()
  61
  62 def expected_retentions(utcs, utc_start, spec):
  63     if not spec:
  64         return utcs
  65     utcs = sorted(utcs, reverse=True)
  66     period_start = dict(spec)
  67     for kind, duration in compat.items(period_start):
  68         period_start[kind] = utc_start - period_as_secs(duration)
  69     period_start = defaultdict(lambda: float('inf'), period_start)
  70
  71     all = list(takewhile(lambda x: x >= period_start['all'], utcs))
  72     utcs = list(dropwhile(lambda x: x >= period_start['all'], utcs))
  73
  74     matches = takewhile(lambda x: x >= period_start['dailies'], utcs)
  75     dailies = [max(day_utcs) for yday, day_utcs
  76                in groupby(matches, lambda x: localtime(x).tm_yday)]
  77     utcs = list(dropwhile(lambda x: x >= period_start['dailies'], utcs))
  78
  79     matches = takewhile(lambda x: x >= period_start['monthlies'], utcs)
  80     monthlies = [max(month_utcs) for month, month_utcs
  81                  in groupby(matches, lambda x: localtime(x).tm_mon)]
  82     utcs = dropwhile(lambda x: x >= period_start['monthlies'], utcs)
  83
  84     matches = takewhile(lambda x: x >= period_start['yearlies'], utcs)
  85     yearlies = [max(year_utcs) for year, year_utcs
  86                 in groupby(matches, lambda x: localtime(x).tm_year)]
  87
  88     return chain(all, dailies, monthlies, yearlies)
  89
  90 def period_spec(start_utc, end_utc):
  91     global period_kinds, period_scale, period_scale_kinds
  92     result = []
  93     desired_specs = randint(1, 2 * len(period_kinds))
  94     assert(desired_specs >= 1)  # At least one --keep argument is required
  95     while len(result) < desired_specs:
  96         period = None
  97         if randint(1, 100) <= 5:
  98             period = 'forever'
  99         else:
 100             assert(end_utc > start_utc)
 101             period_secs = randint(1, end_utc - start_utc)
 102             scale = choice(period_scale_kinds)
 103             mag = int(float(period_secs) / period_scale[scale])
 104             if mag != 0:
 105                 period = str(mag) + scale
 106         if period:
 107             result += [(choice(period_kinds), period)]
 108     return tuple(result)
 109
 110 def unique_period_specs(n, start_utc, end_utc):
 111     invocations = set()
 112     while len(invocations) < n:
 113         invocations.add(period_spec(start_utc, end_utc))
 114     return tuple(invocations)
 115
 116 def period_spec_to_period_args(spec):
 117     return tuple(chain(*(('--keep-' + kind + '-for', period)
 118                          for kind, period in spec)))
 119
 120 def result_diffline(x):
 121     return str(x) + strftime(' %Y-%m-%d-%H%M%S', localtime(x)) + '\n'
 122
 123 def check_prune_result(expected):
 124     actual = sorted([int(x)
 125                      for x in exo(['git', 'log',
 126                                    '--pretty=format:%at']).out.splitlines()])
 127     if expected != actual:
 128         for x in expected:
 129             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 130                   file=stderr)
 131         for line in unified_diff([result_diffline(x) for x in expected],
 132                                  [result_diffline(x) for x in actual],
 133                                  fromfile='expected', tofile='actual'):
 134             sys.stderr.write(line)
 135     wvpass(expected == actual)
 136
 137
 138 environ['GIT_AUTHOR_NAME'] = 'bup test'
 139 environ['GIT_COMMITTER_NAME'] = 'bup test'
 140 environ['GIT_AUTHOR_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 141 environ['GIT_COMMITTER_EMAIL'] = 'bup@a425bc70a02811e49bdf73ee56450e6f'
 142
 143 seed = int(environ.get('BUP_TEST_SEED', time()))
 144 random.seed(seed)
 145 print('random seed:', seed, file=stderr)
 146
 147 save_population = int(environ.get('BUP_TEST_PRUNE_OLDER_SAVES', 2000))
 148 prune_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_CYCLES', 20))
 149 prune_gc_cycles = int(environ.get('BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 150
 151 with test_tempdir('prune-older-') as tmpdir:
 152     environ['BUP_DIR'] = tmpdir + '/work/.git'
 153     environ['GIT_DIR'] = tmpdir + '/work/.git'
 154     now = int(time())
 155     three_years_ago = now - (60 * 60 * 24 * 366 * 3)
 156     chdir(tmpdir)
 157     ex(['git', 'init', 'work'])
 158     ex(['git', 'config', 'gc.autoDetach', 'false'])
 159
 160     wvstart('generating ' + str(save_population) + ' random saves')
 161     chdir(tmpdir + '/work')
 162     save_utcs = create_older_random_saves(save_population, three_years_ago, now)
 163     chdir(tmpdir)
 164     test_set_hash = exo(['git', 'show-ref', '-s', 'master']).out.rstrip()
 165     ls_saves = exo((bup_cmd, 'ls', 'master')).out.splitlines()
 166     wvpasseq(save_population + 1, len(ls_saves))
 167
 168     wvstart('ensure everything kept, if no keep arguments')
 169     ex(['git', 'reset', '--hard', test_set_hash])
 170     proc = ex((bup_cmd,
 171                'prune-older', '-v', '--unsafe', '--no-gc',
 172                '--wrt', str(now)) \
 173               + ('master',),
 174               stdout=None, stderr=PIPE, check=False)
 175     wvpassne(proc.rc, 0)
 176     wvpass('at least one keep argument is required' in proc.err)
 177     check_prune_result(save_utcs)
 178
 179
 180     wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
 181                                                                save_population))
 182     for spec in unique_period_specs(prune_cycles,
 183                                     # Make it more likely we'll have
 184                                     # some outside the save range.
 185                                     three_years_ago - period_scale['m'],
 186                                     now):
 187         ex(['git', 'reset', '--hard', test_set_hash])
 188         expected = sorted(expected_retentions(save_utcs, now, spec))
 189         ex((bup_cmd,
 190             'prune-older', '-v', '--unsafe', '--no-gc', '--wrt', str(now)) \
 191            + period_spec_to_period_args(spec) \
 192            + ('master',))
 193         check_prune_result(expected)
 194
 195
 196     # More expensive because we have to recreate the repo each time
 197     wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
 198                                                             save_population))
 199     ex(['git', 'reset', '--hard', test_set_hash])
 200     copytree('work/.git', 'clean-test-repo', symlinks=True)
 201     for spec in unique_period_specs(prune_gc_cycles,
 202                                     # Make it more likely we'll have
 203                                     # some outside the save range.
 204                                     three_years_ago - period_scale['m'],
 205                                     now):
 206         rmtree('work/.git')
 207         copytree('clean-test-repo', 'work/.git')
 208         expected = sorted(expected_retentions(save_utcs, now, spec))
 209         ex((bup_cmd,
 210             'prune-older', '-v', '--unsafe', '--wrt', str(now)) \
 211            + period_spec_to_period_args(spec) \
 212            + ('master',))
 213         check_prune_result(expected)