t/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../dev/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import absolute_import, print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import chdir
  13 from os.path import abspath, dirname
  14 from random import choice, randint
  15 from shutil import copytree, rmtree
  16 from subprocess import PIPE
  17 from sys import stderr
  18 from time import localtime, strftime, time
  19 import os, random, sys
  20
  21 # For buptest, wvtest, ...
  22 sys.path[:0] = (abspath(os.path.dirname(__file__) + '/..'),)
  23 sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/../lib']
  24
  25 from buptest import ex, exo, test_tempdir
  26 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  27
  28 from bup import compat
  29 from bup.compat import environ
  30 from bup.helpers import partition, period_as_secs, readpipe
  31 import bup.path
  32
  33
  34 def create_older_random_saves(n, start_utc, end_utc):
  35     with open(b'foo', 'wb') as f:
  36         pass
  37     ex([b'git', b'add', b'foo'])
  38     utcs = set()
  39     while len(utcs) != n:
  40         utcs.add(randint(start_utc, end_utc))
  41     utcs = sorted(utcs)
  42     for utc in utcs:
  43         with open(b'foo', 'wb') as f:
  44             f.write(b'%d\n' % utc)
  45         ex([b'git', b'commit', b'--date', b'%d' % utc, b'-qam', b'%d' % utc])
  46     ex([b'git', b'gc', b'--aggressive'])
  47     return utcs
  48
  49 # There is corresponding code in bup for some of this, but the
  50 # computation method is different here, in part so that the test can
  51 # provide a more effective cross-check.
  52
  53 period_kinds = [b'all', b'dailies', b'monthlies', b'yearlies']
  54 period_scale = {b's': 1,
  55                 b'min': 60,
  56                 b'h': 60 * 60,
  57                 b'd': 60 * 60 * 24,
  58                 b'w': 60 * 60 * 24 * 7,
  59                 b'm': 60 * 60 * 24 * 31,
  60                 b'y': 60 * 60 * 24 * 366}
  61 period_scale_kinds = list(period_scale.keys())
  62
  63 def expected_retentions(utcs, utc_start, spec):
  64     if not spec:
  65         return utcs
  66     utcs = sorted(utcs, reverse=True)
  67     period_start = dict(spec)
  68     for kind, duration in compat.items(period_start):
  69         period_start[kind] = utc_start - period_as_secs(duration)
  70     period_start = defaultdict(lambda: float('inf'), period_start)
  71
  72     all = list(takewhile(lambda x: x >= period_start[b'all'], utcs))
  73     utcs = list(dropwhile(lambda x: x >= period_start[b'all'], utcs))
  74
  75     matches = takewhile(lambda x: x >= period_start[b'dailies'], utcs)
  76     dailies = [max(day_utcs) for yday, day_utcs
  77                in groupby(matches, lambda x: localtime(x).tm_yday)]
  78     utcs = list(dropwhile(lambda x: x >= period_start[b'dailies'], utcs))
  79
  80     matches = takewhile(lambda x: x >= period_start[b'monthlies'], utcs)
  81     monthlies = [max(month_utcs) for month, month_utcs
  82                  in groupby(matches, lambda x: localtime(x).tm_mon)]
  83     utcs = dropwhile(lambda x: x >= period_start[b'monthlies'], utcs)
  84
  85     matches = takewhile(lambda x: x >= period_start[b'yearlies'], utcs)
  86     yearlies = [max(year_utcs) for year, year_utcs
  87                 in groupby(matches, lambda x: localtime(x).tm_year)]
  88
  89     return chain(all, dailies, monthlies, yearlies)
  90
  91 def period_spec(start_utc, end_utc):
  92     global period_kinds, period_scale, period_scale_kinds
  93     result = []
  94     desired_specs = randint(1, 2 * len(period_kinds))
  95     assert(desired_specs >= 1)  # At least one --keep argument is required
  96     while len(result) < desired_specs:
  97         period = None
  98         if randint(1, 100) <= 5:
  99             period = b'forever'
 100         else:
 101             assert(end_utc > start_utc)
 102             period_secs = randint(1, end_utc - start_utc)
 103             scale = choice(period_scale_kinds)
 104             mag = int(float(period_secs) / period_scale[scale])
 105             if mag != 0:
 106                 period = (b'%d' % mag) + scale
 107         if period:
 108             result += [(choice(period_kinds), period)]
 109     return tuple(result)
 110
 111 def unique_period_specs(n, start_utc, end_utc):
 112     invocations = set()
 113     while len(invocations) < n:
 114         invocations.add(period_spec(start_utc, end_utc))
 115     return tuple(invocations)
 116
 117 def period_spec_to_period_args(spec):
 118     return tuple(chain(*((b'--keep-' + kind + b'-for', period)
 119                          for kind, period in spec)))
 120
 121 def result_diffline(x):
 122     return (b'%d %s\n'
 123             % (x, strftime(' %Y-%m-%d-%H%M%S', localtime(x)).encode('ascii')))
 124
 125 def check_prune_result(expected):
 126     actual = sorted([int(x)
 127                      for x in exo([b'git', b'log',
 128                                    b'--pretty=format:%at']).out.splitlines()])
 129     if expected != actual:
 130         for x in expected:
 131             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 132                   file=stderr)
 133         for line in unified_diff([result_diffline(x) for x in expected],
 134                                  [result_diffline(x) for x in actual],
 135                                  fromfile='expected', tofile='actual'):
 136             sys.stderr.write(line)
 137     wvpass(expected == actual)
 138
 139
 140 environ[b'GIT_AUTHOR_NAME'] = b'bup test'
 141 environ[b'GIT_COMMITTER_NAME'] = b'bup test'
 142 environ[b'GIT_AUTHOR_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
 143 environ[b'GIT_COMMITTER_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
 144
 145 seed = int(environ.get(b'BUP_TEST_SEED', time()))
 146 random.seed(seed)
 147 print('random seed:', seed, file=stderr)
 148
 149 save_population = int(environ.get(b'BUP_TEST_PRUNE_OLDER_SAVES', 2000))
 150 prune_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_CYCLES', 20))
 151 prune_gc_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 152
 153 bup_cmd = bup.path.exe()
 154
 155 with test_tempdir(b'prune-older-') as tmpdir:
 156     environ[b'BUP_DIR'] = tmpdir + b'/work/.git'
 157     environ[b'GIT_DIR'] = tmpdir + b'/work/.git'
 158     now = int(time())
 159     three_years_ago = now - (60 * 60 * 24 * 366 * 3)
 160     chdir(tmpdir)
 161     ex([b'git', b'init', b'work'])
 162     ex([b'git', b'config', b'gc.autoDetach', b'false'])
 163
 164     wvstart('generating ' + str(save_population) + ' random saves')
 165     chdir(tmpdir + b'/work')
 166     save_utcs = create_older_random_saves(save_population, three_years_ago, now)
 167     chdir(tmpdir)
 168     test_set_hash = exo([b'git', b'show-ref', b'-s', b'master']).out.rstrip()
 169     ls_saves = exo((bup_cmd, b'ls', b'master')).out.splitlines()
 170     wvpasseq(save_population + 1, len(ls_saves))
 171
 172     wvstart('ensure everything kept, if no keep arguments')
 173     ex([b'git', b'reset', b'--hard', test_set_hash])
 174     proc = ex((bup_cmd,
 175                b'prune-older', b'-v', b'--unsafe', b'--no-gc',
 176                b'--wrt', b'%d' % now) \
 177               + (b'master',),
 178               stdout=None, stderr=PIPE, check=False)
 179     wvpassne(proc.rc, 0)
 180     wvpass(b'at least one keep argument is required' in proc.err)
 181     check_prune_result(save_utcs)
 182
 183
 184     wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
 185                                                                save_population))
 186     for spec in unique_period_specs(prune_cycles,
 187                                     # Make it more likely we'll have
 188                                     # some outside the save range.
 189                                     three_years_ago - period_scale[b'm'],
 190                                     now):
 191         ex([b'git', b'reset', b'--hard', test_set_hash])
 192         expected = sorted(expected_retentions(save_utcs, now, spec))
 193         ex((bup_cmd,
 194             b'prune-older', b'-v', b'--unsafe', b'--no-gc', b'--wrt',
 195             b'%d' % now) \
 196            + period_spec_to_period_args(spec) \
 197            + (b'master',))
 198         check_prune_result(expected)
 199
 200
 201     # More expensive because we have to recreate the repo each time
 202     wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
 203                                                             save_population))
 204     ex([b'git', b'reset', b'--hard', test_set_hash])
 205     copytree(b'work/.git', b'clean-test-repo', symlinks=True)
 206     for spec in unique_period_specs(prune_gc_cycles,
 207                                     # Make it more likely we'll have
 208                                     # some outside the save range.
 209                                     three_years_ago - period_scale[b'm'],
 210                                     now):
 211         rmtree(b'work/.git')
 212         copytree(b'clean-test-repo', b'work/.git')
 213         expected = sorted(expected_retentions(save_utcs, now, spec))
 214         ex((bup_cmd,
 215             b'prune-older', b'-v', b'--unsafe', b'--wrt', b'%d' % now) \
 216            + period_spec_to_period_args(spec) \
 217            + (b'master',))
 218         check_prune_result(expected)