t/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../cmd/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import absolute_import, print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import chdir
  13 from os.path import abspath, dirname
  14 from random import choice, randint
  15 from shutil import copytree, rmtree
  16 from subprocess import PIPE
  17 from sys import stderr
  18 from time import localtime, strftime, time
  19 import os, random, sys
  20
  21 # For buptest, wvtest, ...
  22 sys.path[:0] = (abspath(os.path.dirname(__file__) + '/..'),)
  23
  24 from buptest import ex, exo, test_tempdir
  25 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  26
  27 from bup import compat
  28 from bup.compat import environ
  29 from bup.helpers import partition, period_as_secs, readpipe
  30 import bup.path
  31
  32
  33 def create_older_random_saves(n, start_utc, end_utc):
  34     with open(b'foo', 'wb') as f:
  35         pass
  36     ex([b'git', b'add', b'foo'])
  37     utcs = set()
  38     while len(utcs) != n:
  39         utcs.add(randint(start_utc, end_utc))
  40     utcs = sorted(utcs)
  41     for utc in utcs:
  42         with open(b'foo', 'wb') as f:
  43             f.write(b'%d\n' % utc)
  44         ex([b'git', b'commit', b'--date', b'%d' % utc, b'-qam', b'%d' % utc])
  45     ex([b'git', b'gc', b'--aggressive'])
  46     return utcs
  47
  48 # There is corresponding code in bup for some of this, but the
  49 # computation method is different here, in part so that the test can
  50 # provide a more effective cross-check.
  51
  52 period_kinds = [b'all', b'dailies', b'monthlies', b'yearlies']
  53 period_scale = {b's': 1,
  54                 b'min': 60,
  55                 b'h': 60 * 60,
  56                 b'd': 60 * 60 * 24,
  57                 b'w': 60 * 60 * 24 * 7,
  58                 b'm': 60 * 60 * 24 * 31,
  59                 b'y': 60 * 60 * 24 * 366}
  60 period_scale_kinds = list(period_scale.keys())
  61
  62 def expected_retentions(utcs, utc_start, spec):
  63     if not spec:
  64         return utcs
  65     utcs = sorted(utcs, reverse=True)
  66     period_start = dict(spec)
  67     for kind, duration in compat.items(period_start):
  68         period_start[kind] = utc_start - period_as_secs(duration)
  69     period_start = defaultdict(lambda: float('inf'), period_start)
  70
  71     all = list(takewhile(lambda x: x >= period_start[b'all'], utcs))
  72     utcs = list(dropwhile(lambda x: x >= period_start[b'all'], utcs))
  73
  74     matches = takewhile(lambda x: x >= period_start[b'dailies'], utcs)
  75     dailies = [max(day_utcs) for yday, day_utcs
  76                in groupby(matches, lambda x: localtime(x).tm_yday)]
  77     utcs = list(dropwhile(lambda x: x >= period_start[b'dailies'], utcs))
  78
  79     matches = takewhile(lambda x: x >= period_start[b'monthlies'], utcs)
  80     monthlies = [max(month_utcs) for month, month_utcs
  81                  in groupby(matches, lambda x: localtime(x).tm_mon)]
  82     utcs = dropwhile(lambda x: x >= period_start[b'monthlies'], utcs)
  83
  84     matches = takewhile(lambda x: x >= period_start[b'yearlies'], utcs)
  85     yearlies = [max(year_utcs) for year, year_utcs
  86                 in groupby(matches, lambda x: localtime(x).tm_year)]
  87
  88     return chain(all, dailies, monthlies, yearlies)
  89
  90 def period_spec(start_utc, end_utc):
  91     global period_kinds, period_scale, period_scale_kinds
  92     result = []
  93     desired_specs = randint(1, 2 * len(period_kinds))
  94     assert(desired_specs >= 1)  # At least one --keep argument is required
  95     while len(result) < desired_specs:
  96         period = None
  97         if randint(1, 100) <= 5:
  98             period = b'forever'
  99         else:
 100             assert(end_utc > start_utc)
 101             period_secs = randint(1, end_utc - start_utc)
 102             scale = choice(period_scale_kinds)
 103             mag = int(float(period_secs) / period_scale[scale])
 104             if mag != 0:
 105                 period = (b'%d' % mag) + scale
 106         if period:
 107             result += [(choice(period_kinds), period)]
 108     return tuple(result)
 109
 110 def unique_period_specs(n, start_utc, end_utc):
 111     invocations = set()
 112     while len(invocations) < n:
 113         invocations.add(period_spec(start_utc, end_utc))
 114     return tuple(invocations)
 115
 116 def period_spec_to_period_args(spec):
 117     return tuple(chain(*((b'--keep-' + kind + b'-for', period)
 118                          for kind, period in spec)))
 119
 120 def result_diffline(x):
 121     return (b'%d %s\n'
 122             % (x, strftime(' %Y-%m-%d-%H%M%S', localtime(x)).encode('ascii')))
 123
 124 def check_prune_result(expected):
 125     actual = sorted([int(x)
 126                      for x in exo([b'git', b'log',
 127                                    b'--pretty=format:%at']).out.splitlines()])
 128     if expected != actual:
 129         for x in expected:
 130             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 131                   file=stderr)
 132         for line in unified_diff([result_diffline(x) for x in expected],
 133                                  [result_diffline(x) for x in actual],
 134                                  fromfile='expected', tofile='actual'):
 135             sys.stderr.write(line)
 136     wvpass(expected == actual)
 137
 138
 139 environ[b'GIT_AUTHOR_NAME'] = b'bup test'
 140 environ[b'GIT_COMMITTER_NAME'] = b'bup test'
 141 environ[b'GIT_AUTHOR_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
 142 environ[b'GIT_COMMITTER_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
 143
 144 seed = int(environ.get(b'BUP_TEST_SEED', time()))
 145 random.seed(seed)
 146 print('random seed:', seed, file=stderr)
 147
 148 save_population = int(environ.get(b'BUP_TEST_PRUNE_OLDER_SAVES', 2000))
 149 prune_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_CYCLES', 20))
 150 prune_gc_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))
 151
 152 bup_cmd = bup.path.exe()
 153
 154 with test_tempdir(b'prune-older-') as tmpdir:
 155     environ[b'BUP_DIR'] = tmpdir + b'/work/.git'
 156     environ[b'GIT_DIR'] = tmpdir + b'/work/.git'
 157     now = int(time())
 158     three_years_ago = now - (60 * 60 * 24 * 366 * 3)
 159     chdir(tmpdir)
 160     ex([b'git', b'init', b'work'])
 161     ex([b'git', b'config', b'gc.autoDetach', b'false'])
 162
 163     wvstart('generating ' + str(save_population) + ' random saves')
 164     chdir(tmpdir + b'/work')
 165     save_utcs = create_older_random_saves(save_population, three_years_ago, now)
 166     chdir(tmpdir)
 167     test_set_hash = exo([b'git', b'show-ref', b'-s', b'master']).out.rstrip()
 168     ls_saves = exo((bup_cmd, b'ls', b'master')).out.splitlines()
 169     wvpasseq(save_population + 1, len(ls_saves))
 170
 171     wvstart('ensure everything kept, if no keep arguments')
 172     ex([b'git', b'reset', b'--hard', test_set_hash])
 173     proc = ex((bup_cmd,
 174                b'prune-older', b'-v', b'--unsafe', b'--no-gc',
 175                b'--wrt', b'%d' % now) \
 176               + (b'master',),
 177               stdout=None, stderr=PIPE, check=False)
 178     wvpassne(proc.rc, 0)
 179     wvpass(b'at least one keep argument is required' in proc.err)
 180     check_prune_result(save_utcs)
 181
 182
 183     wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
 184                                                                save_population))
 185     for spec in unique_period_specs(prune_cycles,
 186                                     # Make it more likely we'll have
 187                                     # some outside the save range.
 188                                     three_years_ago - period_scale[b'm'],
 189                                     now):
 190         ex([b'git', b'reset', b'--hard', test_set_hash])
 191         expected = sorted(expected_retentions(save_utcs, now, spec))
 192         ex((bup_cmd,
 193             b'prune-older', b'-v', b'--unsafe', b'--no-gc', b'--wrt',
 194             b'%d' % now) \
 195            + period_spec_to_period_args(spec) \
 196            + (b'master',))
 197         check_prune_result(expected)
 198
 199
 200     # More expensive because we have to recreate the repo each time
 201     wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
 202                                                             save_population))
 203     ex([b'git', b'reset', b'--hard', test_set_hash])
 204     copytree(b'work/.git', b'clean-test-repo', symlinks=True)
 205     for spec in unique_period_specs(prune_gc_cycles,
 206                                     # Make it more likely we'll have
 207                                     # some outside the save range.
 208                                     three_years_ago - period_scale[b'm'],
 209                                     now):
 210         rmtree(b'work/.git')
 211         copytree(b'clean-test-repo', b'work/.git')
 212         expected = sorted(expected_retentions(save_utcs, now, spec))
 213         ex((bup_cmd,
 214             b'prune-older', b'-v', b'--unsafe', b'--wrt', b'%d' % now) \
 215            + period_spec_to_period_args(spec) \
 216            + (b'master',))
 217         check_prune_result(expected)