test/ext/test-prune-older

   1 #!/bin/sh
   2 """": # -*-python-*-
   3 bup_python="$(dirname "$0")/../../dev/bup-python" || exit $?
   4 exec "$bup_python" "$0" ${1+"$@"}
   5 """
   6 # end of bup preamble
   7
   8 from __future__ import absolute_import, print_function
   9 from collections import defaultdict
  10 from difflib import unified_diff
  11 from itertools import chain, dropwhile, groupby, takewhile
  12 from os import chdir
  13 from os.path import abspath, dirname
  14 from random import choice, randint
  15 from shutil import copytree, rmtree
  16 from subprocess import PIPE
  17 from sys import stderr
  18 from time import localtime, strftime, time
  19 import os, random, sys
  20
  21 # For buptest, wvtest, ...
  22 sys.path[:0] = (abspath(os.path.dirname(__file__) + '/../..'),)
  23 sys.path[:0] = (abspath(os.path.dirname(__file__) + '/../../test/lib'),)
  24 sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/../../lib']
  25
  26 from buptest import ex, exo, test_tempdir
  27 from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart
  28
  29 from bup import compat
  30 from bup.compat import environ
  31 from bup.helpers import partition, period_as_secs, readpipe
  32 import bup.path
  33
  34
  35 def create_older_random_saves(n, start_utc, end_utc):
  36     with open(b'foo', 'wb') as f:
  37         pass
  38     ex([b'git', b'add', b'foo'])
  39     utcs = set()
  40     while len(utcs) != n:
  41         utcs.add(randint(start_utc, end_utc))
  42     utcs = sorted(utcs)
  43     for utc in utcs:
  44         with open(b'foo', 'wb') as f:
  45             f.write(b'%d\n' % utc)
  46         ex([b'git', b'commit', b'--date', b'%d' % utc, b'-qam', b'%d' % utc])
  47     ex([b'git', b'gc', b'--aggressive'])
  48     return utcs
  49
  50 # There is corresponding code in bup for some of this, but the
  51 # computation method is different here, in part so that the test can
  52 # provide a more effective cross-check.
  53
  54 period_kinds = [b'all', b'dailies', b'monthlies', b'yearlies']
  55 period_scale = {b's': 1,
  56                 b'min': 60,
  57                 b'h': 60 * 60,
  58                 b'd': 60 * 60 * 24,
  59                 b'w': 60 * 60 * 24 * 7,
  60                 b'm': 60 * 60 * 24 * 31,
  61                 b'y': 60 * 60 * 24 * 366}
  62 period_scale_kinds = list(period_scale.keys())
  63
  64 def expected_retentions(utcs, utc_start, spec):
  65     if not spec:
  66         return utcs
  67     utcs = sorted(utcs, reverse=True)
  68     period_start = dict(spec)
  69     for kind, duration in compat.items(period_start):
  70         period_start[kind] = utc_start - period_as_secs(duration)
  71     period_start = defaultdict(lambda: float('inf'), period_start)
  72
  73     all = list(takewhile(lambda x: x >= period_start[b'all'], utcs))
  74     utcs = list(dropwhile(lambda x: x >= period_start[b'all'], utcs))
  75
  76     matches = takewhile(lambda x: x >= period_start[b'dailies'], utcs)
  77     dailies = [max(day_utcs) for yday, day_utcs
  78                in groupby(matches, lambda x: localtime(x).tm_yday)]
  79     utcs = list(dropwhile(lambda x: x >= period_start[b'dailies'], utcs))
  80
  81     matches = takewhile(lambda x: x >= period_start[b'monthlies'], utcs)
  82     monthlies = [max(month_utcs) for month, month_utcs
  83                  in groupby(matches, lambda x: localtime(x).tm_mon)]
  84     utcs = dropwhile(lambda x: x >= period_start[b'monthlies'], utcs)
  85
  86     matches = takewhile(lambda x: x >= period_start[b'yearlies'], utcs)
  87     yearlies = [max(year_utcs) for year, year_utcs
  88                 in groupby(matches, lambda x: localtime(x).tm_year)]
  89
  90     return chain(all, dailies, monthlies, yearlies)
  91
  92 def period_spec(start_utc, end_utc):
  93     global period_kinds, period_scale, period_scale_kinds
  94     result = []
  95     desired_specs = randint(1, 2 * len(period_kinds))
  96     assert(desired_specs >= 1)  # At least one --keep argument is required
  97     while len(result) < desired_specs:
  98         period = None
  99         if randint(1, 100) <= 5:
 100             period = b'forever'
 101         else:
 102             assert(end_utc > start_utc)
 103             period_secs = randint(1, end_utc - start_utc)
 104             scale = choice(period_scale_kinds)
 105             mag = int(float(period_secs) / period_scale[scale])
 106             if mag != 0:
 107                 period = (b'%d' % mag) + scale
 108         if period:
 109             result += [(choice(period_kinds), period)]
 110     return tuple(result)
 111
 112 def unique_period_specs(n, start_utc, end_utc):
 113     invocations = set()
 114     while len(invocations) < n:
 115         invocations.add(period_spec(start_utc, end_utc))
 116     return tuple(invocations)
 117
 118 def period_spec_to_period_args(spec):
 119     return tuple(chain(*((b'--keep-' + kind + b'-for', period)
 120                          for kind, period in spec)))
 121
 122 def result_diffline(x):
 123     return (b'%d %s\n'
 124             % (x, strftime(' %Y-%m-%d-%H%M%S', localtime(x)).encode('ascii')))
 125
 126 def check_prune_result(expected):
 127     actual = sorted([int(x)
 128                      for x in exo([b'git', b'log',
 129                                    b'--pretty=format:%at']).out.splitlines()])
 130     if expected != actual:
 131         for x in expected:
 132             print('ex:', x, strftime('%Y-%m-%d-%H%M%S', localtime(x)),
 133                   file=stderr)
 134         for line in unified_diff([result_diffline(x) for x in expected],
 135                                  [result_diffline(x) for x in actual],
 136                                  fromfile='expected', tofile='actual'):
 137             sys.stderr.write(line)
 138     wvpass(expected == actual)
 139
 140
# Pin the git author/committer identity through the environment for
# every git command run below.
environ[b'GIT_AUTHOR_NAME'] = b'bup test'
environ[b'GIT_COMMITTER_NAME'] = b'bup test'
environ[b'GIT_AUTHOR_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
environ[b'GIT_COMMITTER_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'

# Seed the random generator from BUP_TEST_SEED when it is set, and
# from the current time otherwise.  The seed is printed so that a run
# can be repeated by exporting the same value.
seed = int(environ.get(b'BUP_TEST_SEED', time()))
random.seed(seed)
print('random seed:', seed, file=stderr)

# Test sizes, each overridable from the environment: the number of
# commits to generate, and the number of random specs to try without
# and with gc.
save_population = int(environ.get(b'BUP_TEST_PRUNE_OLDER_SAVES', 2000))
prune_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_CYCLES', 20))
prune_gc_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))

# Command used to invoke bup below (whatever bup.path.exe() reports).
bup_cmd = bup.path.exe()
 155
# Main test body.  Everything happens inside a temporary directory
# provided by test_tempdir().
with test_tempdir(b'prune-older-') as tmpdir:
    # Point both bup and git at the same repository, work/.git.
    environ[b'BUP_DIR'] = tmpdir + b'/work/.git'
    environ[b'GIT_DIR'] = tmpdir + b'/work/.git'
    now = int(time())
    # Three 366-day years, matching period_scale[b'y'].
    three_years_ago = now - (60 * 60 * 24 * 366 * 3)
    chdir(tmpdir)
    ex([b'git', b'init', b'work'])
    ex([b'git', b'config', b'gc.autoDetach', b'false'])

    wvstart('generating ' + str(save_population) + ' random saves')
    # The saves are created relative to the current directory.
    chdir(tmpdir + b'/work')
    save_utcs = create_older_random_saves(save_population, three_years_ago, now)
    chdir(tmpdir)
    # Remember the full history's tip so each cycle can restore it.
    test_set_hash = exo([b'git', b'show-ref', b'-s', b'master']).out.rstrip()
    ls_saves = exo((bup_cmd, b'ls', b'master')).out.splitlines()
    # NOTE(review): the extra entry is presumably a "latest" alias that
    # bup ls lists alongside the saves -- confirm.
    wvpasseq(save_population + 1, len(ls_saves))

    wvstart('ensure everything kept, if no keep arguments')
    ex([b'git', b'reset', b'--hard', test_set_hash])
    # Without any --keep-* argument the command must fail with the
    # message checked below, and must leave every save in place.
    proc = ex((bup_cmd,
               b'prune-older', b'-v', b'--unsafe', b'--no-gc',
               b'--wrt', b'%d' % now) \
              + (b'master',),
              stdout=None, stderr=PIPE, check=False)
    wvpassne(proc.rc, 0)
    wvpass(b'at least one keep argument is required' in proc.err)
    check_prune_result(save_utcs)


    wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
                                                               save_population))
    for spec in unique_period_specs(prune_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale[b'm'],
                                    now):
        # Restore the full history, prune it with this spec, and compare
        # against the independently computed expectation.
        ex([b'git', b'reset', b'--hard', test_set_hash])
        expected = sorted(expected_retentions(save_utcs, now, spec))
        ex((bup_cmd,
            b'prune-older', b'-v', b'--unsafe', b'--no-gc', b'--wrt',
            b'%d' % now) \
           + period_spec_to_period_args(spec) \
           + (b'master',))
        check_prune_result(expected)


    # More expensive because we have to recreate the repo each time
    wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
                                                            save_population))
    ex([b'git', b'reset', b'--hard', test_set_hash])
    # Keep a pristine copy of the repository to restore before each cycle.
    copytree(b'work/.git', b'clean-test-repo', symlinks=True)
    for spec in unique_period_specs(prune_gc_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale[b'm'],
                                    now):
        # Start from the pristine copy each time.
        rmtree(b'work/.git')
        copytree(b'clean-test-repo', b'work/.git')
        expected = sorted(expected_retentions(save_utcs, now, spec))
        # Same invocation as in the previous loop, but without --no-gc.
        ex((bup_cmd,
            b'prune-older', b'-v', b'--unsafe', b'--wrt', b'%d' % now) \
           + period_spec_to_period_args(spec) \
           + (b'master',))
        check_prune_result(expected)