2 from __future__ import absolute_import, print_function
3 from binascii import hexlify, unhexlify
4 from collections import defaultdict
5 from itertools import groupby
6 from time import localtime, strftime, time
9 from bup import git, options
10 from bup.compat import argv_bytes
11 from bup.gc import bup_gc
12 from bup.helpers import die_if_errors, log, partition, period_as_secs
13 from bup.io import byte_stream
14 from bup.repo import LocalRepo
15 from bup.rm import bup_rm
# Lazily pair each ref name, minus its first 11 characters (the length of
# b'refs/heads/'), with the hexlified sha, for every (name, sha) that
# git.list_refs() reports.  The patterns handed to list_refs() are the
# requested refnames, each prefixed with b'refs/heads/'.
# NOTE(review): the remaining arguments and the close of the list_refs()
# call are not visible in this extract -- confirm against the full file.
18 def branches(refnames=tuple()):
19 return ((name[11:], hexlify(sha)) for (name,sha)
20 in git.list_refs(patterns=(b'refs/heads/' + n for n in refnames),
def save_name(branch, utc):
    """Return the name of the save made on branch at time utc, as bytes.

    The result is branch, a b'/' separator, and utc rendered in the
    local timezone as YYYY-MM-DD-HHMMSS, ASCII-encoded.

    branch -- branch name as bytes.
    utc    -- seconds since the epoch, as accepted by time.localtime().
    """
    # strftime() returns str; the timestamp format is pure ASCII, so the
    # encode cannot fail and the result can be joined to the bytes branch.
    stamp = strftime('%Y-%m-%d-%H%M%S', localtime(utc))
    return branch + b'/' + stamp.encode('ascii')
# Decide, per save, whether it is kept or removed (see the docstring for
# the shape of the results).  period_start is indexed with the keys
# 'all', 'dailies', 'monthlies' and 'yearlies'; each value is compared
# against a save's utc (s[0]) with >=, i.e. it marks where that retention
# period begins.  The comment further down describes saves as sorted by
# decreasing utc.
# NOTE(review): the docstring terminator and the bodies of several loops
# are not visible in this extract -- confirm against the full file.
27 def classify_saves(saves, period_start):
28 """For each (utc, id) in saves, yield (True, (utc, id)) if the save
29 should be kept and (False, (utc, id)) if the save should be removed.
30 The ids are binary hashes.
# Helper: treats the first element of region (region[0:1]) separately
# from the remaining ones (region[1:]); presumably the first is kept and
# the rest dropped -- the loop bodies are not shown here.
33 def retain_newest_in_region(region):
34 for save in region[0:1]:
36 for save in region[1:]:
# Split off the saves at or after the start of the 'all' period; the
# remainder (rest) feeds the per-period ranges below.  Presumably
# partition() returns (matching, non-matching) -- verify in bup.helpers.
39 matches, rest = partition(lambda s: s[0] >= period_start['all'], saves)
# Each entry pairs a period start with a key function mapping a save to
# its local-time day of year, month, or year respectively.
43 tm_ranges = ((period_start['dailies'], lambda s: localtime(s[0]).tm_yday),
44 (period_start['monthlies'], lambda s: localtime(s[0]).tm_mon),
45 (period_start['yearlies'], lambda s: localtime(s[0]).tm_year))
47 # Break the decreasing utc sorted saves up into the respective
48 # period ranges (dailies, monthlies, ...). Within each range,
49 # group the saves by the period scale (days, months, ...), and
50 # then yield a "keep" action (True, utc) for the newest save in
51 # each group, and a "drop" action (False, utc) for the rest.
52 for pstart, time_region_id in tm_ranges:
# rest is rebound on every pass, so each range only sees the saves that
# no earlier range claimed.
53 matches, rest = partition(lambda s: s[0] >= pstart, rest)
# groupby() only merges adjacent items with equal keys, so this relies on
# the saves arriving in sorted order.
54 for region_id, region_saves in groupby(matches, time_region_id):
55 for action in retain_newest_in_region(list(region_saves)):
58 # Finally, drop any saves older than the specified periods
64 bup prune-older [options...] [BRANCH...]
66 keep-all-for= retain all saves within the PERIOD
67 keep-dailies-for= retain the newest save per day within the PERIOD
68 keep-monthlies-for= retain the newest save per month within the PERIOD
69 keep-yearlies-for= retain the newest save per year within the PERIOD
70 wrt= end all periods at this number of seconds since the epoch
71 pretend don't prune, just report intended actions to standard output
72 gc collect garbage after removals [1]
73 gc-threshold= only rewrite a packfile if it's over this percent garbage [10]
74 #,compress= set compression level to # (0-9, 9 is highest) [1]
75 v,verbose increase log output (can be used more than once)
76 unsafe use the command even though it may be DANGEROUS
# Parse the command line and work out, for each requested retention
# period, the timestamp at which it starts.
# NOTE(review): the enclosing def and several guarding conditions are not
# visible in this extract -- confirm against the full file.
80 o = options.Options(optspec)
81 opt, flags, roots = o.parse_bytes(argv[1:])
# The non-option arguments are converted with argv_bytes() before use.
82 roots = [argv_bytes(x) for x in roots]
# Presumably only reached when --unsafe was not given (the test itself is
# not shown here).
85 o.fatal('refusing to run dangerous, experimental command without --unsafe')
# All periods are measured back from "now": the current time, or the
# --wrt value when one was supplied.
87 now = int(time()) if opt.wrt is None else opt.wrt
88 if not isinstance(now, int):
89 o.fatal('--wrt value ' + str(now) + ' is not an integer')
# For each keep-*-for option, convert the PERIOD text to seconds and
# record the period's start as now - secs.
92 for period, extent in (('all', opt.keep_all_for),
93 ('dailies', opt.keep_dailies_for),
94 ('monthlies', opt.keep_monthlies_for),
95 ('yearlies', opt.keep_yearlies_for)):
97 secs = period_as_secs(extent.encode('ascii'))
99 o.fatal('%r is not a valid period' % extent)
100 period_start[period] = now - secs
103 o.fatal('at least one keep argument is required')
# Any period that was not recorded above reads as +inf, which no save
# timestamp can reach in the ">= start" comparisons.
105 period_start = defaultdict(lambda: float('inf'), period_start)
# Log where each configured retention period begins.
# NOTE(review): the surrounding guard and the else/try/branch lines are
# not visible in this extract -- confirm against the full file.
# Local-time rendering of the epoch, used by the fallback messages below.
108 epoch_ymd = strftime('%Y-%m-%d-%H%M%S', localtime(0))
109 for kind in ['all', 'dailies', 'monthlies', 'yearlies']:
110 period_utc = period_start[kind]
# +inf is the default for periods that were not configured; skip those.
111 if period_utc != float('inf'):
# True when the start is -inf, i.e. the period has no lower bound.
112 if not (period_utc > float('-inf')):
# NOTE(review): unlike the other messages, this one has no trailing '\n'.
113 log('keeping all ' + kind)
116 when = strftime('%Y-%m-%d-%H%M%S', localtime(period_utc))
117 log('keeping ' + kind + ' since ' + when + '\n')
# The timestamp could not be rendered as a local date; describe it as an
# offset from the epoch instead.  Presumably the three messages below
# cover period_utc before, after, and exactly at the epoch -- the
# selecting conditions are not shown here.
118 except ValueError as ex:
120 log('keeping %s since %d seconds before %s\n'
121 %(kind, abs(period_utc), epoch_ymd))
123 log('keeping %s since %d seconds after %s\n'
124 %(kind, period_utc, epoch_ymd))
126 log('keeping %s since %s\n' % (kind, epoch_ymd))
# Classify every save on the selected branches, then either report the
# decisions or collect the save names to remove and hand them to bup_rm()
# and bup_gc().
# NOTE(review): the parse_info header, the removals initialisation and
# the pretend/gc guards are not visible in this extract -- confirm against
# the full file.
128 git.check_repo_or_die()
130 # This could be more efficient, but for now just build the whole list
131 # in memory and let bup_rm() do some redundant work.
# Body of the parse callback passed to git.rev_list() below: reads one
# line from f, strips it, and returns it as an int.
134 author_secs = f.readline().strip()
135 return int(author_secs)
# Save names are bytes, so the report is written through a byte stream
# wrapped around stdout.
138 out = byte_stream(sys.stdout)
141 for branch, branch_id in branches(roots):
# rev_list() yields (hex oid, parsed value) pairs; swap them into
# (utc, binary oid) tuples, the shape classify_saves() documents.
143 saves = ((utc, unhexlify(oidx)) for (oidx, utc) in
144 git.rev_list(branch_id, format=b'%at', parse=parse_info))
145 for keep_save, (utc, id) in classify_saves(saves, period_start):
146 assert(keep_save in (False, True))
147 # FIXME: base removals on hashes
# Report line: b'+ ' marks a kept save and b'- ' a dropped one.
# Presumably this is the --pretend path -- the condition is not shown.
149 out.write((b'+ ' if keep_save else b'- ')
150 + save_name(branch, utc) + b'\n')
# Removals are recorded by save name (branch/timestamp), not by hash.
152 removals.append(save_name(branch, utc))
156 with LocalRepo() as repo:
157 bup_rm(repo, removals, compression=opt.compress,
158 verbosity=opt.verbose)
# Presumably only run when --gc is enabled -- the guard is not shown.
161 bup_gc(threshold=opt.gc_threshold,
162 compression=opt.compress,
163 verbosity=opt.verbose)