File: expire_backups.py

package info (click to toggle)
s3ql 2.21%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 2,776 kB
  • ctags: 2,427
  • sloc: python: 14,869; makefile: 128; sh: 33; ansic: 22
file content (288 lines) | stat: -rwxr-xr-x 9,366 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
#!/usr/bin/env python3
'''
expire_backups.py - this file is part of S3QL.

Copyright © 2008 Nikolaus Rath <Nikolaus@rath.org>

This work can be distributed under the terms of the GNU GPLv3.
'''

import sys
import os
import re
import textwrap
import shutil
import pickle
from datetime import datetime, timedelta
from collections import defaultdict

# If this script is started from inside an S3QL source checkout (i.e. the
# parent of the script's directory contains both setup.py and
# src/s3ql/__init__.py), put that checkout's src/ directory first on
# sys.path so that the s3ql modules below are looked up there first.
basedir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..'))
if (os.path.exists(os.path.join(basedir, 'setup.py')) and
    os.path.exists(os.path.join(basedir, 'src', 's3ql', '__init__.py'))):
    sys.path = [os.path.join(basedir, 'src')] + sys.path

# These imports must come after the sys.path adjustment above.
from s3ql.logging import setup_logging, QuietError, logging
from s3ql.common import thaw_basic_mapping, freeze_basic_mapping
from s3ql.parse_args import ArgumentParser
from s3ql.remove import main as s3qlrm

# Module-wide logger used by every function in this script
log = logging.getLogger(__name__)

def parse_args(args):
    '''Parse command line

    *args* is the list of command line arguments (without the program
    name). Returns the parsed options object with the attributes
    ``cycles`` (list of int), ``state``, ``n``, ``reconstruct_state`` and
    ``use_s3qlrm``, plus whatever the quiet/debug/version helpers add.

    Invalid age range boundaries are reported through `parser.error`.
    Boundaries have to be positive: with a largest boundary of zero or
    less, `process_backups` would not simulate a single cycle, keep
    nothing, and therefore flag every existing backup for deletion.
    '''

    parser = ArgumentParser(
        description=textwrap.dedent('''\
        ``expire_backups.py`` is a program to intelligently remove old backups
        that are no longer needed.

        To define what backups you want to keep for how long, you define a
        number of *age ranges*. ``expire_backups`` ensures that you will
        have at least one backup in each age range at all times. It will keep
        exactly as many backups as are required for that and delete any
        backups that become redundant.

        Age ranges are specified by giving a list of range boundaries in terms
        of backup cycles. Every time you create a new backup, the existing
        backups age by one cycle.

        Please refer to the S3QL documentation for details.
        '''))

    parser.add_quiet()
    parser.add_debug()
    parser.add_version()

    parser.add_argument('cycles', nargs='+', type=int, metavar='<age>',
                        help='Age range boundaries in terms of backup cycles')
    parser.add_argument('--state', metavar='<file>', type=str,
                        default='.expire_backups.dat',
                        # Add quotes around default to prevent groff
                        # from choking on leading . generated by buggy
                        # docutils man page generator.
                        help='File to save state information in (default: "%(default)s")')
    parser.add_argument("-n", action="store_true", default=False,
                        help="Dry run. Just show which backups would be deleted.")
    parser.add_argument('--reconstruct-state', action='store_true', default=False,
                        help='Try to reconstruct a missing state file from backup dates.')

    parser.add_argument("--use-s3qlrm", action="store_true",
                        help="Use `s3qlrm` command to delete backups.")

    options = parser.parse_args(args)

    # An age range ending at zero (or below) can never contain a backup, and
    # if *all* boundaries are like that every backup would be expired.
    if any(boundary < 1 for boundary in options.cycles):
        parser.error('Age range boundaries must be positive')

    if sorted(options.cycles) != options.cycles:
        parser.error('Age range boundaries must be in increasing order')

    return options

def main(args=None):
    '''Expire backups in the current directory

    *args* is the list of command line arguments; if it is `None`,
    ``sys.argv[1:]`` is used. Backups are the entries of the current
    directory whose name looks like ``YYYY-MM-DD_HH:MM:SS``.

    The state (backup name -> age in cycles) is read from the state file,
    reconstructed from the backup names, or started empty. Backups that
    `process_backups` reports as unneeded are then removed and the state is
    written back, unless this is a dry run.

    Raises `QuietError` if there is more than one backup but no state file
    and reconstruction was not requested.
    '''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)
    dry_run = options.n

    # Determine available backups
    name_re = r'^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d$'
    backup_list = {entry for entry in os.listdir('.')
                   if re.match(name_re, entry)}

    if os.path.exists(options.state):
        log.info('Reading state...')
        with open(options.state, 'rb') as fh:
            state = thaw_basic_mapping(fh.read())
    elif len(backup_list) > 1:
        # Without a state file the ages of several backups are unknown
        if not options.reconstruct_state:
            raise QuietError('Found more than one backup but no state file! Aborting.')

        log.warning('Trying to reconstruct state file..')
        state = upgrade_to_state(backup_list)
        if not dry_run:
            log.info('Saving reconstructed state..')
            with open(options.state, 'wb') as fh:
                fh.write(freeze_basic_mapping(state))
    else:
        log.warning('Creating state file..')
        state = dict()

    to_delete = process_backups(backup_list, state, options.cycles)

    for name in to_delete:
        log.info('Backup %s is no longer needed, removing...', name)
        if dry_run:
            continue
        if options.use_s3qlrm:
            s3qlrm([name])
        else:
            shutil.rmtree(name)

    if dry_run:
        log.info('Dry run, not saving state.')
        return

    log.info('Saving state..')
    with open(options.state, 'wb') as fh:
        fh.write(freeze_basic_mapping(state))

def upgrade_to_state(backup_list):
    '''Reconstruct a state mapping from the timestamps in backup names

    *backup_list* is an iterable of backup names in the format
    ``%Y-%m-%d_%H:%M:%S``. The smallest non-zero time difference between
    any two backups is assumed to be the length of one backup cycle, and
    the age of every backup (relative to the current local time) is
    converted to a number of cycles, rounding up. Backups whose timestamp
    is not in the past get age zero.

    If there are fewer than two distinct timestamps, no cycle length can
    be derived and every backup is assigned age zero.

    Returns a dict mapping backup name to age in cycles. Raises
    `ValueError` if a name does not match the timestamp format.
    '''
    log.info('Several existing backups detected, trying to convert absolute ages to cycles')

    one_hour = timedelta(hours=1)
    now = datetime.now()
    age = dict()
    for x in sorted(backup_list):
        age[x] = now - datetime.strptime(x, '%Y-%m-%d_%H:%M:%S')
        # Dividing two timedeltas gives a float, here the age in hours
        log.info('Backup %s is %.1f hours old', x, age[x] / one_hour)

    # The smallest difference between any two distinct ages is always found
    # between neighbours in sorted order, so comparing all pairs is not needed.
    distinct_ages = sorted(set(age.values()))
    gaps = [older - newer
            for (newer, older) in zip(distinct_ages, distinct_ages[1:])]
    step = min(gaps) if gaps else None
    if step is None:
        log.info('Cannot determine backup interval, assuming age zero for all backups')
    else:
        log.info('Assuming backup interval of %.1f hours', step / one_hour)

    state = dict()
    for x in sorted(age):
        if step is not None and age[x] > timedelta(0):
            # Ceiling division: number of whole steps needed to cover the age
            state[x] = -((-age[x]) // step)
        else:
            state[x] = 0
        log.info('Backup %s is %d cycles old', x, state[x])

    log.info('State construction complete.')
    return state

def simulate(args):
    '''Simulate 50 backup cycles and log which backups survive each one

    *args* is the list of command line arguments, parsed like for `main`.
    Nothing on disk is touched: one made-up backup name is added per
    simulated day and `process_backups` is run on an in-memory state.
    '''

    options = parse_args(args)
    setup_logging(options)

    state = {}
    backup_list = set()
    for day in range(50):
        backup_list.add('backup-%2d' % day)
        expired = process_backups(backup_list, state, options.cycles)
        log.info('Deleting %s', expired)
        backup_list.difference_update(expired)

        log.info('Available backups on day %d:', day)
        for name in sorted(backup_list):
            log.info(name)

def process_backups(backup_list, state, cycles):
    '''Update backup ages and determine which backups can be deleted

    *backup_list* is the set of names of the backups that currently exist.
    *state* maps backup names to their age in backup cycles. *cycles* is
    the non-empty, increasing list of age range boundaries: boundaries
    ``[a, b, c]`` define the age ranges ``a > age >= 0``, ``b > age >= a``
    and ``c > age >= b``.

    *state* is modified in place:

    * every backup in *backup_list* that is not yet in *state* is added
      with age zero (in sorted order), aging all known backups by one
      cycle each time,
    * backups in *state* that are not in *backup_list* are dropped,
    * backups that turn out to be unneeded are dropped.

    To decide what is needed, the next ``max(cycles)`` backup cycles are
    simulated. Whenever an age range is not covered by a backup that has
    already been selected (or by a hypothetical future backup), the first
    backup in *state* that will have a fitting age is marked to be kept.

    Returns the set of backup names that are no longer needed.
    '''

    # New backups
    new_backups = backup_list - set(state)
    for x in sorted(new_backups):
        log.info('Found new backup %s', x)
        for y in state:
            state[y] += 1
        state[x] = 0

    for x in state:
        log.debug('Backup %s has age %d', x, state[x])

    # Missing backups
    missing_backups = set(state) - backup_list
    for x in sorted(missing_backups):
        log.warning('backup %s is missing. Did you delete it manually?', x)
        del state[x]

    # Ranges: each boundary closes the range opened by the previous one,
    # the first range starts at age zero.
    ranges = list(zip([0] + list(cycles[:-1]), cycles))

    # Go forward in time to see what backups need to be kept
    simstate = dict()  # backup name (or step number) -> simulated age
    keep = set()
    missing = defaultdict(list)  # range description -> steps without backup
    for step in range(max(cycles)):

        log.debug('Considering situation after %d more backups', step)
        for x in simstate:
            simstate[x] += 1
            log.debug('Backup %s now has simulated age %d', x, simstate[x])

        # Add the hypothetical backup that has been made "just now".
        # It is keyed by the step number, real backups by their name.
        if step != 0:
            simstate[step] = 0

        for (min_, max_) in ranges:
            log.debug('Looking for backup for age range %d to %d', min_, max_)

            # Look in simstate
            found = False
            for (backup, age) in simstate.items():
                if min_ <= age < max_:
                    found = True
                    break
            if found:
                # backup and age will be defined
                #pylint: disable=W0631
                log.debug('Using backup %s (age %d)', backup, age)
                continue

            # Look in state
            for (backup, age) in state.items():
                age += step
                if min_ <= age < max_:
                    log.info('Keeping backup %s (current age %d) for age range %d to %d%s',
                             backup, state[backup], min_, max_,
                             (' in %d cycles' % step) if step else '')
                    simstate[backup] = age
                    keep.add(backup)
                    break

            else:
                # No existing backup will have a fitting age at this step
                if step == 0:
                    log.info('Note: there is currently no backup available '
                             'for age range %d to %d', min_, max_)
                else:
                    missing['%d to %d' % (min_, max_)].append(step)

    for range_ in sorted(missing):
        log.info('Note: there will be no backup for age range %s '
                 'in (forthcoming) cycle(s): %s',
                 range_, format_list(missing[range_]))

    to_delete = set(state) - keep
    for x in to_delete:
        del state[x]

    return to_delete


def format_list(l):
    '''Return a human readable summary of the list of integers *l*

    Runs of three or more consecutive numbers are collapsed into
    ``first-last``, shorter runs are spelled out. The resulting items are
    joined with commas, with the last item attached by "and". An empty
    list gives an empty string. *l* itself is not modified.
    '''
    if not l:
        return ''

    # Everything after the first element, followed by a sentinel that cannot
    # continue the last run and therefore forces it to be emitted.
    pending = l[1:]
    pending.append(l[-1] + 2)

    pieces = []
    start = prev = l[0]
    for value in pending:
        if value == prev + 1:
            # Still inside the current run
            prev = value
            continue

        # The run start..prev has ended, emit it
        if start == prev:
            pieces.append('%d' % prev)
        elif start == prev - 1:
            pieces.extend(('%d' % start, '%d' % prev))
        else:
            pieces.append('%d-%d' % (start, prev))

        start = prev = value

    if len(pieces) <= 1:
        return ', '.join(pieces)

    leading = ', '.join(pieces[:-1])
    return '%s and %s' % (leading, pieces[-1])


if __name__ == '__main__':
    # To try out a set of age ranges on made-up backups instead of the
    # current directory, call simulate(sys.argv[1:]) here.
    main(sys.argv[1:])