File: BackupPC_nightly

package info (click to toggle)
backuppc 4.4.0-11
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,752 kB
  • sloc: perl: 37,523; sh: 607; javascript: 176; makefile: 38; ansic: 6
file content (371 lines) | stat: -rwxr-xr-x 13,306 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
#!/usr/bin/perl
#============================================================= -*-perl-*-
#
# BackupPC_nightly: Nightly cleanup & statistics script.
#
# DESCRIPTION
#
#   BackupPC_nightly performs several administrative tasks:
#
#      - monthly aging of per-PC log files (only with -m option)
#
#      - updating reference counts for V4+ backups
#
#      - pruning files from pool no longer used (ie: those with only one
#        hard link in V3, or a reference count of zero for V4+)
#
#      - sending email to users and administrators (only with -m option)
#
#   Usage: BackupPC_nightly [-m] [-r] [-p] [-P phase] poolRangeStart poolRangeEnd
#
#   Flags:
#
#     -m   Primary (master) BackupPC_nightly. Sends email.
#          Otherise, BackupPC_nightly just does pool pruning.
#          Since several BackupPC_nightly processes might run
#          concurrently, just the first one is given the -m flag
#          by BackupPC.
#
#     -r   Don't run BackupPC_refCountUpdate because there are long
#          running BackupPC_dump jobs that were running the last time
#          we ran BackupPC_refCountUpdate.
#
#     -p   don't show progress
#
#     -P phase
#          Phase from 0..15 each time we run BackupPC_nightly.  Used by
#          BackupPC_refCountUpdate to compute exact pool size for portions
#          of the pool based on $Conf{PoolSizeNightlyUpdatePeriod}.
#
#   The poolRangeStart and poolRangeEnd arguments are integers from 0 to 255.
#   These specify which parts of the subtasks or pool to process.
#
#   In the V3 pool, there are 256 2nd-level directories in the pool
#   (0/0, 0/1, ..., f/e, f/f).  BackupPC_nightly processes the given
#   subset of this list (0 means 0/0, 255 means f/f).  Therefore,
#   arguments of 0 255 process the entire pool, 0 127 does the first
#   half (ie: 0/0 through 7/f), 127 255 does the other half (eg: 8/0
#   through f/f) and 0 15 does just the first 1/16 of the pool (ie: 0/0
#   through 0/f).
#
#   In V4, reference count updating is done for each host.  The host
#   list is divided up, and processed based on the 8 bit argument range.
#
#   The V4 pool has 128 first-level directories and 128 second-level
#   directories.  The 0..255 argument is divided by 2 to select which
#   top-level directories to process.  Odd numbers are skipped.
#
# AUTHOR
#   Craig Barratt  <cbarratt@users.sourceforge.net>
#
# COPYRIGHT
#   Copyright (C) 2001-2020  Craig Barratt
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#========================================================================
#
# Version 4.4.0, released 20 Jun 2020.
#
# See http://backuppc.sourceforge.net.
#
#========================================================================

use strict;
no utf8;

use lib "__INSTALLDIR__/lib";
use BackupPC::Lib qw( :BPC_DT_ALL );
use BackupPC::XS;
use BackupPC::DirOps;
use Getopt::Std;

use File::Path;
use Data::Dumper;

die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf   = $bpc->Conf();
my(%Status, %Info, %Jobs, @BgQueue, @UserQueue, @CmdQueue);

#
# We delete unused pool files (link count 1) in sorted inode
# order by gathering batches.  We delete the first half of
# each batch (ie: $PendingDeleteMax / 2 at a time).
#
my @PendingDelete;
my $PendingDeleteMax = 10240;

$bpc->ChildInit();

my %opts;
if ( !getopts("mprP:", \%opts) || @ARGV != 2 ) {
    print("usage: $0 [-m] [-p] [-r] [-P phase] poolRangeStart poolRangeEnd\n");
    exit(1);
}
if ( $ARGV[0] !~ /^(\d+)$/ || $1 > 255 ) {
    print("$0: bad poolRangeStart '$ARGV[0]'\n");
    exit(1);
}
my $poolRangeStart = $1;
if ( $ARGV[1] !~ /^(\d+)$/ || $1 > 255 ) {
    print("$0: bad poolRangeEnd '$ARGV[1]'\n");
    exit(1);
}
my $poolRangeEnd = $1;

if ( $opts{m} ) {
    my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
    if ( $err ) {
        print("Can't connect to server ($err)\n");
        exit(1);
    }
    my $reply = $bpc->ServerMesg("status hosts");
    $reply = $1 if ( $reply =~ /(.*)/s );
    eval($reply);
}

###########################################################################
# V3 pool: get statistics, and remove files that have only one link.
###########################################################################
my $fileCnt;          # total number of v3 files
my $dirCnt;           # total number of v3 directories
my $blkCnt;           # total block size of v3 files
my $fileCntRm;        # total number of removed v3 files
my $blkCntRm;         # total block size of removed v3 files
my $fileCntRep;       # total number of v3 file names containing "_", ie: files
                      # that have repeated md5 checksums
my $fileRepMax;       # worse case number of v3 files that have repeated checksums
                      # (ie: max(nnn+1) for all names xxxxxxxxxxxxxxxx_nnn)
my $fileLinkMax;      # maximum number of hardlinks on a v3 pool file
my $fileLinkTotal;    # total number of hardlinks on entire v3 pool
my $fileCntRename;    # number of renamed v3 files (to keep file numbering
                      # contiguous)
my %FixList;          # list of v3 paths that need to be renamed to avoid
                      # new holes

if ( $Conf{PoolV3Enabled} ) {
    ScanAndCleanV3Pool();
}

###########################################################################
# Prior to V3, we need to tell BackupPC that it is now ok to start running
# BackupPC_dump commands. In V3+ they are decoupled, so this isn't actually
# needed anymore.
###########################################################################
printf("BackupPC_nightly lock_off\n");

###########################################################################
# V4 pool: run reference count updating and pool cleaning
# This runs in parallel for each subset of the pool
###########################################################################
if ( $opts{r} ) {
    print("log BackupPC_nightly skipping BackupPC_refCountUpdate\n");
} else {
    $opts{P} ||= 0;
    print(
        "log BackupPC_nightly now running BackupPC_refCountUpdate -m -s -c -P $opts{P} -r $poolRangeStart-$poolRangeEnd\n"
    );
    system("$BinDir/BackupPC_refCountUpdate -m -s -c -P $opts{P} -r $poolRangeStart-$poolRangeEnd");
}

###########################################################################
# Send email and generation of backupInfo files for each backup
# Also clean any temp pool/cpool files
###########################################################################
if ( $opts{m} ) {
    CleanV4PoolTempFiles();
    print("log BackupPC_nightly now running BackupPC_sendEmail\n");
    print("__bpc_progress_state__ BackupPC_sendEmail\n") if ( !$opts{p} );
    system("$BinDir/BackupPC_sendEmail");
}

exit(0);

sub GetPoolStats_V3
{
    my($file, $fullPath) = @_;
    my($inode, $nlinks, $nblocks) = (lstat($fullPath))[1, 3, 12];

    if ( -d _ ) {
        $dirCnt++;
        return;
    } elsif ( !-f _ ) {
        return;
    }
    if ( $nlinks == 1 ) {
        $blkCntRm += $nblocks;
        $fileCntRm++;
        #
        # Save the files for later batch deletion.
        #
        # This is so we can remove them in inode order, and additionally
        # reduce any remaining chance of race condition of linking to
        # pool files vs removing pool files.  (Other aspects of the
        # design should eliminate race conditions.)
        #
        push(
            @PendingDelete,
            {
                inode => $inode,
                path  => $fullPath
            }
        );
        if ( @PendingDelete > $PendingDeleteMax ) {
            processPendingDeletes(0);
        }
        #
        # We must keep repeated files numbered sequential (ie: files
        # that have the same checksum are appended with _0, _1 etc).
        # There are two cases: we remove the base file xxxx, but xxxx_0
        # exists, or we remove any file of the form xxxx_nnn.  We remember
        # the base name and fix it up later (not in the middle of find).
        #
        $fullPath =~ s/_\d+$//;
        $FixList{$fullPath}++;
    } else {
        if ( $file =~ /_(\d+)$/ ) {
            $fileRepMax = $1 + 1 if ( $fileRepMax <= $1 );
            $fileCntRep++;
        }
        $fileCnt += 1;
        $blkCnt  += $nblocks;
        $fileLinkMax = $nlinks if ( $fileLinkMax < $nlinks );
        $fileLinkTotal += $nlinks - 1;
    }
}

sub processPendingDeletes
{
    my($doAll) = @_;
    my @delete;

    if ( !$doAll ) {
        @delete = splice(@PendingDelete, 0, $PendingDeleteMax / 2);
    } else {
        @delete        = @PendingDelete;
        @PendingDelete = ();
    }
    for my $f ( sort({ $a->{inode} <=> $b->{inode} } @delete) ) {
        my($nlinks) = (lstat($f->{path}))[3];

        next if ( $nlinks != 1 );

        # print("Deleting $f->{path} ($f->{inode})\n");
        unlink($f->{path});
    }
}

sub ScanAndCleanV3Pool()
{
    my @hexChars = qw(0 1 2 3 4 5 6 7 8 9 a b c d e f);

    for my $pool ( qw(pool cpool) ) {
        print("__bpc_progress_state__ v3 $pool scan\n") if ( !$opts{p} );
        for ( my $i = $poolRangeStart ; $i <= $poolRangeEnd ; $i++ ) {
            my $dir = "$hexChars[int($i / 16)]/$hexChars[$i % 16]";

            # print("Doing $pool/$dir\n") if ( ($i % 16) == 0 );
            $fileCnt       = 0;
            $dirCnt        = 0;
            $blkCnt        = 0;
            $fileCntRm     = 0;
            $blkCntRm      = 0;
            $fileCntRep    = 0;
            $fileRepMax    = 0;
            $fileLinkMax   = 0;
            $fileLinkTotal = 0;
            $fileCntRename = 0;
            %FixList       = ();
            print("__bpc_progress_fileCnt__ $i/$poolRangeEnd\n") if ( !$opts{p} );
            BackupPC::DirOps::find($bpc, {wanted => \&GetPoolStats_V3}, "$TopDir/$pool/$dir")
              if ( -d "$TopDir/$pool/$dir" );
            my $kb   = $blkCnt / 2;
            my $kbRm = $blkCntRm / 2;

            #
            # Main BackupPC_nightly counts the top-level directory
            #
            $dirCnt++ if ( $opts{m} && -d "$TopDir/$pool" && $i == 0 );

            #
            # Also count the next level directories
            #
            $dirCnt++ if ( ($i % 16) == 0 && -d "$TopDir/$pool/$hexChars[int($i / 16)]" );

            #
            # We need to process all pending deletes before we do the
            # renames
            #
            if ( @PendingDelete ) {
                sleep(1);
                processPendingDeletes(1);
            }

            #
            # Now make sure that files with repeated checksums are still
            # sequentially numbered
            #
            foreach my $name ( sort(keys(%FixList)) ) {
                my $rmCnt = $FixList{$name} + 1;
                my $new   = -1;
                for ( my $old = -1 ; ; $old++ ) {
                    my $oldName = $name;
                    $oldName .= "_$old" if ( $old >= 0 );
                    if ( !-f $oldName ) {
                        #
                        # We know we are done when we have missed at least
                        # the number of files that were removed from this
                        # base name, plus a couple just to be sure
                        #
                        last if ( $rmCnt-- <= 0 );
                        next;
                    }
                    my $newName = $name;
                    $newName .= "_$new" if ( $new >= 0 );
                    $new++;
                    next if ( $oldName eq $newName );
                    rename($oldName, $newName);
                    $fileCntRename++;
                }
            }
            print(  "BackupPC_stats $i = $pool,$fileCnt,$dirCnt,$kb,$kbRm,"
                  . "$fileCntRm,$fileCntRep,$fileRepMax,"
                  . "$fileCntRename,$fileLinkMax,$fileLinkTotal\n");
        }
    }
    sleep(1);
    processPendingDeletes(1);
}

#
# Remove any orphan pool write temp files.  The file names is three numbers
# separated by periods.  The first number is the pid, and we check if that
# process is still alive.  If not, we delete the file.
#
sub CleanV4PoolTempFiles
{
    my $pidRunning = {};
    foreach my $pool ( qw(pool cpool) ) {
        foreach my $e ( @{BackupPC::DirOps::dirRead($bpc, "$TopDir/$pool")} ) {
            next if ( $e->{name} !~ /^(\d+)\.\d+\.\d+$/ || !-f "$TopDir/$pool/$e->{name}" );
            my $pid = $1;
            $pidRunning->{$pid} = kill(0, $pid) ? 1 : 0 if ( !defined($pidRunning->{$pid}) );
            next if ( $pidRunning->{$pid} );
            #print("pid $pid: unlink $TopDir/$pool/$e->{name}\n");
            unlink("$TopDir/$pool/$e->{name}");
        }
    }
}