File: mirmon.pl

package info (click to toggle)
mirmon 2.6-2
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 1,300 kB
  • sloc: perl: 2,887; sh: 34; makefile: 4
file content (643 lines) | stat: -rwxr-xr-x 16,610 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
#! /usr/bin/perl -w

use strict ;

# if Mirmon.pm lives in directory DIR,
# change . to DIR in the next line :
use lib '.' ; # Mirmon.pm lives here

use Mirmon ;

use IO::Pipe ;
use IO::Select ;
use Net::hostent ;

# Version banner, default config search list and default probe timeout ;
# all three are interpolated into the usage text below.
my $VERSION = Base::Version . ' - Fri Mar 30 09:56:03 2012 - henkp' ;
my $DEF_CNF = join ( ', ', Mirmon -> config_list ) ;
my $TIMEOUT = Base::DEF_TIMEOUT ;

# $prog : the part of $0 after the last '/' (all of $0 if there is none).
my ( $prog ) = $0 =~ m{ ( [^/]* ) \z }x ;
# $Usage : the help text appended to the message passed to Usage().
my $Usage = <<USAGE ;
Usage: $prog [ -v ] [ -q ] [ -t timeout ] [ -get opt ] [ -c conf ]
option v   : be verbose
option q   : be quiet
option t   : set timeout ; default $TIMEOUT
option get : 'all'    : probe all sites
           : 'update' : probe a selection of the sites (see doc)
option c   : configuration file ; default search :
             ( $DEF_CNF )
-------------------------------------------------------------------
Mirmon normally only reports errors and changes in the mirror list.
This is $VERSION.
-------------------------------------------------------------------
USAGE
# Usage ( MSG ) : die with MSG immediately followed by the usage text.
# Does not return.
sub Usage
  { my ( $msg ) = @_ ;
    die $msg . $Usage ;
  }
# Error ( MSG ) : die with "PROG: MSG" ; the trailing newline keeps perl
# from appending " at FILE line N". Does not return.
sub Error
  { my $msg = shift ;
    die sprintf ( "%s: %s\n", $prog, $msg ) ;
  }
# Warn ( MSG ) : print "PROG: MSG" on STDERR via warn ; the trailing
# newline keeps perl from appending " at FILE line N".
sub Warn
  { my $msg = shift ;
    warn join ( ': ', $prog, $msg ) . "\n" ;
  }

# usage: &GetOptions(ARG,ARG,..) defines $opt_ID as 1 or user spec'ed value
# usage: &GetOptions(\%opt,ARG,ARG,..) defines $opt{ID} as 1 or user value
# ARG = 'ID' | 'ID=SPC' | 'ID:SPC' for no-arg, required-arg or optional-arg
# ID  = perl identifier
# SPC = i|f|s for integer, fixedpoint real or string argument

use Getopt::Long ;
# 'Configure' is the current name ; 'config' is a deprecated alias for it.
Getopt::Long::Configure ( 'no_ignore_case' ) ;
my %opt = () ;
# Option 'd' (debug) is read below, so it must be declared here ; without
# it '-d' is rejected as unknown and $opt{d} can never be set.
# It is deliberately not listed in the usage text.
Usage '' unless GetOptions ( \%opt, qw(v q d t=i get=s c=s version) ) ;
# the program takes options only ; any remaining argument is an error
Usage "Arg count\n" unless @ARGV == 0 ;

# -version : print the version and stop
if ( $opt{version} ) { printf "%s\n", Base::version () ; exit ; }

# debug implies verbose
$opt{v} ||= $opt{d} ;

# validate the -get argument before doing any work
my $get = $opt{get} ;
if ( $get and ! Base::is_get_opt ( $get ) )
  { Error "unknown 'get option' '$get'" ; }

# pass the verbosity flags on to the Mirmon module
Mirmon::verbose ( $opt{v} ) ;
Mirmon::debug   ( $opt{d} ) ;
Mirmon::quiet   ( $opt{q} ) ;

# $opt{c} is undef when -c is not given
my $M = Mirmon -> new ( $opt{c} ) ;
# override the configured timeout only for a true value ; '-t 0' is ignored
$M -> conf -> timeout ( $opt{t} ) if $opt{t} ;
# with -get : call get_dates and put_state before generating the page ;
# without -get : only gen_page is called
if ( $get ) { $M -> get_dates ( $get ) ; $M -> put_state ; }
$M -> gen_page ( $get, $VERSION ) ;

__END__

=pod

=head1 NAME

mirmon - monitor the state of mirrors

=head1 SYNOPSIS

  mirmon [ -v ] [ -q ] [ -t timeout ] [ -get opt ] [ -c conf ]

=head1 OPTIONS

  option v   : be verbose
  option q   : be quiet
  option t   : set timeout [ default 300 ] ;
  option get : 'all'    : probe all sites
             : 'update' : probe a selection of the sites (see doc)
  option c   : configuration file ; default list :
               ./mirmon.conf $HOME/.mirmon.conf /etc/mirmon.conf
  -------------------------------------------------------------------
  Mirmon normally only reports errors and changes in the mirror list.
  -------------------------------------------------------------------

=head1 USAGE

The program is intended to be run by cron every hour.

  42 * * * * perl /path/to/mirmon -get update

It quietly probes a subset of the sites in a given list,
writes the results in the 'state' file and generates a web page
with the results. The subset contains the sites that are new, bad
and/or not probed for a specified time.

When no 'get' option is specified, the program just generates a
new web page from the last known state.

The program checks the mirrors by running a (user specified)
program on a pipe. A (user specified) number of probes is
run in parallel using nonblocking IO. When something can be
read from the pipe, it switches the pipe to blocking IO and
reads one line from the pipe. Then it flushes and closes the
pipe. No attempt is made to kill the probe.

The probe should return something that looks like

  1043625600 ...

that is, a line of text starting with a timestamp. The exit status
of the probe is ignored.

=head1 CONFIG FILE

=head2 location

A config file can be specified with the -c option.
If -c is not used, the program looks for a config file in

=over

=item * B<./mirmon.conf>

=item * B<$HOME/.mirmon.conf>

=item * B</etc/mirmon.conf>

=back

=head2 syntax

A config file looks like this :

  +--------------------------------------------------
  |# lines that start with '#' are comment
  |# blank lines are ignored too
  |# tabs are replaced by a space
  |
  |# the config entries are 'key' and 'value' pairs
  |# a 'key' begins in column 1
  |# the 'value' is the rest of the line
  |somekey  A_val B_val ...
  |otherkey X_val Y_val ...
  |
  |# indented lines are glued
  |# the next three lines mean 'somekey part1 part2 part3'
  |somekey part1
  |  part2
  |  part3
  |
  |# lines starting with a '+' are concatenated
  |# the next three lines mean 'somekey part1part2part3'
  |somekey part1
  |+ part2
  |+ part3
  |
  |# lines starting with a '.' are glued too
  |# don't use a '.' on a line by itself
  |# 'somekey' gets the value "part1\n part2\n part3"
  |somekey part1
  |. part2
  |. part3
  +--------------------------------------------------

=head1 CONFIG FILE : required entries

=head2 project_name I<name>

Specify a short plaintext name for the project.

  project_name Apache
  project_name CTAN

=head2 project_url I<url>

Specify an url pointing to the 'home' of the project.

  project_url http://www.apache.org/

=head2 mirror_list I<file-name>

Specify the file containing the mirrors to probe.

  mirror_list /path/to/mirror-list

If your mirror list is generated by a program, use

  mirror_list /path/to/program arg1 ... |

Two formats are supported :

=over

=item * plain : lines like

  us http://www.tux.org/ [email] ...
  nl http://apache.cs.uu.nl/dist/ [email] ...
  nl rsync://archive.cs.uu.nl/apache-dist/ [email] ...

=item * apache : lines like those in the apache mirrors.list

  ftp  us ftp://ftp.tux.org/pub/net/apache/dist/ user@tux.org ...
  http nl http://apache.cs.uu.nl/dist/ user@cs.uu.nl ...

=back

Note that in style 'plain' the third item is reserved for an
optional email address : the site's contact address.

Specify the required format with 'list_style' (see below).
The default style is 'plain'.

=head2 web_page I<file-name>

Specify where the html report page is written.

=head2 icons I<directory-name>

Specify the directory where the icons can be found,
relative to the I<web_page>, or relative to the
DOCUMENTROOT of the web server.

If/when the I<web_page> lives in directory C<.../mirmon/> and
the icons live in directory C<.../mirmon/icons/>,
specify

  icons icons

If/when the icons live in C</path/to/DOCUMENTROOT/icons/mirmon/>, specify

  icons /icons/mirmon

=head2 probe I<program + arguments>

Specify the program+args to probe the mirrors. Example:

  probe /usr/bin/wget -q -O - -T %TIMEOUT% -t 1 %URL%TIME

Before the program is started, %TIMEOUT% and %URL% are
substituted with the proper timeout and url values.

Here it is assumed that each hour the root server writes
a timestamp in /path/to/archive/TIME, for instance with
a crontab entry like

  42 * * * * perl -e 'printf "\%s\n", time' > /path/to/archive/TIME

Mirmon reads one line of output from the probe and interprets
the first word on that line as a timestamp ; for example :

  1043625600
  1043625600 Mon Jan 27 00:00:00 2003
  1043625600 www.apache.org Mon Jan 27 00:00:00 2003

Mirmon is distributed with a program C<probe> that handles
ftp, http and rsync urls.

=head2 state I<file-name>

Specify where the file containing the state is written.

The program reads this file on startup and writes the
file when mirrors are probed (-get is specified).

=head2 countries I<file-name>

Specify the file containing the country codes.
The file should contain lines like

  us - United States
  nl - Netherlands

The mirmon package contains a recent ISO list.

I<Fake> domains like I<Backup>, I<Master> are allowed,
and are listed first in the report ; lowercase-first
fake domains (like I<backup>) are listed last.

=head1 CONFIG FILE : optional entries

=head2 max_probes I<number>

Optionally specify the number of parallel probes (default 25).

=head2 timeout I<seconds>

Optionally specify the timeout for the probes (default 300).

After the last probe is started, the program waits for
<timeout> + 10 seconds, cleans up and exits.

=head2 project_logo I<logo>

Optionally specify (the SRC of the IMG of) a logo to be placed
top right on the page.

  project_logo /icons/apache.gif
  project_logo http://www.apache.org/icons/...

=head2 htm_head I<html>

Optionally specify some HTML to be placed before </HEAD>.

  htm_head
    <link REL=StyleSheet HREF="/style.css" TYPE="text/css">

=head2 htm_top I<html>

Optionally specify some HTML to be placed near the top of the page.

  htm_top testing 1, 2, 3

=head2 htm_foot I<html>

Optionally specify HTML to be placed near the bottom of the page.

  htm_foot
    <HR>
    <A HREF="..."><IMG SRC="..." BORDER=0></A>
    <HR>

=head2 put_histo top|bottom|nowhere

Optionally specify where the age histogram must be placed.
The default is 'top'.

=head2 min_poll I<time-spec>

For 'min_poll' see next item. A I<time-spec> is a number followed by
a unit 's' (seconds), or 'm' (minutes), or 'h' (hours), or 'd' (days).
For example '3d' (three days) or '36h' (36 hours).

=head2 max_poll I<time-spec>

Optionally specify the maximum probe interval. When the program is
called with option '-get update', all sites are probed which are :

=over 4

=item * new

the site appears in the list, but there is no known state

=item * bad

the last probe of the site was unsuccessful

=item * old

the last probe was more than 'max_poll' ago.

=back

Sites are not probed if the last probe was less than 'min_poll' ago.
So, if you specify

  min_poll 4h
  max_poll 12h

the 'reachable' sites are probed twice daily and the 'unreachable'
sites are probed at most six times a day.

The default 'min_poll' is '1h' (1 hour).
The default 'max_poll' is '4h' (4 hours).

=head2 min_sync I<time-spec>

Optionally specify how often the mirrors are required to make an update.

The default 'min_sync' is '1d' (1 day).

=head2 max_sync I<time-spec>

Optionally specify the maximum allowable sync interval.

Sites exceeding the limit will be considered 'old'.
The default 'max_sync' is '2d' (2 days).

=head2 no_randomize

Mirmon tries to balance the probe load over the hourly mirmon runs.
If the current run has a below average number of mirrors to probe,
mirmon probes a few extra, randomly chosen mirrors, picked from the
runs that have the highest load.

If you don't want this behaviour, use B<no_randomize>.

=head2 no_add_slash

If the url part of a line in the mirror_list doesn't end
in a slash ('/'), mirmon adds a slash and issues a warning
unless it is in quiet mode.

If you don't want this behaviour, use B<no_add_slash>.

=head2 list_style plain|apache

Optionally specify the format ('plain' or 'apache') of the mirror-list.

See the description of 'mirror_list' above.
The default list_style is 'plain'.

=head2 site_url I<site> I<url>

Optionally specify a substitute url for a site.

When access to a site is restricted (in Australia, for instance),
another (sometimes secret) url can be used to probe the site.
The <site> of an url is the part between '://' and the first '/'.

=head2 env I<key> I<value>

Optionally specify an environment variable.

=head2 include I<file-name>

Optionally specify a file to include.

The specified file is processed 'in situ'. After the specified file is
read and processed, config processing is resumed in the file where the
C<include> was encountered.
The include depth is unlimited. However, it is a fatal error to
include a file twice under the same name.

=head2 show

When the config processor encounters the 'show' command, it
dumps the content of the current config to standard output, if option
C<-v> is specified. This is intended for debugging.

=head2 exit

When the config processor encounters the 'exit' command, it
terminates the program. This is intended for debugging.

=head1 STATE FILE FORMAT

The state file consists of lines; one line per site.
Each line consists of white space separated fields.
The seven fields are :

=over 4

=item * field 1 : url

The url as given in the mirror list.

=item * field 2 : age

The mirror's timestamp found by the last successful probe,
or 'undef' if no probe was ever successful.

=item * field 3 : status last probe

The status of the last probe, or 'undef' if the mirror was never probed.

=item * field 4 : time last successful probe

The timestamp of the last successful probe or 'undef'
if the mirror was never successfully probed.

=item * field 5 : probe history

The probe history is a list of 's' (for success) and 'f' (for failure)
characters indicating the result of the probe. New results are appended
whenever the mirror is probed.

=item * field 6 : state history

The state history consists of a timestamp, a '-' char, and a list of
chars indicating a past status: 's' (fresh), 'b' (oldish), 'f' (old),
'z' (bad) or 'x' (skip).
The timestamp indicates when the state history was last updated.
The current status of the mirror is determined by the mirror's age and
a few configuration parameters (min_sync, max_sync, max_poll).
The state history is updated when the mirror is probed.
If the last update of the history was less than 24 hours ago,
the last status is replaced by the current status.
If the last update of the history was more than 24 hours ago,
the current status is appended to the history.
One or more 'skip's are inserted if the timestamp is two or more days old
(when mirmon hasn't run for more than two days).

=item * field 7 : last probe

The timestamp of the last probe, or 'undef' if the mirror was never probed.

=back

=head1 INSTALLATION

=head2 general

=over 4

=item * Note: The (empty) state file must exist before mirmon runs.

=item * The mirmon repository is here :

  https://subversion.cs.uu.nl/repos/staff.henkp.mirmon/trunk/

=item * The mirmon tarball is here :

  http://people.cs.uu.nl/henkp/mirmon/mirmon.tar.gz

=back

=head2 installation suggestions

To install and configure mirmon, take the following steps :

=over 2

=item * First, make the webdir :

  cd DOCUMENTROOT
  mkdir mirmon

For I<DOCUMENTROOT>, substitute the full pathname
of the document root of your webserver.

=item * Check out the mirmon repository :

  cd /usr/local/src
  svn checkout REPO mirmon

where

  REPO = https://subversion.cs.uu.nl/repos/staff.henkp.mirmon/trunk/

or download the package and unpack it.

=item * Chdir to directory mirmon :

  cd mirmon

=item * Create the (empty) state file :

  touch state.txt

=item * Install the icons in the webdir :

  mkdir DOCUMENTROOT/mirmon/icons
  cp icons/* DOCUMENTROOT/mirmon/icons

=item * Create a mirror list C<mirror_list> ;

Use your favorite editor, or generate the list from an
existing database.

  nl http://archive.cs.uu.nl/your-project/ contact@cs.uu.nl
  uk http://mirrors.this.org/your-project/ mirrors@this.org
  us http://mirrors.that.org/your-project/ mirrors@that.org

The email addresses are optional.

=item * Create a mirmon config file C<mirmon.conf> with your favorite editor.

  # lines must start in the first column ; no leading white space
  project_name ....
  project_url  ....
  mirror_list mirror_list
  state state.txt
  countries countries.list
  web_page DOCUMENTROOT/mirmon/index.html
  icons /mirmon/icons
  probe /usr/bin/wget -q -O - -T %TIMEOUT% -t 1 %URL%TIME

This assumes the project's timestamp is in file C<TIME>.

=item * If you have rsync urls, change the probe line to :

  probe perl /usr/local/src/mirmon/probe -t %TIMEOUT% %URL%TIME

=item * Run mirmon :

  perl mirmon -v -get all

The mirmon report should now be in 'DOCUMENTROOT/mirmon/index.html'

  http://www.your.project.org/mirmon/

=item * If/when, at a later date, you want to upgrade mirmon :

  cd /usr/local/src/mirmon
  svn status -u
  svn up

=back

=head1 SEE ALSO

=begin html

<A HREF="mirmon.pm.html">mirmon.pm(3)</A>

=end html

=begin man

mirmon.pm(3)

=end man

=head1 AUTHOR

=begin html

  &copy; 2003-2012
  <A HREF="http://people.cs.uu.nl/henkp/">Henk P. Penning</A>,
  <A HREF="http://www.cs.uu.nl/">Computer Science Department</A>,
  <A HREF="http://www.uu.nl/">Utrecht University</A>
  <BR>
  mirmon-2.6 - Fri Mar 30 09:56:03 2012 ; henkp

=end html

=begin man

  (c) 2003-2012 Henk P. Penning
  Computer Science Department, Utrecht University
  http://people.cs.uu.nl/henkp/ -- penning@cs.uu.nl
  mirmon-2.6 - Fri Mar 30 09:56:03 2012 ; henkp

=end man

=begin text

  (c) 2003-2012 Henk P. Penning
  Computer Science Department, Utrecht University
  http://people.cs.uu.nl/henkp/ -- penning@cs.uu.nl
  mirmon-2.6 - Fri Mar 30 09:56:03 2012 ; henkp

=end text

=cut