1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
|
#! /usr/local/bin/perl5
# /usr/local/perl5.002/bin/perl
#
#
$debug = 0;
$username = getlogin || (getpwuid($<))[0] ;
# restart_serv = 1 DOES NOT WORK because of bugs in perl.
# (I've worked around this bug below, but it is pretty hideous)
$restart_serv = 0;
$serv_p4 = "/home/MPI/mpich/lib/sun4/ch_p4/serv_p4";
$server_name = "serv_p4";
$run_dir = "$HOME";
$kill_serv = 0;
$rsh = "rsh";
#
use Socket;
#
# This is an experimental perl program to check on your servers
#
# Get the port to check
$portnum=1234;
# Get the list of machines
$machinelist="/home/MPI/mpich/util/machines/machines.sun4";
#
# Read command line for overrides
while ($_ = $ARGV[0]) {
shift;
print "Processing argument $_\n" if $debug;
if ($_ eq "-debug") {
$debug = 1;
}
elsif ($_ eq "-port") {
$portnum = $ARGV[0];
shift;
}
elsif ($_ eq "-startdir") {
$run_dir = $ARGV[0];
shift;
}
elsif ($_ eq "-machinelist") {
$machinelist = $ARGV[0];
shift;
}
elsif ($_ eq "-server") {
$serv_p4 = $ARGV[0];
shift;
}
elsif ($_ eq "-pgm") {
$serv_p4 = "/home/MPI/mpich/lib/sun4/ch_p4/$ARGV[0]";
shift;
}
elsif ($_ eq "-restart") {
$restart_serv = 1;
}
elsif ($_ eq "-user") {
$username = $ARGV[0];
shift;
}
elsif ($_ eq "-kill") {
$kill_serv = 1;
}
else {
print "chkserv [ -port num ] [ -restart ] [ -machinelist file ]";
print " [ -user name ] [ -server pgm ]";
if ($_ eq "-help") { exit 1; }
die "Unrecognized argument $_\n";
}
}
if ($username eq "") {
die "Could not get username!\n";
}
#
# If the program name was just the name without the location, try a
# default location.
if (! -x $serv_p4 ) {
$serv_p4 = "/home/MPI/mpich/lib/sun4/ch_p4/$serv_p4";
}
if (! -x $serv_p4 ) {
die "$serv_p4 is not executable!\n";
}
#
#
open( FLIST, $machinelist ) || die "Could not open $machinelist" ;
# Force output to be flushed
$| = 1;
#
# See perlipc for an example of a tcp client
# Because of bugs in perl, we should really read the file into a perl
# variable (array?) and then operate on that array.
#
# We start in the indicated directory ($HOME by default)
chdir $run_dir ;
#
while (<FLIST>) {
# Skip comment lines
if (/^#/) { next ; }
# Setup socket
$sockaddr = 'S n a4 x8';
chop;
($hostname,$clustersize) = split(/:/);
print "Read $hostname from process $$\n" if $debug;
($name,$aliases,$proto) = getprotobyname('tcp');
# ($name,$aliases,$port) = getservbyname($portnum,'tcp')
# unless $portnum =~/^\d+$/;
$port = $portnum;
($name,$aliases,$type,$len,$thataddr) =
gethostbyname($hostname);
($name,$aliases,$type,$len,$thisaddr) =
gethostbyname('hostname');
$this = pack($sockaddr,AF_INET,0,$thisaddr);
$that = pack($sockaddr,AF_INET,$port,$thataddr);
$hostok = 1;
print "Connecting to $hostname:$port with protocol $proto\n" if $debug;
socket(S,PF_INET,SOCK_STREAM,$proto) || ($hostok = 0);
if ($hostok == 0) { print "socket: $!\n"; next; }
bind(S,$this) || ($hostok = 0);
if ($hostok != 0) {
print "bind: $!\n" if $debug;
connect(S,$that) || ($hostok = 0);
if ($hostok == 0) {
print "connect: $!\n" if $debug; }
}
if ($hostok == 0) {
close( S );
if ($debug) { print "bind: $!\n" }
else {
# This has been hideously painful. perl seems to get confused
# when reading from FLIST when I do a fork; for some reason the
# EOF is lost, along with the first few characters of the input.
# This would have been easier in C, where fork and fgets work
# correctly. I've found documentation on this at
# http://www.perl.com/perl/bugs/NETaa14449-5.html.
# Note that the workaround is to (a) not close files and
# (b) not call exit. Thus, even when doing "norun", we
# have to exec /usr/bin/echo, not use print.
if ($restart_serv) {
print "Restarting server on $hostname:$portnum\n" ;
$cmd = "$rsh $hostname -n $serv_p4 -o -p $portnum";
$pid = fork();
if ($pid > 0) {
waitpid( $pid, 0 );
}
else {
# I am the child
#close( STDIN );
#close( FLIST );
# exec "/usr/bin/echo $cmd\n";
exec $cmd;
exit 1; # just in case exec fails
}
}
else {
print "$hostname:$port needs new server\n"; }
}
next; }
# We need to establish a timeout in case we're not talking to
# who we expect. To make this work, we'd need yet another fork,
# WHICH WON'T WORK FOR THE REASONS ABOVE!
#
$SIG{'ALRM'} = sub {
print "Timeout in connection to $hostname\n" ;
close( S );
};
alarm 20;
# Send request for id or kill
if ($kill_serv) {
$msg = "$username\n$username\n%exit\n";
}
else {
$msg = "$username\n$username\n%id\n";
}
$datalen = length $msg;
syswrite S,$msg,$datalen;
if ($debug) {
while (<S>) { print $_; }
}
else {
$msg = <S>;
if ($msg ne "Proceed\n") { print "Bad message from $hostname: $msg";}
if ($kill_serv != 1) {
$msg = <S>;
if ($msg eq
"Port $port for client $username and server user $username\n") {
print "$hostname:$port has valid server\n";
}
}
}
close(S);
alarm 0;
}
close( FLIST );
0;
|