File: sgemaster_template

package info (click to toggle)
gridengine 6.2-4
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 51,532 kB
  • ctags: 51,172
  • sloc: ansic: 418,155; java: 37,080; sh: 22,593; jsp: 7,699; makefile: 5,292; csh: 4,244; xml: 2,901; cpp: 2,086; perl: 1,895; tcl: 1,188; lisp: 669; ruby: 642; yacc: 393; lex: 266
file content (623 lines) | stat: -rw-r--r-- 18,262 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
#!/bin/sh
#
#+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#
#
# $SGE_ROOT/util/master_template
#
# DO NOT EDIT THIS FILE - this file is used as an template
# Don't change the markers #+-#+-#+-# and "#-#-#-#" , they will be removed
#
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-
#
# SGE/SGEEE startup script
#
#___INFO__MARK_BEGIN__
##########################################################################
#
#  The Contents of this file are made available subject to the terms of
#  the Sun Industry Standards Source License Version 1.2
#
#  Sun Microsystems Inc., March, 2001
#
#
#  Sun Industry Standards Source License Version 1.2
#  =================================================
#  The contents of this file are subject to the Sun Industry Standards
#  Source License Version 1.2 (the "License"); You may not use this file
#  except in compliance with the License. You may obtain a copy of the
#  License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
#
#  Software provided under this License is provided on an "AS IS" basis,
#  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
#  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
#  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
#  See the License for the specific provisions governing your rights and
#  obligations concerning the Software.
#
#  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
#
#  Copyright: 2001 by Sun Microsystems, Inc.
#
#  All Rights Reserved.
#
##########################################################################
#___INFO__MARK_END__

#
# This script can be called with the following arguments:
#
#       start       start qmaster or shadowd 
#       stop        Terminates qmaster if we are on the master machine.
#       -qmaster    only starts qmaster 
#       -shadowd    start shadwod if found in the "shadow_masters" file
#       -migrate    shuts down qmaster if it is running
#                   on another host and start the daemons on this host
#
# If the file "primary_qmaster" in the $SGE_ROOT/$SGE_CELL/common
# exists and it contains the hostname of the current machine and qmaster
# is running on another host it will be shut down and started on this host
#
# Unix commands which may be used in this script:
#    cat cut tr ls grep awk sed basename
#
# This script requires the script $SGE_ROOT/util/arch
#

PATH=/bin:/usr/bin:/sbin:/usr/sbin

#---------------------------------------------------------------------------
# The following lines provide the necessary info for adding a startup script
# according to the Linux Standard Base Specification (LSB) which can
# be found at:
#
#    http://www.linuxfoundation.org/spec/booksets/LSB-Core-generic/LSB-Core-generic/initscrcomconv.html
#
### BEGIN INIT INFO
# Provides:       GENSGESVC
# Required-Start: $network $remote_fs
# Required-Stop:
# Default-Start:  3 5
# Default-Stop: 0 1 2 6
# Description:  start Grid Engine qmaster, schedd, shadowd
### END INIT INFO
#---------------------------------------------------------------------------

SGE_ROOT=GENROOT; export SGE_ROOT
SGE_CELL=GENCELL; export SGE_CELL
SGE_QMASTER_PORT=GENSGE_QMASTER_PORT; export SGE_QMASTER_PORT 
SGE_EXECD_PORT=GENSGE_EXECD_PORT; export SGE_EXECD_PORT

unset CODINE_ROOT GRD_ROOT COD_CELL GRD_CELL

ARCH=`$SGE_ROOT/util/arch`

# library path setting required only for architectures where RUNPATH is not supported
case $ARCH in
#ENFORCE_SHLIBPATH#sol*|lx*)
#ENFORCE_SHLIBPATH#   ;;
*)
   shlib_path_name=`$SGE_ROOT/util/arch -lib`
   old_value=`eval echo '$'$shlib_path_name`
   if [ x$old_value = x ]; then
      eval $shlib_path_name=$SGE_ROOT/lib/$ARCH
   else
      eval $shlib_path_name=$old_value:$SGE_ROOT/lib/$ARCH
   fi
   export $shlib_path_name
   ;;
esac

#---------------------------------------------------------------------------
# Shutdown
# Send SIGTERM to process name $1 with pid in file $2
#
Shutdown()
{
   name=$1
   pidfile=$2
   if [ -f $pidfile ]; then
      pid=`cat $pidfile`
      maxretries=20
      i=0
      while [ $i -lt $maxretries ]; do
         $utilbin_dir/checkprog $pid $name > /dev/null
         if [ "$?" = 0 ]; then
            #We keep killing Qmaster so that child processes get killed
            kill $pid
         else
            return 0
         fi
         sleep 2
         i=`expr $i + 1`

      done
      kill -9 $pid
      return $?
   fi
}


#---------------------------------------------------------------------------
# QmasterSpoolDir
#    Return qmasters spool directory
#
QmasterSpoolDir()
{
   qma_spool_dir=`grep qmaster_spool_dir \
                      $SGE_ROOT/$SGE_CELL/common/bootstrap | \
                      awk '{ print $2 }'`
   echo $qma_spool_dir
}




#---------------------------------------------------------------------------
# CheckIfQmasterHost
#    If our hostname given in $1 is the same as in the "act_qmaster" file
#    echo "true" else echo "false"
#
CheckIfQmasterHost()
{
   host=$1

   if [ "$host" = "`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`" ]; then
      echo true
   else
      echo false
   fi
}

#---------------------------------------------------------------------------
# CheckIfPrimaryQmasterHost
#    Check if our hostname given in $1 is the same as in the
#    "primary_qmaster" file
#    echo true if there is our hostname else echo false
#
CheckIfPrimaryQmasterHost()
{
   host=$1

   fname=$SGE_ROOT/$SGE_CELL/common/primary_qmaster

   if [ -f $fname ]; then
      if [ "$host" = "`cat $fname`" ]; then
         echo true
      else
         echo false
      fi
   else
      echo false
   fi
}


#---------------------------------------------------------------------------
# CheckIfShadowMasterHost
#    Check if our hostname given in $1 is contained in the
#    "shadow_masters" file
#    echo true if there is our hostname else echo false
#
CheckIfShadowMasterHost()
{
   host=$1

   fname=$SGE_ROOT/$SGE_CELL/common/shadow_masters

   if [ -f $fname ]; then
      grep -i $host $fname 2>&1 > /dev/null
      if [ $? = 0 ]; then
         shadow_host="true"
      else
         shadow_host="false"
      fi
   else
      shadow_host="false"
   fi
}

#---------------------------------------------------------------------------
# GetPathToBinaries
#    echo the name of the bin_dir on this system
#    The check is fullfilled if we can access the qstat binary
#    echo "none" if we can't determine the binary path
GetPathToBinaries()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap

   base=none

   if [ -f $cfgname ]; then
      base=`grep binary_path $cfgname | awk '{ print $2 }'`
      if [ -f $base/qstat ]; then
         :
      elif [ -f $SGE_ROOT/util/arch ]; then
         arch=`$SGE_ROOT/util/arch`
         if [ -f $base/$arch/qstat ]; then
               base=$base/$arch
         fi
      fi
   fi

   echo $base
}


#---------------------------------------------------------------------------
# GetAdminUser
#    echo the name of the admin user on this system
#    echo "root" if admin user retrieval fails
GetAdminUser()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
   user=none

   if [ -f $cfgname ]; then
      user=`grep admin_user $cfgname | awk '{ print $2 }'`
   fi

   if [ `echo $user|tr "[A-Z]" "[a-z]"` = "none" ]; then
      user=root
   fi
   echo $user
}

#---------------------------------------------------------------------------
# GetPathToUtilbin
#    echo the path to the binaires in utilbin
#    The check is fullfilled if we can access the "gethostname" binary
#    echo "none" if we can't determine the binary path
#
GetPathToUtilbin()
{
   base=none

   if [ -f $SGE_ROOT/util/arch ]; then
      utilbindir=$SGE_ROOT/utilbin

      arch=`$SGE_ROOT/util/arch`
      if [ -f $utilbindir/$arch/gethostname ]; then
         base=$utilbindir/$arch
      fi
   fi

   echo $base
}

#---------------------------------------------------------------------------
# CheckRunningQmaster
# checks, if sge_qmaster is running
# In error case the sge_qmaster didn't start, silently
#
CheckRunningQmaster()
{
   masterhost=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
   running=false
   loop=0

   if [ "$SGE_QMASTER_PORT" = "" ]; then
      SGE_QMASTER_PORT=`$utilbin_dir/getservbyname -number sge_qmaster`
   fi

   while [ $running = "false" -a $loop -ne 30 ]; do 
      $bin_dir/qping -info $masterhost $SGE_QMASTER_PORT qmaster 1 > /dev/null 2>&1

      if [ "$?" = 0 ]; then
         running=true
      else
         sleep 2 
         masterhost=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
         loop=`expr $loop + 1`
      fi
   done

   if [ $running = "false" ]; then
      echo
      echo "sge_qmaster didn't start!"
      echo "Please check the messages file"
      echo
   fi
}

#---------------------------------------------------------------------------
# DetectSMFService - sets service to a mask maching the name
# $1 ... name
#
DetectSMFService()
{
   name=$1
   service=""

   if [ "$noSMF" = true ]; then
      return
   fi
   #Otherwise we try is it's available of the system
   if [ -f /lib/svc/share/smf_include.sh ]; then
      . /lib/svc/share/smf_include.sh
      smf_present
      if [ $? -ne 0 ]; then
         return
      fi
   else
      return
   fi

   #Check we have cluster_name file
   if [ ! -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ]; then
      echo "Error: could not find $SGE_ROOT/$SGE_CELL/common/cluster_name!"
      exit $SMF_EXIT_ERR_CONFIG
   fi
   #Cluster name must be unique
   SGE_CLUSTER_NAME=`cat $SGE_ROOT/$SGE_CELL/common/cluster_name 2>/dev/null`
   
   service="svc:/application/sge/$name:$SGE_CLUSTER_NAME"

   if [ -n "$SMF_FMRI" -a "$service" != "$SMF_FMRI" ]; then
      #Someone is trying to fool us
      echo "Error: $service does not match FMRI $SMF_FMRI provided by SMF!"
      echo "Check that service instance $SGE_CLUSTER_NAME exist on your system"
      exit $SMF_EXIT_ERR_CONFIG
   fi
   #Check if service exists
   /usr/bin/svcs $service > /dev/null 2>&1
   if [ $? -ne 0 ]; then
      #No such service found in the system
      service=""
   fi
}


#---------------------------------------------------------------------------
usage()
{
   echo "Grid Engine start/stop script. Valid parameters are:"
   echo ""
   echo "   (no parameters): start qmaster and shadow daemon if applicable"
   echo "   \"start\"        dto."
   echo "   \"stop\"         shutdown local Grid Engine processes and jobs"
   echo "   \"-qmaster\"     only start/stop qmaster (if applicable)"
   echo "   \"-shadowd\"     only start/stop shadowd (if applicable)"   
   echo "   \"-migrate\"     shutdown qmaster if it's running on another"
   echo "                    host and restart it on this host"
   echo "                    Migration only works if this host is an admin host"
   echo "   \"-nosmf\"       force no SMF"
   echo ""
   echo "Only one of the parameters \"start\", \"stop\" or \"softstop\" is allowed."
   echo "Only one of the parameters beginning  with \"-\" is allowed. Does not " 
   echo "apply to -nosmf."
   echo
   echo "Default argument is \"start\" for all components."
   echo "Default for \"stop\" is shutting down all components."
   echo
   exit 1
}


#---------------------------------------------------------------------------
# MAIN Procedure
#

if [ "$#" -gt 3 -o "$1" = "-h" -o "$1" = "help" ]; then
   usage
fi

startup=true
qmaster=true
shadowd=true
qstd=false
migrate_qmaster=false
softstop=false
noSMF=false

for i in $*; do
   if [ "$i" = start ]; then
      startup=true
   elif [ "$i" = stop ]; then
      startup=false
   elif [ "$i" = softstop ]; then
      startup=false
      softstop=true
   elif [ "$i" = -qmaster ]; then
      qmaster=true
      shadowd=false
   elif [ "$i" = -shadowd ]; then
      qmaster=false
      shadowd=true
   elif [ "$i" = -migrate ]; then
      migrate_qmaster=true
      qmaster=true
      shadowd=false
   elif [ "$i" = -nosmf ]; then
      noSMF=true
   else
      usage
   fi
done

bin_dir=`GetPathToBinaries`
if [ "$bin_dir" = "none" ]; then
   echo "can't determine path to Grid Engine binaries"
   exit 1
fi

utilbin_dir=`GetPathToUtilbin`
if [ "$utilbin_dir" = "none" ]; then
   echo "can't determine path to Grid Engine utility binaries"
   exit 1
fi

HOST=`$utilbin_dir/gethostname -aname`
UQHOST=`$utilbin_dir/gethostname -aname | cut -f1 -d.`
qmaster_spool_dir=`QmasterSpoolDir`
CheckIfShadowMasterHost $HOST

if [ "$startup" = true ]; then

   # if service tags are enabled
   st_enabled=`sh $SGE_ROOT/util/sgeST/sge_st "enabled"`
   if [ "$st_enabled" = "true" -a "$qmaster" = "true" ]; then
      sh $SGE_ROOT/util/sgeST/sge_st "register"  
   fi

   # qmaster_host=true if qmaster was running on this host the last time
   #                   this host is an execution host

   qmaster_host=`CheckIfQmasterHost $HOST`
   primary_qmaster_host=`CheckIfPrimaryQmasterHost $HOST`

   if [ $qmaster = true -a $qmaster_host = true -a $migrate_qmaster = true ]; then
      echo "   qmaster running on this host. Will not migrate qmaster."
      exit 1
   fi

   if [ $qmaster = true -a $qmaster_host = false -a  \
        \( $primary_qmaster_host = true -o $migrate_qmaster = true \) ]; then
       actual_qmaster_host=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
       echo "   shutting down qmaster on host \"$actual_qmaster_host\" ..."
       qconf_output=`$bin_dir/qconf -ks 2>&1 | grep "denied"`
       if [ "$qconf_output" != "" ]; then
          echo "   denied: host \"$HOST\" is no admin host."
          exit 1
       fi
       $bin_dir/qconf -km > /dev/null 2>&1
       
       qping_count=0
       qping_retries=10
       qping_exit_state=0
       while [ $qping_count -lt $qping_retries ]; do
          $bin_dir/qping -info $actual_qmaster_host $SGE_QMASTER_PORT qmaster 1  > /dev/null 2>&1
          qping_exit_state=$?
          if [ $qping_exit_state -ne 0 ]; then
             break
          fi
          sleep 3
          qping_count=`expr $qping_count + 1`
       done

       if [ $qping_exit_state -eq 0 ]; then
       #  qmaster is still running
          echo "   qmaster on host $actual_qmaster_host still alive. Cannot migrate qmaster."
          exit 1
       fi

       lock_file_read_retries=10
       lock_file_read_count=0
       lock_file_found=0
       while [ $lock_file_read_count -lt $lock_file_read_retries ]; do
          if [ -f $qmaster_spool_dir/lock ]; then
             lock_file_found=1
             break
          fi
          sleep 3
          lock_file_read_count=`expr $lock_file_read_count + 1`
       done

       if [ $lock_file_found -eq 0 ]; then
       #  old qmaster did not write lock file 
          echo "   old qmaster did not write lock file. Cannot migrate qmaster."
          echo "   Please verify that qmaster on host $actual_qmaster_host is down"
          echo "   and make sure that the lock file in qmaster spool directory is"
          echo "   read-able."
          exit 1
       fi

       qmaster_host=true
   fi

   if [ $qmaster = true ]; then
      DetectSMFService qmaster
      #We want to use smf
      if [ -z "$SMF_FMRI" -a -n "$service"  ]; then
         echo "   starting sge_qmaster"
         svcadm enable -st $service
      #For -migrate with SMF qmaster_host is not yet set for SMF start (2nd)
      elif [ $qmaster_host = true -o \( -n "$SMF_FMRI" -a "$SMF_FMRI" = "$service" \) ]; then
         echo "   starting sge_qmaster"
         $bin_dir/sge_qmaster
         [ $? -eq 0 -a -d /var/lock/subsys ] && touch /var/lock/subsys/sgemaster >/dev/null 2>&1
         CheckRunningQmaster
      fi
   elif [ $qmaster = true -a $qmaster_host = false ]; then
      echo
      echo "sge_qmaster didn't start!"
      echo "This is not a qmaster host!"
      echo "Please, check your act_qmaster file!" 
      echo
   fi
      
   if [ $shadowd = true -a $shadow_host = true ]; then
      start_shadowd=true
      UQpidfile=$qmaster_spool_dir/shadowd_$UQHOST.pid
      pidfile=$qmaster_spool_dir/shadowd_$HOST.pid

      if [ -f $pidfile ]; then
         pid=`cat $pidfile`
         $utilbin_dir/checkprog $pid sge_shadowd > /dev/null
         if [ "$?" = 0 ]; then
            start_shadowd=false 
         fi
      fi

      if [ -f $UQpidfile ]; then
         pid=`cat $UQpidfile`
         $utilbin_dir/checkprog $pid sge_shadowd > /dev/null
         if [ "$?" = 0 ]; then
            start_shadowd=false
         fi
      fi

      if [ $start_shadowd = true ]; then
         DetectSMFService shadowd
         echo "   starting sge_shadowd"
         #We want to use smf
         if [ -z "$SMF_FMRI" -a -n "$service"  ]; then
            svcadm enable -st $service
         else
            $bin_dir/sge_shadowd
	 fi
      else
	 echo "   found running sge_shadowd - not starting"
         exit 1
      fi
   fi
else
   if [ $shadowd = true -a $shadow_host = true ]; then
      echo "   Shutting down Grid Engine shadowd"
      DetectSMFService shadowd
      if [ -z "$SMF_FMRI" -a -n "$service"  ]; then
         svcadm disable -st $service
      else
         # Send SIGTERM to shadowd
         if [ -f $qmaster_spool_dir/shadowd_$UQHOST.pid ]; then
            Shutdown sge_shadowd $qmaster_spool_dir/shadowd_$UQHOST.pid
         elif [ -f $qmaster_spool_dir/shadowd_$HOST.pid ]; then
            Shutdown sge_shadowd $qmaster_spool_dir/shadowd_$HOST.pid
         fi	
      fi
   fi

   if [ $qmaster = true ]; then
      if [ `CheckIfQmasterHost $HOST` = true ]; then
         echo "   Shutting down Grid Engine qmaster"
         DetectSMFService qmaster
         if [ -z "$SMF_FMRI" -a -n "$service"  ]; then
            svcadm disable -st $service
            exit $?
         else
            # Send SIGTERM to qmaster
            Shutdown sge_qmaster $qmaster_spool_dir/qmaster.pid
            ret=$?
            if [ -f /var/lock/subsys/sgemaster ]; then
               uid=`$utilbin_dir/uidgid -uid`
               if [ "$uid" = "0" -a "$ret" = "0" ]; then            
                  rm -f /var/lock/subsys/sgemaster >/dev/null 2>&1
               else
                  echo "Can't shut down qmaster!"
                  exit 1
               fi
            fi
         fi
      fi
   fi

fi