File: monitor_qmaster.sh

package info (click to toggle)
gridengine 8.1.9%2Bdfsg-13.2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 57,848 kB
  • sloc: ansic: 432,690; java: 87,068; cpp: 31,958; sh: 29,445; jsp: 7,757; perl: 6,336; xml: 5,828; makefile: 4,705; csh: 3,934; ruby: 2,221; tcl: 1,676; lisp: 669; yacc: 519; python: 503; lex: 361; javascript: 200
file content (52 lines) | stat: -rwxr-xr-x 999 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/bin/sh

# For investigating qmaster leaks:  dump core when it exceeds a
# certain size.  See sge_qmaster(8) about enabling core dumps under Linux.

# Posted on the sunsource site without a licence, so under SISSL.

# Tunables.  Each one can be overridden from the environment; the values
# below are the defaults used when the variable is unset or empty.
#
# START_THRESHOLD - size (kb, as reported by "ps -o vsz") that triggers the
#                   first core dump
# STEP            - amount (kb) the threshold is raised after every dump
# MAX_CORE        - upper limit on the number of dump attempts
# INTERVAL        - argument passed to sleep(1) between two size samples
START_THRESHOLD=${START_THRESHOLD:-2000000}
STEP=${STEP:-500000}
MAX_CORE=${MAX_CORE:-5}
INTERVAL=${INTERVAL:-10}

# Exactly one argument is required: the pid of the qmaster to watch.
# Usage errors are diagnostics, so they go to stderr.
if [ "$#" -ne 1 ]; then
   echo "usage: $0 <qmaster pid>" >&2
   exit 1
fi

# Reject anything that is not a plain decimal number up front instead of
# letting ps fail on it later.
case "$1" in
   '' | *[!0-9]*)
      echo "usage: $0 <qmaster pid>" >&2
      exit 1
      ;;
esac

PID=$1

# Dump a core of a running process with gcore and tag the file with a
# sequence number so that successive dumps do not overwrite each other.
#
# Arguments:
#   $1 - pid of the process to dump
#   $2 - sequence number appended to the core file name
#   $3 - threshold (kb) that was exceeded; only used in the message
# Outputs:
#   progress messages on stdout
# Returns:
#   0 if core.$1.$2 was written, 1 if gcore or the rename failed
dump_core()
{
   echo "qmaster size exceeded $3 kb"
   echo "trying to create core dump"
   # Pin the output prefix to "core" so the file to rename is core.<pid>;
   # the default prefix is not the same in every gcore version.
   # Success is only reported once the rename has worked as well.
   if gcore -o core "$1" && mv "core.$1" "core.$1.$2"; then
      echo "done - wrote core.$1.$2"
   else
      echo "failed"
      return 1
   fi
}

# Main loop: sample the virtual size of $PID every $INTERVAL, and request a
# core dump each time the size exceeds the current threshold.  After every
# dump attempt the threshold is raised by $STEP; at most $MAX_CORE attempts
# are made.  The loop ends (exit status 0) once ps no longer finds the pid.
dumps=0
threshold=$START_THRESHOLD

while :; do
   # "vsz=" asks for the column with an empty header, which suppresses the
   # header line without relying on the procps-only --no-headers option.
   if ! size=$(ps -o vsz= -p "$PID"); then
      echo "no process with pid $PID - exiting"
      exit 0
   fi
   # ps may pad the column with leading blanks; keep only the number.
   size=${size##*[[:space:]]}

   echo "$(date): $size"

   case "$size" in
      '' | *[!0-9]*)
         # Not a number: skip the comparison rather than feed it to test.
         echo "unexpected ps output for pid $PID - skipping sample" >&2
         ;;
      *)
         if [ "$size" -gt "$threshold" ] && [ "$dumps" -lt "$MAX_CORE" ]; then
            dump_core "$PID" "$dumps" "$threshold"
            # Counted and stepped whether or not the dump succeeded, so a
            # broken gcore cannot be retried on every sample.
            dumps=$((dumps + 1))
            threshold=$((threshold + STEP))
         fi
         ;;
   esac

   sleep "$INTERVAL"
done