File: sx6_migration_command

package info (click to toggle)
gridengine 6.2u5-7.1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 57,216 kB
  • sloc: ansic: 438,030; java: 66,252; sh: 36,399; jsp: 7,757; xml: 5,850; makefile: 5,520; csh: 4,571; cpp: 2,848; perl: 2,401; tcl: 692; lisp: 669; yacc: 668; ruby: 642; lex: 344
file content (129 lines) | stat: -rwxr-xr-x 3,437 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/bin/sh
#
#
#___INFO__MARK_BEGIN__
##########################################################################
#
#  The Contents of this file are made available subject to the terms of
#  the Sun Industry Standards Source License Version 1.2
#
#  Sun Microsystems Inc., March, 2001
#
#
#  Sun Industry Standards Source License Version 1.2
#  =================================================
#  The contents of this file are subject to the Sun Industry Standards
#  Source License Version 1.2 (the "License"); You may not use this file
#  except in compliance with the License. You may obtain a copy of the
#  License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
#
#  Software provided under this License is provided on an "AS IS" basis,
#  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
#  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
#  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
#  See the License for the specific provisions governing your rights and
#  obligations concerning the Software.
#
#  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
#
#  Copyright: 2001 by Sun Microsystems, Inc.
#
#  All Rights Reserved.
#
##########################################################################
#___INFO__MARK_END__

# Unset variables expand to the empty string instead of aborting the script.
set +u

# The third command-line argument is the checkpoint directory.
ckpt_dir=$3

# Create the summary log in the checkpoint directory on first use.  Mode 666
# makes it world-writable -- presumably so that jobs of any user can append
# to it; confirm against the site's checkpoint environment setup.
# The path is quoted so a directory name containing blanks or glob
# characters is not split or expanded.
if [ ! -f "$ckpt_dir/ckpt.log" ]; then
   touch "$ckpt_dir/ckpt.log"
   chmod 666 "$ckpt_dir/ckpt.log"
fi

# create temp directory for holding checkpoint info

# Work out the names used for this job's checkpoint data:
#   jobid  - suffix of the per-job checkpoint directory and restart file
#   jobdir - directory name looked up below active_jobs further down
# When SGE_TASK_ID is empty or "undefined", jobid is the bare JOB_ID and
# jobdir is JOB_ID.1; otherwise both are JOB_ID.SGE_TASK_ID.
case "$SGE_TASK_ID" in
   ""|undefined)
      jobid=$JOB_ID
      jobdir=$JOB_ID.1
      ;;
   *)
      jobid=$JOB_ID.$SGE_TASK_ID
      jobdir=$JOB_ID.$SGE_TASK_ID
      ;;
esac

# Per-job directory below the checkpoint directory.  The rest of the script
# uses relative file names (osjobid, job_pid, chkpnt_<jobid>), so it must
# really be running inside this directory: give up with a non-zero status
# if it cannot be entered, rather than scattering those files over
# whatever the current directory happens to be.
tmpdir=$ckpt_dir/ckpt.$jobid
mkdir -p "$tmpdir"
cd "$tmpdir" || {
   echo "$0: cannot change directory to $tmpdir" >&2
   exit 1
}

# create log file
#
# From the exec onwards, everything this script writes to stdout or stderr
# is appended to checkpoint.log in the per-job directory.

F=$tmpdir/checkpoint.log
touch "$F"
exec >> "$F" 2>&1

# Header for this invocation: who called the script, when, and with what.
echo -------------------------------------------------------------
echo `basename "$0"` called at `date`
echo called by: `id`
# Quoted so the arguments are logged as passed, without glob expansion.
echo "with args: $*"

echo "SGE_TASK_ID=$SGE_TASK_ID"
echo "JOB_ID=$JOB_ID"

# Cray checkpoint workaround - delete the job script so chkpnt(1)
# will save and restore it.  For this to work, "shell_start_mode"
# should be set to "script_from_stdin" in the global cluster
# configuration

#rm -f $JOB_SCRIPT

# get the checkpoint identifier

# load_job_value NAME
#
# Sets $job_value to the content of the per-job file NAME (used below for
# "osjobid" and "job_pid").
#
# A copy of NAME in the current directory is used when present.  Otherwise
# the value is read from ../active_jobs/$jobdir relative to the directory
# holding $JOB_SCRIPT, and a copy is stored in the current directory for
# later invocations.
#
# An empty or unreadable value is NOT stored: caching it would make every
# later invocation pick up the empty copy instead of retrying the real file.
#
# Reads:   JOB_SCRIPT, jobdir
# Sets:    job_value, job_dir
# Returns: 0 when a value was obtained, 1 when the lookup came up empty
load_job_value()
{
    job_value=
    if [ -f "$1" ]
    then
        job_value=`cat "$1"`
        return 0
    fi

    job_dir=`dirname "$JOB_SCRIPT"`/../active_jobs/$jobdir
    echo "job_dir=$job_dir"
    echo "JOB_SCRIPT=$JOB_SCRIPT"
    job_value=`cat "$job_dir/$1"`
    if [ -n "$job_value" ]
    then
        echo "$job_value" > "$1"
        return 0
    fi

    echo "warning: no $1 found in $job_dir, not caching an empty value"
    return 1
}

load_job_value osjobid
osjobid=$job_value

load_job_value job_pid
job_pid=$job_value

#
# Save any existing restart file, just in case the
# host crashes while checkpointing
#
# The previous restart file is first dropped from the restart database and
# then kept under the name chkpnt_<jobid>.save.  The rename is skipped when
# there is nothing to save (e.g. the first checkpoint of a job), so the log
# does not collect a spurious mv error.
#

echo Removing any old restart files from restart database
echo "/usr/bin/rmresf -d chkpnt_$jobid"
/usr/bin/rmresf -d "chkpnt_$jobid"
if [ -f "chkpnt_$jobid" ] || [ -d "chkpnt_$jobid" ]; then
   mv "chkpnt_$jobid" "chkpnt_$jobid.save"
fi

#
# Checkpoint the job
#
# The exit status of chkpnt is kept in $cc; it is reported in both logs and
# becomes the exit status of this script.  The saved copy of the previous
# restart file is removed only when chkpnt succeeded.
#

echo "/usr/bin/chkpnt -P $job_pid -v -k -f chkpnt_$jobid"
/usr/bin/chkpnt -P "$job_pid" -v -k -f "chkpnt_$jobid"
cc=$?
if [ $cc -eq 0 ]; then
   rm -f "chkpnt_$jobid.save"
fi

# Build the status record once so that the per-job log (stdout) and the
# shared ckpt.log carry exactly the same timestamp and text.
stamp=`date +"%D %T"`
msg="$stamp Job $jobid (job_pid=$job_pid, osjobid=$osjobid) checkpointed and migrated, status=$cc"
echo "$msg"
echo "$msg" >> "$ckpt_dir/ckpt.log"

exit $cc