File: ClusterSession.java

package info (click to toggle)
libjaba-client-java 2.2.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 2,052 kB
  • sloc: java: 17,308; makefile: 12
file content (320 lines) | stat: -rw-r--r-- 11,372 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
/* Copyright (c) 2009 Peter Troshin
 *  
 *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0     
 * 
 *  This library is free software; you can redistribute it and/or modify it under the terms of the
 *  Apache License version 2 as published by the Apache Software Foundation
 * 
 *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache 
 *  License for more details.
 * 
 *  A copy of the license is in apache_license.txt. It is also available here:
 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
 * 
 * Any republication or derived work distributed in source code form
 * must include this copyright and license notice.
 */

package compbio.engine.cluster.drmaa;

import java.io.File;
import java.io.IOException;
import java.util.Calendar;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

import org.apache.log4j.Logger;
import org.ggf.drmaa.DrmaaException;
import org.ggf.drmaa.InvalidJobException;
import org.ggf.drmaa.JobInfo;
import org.ggf.drmaa.Session;
import org.ggf.drmaa.SessionFactory;

import compbio.engine.ClusterJobId;
import compbio.engine.Job;
import compbio.engine.Configurator;
import compbio.engine.client.ConfiguredExecutable;
import compbio.engine.client.PathValidator;
import compbio.engine.client.EngineUtil;
import compbio.engine.conf.PropertyHelperManager;
import compbio.metadata.JobStatus;
import compbio.metadata.ResultNotAvailableException;
import compbio.util.FileUtil;
import compbio.util.PropertyHelper;
import compbio.util.Util;

public final class ClusterSession {

	// Class-wide logger; also used by the constructor, so it has to be
	// initialised before INSTANCE below.
	private static final Logger log = Logger.getLogger(ClusterSession.class);

	// Engine property accessor. It appears to be referenced only from
	// commented-out code in this class.
	private static final PropertyHelper ph = PropertyHelperManager.getPropertyHelper();

	// Name of the file, inside a task working directory, that holds the
	// cluster job id (written by addJob, read back by getClusterJobId).
	public static final String JOBID = "JOBID";
	// Earlier design, kept for reference: a map from
	// TaskId (getTaskDirectory()) -> ConfiguredExecutable<?>.
	// Cluster jobId is only stored in a file
	// static final Map<JobId, ConfiguredExecutable<?>> jobs = new
	// ConcurrentHashMap<JobId, ConfiguredExecutable<?>>();

	// Jobs registered through addJob and not yet removed.
	static final List<Job> jobs = new CopyOnWriteArrayList<Job>();
	// True until close() has successfully exited the session. The flag is
	// a plain static field and is accessed without synchronisation.
	private static boolean open = true;

	// Keep this after all other static initialisers: the constructor uses
	// the static fields above, so creating the instance earlier would
	// produce an incompletely initialised object.
	private static final ClusterSession INSTANCE = new ClusterSession();

	// DRMAA session obtained from the SessionFactory in the constructor.
	private final Session session;
	// Contact string of the session; could be used in an init method to
	// reconnect to the session. Not read anywhere in this class.
	private final String sContact;

	// TODO decide whether the task list is needed!
	// private static BufferedWriter tasks;

	/**
	 * Creates the singleton instance: obtains a DRMAA session from the
	 * {@link SessionFactory}, records its contact string, initialises the
	 * session and registers a JVM shutdown hook that calls {@link #close()}.
	 * <p>
	 * A {@link DrmaaException} raised while initialising the session or
	 * registering the hook is logged together with its stack trace and is not
	 * rethrown. If initialisation fails, no shutdown hook is registered.
	 */
	private ClusterSession() {
		log.debug("Initializing session " + Util.datef.format(Calendar.getInstance().getTime()));
		SessionFactory factory = SessionFactory.getFactory();
		session = factory.getSession();
		sContact = session.getContact();
		try {
			// No explicit contact string is supplied
			session.init(null);
			// Make sure that the session is going to be properly closed
			Runtime.getRuntime().addShutdownHook(new Thread() {
				@Override
				public void run() {
					close();
				}
			});
		} catch (DrmaaException e) {
			// Hand the exception itself to the logger so that the stack
			// trace is preserved, not only the message text
			log.error("Could not initialize the cluster session: " + e.getMessage(), e);
		}
	}

	/**
	 * Gives access to the single {@code ClusterSession} object held in the
	 * static {@code INSTANCE} field.
	 *
	 * @return the shared instance
	 */
	static synchronized ClusterSession getInstance() {
		return INSTANCE;
	}

	/**
	 * Exposes the underlying DRMAA session.
	 *
	 * @return the session created by the constructor
	 */
	public Session getSession() {
		// The constructor is private and runs once, so this object is the
		// one stored in INSTANCE and its own field is the same session.
		return this.session;
	}

	/**
	 * Exits the DRMAA session if it is still marked as open; does nothing
	 * when the session has already been closed. This method is also invoked
	 * by the shutdown hook registered in the constructor.
	 * <p>
	 * A {@link DrmaaException} thrown by {@code session.exit()} is logged and
	 * not rethrown; in that case the session stays marked as open.
	 */
	public void close() {
		if (!open) {
			return;
		}
		try {
			session.exit();
			open = false;
			log.debug("Closing the session at: " + Util.datef.format(Calendar.getInstance().getTime()));
		} catch (DrmaaException dre) {
			// Cannot recover at this point, just log. Use the logger
			// rather than printStackTrace() so the failure ends up in the
			// application log together with its stack trace.
			log.error("Could not close the cluster session: " + dre.getMessage(), dre);
		}
	}

	/**
	 * Records a cluster job: writes the SUBMITTED status file and the
	 * {@link #JOBID} file into the executable's work directory, then appends
	 * a new {@link Job} to the job list.
	 *
	 * @param jobId
	 *            identifier of the job on the cluster; asserted to be
	 *            non-empty
	 * @param executable
	 *            the executable the job runs; supplies the task id and the
	 *            work directory
	 */
	void addJob(String jobId, ConfiguredExecutable<?> executable) {
		final String taskId = executable.getTaskId();
		// NOTE(review): the condition is negated while the message complains
		// about an invalid directory - confirm which of the two is intended.
		assert !PathValidator.isValidDirectory(taskId) : "Directory provided is not valid! Directory: " + taskId;
		assert !Util.isEmpty(jobId);

		EngineUtil.writeStatFile(executable.getWorkDirectory(), JobStatus.SUBMITTED.toString());
		EngineUtil.writeFile(executable.getWorkDirectory(), JOBID, jobId, false);
		log.debug("Adding taskId: " + taskId + " to cluster job list");
		assert EngineUtil.isValidJobId(taskId);
		final Job submitted = new Job(taskId, jobId, executable);
		jobs.add(submitted);
	}

	/**
	 * Drops the job with the given task id from the job list. Nothing
	 * happens if no such job is registered.
	 *
	 * @param taskId
	 *            task id of the job; asserted to be non-empty and a valid job
	 *            id
	 */
	public void removeJob(String taskId) {
		assert !Util.isEmpty(taskId) && EngineUtil.isValidJobId(taskId);
		removeJobFromListbyTaskId(taskId);
	}

	/*
	 * public List<JobInfo> waitForJobs(List<String> jobIds) throws
	 * DrmaaException { return waitForJobs(jobIds,
	 * Session.TIMEOUT_WAIT_FOREVER); }
	 * 
	 * public List<JobInfo> waitForJobs(List<String> jobIds, long waitingTime)
	 * throws DrmaaException { if (!open) { throw new
	 * IllegalStateException("Session is already closed!"); } assert jobIds !=
	 * null && jobIds.size() > 1;
	 * 
	 * session.synchronize(jobIds, waitingTime, false); List<JobInfo> jobsInfo =
	 * new ArrayList<JobInfo>(jobIds.size()); for (String jobId : jobIds) {
	 * jobsInfo.add(waitForJob(jobId)); } return jobsInfo; }
	 */

	/*
	 * public List<JobInfo> waitForAll() throws DrmaaException { assert
	 * jobs.size() > 0; return waitForJobs(new
	 * ArrayList<String>(ClusterSession.jobs.keySet())); }
	 * 
	 * 
	 * public void waitForAll_DropStatistics() throws DrmaaException { assert
	 * jobs.size() > 0; session.synchronize(new ArrayList<String>(Collections
	 * .unmodifiableCollection(ClusterSession.jobs.keySet())),
	 * Session.TIMEOUT_WAIT_FOREVER, true); }
	 */

	/**
	 * Waits for the job of the given task using
	 * {@link Session#TIMEOUT_WAIT_FOREVER} as the waiting time.
	 *
	 * @param taskId
	 *            task id of the job to wait for
	 * @return the job information returned by the session
	 * @throws DrmaaException
	 *             if waiting on the session fails
	 * @throws IOException
	 *             if the cluster job id cannot be read
	 */
	public JobInfo waitForJob(String taskId) throws DrmaaException, IOException {
		final long noTimeLimit = Session.TIMEOUT_WAIT_FOREVER;
		return waitForJob(taskId, noTimeLimit);
	}

	/**
	 * Resolves the cluster job id that belongs to a task id.
	 * <p>
	 * The job list is consulted first. If the task is not registered there,
	 * the id is read from the {@link #JOBID} file in the task's work
	 * directory.
	 *
	 * @param taskId
	 *            task id of the job
	 * @return the cluster job id of the task
	 * @throws IOException
	 *             if the {@link #JOBID} file does not exist or cannot be read
	 */
	public static ClusterJobId getClusterJobId(String taskId) throws IOException {
		Job job = Job.getByTaskId(taskId, jobs);
		if (job != null) {
			return job.getJobId();
		}
		// The job must have been removed from the task list, use the work
		// directory to find out the job id
		String workDir = Configurator.getWorkDirectory(taskId);
		assert !Util.isEmpty(workDir);
		File file = new File(workDir, JOBID);
		log.debug("Looking up cluster jobid by the task id " + taskId + " File path is " + file.getAbsolutePath());
		// Assertions are normally disabled in production, so a missing file
		// is reported explicitly instead of being checked with an assert
		if (!file.exists()) {
			throw new IOException("Could not find the cluster job id file for the task " + taskId
					+ ". Expected it at " + file.getAbsolutePath());
		}
		return new ClusterJobId(FileUtil.readFileToString(file));
	}

	/**
	 * Waits on the session for the cluster job of the given task and then
	 * writes the FINISHED status file into the task's work directory.
	 *
	 * @param jobId
	 *            the task id of the job; it is translated to the cluster job
	 *            id via {@link #getClusterJobId(String)}
	 * @param waitingTime
	 *            waiting time handed to {@code Session.wait}
	 * @return the job information returned by the session
	 * @throws DrmaaException
	 *             if waiting on the session fails
	 * @throws IOException
	 *             if the cluster job id cannot be read
	 */
	public JobInfo waitForJob(String jobId, long waitingTime) throws DrmaaException, IOException {
		final ClusterJobId clusterId = getClusterJobId(jobId);
		final JobInfo info = session.wait(clusterId.getJobId(), waitingTime);
		// Getting here means the wait returned normally, so the task is
		// marked as finished. The job list itself is not touched here.
		final String workDirectory = Configurator.getWorkDirectory(jobId);
		EngineUtil.writeStatFile(workDirectory, JobStatus.FINISHED.toString());
		return info;
	}

	/**
	 * Removes the job with the given task id from the job list, if present.
	 *
	 * @param taskId
	 *            task id of the job to remove; asserted to be non-empty
	 */
	private static void removeJobFromListbyTaskId(String taskId) {
		assert !Util.isEmpty(taskId);
		Job job = Job.getByTaskId(taskId, jobs);
		if (job == null) {
			// Nothing registered under this task id
			return;
		}
		// A space after "taskId" keeps the id readable in the log
		log.debug("Removing taskId " + taskId + " from cluster job list");
		jobs.remove(job);
	}

	/**
	 * Waits for the job of the given task and returns its executable.
	 * <p>
	 * If the task is still in the job list, its executable is taken from
	 * there and the job is removed from the list. Otherwise the executable is
	 * loaded through {@code EngineUtil.loadExecutable}. When an executable was
	 * obtained, a COLLECTED marker is written into its work directory.
	 * <p>
	 * An {@link InvalidJobException} or {@link IOException} raised while
	 * waiting is logged and does not abort the method.
	 *
	 * @param taskId
	 *            task id of the job; asserted to be a valid job id
	 * @return the executable of the task, or {@code null} if none was obtained
	 * @throws DrmaaException
	 *             if waiting fails for a reason other than an invalid job
	 * @throws ResultNotAvailableException
	 *             declared by this method; presumably raised when the
	 *             executable cannot be loaded - confirm against EngineUtil
	 */
	public ConfiguredExecutable<?> getResults(String taskId) throws DrmaaException, ResultNotAvailableException {

		// Previously the result of this check was silently discarded; assert
		// it, as the other methods of this class do
		assert EngineUtil.isValidJobId(taskId);
		try {
			waitForJob(taskId);
		} catch (InvalidJobException e) {
			// Its OK to continue, the job may have already completed normally
			log.warn("Could not find the cluster job with id " + taskId + " perhaps it has completed", e);
		} catch (IOException e) {
			log.error("Could not read JOBID file for the job " + taskId + " Message " + e.getLocalizedMessage(), e);
		}
		// Once the job has been waited for it will be finished
		// Next time it will not be found in the session, so it is removed
		// from the job list
		ConfiguredExecutable<?> exec;
		Job job = Job.getByTaskId(taskId, jobs);
		if (job != null) {
			exec = job.getConfExecutable();
			removeJobFromListbyTaskId(taskId);
		} else {
			// If the task was not found in the list of jobs, then it must
			// have been collected already.
			// Resurrect the job to find out where the output is
			exec = EngineUtil.loadExecutable(taskId);
		}
		if (exec != null) {
			EngineUtil.writeMarker(exec.getWorkDirectory(), JobStatus.COLLECTED);
		}
		return exec;
	}

	/**
	 * Wraps the given job information into a {@link StatisticManager}.
	 *
	 * @param status
	 *            job information, e.g. as returned by {@code waitForJob}
	 * @return a new statistic manager built from {@code status}
	 * @throws DrmaaException
	 *             declared by this method; presumably raised by the
	 *             StatisticManager constructor - confirm
	 */
	public static StatisticManager getStatistics(JobInfo status) throws DrmaaException {
		final StatisticManager statistics = new StatisticManager(status);
		return statistics;
	}

	/**
	 * Writes all statistics of the given job information to the log at INFO
	 * level.
	 *
	 * @param status
	 *            job information to report
	 * @throws DrmaaException
	 *             if the statistics cannot be obtained
	 */
	static void logStatistics(JobInfo status) throws DrmaaException {
		final StatisticManager statistics = getStatistics(status);
		log.info(statistics.getAllStats());
	}

	/**
	 * Asks the session for the program status of a cluster job.
	 * <p>
	 * Completed jobs apparently cannot be found. If the lookup fails, the job
	 * is most likely not running any more: it has been cancelled, has
	 * finished or has failed.
	 * 
	 * @param jobId
	 *            cluster job id to query
	 * @return the status code reported by the session
	 * @throws InvalidJobException
	 *             if the job is no longer in the queue or running. Basically
	 *             this is thrown for all finished or cancelled jobs
	 * @throws DrmaaException
	 *             if the status query fails for another reason
	 */
	public int getJobStatus(ClusterJobId jobId) throws DrmaaException, InvalidJobException {
		final int programStatus = session.getJobProgramStatus(jobId.getJobId());
		return programStatus;
	}

	/**
	 * Translates a status code, as obtained by querying the cluster, into a
	 * human readable description. The code is expressed in terms of the
	 * {@link Session} constants, not in terms of a JobStatus. Should only be
	 * used for testing!
	 * 
	 * @param status
	 *            one of the {@link Session} status constants
	 * @return a description of the status terminated by a line feed, or
	 *         {@code null} if the code is not one of the handled constants
	 * @throws DrmaaException
	 *             declared for compatibility; not thrown by this
	 *             implementation
	 */
	@Deprecated
	public static String getJobStatus(final int status) throws DrmaaException {
		switch (status) {
		case Session.UNDETERMINED:
			return "Job status cannot be determined\n";
		case Session.QUEUED_ACTIVE:
			return "Job is queued and active\n";
		case Session.SYSTEM_ON_HOLD:
			return "Job is queued and in system hold\n";
		case Session.USER_ON_HOLD:
			return "Job is queued and in user hold\n";
		case Session.USER_SYSTEM_ON_HOLD:
			return "Job is queued and in user and system hold\n";
		case Session.RUNNING:
			return "Job is running\n";
		case Session.SYSTEM_SUSPENDED:
			return "Job is system suspended\n";
		case Session.USER_SUSPENDED:
			return "Job is user suspended\n";
		case Session.USER_SYSTEM_SUSPENDED:
			return "Job is user and system suspended\n";
		case Session.DONE:
			return "Job finished normally\n";
		case Session.FAILED:
			return "Job finished, but failed\n";
		default:
			// Unknown codes have no description
			return null;
		}
	}

}