File: iof.h

package info (click to toggle)
openmpi 2.0.2-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 99,912 kB
  • ctags: 55,589
  • sloc: ansic: 525,999; f90: 18,307; makefile: 12,062; sh: 6,583; java: 6,278; asm: 3,515; cpp: 2,227; perl: 2,136; python: 1,350; lex: 734; fortran: 52; tcl: 12
file content (233 lines) | stat: -rw-r--r-- 9,706 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007-2008 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2014      Intel, Inc. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/**
 * @file
 *
 * I/O Forwarding Service
 * The I/O forwarding service (IOF) is used to connect stdin, stdout, and
 * stderr file descriptor streams from MPI processes to the user.
 *
 * The design is fairly simple: when a proc is spawned, the IOF connects
 * each of its stdin, stdout, and stderr descriptors to a
 * corresponding IOF stream. In addition, the IOF designates a separate
 * stream for passing OMPI/ORTE internal diagnostic/help output to mpirun.
 * This is done specifically to separate such output from the user's
 * stdout/err - basically, it allows us to present it to the user in
 * a separate format for easier recognition. Data read from a source
 * on any stream (e.g., printed to stdout by the proc) is relayed
 * by the local daemon to the other end of the stream - i.e., stdin
 * is relayed to the local proc, while stdout/err is relayed to mpirun.
 * Thus, the eventual result is to connect ALL streams to/from
 * the application process and mpirun.
 *
 * Note: By default, data read from stdin is forwarded -only- to rank=0.
 * Stdin for all other procs is tied to "/dev/null".
 *
 * External tools can "pull" copies of stdout/err and
 * the diagnostic stream from mpirun for any process. In this case,
 * mpirun will send a copy of the output to the "pulling" process. Note that external tools
 * cannot "push" something into stdin unless the user specifically directed
 * that stdin remain open, nor under any conditions "pull" a copy of the
 * stdin being sent to rank=0.
 *
 * Tools can exploit either of two mechanisms for this purpose:
 *
 * (a) call orte_init themselves and utilize the ORTE tool comm
 *     library to access the IOF. This also provides access to
 *     other tool library functions - e.g., to order that a job
 *     be spawned; or
 *
 * (b) fork/exec the "orte-iof" tool and let it serve as the interface
 *     to mpirun. This lets the tool avoid calling orte_init, and means
 *     the tool will not have to compile against the ORTE/OMPI libraries.
 *     However, the orte-iof tool is limited solely to interfacing
 *     stdio and cannot be used for other functions included in
 *     the tool comm library.
 *
 * Thus, mpirun acts as a "switchyard" for IO, taking input from stdin
 * and passing it to rank=0 of the job, and taking stdout/err/diag from all
 * ranks and passing it to its own stdout/err/diag plus any "pull"
 * requestors.
 *
 * Streams are identified by ORTE process name (to include wildcards,
 * such as "all processes in ORTE job X") and tag.  There are
 * currently only 4 allowed predefined tags:
 *
 * - ORTE_IOF_STDIN (value 0)
 * - ORTE_IOF_STDOUT (value 1)
 * - ORTE_IOF_STDERR (value 2)
 * - ORTE_IOF_INTERNAL (value 3): for "internal" messages
 *   from the infrastructure, just to differentiate them from user job
 *   stdout/stderr
 *
 * Note that since streams are identified by ORTE process name, the
 * caller has no idea whether the stream is on the local node or a
 * remote node -- it's just a stream.
 *
 * IOF components are selected on a "one of many" basis, meaning that
 * only one IOF component will be selected for a given process.
 * Details for the various components are given in their source code
 * bases.
 *
 * Each IOF component must support the following API:
 *
 * push: Tie a local file descriptor (*not* a stream!) to the stdin
 * of the specified process. If the user has not specified that stdin
 * of the specified process is to remain open, this will return an error.
 *
 * pull: Tie a local file descriptor (*not* a stream!) to a stream.
 * Subsequent input that appears via the stream will
 * automatically be sent to the target file descriptor until the
 * stream is "closed" or an EOF is received on the local file descriptor.
 * Valid source values include ORTE_IOF_STDOUT, ORTE_IOF_STDERR, and
 * ORTE_IOF_INTERNAL.
 *
 * close: Closes a stream, flushing any pending data down it and
 * terminating any "push/pull" connections against it. Unclear yet
 * if this needs to be blocking, or can be done non-blocking.
 *
 * flush: Block until all pending data on all open streams has been
 * written down local file descriptors and/or completed sending across
 * the OOB to remote process targets.
 *
 */

#ifndef ORTE_IOF_H
#define ORTE_IOF_H

#include "orte_config.h"
#include "orte/types.h"

#include "orte/mca/mca.h"

#include "orte/runtime/orte_globals.h"

#include "iof_types.h"

BEGIN_C_DECLS

/**
 * Request a proxy PULL of IO on behalf of a tool that had the HNP
 * spawn a job.
 *
 * The expansion allocates a new opal_buffer_t, packs three items
 * into it, and hands it to orte_rml.send_buffer_nb() addressed to
 * ORTE_PROC_MY_HNP on ORTE_RML_TAG_IOF_HNP:
 *
 *   1. the tag (ORTE_IOF_STDOUTALL | ORTE_IOF_PULL),
 *   2. the name of the source to pull from: the job's jobid with
 *      ORTE_VPID_WILDCARD as the vpid,
 *   3. the name of the requesting tool.
 *
 * @param a  Pointer to the orte_job_t of the spawned job; only
 *           (a)->jobid is read.
 * @param b  Pointer to the name of the requesting tool (packed as
 *           ORTE_NAME).
 *
 * The expansion is a single do { ... } while(0) statement WITHOUT a
 * trailing semicolon; the caller supplies it, e.g.
 * "ORTE_IOF_PROXY_PULL(jdata, &name);". This keeps the macro usable
 * as the body of an unbraced if that is followed by an else.
 *
 * NOTE(review): the return codes of the pack and send calls are not
 * checked here, and the result of OBJ_NEW is not tested for NULL.
 * NOTE(review): the buffer is presumably released by
 * orte_rml_send_callback once the send completes -- confirm.
 * The locals buf, tag and nm live inside the expansion, so argument
 * expressions that mention those identifiers would be captured.
 */
#define ORTE_IOF_PROXY_PULL(a, b)                               \
    do {                                                        \
        opal_buffer_t *buf;                                     \
        orte_iof_tag_t tag;                                     \
        orte_process_name_t nm;                                 \
                                                                \
        buf = OBJ_NEW(opal_buffer_t);                           \
                                                                \
        /* setup the tag to pull from HNP */                    \
        tag = ORTE_IOF_STDOUTALL | ORTE_IOF_PULL;               \
        opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG);              \
        /* pack the name of the source we want to pull */       \
        nm.jobid = (a)->jobid;                                  \
        nm.vpid = ORTE_VPID_WILDCARD;                           \
        opal_dss.pack(buf, &nm, 1, ORTE_NAME);                  \
        /* pack the name of the tool */                         \
        opal_dss.pack(buf, (b), 1, ORTE_NAME);                  \
                                                                \
        /* send the buffer to the HNP */                        \
        orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,          \
                                ORTE_RML_TAG_IOF_HNP,           \
                                orte_rml_send_callback, NULL);  \
    } while(0)

/**
 * Initialize the selected module.
 *
 * Takes no arguments.
 *
 * @return int status code.
 *         NOTE(review): presumably ORTE_SUCCESS or an ORTE error
 *         code -- confirm against the component implementations.
 */
typedef int (*orte_iof_base_init_fn_t)(void);

/**
 * Explicitly push data from the specified input file descriptor to
 * the stdin of the indicated peer(s). The provided peer name can
 * include wildcard values.
 *
 * Per the file-level description of "push": if the user has not
 * specified that stdin of the target process is to remain open, this
 * returns an error.
 *
 * @param peer     Name of target peer(s)
 * @param src_tag  IOF tag for the stream.
 *                 NOTE(review): presumably ORTE_IOF_STDIN, since push
 *                 targets stdin -- confirm against callers.
 * @param fd       Local file descriptor for input.
 *
 * @return int status code.
 *         NOTE(review): presumably ORTE_SUCCESS or an ORTE error
 *         code -- confirm against the component implementations.
 */
typedef int (*orte_iof_base_push_fn_t)(const orte_process_name_t* peer,
                                       orte_iof_tag_t src_tag, int fd);

/**
 * Explicitly pull data from the specified set of SOURCE peers and
 * dump to the indicated output file descriptor. Any fragments that
 * arrive on the stream will automatically be written down the fd.
 *
 * Per the file-level description of "pull", valid source values
 * include ORTE_IOF_STDOUT, ORTE_IOF_STDERR, and ORTE_IOF_INTERNAL.
 *
 * @param peer          Name used to qualify set of origin peers.
 * @param source_tag    Indicates the output streams to be forwarded
 * @param fd            Local file descriptor for output.
 *
 * @return int status code.
 *         NOTE(review): presumably ORTE_SUCCESS or an ORTE error
 *         code -- confirm against the component implementations.
 */
typedef int (*orte_iof_base_pull_fn_t)(const orte_process_name_t* peer,
                                       orte_iof_tag_t source_tag,
                                       int fd);

/**
 * Close the specified iof stream(s) from the indicated peer(s)
 *
 * Per the file-level description of "close": flushes any pending
 * data down the stream and terminates any "push/pull" connections
 * against it. Whether this must block is noted there as undecided.
 *
 * @param peer          Name of the peer(s) whose stream(s) are closed.
 * @param source_tag    Tag identifying the stream(s) to close.
 *
 * @return int status code.
 *         NOTE(review): presumably ORTE_SUCCESS or an ORTE error
 *         code -- confirm against the component implementations.
 */
typedef int (*orte_iof_base_close_fn_t)(const orte_process_name_t* peer,
                                        orte_iof_tag_t source_tag);

/**
 * Flag that a job is complete.
 *
 * @param jdata  The job being flagged as complete; passed as a
 *               pointer-to-const, so the callee cannot modify the
 *               job object through this pointer.
 *
 * Returns nothing.
 */
typedef void (*orte_iof_base_complete_fn_t)(const orte_job_t *jdata);

/**
 * Finalize the selected module.
 *
 * Takes no arguments.
 *
 * @return int status code.
 *         NOTE(review): presumably ORTE_SUCCESS or an ORTE error
 *         code -- confirm against the component implementations.
 */
typedef int (*orte_iof_base_finalize_fn_t)(void);

/**
 * FT Event Notification
 *
 * NOTE(review): "FT" presumably stands for fault tolerance
 * (checkpoint/restart) -- confirm.
 *
 * @param state  Integer identifying the event being notified.
 *               NOTE(review): the set of valid values is not defined
 *               in this header -- confirm against callers.
 *
 * @return int status code.
 */
typedef int (*orte_iof_base_ft_event_fn_t)(int state);

/**
 *  IOF module.
 *
 *  Table of function pointers that an IOF component provides; one
 *  entry per operation typedef'd above. Field order is part of the
 *  module layout, so do not reorder.
 */
struct orte_iof_base_module_2_0_0_t {
    /* initialize the selected module */
    orte_iof_base_init_fn_t     init;
    /* push data from a local fd to the stdin of the target peer(s) */
    orte_iof_base_push_fn_t     push;
    /* pull data from the source peer(s) and write it to a local fd */
    orte_iof_base_pull_fn_t     pull;
    /* close the specified stream(s) from the indicated peer(s) */
    orte_iof_base_close_fn_t    close;
    /* flag that a job is complete */
    orte_iof_base_complete_fn_t complete;
    /* finalize the selected module */
    orte_iof_base_finalize_fn_t finalize;
    /* FT event notification */
    orte_iof_base_ft_event_fn_t ft_event;
};

/* Versioned typedef for the module struct, plus a version-independent
 * alias (orte_iof_base_module_t) for code that should not name the
 * version explicitly. */
typedef struct orte_iof_base_module_2_0_0_t orte_iof_base_module_2_0_0_t;
typedef orte_iof_base_module_2_0_0_t orte_iof_base_module_t;
/* Global IOF module instance; only declared here, defined elsewhere.
 * NOTE(review): presumably holds the function table of the selected
 * IOF component -- confirm against the framework's selection code. */
ORTE_DECLSPEC extern orte_iof_base_module_t orte_iof;

/**
 * IOF component.
 *
 * Pairs the generic MCA component descriptor with the MCA component
 * data block. NOTE(review): both member types come from the MCA base
 * headers; their exact contents are not visible in this file.
 */
struct orte_iof_base_component_2_0_0_t {
  /* generic MCA component descriptor */
  mca_base_component_t iof_version;
  /* generic MCA component data */
  mca_base_component_data_t iof_data;
};
/* Versioned typedef plus a version-independent alias for the
 * component struct. */
typedef struct orte_iof_base_component_2_0_0_t orte_iof_base_component_2_0_0_t;
typedef struct orte_iof_base_component_2_0_0_t orte_iof_base_component_t;

END_C_DECLS

/*
 * Macro for use in components that are of type iof
 *
 * Expands to ORTE_MCA_BASE_VERSION_2_1_0("iof", 2, 0, 0).
 * NOTE(review): the arguments are presumably the framework name
 * followed by the framework's major, minor and release version
 * numbers -- confirm against the ORTE_MCA_BASE_VERSION_2_1_0
 * definition.
 */
#define ORTE_IOF_BASE_VERSION_2_0_0 \
    ORTE_MCA_BASE_VERSION_2_1_0("iof", 2, 0, 0)

#endif /* ORTE_IOF_H */