File: ad_iwrite.c

package info (click to toggle)
openmpi 5.0.8-3
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 201,692 kB
  • sloc: ansic: 613,078; makefile: 42,353; sh: 11,194; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,179; python: 1,859; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (345 lines) | stat: -rw-r--r-- 11,081 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
/*
 * Copyright (C) by Argonne National Laboratory
 *     See COPYRIGHT in top-level directory
 */

#include "adio.h"

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_AIO_H
#include <aio.h>
#endif
#ifdef HAVE_SYS_AIO_H
#include <sys/aio.h>
#endif
#include <time.h>

#include "../../mpi-io/mpioimpl.h"
#ifdef MPIO_BUILD_PROFILING
#include "../../mpi-io/mpioprof.h"
#endif
#include "mpiu_greq.h"
/* Workaround for incomplete set of definitions if __REDIRECT is not
   defined and large file support is used in aio.h */
#if !defined(__REDIRECT) && defined(__USE_FILE_OFFSET64)
#define aiocb aiocb64
#endif

#ifdef ROMIO_HAVE_WORKING_AIO

/* Generalized-request class used for the AIO requests posted by
 * ADIOI_GEN_aio().  The value 0 means "not created yet"; ADIOI_GEN_aio()
 * creates the class the first time it needs it. */
static MPIX_Grequest_class ADIOI_GEN_greq_class = 0;

/* ADIOI_GEN_IwriteContig
 *
 * This code handles only the case where ROMIO_HAVE_WORKING_AIO is
 * defined.  We post an asynchronous I/O operation using the appropriate aio
 * routines (through ADIOI_GEN_aio).  Otherwise, the ADIOI_Fns_struct will
 * point to the FAKE version.
 *
 * fd            - file to write to
 * buf           - data to write (count elements of datatype)
 * file_ptr_type - when ADIO_INDIVIDUAL, the write starts at fd->fp_ind
 *                 (the offset argument is ignored) and fd->fp_ind is then
 *                 advanced by the size of the write
 * offset        - starting offset, used when file_ptr_type is not
 *                 ADIO_INDIVIDUAL
 * request       - receives the request created by ADIOI_GEN_aio
 * error_code    - MPI_SUCCESS, or an error code built from the errno value
 *                 returned by ADIOI_GEN_aio
 */
void ADIOI_GEN_IwriteContig(ADIO_File fd, const void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Request * request, int *error_code)
{
    static char myname[] = "ADIOI_GEN_IWRITECONTIG";
    const int use_individual_fp = (file_ptr_type == ADIO_INDIVIDUAL);
    MPI_Count elem_size, nbytes;
    int post_errno;

    MPI_Type_size_x(datatype, &elem_size);
    nbytes = count * elem_size;

    if (use_individual_fp) {
        offset = fd->fp_ind;
    }

    /* ADIOI_GEN_aio serves both reads and writes and therefore takes a
     * non-const buffer; drop the const qualifier for the call. */
    post_errno = ADIOI_GEN_aio(fd, (char *) buf, count, datatype, offset, 1, request);

    /* The individual file pointer is moved whether or not posting worked. */
    if (use_individual_fp) {
        fd->fp_ind += nbytes;
    }

    fd->fp_sys_posn = -1;

    if (post_errno == 0) {
        *error_code = MPI_SUCCESS;
        return;
    }

    /* --BEGIN ERROR HANDLING-- */
    MPIO_ERR_CREATE_CODE_ERRNO(myname, post_errno, error_code);
    /* --END ERROR HANDLING-- */
}

/* This function is for implementation convenience.
 * It takes care of the differences in the interface for nonblocking I/O
 * on various Unix machines! If wr==1 write, wr==0 read.
 *
 * fd      - file to operate on
 * buf     - user buffer (count elements of type)
 * offset  - explicit file offset of the operation
 * request - receives either a generalized request tracking the posted aio
 *           operation, or an already-completed request when the blocking
 *           fallback was used
 *
 * Returns 0 when the operation was posted or was carried out by the
 * blocking fallback (taken when aio_read/aio_write fail with EAGAIN or
 * ENOSYS).  For any other failure of aio_read/aio_write it returns the
 * (positive) errno value of that call and leaves *request untouched.
 */
int ADIOI_GEN_aio(ADIO_File fd, void *buf, int count, MPI_Datatype type,
                  ADIO_Offset offset, int wr, MPI_Request * request)
{
    int err = -1, fd_sys;

    int error_code;
    struct aiocb *aiocbp = NULL;
    ADIOI_AIO_Request *aio_req = NULL;
    MPI_Status status;
    MPI_Count len, typesize;

    MPI_Type_size_x(type, &typesize);
    len = count * typesize;

#if defined(ROMIO_XFS)
    unsigned maxiosz = wr ? fd->hints->fs_hints.xfs.write_chunk_sz :
        fd->hints->fs_hints.xfs.read_chunk_sz;
#endif /* ROMIO_XFS */

    fd_sys = fd->fd_sys;

#if defined(ROMIO_XFS)
    /* Use Direct I/O if desired and properly aligned */
    if (fd->fns == &ADIO_XFS_operations &&
        ((wr && fd->direct_write) || (!wr && fd->direct_read)) &&
        !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) &&
        !(len % fd->d_miniosz) && (len >= fd->d_miniosz) && (len <= maxiosz)) {
        fd_sys = fd->fd_direct;
    }
#endif /* ROMIO_XFS */

    /* Both structures are zero-filled; only the fields this platform's
     * struct aiocb actually has are set below. */
    aio_req = (ADIOI_AIO_Request *) ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
    aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1);
    aiocbp->aio_offset = offset;
    aiocbp->aio_buf = buf;
    aiocbp->aio_nbytes = len;

#ifdef HAVE_STRUCT_AIOCB_AIO_WHENCE
    aiocbp->aio_whence = SEEK_SET;
#endif
#ifdef HAVE_STRUCT_AIOCB_AIO_FILDES
    aiocbp->aio_fildes = fd_sys;
#endif
#ifdef HAVE_STRUCT_AIOCB_AIO_SIGEVENT
#ifdef AIO_SIGNOTIFY_NONE
    aiocbp->aio_sigevent.sigev_notify = SIGEV_NONE;
#endif
    aiocbp->aio_sigevent.sigev_signo = 0;
#endif
#ifdef HAVE_STRUCT_AIOCB_AIO_REQPRIO
#ifdef AIO_PRIO_DFL
    aiocbp->aio_reqprio = AIO_PRIO_DFL; /* not needed in DEC Unix 4.0 */
#else
    aiocbp->aio_reqprio = 0;
#endif
#endif

#ifndef ROMIO_HAVE_AIO_CALLS_NEED_FILEDES
#ifndef HAVE_STRUCT_AIOCB_AIO_FILDES
#error 'No fildes set for aio structure'
#endif
    if (wr)
        err = aio_write(aiocbp);
    else
        err = aio_read(aiocbp);
#else
    /* Broken IBM interface */
    if (wr)
        err = aio_write(fd_sys, aiocbp);
    else
        err = aio_read(fd_sys, aiocbp);
#endif

    if (err == -1) {
        /* Capture errno immediately: the fallback I/O and the frees below
         * are free to overwrite it before we test or return it. */
        const int saved_errno = errno;
        int ret = saved_errno;

        if (saved_errno == EAGAIN || saved_errno == ENOSYS) {
            MPI_Offset nbytes;

            /* exceeded the max. no. of outstanding requests.
             * or, aio routines are not actually implemented
             * treat this as a blocking request and return.  */
            if (wr)
                ADIO_WriteContig(fd, buf, count, type,
                                 ADIO_EXPLICIT_OFFSET, offset, &status, &error_code);
            else
                ADIO_ReadContig(fd, buf, count, type,
                                ADIO_EXPLICIT_OFFSET, offset, &status, &error_code);

            /* Report a byte count only when the blocking call succeeded,
             * as ADIOI_GEN_IwriteStrided does.  The blocking call's error
             * code is handed to the completed request; this function
             * itself still returns 0 on this path. */
            nbytes = (error_code == MPI_SUCCESS) ? (MPI_Offset) len : 0;
            MPIO_Completed_request_create(&fd, nbytes, &error_code, request);
            ret = 0;
        }

        /* Nothing was posted, so neither structure is owned by a request. */
        ADIOI_Free(aio_req);
        ADIOI_Free(aiocbp);
        return ret;
    }

    /* Posted successfully: from here on the generalized request owns
     * aio_req (and through it aiocbp). */
    aio_req->aiocbp = aiocbp;
    if (ADIOI_GEN_greq_class == 0) {
        MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn,
                                   ADIOI_GEN_aio_free_fn, MPIU_Greq_cancel_fn,
                                   ADIOI_GEN_aio_poll_fn, ADIOI_GEN_aio_wait_fn,
                                   &ADIOI_GEN_greq_class);
    }
    MPIX_Grequest_class_allocate(ADIOI_GEN_greq_class, aio_req, request);
    /* Keep a copy of the request handle so the poll/wait callbacks can
     * complete it. */
    memcpy(&(aio_req->req), request, sizeof(MPI_Request));
    return 0;
}
#endif


/* Generic implementation of IwriteStrided: the blocking WriteStrided is
 * called right away and an already-completed request is handed back.
 *
 * The byte count recorded in the request is count * size-of-datatype when
 * the blocking write reports MPI_SUCCESS and 0 otherwise.  *error_code is
 * set by the blocking write and then passed on to
 * MPIO_Completed_request_create.
 */
void ADIOI_GEN_IwriteStrided(ADIO_File fd, const void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, MPI_Request * request, int *error_code)
{
    ADIO_Status write_status;
    MPI_Offset bytes_done = 0;

    /* The blocking routine creates an error code itself if it needs to. */
    ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, offset, &write_status, error_code);

    if (*error_code == MPI_SUCCESS) {
        MPI_Count elem_size;

        MPI_Type_size_x(datatype, &elem_size);
        bytes_done = (MPI_Offset) count *(MPI_Offset) elem_size;
    }

    MPIO_Completed_request_create(&fd, bytes_done, error_code, request);
}

#ifdef ROMIO_HAVE_WORKING_AIO
/* generic POSIX aio completion test routine
 *
 * extra_state is the ADIOI_AIO_Request of the operation; status is not
 * used.  When aio_error() reports 0 for the operation, its result is
 * collected with aio_return(), stored in nbytes, and the generalized
 * request is marked complete.  For every other aio_error() value nothing
 * is done and MPI_SUCCESS is returned.
 *
 * Returns MPI_SUCCESS, or an MPI_ERR_IO class error code if
 * MPI_Grequest_complete fails.
 */
int ADIOI_GEN_aio_poll_fn(void *extra_state, MPI_Status * status)
{
    ADIOI_AIO_Request *req = (ADIOI_AIO_Request *) extra_state;

    /* aio_error returns an ERRNO value */
    errno = aio_error(req->aiocbp);
    if (errno != 0) {
        /* EINPROGRESS - TODO: need to diddle with status somehow
         * ECANCELED   - TODO: unsure how to handle this
         * anything else is currently ignored as well */
        return MPI_SUCCESS;
    }

    req->nbytes = aio_return(req->aiocbp);

    if (MPI_Grequest_complete(req->req) == MPI_SUCCESS) {
        return MPI_SUCCESS;
    }

    /* --BEGIN ERROR HANDLING-- */
    return MPIO_Err_create_code(MPI_SUCCESS,
                                MPIR_ERR_RECOVERABLE,
                                "ADIOI_GEN_aio_poll_fn", __LINE__,
                                MPI_ERR_IO, "**mpi_grequest_complete", 0);
    /* --END ERROR HANDLING-- */
}

/* wait for multiple requests to complete
 *
 * count           - number of entries in array_of_states
 * array_of_states - ADIOI_AIO_Request pointers, one per request
 * timeout         - seconds; when > 0 it bounds each aio_suspend() call and
 *                   the loop is left once that much wall-clock time has
 *                   passed.  Otherwise the routine waits until every
 *                   request has completed.
 * status          - not used
 *
 * Each operation for which aio_error() reports 0 has its result collected
 * with aio_return(), its generalized request completed, and its aiocb
 * freed (aiocbp is reset to NULL).
 *
 * Returns MPI_SUCCESS, or an MPI_ERR_IO class error code if any
 * MPI_Grequest_complete call failed.
 */
int ADIOI_GEN_aio_wait_fn(int count, void **array_of_states, double timeout, MPI_Status * status)
{
    const struct aiocb **cblist;
    int err, errcode = MPI_SUCCESS;
    int nr_complete = 0;
    double starttime;
    struct timespec aio_timer;
    struct timespec *aio_timer_p = NULL;

    ADIOI_AIO_Request **aio_reqlist;
    int i;

    aio_reqlist = (ADIOI_AIO_Request **) array_of_states;

    cblist = (const struct aiocb **) ADIOI_Calloc(count, sizeof(struct aiocb *));

    starttime = MPI_Wtime();
    if (timeout > 0) {
        long nsec;

        /* Split the timeout into whole seconds and the remaining fraction
         * expressed in nanoseconds; tv_nsec must stay in [0, 999999999]. */
        aio_timer.tv_sec = (time_t) timeout;
        nsec = (long) ((timeout - (double) aio_timer.tv_sec) * 1.0e9);
        if (nsec < 0)
            nsec = 0;
        else if (nsec > 999999999L)
            nsec = 999999999L;
        aio_timer.tv_nsec = nsec;
        aio_timer_p = &aio_timer;
    }
    for (i = 0; i < count; i++) {
        cblist[i] = aio_reqlist[i]->aiocbp;
        /* A NULL aiocbp marks a request this routine already finished;
         * the loop below skips it, so it has to be counted here or the
         * completion count could never reach count. */
        if (cblist[i] == NULL)
            nr_complete++;
    }

    while (nr_complete < count) {
        do {
            err = aio_suspend(cblist, count, aio_timer_p);
        } while (err < 0 && errno == EINTR);
        if (err == 0) { /* run through the list of requests, and mark all the completed
                         * ones as done */
            for (i = 0; i < count; i++) {
                int aio_err;

                if (aio_reqlist[i]->aiocbp == NULL)
                    continue;
                /* aio_error returns an ERRNO value */
                aio_err = aio_error(aio_reqlist[i]->aiocbp);
                if (aio_err == 0) {
                    int rc;
                    ssize_t n = aio_return(aio_reqlist[i]->aiocbp);

                    aio_reqlist[i]->nbytes = n;
                    rc = MPI_Grequest_complete(aio_reqlist[i]->req);
                    /* Only overwrite errcode on failure so that a later
                     * successful completion does not hide an earlier error. */
                    if (rc != MPI_SUCCESS) {
                        errcode = MPIO_Err_create_code(MPI_SUCCESS,
                                                       MPIR_ERR_RECOVERABLE,
                                                       "ADIOI_GEN_aio_wait_fn",
                                                       __LINE__, MPI_ERR_IO,
                                                       "**mpi_grequest_complete", 0);
                    }
                    ADIOI_Free(aio_reqlist[i]->aiocbp);
                    aio_reqlist[i]->aiocbp = NULL;
                    cblist[i] = NULL;
                    nr_complete++;
                }
                /* TODO: need to handle error conditions somehow; an
                 * operation that finished with an error is never counted
                 * as complete here */
            }
        }       /* TODO: also need to handle errors here  */
        if ((timeout > 0) && (timeout < (MPI_Wtime() - starttime)))
            break;
    }

    if (cblist != NULL)
        ADIOI_Free(cblist);
    return errcode;
}

/* Free callback of the AIO generalized request: releases the aiocb, if the
 * request still has one, and then the ADIOI_AIO_Request itself.
 * Always returns MPI_SUCCESS. */
int ADIOI_GEN_aio_free_fn(void *extra_state)
{
    ADIOI_AIO_Request *req = (ADIOI_AIO_Request *) extra_state;

    if (req->aiocbp) {
        ADIOI_Free(req->aiocbp);
    }
    ADIOI_Free(req);

    return MPI_SUCCESS;
}
#endif /* working AIO */

/* Query callback of the AIO generalized request: fills in status from the
 * ADIOI_AIO_Request passed as extra_state.  The element count is the
 * request's nbytes in units of MPI_BYTE; the request is reported as not
 * cancelled; source and tag are set to MPI_UNDEFINED.
 * Always returns MPI_SUCCESS. */
int ADIOI_GEN_aio_query_fn(void *extra_state, MPI_Status * status)
{
    ADIOI_AIO_Request *req = (ADIOI_AIO_Request *) extra_state;

    MPI_Status_set_elements_x(status, MPI_BYTE, req->nbytes);

    /* these requests are never cancelled, so the flag is always 0 */
    MPI_Status_set_cancelled(status, 0);

    /* neither a source nor a tag is meaningful for this generalized
     * request, so no value is returned for them */
    status->MPI_SOURCE = MPI_UNDEFINED;
    status->MPI_TAG = MPI_UNDEFINED;

    /* this generalized request never fails */
    return MPI_SUCCESS;
}