File: ad_opencoll.c

package info (click to toggle)
openmpi 2.0.2-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 99,912 kB
  • ctags: 55,589
  • sloc: ansic: 525,999; f90: 18,307; makefile: 12,062; sh: 6,583; java: 6,278; asm: 3,515; cpp: 2,227; perl: 2,136; python: 1,350; lex: 734; fortran: 52; tcl: 12
file content (129 lines) | stat: -rw-r--r-- 4,776 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *
 *   Copyright (C) 2007 UChicago/Argonne LLC
 *   See COPYRIGHT notice in top-level directory.
 */

#include "adio.h"

/* Generic version of a "collective open".  Assumes a "real" underlying
 * file system (meaning no wonky consistency semantics like NFS).
 *
 * Optimization: by having just one process create the file, close it,
 * and then having all N processes open it, we can possibly avoid contention
 * for write locks on a directory for some file systems.
 *
 * Happy side effect: exclusive create (error if file already exists)
 * just falls out.
 *
 * Note: this is not a "scalable open" (c.f. "The impact of file systems
 * on MPI-IO scalability").
 *
 * Parameters:
 *   fd          - file handle being opened.  This routine reads
 *                 fd->hints, fd->fns, fd->comm and fd->is_agg, and writes
 *                 fd->access_mode, fd->blksize and fd->is_open.
 *   rank        - rank of the caller; compared against
 *                 fd->hints->ranklist[0] to select the one process that
 *                 performs the create step.
 *   access_mode - ADIO_* access-mode flags requested for the open.
 *   error_code  - output: MPI_SUCCESS, or the code set by the
 *                 file-system-specific open/close routines.
 *
 * Collective behaviour: when ADIO_CREATE is set, every caller takes part in
 * one MPI_Bcast of the error code; every caller that gets past the create
 * phase takes part in one MPI_Bcast of fd->blksize.  Both are rooted at
 * fd->hints->ranklist[0] on fd->comm.
 */

void ADIOI_GEN_OpenColl(ADIO_File fd, int rank,
                        int access_mode, int *error_code)
{
    int orig_amode_excl, orig_amode_wronly;
    MPI_Comm tmp_comm;

    /* remember the caller's mode so it can be restored in fd before return */
    orig_amode_excl = access_mode;

    if (access_mode & ADIO_CREATE) {
        if (rank == fd->hints->ranklist[0]) {
            /* remove delete_on_close flag if set */
            if (access_mode & ADIO_DELETE_ON_CLOSE)
                fd->access_mode = access_mode ^ ADIO_DELETE_ON_CLOSE;
            else
                fd->access_mode = access_mode;

            /* the create is done by this process alone, so run the
             * file-system open on MPI_COMM_SELF, then restore fd->comm */
            tmp_comm = fd->comm;
            fd->comm = MPI_COMM_SELF;
            (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
            fd->comm = tmp_comm;

            /* if no error, close the file and reopen normally below */
            if (*error_code == MPI_SUCCESS)
                (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);

            fd->access_mode = access_mode; /* back to original */
        }

        /* Share the outcome of the create phase only after the close has
         * run.  If the code were broadcast before the close, a close
         * failure would be visible on the creating process alone: it would
         * return below while every other process went on to the blksize
         * broadcast rooted at that same process, leaving the collectives
         * mismatched. */
        MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);

        if (*error_code != MPI_SUCCESS)
            return;

        /* turn off CREAT (and EXCL if set) for real multi-processor open */
        access_mode ^= ADIO_CREATE;
        if (access_mode & ADIO_EXCL)
            access_mode ^= ADIO_EXCL;
    }

    fd->blksize = 1024*1024*4; /* this large default value should be good for
                                  most file systems.  any ROMIO driver is free
                                  to stat the file and find an optimal value */

    /* if we are doing deferred open, non-aggregators should return now */
    if (fd->hints->deferred_open) {
        if (!(fd->is_agg)) {
            /* we might have turned off EXCL for the aggregators.
             * restore access_mode so that non-aggregators get the right
             * value from get_amode */
            fd->access_mode = orig_amode_excl;
            /* In file-system specific open, a driver might collect some
             * information via stat().  Deferred open means not every process
             * participates in fs-specific open, but they all participate in
             * this open call.  Broadcast a bit of information in case
             * lower-level file system driver (e.g. 'bluegene') collected it
             * (not all do).  This call pairs with the blksize broadcast at
             * the end of this function on the processes that do open. */
            MPI_Bcast(&(fd->blksize), 1, MPI_LONG, fd->hints->ranklist[0], fd->comm);
            *error_code = MPI_SUCCESS;
            ADIOI_Assert(fd->blksize > 0);
            return;
        }
    }

    /* For writing with data sieving, a read-modify-write is needed. If
       the file is opened for write_only, the read will fail. Therefore,
       if write_only, open the file as read_write, but record it as write_only
       in fd, so that get_amode returns the right answer. */

    /* observation from David Knaak: file systems that do not support data
     * sieving do not need to change the mode */

    orig_amode_wronly = access_mode;
    if ((access_mode & ADIO_WRONLY) &&
        ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES)) {
        access_mode = access_mode ^ ADIO_WRONLY;
        access_mode = access_mode | ADIO_RDWR;
    }
    fd->access_mode = access_mode;

    (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);

    /* if error, maybe it was due to the change in amode above.
       therefore, reopen with access mode provided by the user. */
    fd->access_mode = orig_amode_wronly;
    if (*error_code != MPI_SUCCESS)
        (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);

    /* if we turned off EXCL earlier, then we should turn it back on */
    if (fd->access_mode != orig_amode_excl)
        fd->access_mode = orig_amode_excl;

    /* broadcast a bit of information (blocksize for now) to all processes in
     * communicator, not just those who participated in open */
    MPI_Bcast(&(fd->blksize), 1, MPI_LONG, fd->hints->ranklist[0], fd->comm);
    /* file domain code will get terribly confused in a hard-to-debug way if
     * gpfs blocksize not sensible */
    ADIOI_Assert(fd->blksize > 0);
    /* for deferred open: this process has opened the file (because if we are
     * not an aggregator and we are doing deferred open, we returned earlier).
     * Note this flag is set regardless of the value left in *error_code. */
    fd->is_open = 1;

}

/*
 * vim: ts=8 sts=4 sw=4 noexpandtab
 */