File: stencil.h

package info (click to toggle)
starpu-contrib 1.0.1%2Bdfsg-1
  • links: PTS, VCS
  • area: contrib
  • in suites: wheezy
  • size: 13,836 kB
  • sloc: ansic: 77,357; cpp: 23,334; sh: 12,088; makefile: 2,086; lisp: 758; yacc: 185; sed: 126; fortran: 13
file content (153 lines) | stat: -rw-r--r-- 4,499 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
 * Copyright (C) 2010-2011  Université de Bordeaux 1
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#ifndef __STENCIL_H__
#define __STENCIL_H__

#include <stdlib.h>
#include <stdio.h>
#include <starpu.h>
#include <starpu_top.h>
#ifdef STARPU_USE_CUDA
#include <starpu_cuda.h>
#endif

#ifndef __CUDACC__
#ifdef STARPU_USE_MPI
#include <mpi.h>
#include <starpu_mpi.h>
#endif
#endif

/* Build-time switch for the cell type.  LIFE is defined unconditionally here,
 * so TYPE is unsigned char and life_update() is declared; removing the
 * definition of LIFE makes TYPE float and drops that declaration. */
#define LIFE

#ifdef LIFE
#define TYPE	unsigned char
/* CPU update kernel, defined elsewhere.  `old` is read-only and `newp` is
 * written (per the const qualifiers).  NOTE(review): nx/ny/nz look like the
 * block extents and ldy/ldz like the leading dimensions of the arrays --
 * confirm against the definition. */
extern void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter);
#else
#define TYPE	float
#endif

/* NOTE(review): K is not used in this header; presumably the stencil radius /
 * thickness of the exchanged border -- confirm against the kernels. */
#define K	1

/* Number of neighbour directions; used below as the first dimension of the
 * boundaries[] / boundaries_handle[] / boundary_blocks[] arrays of
 * struct block_description. */
#define NDIRS 2
/* starpu_top data objects, defined elsewhere.  NOTE(review): presumably used
 * to report the initialisation loop and the achieved iterations -- confirm. */
extern struct starpu_top_data* starpu_top_init_loop;
extern struct starpu_top_data* starpu_top_achieved_loop;


/*
 * Neighbour direction of a block.  The domain is split only along the z axis
 * to keep things simple, hence just two directions.
 *
 * The enumerators rely on implicit numbering: B is 0 and T is 1.
 * NOTE(review): B/T presumably stand for bottom/top -- confirm.
 */
typedef enum
{
	B,	/* == 0 */
	T	/* == 1 */
} direction;

/* Description of a domain block.
 *
 * Member order is part of the structure layout; do not reorder. */
struct block_description
{
	/* Which MPI node should process this block? */
	unsigned mpi_node;
	
	/* NOTE(review): presumably the id of the worker this block's tasks
	 * should run on -- confirm against assign_blocks_to_workers(). */
	unsigned preferred_worker;

	/* NOTE(review): presumably this block's index or extent along z --
	 * confirm against create_blocks_array(). */
	unsigned bz;


	/* For each of the following buffers, there are two (0/1) buffers to
	 * make the new/old switch costless. */

	/* This is the computation buffer for this block; it includes the
	 * neighbours' borders to make computation easier. */
	TYPE *layers[2];
	/* StarPU data handles matching layers[0] and layers[1]. */
	starpu_data_handle_t layers_handle[2];

	/* This is the "save" buffer, i.e. a copy of our neighbour's border.
	 * This one is used for CPU/GPU or MPI communication (rather than the
	 * whole domain block).  First index: direction (NDIRS entries);
	 * second index: the 0/1 buffer. */
	TYPE *boundaries[NDIRS][2];
	/* StarPU data handles matching boundaries[dir][0/1]. */
	starpu_data_handle_t boundaries_handle[NDIRS][2];

	/* Shortcut pointers to the neighbouring blocks, one per direction. */
	struct block_description *boundary_blocks[NDIRS];
};

/* Fixed StarPU tag value (1).  NOTE(review): presumably identifies the
 * initial task of the task graph -- confirm at the point of use. */
#define TAG_INIT_TASK			((starpu_tag_t)1)

/* Tag helpers, defined elsewhere.  The first two return a StarPU tag and the
 * last two a plain int, each computed from the given block index z (plus an
 * iteration and/or direction).  NOTE(review): the int results are presumably
 * MPI message tags, and each distinct argument tuple is presumably expected
 * to map to a distinct tag -- confirm against the definitions. */
starpu_tag_t TAG_FINISH(int z);
starpu_tag_t TAG_START(int z, int dir);
int MPI_TAG0(int z, int iter, int dir);
int MPI_TAG1(int z, int iter, int dir);

/*
 * MIN(a, b): the smaller of the two arguments; yields b when neither compares
 * less than the other.
 *
 * The definition is guarded because system headers (e.g. <sys/param.h>,
 * which other headers may pull in) commonly provide their own MIN;
 * redefining it with a different token sequence triggers a macro-redefinition
 * diagnostic.
 *
 * Caution: each argument may be evaluated twice, so do not pass expressions
 * with side effects (e.g. MIN(i++, j)).
 */
#ifndef MIN
#define MIN(a,b)	((a)<(b)?(a):(b))
#endif

/*
 * Domain set-up, execution and verification entry points.  Only the
 * prototypes are visible in this header; the per-function notes are inferred
 * from names and signatures and must be checked against the definitions.
 */

/* NOTE(review): presumably builds the block descriptions for a
 * sizex x sizey x sizez domain cut into nbz blocks along z -- confirm. */
void create_blocks_array(unsigned sizex, unsigned sizey, unsigned sizez, unsigned nbz);
/* Returns a pointer to the description associated with block index z. */
struct block_description *get_block_description(int z);
/* NOTE(review): presumably fills block_description.mpi_node for every block,
 * distributing them over world_size MPI nodes -- confirm. */
void assign_blocks_to_mpi_nodes(int world_size);
/* The following take the rank of the calling node.  NOTE(review): presumably
 * each acts only on the blocks owned by that rank -- confirm. */
void allocate_memory_on_node(int rank);
void assign_blocks_to_workers(int rank);
void create_tasks(int rank);
void wait_end_tasks(int rank);
void check(int rank);

void display_memory_consumption(int rank);

/* Per-block accessors, indexed by block number z.  NOTE(review): presumably
 * they return block_description.mpi_node and the block's size along z
 * respectively -- confirm against the definitions. */
unsigned get_block_mpi_node(int z);
unsigned get_block_size(int z);
/* Run-time configuration getters, defined elsewhere.  NOTE(review): names
 * suggest "bind tasks to workers" flag, number of z blocks, number of
 * iterations and tick count -- confirm. */
unsigned get_bind_tasks(void);

unsigned get_nbz(void);
unsigned get_niter(void);
unsigned get_ticks(void);

/* Maps a local worker id to an unsigned "global" id.  NOTE(review): the
 * exact mapping (e.g. an offset per MPI rank) is not visible here. */
unsigned global_workerid(unsigned local_workerid);

/* Task creation for iteration `iter` of block `z`, defined elsewhere.
 * NOTE(review): presumably create_task_update submits the update task (see
 * cl_update) and create_task_save the border-save task for direction `dir`
 * (see save_cl_bottom / save_cl_top) -- confirm.  Note that z is unsigned
 * here whereas the accessors above take an int. */
void create_task_update(unsigned iter, unsigned z, unsigned local_rank);
void create_task_save(unsigned iter, unsigned z, int dir, unsigned local_rank);

/* Declared unconditionally, whereas <starpu_mpi.h> is only included above
 * when STARPU_USE_MPI is defined and __CUDACC__ is not.
 * NOTE(review): presumably kept so that callers compile in every
 * configuration; in a build without MPI these symbols would still have to be
 * provided at link time if referenced -- confirm. */
extern int starpu_mpi_initialize(void);
extern int starpu_mpi_shutdown(void);

/* Kernels: StarPU codelets, defined elsewhere. */
extern struct starpu_codelet cl_update;
extern struct starpu_codelet save_cl_bottom;
extern struct starpu_codelet save_cl_top;

/* One entry per possible worker (STARPU_NMAXWORKERS).  NOTE(review):
 * presumably counts of update / top-save / bottom-save tasks executed by each
 * worker -- confirm. */
extern unsigned update_per_worker[STARPU_NMAXWORKERS];
extern unsigned top_per_worker[STARPU_NMAXWORKERS];
extern unsigned bottom_per_worker[STARPU_NMAXWORKERS];

/* Timing / tracing state, defined elsewhere.
 * NOTE(review): struct timeval comes from <sys/time.h>, which this header
 * does not include directly; presumably it arrives through <starpu.h> --
 * confirm.  who_runs_what_len is presumably the element count of
 * who_runs_what -- confirm. */
extern struct timeval start;
extern int who_runs_what_len;
extern int *who_runs_what;
extern int *who_runs_what_index;
extern struct timeval *last_tick;

/* Linkage prefix for the GPU host wrappers below.  It expands to nothing
 * unless the including file defined _externC beforehand.
 * NOTE(review): presumably C++/CUDA translation units define it as
 * extern "C" before including this header -- confirm. */
#ifndef _externC
#define _externC
#endif

/* CUDA host-side entry points, defined elsewhere.  cuda_life_update_host
 * takes the same parameter list as life_update(). */
_externC void cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter);
_externC void cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i);

/* OpenCL host-side entry points, defined elsewhere.  NOTE(review): the
 * *_init / *_free pairs presumably load and release the OpenCL programs and
 * must bracket any use of the matching *_host function -- confirm. */
_externC void opencl_shadow_init(void);
_externC void opencl_shadow_free(void);
_externC void opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i);
_externC void opencl_life_init(void);
_externC void opencl_life_free(void);
_externC void opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter);

#endif /* __STENCIL_H__ */