1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
// RUN: %libomp-compile-and-run
// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
#include <stdio.h>
#include <omp.h>
#include "omp_my_sleep.h"
#define N 4
#define GRAIN 10
#define STRIDE 3
// globals
int th_counter[N];
int counter;
// Compiler-generated code (emulation)
typedef struct ident {
void* dummy;
} ident_t;
typedef struct shar {
int(*pth_counter)[N];
int *pcounter;
int *pj;
} *pshareds;
typedef struct task {
pshareds shareds;
int(* routine)(int,struct task*);
int part_id;
// privates:
unsigned long long lb; // library always uses ULONG
unsigned long long ub;
int st;
int last;
int i;
int j;
int th;
} *ptask, kmp_task_t;
typedef int(* task_entry_t)( int, ptask );
void
__task_dup_entry(ptask task_dst, ptask task_src, int lastpriv)
{
// setup lastprivate flag
task_dst->last = lastpriv;
// could be constructor calls here...
}
// OpenMP RTL interfaces
typedef unsigned long long kmp_uint64;
typedef long long kmp_int64;
#ifdef __cplusplus
extern "C" {
#endif
void
__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
int nogroup, int sched, kmp_int64 grainsize, void *task_dup );
ptask
__kmpc_omp_task_alloc( ident_t *loc, int gtid, int flags,
size_t sizeof_kmp_task_t, size_t sizeof_shareds,
task_entry_t task_entry );
void __kmpc_atomic_fixed4_add(void *id_ref, int gtid, int * lhs, int rhs);
int __kmpc_global_thread_num(void *id_ref);
#ifdef __cplusplus
}
#endif
// User's code
int task_entry(int gtid, ptask task)
{
pshareds pshar = task->shareds;
for( task->i = task->lb; task->i <= (int)task->ub; task->i += task->st ) {
task->th = omp_get_thread_num();
__kmpc_atomic_fixed4_add(NULL,gtid,pshar->pcounter,1);
__kmpc_atomic_fixed4_add(NULL,gtid,&((*pshar->pth_counter)[task->th]),1);
task->j = task->i;
}
my_sleep( 0.1 ); // sleep 100 ms in order to allow other threads to steal tasks
if( task->last ) {
*(pshar->pj) = task->j; // lastprivate
}
return 0;
}
int main()
{
int i, j, gtid = __kmpc_global_thread_num(NULL);
ptask task;
pshareds psh;
omp_set_dynamic(0);
counter = 0;
for( i=0; i<N; ++i )
th_counter[i] = 0;
#pragma omp parallel num_threads(N)
{
#pragma omp master
{
int gtid = __kmpc_global_thread_num(NULL);
/*
* This is what the OpenMP runtime calls correspond to:
#pragma omp taskloop num_tasks(N) lastprivate(j)
for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE )
{
int th = omp_get_thread_num();
#pragma omp atomic
counter++;
#pragma omp atomic
th_counter[th]++;
j = i;
}
*/
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL, // location
gtid, // gtid
task, // task structure
1, // if clause value
&task->lb, // lower bound
&task->ub, // upper bound
STRIDE, // loop increment
0, // 1 if nogroup specified
2, // schedule type: 0-none, 1-grainsize, 2-num_tasks
N, // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);
} // end master
} // end parallel
// check results
if( j != N*GRAIN*STRIDE-STRIDE ) {
printf("Error in lastprivate, %d != %d\n",j,N*GRAIN*STRIDE-STRIDE);
return 1;
}
if( counter != N*GRAIN ) {
printf("Error, counter %d != %d\n",counter,N*GRAIN);
return 1;
}
for( i=0; i<N; ++i ) {
if( th_counter[i] % GRAIN ) {
printf("Error, th_counter[%d] = %d\n",i,th_counter[i]);
return 1;
}
}
printf("passed\n");
return 0;
}
|