Actual source code: threadcomm.c
petsc-3.4.2 2013-07-02
1: #include <petsc-private/threadcommimpl.h> /*I "petscthreadcomm.h" I*/
2: #include <petscviewer.h>
3: #if defined(PETSC_HAVE_MALLOC_H)
4: #include <malloc.h>
5: #endif
7: static PetscInt N_CORES = -1;
8: PetscBool PetscThreadCommRegisterAllCalled = PETSC_FALSE;
9: PetscFunctionList PetscThreadCommList = NULL;
10: PetscMPIInt Petsc_ThreadComm_keyval = MPI_KEYVAL_INVALID;
11: PetscThreadCommJobQueue PetscJobQueue = NULL;
12: PetscThreadComm PETSC_THREAD_COMM_WORLD = NULL;
14: /* Logging support */
15: PetscLogEvent ThreadComm_RunKernel, ThreadComm_Barrier;
17: static PetscErrorCode PetscThreadCommRunKernel0_Private(PetscThreadComm tcomm,PetscErrorCode (*func)(PetscInt,...));
21: /*@
22: PetscGetNCores - Gets the number of available cores on the system
24: Not Collective
26: Level: developer
28: Notes
29: Defaults to 1 if the available core count cannot be found
31: @*/
32: PetscErrorCode PetscGetNCores(PetscInt *ncores)
33: {
35: if (N_CORES == -1) {
36: N_CORES = 1; /* Default value if number of cores cannot be found out */
38: #if defined(PETSC_HAVE_SYS_SYSINFO_H) && (PETSC_HAVE_GET_NPROCS) /* Linux */
39: N_CORES = get_nprocs();
40: #elif defined(PETSC_HAVE_SYS_SYSCTL_H) && (PETSC_HAVE_SYSCTLBYNAME) /* MacOS, BSD */
41: {
43: size_t len = sizeof(N_CORES);
44: sysctlbyname("hw.activecpu",&N_CORES,&len,NULL,0); /* osx preferes activecpu over ncpu */
45: if (ierr) { /* freebsd check ncpu */
46: sysctlbyname("hw.ncpu",&N_CORES,&len,NULL,0);
47: /* continue even if there is an error */
48: }
49: }
50: #elif defined(PETSC_HAVE_WINDOWS_H) /* Windows */
51: {
52: SYSTEM_INFO sysinfo;
53: GetSystemInfo(&sysinfo);
54: N_CORES = sysinfo.dwNumberOfProcessors;
55: }
56: #endif
57: }
58: if (ncores) *ncores = N_CORES;
59: return(0);
60: }
62: PetscErrorCode PetscThreadCommWorldInitialize();
65: /*
66: PetscGetThreadCommWorld - Gets the global thread communicator.
67: Creates it if it does not exist already.
69: Not Collective
71: Output Parameters:
72: tcommp - pointer to the global thread communicator
74: Level: Intermediate
75: */
76: PetscErrorCode PetscGetThreadCommWorld(PetscThreadComm *tcommp)
77: {
81: if (!PETSC_THREAD_COMM_WORLD) {
82: PetscThreadCommWorldInitialize();
83: }
84: *tcommp = PETSC_THREAD_COMM_WORLD;
85: return(0);
86: }
90: /*@C
91: PetscCommGetThreadComm - Gets the thread communicator
92: associated with the MPI communicator
94: Not Collective
96: Input Parameters:
97: . comm - the MPI communicator
99: Output Parameters:
100: . tcommp - pointer to the thread communicator
102: Notes: If no thread communicator is on the MPI_Comm then the global thread communicator
103: is returned.
104: Level: Intermediate
106: .seealso: PetscThreadCommCreate(), PetscThreadCommDestroy()
107: @*/
108: PetscErrorCode PetscCommGetThreadComm(MPI_Comm comm,PetscThreadComm *tcommp)
109: {
111: PetscMPIInt flg;
112: void *ptr;
115: MPI_Attr_get(comm,Petsc_ThreadComm_keyval,(PetscThreadComm*)&ptr,&flg);
116: if (!flg) {
117: PetscGetThreadCommWorld(tcommp);
118: } else *tcommp = (PetscThreadComm)ptr;
119: return(0);
120: }
124: /*
125: PetscThreadCommCreate - Allocates a thread communicator object
127: Not Collective
129: Output Parameters:
130: . tcomm - pointer to the thread communicator object
132: Level: developer
134: .seealso: PetscThreadCommDestroy()
135: */
136: PetscErrorCode PetscThreadCommCreate(PetscThreadComm *tcomm)
137: {
138: PetscErrorCode ierr;
139: PetscThreadComm tcommout;
144: *tcomm = NULL;
146: PetscNew(struct _p_PetscThreadComm,&tcommout);
147: tcommout->refct = 0;
148: tcommout->nworkThreads = -1;
149: tcommout->affinities = NULL;
150: PetscNew(struct _PetscThreadCommOps,&tcommout->ops);
151: tcommout->leader = 0;
152: *tcomm = tcommout;
154: return(0);
155: }
157: #if defined(PETSC_USE_DEBUG)
159: PetscErrorCode PetscThreadCommStackCreate_kernel(PetscInt trank)
160: {
161: PetscStack *petscstack_in;
162: if (!trank && PetscStackActive()) return 0;
164: petscstack_in = (PetscStack*)malloc(sizeof(PetscStack));
165: petscstack_in->currentsize = 0;
166: PetscThreadLocalSetValue((PetscThreadKey*)&petscstack,petscstack_in);
167: return 0;
168: }
170: /* Creates stack frames for threads other than the main thread */
173: PetscErrorCode PetscThreadCommStackCreate(void)
174: {
177: PetscThreadCommRunKernel0(PETSC_COMM_SELF,(PetscThreadKernel)PetscThreadCommStackCreate_kernel);
178: PetscThreadCommBarrier(PETSC_COMM_SELF);
179: return 0;
180: }
182: PetscErrorCode PetscThreadCommStackDestroy_kernel(PetscInt trank)
183: {
184: if (trank && PetscStackActive()) {
185: PetscStack *petscstack_in;
186: petscstack_in = (PetscStack*)PetscThreadLocalGetValue(petscstack);
187: free(petscstack_in);
188: PetscThreadLocalSetValue((PetscThreadKey*)&petscstack,(PetscStack*)0);
189: }
190: return 0;
191: }
195: /* Destroy stack frames for threads other than main thread
196: *
197: * The keyval may have been destroyed by the time this function is called, thus we must call
198: * PetscThreadCommRunKernel0_Private so that we never reference an MPI_Comm.
199: */
200: PetscErrorCode PetscThreadCommStackDestroy(void)
201: {
204: PetscThreadCommRunKernel0_Private(PETSC_THREAD_COMM_WORLD,(PetscThreadKernel)PetscThreadCommStackDestroy_kernel);
205: PETSC_THREAD_COMM_WORLD = NULL;
206: return(0);
207: return 0;
208: }
209: #else
212: PetscErrorCode PetscThreadCommStackCreate(void)
213: {
215: return(0);
216: }
220: PetscErrorCode PetscThreadCommStackDestroy(void)
221: {
223: PETSC_THREAD_COMM_WORLD = NULL;
224: return(0);
225: }
227: #endif
231: /*
232: PetscThreadCommDestroy - Frees a thread communicator object
234: Not Collective
236: Input Parameters:
237: . tcomm - the PetscThreadComm object
239: Level: developer
241: .seealso: PetscThreadCommCreate()
242: */
243: PetscErrorCode PetscThreadCommDestroy(PetscThreadComm *tcomm)
244: {
248: if (!*tcomm) return(0);
249: if (!--(*tcomm)->refct) {
250: PetscThreadCommStackDestroy();
251: /* Destroy the implementation specific data struct */
252: if ((*tcomm)->ops->destroy) (*(*tcomm)->ops->destroy)(*tcomm);
254: PetscFree((*tcomm)->affinities);
255: PetscFree((*tcomm)->ops);
256: PetscFree(PetscJobQueue->jobs[0].job_status);
257: PetscFree(PetscJobQueue->jobs);
258: PetscFree(PetscJobQueue);
259: PetscThreadCommReductionDestroy((*tcomm)->red);
260: PetscFree((*tcomm));
261: }
262: *tcomm = NULL;
263: return(0);
264: }
268: /*@C
269: PetscThreadCommView - view a thread communicator
271: Collective on comm
273: Input Parameters:
274: + comm - MPI communicator
275: - viewer - viewer to display, for example PETSC_VIEWER_STDOUT_WORLD
277: Level: developer
279: .seealso: PetscThreadCommCreate()
280: @*/
281: PetscErrorCode PetscThreadCommView(MPI_Comm comm,PetscViewer viewer)
282: {
283: PetscErrorCode ierr;
284: PetscBool iascii;
285: PetscThreadComm tcomm=0;
288: PetscCommGetThreadComm(comm,&tcomm);
289: if (!viewer) {PetscViewerASCIIGetStdout(comm,&viewer);}
290: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
291: if (iascii) {
292: PetscViewerASCIIPrintf(viewer,"Thread Communicator\n");
293: PetscViewerASCIIPushTab(viewer);
294: PetscViewerASCIIPrintf(viewer,"Number of threads = %D\n",tcomm->nworkThreads);
295: PetscViewerASCIIPrintf(viewer,"Type = %s\n",tcomm->type);
296: PetscViewerASCIIPopTab(viewer);
297: if (tcomm->ops->view) {
298: PetscViewerASCIIPushTab(viewer);
299: (*tcomm->ops->view)(tcomm,viewer);
300: PetscViewerASCIIPopTab(viewer);
301: }
302: }
303: return(0);
304: }
308: /*
309: PetscThreadCommSetNThreads - Set the thread count for the thread communicator
311: Not collective
313: Input Parameters:
314: + tcomm - the thread communicator
315: - nthreads - Number of threads
317: Options Database keys:
318: -threadcomm_nthreads <nthreads> Number of threads to use
320: Level: developer
322: Notes:
323: Defaults to using 1 thread.
325: Use nthreads = PETSC_DECIDE or -threadcomm_nthreads PETSC_DECIDE for PETSc to decide the number of threads.
328: .seealso: PetscThreadCommGetNThreads()
329: */
330: PetscErrorCode PetscThreadCommSetNThreads(PetscThreadComm tcomm,PetscInt nthreads)
331: {
333: PetscBool flg;
334: PetscInt nthr;
337: if (nthreads == PETSC_DECIDE) {
338: tcomm->nworkThreads = 1;
339: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting number of threads",NULL);
340: PetscOptionsInt("-threadcomm_nthreads","number of threads to use in the thread communicator","PetscThreadCommSetNThreads",1,&nthr,&flg);
341: PetscOptionsEnd();
342: if (flg) {
343: if (nthr == PETSC_DECIDE) tcomm->nworkThreads = N_CORES;
344: else tcomm->nworkThreads = nthr;
345: }
346: } else tcomm->nworkThreads = nthreads;
347: return(0);
348: }
352: /*@C
353: PetscThreadCommGetNThreads - Gets the thread count from the thread communicator
354: associated with the MPI communicator
356: Not collective
358: Input Parameters:
359: . comm - the MPI communicator
361: Output Parameters:
362: . nthreads - number of threads
364: Level: developer
366: .seealso: PetscThreadCommSetNThreads()
367: @*/
368: PetscErrorCode PetscThreadCommGetNThreads(MPI_Comm comm,PetscInt *nthreads)
369: {
370: PetscErrorCode ierr;
371: PetscThreadComm tcomm=0;
374: PetscCommGetThreadComm(comm,&tcomm);
375: *nthreads = tcomm->nworkThreads;
376: return(0);
377: }
381: /*
382: PetscThreadCommSetAffinities - Sets the core affinity for threads
383: (which threads run on which cores)
385: Not collective
387: Input Parameters:
388: + tcomm - the thread communicator
389: - affinities - array of core affinity for threads
391: Options Database keys:
392: . -threadcomm_affinities <list of thread affinities>
394: Level: developer
396: Notes:
397: Use affinities = NULL for PETSc to decide the affinities.
398: If PETSc decides affinities, then each thread has affinity to
399: a unique core with the main thread on Core 0, thread0 on core 1,
400: and so on. If the thread count is more than the number of available
401: cores then multiple threads share a core.
403: The first value is the affinity for the main thread
405: The affinity list can be passed as
406: a comma separated list: 0,1,2,3,4,5,6,7
407: a range (start-end+1): 0-8
408: a range with given increment (start-end+1:inc): 0-7:2
409: a combination of values and ranges separated by commas: 0,1-8,8-15:2
411: There must be no intervening spaces between the values.
413: .seealso: PetscThreadCommGetAffinities(), PetscThreadCommSetNThreads()
414: */
415: PetscErrorCode PetscThreadCommSetAffinities(PetscThreadComm tcomm,const PetscInt affinities[])
416: {
418: PetscBool flg;
419: PetscInt nmax=tcomm->nworkThreads;
422: /* Free if affinities set already */
423: PetscFree(tcomm->affinities);
424: PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&tcomm->affinities);
426: if (!affinities) {
427: /* Check if option is present in the options database */
428: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting thread affinities",NULL);
429: PetscOptionsIntArray("-threadcomm_affinities","Set core affinities of threads","PetscThreadCommSetAffinities",tcomm->affinities,&nmax,&flg);
430: PetscOptionsEnd();
431: if (flg) {
432: if (nmax != tcomm->nworkThreads) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Must set affinities for all threads, Threads = %D, Core affinities set = %D",tcomm->nworkThreads,nmax);
433: } else {
434: /* PETSc default affinities */
435: PetscInt i;
436: for (i=0; i<tcomm->nworkThreads; i++) tcomm->affinities[i] = i%N_CORES;
437: }
438: } else {
439: PetscMemcpy(tcomm->affinities,affinities,tcomm->nworkThreads*sizeof(PetscInt));
440: }
441: return(0);
442: }
446: /*@C
447: PetscThreadCommGetAffinities - Returns the core affinities set for the
448: thread communicator associated with the MPI_Comm
450: Not collective
452: Input Parameters:
453: . comm - MPI communicator
455: Output Parameters:
456: . affinities - thread affinities
458: Level: developer
460: Notes:
461: The user must allocate space (nthreads PetscInts) for the
462: affinities. Must call PetscThreadCommSetAffinities before.
464: */
465: PetscErrorCode PetscThreadCommGetAffinities(MPI_Comm comm,PetscInt affinities[])
466: {
467: PetscErrorCode ierr;
468: PetscThreadComm tcomm=0;
471: PetscCommGetThreadComm(comm,&tcomm);
473: PetscMemcpy(affinities,tcomm->affinities,tcomm->nworkThreads*sizeof(PetscInt));
474: return(0);
475: }
479: /*
480: PetscThreadCommSetType - Sets the threading model for the thread communicator
482: Logically collective
484: Input Parameters:
485: + tcomm - the thread communicator
486: - type - the type of thread model needed
489: Options Database keys:
490: -threadcomm_type <type>
492: Available types
493: See "petsc/include/petscthreadcomm.h" for available types
495: */
496: PetscErrorCode PetscThreadCommSetType(PetscThreadComm tcomm,PetscThreadCommType type)
497: {
498: PetscErrorCode ierr,(*r)(PetscThreadComm);
499: char ttype[256];
500: PetscBool flg;
504: if (!PetscThreadCommRegisterAllCalled) { PetscThreadCommRegisterAll();}
506: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting threading model",NULL);
507: PetscOptionsList("-threadcomm_type","Thread communicator model","PetscThreadCommSetType",PetscThreadCommList,type,ttype,256,&flg);
508: PetscOptionsEnd();
509: if (!flg) {
510: PetscStrcpy(ttype,type);
511: }
512: PetscFunctionListFind(PetscThreadCommList,ttype,&r);
513: if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscThreadComm type %s",ttype);
514: (*r)(tcomm);
515: PetscStrcmp(NOTHREAD,tcomm->type,&tcomm->isnothread);
516: return(0);
517: }
521: /* PetscThreadCommBarrier - Apply a barrier on the thread communicator
522: associated with the MPI communicator
524: Not collective
526: Input Parameters:
527: . comm - the MPI communicator
529: Level: developer
531: Notes:
532: This routine provides an interface to put an explicit barrier between
533: successive kernel calls to ensure that the first kernel is executed
534: by all the threads before calling the next one.
536: Called by the main thread only.
538: May not be applicable to all types.
539: */
540: PetscErrorCode PetscThreadCommBarrier(MPI_Comm comm)
541: {
542: PetscErrorCode ierr;
543: PetscThreadComm tcomm=0;
546: PetscLogEventBegin(ThreadComm_Barrier,0,0,0,0);
547: PetscCommGetThreadComm(comm,&tcomm);
548: if (tcomm->ops->barrier) {
549: (*tcomm->ops->barrier)(tcomm);
550: }
551: PetscLogEventEnd(ThreadComm_Barrier,0,0,0,0);
552: return(0);
553: }
557: /*@C
558: PetscThreadCommRegister - Registers a thread communicator type under the given name
560: Level: advanced
561: @*/
562: PetscErrorCode PetscThreadCommRegister(const char sname[],PetscErrorCode (*function)(PetscThreadComm))
563: {
567: PetscFunctionListAdd(&PetscThreadCommList,sname,function);
568: return(0);
569: }
573: /*@C
574: PetscThreadCommGetScalars - Gets pointers to locations for storing three PetscScalars that may be passed
575: to PetscThreadCommRunKernel to ensure that the scalar values remain valid
576: even after the main thread exits the calling function.
578: Input Parameters:
579: + comm - the MPI communicator having the thread communicator
580: . val1 - pointer to store the first scalar value
581: . val2 - pointer to store the second scalar value
582: - val3 - pointer to store the third scalar value
584: Level: developer
586: Notes:
587: This is a utility function to ensure that any scalars passed to PetscThreadCommRunKernel remain
588: valid even after the main thread exits the calling function. If any scalars need to be passed to
589: PetscThreadCommRunKernel then these should first be stored in the locations provided by PetscThreadCommGetScalars()
591: Pass NULL if any pointers are not needed.
593: Called by the main thread only, not from within kernels
595: Typical usage:
597: PetscScalar *valptr;
598: PetscThreadCommGetScalars(comm,&valptr,NULL,NULL);
599: *valptr = alpha; (alpha is the scalar you wish to pass in PetscThreadCommRunKernel)
601: PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,valptr);
603: .seealso: PetscThreadCommRunKernel()
604: @*/
605: PetscErrorCode PetscThreadCommGetScalars(MPI_Comm comm,PetscScalar **val1, PetscScalar **val2, PetscScalar **val3)
606: {
607: PetscErrorCode ierr;
608: PetscThreadComm tcomm;
609: PetscThreadCommJobCtx job;
610: PetscInt job_num;
613: PetscCommGetThreadComm(comm,&tcomm);
614: job_num = PetscJobQueue->ctr%tcomm->nkernels;
615: job = &PetscJobQueue->jobs[job_num];
616: if (val1) *val1 = &job->scalars[0];
617: if (val2) *val2 = &job->scalars[1];
618: if (val3) *val3 = &job->scalars[2];
619: return(0);
620: }
624: /*@C
625: PetscThreadCommGetInts - Gets pointers to locations for storing three PetscInts that may be passed
626: to PetscThreadCommRunKernel to ensure that the integer values remain valid
627: even after the main thread exits the calling function.
629: Input Parameters:
630: + comm - the MPI communicator having the thread communicator
631: . val1 - pointer to store the first integer value
632: . val2 - pointer to store the second integer value
633: - val3 - pointer to store the third integer value
635: Level: developer
637: Notes:
638: This is a utility function to ensure that any integers passed to PetscThreadCommRunKernel remain
639: valid even after the main thread exits the calling function. If any integers need to be passed to
640: PetscThreadCommRunKernel then these should first be stored in the locations provided by PetscThreadCommGetInts()
642: Pass NULL if any pointers are not needed.
644: Called by the main thread only, not from within kernels
646: Typical usage:
648: PetscInt *valptr;
649: PetscThreadCommGetInts(comm,&valptr,NULL,NULL);
650: *valptr = n; (n is the integer you wish to pass in PetscThreadCommRunKernel)
652: PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,valptr);
654: .seealso: PetscThreadCommRunKernel()
655: @*/
656: PetscErrorCode PetscThreadCommGetInts(MPI_Comm comm,PetscInt **val1, PetscInt **val2, PetscInt **val3)
657: {
658: PetscErrorCode ierr;
659: PetscThreadComm tcomm;
660: PetscThreadCommJobCtx job;
661: PetscInt job_num;
664: PetscCommGetThreadComm(comm,&tcomm);
665: job_num = PetscJobQueue->ctr%tcomm->nkernels;
666: job = &PetscJobQueue->jobs[job_num];
667: if (val1) *val1 = &job->ints[0];
668: if (val2) *val2 = &job->ints[1];
669: if (val3) *val3 = &job->ints[2];
670: return(0);
671: }
675: /*@C
676: PetscThreadCommRunKernel - Runs the kernel using the thread communicator
677: associated with the MPI communicator
679: Not Collective
681: Input Parameters:
682: + comm - the MPI communicator
683: . func - the kernel (needs to be cast to PetscThreadKernel)
684: . nargs - Number of input arguments for the kernel
685: - ... - variable list of input arguments
687: Level: developer
689: Notes:
690: All input arguments to the kernel must be passed by reference, Petsc objects are
691: inherently passed by reference so you don't need to additionally & them.
693: Example usage - PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,z);
694: with kernel_func declared as
695: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x, PetscScalar* y, PetscReal* z)
697: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
698: by PETSc.
700: .seealso: PetscThreadCommCreate(), PetscThreadCommGetNThreads()
701: @*/
702: PetscErrorCode PetscThreadCommRunKernel(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),PetscInt nargs,...)
703: {
704: PetscErrorCode ierr;
705: va_list argptr;
706: PetscInt i;
707: PetscThreadComm tcomm=0;
708: PetscThreadCommJobCtx job;
711: if (nargs > PETSC_KERNEL_NARGS_MAX) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Requested %D input arguments for kernel, max. limit %D",nargs,PETSC_KERNEL_NARGS_MAX);
712: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
713: PetscCommGetThreadComm(comm,&tcomm);
714: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
715: if (job->job_status[0] != THREAD_JOB_NONE) {
716: for (i=0; i<tcomm->nworkThreads; i++) {
717: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
718: }
719: }
721: job->tcomm = tcomm;
722: job->tcomm->job_ctr = PetscJobQueue->ctr;
723: job->nargs = nargs;
724: job->pfunc = (PetscThreadKernel)func;
725: va_start(argptr,nargs);
726: for (i=0; i < nargs; i++) job->args[i] = va_arg(argptr,void*);
727: va_end(argptr);
728: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
730: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
731: PetscJobQueue->kernel_ctr++;
732: if (tcomm->isnothread) {
733: PetscRunKernel(0,job->nargs,job);
734: job->job_status[0] = THREAD_JOB_COMPLETED;
735: } else {
736: (*tcomm->ops->runkernel)(tcomm,job);
737: }
738: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
739: return(0);
740: }
744: /* The zero-argument kernel needs to be callable with an unwrapped PetscThreadComm after Petsc_ThreadComm_keyval has been freed. */
745: static PetscErrorCode PetscThreadCommRunKernel0_Private(PetscThreadComm tcomm,PetscErrorCode (*func)(PetscInt,...))
746: {
747: PetscErrorCode ierr;
748: PetscInt i;
749: PetscThreadCommJobCtx job;
752: if (tcomm->isnothread) {
753: (*func)(0);
754: return(0);
755: }
757: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
758: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
759: if (job->job_status[0] != THREAD_JOB_NONE) {
760: for (i=0; i<tcomm->nworkThreads; i++) {
761: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
762: }
763: }
765: job->tcomm = tcomm;
766: job->tcomm->job_ctr = PetscJobQueue->ctr;
767: job->nargs = 1;
768: job->pfunc = (PetscThreadKernel)func;
770: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
772: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
773: PetscJobQueue->kernel_ctr++;
775: (*tcomm->ops->runkernel)(tcomm,job);
776: return(0);
777: }
781: /*@C
782: PetscThreadCommRunKernel0 - PetscThreadCommRunKernel version for kernels with no
783: input arguments
785: Input Parameters:
786: + comm - the MPI communicator
787: - func - the kernel (needs to be cast to PetscThreadKernel)
789: Level: developer
791: Notes:
792: All input arguments to the kernel must be passed by reference, Petsc objects are
793: inherently passed by reference so you don't need to additionally & them.
795: Example usage - PetscThreadCommRunKernel0(comm,(PetscThreadKernel)kernel_func);
796: with kernel_func declared as
797: PetscErrorCode kernel_func(PetscInt thread_id)
799: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
800: by PETSc.
802: .seealso: PetscThreadCommCreate(), PetscThreadCommGetNThreads()
803: @*/
804: PetscErrorCode PetscThreadCommRunKernel0(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...))
805: {
806: PetscErrorCode ierr;
807: PetscThreadComm tcomm=0;
810: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
811: PetscCommGetThreadComm(comm,&tcomm);
812: PetscThreadCommRunKernel0_Private(tcomm,func);
813: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
814: return(0);
815: }
819: /*@C
820: PetscThreadCommRunKernel1 - PetscThreadCommRunKernel version for kernels with 1
821: input argument
823: Input Parameters:
824: + comm - the MPI communicator
825: . func - the kernel (needs to be cast to PetscThreadKernel)
826: - in1 - input argument for the kernel
828: Level: developer
830: Notes:
831: All input arguments to the kernel must be passed by reference, Petsc objects are
832: inherently passed by reference so you don't need to additionally & them.
834: Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
835: with kernel_func declared as
836: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x)
838: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
839: by PETSc.
841: .seealso: PetscThreadCommCreate(), PetscThreadCommGetNThreads()
842: @*/
843: PetscErrorCode PetscThreadCommRunKernel1(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1)
844: {
845: PetscErrorCode ierr;
846: PetscInt i;
847: PetscThreadComm tcomm=0;
848: PetscThreadCommJobCtx job;
851: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
852: PetscCommGetThreadComm(comm,&tcomm);
853: if (tcomm->isnothread) {
854: (*func)(0,in1);
855: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
856: return(0);
857: }
859: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
860: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
861: if (job->job_status[0] != THREAD_JOB_NONE) {
862: for (i=0; i<tcomm->nworkThreads; i++) {
863: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
864: }
865: }
867: job->tcomm = tcomm;
868: job->tcomm->job_ctr = PetscJobQueue->ctr;
869: job->nargs = 1;
870: job->pfunc = (PetscThreadKernel)func;
871: job->args[0] = in1;
873: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
875: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
876: PetscJobQueue->kernel_ctr++;
878: (*tcomm->ops->runkernel)(tcomm,job);
880: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
881: return(0);
882: }
886: /*@C
887: PetscThreadCommRunKernel2 - PetscThreadCommRunKernel version for kernels with 2
888: input arguments
890: Input Parameters:
891: + comm - the MPI communicator
892: . func - the kernel (needs to be cast to PetscThreadKernel)
893: . in1 - 1st input argument for the kernel
894: - in2 - 2nd input argument for the kernel
896: Level: developer
898: Notes:
899: All input arguments to the kernel must be passed by reference, Petsc objects are
900: inherently passed by reference so you don't need to additionally & them.
902: Example usage - PetscThreadCommRunKernel2(comm,(PetscThreadKernel)kernel_func,x,y);
903: with kernel_func declared as
904: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt *x,PetscInt *y)
906: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
907: by PETSc.
909: .seealso: PetscThreadCommCreate(), PetscThreadCommGetNThreads()
910: @*/
911: PetscErrorCode PetscThreadCommRunKernel2(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2)
912: {
913: PetscErrorCode ierr;
914: PetscInt i;
915: PetscThreadComm tcomm=0;
916: PetscThreadCommJobCtx job;
919: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
920: PetscCommGetThreadComm(comm,&tcomm);
921: if (tcomm->isnothread) {
922: (*func)(0,in1,in2);
923: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
924: return(0);
925: }
927: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
928: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
929: if (job->job_status[0] != THREAD_JOB_NONE) {
930: for (i=0; i<tcomm->nworkThreads; i++) {
931: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
932: }
933: }
935: job->tcomm = tcomm;
936: job->tcomm->job_ctr = PetscJobQueue->ctr;
937: job->nargs = 2;
938: job->pfunc = (PetscThreadKernel)func;
939: job->args[0] = in1;
940: job->args[1] = in2;
942: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
944: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
945: PetscJobQueue->kernel_ctr++;
947: (*tcomm->ops->runkernel)(tcomm,job);
949: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
950: return(0);
951: }
955: /*@C
956: PetscThreadCommRunKernel3 - PetscThreadCommRunKernel version for kernels with 3
957: input argument
959: Input Parameters:
960: + comm - the MPI communicator
961: . func - the kernel (needs to be cast to PetscThreadKernel)
962: . in1 - first input argument for the kernel
963: . in2 - second input argument for the kernel
964: - in3 - third input argument for the kernel
966: Level: developer
968: Notes:
969: All input arguments to the kernel must be passed by reference, Petsc objects are
970: inherently passed by reference so you don't need to additionally & them.
972: Example usage - PetscThreadCommRunKernel3(comm,(PetscThreadKernel)kernel_func,x,y,z);
973: with kernel_func declared as
974: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x,PetscInt* y,PetscInt* z)
976: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
977: by PETSc.
979: .seealso: PetscThreadCommCreate(), PetscThreadCommGetNThreads()
980: @*/
981: PetscErrorCode PetscThreadCommRunKernel3(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3)
982: {
983: PetscErrorCode ierr;
984: PetscInt i;
985: PetscThreadComm tcomm=0;
986: PetscThreadCommJobCtx job;
989: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
990: PetscCommGetThreadComm(comm,&tcomm);
991: if (tcomm->isnothread) {
992: (*func)(0,in1,in2,in3);
993: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
994: return(0);
995: }
997: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
998: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
999: if (job->job_status[0] != THREAD_JOB_NONE) {
1000: for (i=0; i<tcomm->nworkThreads; i++) {
1001: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
1002: }
1003: }
1005: job->tcomm = tcomm;
1006: job->tcomm->job_ctr = PetscJobQueue->ctr;
1007: job->nargs = 3;
1008: job->pfunc = (PetscThreadKernel)func;
1009: job->args[0] = in1;
1010: job->args[1] = in2;
1011: job->args[2] = in3;
1013: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
1015: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1016: PetscJobQueue->kernel_ctr++;
1018: (*tcomm->ops->runkernel)(tcomm,job);
1020: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1021: return(0);
1022: }
1026: /*@C
1027: PetscThreadCommRunKernel4 - PetscThreadCommRunKernel version for kernels with 4
1028: input argument
1030: Input Parameters:
1031: + comm - the MPI communicator
1032: . func - the kernel (needs to be cast to PetscThreadKernel)
1033: . in1 - first input argument for the kernel
1034: . in2 - second input argument for the kernel
1035: . in3 - third input argument for the kernel
1036: - in4 - fourth input argument for the kernel
1038: Level: developer
1040: Notes:
1041: All input arguments to the kernel must be passed by reference, Petsc objects are
1042: inherently passed by reference so you don't need to additionally & them.
1044: Example usage - PetscThreadCommRunKernel4(comm,(PetscThreadKernel)kernel_func,x,y,z,w);
1045: with kernel_func declared as
1046: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x,PetscInt* y,PetscInt* z,PetscInt* w)
1048: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
1049: by PETSc.
1051: .seealso: PetscThreadCommCreate(), PetscThreadCommGetNThreads()
1052: @*/
1053: PetscErrorCode PetscThreadCommRunKernel4(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3,void *in4)
1054: {
1055: PetscErrorCode ierr;
1056: PetscInt i;
1057: PetscThreadComm tcomm=0;
1058: PetscThreadCommJobCtx job;
1061: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
1062: PetscCommGetThreadComm(comm,&tcomm);
1063: if (tcomm->isnothread) {
1064: (*func)(0,in1,in2,in3,in4);
1065: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1066: return(0);
1067: }
1069: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
1070: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
1071: if (job->job_status[0] != THREAD_JOB_NONE) {
1072: for (i=0; i<tcomm->nworkThreads; i++) {
1073: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
1074: }
1075: }
1077: job->tcomm = tcomm;
1078: job->tcomm->job_ctr = PetscJobQueue->ctr;
1079: job->nargs = 4;
1080: job->pfunc = (PetscThreadKernel)func;
1081: job->args[0] = in1;
1082: job->args[1] = in2;
1083: job->args[2] = in3;
1084: job->args[3] = in4;
1086: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
1088: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1089: PetscJobQueue->kernel_ctr++;
1091: (*tcomm->ops->runkernel)(tcomm,job);
1093: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1094: return(0);
1095: }
1099: /*@C
1100: PetscThreadCommRunKernel6 - PetscThreadCommRunKernel version for kernels with 6
1101: input arguments
1103: Input Parameters:
1104: + comm - the MPI communicator
1105: . func - the kernel (needs to be cast to PetscThreadKernel)
1106: . in1 - first input argument for the kernel
1107: . in2 - second input argument for the kernel
1108: . in3 - third input argument for the kernel
1109: . in4 - fourth input argument for the kernel
1110: . in5 - fifth input argument for the kernel
1111: - in6 - sixth input argument for the kernel
1113: Level: developer
1115: Notes:
1116: All input arguments to the kernel must be passed by reference, Petsc objects are
1117: inherrently passed by reference so you don't need to additionally & them.
1119: Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
1120: with kernel_func declared as
1121: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x)
1123: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
1124: by PETSc.
1126: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
1127: @*/
1128: PetscErrorCode PetscThreadCommRunKernel6(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3,void *in4,void *in5,void *in6)
1129: {
1130: PetscErrorCode ierr;
1131: PetscInt i;
1132: PetscThreadComm tcomm=0;
1133: PetscThreadCommJobCtx job;
1136: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
1137: PetscCommGetThreadComm(comm,&tcomm);
1138: if (tcomm->isnothread) {
1139: (*func)(0,in1,in2,in3,in4,in5,in6);
1140: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1141: return(0);
1142: }
1144: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
1145: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
1146: if (job->job_status[0] != THREAD_JOB_NONE) {
1147: for (i=0; i<tcomm->nworkThreads; i++) {
1148: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
1149: }
1150: }
1152: job->tcomm = tcomm;
1153: job->tcomm->job_ctr = PetscJobQueue->ctr;
1154: job->nargs = 6;
1155: job->pfunc = (PetscThreadKernel)func;
1156: job->args[0] = in1;
1157: job->args[1] = in2;
1158: job->args[2] = in3;
1159: job->args[3] = in4;
1160: job->args[4] = in5;
1161: job->args[5] = in6;
1164: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
1166: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1167: PetscJobQueue->kernel_ctr++;
1169: (*tcomm->ops->runkernel)(tcomm,job);
1171: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1172: return(0);
1173: }
1175: /*
1176: Detaches the thread communicator from the MPI communicator if it exists
1177: */
1180: PetscErrorCode PetscThreadCommDetach(MPI_Comm comm)
1181: {
1183: PetscMPIInt flg;
1184: void *ptr;
1187: MPI_Attr_get(comm,Petsc_ThreadComm_keyval,&ptr,&flg);
1188: if (flg) {
1189: MPI_Attr_delete(comm,Petsc_ThreadComm_keyval);
1190: }
1191: return(0);
1192: }
1194: /*
1195: This routine attaches the thread communicator to the MPI communicator if it does not
1196: exist already.
1197: */
1200: PetscErrorCode PetscThreadCommAttach(MPI_Comm comm,PetscThreadComm tcomm)
1201: {
1203: PetscMPIInt flg;
1204: void *ptr;
1207: MPI_Attr_get(comm,Petsc_ThreadComm_keyval,&ptr,&flg);
1208: if (!flg) {
1209: tcomm->refct++;
1210: MPI_Attr_put(comm,Petsc_ThreadComm_keyval,tcomm);
1211: }
1212: return(0);
1213: }
1217: /*
1218: PetscThreadCommWorldInitialize - Initializes the global thread communicator object
1220: PetscThreadCommWorldInitialize() defaults to using the nonthreaded communicator.
1221: */
1222: PetscErrorCode PetscThreadCommWorldInitialize(void)
1223: {
1224: PetscErrorCode ierr;
1225: PetscThreadComm tcomm;
1226: PetscInt i,j;
1229: PetscThreadCommCreate(&PETSC_THREAD_COMM_WORLD);
1230: tcomm = PETSC_THREAD_COMM_WORLD;
1231: PetscThreadCommSetNThreads(tcomm,PETSC_DECIDE);
1232: PetscThreadCommSetAffinities(tcomm,NULL);
1233: PetscNew(struct _p_PetscThreadCommJobQueue,&PetscJobQueue);
1235: tcomm->nkernels = 16;
1237: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting number of kernels",NULL);
1238: PetscOptionsInt("-threadcomm_nkernels","number of kernels that can be launched simultaneously","",16,&tcomm->nkernels,NULL);
1239: PetscOptionsEnd();
1241: PetscMalloc(tcomm->nkernels*sizeof(struct _p_PetscThreadCommJobCtx),&PetscJobQueue->jobs);
1242: PetscMalloc(tcomm->nworkThreads*tcomm->nkernels*sizeof(PetscInt),&PetscJobQueue->jobs[0].job_status);
1243: for (i=0; i<tcomm->nkernels; i++) {
1244: PetscJobQueue->jobs[i].job_status = PetscJobQueue->jobs[0].job_status + i*tcomm->nworkThreads;
1245: for (j=0; j<tcomm->nworkThreads; j++) PetscJobQueue->jobs[i].job_status[j] = THREAD_JOB_NONE;
1246: }
1247: PetscJobQueue->ctr = 0;
1248: PetscJobQueue->kernel_ctr = 0;
1249: tcomm->job_ctr = 0;
1251: PetscThreadCommSetType(tcomm,NOTHREAD);
1252: PetscThreadCommReductionCreate(tcomm,&tcomm->red);
1253: PetscThreadCommStackCreate();
1254: tcomm->refct++;
1255: return(0);
1256: }
1260: /*
1261: PetscThreadCommGetOwnershipRanges - Given the global size of an array, computes the local sizes and sets
1262: the starting array indices
1264: Input Parameters:
1265: + comm - the MPI communicator which holds the thread communicator
1266: - N - the global size of the array
1268: Output Parameters:
1269: . trstarts - The starting array indices for each thread. the size of trstarts is nthreads+1
1271: Notes:
1272: trstarts is malloced in this routine
1273: */
1274: PetscErrorCode PetscThreadCommGetOwnershipRanges(MPI_Comm comm,PetscInt N,PetscInt *trstarts[])
1275: {
1276: PetscErrorCode ierr;
1277: PetscInt Q,R;
1278: PetscBool S;
1279: PetscThreadComm tcomm = NULL;
1280: PetscInt *trstarts_out,nloc,i;
1283: PetscCommGetThreadComm(comm,&tcomm);
1285: PetscMalloc((tcomm->nworkThreads+1)*sizeof(PetscInt),&trstarts_out);
1286: trstarts_out[0] = 0;
1287: Q = N/tcomm->nworkThreads;
1288: R = N - Q*tcomm->nworkThreads;
1289: for (i=0; i<tcomm->nworkThreads; i++) {
1290: S = (PetscBool)(i < R);
1291: nloc = S ? Q+1 : Q;
1292: trstarts_out[i+1] = trstarts_out[i] + nloc;
1293: }
1295: *trstarts = trstarts_out;
1296: return(0);
1297: }
1301: /*
1302: PetscThreadCommGetRank - Gets the rank of the calling thread
1304: Input Parameters:
1305: . tcomm - the thread communicator
1307: Output Parameters:
1308: . trank - The rank of the calling thread
1310: */
1311: PetscErrorCode PetscThreadCommGetRank(PetscThreadComm tcomm,PetscInt *trank)
1312: {
1314: PetscInt rank = 0;
1317: if (tcomm->ops->getrank) {
1318: (*tcomm->ops->getrank)(&rank);
1319: }
1320: *trank = rank;
1321: return(0);
1322: }