Actual source code: threadcomm.c

petsc-3.4.2 2013-07-02
  1: #include <petsc-private/threadcommimpl.h>      /*I "petscthreadcomm.h" I*/
  2: #include <petscviewer.h>
  3: #if defined(PETSC_HAVE_MALLOC_H)
  4: #include <malloc.h>
  5: #endif

  7: static PetscInt         N_CORES                          = -1; /* cached core count; -1 means "not determined yet" (see PetscGetNCores) */
  8: PetscBool               PetscThreadCommRegisterAllCalled = PETSC_FALSE; /* tested by PetscThreadCommSetType() before it calls PetscThreadCommRegisterAll() */
  9: PetscFunctionList       PetscThreadCommList              = NULL; /* list of type constructors added through PetscThreadCommRegister() */
 10: PetscMPIInt             Petsc_ThreadComm_keyval          = MPI_KEYVAL_INVALID; /* MPI attribute key used to look up the thread communicator on an MPI_Comm */
 11: PetscThreadCommJobQueue PetscJobQueue                    = NULL; /* job queue shared by the PetscThreadCommRunKernel*() routines */
 12: PetscThreadComm         PETSC_THREAD_COMM_WORLD          = NULL; /* global thread communicator, created on demand by PetscGetThreadCommWorld() */

 14: /* Logging support */
 15: PetscLogEvent ThreadComm_RunKernel, ThreadComm_Barrier; /* events passed to PetscLogEventBegin/End by the kernel launchers and the barrier */

 17: static PetscErrorCode PetscThreadCommRunKernel0_Private(PetscThreadComm tcomm,PetscErrorCode (*func)(PetscInt,...)); /* forward declaration; defined further down in this file */

 21: /*@
 22:   PetscGetNCores - Gets the number of available cores on the system

 24:   Not Collective

 26:   Level: developer

 28:   Notes
 29:   Defaults to 1 if the available core count cannot be found

 31: @*/
 32: PetscErrorCode PetscGetNCores(PetscInt *ncores)
 33: {
 35:   if (N_CORES == -1) {
 36:     N_CORES = 1; /* Default value if number of cores cannot be found out */

 38: #if defined(PETSC_HAVE_SYS_SYSINFO_H) && (PETSC_HAVE_GET_NPROCS) /* Linux */
 39:     N_CORES = get_nprocs();
 40: #elif defined(PETSC_HAVE_SYS_SYSCTL_H) && (PETSC_HAVE_SYSCTLBYNAME) /* MacOS, BSD */
 41:     {
 43:       size_t         len = sizeof(N_CORES);
 44:       sysctlbyname("hw.activecpu",&N_CORES,&len,NULL,0); /* osx preferes activecpu over ncpu */
 45:       if (ierr) { /* freebsd check ncpu */
 46:         sysctlbyname("hw.ncpu",&N_CORES,&len,NULL,0);
 47:         /* continue even if there is an error */
 48:       }
 49:     }
 50: #elif defined(PETSC_HAVE_WINDOWS_H)   /* Windows */
 51:     {
 52:       SYSTEM_INFO sysinfo;
 53:       GetSystemInfo(&sysinfo);
 54:       N_CORES = sysinfo.dwNumberOfProcessors;
 55:     }
 56: #endif
 57:   }
 58:   if (ncores) *ncores = N_CORES;
 59:   return(0);
 60: }

 62: PetscErrorCode PetscThreadCommWorldInitialize();
 65: /*
 66:   PetscGetThreadCommWorld - Gets the global thread communicator.
 67:                             Creates it if it does not exist already.

 69:   Not Collective

 71:   Output Parameters:
 72:   tcommp - pointer to the global thread communicator

 74:   Level: Intermediate
 75: */
 76: PetscErrorCode PetscGetThreadCommWorld(PetscThreadComm *tcommp)
 77: {

 81:   if (!PETSC_THREAD_COMM_WORLD) {
 82:     PetscThreadCommWorldInitialize();
 83:   }
 84:   *tcommp = PETSC_THREAD_COMM_WORLD;
 85:   return(0);
 86: }

 90: /*@C
 91:   PetscCommGetThreadComm - Gets the thread communicator
 92:                            associated with the MPI communicator

 94:   Not Collective

 96:   Input Parameters:
 97: . comm - the MPI communicator

 99:   Output Parameters:
100: . tcommp - pointer to the thread communicator

102:   Notes: If no thread communicator is on the MPI_Comm then the global thread communicator
103:          is returned.
104:   Level: Intermediate

106: .seealso: PetscThreadCommCreate(), PetscThreadCommDestroy()
107: @*/
108: PetscErrorCode PetscCommGetThreadComm(MPI_Comm comm,PetscThreadComm *tcommp)
109: {
111:   PetscMPIInt    flg;
112:   void           *ptr;

115:   MPI_Attr_get(comm,Petsc_ThreadComm_keyval,(PetscThreadComm*)&ptr,&flg);
116:   if (!flg) {
117:     PetscGetThreadCommWorld(tcommp);
118:   } else *tcommp      = (PetscThreadComm)ptr;
119:   return(0);
120: }

124: /*
125:    PetscThreadCommCreate - Allocates a thread communicator object

127:    Not Collective

129:    Output Parameters:
130: .  tcomm - pointer to the thread communicator object

132:    Level: developer

134: .seealso: PetscThreadCommDestroy()
135: */
136: PetscErrorCode PetscThreadCommCreate(PetscThreadComm *tcomm)
137: {
138:   PetscErrorCode  ierr;
139:   PetscThreadComm tcommout;


144:   *tcomm = NULL;

146:   PetscNew(struct _p_PetscThreadComm,&tcommout);
147:   tcommout->refct        = 0;
148:   tcommout->nworkThreads =  -1;
149:   tcommout->affinities   = NULL;
150:   PetscNew(struct _PetscThreadCommOps,&tcommout->ops);
151:   tcommout->leader       = 0;
152:   *tcomm                 = tcommout;

154:   return(0);
155: }

157: #if defined(PETSC_USE_DEBUG)

159: PetscErrorCode PetscThreadCommStackCreate_kernel(PetscInt trank)
160: {
161:   PetscStack *petscstack_in;
162:   if (!trank && PetscStackActive()) return 0;

164:   petscstack_in              = (PetscStack*)malloc(sizeof(PetscStack));
165:   petscstack_in->currentsize = 0;
166:   PetscThreadLocalSetValue((PetscThreadKey*)&petscstack,petscstack_in);
167:   return 0;
168: }

170: /* Creates stack frames for threads other than the main thread */
173: PetscErrorCode  PetscThreadCommStackCreate(void)
174: {

177:   PetscThreadCommRunKernel0(PETSC_COMM_SELF,(PetscThreadKernel)PetscThreadCommStackCreate_kernel);
178:   PetscThreadCommBarrier(PETSC_COMM_SELF);
179:   return 0;
180: }

182: PetscErrorCode PetscThreadCommStackDestroy_kernel(PetscInt trank)
183: {
184:   if (trank && PetscStackActive()) {
185:     PetscStack *petscstack_in;
186:     petscstack_in = (PetscStack*)PetscThreadLocalGetValue(petscstack);
187:     free(petscstack_in);
188:     PetscThreadLocalSetValue((PetscThreadKey*)&petscstack,(PetscStack*)0);
189:   }
190:   return 0;
191: }

195: /* Destroy stack frames for threads other than main thread
196:  *
197:  * The keyval may have been destroyed by the time this function is called, thus we must call
198:  * PetscThreadCommRunKernel0_Private so that we never reference an MPI_Comm.
199:  */
200: PetscErrorCode  PetscThreadCommStackDestroy(void)
201: {
204:   PetscThreadCommRunKernel0_Private(PETSC_THREAD_COMM_WORLD,(PetscThreadKernel)PetscThreadCommStackDestroy_kernel);
205:   PETSC_THREAD_COMM_WORLD = NULL;
206:   return(0);
207:   return 0;
208: }
209: #else
212: PetscErrorCode  PetscThreadCommStackCreate(void)
213: {
215:   return(0);
216: }

220: PetscErrorCode  PetscThreadCommStackDestroy(void)
221: {
223:   PETSC_THREAD_COMM_WORLD = NULL;
224:   return(0);
225: }

227: #endif

231: /*
232:   PetscThreadCommDestroy - Frees a thread communicator object

234:   Not Collective

236:   Input Parameters:
237: . tcomm - the PetscThreadComm object

239:   Level: developer

241: .seealso: PetscThreadCommCreate()
242: */
243: PetscErrorCode PetscThreadCommDestroy(PetscThreadComm *tcomm)
244: {

248:   if (!*tcomm) return(0);
249:   if (!--(*tcomm)->refct) {
250:     PetscThreadCommStackDestroy();
251:     /* Destroy the implementation specific data struct */
252:     if ((*tcomm)->ops->destroy) (*(*tcomm)->ops->destroy)(*tcomm);

254:     PetscFree((*tcomm)->affinities);
255:     PetscFree((*tcomm)->ops);
256:     PetscFree(PetscJobQueue->jobs[0].job_status);
257:     PetscFree(PetscJobQueue->jobs);
258:     PetscFree(PetscJobQueue);
259:     PetscThreadCommReductionDestroy((*tcomm)->red);
260:     PetscFree((*tcomm));
261:   }
262:   *tcomm = NULL;
263:   return(0);
264: }

268: /*@C
269:    PetscThreadCommView - view a thread communicator

271:    Collective on comm

273:    Input Parameters:
274: +  comm - MPI communicator
275: -  viewer - viewer to display, for example PETSC_VIEWER_STDOUT_WORLD

277:    Level: developer

279: .seealso: PetscThreadCommCreate()
280: @*/
281: PetscErrorCode PetscThreadCommView(MPI_Comm comm,PetscViewer viewer)
282: {
283:   PetscErrorCode  ierr;
284:   PetscBool       iascii;
285:   PetscThreadComm tcomm=0;

288:   PetscCommGetThreadComm(comm,&tcomm);
289:   if (!viewer) {PetscViewerASCIIGetStdout(comm,&viewer);}
290:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
291:   if (iascii) {
292:     PetscViewerASCIIPrintf(viewer,"Thread Communicator\n");
293:     PetscViewerASCIIPushTab(viewer);
294:     PetscViewerASCIIPrintf(viewer,"Number of threads = %D\n",tcomm->nworkThreads);
295:     PetscViewerASCIIPrintf(viewer,"Type = %s\n",tcomm->type);
296:     PetscViewerASCIIPopTab(viewer);
297:     if (tcomm->ops->view) {
298:       PetscViewerASCIIPushTab(viewer);
299:       (*tcomm->ops->view)(tcomm,viewer);
300:       PetscViewerASCIIPopTab(viewer);
301:     }
302:   }
303:   return(0);
304: }

308: /*
309:    PetscThreadCommSetNThreads - Set the thread count for the thread communicator

311:    Not collective

313:    Input Parameters:
314: +  tcomm - the thread communicator
315: -  nthreads - Number of threads

317:    Options Database keys:
318:    -threadcomm_nthreads <nthreads> Number of threads to use

320:    Level: developer

322:    Notes:
323:    Defaults to using 1 thread.

325:    Use nthreads = PETSC_DECIDE or -threadcomm_nthreads PETSC_DECIDE for PETSc to decide the number of threads.


328: .seealso: PetscThreadCommGetNThreads()
329: */
330: PetscErrorCode PetscThreadCommSetNThreads(PetscThreadComm tcomm,PetscInt nthreads)
331: {
333:   PetscBool      flg;
334:   PetscInt       nthr;

337:   if (nthreads == PETSC_DECIDE) {
338:     tcomm->nworkThreads = 1;
339:     PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting number of threads",NULL);
340:     PetscOptionsInt("-threadcomm_nthreads","number of threads to use in the thread communicator","PetscThreadCommSetNThreads",1,&nthr,&flg);
341:     PetscOptionsEnd();
342:     if (flg) {
343:       if (nthr == PETSC_DECIDE) tcomm->nworkThreads = N_CORES;
344:       else tcomm->nworkThreads = nthr;
345:     }
346:   } else tcomm->nworkThreads = nthreads;
347:   return(0);
348: }

352: /*@C
353:    PetscThreadCommGetNThreads - Gets the thread count from the thread communicator
354:                                 associated with the MPI communicator

356:    Not collective

358:    Input Parameters:
359: .  comm - the MPI communicator

361:    Output Parameters:
362: .  nthreads - number of threads

364:    Level: developer

366: .seealso: PetscThreadCommSetNThreads()
367: @*/
368: PetscErrorCode PetscThreadCommGetNThreads(MPI_Comm comm,PetscInt *nthreads)
369: {
370:   PetscErrorCode  ierr;
371:   PetscThreadComm tcomm=0;

374:   PetscCommGetThreadComm(comm,&tcomm);
375:   *nthreads = tcomm->nworkThreads;
376:   return(0);
377: }

381: /*
382:    PetscThreadCommSetAffinities - Sets the core affinity for threads
383:                                   (which threads run on which cores)

385:    Not collective

387:    Input Parameters:
388: +  tcomm - the thread communicator
389: -  affinities - array of core affinity for threads

391:    Options Database keys:
392: .  -threadcomm_affinities <list of thread affinities>

394:    Level: developer

396:    Notes:
397:    Use affinities = NULL for PETSc to decide the affinities.
398:    If PETSc decides affinities, then each thread has affinity to
399:    a unique core with the main thread on Core 0, thread0 on core 1,
400:    and so on. If the thread count is more the number of available
401:    cores then multiple threads share a core.

403:    The first value is the affinity for the main thread

405:    The affinity list can be passed as
406:    a comma seperated list:                                 0,1,2,3,4,5,6,7
407:    a range (start-end+1):                                  0-8
408:    a range with given increment (start-end+1:inc):         0-7:2
409:    a combination of values and ranges seperated by commas: 0,1-8,8-15:2

411:    There must be no intervening spaces between the values.

413: .seealso: PetscThreadCommGetAffinities(), PetscThreadCommSetNThreads()
414: */
415: PetscErrorCode PetscThreadCommSetAffinities(PetscThreadComm tcomm,const PetscInt affinities[])
416: {
418:   PetscBool      flg;
419:   PetscInt       nmax=tcomm->nworkThreads;

422:   /* Free if affinities set already */
423:   PetscFree(tcomm->affinities);
424:   PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&tcomm->affinities);

426:   if (!affinities) {
427:     /* Check if option is present in the options database */
428:     PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting thread affinities",NULL);
429:     PetscOptionsIntArray("-threadcomm_affinities","Set core affinities of threads","PetscThreadCommSetAffinities",tcomm->affinities,&nmax,&flg);
430:     PetscOptionsEnd();
431:     if (flg) {
432:       if (nmax != tcomm->nworkThreads) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Must set affinities for all threads, Threads = %D, Core affinities set = %D",tcomm->nworkThreads,nmax);
433:     } else {
434:       /* PETSc default affinities */
435:       PetscInt i;
436:       for (i=0; i<tcomm->nworkThreads; i++) tcomm->affinities[i] = i%N_CORES;
437:     }
438:   } else {
439:     PetscMemcpy(tcomm->affinities,affinities,tcomm->nworkThreads*sizeof(PetscInt));
440:   }
441:   return(0);
442: }

446: /*@C
447:    PetscThreadCommGetAffinities - Returns the core affinities set for the
448:                                   thread communicator associated with the MPI_Comm

450:     Not collective

452:     Input Parameters:
453: .   comm - MPI communicator

455:     Output Parameters:
456: .   affinities - thread affinities

458:     Level: developer

460:     Notes:
461:     The user must allocate space (nthreads PetscInts) for the
462:     affinities. Must call PetscThreadCommSetAffinities before.

464: */
465: PetscErrorCode PetscThreadCommGetAffinities(MPI_Comm comm,PetscInt affinities[])
466: {
467:   PetscErrorCode  ierr;
468:   PetscThreadComm tcomm=0;

471:   PetscCommGetThreadComm(comm,&tcomm);
473:   PetscMemcpy(affinities,tcomm->affinities,tcomm->nworkThreads*sizeof(PetscInt));
474:   return(0);
475: }

479: /*
480:    PetscThreadCommSetType - Sets the threading model for the thread communicator

482:    Logically collective

484:    Input Parameters:
485: +  tcomm - the thread communicator
486: -  type  - the type of thread model needed


489:    Options Database keys:
490:    -threadcomm_type <type>

492:    Available types
493:    See "petsc/include/petscthreadcomm.h" for available types

495: */
496: PetscErrorCode PetscThreadCommSetType(PetscThreadComm tcomm,PetscThreadCommType type)
497: {
498:   PetscErrorCode ierr,(*r)(PetscThreadComm);
499:   char           ttype[256];
500:   PetscBool      flg;

504:   if (!PetscThreadCommRegisterAllCalled) { PetscThreadCommRegisterAll();}

506:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting threading model",NULL);
507:   PetscOptionsList("-threadcomm_type","Thread communicator model","PetscThreadCommSetType",PetscThreadCommList,type,ttype,256,&flg);
508:   PetscOptionsEnd();
509:   if (!flg) {
510:     PetscStrcpy(ttype,type);
511:   }
512:   PetscFunctionListFind(PetscThreadCommList,ttype,&r);
513:   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscThreadComm type %s",ttype);
514:   (*r)(tcomm);
515:   PetscStrcmp(NOTHREAD,tcomm->type,&tcomm->isnothread);
516:   return(0);
517: }

521: /*  PetscThreadCommBarrier - Apply a barrier on the thread communicator
522:                              associated with the MPI communicator

524:     Not collective

526:     Input Parameters:
527: .   comm - the MPI communicator

529:     Level: developer

531:     Notes:
532:     This routine provides an interface to put an explicit barrier between
533:     successive kernel calls to ensure that the first kernel is executed
534:     by all the threads before calling the next one.

536:     Called by the main thread only.

538:     May not be applicable to all types.
539: */
540: PetscErrorCode PetscThreadCommBarrier(MPI_Comm comm)
541: {
542:   PetscErrorCode  ierr;
543:   PetscThreadComm tcomm=0;

546:   PetscLogEventBegin(ThreadComm_Barrier,0,0,0,0);
547:   PetscCommGetThreadComm(comm,&tcomm);
548:   if (tcomm->ops->barrier) {
549:     (*tcomm->ops->barrier)(tcomm);
550:   }
551:   PetscLogEventEnd(ThreadComm_Barrier,0,0,0,0);
552:   return(0);
553: }

557: /*@C
558:   PetscThreadCommRegister -

560:   Level: advanced
561: @*/
562: PetscErrorCode  PetscThreadCommRegister(const char sname[],PetscErrorCode (*function)(PetscThreadComm))
563: {

567:   PetscFunctionListAdd(&PetscThreadCommList,sname,function);
568:   return(0);
569: }

573: /*@C
574:    PetscThreadCommGetScalars - Gets pointers to locations for storing three PetscScalars that may be passed
575:                                to PetscThreadCommRunKernel to ensure that the scalar values remain valid
576:                                even after the main thread exits the calling function.

578:    Input Parameters:
579: +  comm - the MPI communicator having the thread communicator
580: .  val1 - pointer to store the first scalar value
581: .  val2 - pointer to store the second scalar value
582: -  val3 - pointer to store the third scalar value

584:    Level: developer

586:    Notes:
587:    This is a utility function to ensure that any scalars passed to PetscThreadCommRunKernel remain
588:    valid even after the main thread exits the calling function. If any scalars need to passed to
589:    PetscThreadCommRunKernel then these should be first stored in the locations provided by PetscThreadCommGetScalars()

591:    Pass NULL if any pointers are not needed.

593:    Called by the main thread only, not from within kernels

595:    Typical usage:

597:    PetscScalar *valptr;
598:    PetscThreadCommGetScalars(comm,&valptr,NULL,NULL);
599:    *valptr = alpha;   (alpha is the scalar you wish to pass in PetscThreadCommRunKernel)

601:    PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,valptr);

603: .seealso: PetscThreadCommRunKernel()
604: @*/
605: PetscErrorCode PetscThreadCommGetScalars(MPI_Comm comm,PetscScalar **val1, PetscScalar **val2, PetscScalar **val3)
606: {
607:   PetscErrorCode        ierr;
608:   PetscThreadComm       tcomm;
609:   PetscThreadCommJobCtx job;
610:   PetscInt              job_num;

613:   PetscCommGetThreadComm(comm,&tcomm);
614:   job_num = PetscJobQueue->ctr%tcomm->nkernels;
615:   job     = &PetscJobQueue->jobs[job_num];
616:   if (val1) *val1 = &job->scalars[0];
617:   if (val2) *val2 = &job->scalars[1];
618:   if (val3) *val3 = &job->scalars[2];
619:   return(0);
620: }

624: /*@C
625:    PetscThreadCommGetInts - Gets pointers to locations for storing three PetscInts that may be passed
626:                                to PetscThreadCommRunKernel to ensure that the scalar values remain valid
627:                                even after the main thread exits the calling function.

629:    Input Parameters:
630: +  comm - the MPI communicator having the thread communicator
631: .  val1 - pointer to store the first integer value
632: .  val2 - pointer to store the second integer value
633: -  val3 - pointer to store the third integer value

635:    Level: developer

637:    Notes:
638:    This is a utility function to ensure that any scalars passed to PetscThreadCommRunKernel remain
639:    valid even after the main thread exits the calling function. If any scalars need to passed to
640:    PetscThreadCommRunKernel then these should be first stored in the locations provided by PetscThreadCommGetInts()

642:    Pass NULL if any pointers are not needed.

644:    Called by the main thread only, not from within kernels

646:    Typical usage:

648:    PetscScalar *valptr;
649:    PetscThreadCommGetScalars(comm,&valptr,NULL,NULL);
650:    *valptr = alpha;   (alpha is the scalar you wish to pass in PetscThreadCommRunKernel)

652:    PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,valptr);

654: .seealso: PetscThreadCommRunKernel()
655: @*/
656: PetscErrorCode PetscThreadCommGetInts(MPI_Comm comm,PetscInt **val1, PetscInt **val2, PetscInt **val3)
657: {
658:   PetscErrorCode        ierr;
659:   PetscThreadComm       tcomm;
660:   PetscThreadCommJobCtx job;
661:   PetscInt              job_num;

664:   PetscCommGetThreadComm(comm,&tcomm);
665:   job_num = PetscJobQueue->ctr%tcomm->nkernels;
666:   job     = &PetscJobQueue->jobs[job_num];
667:   if (val1) *val1 = &job->ints[0];
668:   if (val2) *val2 = &job->ints[1];
669:   if (val3) *val3 = &job->ints[2];
670:   return(0);
671: }

675: /*@C
676:    PetscThreadCommRunKernel - Runs the kernel using the thread communicator
677:                               associated with the MPI communicator

679:    Not Collective

681:    Input Parameters:
682: +  comm  - the MPI communicator
683: .  func  - the kernel (needs to be cast to PetscThreadKernel)
684: .  nargs - Number of input arguments for the kernel
685: -  ...   - variable list of input arguments

687:    Level: developer

689:    Notes:
690:    All input arguments to the kernel must be passed by reference, Petsc objects are
691:    inherrently passed by reference so you don't need to additionally & them.

693:    Example usage - PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,z);
694:    with kernel_func declared as
695:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x, PetscScalar* y, PetscReal* z)

697:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
698:    by PETSc.

700: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
701: @*/
702: PetscErrorCode PetscThreadCommRunKernel(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),PetscInt nargs,...)
703: {
704:   PetscErrorCode        ierr;
705:   va_list               argptr;
706:   PetscInt              i;
707:   PetscThreadComm       tcomm=0;
708:   PetscThreadCommJobCtx job;

711:   if (nargs > PETSC_KERNEL_NARGS_MAX) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Requested %D input arguments for kernel, max. limit %D",nargs,PETSC_KERNEL_NARGS_MAX);
712:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
713:   PetscCommGetThreadComm(comm,&tcomm);
714:   job  = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
715:   if (job->job_status[0] != THREAD_JOB_NONE) {
716:     for (i=0; i<tcomm->nworkThreads; i++) {
717:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
718:     }
719:   }

721:   job->tcomm          = tcomm;
722:   job->tcomm->job_ctr = PetscJobQueue->ctr;
723:   job->nargs          = nargs;
724:   job->pfunc          = (PetscThreadKernel)func;
725:   va_start(argptr,nargs);
726:   for (i=0; i < nargs; i++) job->args[i] = va_arg(argptr,void*);
727:   va_end(argptr);
728:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;

730:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
731:   PetscJobQueue->kernel_ctr++;
732:   if (tcomm->isnothread) {
733:     PetscRunKernel(0,job->nargs,job);
734:     job->job_status[0] = THREAD_JOB_COMPLETED;
735:   } else {
736:     (*tcomm->ops->runkernel)(tcomm,job);
737:   }
738:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
739:   return(0);
740: }

744: /* The zero-argument kernel needs to be callable with an unwrapped PetscThreadComm after Petsc_ThreadComm_keyval has been freed. */
745: static PetscErrorCode PetscThreadCommRunKernel0_Private(PetscThreadComm tcomm,PetscErrorCode (*func)(PetscInt,...))
746: {
747:   PetscErrorCode        ierr;
748:   PetscInt              i;
749:   PetscThreadCommJobCtx job;

752:   if (tcomm->isnothread) {
753:     (*func)(0);
754:     return(0);
755:   }

757:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
758:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
759:   if (job->job_status[0] != THREAD_JOB_NONE) {
760:     for (i=0; i<tcomm->nworkThreads; i++) {
761:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
762:     }
763:   }

765:   job->tcomm          = tcomm;
766:   job->tcomm->job_ctr = PetscJobQueue->ctr;
767:   job->nargs          = 1;
768:   job->pfunc          = (PetscThreadKernel)func;

770:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;

772:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
773:   PetscJobQueue->kernel_ctr++;

775:   (*tcomm->ops->runkernel)(tcomm,job);
776:   return(0);
777: }

781: /*@C
782:    PetscThreadCommRunKernel0 - PetscThreadCommRunKernel version for kernels with no
783:                                input arguments

785:    Input Parameters:
786: +  comm  - the MPI communicator
787: -  func  - the kernel (needs to be cast to PetscThreadKernel)

789:    Level: developer

791:    Notes:
792:    All input arguments to the kernel must be passed by reference, Petsc objects are
793:    inherrently passed by reference so you don't need to additionally & them.

795:    Example usage - PetscThreadCommRunKernel0(comm,(PetscThreadKernel)kernel_func);
796:    with kernel_func declared as
797:    PetscErrorCode kernel_func(PetscInt thread_id)

799:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
800:    by PETSc.

802: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
803: @*/
804: PetscErrorCode PetscThreadCommRunKernel0(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...))
805: {
806:   PetscErrorCode        ierr;
807:   PetscThreadComm       tcomm=0;

810:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
811:   PetscCommGetThreadComm(comm,&tcomm);
812:   PetscThreadCommRunKernel0_Private(tcomm,func);
813:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
814:   return(0);
815: }

819: /*@C
820:    PetscThreadCommRunKernel1 - PetscThreadCommRunKernel version for kernels with 1
821:                                input argument

823:    Input Parameters:
824: +  comm  - the MPI communicator
825: .  func  - the kernel (needs to be cast to PetscThreadKernel)
826: -  in1   - input argument for the kernel

828:    Level: developer

830:    Notes:
831:    All input arguments to the kernel must be passed by reference, Petsc objects are
832:    inherrently passed by reference so you don't need to additionally & them.

834:    Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
835:    with kernel_func declared as
836:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x)

838:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
839:    by PETSc.

841: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
842: @*/
843: PetscErrorCode PetscThreadCommRunKernel1(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1)
844: {
845:   PetscErrorCode        ierr;
846:   PetscInt              i;
847:   PetscThreadComm       tcomm=0;
848:   PetscThreadCommJobCtx job;

851:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
852:   PetscCommGetThreadComm(comm,&tcomm);
853:   if (tcomm->isnothread) {
854:     (*func)(0,in1);
855:     PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
856:     return(0);
857:   }

859:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
860:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
861:   if (job->job_status[0] != THREAD_JOB_NONE) {
862:     for (i=0; i<tcomm->nworkThreads; i++) {
863:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
864:     }
865:   }

867:   job->tcomm          = tcomm;
868:   job->tcomm->job_ctr = PetscJobQueue->ctr;
869:   job->nargs          = 1;
870:   job->pfunc          = (PetscThreadKernel)func;
871:   job->args[0]        = in1;

873:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;

875:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
876:   PetscJobQueue->kernel_ctr++;

878:   (*tcomm->ops->runkernel)(tcomm,job);

880:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
881:   return(0);
882: }

886: /*@C
887:    PetscThreadCommRunKernel2 - PetscThreadCommRunKernel version for kernels with 2
888:                                input arguments

890:    Input Parameters:
891: +  comm  - the MPI communicator
892: .  func  - the kernel (needs to be cast to PetscThreadKernel)
893: .  in1   - 1st input argument for the kernel
894: -  in2   - 2nd input argument for the kernel

896:    Level: developer

898:    Notes:
899:    All input arguments to the kernel must be passed by reference, Petsc objects are
900:    inherrently passed by reference so you don't need to additionally & them.

902:    Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
903:    with kernel_func declared as
904:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt *x,PetscInt *y)

906:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
907:    by PETSc.

909: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
910: @*/
911: PetscErrorCode PetscThreadCommRunKernel2(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2)
912: {
913:   PetscErrorCode        ierr;
914:   PetscInt              i;
915:   PetscThreadComm       tcomm=0;
916:   PetscThreadCommJobCtx job;

919:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
920:   PetscCommGetThreadComm(comm,&tcomm);
921:   if (tcomm->isnothread) {
922:     (*func)(0,in1,in2);
923:     PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
924:     return(0);
925:   }

927:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
928:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
929:   if (job->job_status[0] != THREAD_JOB_NONE) {
930:     for (i=0; i<tcomm->nworkThreads; i++) {
931:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
932:     }
933:   }

935:   job->tcomm          = tcomm;
936:   job->tcomm->job_ctr = PetscJobQueue->ctr;
937:   job->nargs          = 2;
938:   job->pfunc          = (PetscThreadKernel)func;
939:   job->args[0]        = in1;
940:   job->args[1]        = in2;

942:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;

944:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
945:   PetscJobQueue->kernel_ctr++;

947:   (*tcomm->ops->runkernel)(tcomm,job);

949:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
950:   return(0);
951: }

955: /*@C
956:    PetscThreadCommRunKernel3 - PetscThreadCommRunKernel version for kernels with 3
957:                                input arguments

959:    Input Parameters:
960: +  comm  - the MPI communicator
961: .  func  - the kernel (needs to be cast to PetscThreadKernel)
962: .  in1   - first input argument for the kernel
963: .  in2   - second input argument for the kernel
964: -  in3   - third input argument for the kernel

966:    Level: developer

968:    Notes:
969:    All input arguments to the kernel must be passed by reference. PETSc objects are
970:    inherently passed by reference, so you don't need to additionally & them.

972:    Example usage - PetscThreadCommRunKernel3(comm,(PetscThreadKernel)kernel_func,x,y,z);
973:    with kernel_func declared as
974:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x,PetscInt* y,PetscInt* z)

976:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
977:    by PETSc.

979: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
980: @*/
981: PetscErrorCode PetscThreadCommRunKernel3(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3)
982: {
983:   PetscErrorCode        ierr; /* NOTE(review): never assigned in this listing; return codes of the calls below are not checked here */
984:   PetscInt              i;
985:   PetscThreadComm       tcomm=0;
986:   PetscThreadCommJobCtx job;

989:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
990:   PetscCommGetThreadComm(comm,&tcomm); /* fetch the thread communicator associated with comm */
991:   if (tcomm->isnothread) { /* nonthreaded communicator: call the kernel directly, passing 0 as the thread id */
992:     (*func)(0,in1,in2,in3);
993:     PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
994:     return(0);
995:   }

997:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
998:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
999:   if (job->job_status[0] != THREAD_JOB_NONE) { /* slot is not in its initial state: it may still hold an earlier job */
1000:     for (i=0; i<tcomm->nworkThreads; i++) {
1001:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ; /* busy-wait until work thread i reports the earlier job completed */
1002:     }
1003:   }

1005:   job->tcomm          = tcomm;
1006:   job->tcomm->job_ctr = PetscJobQueue->ctr; /* record which queue slot holds this communicator's latest job */
1007:   job->nargs          = 3;
1008:   job->pfunc          = (PetscThreadKernel)func;
1009:   job->args[0]        = in1;
1010:   job->args[1]        = in2;
1011:   job->args[2]        = in3;

1013:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED; /* mark the job as posted for every work thread */

1015:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1016:   PetscJobQueue->kernel_ctr++; /* incremented once per kernel placed in the queue */

1018:   (*tcomm->ops->runkernel)(tcomm,job); /* dispatch the filled-in job through the communicator's runkernel operation */

1020:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1021:   return(0);
1022: }

1026: /*@C
1027:    PetscThreadCommRunKernel4 - PetscThreadCommRunKernel version for kernels with 4
1028:                                input arguments

1030:    Input Parameters:
1031: +  comm  - the MPI communicator
1032: .  func  - the kernel (needs to be cast to PetscThreadKernel)
1033: .  in1   - first input argument for the kernel
1034: .  in2   - second input argument for the kernel
1035: .  in3   - third input argument for the kernel
1036: -  in4   - fourth input argument for the kernel

1038:    Level: developer

1040:    Notes:
1041:    All input arguments to the kernel must be passed by reference. PETSc objects are
1042:    inherently passed by reference, so you don't need to additionally & them.

1044:    Example usage - PetscThreadCommRunKernel4(comm,(PetscThreadKernel)kernel_func,w,x,y,z);
1045:    with kernel_func declared as
1046:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* w,PetscInt* x,PetscInt* y,PetscInt* z)

1048:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
1049:    by PETSc.

1051: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
1052: @*/
1053: PetscErrorCode PetscThreadCommRunKernel4(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3,void *in4)
1054: {
1055:   PetscErrorCode        ierr; /* NOTE(review): never assigned in this listing; return codes of the calls below are not checked here */
1056:   PetscInt              i;
1057:   PetscThreadComm       tcomm=0;
1058:   PetscThreadCommJobCtx job;

1061:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
1062:   PetscCommGetThreadComm(comm,&tcomm); /* fetch the thread communicator associated with comm */
1063:   if (tcomm->isnothread) { /* nonthreaded communicator: call the kernel directly, passing 0 as the thread id */
1064:     (*func)(0,in1,in2,in3,in4);
1065:     PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1066:     return(0);
1067:   }

1069:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
1070:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
1071:   if (job->job_status[0] != THREAD_JOB_NONE) { /* slot is not in its initial state: it may still hold an earlier job */
1072:     for (i=0; i<tcomm->nworkThreads; i++) {
1073:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ; /* busy-wait until work thread i reports the earlier job completed */
1074:     }
1075:   }

1077:   job->tcomm          = tcomm;
1078:   job->tcomm->job_ctr = PetscJobQueue->ctr; /* record which queue slot holds this communicator's latest job */
1079:   job->nargs          = 4;
1080:   job->pfunc          = (PetscThreadKernel)func;
1081:   job->args[0]        = in1;
1082:   job->args[1]        = in2;
1083:   job->args[2]        = in3;
1084:   job->args[3]        = in4;

1086:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED; /* mark the job as posted for every work thread */

1088:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1089:   PetscJobQueue->kernel_ctr++; /* incremented once per kernel placed in the queue */

1091:   (*tcomm->ops->runkernel)(tcomm,job); /* dispatch the filled-in job through the communicator's runkernel operation */

1093:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1094:   return(0);
1095: }

1099: /*@C
1100:    PetscThreadCommRunKernel6 - PetscThreadCommRunKernel version for kernels with 6
1101:                                input arguments

1103:    Input Parameters:
1104: +  comm  - the MPI communicator
1105: .  func  - the kernel (needs to be cast to PetscThreadKernel)
1106: .  in1   - first input argument for the kernel
1107: .  in2   - second input argument for the kernel
1108: .  in3   - third input argument for the kernel
1109: .  in4   - fourth input argument for the kernel
1110: .  in5   - fifth input argument for the kernel
1111: -  in6   - sixth input argument for the kernel

1113:    Level: developer

1115:    Notes:
1116:    All input arguments to the kernel must be passed by reference. PETSc objects are
1117:    inherently passed by reference, so you don't need to additionally & them.

1119:    Example usage - PetscThreadCommRunKernel6(comm,(PetscThreadKernel)kernel_func,a,b,c,d,e,f);
1120:    with kernel_func declared as
1121:    PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* a,PetscInt* b,PetscInt* c,PetscInt* d,PetscInt* e,PetscInt* f)

1123:    The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
1124:    by PETSc.

1126: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
1127: @*/
1128: PetscErrorCode PetscThreadCommRunKernel6(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3,void *in4,void *in5,void *in6)
1129: {
1130:   PetscErrorCode        ierr; /* NOTE(review): never assigned in this listing; return codes of the calls below are not checked here */
1131:   PetscInt              i;
1132:   PetscThreadComm       tcomm=0;
1133:   PetscThreadCommJobCtx job;

1136:   PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
1137:   PetscCommGetThreadComm(comm,&tcomm); /* fetch the thread communicator associated with comm */
1138:   if (tcomm->isnothread) { /* nonthreaded communicator: call the kernel directly, passing 0 as the thread id */
1139:     (*func)(0,in1,in2,in3,in4,in5,in6);
1140:     PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1141:     return(0);
1142:   }

1144:   if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
1145:   job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
1146:   if (job->job_status[0] != THREAD_JOB_NONE) { /* slot is not in its initial state: it may still hold an earlier job */
1147:     for (i=0; i<tcomm->nworkThreads; i++) {
1148:       while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ; /* busy-wait until work thread i reports the earlier job completed */
1149:     }
1150:   }

1152:   job->tcomm          = tcomm;
1153:   job->tcomm->job_ctr = PetscJobQueue->ctr; /* record which queue slot holds this communicator's latest job */
1154:   job->nargs          = 6;
1155:   job->pfunc          = (PetscThreadKernel)func;
1156:   job->args[0]        = in1;
1157:   job->args[1]        = in2;
1158:   job->args[2]        = in3;
1159:   job->args[3]        = in4;
1160:   job->args[4]        = in5;
1161:   job->args[5]        = in6;


1164:   for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED; /* mark the job as posted for every work thread */

1166:   PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1167:   PetscJobQueue->kernel_ctr++; /* incremented once per kernel placed in the queue */

1169:   (*tcomm->ops->runkernel)(tcomm,job); /* dispatch the filled-in job through the communicator's runkernel operation */

1171:   PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1172:   return(0);
1173: }

1175: /*
1176:    PetscThreadCommDetach - Detaches the thread communicator from the MPI communicator if it exists

   Input Parameter:
.  comm - the MPI communicator to detach the thread communicator from

   Notes:
   The attribute stored under Petsc_ThreadComm_keyval is queried first and deleted only
   when it is present, so nothing is deleted when no thread communicator is attached.
   NOTE(review): what happens to the thread communicator itself on deletion depends on the
   delete callback registered for the keyval, which is not visible here - confirm.
1177: */
1180: PetscErrorCode PetscThreadCommDetach(MPI_Comm comm)
1181: {
1183:   PetscMPIInt    flg; /* set by MPI_Attr_get: nonzero when an attribute is stored under the keyval */
1184:   void           *ptr; /* receives the attribute value; not used beyond the existence check */

1187:   MPI_Attr_get(comm,Petsc_ThreadComm_keyval,&ptr,&flg);
1188:   if (flg) {
1189:     MPI_Attr_delete(comm,Petsc_ThreadComm_keyval);
1190:   }
1191:   return(0);
1192: }

1194: /*
1195:    PetscThreadCommAttach - Attaches the thread communicator to the MPI communicator if one
1196:    is not attached already.

   Input Parameters:
+  comm  - the MPI communicator to attach to
-  tcomm - the thread communicator to attach

   Notes:
   If an attribute is already stored under Petsc_ThreadComm_keyval, the routine leaves it
   alone and tcomm is neither attached nor referenced. Otherwise tcomm's reference count
   is incremented and the tcomm pointer is stored as the attribute value.
1197: */
1200: PetscErrorCode PetscThreadCommAttach(MPI_Comm comm,PetscThreadComm tcomm)
1201: {
1203:   PetscMPIInt    flg; /* set by MPI_Attr_get: nonzero when an attribute is already stored under the keyval */
1204:   void           *ptr; /* receives the existing attribute value; not used beyond the existence check */

1207:   MPI_Attr_get(comm,Petsc_ThreadComm_keyval,&ptr,&flg);
1208:   if (!flg) {
1209:     tcomm->refct++; /* the MPI communicator now holds a reference to tcomm */
1210:     MPI_Attr_put(comm,Petsc_ThreadComm_keyval,tcomm);
1211:   }
1212:   return(0);
1213: }

1217: /*
1218:   PetscThreadCommWorldInitialize - Initializes the global thread communicator object

1220:   PetscThreadCommWorldInitialize() defaults to using the nonthreaded communicator.

  Notes:
  Creates PETSC_THREAD_COMM_WORLD and the global PetscJobQueue. The queue holds
  tcomm->nkernels job contexts (16 by default, changeable with -threadcomm_nkernels).
  The per-thread status entries of all jobs live in one allocation of
  nworkThreads*nkernels entries, and each job's job_status points at its own
  nworkThreads-long slice of it. Every status entry starts as THREAD_JOB_NONE.
1221: */
1222: PetscErrorCode PetscThreadCommWorldInitialize(void)
1223: {
1224:   PetscErrorCode  ierr; /* NOTE(review): never assigned in this listing; return codes of the calls below are not checked here */
1225:   PetscThreadComm tcomm;
1226:   PetscInt        i,j;

1229:   PetscThreadCommCreate(&PETSC_THREAD_COMM_WORLD);
1230:   tcomm = PETSC_THREAD_COMM_WORLD; /* short alias for the global communicator */
1231:   PetscThreadCommSetNThreads(tcomm,PETSC_DECIDE);
1232:   PetscThreadCommSetAffinities(tcomm,NULL);
1233:   PetscNew(struct _p_PetscThreadCommJobQueue,&PetscJobQueue); /* allocate the global job queue object */

1235:   tcomm->nkernels = 16; /* default number of job slots; may be overridden by the option below */

1237:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting number of kernels",NULL);
1238:   PetscOptionsInt("-threadcomm_nkernels","number of kernels that can be launched simultaneously","",16,&tcomm->nkernels,NULL);
1239:   PetscOptionsEnd();

1241:   PetscMalloc(tcomm->nkernels*sizeof(struct _p_PetscThreadCommJobCtx),&PetscJobQueue->jobs); /* one job context per slot */
1242:   PetscMalloc(tcomm->nworkThreads*tcomm->nkernels*sizeof(PetscInt),&PetscJobQueue->jobs[0].job_status); /* single status array shared by all slots; NOTE(review): sized with sizeof(PetscInt) while the RunKernel routines read entries via PetscReadOnce(int,...) - confirm the element type of job_status */
1243:   for (i=0; i<tcomm->nkernels; i++) {
1244:     PetscJobQueue->jobs[i].job_status = PetscJobQueue->jobs[0].job_status + i*tcomm->nworkThreads; /* slot i uses the i-th nworkThreads-long slice */
1245:     for (j=0; j<tcomm->nworkThreads; j++) PetscJobQueue->jobs[i].job_status[j] = THREAD_JOB_NONE; /* no job has been placed in this slot yet */
1246:   }
1247:   PetscJobQueue->ctr        = 0;
1248:   PetscJobQueue->kernel_ctr = 0;
1249:   tcomm->job_ctr            = 0;

1251:   PetscThreadCommSetType(tcomm,NOTHREAD); /* start out with the nonthreaded type */
1252:   PetscThreadCommReductionCreate(tcomm,&tcomm->red);
1253:   PetscThreadCommStackCreate();
1254:   tcomm->refct++; /* count the global PETSC_THREAD_COMM_WORLD handle as a reference */
1255:   return(0);
1256: }

1260: /*
1261:    PetscThreadCommGetOwnershipRanges - Given the global size of an array, computes the local sizes and sets
1262:                                        the starting array indices

1264:    Input Parameters:
1265: +  comm - the MPI communicator which holds the thread communicator
1266: -  N    - the global size of the array

1268:    Output Parameters:
1269: .  trstarts - The starting array indices for each thread. The size of trstarts is nworkThreads+1

1271:    Notes:
1272:    trstarts is malloced in this routine

   The N entries are split as evenly as possible: with Q = N/nworkThreads and
   R = N - Q*nworkThreads, the first R threads get Q+1 entries and the remaining ones get Q.
   Thread i owns indices trstarts[i] up to (but not including) trstarts[i+1].
   For example, N = 10 with 3 work threads gives trstarts = {0,4,7,10}.

   NOTE(review): the routine divides by tcomm->nworkThreads, so it assumes that value is
   nonzero - confirm against callers.
1273: */
1274: PetscErrorCode PetscThreadCommGetOwnershipRanges(MPI_Comm comm,PetscInt N,PetscInt *trstarts[])
1275: {
1276:   PetscErrorCode  ierr; /* NOTE(review): never assigned in this listing; return codes of the calls below are not checked here */
1277:   PetscInt        Q,R; /* quotient and remainder of N divided by the number of work threads */
1278:   PetscBool       S; /* true when thread i is one of the first R threads, which get one extra entry */
1279:   PetscThreadComm tcomm = NULL;
1280:   PetscInt        *trstarts_out,nloc,i;

1283:   PetscCommGetThreadComm(comm,&tcomm);

1285:   PetscMalloc((tcomm->nworkThreads+1)*sizeof(PetscInt),&trstarts_out); /* one start per thread plus a final end marker */
1286:   trstarts_out[0] = 0;
1287:   Q               = N/tcomm->nworkThreads;
1288:   R               = N - Q*tcomm->nworkThreads;
1289:   for (i=0; i<tcomm->nworkThreads; i++) {
1290:     S                 = (PetscBool)(i < R);
1291:     nloc              = S ? Q+1 : Q; /* local size for thread i */
1292:     trstarts_out[i+1] = trstarts_out[i] + nloc; /* running prefix sum of the local sizes */
1293:   }

1295:   *trstarts = trstarts_out;
1296:   return(0);
1297: }

1301: /*
1302:    PetscThreadCommGetRank - Gets the rank of the calling thread

1304:    Input Parameters:
1305: .  tcomm - the thread communicator

1307:    Output Parameters:
1308: .  trank - The rank of the calling thread; 0 when the communicator provides no getrank operation

1310: */
1311: PetscErrorCode PetscThreadCommGetRank(PetscThreadComm tcomm,PetscInt *trank)
1312: {
1314:   PetscInt       rank = 0; /* value reported when tcomm->ops->getrank is not set */

1317:   if (tcomm->ops->getrank) {
1318:     (*tcomm->ops->getrank)(&rank); /* let the communicator's getrank operation fill in the rank */
1319:   }
1320:   *trank = rank;
1321:   return(0);
1322: }