Actual source code: superlu_dist.c
petsc-3.4.2 2013-07-02

/*
        Provides an interface to the SuperLU_DIST_2.2 sparse solver
*/
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsctime.h>
#if defined(PETSC_HAVE_STDLIB_H) /* This is to get around a weird problem with SuperLU on Cray */
#include <stdlib.h>
#endif

#if defined(PETSC_USE_64BIT_INDICES)
/* ugly SuperLU_Dist variable telling it to use long long int */
#define _LONGINT
#endif

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#include <superlu_zdefs.h>
#else
#include <superlu_ddefs.h>
#endif
EXTERN_C_END

/*
    GLOBAL      - The sparse matrix and right-hand side are initially stored entirely on process 0; a better name would be "centralized".
    DISTRIBUTED - The sparse matrix and right-hand side are initially distributed across the entire MPI communicator.
*/
typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
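
/*
   The input mode can be selected at run time, e.g.

       -mat_superlu_dist_matinput GLOBAL     (or DISTRIBUTED)

   The option itself is registered by the PetscOptionsEnum() call in
   MatGetFactor_aij_superlu_dist() below; DISTRIBUTED is the default and is
   silently switched to GLOBAL when running on a single process.
*/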

typedef struct {
  int_t                nprow,npcol,*row,*col;
  gridinfo_t           grid;
  superlu_options_t    options;
  SuperMatrix          A_sup;
  ScalePermstruct_t    ScalePermstruct;
  LUstruct_t           LUstruct;
  int                  StatPrint;
  SuperLU_MatInputMode MatInputMode;
  SOLVEstruct_t        SOLVEstruct;
  fact_t               FactPattern;
  MPI_Comm             comm_superlu;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex        *val;
#else
  double               *val;
#endif
  PetscBool            matsolve_iscalled,matmatsolve_iscalled;
  PetscBool            CleanUpSuperLU_Dist;  /* Flag to clean up (non-global) SuperLU objects during Destroy */
} Mat_SuperLU_DIST;

extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo*);
extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo*);
extern PetscErrorCode MatDestroy_MPIAIJ(Mat);

PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscBool        flg;

  if (lu && lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      PetscStackCall("SuperLU_DIST:Destroy_CompCol_Matrix_dist",Destroy_CompCol_Matrix_dist(&lu->A_sup));
    } else {
      PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
      if (lu->options.SolveInitialized) {
#if defined(PETSC_USE_COMPLEX)
        PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
        PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
      }
    }
    PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
    PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct));
    PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct));

    /* Release the SuperLU_DIST process grid. */
    PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid));
    MPI_Comm_free(&(lu->comm_superlu));
  }
  PetscFree(A->spptr);

  PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
  if (flg) {
    MatDestroy_SeqAIJ(A);
  } else {
    MatDestroy_MPIAIJ(A);
  }
  return(0);
}

PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;
  int              info;  /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */

  MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
  if (size > 1 && lu->MatInputMode == GLOBAL) {
    /* global mat input, convert b to x_seq */
    VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
    ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
    VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
    ISDestroy(&iden);

    VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
    VecGetArray(x_seq,&bptr);
  } else { /* size==1 || distributed mat input */
    if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
      /* see comments in MatMatSolve() */
#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
      PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
      lu->options.SolveInitialized = NO;
    }
    VecCopy(b_mpi,x);
    VecGetArray(x,&bptr);
  }

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));  /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,&lu->grid,&lu->LUstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs,&lu->grid,&lu->LUstruct,berr,&stat,&info));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);

  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));

  if (size > 1 && lu->MatInputMode == GLOBAL) {
    /* convert seq x to mpi x */
    VecRestoreArray(x_seq,&bptr);
    VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
    VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
    VecScatterDestroy(&scat);
    VecDestroy(&x_seq);
  } else {
    VecRestoreArray(x,&bptr);

    lu->matsolve_iscalled    = PETSC_TRUE;
    lu->matmatsolve_iscalled = PETSC_FALSE;
  }
  return(0);
}

PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         M=A->rmap->N,m=A->rmap->n,nrhs;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  int              info;  /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool        flg;

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
  PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
  PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
  if (size > 1 && lu->MatInputMode == GLOBAL) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size);
  /* size==1 or distributed mat input */
  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* The communication pattern of the existing SOLVEstruct was probably not created for matmatsolve,
       so destroy it and create a new SOLVEstruct.
       Otherwise it may result in memory corruption or an incorrect solution.
       See src/mat/examples/tests/ex125.c */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
    PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
    lu->options.SolveInitialized = NO;
  }
  MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);

  MatGetSize(B_mpi,NULL,&nrhs);

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));  /* Initialize the statistics variables. */
  MatDenseGetArray(X,&bptr);
  if (lu->MatInputMode == GLOBAL) { /* size == 1 */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, M, nrhs,&lu->grid, &lu->LUstruct, berr, &stat, &info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, M, nrhs, &lu->grid, &lu->LUstruct, berr, &stat, &info));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
  MatDenseRestoreArray(X,&bptr);

  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  return(0);
}

PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int              sinfo;  /* SuperLU_Dist info flag is always an int even with long long indices */
  PetscMPIInt      size;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  PetscLogDouble   time0,time,time_min,time_max;
  Mat              F_diag=NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);

  if (lu->options.PrintStat) { /* collect time for mat conversion */
    MPI_Barrier(PetscObjectComm((PetscObject)A));
    PetscTime(&time0);
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
      MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
      ISDestroy(&isrow);

      A_seq = *tseq;
      PetscFree(tseq);
      aa    = (Mat_SeqAIJ*)A_seq->data;
    } else {
      PetscBool flg;
      PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);
      if (flg) {
        Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
        A = At->A;
      }
      aa = (Mat_SeqAIJ*)A->data;
    }

    /* Convert the Petsc compressed-row (NR) matrix to SuperLU_DIST compressed-column (NC) format.
       Note: the memory for lu->val, lu->col and lu->row is allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) { /* successive numeric factorization, sparsity pattern is reused. */
      PetscStackCall("SuperLU_DIST:Destroy_CompCol_Matrix_dist",Destroy_CompCol_Matrix_dist(&lu->A_sup));
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else { /* lu->FactPattern == SamePattern */
        PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct));
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCompRow_to_CompCol_dist",zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row));
#else
    PetscStackCall("SuperLU_DIST:dCompRow_to_CompCol_dist",dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row));
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCreate_CompCol_Matrix_dist",zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE));
#else
    PetscStackCall("SuperLU_DIST:dCreate_CompCol_Matrix_dist",dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE));
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa = (Mat_SeqAIJ*)(mat->A)->data;
    bb = (Mat_SeqAIJ*)(mat->B)->data;
    ai = aa->i; aj = aa->j;
    bi = bb->i; bj = bb->j;
#if defined(PETSC_USE_COMPLEX)
    av = (doublecomplex*)aa->a;
    bv = (doublecomplex*)bb->a;
#else
    av = aa->a;
    bv = bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#else
      PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
      /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* this leads to a crash! However, see SuperLU_DIST_2.5/EXAMPLE/pzdrive2.c */
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct)); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }
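
    /* An MPIAIJ matrix stores each local row in two pieces: the diagonal block
       mat->A (with process-local column indices) and the off-diagonal block
       mat->B (whose compressed column indices map to global columns through
       garray[]).  The loop below merges the two pieces of every local row into
       the single local CSR structure (lu->row, lu->col, lu->val) that
       SuperLU_DIST expects, with global column indices in ascending order:
       first the B entries whose global column index precedes the diagonal
       block, then the A entries, then the remaining B entries. */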
    nz = 0;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA     = ai[i+1] - ai[i];
      countB     = bi[i+1] - bi[i];
      ajj        = aj + ai[i]; /* ptr to the beginning of this row */
      bjj        = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB         = 0;
      for (j=0; j<countB; j++) {
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz]   = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++) {
        lu->col[nz]   = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++) {
        lu->col[nz]   = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCreate_CompRowLoc_Matrix_dist",zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE));
#else
    PetscStackCall("SuperLU_DIST:dCreate_CompRowLoc_Matrix_dist",dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE));
#endif
  }
  if (lu->options.PrintStat) {
    PetscTime(&time);
    time0 = time - time0;
  }

  /* Factor the matrix. */
  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));  /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1) {
    MatDestroy(&A_seq);
  }

  if (lu->options.PrintStat) {
    MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,PetscObjectComm((PetscObject)A));
    MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,PetscObjectComm((PetscObject)A));
    MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,PetscObjectComm((PetscObject)A));
    time = time/size; /* average time */
    PetscPrintf(PetscObjectComm((PetscObject)A), " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);
    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1) {
    F_diag            = ((Mat_MPIAIJ*)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact  = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  return(0);
}

/* Note the Petsc r and c permutations are ignored */
PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->spptr;
  PetscInt         M   = A->rmap->N,N=A->cmap->N;

  /* Initialize the SuperLU process grid. */
  PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));

  /* Initialize ScalePermstruct and LUstruct. */
  PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct));
  PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(M, N, &lu->LUstruct));
  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve           = MatSolve_SuperLU_DIST;
  F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  return(0);
}
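
/* Note: the "symbolic" phase above only creates the process grid and the
   ScalePermstruct/LUstruct containers; the actual column ordering and symbolic
   analysis happen inside p[dz]gssvx() during the first call to
   MatLUFactorNumeric_SuperLU_DIST() (i.e. while options.Fact == DOFACT). */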

PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
{
  *type = MATSOLVERSUPERLU_DIST;
  return(0);
}

PETSC_EXTERN PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  Mat               B;
  Mat_SuperLU_DIST  *lu;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
  PetscMPIInt       size;
  superlu_options_t options;
  PetscBool         flg;
  const char        *colperm[]     = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"};
  const char        *rowperm[]     = {"LargeDiag","NATURAL"};
  const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

  /* Create the factorization matrix */
  MatCreate(PetscObjectComm((PetscObject)A),&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);
  MatSetType(B,((PetscObject)A)->type_name);
  MatSeqAIJSetPreallocation(B,0,NULL);
  MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);

  B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  B->ops->view             = MatView_SuperLU_DIST;
  B->ops->destroy          = MatDestroy_SuperLU_DIST;

  PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_aij_superlu_dist);

  B->factortype = MAT_FACTOR_LU;

  PetscNewLog(B,Mat_SuperLU_DIST,&lu);
  B->spptr = lu;

  /* Set the default input options:
     options.Fact              = DOFACT;
     options.Equil             = YES;
     options.ParSymbFact       = NO;
     options.ColPerm           = METIS_AT_PLUS_A;
     options.RowPerm           = LargeDiag;
     options.ReplaceTinyPivot  = YES;
     options.IterRefine        = DOUBLE;
     options.Trans             = NOTRANS;
     options.SolveInitialized  = NO;  -- holds the communication pattern used by MatSolve() and MatMatSolve()
     options.RefineInitialized = NO;
     options.PrintStat         = YES;
  */
  set_default_options_dist(&options);

  MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(lu->comm_superlu));
  MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
  /* Default num of process columns and rows */
  PetscMPIIntCast((PetscInt)(0.5 + PetscSqrtReal((PetscReal)size)),&lu->npcol);
  if (!lu->npcol) lu->npcol = 1;
  while (lu->npcol > 0) {
    PetscMPIIntCast(size/lu->npcol,&lu->nprow);
    if (size == lu->nprow * lu->npcol) break;
    lu->npcol--;
  }
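
  /* A worked example of the grid default above: size=6 gives
     npcol = (int)(0.5 + sqrt(6)) = 2 and nprow = 6/2 = 3, i.e. a 3 x 2 grid;
     a prime size such as 7 decrements npcol down to 1, yielding a 7 x 1 grid. */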

  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
  PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,NULL);
  PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,NULL);
  if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %d * npcol %d",size,lu->nprow,lu->npcol);

  lu->MatInputMode = DISTRIBUTED;

  PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,NULL);
  if (lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

  PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
  if (!flg) options.Equil = NO;

  PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,2,rowperm[0],&indx,&flg);
  if (flg) {
    switch (indx) {
    case 0:
      options.RowPerm = LargeDiag;
      break;
    case 1:
      options.RowPerm = NOROWPERM;
      break;
    }
  }

  PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);
  if (flg) {
    switch (indx) {
    case 0:
      options.ColPerm = NATURAL;
      break;
    case 1:
      options.ColPerm = MMD_AT_PLUS_A;
      break;
    case 2:
      options.ColPerm = MMD_ATA;
      break;
    case 3:
      options.ColPerm = METIS_AT_PLUS_A;
      break;
    case 4:
      options.ColPerm = PARMETIS; /* only works for np>1 */
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
    }
  }

  PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
  if (!flg) options.ReplaceTinyPivot = NO;

  options.ParSymbFact = NO;

  PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);
  if (flg) {
#if defined(PETSC_HAVE_PARMETIS)
    options.ParSymbFact = YES;
    options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
#else
    PetscPrintf(PETSC_COMM_SELF,"parsymbfact needs PARMETIS\n");
#endif
  }

  lu->FactPattern = SamePattern_SameRowPerm;

  PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[1],&indx,&flg);
  if (flg) {
    switch (indx) {
    case 0:
      lu->FactPattern = SamePattern;
      break;
    case 1:
      lu->FactPattern = SamePattern_SameRowPerm;
      break;
    }
  }
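
  /* Roughly: SamePattern_SameRowPerm (the default here) reuses both the column
     ordering and the previous row permutation across factorizations, which is
     cheapest when the new values pivot the same way; SamePattern reuses only
     the sparsity pattern/column ordering and recomputes the row permutation
     (hence the extra Destroy_LU() calls in MatLUFactorNumeric_SuperLU_DIST()
     above). */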

  options.IterRefine = NOREFINE;
  PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
  if (flg) options.IterRefine = SLU_DOUBLE;

  if (PetscLogPrintInfo) options.PrintStat = YES;
  else options.PrintStat = NO;
  PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,0);
  PetscOptionsEnd();

  lu->options              = options;
  lu->options.Fact         = DOFACT;
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_FALSE;

  *F = B;
  return(0);
}

PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  MatGetFactor_aij_superlu_dist(A,ftype,F);
  return(0);
}

PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  MatGetFactor_aij_superlu_dist(A,ftype,F);
  return(0);
}

PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*)A->spptr;
  superlu_options_t options;
  PetscErrorCode    ierr;

  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

  options = lu->options;
  PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
  PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
  PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);
  PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);
  PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);
  PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);
  PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
  PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL" : "LargeDiag");

  switch (options.ColPerm) {
  case NATURAL:
    PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");
    break;
  case MMD_AT_PLUS_A:
    PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");
    break;
  case MMD_ATA:
    PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");
    break;
  case METIS_AT_PLUS_A:
    PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n");
    break;
  case PARMETIS:
    PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
  }

  PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);

  if (lu->FactPattern == SamePattern) {
    PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");
  } else {
    PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");
  }
  return(0);
}

PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  if (iascii) {
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      MatFactorInfo_SuperLU_DIST(A,viewer);
    }
  }
  return(0);
}

/*MC
  MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization

  Works with AIJ matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_matinput <GLOBAL,DISTRIBUTED> - matrix input mode (0=global, 1=distributed)
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
. -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern_SameRowPerm> - (choose one of) SamePattern SamePattern_SameRowPerm
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_statprint - print factorization information
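
  Notes: a minimal usage sketch, assuming ksp is a previously created KSP holding the system matrix and pc is its PC:
.vb
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCLU);
      PCFactorSetMatSolverPackage(pc,MATSOLVERSUPERLU_DIST);
.ve
  or equivalently, on the command line: -pc_type lu -pc_factor_mat_solver_package superlu_dist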

  Level: beginner

.seealso: PCLU, PCFactorSetMatSolverPackage(), MatSolverPackage

M*/