1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
|
/*! @file ssnode_bmod.c
* \brief Performs numeric block updates within the relaxed snode.
*
* <pre>
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
* </pre>
*/
#include "slu_sdefs.h"
/*! \brief Performs numeric block updates within the relaxed snode.
*/
int
ssnode_bmod (
const int jcol, /* in */
const int jsupno, /* in */
const int fsupc, /* in */
float *dense, /* in */
float *tempv, /* working array */
GlobalLU_t *Glu, /* modified */
SuperLUStat_t *stat /* output */
)
{
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
_fcd ftcs1 = _cptofcd("L", strlen("L")),
ftcs2 = _cptofcd("N", strlen("N")),
ftcs3 = _cptofcd("U", strlen("U"));
#endif
int incx = 1, incy = 1;
float alpha = -1.0, beta = 1.0;
#endif
int luptr, nsupc, nsupr, nrow;
int isub, irow, i, iptr;
register int ufirst, nextlu;
int *lsub, *xlsub;
float *lusup;
int *xlusup;
flops_t *ops = stat->ops;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
lusup = (float *) Glu->lusup;
xlusup = Glu->xlusup;
nextlu = xlusup[jcol];
/*
* Process the supernodal portion of L\U[*,j]
*/
for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
irow = lsub[isub];
lusup[nextlu] = dense[irow];
dense[irow] = 0;
++nextlu;
}
xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
if ( fsupc < jcol ) {
luptr = xlusup[fsupc];
nsupr = xlsub[fsupc+1] - xlsub[fsupc];
nsupc = jcol - fsupc; /* Excluding jcol */
ufirst = xlusup[jcol]; /* Points to the beginning of column
jcol in supernode L\U(jsupno). */
nrow = nsupr - nsupc;
ops[TRSV] += nsupc * (nsupc - 1);
ops[GEMV] += 2 * nrow * nsupc;
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr,
&lusup[ufirst], &incx );
SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
#if SCIPY_FIX
if (nsupr < nsupc) {
/* Fail early rather than passing in invalid parameters to TRSV. */
ABORT("failed to factorize matrix");
}
#endif
strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr,
&lusup[ufirst], &incx );
sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
&lusup[ufirst], &tempv[0] );
/* Scatter tempv[*] into lusup[*] */
iptr = ufirst + nsupc;
for (i = 0; i < nrow; i++) {
lusup[iptr++] -= tempv[i];
tempv[i] = 0.0;
}
#endif
}
return 0;
}
|