1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
/* @cond INNERDOC */
/*!
* @file
* @author Michele Martone
* @brief
* This source file contains some CSR sparse matrices multiplication code.
* */
include(`rsb_misc.m4')dnl
include(`do_unroll.m4')dnl
RSB_M4_HEADER_MESSAGE()dnl
dnl
ifdef(`ONLY_WANT_HEADERS',`
#ifndef RSB_SPGEMM_COO_H_INCLUDED
#define RSB_SPGEMM_COO_H_INCLUDED
#include "rsb_internals.h"
',`dnl
#include "rsb_internals.h"
')
dnl
rsb_err_t rsb__do_util_csr_csr_sparse_mul_serial(rsb_nnz_idx_t * PA, rsb_coo_idx_t * JA, void *VA_, const rsb_nnz_idx_t *ARP, const rsb_nnz_idx_t *BRP, const rsb_coo_idx_t *AJA, const rsb_coo_idx_t *BJA, const void * aVA_, const void * bVA_, const rsb_coo_idx_t cm, const rsb_coo_idx_t ck, rsb_nnz_idx_t * p, void * acc_, rsb_nnz_idx_t * opsp , rsb_type_t typecode, const rsb_coo_idx_t afr, const rsb_coo_idx_t ars)
ifdef(`ONLY_WANT_HEADERS',`;',`dnl
{
rsb_nnz_idx_t cblocks=0;
rsb_nnz_idx_t ops=0;
rsb_coo_idx_t ai,aj;
rsb_coo_idx_t al,bl,cl;
rsb_coo_idx_t bj;
//rsb_coo_idx_t bi;
foreach(`mtype',RSB_M4_TYPES,`dnl
`#ifdef 'RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype)
if( typecode == RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) )
dnl
{
mtype *VA=VA_,*acc=acc_;
const mtype * aVA=aVA_,*bVA=bVA_; ;
dnl for(ai=0;ai<cm;++ai)
for(ai=afr;ai<cm;ai+=ars)
{
rsb_nnz_idx_t aro;
rsb_nnz_idx_t are;
//rsb_nnz_idx_t arb;
rsb_nnz_idx_t marker;
//assert(cblocks==PA[ai]); // this is true on the serial execution of this loop
cblocks=PA[ai]; // this shall work even in a parallel execution of this loop (with differing acc/p arrays)
marker=cblocks+1;
aro=ARP[ai];
are=ARP[ai+1];
//arb=ARP[ai+1]-ARP[ai];
/* we start row ai of target matrix C */
for(al=aro;al<are;++al)
{
rsb_nnz_idx_t bro=BRP[aj=AJA[al]];
rsb_nnz_idx_t bre=BRP[aj+1];
/* rsb_nnz_idx_t bcb=BRP[aj+1] - BRP[aj];*/
for(bl=bro;bl<bre;++bl)
{
//bi=aj;
bj=BJA[bl];
if(p[bj]<marker)
p[bj]=marker,
(JA)[cblocks++]=bj,
acc[bj] =aVA[al]*bVA[bl];
else
acc[bj]+=aVA[al]*bVA[bl];
}
/*#if RSB_WANT_SPGEMM_MFLOPS*/
ops+=(bre-bro);
/*#endif*/
}
for(cl=(PA)[ai];cl<(PA)[ai+1];++cl)
{
((mtype*)(VA))[cl]=acc[(JA)[cl]]; /* FIXME */
}
}
if(opsp)*opsp=ops;
}
else
#endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
')dnl
return RSB_ERR_UNSUPPORTED_TYPE ;
return RSB_ERR_NO_ERROR;
}
')dnl
dnl
rsb_err_t rsb__do_util_csr_csr_dense_mul_serial(rsb_coo_idx_t ldc, rsb_coo_idx_t nr, rsb_coo_idx_t nc, rsb_bool_t isccolmajor, void *cVA_, const rsb_nnz_idx_t *ARP, const rsb_nnz_idx_t *BRP, const rsb_coo_idx_t *AJA, const rsb_coo_idx_t *BJA, const void * aVA_, const void * bVA_, const rsb_coo_idx_t cm, const rsb_coo_idx_t ck, rsb_nnz_idx_t * opsp , rsb_type_t typecode, const rsb_coo_idx_t afr, const rsb_coo_idx_t ars)
ifdef(`ONLY_WANT_HEADERS',`;',`dnl
{
rsb_nnz_idx_t ops=0;
rsb_coo_idx_t ai,aj;
rsb_coo_idx_t al,bl;
//rsb_coo_idx_t bi;
rsb_coo_idx_t bj;
foreach(`mtype',RSB_M4_TYPES,`dnl
`#ifdef 'RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype)
if( typecode == RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) )
dnl
{
mtype *cVA=cVA_;
const mtype * aVA=aVA_,*bVA=bVA_; ;
dnl for(ai=0;ai<cm;++ai)
for(ai=afr;ai<cm;ai+=ars)
{
rsb_nnz_idx_t aro;
rsb_nnz_idx_t are;
//rsb_nnz_idx_t arb;
aro=ARP[ai];
are=ARP[ai+1];
//arb=ARP[ai+1]-ARP[ai];
/* we start row ai of target matrix C */
for(al=aro;al<are;++al)
{
rsb_nnz_idx_t bro=BRP[aj=AJA[al]];
rsb_nnz_idx_t bre=BRP[aj+1];
/* rsb_nnz_idx_t bcb=BRP[aj+1] - BRP[aj];*/
for(bl=bro;bl<bre;++bl)
{
//bi=aj;
bj=BJA[bl];
dnl *(mtype*)(RSB_BLOCK_ROWMAJOR_ADDRESS(cVA,ldc,nr,nc,ai,bj,(sizeof(mtype))))+=aVA[al]*bVA[bl];
RSB_BLOCK_X_MAJOR_REFERENCE(cVA,ldc,ai,bj,isccolmajor)+=aVA[al]*bVA[bl];
}
/*#if RSB_WANT_SPGEMM_MFLOPS*/
ops+=(bre-bro);
/*#endif*/
}
}
if(opsp)*opsp=ops;
}
else
#endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
')dnl
return RSB_ERR_UNSUPPORTED_TYPE ;
return RSB_ERR_NO_ERROR;
}
')dnl
dnl
dnl
ifdef(`ONLY_WANT_HEADERS',`dnl
#endif /* RSB_SPGEMM_COO_H_INCLUDED */
')dnl
dnl
/* @endcond */
|