/*
Copyright (C) 2014, The University of Texas at Austin
This file is part of libflame and is available under the 3-Clause
BSD license, which can be found in the LICENSE file at the top-level
directory, or at http://opensource.org/licenses/BSD-3-Clause
*/
#include "FLAME.h"
#include "Gemm_prototypes.h"
/*
 * Gemm_blk_var3: blocked matrix-matrix multiply that walks A and C in
 * lockstep, one panel of rows per iteration.
 *
 * Each pass exposes a panel A1 of A and the matching panel C1 of C, then
 * calls FLA_Gemm with no transposition and both scalars set to FLA_ONE
 * (per the libflame FLA_Gemm convention this is C1 := A1 * B + C1).
 * B is passed whole to every call.
 *
 *   A, B, C  - operand objects; C1, a view of C, is the FLA_Gemm output.
 *   nb_alg   - upper bound on the panel height used per iteration.
 *
 * Always returns FLA_SUCCESS.
 *
 * NOTE(review): the panel height is min( FLA_Obj_length( AB ), nb_alg ).
 * With nb_alg == 0 that is 0, so the loop presumably never advances;
 * callers appear to be expected to pass a positive block size -- confirm.
 */
int Gemm_blk_var3( FLA_Obj A, FLA_Obj B, FLA_Obj C, int nb_alg )
{
    /* Two-way (top/bottom) views that track progress through A and C. */
    FLA_Obj AT, AB;
    FLA_Obj CT, CB;

    /* Three-way views exposed inside the loop; A1 / C1 are the current panels. */
    FLA_Obj A0, A1, A2;
    FLA_Obj C0, C1, C2;

    /* Height of the panel handled in the current iteration. */
    dim_t panel_rows;

    /* Initial split: size 0 on the FLA_TOP side for both A and C. */
    FLA_Part_2x1( A, &AT, &AB, 0, FLA_TOP );
    FLA_Part_2x1( C, &CT, &CB, 0, FLA_TOP );

    /* Keep going until the top view of A is as long as A itself. */
    while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) )
    {
        /* Never take more than what is left in the bottom view. */
        panel_rows = min( FLA_Obj_length( AB ), nb_alg );

        /* Carve the next panel off the FLA_BOTTOM views. */
        FLA_Repart_2x1_to_3x1( AT, &A0, &A1, AB, &A2, panel_rows, FLA_BOTTOM );
        FLA_Repart_2x1_to_3x1( CT, &C0, &C1, CB, &C2, panel_rows, FLA_BOTTOM );

        /* Update the current panel of C with the current panel of A times B. */
        FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A1, B, FLA_ONE, C1 );

        /* Fold the finished panel into the FLA_TOP views. */
        FLA_Cont_with_3x1_to_2x1( &AT, A0, A1, &AB, A2, FLA_TOP );
        FLA_Cont_with_3x1_to_2x1( &CT, C0, C1, &CB, C2, FLA_TOP );
    }

    return FLA_SUCCESS;
}
|