File: sfmult_xA.c

package info (click to toggle)
suitesparse 1%3A7.10.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 254,920 kB
  • sloc: ansic: 1,134,743; cpp: 46,133; makefile: 4,875; fortran: 2,087; java: 1,826; sh: 996; ruby: 725; python: 495; asm: 371; sed: 166; awk: 44
file content (73 lines) | stat: -rw-r--r-- 1,938 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
//==============================================================================
// sfmult_xA: Y = X*A, unblocked
//==============================================================================

// SFMULT, Copyright (c) 2009, Timothy A Davis. All Rights Reserved.
// SPDX-License-Identifier: BSD-3-clause

// This kernel is unique; it operates on all of X and Y, not just a few rows or
// columns.  It is used only when A is very sparse and X is large since it can
// be very slow otherwise.

#include "sfmult.h"

void sfmult_xA		// y = (A'*x')' = x*A,	x is k-by-m, and y is k-by-n
(
    // --- outputs, not initialized on input
    double *Yx,		// k-by-n
    double *Yz,		// k-by-n if Y is complex (TO DO)

    // --- inputs, not modified
    const Int *Ap,	// size n+1 column pointers
    const Int *Ai,	// size nz = Ap[n] row indices
    const double *Ax,	// size nz values
    const double *Az,	// size nz imaginary values if A is complex (TO DO)
    Int m,		// A is m-by-n
    Int n,
    const double *Xx,	// k-by-m
    const double *Xz,	// k-by-m if X complex (TO DO)
    int ac,		// true: use conj(A), otherwise use A (TO DO)
    int xc,		// true: use conj(X), otherwise use X (TO DO)
    int yc		// true: compute conj(Y), otherwise compute Y (TO DO)
    , Int k
)
{
    double a ;
    const double *xx, *xz ;
    Int p, pend, j, i, k1 ;

    p = 0 ;
    for (j = 0 ; j < n ; j++)
    {
	pend = Ap [j+1] ;
	for (k1 = 0 ; k1 < k ; k1++)
	{
	    Yx [k1] = 0 ;
	}
	for ( ; p < pend ; p++)
	{
	    i = Ai [p] ;
	    a = Ax [p] ;
	    xx = Xx + i*k ;
	    xz = Xz + i*k ;
	    k1 = k % 4 ;
	    switch (k1)
	    {
		case 3: Yx [2] += a * xx [2] ;
		case 2: Yx [1] += a * xx [1] ;
		case 1: Yx [0] += a * xx [0] ;
		case 0: ;
	    }
	    for ( ; k1 < k ; k1 += 4)
	    {
		Yx [k1  ] += a * xx [k1  ] ;
		Yx [k1+1] += a * xx [k1+1] ;
		Yx [k1+2] += a * xx [k1+2] ;
		Yx [k1+3] += a * xx [k1+3] ;
	    }
	}
	Yx += k ;
	Yz += k ;
    }
}