File: test114.m

package info (click to toggle)
suitesparse-graphblas 7.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 67,112 kB
  • sloc: ansic: 1,072,243; cpp: 8,081; sh: 512; makefile: 506; asm: 369; python: 125; awk: 10
file content (141 lines) | stat: -rw-r--r-- 3,704 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
function test114
%TEST114 performance of reduce-to-scalar
%
% Times reduce-to-scalar on a large n-by-n sparse matrix whose entries are
% all one: first with the builtin max/min/sum/prod, then with
% GB_mex_reduce_to_scalar for the logical operators and for every
% (operator, type) pair built from the add_ops and types returned by
% GB_spec_opsall.  Each GraphBLAS case is repeated for every thread count in
% nthreads_list that does not exceed GB_mex_omp_max_threads, and the speedup
% relative to the 1-thread time is printed.

% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
% SPDX-License-Identifier: Apache-2.0

rng ('default') ;

% Remember the current thread/chunk settings and restore them when this
% function exits, whether it finishes normally or a mex call throws.  The
% variable is not called "save", to avoid shadowing the builtin of that name.
[save_nthreads, save_chunk] = nthreads_get ;
restore_settings = onCleanup (@() nthreads_set (save_nthreads, save_chunk)) ; %#ok<NASGU>

chunk = 4096 ;
% Must be in ascending order: the timing loop stops at the first entry that
% exceeds nthreads_max.  (The 6th entry used to be a stray "2".)
nthreads_list = [1 2 4 8 16 20 40 64 160] ;
nthreads_max = GB_mex_omp_max_threads ;
ntrials = 10 ;

%-------------------------------------------------------------------------------
% big matrix ...
fprintf ('\nbig matrix, no early exit\n') ;
n = 8000 ;
A = sparse (ones (n)) ;

% builtin reductions: each row is { name to print, reduction to time }
builtin_reductions = { ...
    'max',  @(X) full (max  (max  (X))) ; ...
    'min',  @(X) full (min  (min  (X))) ; ...
    'sum',  @(X) full (sum  (sum  (X))) ; ...
    'prod', @(X) full (prod (prod (X))) } ;

for k = 1:size (builtin_reductions, 1)
    reduce = builtin_reductions {k,2} ;
    tic
    for trial = 1:ntrials
        s = reduce (A) ; %#ok<NASGU>
    end
    tm = toc ;
    fprintf ('builtin %s: %g\n', builtin_reductions {k,1}, tm) ;
end

S.matrix = A ;
S.pattern = logical (spones (A)) ;

[~, ~, add_ops, types, ~, ~] = GB_spec_opsall ;
types = types.all ;

%-------------------------------------------------------------------------------
% logical operators
ops = { 'or', 'and', 'xor', 'eq', 'any' } ;
for k1 = 1:length(ops)
    op = ops {k1} ;
    fprintf ('\nGraphBLAS: op %s\n', op) ;
    S.class = 'logical' ;
    cin = logical (0) ;
    time_reduce (cin, op, S, nthreads_list, nthreads_max, chunk, ntrials) ;
end

%-------------------------------------------------------------------------------
% all add_ops, for each type from the 2nd entry of types.all onward
ops = add_ops ;
for k1 = 1:length(ops)
    op = ops {k1} ;
    fprintf ('\nGraphBLAS: op %s\n', op) ;
    for k2 = 2:length(types)
        atype = types {k2} ;
        S.class = atype ;
        fprintf ('\ntype: %s\n', atype) ;
        % skip (op, type) combinations that GB_spec_operator rejects
        try
            GB_spec_operator (op, atype) ;
        catch
            continue
        end
        % initial scalar of the matching type
        switch atype
            case 'logical'
                cin = logical (0) ;
            case 'int8'          % GrB_INT8
                cin = int8 (0) ;
            case 'uint8'         % GrB_UINT8
                cin = uint8 (0) ;
            case 'int16'         % GrB_INT16
                cin = int16 (0) ;
            case 'uint16'        % GrB_UINT16
                cin = uint16 (0) ;
            case 'int32'         % GrB_INT32
                cin = int32 (0) ;
            case 'uint32'        % GrB_UINT32
                cin = uint32 (0) ;
            case 'int64'         % GrB_INT64
                cin = int64 (0) ;
            case 'uint64'        % GrB_UINT64
                cin = uint64 (0) ;
            case 'single'        % GrB_FP32
                cin = single (0) ;
            case 'double'        % GrB_FP64
                cin = double (0) ;
            case 'single complex'        % GxB_FC32
                cin = complex (single (0)) ;
            case 'double complex'        % GxB_FC64
                cin = complex (double (0)) ;
        end
        time_reduce (cin, op, S, nthreads_list, nthreads_max, chunk, ntrials) ;
    end
end

% thread/chunk settings are restored here, when restore_settings is destroyed

%-------------------------------------------------------------------------------
function time_reduce (cin, op, S, nthreads_list, nthreads_max, chunk, ntrials)
%TIME_REDUCE time GB_mex_reduce_to_scalar for each usable thread count
%
% For each entry of nthreads_list (in order, stopping at the first one larger
% than nthreads_max), sets the thread count and chunk size, runs ntrials
% reductions of S with operator op starting from cin, and prints the total
% time and the speedup relative to the 1-thread time.

t1 = NaN ;      % 1-thread time; stays NaN if the list has no entry equal to 1
for nthreads = nthreads_list
    if (nthreads > nthreads_max)
        break ;
    end
    nthreads_set (nthreads, chunk) ;
    tic
    for trial = 1:ntrials
        c1 = GB_mex_reduce_to_scalar (cin, [ ], op, S) ; %#ok<NASGU>
    end
    t = toc ;
    if (nthreads == 1)
        t1 = t ;
    end
    fprintf ('nthreads %3d %12.4f  speedup %12.4f\n', ...
        nthreads, t, t1/t) ;
end