File: perfbench_serial.cpp

package info (click to toggle)
ispc 1.28.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 97,620 kB
  • sloc: cpp: 77,067; python: 8,303; yacc: 3,337; lex: 1,126; ansic: 631; sh: 475; makefile: 17
file content (42 lines) | stat: -rw-r--r-- 920 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/*
  Copyright (c) 2012-2023, Intel Corporation

  SPDX-License-Identifier: BSD-3-Clause
*/

#include <math.h>

/* Collapse the _WIN32 / _WIN64 target macros into a single WINDOWS flag. */
#if defined(_WIN32) || defined(_WIN64)
#define WINDOWS
#endif

/*
  CALLINGCONV sits between the return type and the name of each function
  defined below. It currently expands to nothing on every platform: the
  Windows branch only carries __vectorcall inside a comment, so that calling
  convention is disabled.
*/
#ifdef WINDOWS
#define CALLINGCONV /*__vectorcall*/
#else
#define CALLINGCONV
#endif

/*
  Sums the x, y and z components of an interleaved (array-of-structures)
  buffer laid out as x0 y0 z0 x1 y1 z1 ...

  a      - input floats; nothing beyond a[count - 1] is read
  count  - number of floats in a (not the number of xyz triples)
  zeros  - not used by this implementation
  result - receives the sums: result[0] = x, result[1] = y, result[2] = z

  Only complete triples are accumulated. If count is not a multiple of 3 the
  trailing one or two floats are ignored rather than being combined with
  elements that lie past the end of the buffer. A count below 3 (including
  negative values) yields three zero sums.
*/
void CALLINGCONV xyzSumAOS(float *a, int count, float *zeros, float *result) {
    float xsum = 0, ysum = 0, zsum = 0;
    /* "count - i >= 3" guarantees a[i + 2] is in range and keeps i from ever
       exceeding count, so the increment cannot overflow. */
    for (int i = 0; count - i >= 3; i += 3) {
        xsum += a[i];
        ysum += a[i + 1];
        zsum += a[i + 2];
    }
    result[0] = xsum;
    result[1] = ysum;
    result[2] = zsum;
}

/*
  Sums the x, y and z components of a buffer stored in groups of 24 floats:
  within each group the first 8 floats are read as x values, the next 8 as
  y values and the last 8 as z values.

  a      - input floats, organised in the 24-float groups described above
  count  - total number of floats; count / 3 elements are summed
  zeros  - not used by this implementation
  result - receives the sums: result[0] = x, result[1] = y, result[2] = z

  Note: when count / 3 is not a multiple of 8, the last group is still read
  at offsets +8 and +16, so the buffer must extend to the end of that group.
*/
void CALLINGCONV xyzSumSOA(float *a, int count, float *zeros, float *result) {
    const int elements = count / 3;
    float sx = 0.0f, sy = 0.0f, sz = 0.0f;

    /* Walk the buffer one 8-element group at a time. */
    for (int first = 0; first < elements; first += 8) {
        const float *group = a + (first / 8) * 24;

        /* The final group may hold fewer than 8 elements. */
        int lanes = elements - first;
        if (lanes > 8) {
            lanes = 8;
        }

        for (int lane = 0; lane < lanes; ++lane) {
            sx += group[lane];
            sy += group[8 + lane];
            sz += group[16 + lane];
        }
    }

    result[0] = sx;
    result[1] = sy;
    result[2] = sz;
}