File: main.cpp

package info (click to toggle)
ispc 1.28.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 97,620 kB
  • sloc: cpp: 77,067; python: 8,303; yacc: 3,337; lex: 1,126; ansic: 631; sh: 475; makefile: 17
file content (123 lines) | stat: -rw-r--r-- 3,147 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
  Copyright (c) 2021-2023, Intel Corporation
*/

#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>

// ispcrt
#include "ispcrt.hpp"

// Streams a short human-readable label for an ISPCRT device type.
// Values other than AUTO, GPU and CPU write nothing to the stream.
// Returns the stream to allow chaining.
std::ostream &operator<<(std::ostream &os, const ISPCRTDeviceType dt) {
    const char *label = nullptr;

    if (dt == ISPCRT_DEVICE_TYPE_AUTO) {
        label = "Auto";
    } else if (dt == ISPCRT_DEVICE_TYPE_GPU) {
        label = "GPU";
    } else if (dt == ISPCRT_DEVICE_TYPE_CPU) {
        label = "CPU";
    }

    // Unrecognized device types leave the stream untouched.
    if (label != nullptr) {
        os << label;
    }
    return os;
}

// Argument block handed to the kernels launched in run(): it is copied to the
// device and passed as the kernel parameter.
// NOTE(review): presumably mirrors a struct declared on the kernel side, so
// keep the member order and types unchanged - confirm against the kernel source.
struct Parameters {
    // Device pointer to the raw byte storage for the objects.
    uint8_t *objects = nullptr;
    // Device pointer to the float results read back by the host.
    float *output = nullptr;
};

// Runs the example on the requested device type.
//
// Loads the "callback-esimd_ispc2esimd" module, launches its "make_objects"
// kernel followed by its "call_objects" kernel (one thread each), copies the
// output buffer back to the host and validates it.
//
// Returns 0 when the output equals {-1, -2}; otherwise prints a diagnostic to
// std::cerr and returns -1.
static int run(const ISPCRTDeviceType device_type) {
    ispcrt::Device device(device_type);

    // 32 bytes for 2 objects, each is 16
    std::vector<uint8_t> objects(2 * 16);

    // Two result slots, zero-initialized
    std::vector<float> output(2, 0.0f);

    ispcrt::Array<uint8_t> objects_dev(device, objects);
    ispcrt::Array<float> output_dev(device, output);

    // Setup parameters structure
    Parameters p;

    p.objects = objects_dev.devicePtr();
    p.output = output_dev.devicePtr();

    auto p_dev = ispcrt::Array<Parameters>(device, p);

    // Create module and kernels to execute
    ispcrt::Module module(device, "callback-esimd_ispc2esimd");
    ispcrt::Kernel make_objects_kernel(device, module, "make_objects");
    ispcrt::Kernel call_objects_kernel(device, module, "call_objects");

    // Create task queue and execute kernels
    ispcrt::TaskQueue queue(device);

    // ispcrt::Array objects which are used as inputs for an ISPC kernel should be
    // explicitly copied to device from host
    queue.copyToDevice(p_dev);

    // The barrier keeps the second launch behind the first one.
    // NOTE(review): presumably make_objects fills the objects buffer that
    // call_objects then consumes - confirm against the kernel source.
    queue.launch(make_objects_kernel, p_dev, 1);
    queue.barrier();

    // Launch the kernel on the device using 1 thread
    queue.launch(call_objects_kernel, p_dev, 1);

    // ispcrt::Array objects which are used as outputs of an ISPC kernel should be
    // explicitly copied to host from device
    queue.copyToHost(output_dev);

    // Execute queue and sync
    queue.sync();

    std::cout << "Executed on: " << device_type << '\n' << std::setprecision(6) << std::fixed;

    // Check and print result; the expected values are exactly representable
    // floats, so an exact comparison is intended here.
    const bool bValid = output[0] == -1 && output[1] == -2;
    if (!bValid) {
        // Report what was actually read back instead of failing silently.
        std::cerr << "Unexpected output: output[0] = " << output[0] << ", output[1] = " << output[1]
                  << " (expected -1 and -2)\n";
        return -1;
    }

    std::cout << "Function was called successfully, output:\n";
    for (size_t i = 0; i < output.size(); ++i) {
        std::cout << "output[" << i << "] = " << output[i] << "\n";
    }
    return 0;
}

// Prints the command-line synopsis to standard output.
// p: program name shown in front of the option list (callers pass argv[0]).
void usage(const char *p) {
    std::cout << "Usage:\n"
              << p << " --cpu | --gpu | -h\n";
}

int main(int argc, char *argv[]) {
    ISPCRTDeviceType device_type = ISPCRT_DEVICE_TYPE_AUTO;

    if (argc > 2 || (argc == 2 && std::string(argv[1]) == "-h")) {
        usage(argv[0]);
        return -1;
    }

    if (argc == 2) {
        std::string dev_param = argv[1];
        if (dev_param == "--cpu") {
            device_type = ISPCRT_DEVICE_TYPE_CPU;
        } else if (dev_param == "--gpu") {
            device_type = ISPCRT_DEVICE_TYPE_GPU;
        } else {
            usage(argv[0]);
            return -1;
        }
    }

    int success = run(device_type);
    return success;
}