File: dcompute_host_and_device.d

package info (click to toggle)
ldc 1:1.40.0-5
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 63,308 kB
  • sloc: cpp: 85,368; ansic: 21,877; makefile: 1,705; sh: 1,018; asm: 584; objc: 135; exp: 48; python: 12
file content (68 lines) | stat: -rw-r--r-- 2,362 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// Check that we can generate code for both the host and device in one compiler invocation
// REQUIRES: target_NVPTX
// RUN: %ldc -c -mdcompute-targets=cuda-350 -m64 -output-ll -mdcompute-file-prefix=host_and_device -Iinputs -output-o %s %S/inputs/kernel.d
// RUN: FileCheck %s --check-prefix=PTX < host_and_device_cuda350_64.ptx
// RUN: FileCheck %s --check-prefix=LL < dcompute_host_and_device.ll

import inputs.kernel : k_foo;

import ldc.dcompute;

// Module-level variable with no storage class; in D such variables are
// thread-local by default.
int tlGlobal;
// __gshared makes this a single global shared by all threads.
__gshared int gGlobal;

// Host-side entry point. The RUN line at the top of the file passes -c, so
// this body is compiled by the test but not executed.
void main(string[] args)
{
    // Write to both module-level variables (thread-local and __gshared).
    tlGlobal = 0;
    gGlobal  = 0;
    // Take the mangled name of a function defined in this module and of
    // k_foo, which is imported from inputs.kernel. The resulting strings are
    // not used afterwards.
    // NOTE(review): presumably this verifies that .mangleof of a kernel
    // symbol is usable from host code -- confirm against inputs/kernel.d.
    string s = foo.mangleof;
    string k_s = k_foo.mangleof;

    // Pass a default-initialized GlobalPointer!float to foo.
    GlobalPointer!float global_x;
    foo(global_x);
}

// Copies a float between x_in and three locally declared, default-initialized
// pointer wrappers (SharedPointer, PrivatePointer, ConstantPointer), one
// assignment at a time.
//
// Every assignment is preceded by four FileCheck directives describing the
// expected host IR: two pointer loads, a float load through the second loaded
// pointer, and a float store through the first loaded pointer.
//
// The directives are matched in order, so the statements below must keep
// their current order and form; the directive comments are test input and
// must not be reworded.
void foo(GlobalPointer!float x_in) {
    // LL-LABEL: foo
    SharedPointer!float shared_x;
	PrivatePointer!float private_x;
	ConstantPointer!float const_x;

    // Store into shared_x the value read through x_in.
    // LL: [[s_load_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[s_addr_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[s_store_reg:%[0-9]*]] = load float, ptr [[s_addr_reg]]
    // LL: store float [[s_store_reg]], ptr [[s_load_reg]]
	*shared_x = *x_in;
  
    // Store into private_x the value read through x_in.
    // LL: [[p_load_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[p_addr_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[p_store_reg:%[0-9]*]] = load float, ptr [[p_addr_reg]]
    // LL: store float [[p_store_reg]], ptr [[p_load_reg]]
	*private_x = *x_in;
  
    // Store into x_in the value read through const_x (const_x is only read).
    // LL: [[c_load_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[c_addr_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[c_store_reg:%[0-9]*]] = load float, ptr [[c_addr_reg]]
    // LL: store float [[c_store_reg]], ptr [[c_load_reg]]
	*x_in = *const_x;

    // Store into x_in the value read through shared_x.
    // LL: [[g1_load_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[g1_addr_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[g1_store_reg:%[0-9]*]] = load float, ptr [[g1_addr_reg]]
    // LL: store float [[g1_store_reg]], ptr [[g1_load_reg]]
    *x_in = *shared_x;

    // Store into x_in the value read through private_x.
    // LL: [[g2_load_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[g2_addr_reg:%[0-9]*]] = load ptr, ptr {{%[0-9]*}}
    // LL: [[g2_store_reg:%[0-9]*]] = load float, ptr [[g2_addr_reg]]
    // LL: store float [[g2_store_reg]], ptr [[g2_load_reg]]
	*x_in = *private_x;
}

// PTX-LABEL: k_foo
// PTX: ld.global.f32
// PTX: st.shared.f32
// PTX: st.local.f32
// PTX: ld.const.f32
// PTX: ld.shared.f32
// PTX: ld.local.f32