File: swap-large-types.rs

package info (click to toggle)
rustc 1.88.0%2Bdfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 934,128 kB
  • sloc: xml: 158,127; python: 36,062; javascript: 19,855; sh: 19,700; cpp: 18,947; ansic: 12,993; asm: 4,792; makefile: 690; lisp: 29; perl: 29; ruby: 19; sql: 11
file content (116 lines) | stat: -rw-r--r-- 3,409 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
//@ compile-flags: -Copt-level=3
//@ only-x86_64

#![crate_type = "lib"]

use std::mem::swap;
use std::ptr::{copy_nonoverlapping, read, write};

// 5 x 5 grid of `u64` values: 25 * 8 = 200 bytes in total.
type KeccakBuffer = [[u64; 5]; 5];

// A basic read+copy+write swap implementation ends up copying one of the values
// to stack for large types, which is completely unnecessary as the lack of
// overlap means we can just do whatever fits in registers at a time.

// The tests here (after the first one showing that the problem still exists)
// are less about testing *exactly* what the codegen is, and more about testing
// 1) That things are swapped directly from one argument to the other,
//    never going through stack along the way, and
// 2) That we're doing the swapping for big things using large vector types,
//    rather than `i64` or `<8 x i8>` (or, even worse, `i8`) at a time.
//
// (There are separate tests for intrinsics::typed_swap_nonoverlapping that
//  check that it, as an intrinsic, is emitting exactly what it should.)

// CHECK-LABEL: @swap_basic
/// Swaps `*x` and `*y` with a hand-written read / copy / write sequence.
///
/// This is the baseline case: the directive in the body expects a 200-byte
/// stack slot to show up for the temporary copy.
#[no_mangle]
pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
    // CHECK: alloca [200 x i8]

    let lhs: *mut _ = x;
    let rhs: *mut _ = y;

    // SAFETY: both pointers come from exclusive references, so they are valid
    // for reads and writes and cannot overlap; nothing below panics, so the
    // sequence is drop-safe.
    unsafe {
        let stash = read(lhs);
        copy_nonoverlapping(rhs, lhs, 1);
        write(rhs, stash);
    }
}

// CHECK-LABEL: @swap_std
/// Exchanges the two buffers through the standard library's `swap`.
///
/// The directives in the body expect no stack slot, plus loads and stores of
/// 2- or 4-lane `i64` vectors.
#[no_mangle]
pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
    // CHECK-NOT: alloca
    // CHECK: load <{{2|4}} x i64>
    // CHECK: store <{{2|4}} x i64>
    swap(x, y);
}

// CHECK-LABEL: @swap_slice
/// Swaps the elements of `x` and `y` when both slices have the same length.
///
/// Slices of differing lengths are left untouched. The directives in the body
/// expect no stack slot, plus loads and stores of 2- or 4-lane `i64` vectors.
#[no_mangle]
pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
    // CHECK-NOT: alloca
    // CHECK: load <{{2|4}} x i64>
    // CHECK: store <{{2|4}} x i64>

    // Bail out early so `swap_with_slice` is only reached with equal lengths.
    if x.len() != y.len() {
        return;
    }
    x.swap_with_slice(y);
}

// 1024 elements of `u8`, i.e. a 1024-byte array with single-byte elements.
type OneKilobyteBuffer = [u8; 1024];

// CHECK-LABEL: @swap_1kb_slices
/// Swaps the elements of two slices of 1024-byte buffers when their lengths
/// match; slices of differing lengths are left untouched.
///
/// The directives in the body expect no stack slot and no `i32`/`i16`/`i8`
/// loads or stores around the `align 1` vector loads and stores.
#[no_mangle]
pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
    // CHECK-NOT: alloca

    // CHECK-NOT: load i32
    // CHECK-NOT: store i32
    // CHECK-NOT: load i16
    // CHECK-NOT: store i16
    // CHECK-NOT: load i8
    // CHECK-NOT: store i8

    // CHECK: load <{{2|4}} x i64>{{.+}}align 1,
    // CHECK: store <{{2|4}} x i64>{{.+}}align 1,

    // CHECK-NOT: load i32
    // CHECK-NOT: store i32
    // CHECK-NOT: load i16
    // CHECK-NOT: store i16
    // CHECK-NOT: load i8
    // CHECK-NOT: store i8

    // Bail out early so `swap_with_slice` is only reached with equal lengths.
    if x.len() != y.len() {
        return;
    }
    x.swap_with_slice(y);
}

/// Wrapper around a 192-byte (`3 * 64`) byte array whose alignment is raised
/// to 64 bytes.
#[repr(align(64))]
pub struct BigButHighlyAligned([u8; 3 * 64]);

// CHECK-LABEL: @swap_big_aligned
/// Exchanges two `BigButHighlyAligned` values through the standard library's
/// `swap`.
///
/// The directives in the body expect no `memcpy` call and no `i32`/`i16`/`i8`
/// loads or stores, only vector loads and stores at `align 64` and `align 32`.
#[no_mangle]
pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
    // CHECK-NOT: call void @llvm.memcpy
    // CHECK-NOT: load i32
    // CHECK-NOT: store i32
    // CHECK-NOT: load i16
    // CHECK-NOT: store i16
    // CHECK-NOT: load i8
    // CHECK-NOT: store i8

    // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 64,
    // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 64,

    // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 32,
    // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 32,

    // CHECK-NOT: load i32
    // CHECK-NOT: store i32
    // CHECK-NOT: load i16
    // CHECK-NOT: store i16
    // CHECK-NOT: load i8
    // CHECK-NOT: store i8
    // CHECK-NOT: call void @llvm.memcpy
    swap(x, y);
}