File: main.c

package info (click to toggle)
aws-crt-python 0.28.4%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 78,428 kB
  • sloc: ansic: 437,955; python: 27,657; makefile: 5,855; sh: 4,289; ruby: 208; java: 82; perl: 73; cpp: 25; xml: 11
file content (151 lines) | stat: -rw-r--r-- 6,622 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
/**
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0.
 */

#include <aws/checksums/crc.h>
#include <aws/checksums/private/crc64_priv.h>
#include <aws/checksums/private/crc32_priv.h>

#include <aws/common/allocator.h>
#include <aws/common/byte_buf.h>
#include <aws/common/clock.h>
#include <aws/common/cpuid.h>
#include <aws/common/device_random.h>

#include <inttypes.h>
#include <stdio.h>

/* Pairs an allocator with the label printed for it in the benchmark output. */
struct aws_allocator_types {
    /* Allocator used to back the buffers that get checksummed. */
    struct aws_allocator *allocator;
    /* Human-readable label printed ahead of this allocator's results. */
    const char *name;
};

struct checksum_profile_run {
    void (*profile_run)(struct aws_byte_cursor checksum_this);
    const char *name;
};

/* Benchmark case "crc32 C only": one CRC32 pass over the cursor, seeded with 0; result discarded. */
static void s_runcrc32_sw(struct aws_byte_cursor checksum_this) {
    const uint8_t *data = checksum_this.ptr;
    int data_len = (int)checksum_this.len;

    (void)aws_checksums_crc32_sw(data, data_len, 0);
}

/* Benchmark case "crc32 with hw optimizations": one CRC32 pass, seeded with 0; result discarded. */
static void s_runcrc32(struct aws_byte_cursor checksum_this) {
    const uint8_t *data = checksum_this.ptr;
    int data_len = (int)checksum_this.len;

    (void)aws_checksums_crc32(data, data_len, 0);
}

/* Benchmark case "crc32c C only": one CRC32C pass over the cursor, seeded with 0; result discarded. */
static void s_runcrc32c_sw(struct aws_byte_cursor checksum_this) {
    const uint8_t *data = checksum_this.ptr;
    int data_len = (int)checksum_this.len;

    (void)aws_checksums_crc32c_sw(data, data_len, 0);
}

/* Benchmark case "crc32c with hw optimizations": one CRC32C pass, seeded with 0; result discarded. */
static void s_runcrc32c(struct aws_byte_cursor checksum_this) {
    const uint8_t *data = checksum_this.ptr;
    int data_len = (int)checksum_this.len;

    (void)aws_checksums_crc32c(data, data_len, 0);
}

/* Benchmark case "crc64nvme C only": one CRC64-NVME pass, seeded with 0; result discarded. */
static void s_runcrc64_sw(struct aws_byte_cursor checksum_this) {
    const uint8_t *data = checksum_this.ptr;
    int data_len = (int)checksum_this.len;

    (void)aws_checksums_crc64nvme_sw(data, data_len, 0);
}

/* Benchmark case "crc64nvme with hw optimizations": one CRC64-NVME pass, seeded with 0; result discarded. */
static void s_runcrc64(struct aws_byte_cursor checksum_this) {
    const uint8_t *data = checksum_this.ptr;
    int data_len = (int)checksum_this.len;

    (void)aws_checksums_crc64nvme(data, data_len, 0);
}

/*
 * Benchmark case "crc64nvme with hw optimizations(multi)".
 *
 * Inputs of at most 8 KiB are checksummed with a single call. Larger inputs are fed to
 * aws_checksums_crc64nvme in consecutive 8 KiB pieces (the final piece may be shorter),
 * passing each call's result in as the previous-CRC argument of the next call.
 * The final checksum is discarded.
 */
static void s_runcrc64_multi(struct aws_byte_cursor checksum_this) {
    const size_t chunk_limit = 8 * 1024;
    const size_t total_len = checksum_this.len;

    if (total_len <= chunk_limit) {
        (void)aws_checksums_crc64nvme(checksum_this.ptr, (int)total_len, 0);
        return;
    }

    uint64_t running_crc = 0;
    size_t consumed = 0;
    while (consumed < total_len) {
        // The last piece is whatever is left when fewer than chunk_limit bytes remain.
        size_t remaining = total_len - consumed;
        size_t step = (remaining < chunk_limit) ? remaining : chunk_limit;

        running_crc = aws_checksums_crc64nvme(checksum_this.ptr + consumed, (int)step, running_crc);
        consumed += step;
    }
    (void)running_crc;
}

/*
 * Size helpers used to build the benchmark buffer-size table.
 *
 * KB_TO_BYTES and MB_TO_BYTES are evaluated in the type of their argument (plain int for
 * integer literals), so they are only suitable for results that fit in that type.
 *
 * GB_TO_BYTES yields an unsigned long long. The ULL factor is applied first so that every
 * multiplication happens in 64-bit arithmetic; with the ULL factor last, the leading
 * (gb) * 1024 * 1024 would be computed in int and overflow for gb >= 2048.
 */
#define KB_TO_BYTES(kb) ((kb) * 1024)
#define MB_TO_BYTES(mb) ((mb) * 1024 * 1024)
#define GB_TO_BYTES(gb) ((gb) * 1024ULL * 1024 * 1024)

/**
 * Benchmark driver.
 *
 * Prints which CPU features aws_cpu_has_feature() reports, then, for every checksum variant
 * and every allocator, times one checksum pass over freshly allocated buffers of increasing
 * size and prints the latency and throughput for each size to stdout.
 *
 * Returns 0 when every measurement ran, 1 if a benchmark buffer could not be initialized
 * or populated (a diagnostic is written to stderr in that case).
 */
int main(void) {

    fprintf(stdout, "hw features for this run:\n");
    fprintf(stdout, "clmul: %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_CLMUL) ? "true" : "false");
    fprintf(stdout, "sse4.1: %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_SSE_4_1) ? "true" : "false");
    fprintf(stdout, "sse4.2: %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_SSE_4_2) ? "true" : "false");
    fprintf(stdout, "avx2: %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_AVX2) ? "true" : "false");
    fprintf(stdout, "avx512: %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_AVX512) ? "true" : "false");
    fprintf(stdout, "arm crc: %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_ARM_CRC) ? "true" : "false");
    fprintf(stdout, "bmi2: %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_BMI2) ? "true" : "false");
    fprintf(stdout, "vpclmul: %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_VPCLMULQDQ) ? "true" : "false");
    fprintf(stdout, "arm pmull: %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_ARM_PMULL) ? "true" : "false");
    fprintf(stdout, "arm crypto: %s\n\n", aws_cpu_has_feature(AWS_CPU_FEATURE_ARM_CRYPTO) ? "true" : "false");

    struct aws_allocator_types allocators[2];
    allocators[0].allocator = aws_default_allocator();
    allocators[0].name = "Default runtime allocator";
    allocators[1].allocator = aws_aligned_allocator();
    allocators[1].name = "Aligned allocator";

    struct checksum_profile_run profile_runs[] = {
        {.profile_run = s_runcrc32_sw, .name = "crc32 C only"},
        {.profile_run = s_runcrc32, .name = "crc32 with hw optimizations"},
        {.profile_run = s_runcrc32c_sw, .name = "crc32c C only"},
        {.profile_run = s_runcrc32c, .name = "crc32c with hw optimizations"},
        {.profile_run = s_runcrc64_sw, .name = "crc64nvme C only"},
        {.profile_run = s_runcrc64, .name = "crc64nvme with hw optimizations"},
        {.profile_run = s_runcrc64_multi, .name = "crc64nvme with hw optimizations(multi)"},
    };

    // get buffer sizes large enough that all the simd code paths get hit hard, but
    // also measure the smaller buffer paths since they often can't be optimized as thoroughly.
    // The table never changes, so it is built once rather than per profile/allocator pair.
    const size_t buffer_sizes[] = {8, 16, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
        KB_TO_BYTES(32), KB_TO_BYTES(64), KB_TO_BYTES(256), MB_TO_BYTES(1), MB_TO_BYTES(10), MB_TO_BYTES(100), GB_TO_BYTES(1)};
    const size_t buffer_sizes_len = AWS_ARRAY_SIZE(buffer_sizes);

    const size_t allocators_array_size = AWS_ARRAY_SIZE(allocators);
    const size_t profile_runs_size = AWS_ARRAY_SIZE(profile_runs);

    for (size_t i = 0; i < profile_runs_size; ++i) {
        fprintf(stdout, "--------Profile %s---------\n", profile_runs[i].name);

        for (size_t j = 0; j < allocators_array_size; ++j) {
            fprintf(stdout, "%s\n\n", allocators[j].name);

            struct aws_allocator *allocator = allocators[j].allocator;

            // warm it up to factor out the cpuid checks. The size table itself serves as
            // throwaway input; the cursor length is a byte count, hence sizeof and not the
            // element count.
            struct aws_byte_cursor warmup_cur = aws_byte_cursor_from_array(buffer_sizes, sizeof(buffer_sizes));
            profile_runs[i].profile_run(warmup_cur);

            for (size_t k = 0; k < buffer_sizes_len; ++k) {
                struct aws_byte_buf x_bytes;
                // A non-zero return signals failure; do not time a buffer that was never set up.
                if (aws_byte_buf_init(&x_bytes, allocator, buffer_sizes[k])) {
                    fprintf(stderr, "failed to allocate a %zu byte buffer\n", buffer_sizes[k]);
                    return 1;
                }
                if (aws_device_random_buffer(&x_bytes)) {
                    fprintf(stderr, "failed to fill a %zu byte buffer with random data\n", buffer_sizes[k]);
                    aws_byte_buf_clean_up(&x_bytes);
                    return 1;
                }

                // Only the checksum call itself sits between the two clock reads.
                uint64_t start_time = 0;
                aws_high_res_clock_get_ticks(&start_time);
                profile_runs[i].profile_run(aws_byte_cursor_from_buf(&x_bytes));
                uint64_t end_time = 0;
                aws_high_res_clock_get_ticks(&end_time);

                const uint64_t elapsed_ns = end_time - start_time;
                fprintf(
                    stdout,
                    "buffer size %zu (bytes), latency: %" PRIu64 " ns throughput: %f GiB/s\n",
                    buffer_sizes[k],
                    elapsed_ns,
                    (buffer_sizes[k] * 1000000000.0 /* ns -> sec factor */ / GB_TO_BYTES(1)) / elapsed_ns);
                aws_byte_buf_clean_up(&x_bytes);
            }
            fprintf(stdout, "\n");
        }
    }
    return 0;
}