File: test_profile.cc

package info (click to toggle)
mpich 4.3.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 101,184 kB
  • sloc: ansic: 1,040,629; cpp: 82,270; javascript: 40,763; perl: 27,933; python: 16,041; sh: 14,676; xml: 14,418; f90: 12,916; makefile: 9,270; fortran: 8,046; java: 4,635; asm: 324; ruby: 103; awk: 27; lisp: 19; php: 8; sed: 4
file content (484 lines) | stat: -rw-r--r-- 15,944 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2012. ALL RIGHTS RESERVED.
* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED.
* See file LICENSE for terms.
*/

#include <common/test.h>
extern "C" {
#include <ucs/sys/sys.h>
#include <ucs/time/time.h>
#include <ucs/profile/profile.h>
#include <ucs/config/parser.h>
#include <ucp/api/ucp.h>
}

#include <pthread.h>
#include <fstream>

class scoped_profile {
public:
    scoped_profile(ucs::test_base &test, const std::string &file_name,
                   const char *mode) :
        m_test(test), m_file_name(file_name), m_tls_env(TLS_ENV, TLS_ENV_VALUE)
    {
        ucp_config_t *config;
        EXPECT_EQ(ucp_config_read(NULL, NULL, &config), UCS_OK);
        ucs_profile_reset_locations_id(ucs_profile_default_ctx);
        ucs_profile_cleanup(ucs_profile_default_ctx);
        m_test.push_config();
        m_test.modify_config("PROFILE_MODE", mode);
        m_test.modify_config("PROFILE_FILE", m_file_name.c_str());
        ucs_profile_init(ucs_global_opts.profile_mode,
                         ucs_global_opts.profile_file,
                         ucs_global_opts.profile_log_size,
                         &ucs_profile_default_ctx);
        ucp_config_release(config);
    }

    std::string read() {
        ucs_profile_dump(ucs_profile_default_ctx);
        std::ifstream f(m_file_name.c_str());
        return std::string(std::istreambuf_iterator<char>(f),
                           std::istreambuf_iterator<char>());
    }

    ~scoped_profile() {
        ucs_profile_reset_locations_id(ucs_profile_default_ctx);
        ucs_profile_cleanup(ucs_profile_default_ctx);
        unlink(m_file_name.c_str());
        m_test.pop_config();
        ucs_profile_init(ucs_global_opts.profile_mode,
                         ucs_global_opts.profile_file,
                         ucs_global_opts.profile_log_size,
                         &ucs_profile_default_ctx);
    }

    static constexpr const char *TLS_ENV       = "UCX_TLS";
    static constexpr const char *TLS_ENV_VALUE = "all";

private:
    ucs::test_base     &m_test;
    const std::string  m_file_name;
    ucs::scoped_setenv m_tls_env;
};

class test_profile : public testing::TestWithParam<int>,
                     public ucs::test_base {
public:
    test_profile();
    ~test_profile();

    UCS_TEST_BASE_IMPL;

protected:
    static const int      MIN_LINE;
    static const int      MAX_LINE;
    static const unsigned NUM_LOCAITONS;
    static const char*    PROFILE_FILENAME;


    std::set<int>      m_tids;
    pthread_spinlock_t m_tids_lock;

    struct thread_param {
        test_profile *test;
        int          iters;
    };

    void add_tid(int tid);

    static void *profile_thread_func(void *arg);

    int num_threads() const;

    void run_profiled_code(int num_iters);

    void test_header(const ucs_profile_header_t *hdr, unsigned exp_mode,
                     const void **ptr, unsigned num_locations);

    void test_locations(const ucs_profile_location_t *locations,
                        unsigned num_locations, const void **ptr);

    void test_thread_locations(const ucs_profile_thread_header_t *thread_hdr,
                               unsigned num_locations, uint64_t exp_count,
                               unsigned exp_num_records, const void **ptr);

    void test_nesting(const ucs_profile_location_t *loc, int nesting,
                      const std::string &exp_name, int exp_nesting);

    void test_env(const void **ptr, const ucs_profile_block_header_t &env_vars);

    void do_test(unsigned int_mode, const std::string &str_mode);
};

static int sum(int a, int b)
{
    return a + b;
}

const int test_profile::MIN_LINE = __LINE__;

static void *test_request = &test_request;

UCS_PROFILE_FUNC_VOID_ALWAYS(profile_test_func1, ())
{
    UCS_PROFILE_REQUEST_NEW(test_request, "allocate", 10);
    UCS_PROFILE_REQUEST_EVENT_ALWAYS(test_request, "work", 0);
    UCS_PROFILE_REQUEST_FREE(test_request);
    UCS_PROFILE_CODE_ALWAYS("code", { UCS_PROFILE_SAMPLE_ALWAYS("sample"); });
}

UCS_PROFILE_FUNC_ALWAYS(int, profile_test_func2, (a, b), int a, int b)
{
    return UCS_PROFILE_CALL_ALWAYS(sum, a, b);
}

const int test_profile::MAX_LINE           = __LINE__;
#ifdef HAVE_PROFILING
const unsigned test_profile::NUM_LOCAITONS = 12; /* With request alloc/free */
#else
const unsigned test_profile::NUM_LOCAITONS = 10; /* Without request alloc/free */
#endif
const char* test_profile::PROFILE_FILENAME = "test.prof";

test_profile::test_profile()
{
    pthread_spin_init(&m_tids_lock, 0);
}

test_profile::~test_profile()
{
    pthread_spin_destroy(&m_tids_lock);
}

void test_profile::add_tid(int tid)
{
    pthread_spin_lock(&m_tids_lock);
    m_tids.insert(tid);
    pthread_spin_unlock(&m_tids_lock);
}

void *test_profile::profile_thread_func(void *arg)
{
    const thread_param *param = (const thread_param*)arg;

    param->test->add_tid(ucs_get_tid());

    for (int i = 0; i < param->iters; ++i) {
        profile_test_func1();
        profile_test_func2(1, 2);
    }

    return NULL;
}

int test_profile::num_threads() const
{
    return GetParam();
}

void test_profile::run_profiled_code(int num_iters)
{
    int ret;
    thread_param param;

    param.iters = num_iters;
    param.test  = this;

    if (num_threads() == 1) {
        profile_thread_func(&param);
    } else {
        std::vector<pthread_t> threads;

        for (int i = 0; i < num_threads(); ++i) {
            pthread_t profile_thread;
            ret = pthread_create(&profile_thread, NULL, profile_thread_func,
                                 (void*)&param);
            if (ret < 0) {
                ADD_FAILURE() << "pthread_create failed: " << strerror(errno);
                break;
            }

            threads.push_back(profile_thread);
        }

        while (!threads.empty()) {
            void *result;
            ret = pthread_join(threads.back(), &result);
            if (ret < 0) {
                ADD_FAILURE() << "pthread_join failed: " << strerror(errno);
            }

            threads.pop_back();
        }
    }
}

void test_profile::test_header(const ucs_profile_header_t *hdr,
                               unsigned exp_mode, const void **ptr,
                               unsigned num_locations)
{
    EXPECT_EQ(UCS_PROFILE_FILE_VERSION,         hdr->version);
    EXPECT_EQ(std::string(ucs_get_host_name()), std::string(hdr->hostname));
    EXPECT_EQ(getpid(),                         (pid_t)hdr->pid);
    EXPECT_EQ(exp_mode,                         hdr->mode);
    EXPECT_EQ(NUM_LOCAITONS,                    num_locations);
    EXPECT_NEAR(hdr->one_second / ucs_time_from_sec(1.0), 1.0, 0.01);

    *ptr = hdr + 1;
}

void test_profile::test_locations(const ucs_profile_location_t *locations,
                                  unsigned num_locations, const void **ptr)
{
    std::set<std::string> loc_names;
    for (unsigned i = 0; i < num_locations; ++i) {
        const ucs_profile_location_t *loc = &locations[i];
        EXPECT_EQ(std::string(basename(__FILE__)), std::string(loc->file));
        EXPECT_GE(loc->line, MIN_LINE);
        EXPECT_LE(loc->line, MAX_LINE);
        loc_names.insert(loc->name);
    }

    EXPECT_NE(loc_names.end(), loc_names.find("profile_test_func1"));
    EXPECT_NE(loc_names.end(), loc_names.find("profile_test_func2"));
    EXPECT_NE(loc_names.end(), loc_names.find("code"));
    EXPECT_NE(loc_names.end(), loc_names.find("sample"));
    EXPECT_NE(loc_names.end(), loc_names.find("sum"));
#ifdef HAVE_PROFILING
    EXPECT_NE(loc_names.end(), loc_names.find("allocate"));
#endif
    EXPECT_NE(loc_names.end(), loc_names.find("work"));

    *ptr = locations + num_locations;
}

void test_profile::test_thread_locations(
                               const ucs_profile_thread_header_t *thread_hdr,
                               unsigned num_locations, uint64_t exp_count,
                               unsigned exp_num_records, const void **ptr)
{
    const ucs_profile_thread_location_t *loc;

    EXPECT_NE(m_tids.end(),    m_tids.find(thread_hdr->tid));
    EXPECT_EQ(exp_num_records, thread_hdr->num_records);

    EXPECT_LE(thread_hdr->end_time,   ucs_get_time());
    EXPECT_LE(thread_hdr->start_time, thread_hdr->end_time);
    EXPECT_LE(thread_hdr->end_time - thread_hdr->start_time,
              ucs_time_from_sec(1.0) * ucs::test_time_multiplier() * (1 + exp_count));

    for (unsigned i = 0; i < num_locations; ++i) {
        loc = &reinterpret_cast<const ucs_profile_thread_location_t*>
                        (thread_hdr + 1)[i];
        EXPECT_EQ(exp_count, loc->count);
        EXPECT_LE(loc->total_time,
                  ucs_time_from_sec(1.0) * ucs::test_time_multiplier() * exp_count);
    }

    *ptr = reinterpret_cast<const ucs_profile_thread_location_t*>(thread_hdr + 1) +
           num_locations;
}

void test_profile::test_nesting(const ucs_profile_location_t *loc, int nesting,
                                const std::string &exp_name, int exp_nesting)
{
    if (loc->name == exp_name) {
        EXPECT_EQ(exp_nesting, nesting)
                << "nesting level of " << exp_name << " is wrong";
    }
}

void test_profile::test_env(const void **ptr,
                            const ucs_profile_block_header_t &env_vars)
{
    auto expected_env = std::string(scoped_profile::TLS_ENV) + "=" +
                        std::string(scoped_profile::TLS_ENV_VALUE);
    std::string env((char*)*ptr, env_vars.size);
    EXPECT_NE(env.find(expected_env), std::string::npos);
    *ptr = UCS_PTR_BYTE_OFFSET(*ptr, env_vars.size);
}

void test_profile::do_test(unsigned int_mode, const std::string& str_mode)
{
    const int ITER           = 5;
    uint64_t exp_count       = (int_mode & UCS_BIT(UCS_PROFILE_MODE_ACCUM)) ?
                               ITER : 0;
    uint64_t exp_num_records = (int_mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) ?
                               (NUM_LOCAITONS * ITER) : 0;


    scoped_profile p(*this, PROFILE_FILENAME, str_mode.c_str());
    run_profiled_code(ITER);

    std::string data = p.read();
    const void *ptr  = &data[0];

    /* Read and test file header */
    /* coverity[tainted_data_downcast] */
    const ucs_profile_header_t *hdr =
                    reinterpret_cast<const ucs_profile_header_t*>(ptr);
    uint32_t num_locations = hdr->locations.size /
                             sizeof(ucs_profile_location_t);
    test_header(hdr, int_mode, &ptr, num_locations);

    /* Read and test env variables*/
    test_env(&ptr, hdr->env_vars);

    EXPECT_EQ(&data[hdr->locations.offset], ptr);
    /* Read and test global locations */
    /* coverity[tainted_data_downcast] */
    const ucs_profile_location_t *locations =
                    reinterpret_cast<const ucs_profile_location_t*>(ptr);
    test_locations(locations, num_locations, &ptr);

    EXPECT_EQ(UCS_PTR_BYTE_OFFSET(locations, hdr->locations.size), ptr);

    EXPECT_EQ(&data[hdr->threads.offset], ptr);
    const void *threads_end = UCS_PTR_BYTE_OFFSET(ptr, hdr->threads.size);

    size_t total_num_records = 0;

    /* Read and test threads */
    for (int i = 0; i < num_threads(); ++i) {
        /* coverity[tainted_data_downcast] */
        const ucs_profile_thread_header_t *thread_hdr =
                        reinterpret_cast<const ucs_profile_thread_header_t*>(ptr);

        test_thread_locations(thread_hdr, num_locations, exp_count,
                              exp_num_records, &ptr);

        const ucs_profile_record_t *records =
                reinterpret_cast<const ucs_profile_record_t*>(ptr);
        uint64_t prev_ts = records[0].timestamp;
        int nesting      = 0;

        if (!(hdr->mode & UCS_BIT(UCS_PROFILE_MODE_LOG))) {
            EXPECT_EQ(0, thread_hdr->num_records);
        }

        for (uint64_t i = 0; i < thread_hdr->num_records; ++i) {
            const ucs_profile_record_t *rec = &records[i];

            /* test location index */
            EXPECT_GE(rec->location, 0u);
            EXPECT_LT(rec->location, uint32_t(NUM_LOCAITONS));

            /* test timestamp */
            EXPECT_GE(rec->timestamp, prev_ts);
            prev_ts = rec->timestamp;

            /* test param64 */
            const ucs_profile_location_t *loc = &locations[rec->location];
            switch (loc->type) {
            case UCS_PROFILE_TYPE_REQUEST_NEW:
            case UCS_PROFILE_TYPE_REQUEST_EVENT:
            case UCS_PROFILE_TYPE_REQUEST_FREE:
                EXPECT_EQ((uintptr_t)&test_request, rec->param64);
                break;
            case UCS_PROFILE_TYPE_SCOPE_BEGIN:
                ++nesting;
                break;
            case UCS_PROFILE_TYPE_SCOPE_END:
                --nesting;
                break;
            default:
                break;
            };

            test_nesting(loc, nesting, "profile_test_func1", 0);
            test_nesting(loc, nesting, "code", 1);
            test_nesting(loc, nesting, "sample", 2);
            test_nesting(loc, nesting, "profile_test_func2", 0);
            test_nesting(loc, nesting, "sum", 1);
        }

        ptr                = records + thread_hdr->num_records;
        total_num_records += thread_hdr->num_records;
    }

    unsigned profiled_threads = ucs_profile_calc_num_threads(total_num_records,
                                                             hdr);
    EXPECT_EQ(num_threads(), profiled_threads);
    EXPECT_EQ(threads_end, ptr);

    /* Test that ptr reached the end of the file */
    EXPECT_EQ(&data[data.size()], ptr) << data.size();
}

UCS_TEST_P(test_profile, accum) {
    do_test(UCS_BIT(UCS_PROFILE_MODE_ACCUM), "accum");
}

UCS_TEST_P(test_profile, log) {
    do_test(UCS_BIT(UCS_PROFILE_MODE_LOG), "log");
}

UCS_TEST_P(test_profile, log_accum) {
    do_test(UCS_BIT(UCS_PROFILE_MODE_LOG) | UCS_BIT(UCS_PROFILE_MODE_ACCUM),
            "log,accum");
}

INSTANTIATE_TEST_SUITE_P(st, test_profile, ::testing::Values(1));
INSTANTIATE_TEST_SUITE_P(mt, test_profile, ::testing::Values(2, 4, 8));

class test_profile_perf : public test_profile {
};

UCS_TEST_SKIP_COND_P(test_profile_perf, overhead, RUNNING_ON_VALGRIND) {

#if defined(__x86_64__) || defined(__powerpc64__)
    const double EXP_OVERHEAD_NSEC = 100.0;
#else
    const double EXP_OVERHEAD_NSEC = 150.0;
#endif
    const int ITERS                = 100;
    const int WARMUP_ITERS         = 5;
    const int COUNT                = 100000;
    double overhead_nsec           = 0.0;

    scoped_profile p(*this, PROFILE_FILENAME, "accum");

    for (int retry = 0; retry < (ucs::perf_retry_count + 1); ++retry) {
        ucs_time_t  time_profile_on  = 0;
        ucs_time_t  time_profile_off = 0;

        for (int i = 0; i < WARMUP_ITERS + ITERS; ++i) {
            ucs_time_t t;

            t = ucs_get_time();
            for (volatile int j = 0; j < COUNT;) {
                ++j;
            }
            if (i > WARMUP_ITERS) {
                time_profile_off += ucs_get_time() - t;
            }

            t = ucs_get_time();
            for (volatile int j = 0; j < COUNT;) {
                UCS_PROFILE_CODE_ALWAYS("test", ++j);
            }
            if (i > WARMUP_ITERS) {
                time_profile_on += ucs_get_time() - t;
            }
        }

        overhead_nsec = ucs_time_to_nsec(time_profile_on - time_profile_off) /
                        COUNT / ITERS;
        UCS_TEST_MESSAGE << "overhead: " << overhead_nsec << " nsec";

        if (!ucs::perf_retry_count) {
            UCS_TEST_MESSAGE << "not validating performance";
            return; /* Success */
        } else if (overhead_nsec < EXP_OVERHEAD_NSEC) {
            return; /* Success */
        } else {
            ucs::safe_sleep(ucs::perf_retry_interval);
        }
    }

    EXPECT_LT(overhead_nsec, EXP_OVERHEAD_NSEC) << "Profiling overhead is too high";
}

INSTANTIATE_TEST_SUITE_P(st, test_profile_perf, ::testing::Values(1));