File: btl_ugni_progress_thread.c

package info (click to toggle)
openmpi 5.0.8-4
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 201,684 kB
  • sloc: ansic: 613,078; makefile: 42,353; sh: 11,194; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,179; python: 1,859; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (128 lines) | stat: -rw-r--r-- 3,541 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2011      UT-Battelle, LLC. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "opal_config.h"

#include "btl_ugni.h"
#include "btl_ugni_frag.h"
#include "btl_ugni_smsg.h"

#include "opal/include/opal/align.h"

/* Id of the progress thread. Filled in by pthread_create() in
 * mca_btl_ugni_spawn_progress_thread() and used by
 * mca_btl_ugni_kill_progress_thread() to cancel or join the thread. */
static pthread_t mca_btl_ugni_progress_thread_id;

/* Shutdown flag. Set to 1 by mca_btl_ugni_kill_progress_thread() and polled
 * by the loop in mca_btl_ugni_prog_thread_fn(). */
static volatile int stop_progress_thread = 0;

/* Counter incremented by the progress thread each time its CQ monitor call
 * returns GNI_RC_SUCCESS while the thread is not being stopped. Not static,
 * so it has external linkage. */
unsigned int mca_btl_ugni_progress_thread_wakeups = 0;

/*
 * Progress thread entry point.
 *
 * data is the ugni BTL module whose completion queues are monitored. The
 * thread waits in GNI_CqVectorMonitor() on the module's smsg_remote_irq_cq
 * and on the dev_rdma_local_irq_cq of every virtual device, and calls
 * opal_progress() each time the monitor reports GNI_RC_SUCCESS. It loops
 * until stop_progress_thread becomes non-zero and then returns OPAL_SUCCESS
 * encoded as a pointer.
 */
static void *mca_btl_ugni_prog_thread_fn(void *data)
{
    struct mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) data;
    const int num_cqs = 1 + mca_btl_ugni_component.virtual_device_count;
    gni_cq_handle_t monitored_cqs[1 + MCA_BTL_UGNI_MAX_DEV_HANDLES];
    uint32_t signaled_cq;
    gni_return_t monitor_rc;

    /*
     * need to block signals
     * (NOTE(review): nothing in this function changes the signal mask yet)
     */

    /* slot 0 is the remote SMSG irq CQ; slots 1..n are the per-device local
     * RDMA irq CQs */
    monitored_cqs[0] = ugni_module->smsg_remote_irq_cq;
    for (int slot = 1; slot < num_cqs; ++slot) {
        monitored_cqs[slot] = ugni_module->devices[slot - 1].dev_rdma_local_irq_cq.gni_handle;
    }

    while (!stop_progress_thread) {

        /*
         * this ugni call doesn't need a lock
         * (timeout of -1: presumably "wait indefinitely" - confirm against
         * the uGNI documentation)
         */
        monitor_rc = GNI_CqVectorMonitor(monitored_cqs, num_cqs, -1, &signaled_cq);

        /* Only a successful wake-up that is not part of shutdown drives
         * progress; every other outcome just re-evaluates the loop
         * condition. */
        if (GNI_RC_SUCCESS != monitor_rc || stop_progress_thread) {
            continue;
        }

        ++mca_btl_ugni_progress_thread_wakeups;
        opal_progress();
    }

    return (void *) (intptr_t) OPAL_SUCCESS;
}

/**
 * Start the uGNI progress thread.
 *
 * Creates a thread running mca_btl_ugni_prog_thread_fn() with @p btl as its
 * argument and stores its id in mca_btl_ugni_progress_thread_id.
 *
 * The thread is created joinable: mca_btl_ugni_kill_progress_thread() calls
 * pthread_join() on it, and joining a detached thread is undefined behavior.
 *
 * @param btl  module pointer handed to the thread function
 *
 * @return OPAL_SUCCESS on success, OPAL_ERROR if any pthread call fails
 *         (the failing call is reported through BTL_ERROR).
 */
int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t *btl)
{
    int rc, ret = OPAL_SUCCESS;
    pthread_attr_t attr;

    rc = pthread_attr_init(&attr);
    if (0 != rc) {
        BTL_ERROR(("btl/ugni pthread_attr_init returned %s ", strerror(rc)));
        /* attr was never initialized, so there is nothing to destroy */
        return OPAL_ERROR;
    }

    rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    if (0 != rc) {
        BTL_ERROR(("btl/ugni pthread_attr_setdetachstate returned %s ", strerror(rc)));
        ret = OPAL_ERROR;
        goto fn_exit;
    }

    rc = pthread_create(&mca_btl_ugni_progress_thread_id, &attr, mca_btl_ugni_prog_thread_fn,
                        (void *) btl);
    if (0 != rc) {
        BTL_ERROR(("btl/ugni pthread_create returned %s ", strerror(rc)));
        ret = OPAL_ERROR;
    }

fn_exit:
    /* the attribute object is released on every path once it was initialized */
    rc = pthread_attr_destroy(&attr);
    if (0 != rc) {
        BTL_ERROR(("btl/ugni pthread_attr_destroy returned %s ", strerror(rc)));
        ret = OPAL_ERROR;
    }

    return ret;
}

int mca_btl_ugni_kill_progress_thread(void)
{
    int ret = OPAL_SUCCESS;
    void *thread_rc;

    stop_progress_thread = 1;

    /*
     * post a CQ to myself to wake my thread up
     */

    ret = mca_btl_ugni_post_cqwrite(mca_btl_ugni_component.modules[0].local_ep,
                                    &mca_btl_ugni_component.modules[0].devices[0].dev_rdma_local_cq,
                                    mca_btl_ugni_component.modules[0].devices[0].smsg_irq_mhndl,
                                    0xdead, NULL, NULL, NULL);
    /*
     * TODO: if error returned, need to kill off thread manually
     */
    if (OPAL_SUCCESS != ret) {
        /* force the thread to exit */
        pthread_cancel(mca_btl_ugni_progress_thread_id);
        goto fn_exit;
    }

    pthread_join(mca_btl_ugni_progress_thread_id, &thread_rc);
    if (0 != (intptr_t) thread_rc) {
        BTL_ERROR(("btl/ugni error returned from progress thread: %d", (int) (intptr_t) thread_rc));
        ret = (int) (intptr_t) thread_rc;
    }

fn_exit:
    return ret;
}