File: memory-attributes.c

package info (click to toggle)
mpich 4.3.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 101,184 kB
  • sloc: ansic: 1,040,629; cpp: 82,270; javascript: 40,763; perl: 27,933; python: 16,041; sh: 14,676; xml: 14,418; f90: 12,916; makefile: 9,270; fortran: 8,046; java: 4,635; asm: 324; ruby: 103; awk: 27; lisp: 19; php: 8; sed: 4
file content (91 lines) | stat: -rw-r--r-- 3,226 bytes parent folder | download | duplicates (15)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/* This example program plays with:
 * - finding local NUMA nodes
 * - finding the best NUMA nodes for bandwidth/latency
 * - displaying the bandwidth/latency values of NUMA nodes
 * - allocating on the best NUMA node for bandwidth
 *
 * Copyright © 2022 Inria.  All rights reserved.
 * See COPYING in top-level directory.
 */

#include "hwloc.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Example entry point.
 *
 * Loads the topology, lists the NUMA nodes that are local to the first core
 * together with their bandwidth/latency attributes, then allocates a buffer
 * on the NUMA node reported as best for bandwidth from that core.
 *
 * Returns EXIT_SUCCESS when the walk-through completes (including when the
 * topology exposes no core object or no best-bandwidth node), EXIT_FAILURE
 * when a required hwloc call or allocation fails.
 */
int main(void)
{
    hwloc_topology_t topology;
    hwloc_obj_t core, *nodes = NULL, bestnode;
    struct hwloc_location initiator;
    unsigned i, n, capacity;
    const size_t buflen = 1048576;
    char *s;
    void *buffer;
    int weight, err;
    int ret = EXIT_FAILURE;

    /* Allocate, initialize and load topology object. */
    if (hwloc_topology_init(&topology) < 0) {
      fprintf(stderr, "Failed to initialize the topology\n");
      /* nothing was created, so there is nothing to destroy */
      return EXIT_FAILURE;
    }
    if (hwloc_topology_load(topology) < 0) {
      fprintf(stderr, "Failed to load the topology\n");
      goto out;
    }

    /* Find max number of NUMA nodes to allocate the array for hwloc_get_local_numanode_objs() */
    weight = hwloc_bitmap_weight(hwloc_topology_get_topology_nodeset(topology));
    if (weight < 0) {
      fprintf(stderr, "Failed to count NUMA nodes\n");
      goto out;
    }
    n = (unsigned) weight;
    printf("There are %u NUMA nodes\n", n);

    /* Never request a zero-sized array: its result is implementation-defined. */
    capacity = n ? n : 1;
    nodes = calloc(capacity, sizeof(*nodes));
    if (!nodes) {
      fprintf(stderr, "Failed to allocate the NUMA node array\n");
      goto out;
    }

    /* Take the first core */
    core = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0);
    if (!core) {
      /* Not an error: this topology simply has nothing to demonstrate on. */
      fprintf(stderr, "No core object found, nothing to do\n");
      ret = EXIT_SUCCESS;
      goto out;
    }

    /* s is only valid (and only needs freeing) when asprintf succeeded */
    if (hwloc_bitmap_asprintf(&s, core->cpuset) < 0) {
      fprintf(stderr, "Failed to format the core cpuset\n");
      goto out;
    }
    printf("Core L#0 cpuset = %s\n", s);
    free(s);

    /* setup the initiator to the first core cpuset */
    initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
    initiator.location.cpuset = core->cpuset;

    /* get local NUMA nodes and display their attributes */
    n = capacity;
    err = hwloc_get_local_numanode_objs(topology, &initiator, &n, nodes,
                                        /* we want NUMA nodes that are local to that core or to more */
                                        HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY);
    if (err < 0) {
      /* the array contents are not meaningful on failure, do not walk it */
      fprintf(stderr, "Failed to get the NUMA nodes local to core L#0\n");
      goto out;
    }
    printf("Found %u local NUMA nodes\n", n);
    /* never read past what was allocated, whatever count was reported */
    if (n > capacity)
      n = capacity;

    for(i=0; i<n; i++) {
      hwloc_uint64_t latency, bandwidth;
      /* subtype may be unset; passing NULL to %s is undefined behavior */
      const char *subtype = nodes[i]->subtype ? nodes[i]->subtype : "(null)";

      printf("NUMA node L#%u P#%u (subtype %s) is local to core L#0\n", nodes[i]->logical_index, nodes[i]->os_index, subtype);

      err = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH, nodes[i], &initiator, 0, &bandwidth);
      if (err < 0) {
        printf("  bandwidth is unknown\n");
      } else {
        printf("  bandwidth = %llu MiB/s\n", (unsigned long long) bandwidth);
      }
      err = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_LATENCY, nodes[i], &initiator, 0, &latency);
      if (err < 0) {
        printf("  latency is unknown\n");
      } else {
        printf("  latency = %llu ns\n", (unsigned long long) latency);
      }
    }

    /* allocate on best-bandwidth node */
    err = hwloc_memattr_get_best_target(topology, HWLOC_MEMATTR_ID_BANDWIDTH, &initiator, 0, &bestnode, NULL);
    if (err < 0) {
      printf("Couldn't find best NUMA node for bandwidth to core L#0\n");
    } else {
      printf("Best bandwidth NUMA node for core L#0 is L#%u P#%u\n", bestnode->logical_index, bestnode->os_index);
      /* allocate memory on best node */
      buffer = hwloc_alloc_membind(topology, buflen, bestnode->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
      if (!buffer) {
        fprintf(stderr, "Failed to allocate buffer on best node: %s\n", strerror(errno));
        goto out;
      }
      printf("Allocated buffer %p on best node\n", buffer);
      /* memory obtained from hwloc_alloc_membind() must be released with hwloc_free(), not free() */
      hwloc_free(topology, buffer, buflen);
    }

    ret = EXIT_SUCCESS;

 out:
    /* free(NULL) is a no-op, so this is safe on every path reaching here */
    free(nodes);
    /* Destroy topology object. */
    hwloc_topology_destroy(topology);

    return ret;
}