File: bit_ops.h

package info (click to toggle)
openmpi 5.0.8-4
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 201,684 kB
  • sloc: ansic: 613,078; makefile: 42,353; sh: 11,194; javascript: 9,244; f90: 7,052; java: 6,404; perl: 5,179; python: 1,859; lex: 740; fortran: 61; cpp: 20; tcl: 12
file content (161 lines) | stat: -rw-r--r-- 4,422 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#ifndef OPAL_BIT_OPS_H
#define OPAL_BIT_OPS_H

#include "opal/prefetch.h"

/**
 * Calculates the highest bit in an integer
 *
 * @param value The integer value to examine
 * @param start Position to start looking
 *
 * @returns pos Position of highest-set integer or -1 if none are set.
 *
 * Look at the integer "value" starting at position "start", and move
 * to the right.  Return the index of the highest bit that is set to
 * 1.
 *
 * WARNING: *NO* error checking is performed.  This is meant to be a
 * fast inline function.
 * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead
 * of 17 cycles (on average value, with start=32)
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_hibit(int value, int start)
{
    unsigned int mask;

#if OPAL_C_HAVE_BUILTIN_CLZ
    /* Only look at the part that the caller wanted looking at */
    mask = value & ((1 << start) - 1);

    if (OPAL_UNLIKELY(0 == mask)) {
        return -1;
    }

    start = (8 * sizeof(int) - 1) - __builtin_clz(mask);
#else
    --start;
    mask = 1 << start;

    for (; start >= 0; --start, mask >>= 1) {
        if (value & mask) {
            break;
        }
    }
#endif

    return start;
}

/**
 * Returns the cube dimension of a given value.
 *
 * @param value The integer value to examine
 *
 * @returns cubedim The smallest cube dimension containing that value
 *
 * Look at the integer "value" and calculate the smallest power of two
 * dimension that contains that value.
 *
 * WARNING: *NO* error checking is performed.  This is meant to be a
 * fast inline function.
 * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_cube_dim(int value)
{
    int dim, size;

#if OPAL_C_HAVE_BUILTIN_CLZ
    if (OPAL_UNLIKELY(1 >= value)) {
        return 0;
    }
    size = 8 * sizeof(int);
    dim = size - __builtin_clz(value - 1);
#else
    for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */
        ;
#endif

    return dim;
}

/**
 * @brief Returns next power-of-two of the given value.
 *
 * @param value The integer value to return power of 2
 *
 * @returns The next power of two
 *
 * WARNING: *NO* error checking is performed.  This is meant to be a
 * fast inline function.
 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_next_poweroftwo(int value)
{
    int power2;

#if OPAL_C_HAVE_BUILTIN_CLZ
    if (OPAL_UNLIKELY(0 == value)) {
        return 1;
    }
    power2 = 1 << (8 * sizeof(int) - __builtin_clz(value));
#else
    for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */
        ;
#endif

    return power2;
}

/**
 * @brief Returns next power-of-two of the given value (and the value itselve if already
 * power-of-two).
 *
 * @param value The integer value to return power of 2
 *
 * @returns The next power of two (inclusive)
 *
 * WARNING: *NO* error checking is performed.  This is meant to be a
 * fast inline function.
 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_next_poweroftwo_inclusive(int value)
{
    int power2;

#if OPAL_C_HAVE_BUILTIN_CLZ
    if (OPAL_UNLIKELY(1 >= value)) {
        return 1;
    }
    power2 = 1 << (8 * sizeof(int) - __builtin_clz(value - 1));
#else
    for (power2 = 1; power2 < value; power2 <<= 1) /* empty */
        ;
#endif

    return power2;
}

#endif /* OPAL_BIT_OPS_H */