1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
|
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OPAL_BIT_OPS_H
#define OPAL_BIT_OPS_H
#include "opal/prefetch.h"
/**
* Calculates the highest bit in an integer
*
* @param value The integer value to examine
* @param start Position to start looking
*
* @returns pos Position of highest-set integer or -1 if none are set.
*
* Look at the integer "value" starting at position "start", and move
* to the right. Return the index of the highest bit that is set to
* 1.
*
* WARNING: *NO* error checking is performed. This is meant to be a
* fast inline function.
* Using __builtin_clz (count-leading-zeros) uses 3 cycles instead
* of 17 cycles (on average value, with start=32)
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
*/
static inline int opal_hibit(int value, int start)
{
unsigned int mask;
#if OPAL_C_HAVE_BUILTIN_CLZ
/* Only look at the part that the caller wanted looking at */
mask = value & ((1 << start) - 1);
if (OPAL_UNLIKELY(0 == mask)) {
return -1;
}
start = (8 * sizeof(int) - 1) - __builtin_clz(mask);
#else
--start;
mask = 1 << start;
for (; start >= 0; --start, mask >>= 1) {
if (value & mask) {
break;
}
}
#endif
return start;
}
/**
* Returns the cube dimension of a given value.
*
* @param value The integer value to examine
*
* @returns cubedim The smallest cube dimension containing that value
*
* Look at the integer "value" and calculate the smallest power of two
* dimension that contains that value.
*
* WARNING: *NO* error checking is performed. This is meant to be a
* fast inline function.
* Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
*/
static inline int opal_cube_dim(int value)
{
int dim, size;
#if OPAL_C_HAVE_BUILTIN_CLZ
if (OPAL_UNLIKELY(1 >= value)) {
return 0;
}
size = 8 * sizeof(int);
dim = size - __builtin_clz(value - 1);
#else
for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */
;
#endif
return dim;
}
/**
* @brief Returns next power-of-two of the given value.
*
* @param value The integer value to return power of 2
*
* @returns The next power of two
*
* WARNING: *NO* error checking is performed. This is meant to be a
* fast inline function.
* Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
*/
static inline int opal_next_poweroftwo(int value)
{
int power2;
#if OPAL_C_HAVE_BUILTIN_CLZ
if (OPAL_UNLIKELY(0 == value)) {
return 1;
}
power2 = 1 << (8 * sizeof(int) - __builtin_clz(value));
#else
for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */
;
#endif
return power2;
}
/**
* @brief Returns next power-of-two of the given value (and the value itselve if already
* power-of-two).
*
* @param value The integer value to return power of 2
*
* @returns The next power of two (inclusive)
*
* WARNING: *NO* error checking is performed. This is meant to be a
* fast inline function.
* Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
*/
static inline int opal_next_poweroftwo_inclusive(int value)
{
int power2;
#if OPAL_C_HAVE_BUILTIN_CLZ
if (OPAL_UNLIKELY(1 >= value)) {
return 1;
}
power2 = 1 << (8 * sizeof(int) - __builtin_clz(value - 1));
#else
for (power2 = 1; power2 < value; power2 <<= 1) /* empty */
;
#endif
return power2;
}
#endif /* OPAL_BIT_OPS_H */
|