# sc-membench Makefile
#
# Portable build system for Linux, macOS, and BSD
# Uses OpenMP for parallel bandwidth measurement
#
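# Build options:
# make - Auto-detect features and build universal binary
# make basic - Build minimal version (no optional dependencies)
# make hwloc - Build with hwloc (portable cache detection)
# make numa - Build with NUMA support (Linux only)
# make full - Build with all available features
# make all - Build every variant that can be built on this platform
# make clean - Remove built files
# make test - Quick test run
# make install - Install membench to /usr/local/bin
# make info - Show detected platform and libraries
# make help - Show all targets and options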
# =============================================================================
# Platform and Compiler Detection
# =============================================================================
# Detect OS: kernel name as printed by `uname -s`
UNAME_S := $(shell uname -s)

# Auto-detect compiler: prefer gcc, fall back to clang, then cc.
# GNU Make predefines CC (as "cc"), so a plain `CC ?= ...` would never take
# effect. Replace the value only while it still comes from Make's built-in
# default; a CC supplied on the command line or in the environment is kept.
# `:=` runs the probe once instead of on every expansion of $(CC).
ifeq ($(origin CC),default)
CC := $(shell command -v gcc 2>/dev/null || command -v clang 2>/dev/null || echo cc)
endif
# Base flags (portable across gcc/clang).
# -O2 turns the optimizer on: without an -O level the benchmark kernels are
# compiled unoptimized and the -mtune/-mcpu choices made further down have
# no effect. -g is kept so the binary still carries debug information.
CFLAGS_BASE = -O2 -g -Wall -Wextra -std=c11
# OpenMP flag (same for gcc and clang); used for both compiling and linking
OPENMP_FLAG = -fopenmp
# =============================================================================
# Source Files and Targets
# =============================================================================
# The one C source file every build variant is compiled from
SRC := membench.c

# Output binaries, one per build flavour
TARGET       := membench
TARGET_BASIC := membench-basic
TARGET_HWLOC := membench-hwloc
TARGET_NUMA  := membench-numa
TARGET_FULL  := membench-full
# =============================================================================
# Platform-Specific Universal Optimization Flags
# =============================================================================
# Platform-specific adjustments with UNIVERSAL compatibility.
#
# Each branch below sets:
#   CFLAGS_ARCH    baseline ISA / tuning flags for the build machine
#   CFLAGS_PATHS   extra include directories for third-party packages
#   LDFLAGS_PATHS  extra library directories for third-party packages
#   LDFLAGS_BASE   libraries that are always linked
#   OPENMP_LIBS    OpenMP runtime library, when it must be named explicitly
#
# ARCH is detected once, up front, so that it is defined on every platform
# (the build banner and `make info` print it) and so the BSD branch can pick
# flags from the real machine type. x86-64 shows up as x86_64 or amd64 and
# 64-bit ARM as aarch64 or arm64, depending on the OS.
ARCH := $(shell uname -m)

ifeq ($(UNAME_S),Darwin)
  # macOS: packages typically in /opt/homebrew (ARM) or /usr/local (Intel)
  ifeq ($(ARCH),arm64)
    # ARM64 macOS: Use generic ARMv8-A (works on all Apple Silicon)
    CFLAGS_ARCH = -mcpu=generic
    CFLAGS_PATHS = -I/opt/homebrew/include
    LDFLAGS_PATHS = -L/opt/homebrew/lib
  else
    # x86_64 macOS: Use baseline x86-64 (works on all Intel Macs)
    CFLAGS_ARCH = -march=x86-64 -mtune=generic
    CFLAGS_PATHS = -I/usr/local/include
    LDFLAGS_PATHS = -L/usr/local/lib
  endif
  LDFLAGS_BASE = -lm
  # clang needs an explicit -lomp for OpenMP; detect it from `$(CC) --version`
  IS_CLANG := $(shell $(CC) --version 2>/dev/null | grep -q clang && echo yes)
  ifeq ($(IS_CLANG),yes)
    OPENMP_LIBS = -lomp
  else
    OPENMP_LIBS =
  endif
else ifneq ($(filter FreeBSD OpenBSD NetBSD,$(UNAME_S)),)
  # BSD: choose flags from the machine type rather than assuming x86-64, so
  # -march=x86-64 is never handed to a compiler targeting another CPU.
  ifneq ($(filter amd64 x86_64,$(ARCH)),)
    CFLAGS_ARCH = -march=x86-64 -mtune=generic
  else ifneq ($(filter arm64 aarch64,$(ARCH)),)
    CFLAGS_ARCH = -mcpu=generic
  else
    # Unrecognised machine type: fall back to the compiler's own defaults
    CFLAGS_ARCH =
  endif
  # Packages live in /usr/local; NetBSD additionally searches /usr/pkg
  ifeq ($(UNAME_S),NetBSD)
    CFLAGS_PATHS = -I/usr/local/include -I/usr/pkg/include
    LDFLAGS_PATHS = -L/usr/local/lib -L/usr/pkg/lib
  else
    CFLAGS_PATHS = -I/usr/local/include
    LDFLAGS_PATHS = -L/usr/local/lib
  endif
  LDFLAGS_BASE = -lm
  OPENMP_LIBS =
else
  # Linux (default) - Use conservative, universally compatible flags
  ifeq ($(ARCH),aarch64)
    # ARM64: Use generic ARMv8-A with CRC
    CFLAGS_ARCH = -mcpu=generic+crc
  else ifeq ($(ARCH),x86_64)
    # x86_64: Use baseline x86-64 with SSE2
    CFLAGS_ARCH = -march=x86-64 -mtune=generic
  else
    # Other architectures: use generic optimization.
    # NOTE(review): -mtune=generic may not be accepted by every non-x86
    # compiler back end - confirm on the architectures you care about.
    CFLAGS_ARCH = -mtune=generic
  endif
  CFLAGS_PATHS =
  LDFLAGS_PATHS =
  LDFLAGS_BASE = -lm
  OPENMP_LIBS =
endif

# Final flag sets used by every compile/link command in this file
CFLAGS = $(CFLAGS_BASE) $(CFLAGS_ARCH) $(CFLAGS_PATHS) $(OPENMP_FLAG)
LDFLAGS = $(OPENMP_FLAG) $(LDFLAGS_BASE) $(LDFLAGS_PATHS) $(OPENMP_LIBS)
# =============================================================================
# Library Detection
# =============================================================================
# Check for hwloc (cross-platform): ask pkg-config first, then look for the
# header under the usual install prefixes (/usr/pkg covers NetBSD pkgsrc).
# The result is always exactly "yes" or "no", so it prints sensibly in the
# build banner and in `make info` instead of showing up as an empty string.
HAVE_HWLOC := $(shell \
    if pkg-config --exists hwloc 2>/dev/null || \
       test -f /usr/include/hwloc.h || \
       test -f /usr/local/include/hwloc.h || \
       test -f /usr/pkg/include/hwloc.h || \
       test -f /opt/homebrew/include/hwloc.h; \
    then echo yes; else echo no; fi)
# Check for libnuma (Linux only).
# Actually probe for the library: HAVE_NUMA decides whether the default build
# adds -DUSE_NUMA / -lnuma, so reporting "yes" on a machine without libnuma
# makes the default build fail. pkg-config is tried first, then the header
# in the standard include directories. The result is always "yes" or "no".
ifeq ($(UNAME_S),Linux)
HAVE_NUMA := $(shell \
    if pkg-config --exists numa 2>/dev/null || \
       test -f /usr/include/numa.h || \
       test -f /usr/local/include/numa.h; \
    then echo yes; else echo no; fi)
else
HAVE_NUMA := no
endif
# Check for libhugetlbfs (Linux only).
# The probe is a single `if A || B; then ...` so that exactly one word is
# printed. An unparenthesised `A && echo yes || B && echo yes` chain groups as
# `((A && echo yes) || B) && echo yes` and prints "yes" twice when pkg-config
# succeeds, which then fails the `ifeq (...,yes)` comparison.
ifeq ($(UNAME_S),Linux)
HAVE_HUGETLBFS := $(shell \
    if pkg-config --exists hugetlbfs 2>/dev/null || \
       test -f /usr/include/hugetlbfs.h; \
    then echo yes; else echo no; fi)
else
HAVE_HUGETLBFS := no
endif
# Preprocessor defines and link libraries for the auto-detected default
# build. Both lists start empty and gain one entry per optional library
# whose HAVE_* probe is exactly "yes".
DETECTED_DEFS :=
DETECTED_LIBS :=

# libhugetlbfs
ifeq "$(HAVE_HUGETLBFS)" "yes"
  DETECTED_DEFS += -DHAVE_HUGETLBFS
  DETECTED_LIBS += -lhugetlbfs
endif

# hwloc
ifeq "$(HAVE_HWLOC)" "yes"
  DETECTED_DEFS += -DUSE_HWLOC
  DETECTED_LIBS += -lhwloc
endif

# libnuma
ifeq "$(HAVE_NUMA)" "yes"
  DETECTED_DEFS += -DUSE_NUMA
  DETECTED_LIBS += -lnuma
endif
# =============================================================================
# Build Targets
# =============================================================================
# Command-style targets that never correspond to a file on disk. `install`
# is listed too, so a stray file named "install" cannot silently disable it.
.PHONY: default all clean test basic hwloc numa full help info install
# Default goal: the auto-detected "universal" binary
default: $(TARGET)

# Print a short configuration banner, then compile the source with every
# optional feature that detection enabled (DETECTED_DEFS / DETECTED_LIBS).
$(TARGET): $(SRC)
	@printf '%s\n' \
		"Building universal binary for $(UNAME_S) $(ARCH)..." \
		" Compiler: $(CC)" \
		" Optimization: $(CFLAGS_ARCH) (universal compatibility)" \
		" OpenMP: enabled" \
		" hwloc: $(HAVE_HWLOC)" \
		" numa: $(HAVE_NUMA)" \
		" hugetlbfs: $(HAVE_HUGETLBFS)"
	$(CC) $(CFLAGS) $(DETECTED_DEFS) -o $@ $< $(LDFLAGS) $(DETECTED_LIBS)
# basic: minimal variant - base flags and OpenMP only, with no feature
# defines and none of the optional libraries.
basic: $(TARGET_BASIC)

$(TARGET_BASIC): $(SRC)
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
# hwloc: variant compiled with -DUSE_HWLOC and linked against libhwloc
# (portable cache/topology detection). When detection did not report hwloc,
# the recipe prints install hints and fails instead of compiling.
hwloc: $(TARGET_HWLOC)

$(TARGET_HWLOC): $(SRC)
ifneq ($(HAVE_HWLOC),yes)
	@echo "Error: hwloc 2.x not found. Install with:"
	@echo " Linux: apt install libhwloc-dev (or: yum install hwloc-devel)"
	@echo " macOS: brew install hwloc"
	@echo " BSD: pkg install hwloc2"
	@exit 1
else
	$(CC) $(CFLAGS) -DUSE_HWLOC -o $@ $< $(LDFLAGS) -lhwloc
endif
# numa: variant compiled with -DUSE_NUMA and linked against libnuma.
# Only built on Linux with HAVE_NUMA=yes; every other combination prints the
# reason and exits with status 1.
numa: $(TARGET_NUMA)

$(TARGET_NUMA): $(SRC)
ifneq ($(UNAME_S),Linux)
	@echo "Error: NUMA support is only available on Linux"
	@exit 1
else ifneq ($(HAVE_NUMA),yes)
	@echo "Error: libnuma not found. Install with:"
	@echo " apt install libnuma-dev (or: yum install numactl-devel)"
	@exit 1
else
	$(CC) $(CFLAGS) -DUSE_NUMA -o $@ $< $(LDFLAGS) -lnuma
endif
# full: variant with every feature switched on (recommended for production
# Linux servers). On Linux that is hwloc + NUMA, plus hugetlbfs when it was
# detected; on any other OS only hwloc is enabled and a note is printed.
full: $(TARGET_FULL)

$(TARGET_FULL): $(SRC)
ifneq ($(UNAME_S),Linux)
	@echo "Note: Building without NUMA (not available on $(UNAME_S))"
	$(CC) $(CFLAGS) -DUSE_HWLOC -o $@ $< $(LDFLAGS) -lhwloc
else
	$(CC) $(CFLAGS) -DUSE_HWLOC -DUSE_NUMA $(if $(filter yes,$(HAVE_HUGETLBFS)),-DHAVE_HUGETLBFS) \
		-o $@ $< $(LDFLAGS) -lhwloc -lnuma $(if $(filter yes,$(HAVE_HUGETLBFS)),-lhugetlbfs)
endif
# Build all versions that can be built on this platform.
#
# The variants are ordinary prerequisites of `all` rather than recursive
# $(MAKE) invocations: Make sees the whole dependency graph, can build the
# binaries in parallel under -j, and does not re-parse this file (and re-run
# every $(shell ...) probe) once per variant.
#
# The default and basic binaries are always built; hwloc and numa variants
# are added when their library was detected; the full variant is added only
# on Linux with both hwloc and libnuma available.
ALL_TARGETS := $(TARGET) $(TARGET_BASIC)

ifeq ($(HAVE_HWLOC),yes)
  ALL_TARGETS += $(TARGET_HWLOC)
endif

ifeq ($(HAVE_NUMA),yes)
  ALL_TARGETS += $(TARGET_NUMA)
endif

ifeq ($(UNAME_S),Linux)
  ifeq ($(HAVE_HWLOC),yes)
    ifeq ($(HAVE_NUMA),yes)
      ALL_TARGETS += $(TARGET_FULL)
    endif
  endif
endif

all: $(ALL_TARGETS)
# Quick test (30 seconds): build the default binary if it is out of date,
# then run it with -v -t 30
test: $(TARGET)
	./$< -v -t 30
# Delete every binary this Makefile can produce; $(RM) is Make's built-in
# `rm -f`, so files that do not exist are not an error.
clean:
	$(RM) $(TARGET) $(TARGET_BASIC) $(TARGET_HWLOC) $(TARGET_NUMA) $(TARGET_FULL)
# Install the default binary as $(DESTDIR)$(BINDIR)/membench.
#   PREFIX   installation prefix (default: /usr/local)
#   BINDIR   directory for executables (default: $(PREFIX)/bin)
#   DESTDIR  staging root prepended to the path, empty by default; used by
#            packagers to install into a scratch tree
# With the defaults this installs to /usr/local/bin/membench. The target
# directory is created first so installing into a fresh tree works.
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin

install: $(TARGET)
	install -d "$(DESTDIR)$(BINDIR)"
	install -m 755 $(TARGET) "$(DESTDIR)$(BINDIR)/membench"
# Show detected configuration: platform, compiler, final flag sets, results
# of the optional-library probes, and the baseline ISA flags in use.
#
# The "Universal Optimization" line is chosen from the flags actually stored
# in CFLAGS_ARCH rather than from Linux-style architecture names, so it also
# reports correctly on platforms that name the machine differently (for
# example arm64 on macOS) or that leave ARCH unset.
info:
	@echo "Platform Detection:"
	@echo " OS: $(UNAME_S)"
	@echo " Arch: $(ARCH)"
	@echo " Compiler: $(CC)"
	@echo " CFLAGS: $(CFLAGS)"
	@echo " LDFLAGS: $(LDFLAGS)"
	@echo ""
	@echo "Library Detection:"
	@echo " hwloc: $(HAVE_HWLOC)"
	@echo " numa: $(HAVE_NUMA)"
	@echo " hugetlbfs: $(HAVE_HUGETLBFS)"
	@echo ""
	@echo "Universal Optimization:"
ifneq ($(filter -mcpu=generic+crc,$(CFLAGS_ARCH)),)
	@echo " ARM64: -mcpu=generic+crc (works on all ARM64 CPUs)"
else ifneq ($(filter -march=x86-64,$(CFLAGS_ARCH)),)
	@echo " x86_64: -march=x86-64 (works on all x86_64 CPUs since 2003)"
else ifneq ($(filter -mcpu=generic,$(CFLAGS_ARCH)),)
	@echo " ARM64: -mcpu=generic (generic ARMv8-A baseline)"
else
	@echo " Other: $(if $(strip $(CFLAGS_ARCH)),$(CFLAGS_ARCH),compiler defaults)"
endif
# Help target: print the available targets, the rationale for the
# conservative architecture flags, the OpenMP environment variables, and
# install hints for the optional libraries. The target list also covers the
# utility targets (test, install, clean, help).
help:
	@echo "sc-membench - Universal Memory Benchmark (OpenMP)"
	@echo ""
	@echo "Build targets:"
	@echo " make - Auto-detect features and build universal binary"
	@echo " make basic - Minimal build (no optional dependencies)"
	@echo " make hwloc - With hwloc (portable cache detection)"
	@echo " make numa - With NUMA support (Linux only)"
	@echo " make full - With all features (hwloc + numa, Linux recommended)"
	@echo " make all - Build all available versions"
	@echo " make info - Show detected platform and libraries"
	@echo " make test - Quick test run"
	@echo " make install - Install membench to /usr/local/bin"
	@echo " make clean - Remove built files"
	@echo " make help - Show this help"
	@echo ""
	@echo "Universal Compatibility:"
	@echo " This build system uses conservative optimization flags that work"
	@echo " on ALL CPUs of the target architecture:"
	@echo " - ARM64: -mcpu=generic+crc (Cortex-A53 to Neoverse-V2)"
	@echo " - x86_64: -march=x86-64 (Opteron/P4 to latest Xeon/EPYC)"
	@echo " No illegal instruction errors, works in any Docker container."
	@echo ""
	@echo "OpenMP thread control (environment variables):"
	@echo " OMP_PROC_BIND=spread Distribute threads across NUMA nodes"
	@echo " OMP_PLACES=cores One thread per physical core"
	@echo " OMP_NUM_THREADS=N Override thread count"
	@echo ""
	@echo "Optional dependencies:"
	@echo " hwloc 2: Portable cache/topology detection (requires hwloc 2.x)"
	@echo " Linux: apt install libhwloc-dev"
	@echo " macOS: brew install hwloc libomp"
	@echo " BSD: pkg install hwloc2"
	@echo ""
	@echo " numa: NUMA-aware memory allocation (Linux only)"
	@echo " apt install libnuma-dev"
	@echo ""
	@echo " hugetlbfs: Better huge page detection (Linux only)"
	@echo " apt install libhugetlbfs-dev"
|