#!/usr/bin/env sh
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This script makes sure we have our C++ dependencies in order
# so that we can compile the bindings in src/ and build the
# R package's shared library.
#
# The core logic is:
#
# * Find libarrow on the system. If it is present, make sure
# that its version is compatible with the R package.
# * If no suitable libarrow is found, download it (where allowed)
# or build it from source.
# * Determine what features this libarrow has and what other
# flags it requires, and set them in src/Makevars for use when
# compiling the bindings.
# * Run a test program to confirm that arrow headers are found
#
# It is intended to be flexible enough to account for several
# workflows, and should be expected to always result in a successful
# build as long as you haven't set environment variables that
# would limit its options.
#
# * Installing a released version from source, as from CRAN, with
# no other prior setup
# * On macOS and Linux, the nixlibs.R build script will download
# or build libarrow and dependencies
# * Installing a released version but first installing libarrow.
# It will use pkg-config and brew to search for libraries.
# * Installing a development version from source as a user.
# Any libarrow found on the system corresponding to a release will not match
# the dev version and will thus be skipped. nixlibs.R will be used to build libarrow.
# * TODO(GH-35049): Installing a dev version as an infrequent contributor.
# Currently the configure script doesn't offer much to make this easy.
# If you expect to rebuild multiple times, you should set up a dev
# environment.
# * Installing a dev version as a regular developer.
# The best way is to maintain your own cmake build and install it
# to a directory (not system) that you set as the env var
# $ARROW_HOME.
#
# For more information, see the various installation and developer vignettes.
# Library settings: the pkg-config module name, the Homebrew formula name,
# and the header that the final test compile tries to include.
PKG_CONFIG_NAME="arrow"
PKG_BREW_NAME="apache-arrow"
PKG_TEST_HEADER="<arrow/api.h>"

# Some env vars that control the build (all logical, case-insensitive).
# Each one is lower-cased here so that the rest of the script can compare
# against the literal strings "true" and "false".

# Development mode, also increases verbosity in the bundled build
ARROW_R_DEV=$(echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]')
# The bundled build compiles arrow C++ from source; FORCE ensures we don't pick up
# any other packages that may be found on the system
FORCE_BUNDLED_BUILD=$(echo $FORCE_BUNDLED_BUILD | tr '[:upper:]' '[:lower:]')
# If present, `pkg-config` will be used to find libarrow on the system,
# unless this is set to false
ARROW_USE_PKG_CONFIG=$(echo $ARROW_USE_PKG_CONFIG | tr '[:upper:]' '[:lower:]')
# Just used in testing: whether or not it is ok to download dependencies (in the
# bundled build)
ARROW_OFFLINE_BUILD=$(echo $ARROW_OFFLINE_BUILD | tr '[:upper:]' '[:lower:]')

# Version of the R package, read from the "Version:" field of DESCRIPTION
VERSION=$(grep '^Version' DESCRIPTION | sed 's/Version: //')
UNAME=$(uname -s)
# Allow the caller to override the pkg-config executable; default to "pkg-config"
: "${PKG_CONFIG:=pkg-config}"

# If in development mode, run the codegen script to render arrowExports.*
if [ "$ARROW_R_DEV" = "true" ] && [ -f "data-raw/codegen.R" ]; then
  echo "*** Generating code with data-raw/codegen.R"
  # R_HOME is quoted so that an R installation path containing whitespace works
  "${R_HOME}"/bin/Rscript data-raw/codegen.R
fi

# Arrow requires C++20, so check for it: R reports an empty CXX20 when no
# suitable compiler is configured.
if [ -z "$("${R_HOME}"/bin/R CMD config CXX20)" ]; then
  echo "------------------------- NOTE ---------------------------"
  echo "Cannot install arrow: a C++20 compiler is required."
  echo "See https://arrow.apache.org/docs/r/articles/install.html"
  echo "---------------------------------------------------------"
  exit 1
fi

# Test if pkg-config is available to use.
# ${PKG_CONFIG} is deliberately expanded unquoted throughout this script: it
# may hold a command plus options (do_bundled_build appends --define-prefix).
if ${PKG_CONFIG} --version >/dev/null 2>&1; then
  PKG_CONFIG_AVAILABLE="true"
  echo "*** pkg-config found."
else
  echo "*** pkg-config not found."
  PKG_CONFIG_AVAILABLE="false"
  ARROW_USE_PKG_CONFIG="false"
fi

## Find openssl
# Arrow's cmake process uses this same process to find openssl,
# but doing it now allows us to catch it in
# nixlibs.R and activate S3 and GCS support for the source build.

# macOS ships with libressl. openssl is installable with brew, but it is
# generally not linked. We can override this and find
# openssl by setting OPENSSL_ROOT_DIR (which cmake will pick up later in
# the installation process).
if [ "${OPENSSL_ROOT_DIR}" = "" ] && brew --prefix openssl >/dev/null 2>&1; then
  export OPENSSL_ROOT_DIR="$(brew --prefix openssl)"
  # Prepend brew's openssl so pkg-config resolves it ahead of anything else
  export PKG_CONFIG_PATH="${OPENSSL_ROOT_DIR}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
fi
# Look for openssl with pkg-config for non-brew sources (e.g. CRAN) and Linux
if [ "${OPENSSL_ROOT_DIR}" = "" ] && [ "${PKG_CONFIG_AVAILABLE}" = "true" ]; then
  if ${PKG_CONFIG} --exists openssl; then
    export OPENSSL_ROOT_DIR="$(${PKG_CONFIG} --variable=prefix openssl)"
  fi
fi
#############
# Functions #
#############
# Entry point for obtaining libarrow.
# Unless FORCE_BUNDLED_BUILD is "true", first search the system via
# find_arrow; fall back to do_bundled_build when nothing suitable was found
# (i.e. find_arrow left _LIBARROW_FOUND set to "false").
find_or_build_libarrow () {
  if [ "$FORCE_BUNDLED_BUILD" != "true" ]; then
    find_arrow
    # A usable system libarrow was found: nothing left to do
    [ "$_LIBARROW_FOUND" = "false" ] || return 0
  fi
  # Either the bundled build was forced, or the system search came up empty
  do_bundled_build
}
# This function looks in a few places for libarrow on the system already.
# If the found library version is not compatible with the R package,
# it won't be used.
#
# Globals read: ARROW_HOME, ARROW_USE_PKG_CONFIG, PKG_CONFIG, PKG_CONFIG_NAME,
#               PKG_CONFIG_AVAILABLE, PKG_CONFIG_PATH, R_HOME, VERSION
# Globals set:  _LIBARROW_FOUND (install prefix, or "false" if nothing usable),
#               PC_LIB_VERSION, OLD_PKG_CONFIG_PATH, PKG_CONFIG_PATH (exported)
find_arrow () {
  # Preserve original PKG_CONFIG_PATH. We'll add ${LIB_DIR}/pkgconfig to it if needed
  OLD_PKG_CONFIG_PATH="${PKG_CONFIG_PATH}"

  if [ "$ARROW_HOME" ] && [ -d "$ARROW_HOME" ]; then
    # 1. ARROW_HOME is a directory you've built and installed libarrow into.
    #    If the env var is set, we use it
    _LIBARROW_FOUND="${ARROW_HOME}"
    echo "*** Trying Arrow C++ in ARROW_HOME: $_LIBARROW_FOUND"
    export PKG_CONFIG_PATH="${_LIBARROW_FOUND}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
  elif [ "$ARROW_USE_PKG_CONFIG" != "false" ] && ${PKG_CONFIG} ${PKG_CONFIG_NAME}; then
    # 2. Use pkg-config to find arrow on the system
    _LIBARROW_FOUND="$(${PKG_CONFIG} --variable=prefix --silence-errors ${PKG_CONFIG_NAME})"
    echo "*** Trying Arrow C++ found by pkg-config: $_LIBARROW_FOUND"
  else
    _LIBARROW_FOUND="false"
  fi

  if [ "$_LIBARROW_FOUND" != "false" ]; then
    # We found a library, so check for version mismatch
    if [ "$PKG_CONFIG_AVAILABLE" = "true" ]; then
      PC_LIB_VERSION=$(${PKG_CONFIG} --modversion ${PKG_CONFIG_NAME})
    else
      # Without pkg-config, read the version straight out of arrow.pc.
      # The path is quoted so that a prefix containing whitespace still works.
      PC_LIB_VERSION=$(grep '^Version' "${_LIBARROW_FOUND}/lib/pkgconfig/arrow.pc" | sed 's/Version: //')
    fi
    # This is in an R script for convenience and testability.
    # Success means the found C++ library is ok to use.
    # Error means the versions don't line up and we shouldn't use it.
    # More specific messaging to the user is in the R script
    if ! "${R_HOME}"/bin/Rscript tools/check-versions.R $VERSION $PC_LIB_VERSION 2> /dev/null; then
      _LIBARROW_FOUND="false"
    fi
  fi

  if [ "$_LIBARROW_FOUND" = "false" ]; then
    # We didn't find a suitable library, so reset the pkg-config search path
    export PKG_CONFIG_PATH="${OLD_PKG_CONFIG_PATH}"
  fi
}
# Download or build libarrow via tools/nixlibs.R and point the script at it.
#
# Globals read: R_HOME, VERSION, PKG_CONFIG_AVAILABLE, PKG_CONFIG, PKG_CONFIG_PATH
# Globals set:  _LIBARROW_FOUND (absolute prefix of the bundled libarrow, or
#               "false" if its lib/ directory is missing), LIB_DIR, and, when
#               pkg-config is available, PKG_CONFIG_PATH (exported) and
#               possibly PKG_CONFIG (gains --define-prefix)
# Exits the whole script with status 1 if nixlibs.R fails.
do_bundled_build () {
  if ! "${R_HOME}"/bin/Rscript tools/nixlibs.R $VERSION; then
    # If the nixlibs.R script failed, we can't continue
    echo "------------------------- NOTE ---------------------------"
    echo "There was an issue building the Arrow C++ libraries."
    echo "See https://arrow.apache.org/docs/r/articles/install.html"
    echo "---------------------------------------------------------"
    exit 1
  fi

  if [ -d "libarrow/arrow-$VERSION" ]; then
    _LIBARROW_FOUND="$(pwd)/libarrow/arrow-${VERSION}"
  else
    # It's possible that the version of the libarrow binary is not identical to the
    # R version, e.g. if the R build is a patch release, so find what the dir is
    # actually called. If there is more than one version present, use the one
    # with the highest version. The components are compared numerically, field
    # by field: a plain lexical sort would rank arrow-9.0.0 above arrow-10.0.0.
    _LIBARROW_FOUND="$(pwd)/libarrow/arrow-$(
      ls libarrow/ \
        | grep '^arrow-' \
        | sed 's/^arrow-//' \
        | sort -t . -k 1,1n -k 2,2n -k 3,3n -k 4,4n \
        | tail -n 1
    )"
  fi

  # Handle a few special cases, using what we know about the bundled build
  # and our ability to make edits to it since we "own" it.
  LIB_DIR="${_LIBARROW_FOUND}/lib"
  if [ -d "$LIB_DIR" ]; then
    if [ "${PKG_CONFIG_AVAILABLE}" = "true" ]; then
      # Use pkg-config to do static linking of libarrow's dependencies
      export PKG_CONFIG_PATH="${LIB_DIR}/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
      # pkg-config on CentOS 7 doesn't have --define-prefix option.
      if ${PKG_CONFIG} --help | grep -- --define-prefix >/dev/null 2>&1; then
        # --define-prefix is for binary packages. Binary packages
        # uses "/arrow/r/libarrow/dist" as prefix but it doesn't
        # match the extracted path. --define-prefix uses a directory
        # that arrow.pc exists as its prefix instead of
        # "/arrow/r/libarrow/dist".
        PKG_CONFIG="${PKG_CONFIG} --define-prefix"
      else
        # Rewrite prefix= in arrow.pc on CentOS 7.
        # LIB_DIR is quoted so that a build path with whitespace is not split;
        # the glob itself stays outside the quotes so it still expands.
        sed \
          -i.bak \
          -e "s,prefix=/arrow/r/libarrow/dist,prefix=${LIB_DIR}/..,g" \
          "${LIB_DIR}"/pkgconfig/*.pc
        rm -f "${LIB_DIR}"/pkgconfig/*.pc.bak
      fi
    fi
  else
    # If the library directory does not exist, the script must not have been successful
    _LIBARROW_FOUND="false"
  fi
}
# Once libarrow is obtained, this function sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS`
# either from pkg-config or by inferring things about the directory in $1
#
# Arguments: $1 - prefix of the libarrow installation (only used by the
#                 helpers when pkg-config is not available)
# Globals read: PKG_CONFIG_AVAILABLE, ARROW_R_CXXFLAGS
set_pkg_vars () {
  # "$1" is quoted when forwarded so a prefix containing whitespace reaches
  # the helpers as a single argument.
  if [ "$PKG_CONFIG_AVAILABLE" = "true" ]; then
    set_lib_dir_with_pc
  else
    set_lib_dir_without_pc "$1"
  fi

  # Check cmake options for enabled features. This uses LIB_DIR that
  # is set by the above set_lib_dir_* call.
  add_feature_flags

  if [ "$PKG_CONFIG_AVAILABLE" = "true" ]; then
    set_pkg_vars_with_pc
  else
    set_pkg_vars_without_pc "$1"
  fi

  # Set any user-defined CXXFLAGS
  if [ "$ARROW_R_CXXFLAGS" ]; then
    PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS"
  fi
}
# When pkg-config is usable, ask it for libarrow's library directory and
# store the answer in LIB_DIR.
set_lib_dir_with_pc () {
  LIB_DIR=$(${PKG_CONFIG} --variable=libdir ${PKG_CONFIG_NAME})
}
# Fill in PKG_CFLAGS, PKG_LIBS and PKG_DIRS by querying pkg-config for
# libarrow plus every optional component recorded in
# PKG_CONFIG_NAMES_FEATURES, then append the feature-specific extras.
set_pkg_vars_with_pc () {
  # Intentionally expanded unquoted below: this is a whitespace-separated
  # list of pkg-config module names.
  pkg_config_names="${PKG_CONFIG_NAME} ${PKG_CONFIG_NAMES_FEATURES}"

  # pkg-config's flags come first, then any pre-existing PKG_CFLAGS, then
  # the -DARROW_R_WITH_* feature defines.
  PKG_CFLAGS="$(${PKG_CONFIG} --cflags ${pkg_config_names}) $PKG_CFLAGS $PKG_CFLAGS_FEATURES"

  # -l flags and "other" linker flags; the -L search paths are kept apart
  PKG_LIBS="$(${PKG_CONFIG} --libs-only-l --libs-only-other ${pkg_config_names}) $PKG_LIBS_FEATURES"
  PKG_DIRS=$(${PKG_CONFIG} --libs-only-L ${pkg_config_names})
}
# Without pkg-config, assume the libraries live in the lib/ subdirectory of
# the installation prefix given as $1 and record that in LIB_DIR.
set_lib_dir_without_pc () {
  LIB_DIR="${1}/lib"
}
# Derive PKG_CFLAGS, PKG_DIRS and PKG_LIBS by hand when pkg-config cannot be
# used. $1 is the libarrow installation prefix; LIB_DIR must already be set.
set_pkg_vars_without_pc () {
  # --- compiler flags ---
  PKG_CFLAGS="-I$1/include $PKG_CFLAGS $PKG_CFLAGS_FEATURES"
  # Mirror the old-ABI define if arrow.pc mentions it
  grep -q "_GLIBCXX_USE_CXX11_ABI=0" "${LIB_DIR}/pkgconfig/arrow.pc" \
    && PKG_CFLAGS="${PKG_CFLAGS} -D_GLIBCXX_USE_CXX11_ABI=0"

  # --- library search paths ---
  PKG_DIRS="-L${LIB_DIR}"
  [ "${OPENSSL_ROOT_DIR}" = "" ] || PKG_DIRS="${PKG_DIRS} -L${OPENSSL_ROOT_DIR}/lib"

  # --- libraries: feature libs first, then arrow itself, then its deps ---
  PKG_LIBS="$PKG_LIBS_FEATURES_WITHOUT_PC -larrow"
  [ -z "$(find "$LIB_DIR" -name 'libarrow_bundled_dependencies.*')" ] \
    || PKG_LIBS="$PKG_LIBS -larrow_bundled_dependencies"
  PKG_LIBS="$PKG_LIBS $PKG_LIBS_FEATURES $SSL_LIBS_WITHOUT_PC"

  # If on Raspberry Pi, need to manually link against latomic
  # See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81358 for similar example
  # pkg-config will handle this for us automatically, see ARROW-6312
  if grep raspbian /etc/os-release >/dev/null 2>&1; then
    PKG_CFLAGS="$PKG_CFLAGS -DARROW_CXXFLAGS=-latomic"
    PKG_LIBS="$PKG_LIBS -latomic"
  fi
}
# Work out which optional components libarrow was built with and record the
# matching flags for the R bindings.
#
# Globals read: LIB_DIR
# Globals set:  ARROW_OPTS_CMAKE, PKG_CFLAGS_FEATURES (-DARROW_R_WITH_* defines),
#               PKG_CONFIG_NAMES_FEATURES (extra pkg-config modules),
#               PKG_LIBS_FEATURES (reset to empty),
#               PKG_LIBS_FEATURES_WITHOUT_PC (extra -l flags), and
#               SSL_LIBS_WITHOUT_PC (only when S3 or GCS is enabled)
add_feature_flags () {
  PKG_CFLAGS_FEATURES=""
  PKG_CONFIG_NAMES_FEATURES=""
  PKG_LIBS_FEATURES=""
  PKG_LIBS_FEATURES_WITHOUT_PC=""

  # The enabled features are read from ArrowOptions.cmake, which the
  # libarrow build generates.
  ARROW_OPTS_CMAKE="$LIB_DIR/cmake/Arrow/ArrowOptions.cmake"
  if [ ! -f "${ARROW_OPTS_CMAKE}" ]; then
    echo "*** $ARROW_OPTS_CMAKE not found; some features will not be enabled"
    return
  fi

  # Components that ship as their own library. Each entry is
  #   "<FEATURE> <pkg-config module> <library name>"
  # Every such library is assumed to have the same -L flag as arrow, so
  # nothing needs to be added to PKG_DIRS. Later entries are prepended to
  # the -l list so that they end up ahead of the earlier ones.
  for _component in \
    "PARQUET parquet parquet" \
    "COMPUTE arrow-compute arrow_compute" \
    "DATASET arrow-dataset arrow_dataset" \
    "ACERO arrow-acero arrow_acero" \
    "SUBSTRAIT arrow-substrait arrow_substrait"
  do
    # Split the entry into $1 (feature), $2 (pc module), $3 (library)
    set -- $_component
    if arrow_built_with "ARROW_$1"; then
      PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_$1"
      PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES $2"
      PKG_LIBS_FEATURES_WITHOUT_PC="-l$3 $PKG_LIBS_FEATURES_WITHOUT_PC"
    fi
  done

  # Features that only need a preprocessor define
  for _component in JSON S3 GCS; do
    if arrow_built_with "ARROW_${_component}"; then
      PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_${_component}"
    fi
  done
  unset _component

  if arrow_built_with ARROW_GCS || arrow_built_with ARROW_S3; then
    # If pkg-config is available it will handle this for us automatically
    SSL_LIBS_WITHOUT_PC="-lcurl -lssl -lcrypto"
  fi
}
# Check whether libarrow was built with a given cmake option.
# Arguments: $1 - cmake option name, e.g. ARROW_PARQUET
# Globals read: ARROW_OPTS_CMAKE (path to ArrowOptions.cmake)
# Returns: 0 if the file contains `set(<option> "ON")` (matched
#          case-insensitively), non-zero otherwise.
arrow_built_with() {
  # The path is quoted so that a location containing whitespace is passed as
  # one file name, and so that an empty value fails instead of making grep
  # wait on stdin.
  grep -i 'set('"$1"' "ON")' "$ARROW_OPTS_CMAKE" >/dev/null 2>&1
}
##############
# Main logic #
##############
# First: locate a usable libarrow on the system, or build one.
find_or_build_libarrow

# We should have a valid libarrow build in $_LIBARROW_FOUND
# Now set `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` based on that.
if [ "$_LIBARROW_FOUND" != "false" ] && [ "$_LIBARROW_FOUND" != "" ]; then
  # The prefix is quoted so that it is passed as a single argument
  set_pkg_vars "${_LIBARROW_FOUND}"

  # If we didn't find any libraries with pkg-config, try again without pkg-config
  FOUND_PKG_LIBS=$(echo "$PKG_LIBS" | tr -d '[:space:]')
  if [ -z "$FOUND_PKG_LIBS" ] && [ "$PKG_CONFIG_AVAILABLE" = "true" ]; then
    echo "*** pkg-config failed to find libraries. Running detection without pkg-config."
    PKG_CONFIG_AVAILABLE="false"
    set_pkg_vars "${_LIBARROW_FOUND}"
  fi
else
  # To make it easier to debug which code path was taken add a specific
  # message to the log in addition to the 'NOTE'
  echo "*** Failed to find Arrow C++ libraries."
fi

# Test that we can compile something with those flags.
# The compiler is run with -E, so this only preprocesses a one-line program
# that includes the test header.
CXX20="$("${R_HOME}"/bin/R CMD config CXX20) -E"
CXX20FLAGS=$("${R_HOME}"/bin/R CMD config CXX20FLAGS)
CXX20STD=$("${R_HOME}"/bin/R CMD config CXX20STD)
CPPFLAGS=$("${R_HOME}"/bin/R CMD config CPPFLAGS)
# TEST_CMD is expanded unquoted on purpose: it is a command line, not a word
TEST_CMD="${CXX20} ${CPPFLAGS} ${PKG_CFLAGS} ${CXX20FLAGS} ${CXX20STD} -xc++ -"
if TEST_ERROR=$(echo "#include $PKG_TEST_HEADER" | ${TEST_CMD} -o /dev/null 2>&1); then
  # Prepend PKG_DIRS to PKG_LIBS and write to Makevars
  PKG_LIBS="$PKG_DIRS $PKG_LIBS"
  echo "PKG_CFLAGS=$PKG_CFLAGS"
  echo "PKG_LIBS=$PKG_LIBS"
  sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars
  # Success
  exit 0
else
  echo "------------------------- NOTE ---------------------------"
  echo "There was an issue preparing the Arrow C++ libraries."
  echo "See https://arrow.apache.org/docs/r/articles/install.html"
  echo "----------------------------------------------------------"
  echo ""
  echo "Test compile error: ${TEST_ERROR}"
  echo "Failing compile command: ${TEST_CMD}"
  echo "PKG_CFLAGS=$PKG_CFLAGS"
  echo "PKG_LIBS=$PKG_LIBS"
  PKG_LIBS=""
  PKG_CFLAGS=""
  exit 1
fi