#!/bin/bash
# Install Apache Spark and start a local standalone master.
# Overridable via environment: INSTALL_DIR, SPARK_VER, SPARK_HADOOP_VER, SPARK_ENV.
INSTALL_DIR=${INSTALL_DIR:-/usr}
USER=$(whoami)
SPARK_VER=${SPARK_VER:-3.4.0}
SPARK=spark-$SPARK_VER-bin-hadoop${SPARK_HADOOP_VER:-3}
SPARK_DIR=${INSTALL_DIR}/$SPARK
SPARK_LOCAL_DIR="/usr/local/spark"
SPARK_ENV=${SPARK_ENV:-$HOME/spark_env.sh}
# retry logic from: https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-script-actions-linux
# Retry a command up to MAXATTEMPTS times, waiting RETRYINTERVAL seconds
# between attempts. Arguments are kept as an array so quoted arguments
# survive word splitting intact.
MAXATTEMPTS=3
retry() {
    local -a CMD=("$@")
    local -i ATTEMPTNUM=1
    local -i RETRYINTERVAL=2
    until "${CMD[@]}"
    do
        if (( ATTEMPTNUM == MAXATTEMPTS )); then
            echo "Attempt $ATTEMPTNUM failed. No more attempts left."
            return 1
        else
            echo "Attempt $ATTEMPTNUM failed! Retrying in $RETRYINTERVAL seconds..."
            sleep "$RETRYINTERVAL"
            (( ATTEMPTNUM++ ))
        fi
    done
}
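
# Usage sketch (hypothetical URL, for illustration only):
#   retry wget -nv https://example.com/artifact.tgz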
# Fetch the Spark tarball, unpack it into INSTALL_DIR, hand ownership to the
# invoking user, and (re)point the /usr/local/spark symlink at it. The retry
# is chained with && so a failed download aborts the rest; ln -sfn replaces a
# stale symlink left by an earlier run instead of failing on it.
download_spark() {
    retry wget -nv --trust-server-names "https://archive.apache.org/dist/spark/spark-$SPARK_VER/$SPARK.tgz" &&
    sudo tar -zxf "$SPARK.tgz" --directory "$INSTALL_DIR" &&
    sudo chown -R "$USER:$USER" "$SPARK_DIR" &&
    sudo ln -sfn "$INSTALL_DIR/$SPARK" "$SPARK_LOCAL_DIR" &&
    echo "download_spark successful"
}
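
# Note: wget saves the tarball into the current working directory and the
# script never deletes it; remove $SPARK.tgz by hand if disk space matters.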
# Persist the Spark environment to $SPARK_ENV and load it into the current
# shell. The file is rewritten from scratch (the first echo truncates) so
# repeated runs do not pile up duplicate PATH entries; $PATH and $CLASSPATH
# are expanded at write time.
setup_spark_env() {
    echo "export SPARK_HOME=${SPARK_LOCAL_DIR}" > "$SPARK_ENV" &&
    echo "export PATH=${SPARK_LOCAL_DIR}/bin:${SPARK_LOCAL_DIR}/sbin:$PATH" >> "$SPARK_ENV" &&
    echo "export CLASSPATH=$CLASSPATH" >> "$SPARK_ENV" &&
    source "$SPARK_ENV"
}
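
# Later shell sessions can restore the same environment by sourcing the file,
# assuming the default SPARK_ENV location:
#   source "$HOME/spark_env.sh"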
# Write a minimal localhost-only configuration. "sudo echo ... > file" would
# perform the redirection as the unprivileged caller, so privileged writes go
# through "sudo tee" instead. Spark 3.x reads conf/workers (conf/slaves was
# the pre-3.0 name). The env-file setup is shared with setup_spark_env rather
# than duplicated here.
configure_spark() {
    setup_spark_env &&
    echo "SPARK_MASTER_HOST=127.0.0.1" | sudo tee "${SPARK_LOCAL_DIR}/conf/spark-env.sh" > /dev/null &&
    echo "SPARK_LOCAL_IP=127.0.0.1" | sudo tee -a "${SPARK_LOCAL_DIR}/conf/spark-env.sh" > /dev/null &&
    echo "localhost" | sudo tee "${SPARK_LOCAL_DIR}/conf/workers" > /dev/null &&
    sudo cp "${SPARK_LOCAL_DIR}/conf/log4j2.properties.template" "${SPARK_LOCAL_DIR}/conf/log4j2.properties" &&
    echo "configure_spark successful"
}
# Download and configure Spark unless a prior install is detected (the
# generated workers file doubles as the cache marker).
install_spark() {
    if [[ ! -f ${SPARK_DIR}/conf/workers ]]; then
        echo "Installing Spark..."
        download_spark &&
        configure_spark &&
        echo "Install Spark successful"
    else
        echo "Found cached Spark install"
    fi
}
# Main flow: install (or reuse a cached) Spark, make sure the symlink and env
# file exist, then (re)start a local standalone master.
install_spark &&
if [[ ! -L ${SPARK_LOCAL_DIR} ]]; then sudo ln -s "$INSTALL_DIR/$SPARK" "$SPARK_LOCAL_DIR"; fi &&
setup_spark_env &&
"${SPARK_LOCAL_DIR}/sbin/stop-master.sh" &&
"${SPARK_LOCAL_DIR}/sbin/start-master.sh" &&
echo "Started spark"