File: install_spark.sh

#!/bin/bash

# Install spark

INSTALL_DIR=${INSTALL_DIR:-/usr}
USER=$(whoami)

SPARK_VER=${SPARK_VER:-3.4.0}
SPARK=spark-$SPARK_VER-bin-hadoop${SPARK_HADOOP_VER:-3}
SPARK_DIR=${INSTALL_DIR}/$SPARK
SPARK_LOCAL_DIR="/usr/local/spark"
SPARK_ENV=${SPARK_ENV:-$HOME/spark_env.sh}
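
# All of the above use ${VAR:-default} and can be overridden from the
# environment; an illustrative invocation (version/paths are examples only):
#   SPARK_VER=3.5.1 INSTALL_DIR=/opt ./install_spark.sh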

# retry logic from: https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-script-actions-linux
MAXATTEMPTS=3
retry() {
    # Run the given command until it succeeds, up to MAXATTEMPTS times.
    # Store the command as an array so quoted arguments survive word splitting.
    local -a CMD=("$@")
    local -i ATTEMPTNUM=1
    local -i RETRYINTERVAL=2

    until "${CMD[@]}"
    do
        if (( ATTEMPTNUM == MAXATTEMPTS ))
        then
            echo "Attempt $ATTEMPTNUM failed. No more attempts left."
            return 1
        else
            echo "Attempt $ATTEMPTNUM failed! Retrying in $RETRYINTERVAL seconds..."
            sleep $RETRYINTERVAL
            ATTEMPTNUM=$((ATTEMPTNUM + 1))
        fi
    done
}
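
# Example (illustrative only): retry wraps any flaky network command, e.g.
#   retry curl -sSLO "https://archive.apache.org/dist/spark/KEYS"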

download_spark() {
  # Fetch the Spark tarball from the Apache archive (with retries), unpack it,
  # and hand ownership to the current user.
  retry wget -nv --trust-server-names "https://archive.apache.org/dist/spark/spark-$SPARK_VER/$SPARK.tgz" &&
  sudo tar -zxf $SPARK.tgz --directory $INSTALL_DIR &&
  sudo chown -R $USER:$USER $SPARK_DIR &&
  sudo ln -s $INSTALL_DIR/$SPARK $SPARK_LOCAL_DIR &&
  echo "download_spark successful"
}

setup_spark_env() {
  # Append Spark settings to the env file and load them into the current shell.
  # Note: $PATH and $CLASSPATH are expanded now, at write time.
  echo "export SPARK_HOME=${SPARK_LOCAL_DIR}" >> $SPARK_ENV &&
  echo "export PATH=${SPARK_LOCAL_DIR}/bin:${SPARK_LOCAL_DIR}/sbin:$PATH" >> $SPARK_ENV &&
  echo "export CLASSPATH=$CLASSPATH" >> $SPARK_ENV &&
  source $SPARK_ENV
}
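
# Later shells can reload the same environment without rerunning this script;
# with the default SPARK_ENV location:
#   source "$HOME/spark_env.sh"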

configure_spark() {
  # Reuse setup_spark_env for the env file, then write a minimal single-node
  # standalone configuration.
  setup_spark_env &&
  # "sudo echo ... > file" redirects as the invoking user, so use "sudo tee".
  echo "SPARK_MASTER_HOST=127.0.0.1" | sudo tee ${SPARK_LOCAL_DIR}/conf/spark-env.sh > /dev/null &&
  echo "SPARK_LOCAL_IP=127.0.0.1" | sudo tee -a ${SPARK_LOCAL_DIR}/conf/spark-env.sh > /dev/null &&
  # Spark 3.x renamed conf/slaves to conf/workers.
  echo "localhost" | sudo tee ${SPARK_LOCAL_DIR}/conf/workers > /dev/null &&
  sudo cp ${SPARK_LOCAL_DIR}/conf/log4j2.properties.template ${SPARK_LOCAL_DIR}/conf/log4j2.properties &&
  echo "configure_spark successful"
}
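
# The generated conf/spark-env.sh pins the master and all Spark traffic to the
# loopback interface, so this install only serves a single local node.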

install_spark() {
  # Treat an existing conf/workers file as evidence of a completed install.
  if [[ ! -f ${SPARK_DIR}/conf/workers ]]; then
    echo "Installing Spark..."
    download_spark &&
    configure_spark &&
    echo "Install Spark successful"
  else
    echo "Found cached Spark install"
  fi
}

# Install (if needed), ensure the symlink exists, load the environment,
# then (re)start a local standalone master.
install_spark &&
if [[ ! -L ${SPARK_LOCAL_DIR} ]]; then sudo ln -s $INSTALL_DIR/$SPARK $SPARK_LOCAL_DIR; fi &&
setup_spark_env &&
${SPARK_LOCAL_DIR}/sbin/stop-master.sh &&
${SPARK_LOCAL_DIR}/sbin/start-master.sh &&
echo "Started spark"
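
# Quick sanity checks (illustrative): the standalone master web UI defaults to
# http://localhost:8080, and the CLI should report the installed version:
#   ${SPARK_LOCAL_DIR}/bin/spark-submit --version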