#!/bin/bash
# Usage (run as an EMR bootstrap action):
#   installer.sh <s3://bucketname/location/of/jarfile> <s3://bucketname/location/of/Metricfilter.json> <CloudWatchNamespace>

# Positional arguments passed to the bootstrap action:
#   $1 - S3 location of the jar file
#   $2 - S3 location of Metricfilter.json
#   $3 - CloudWatch namespace prefix
jarLocation=$1
configLocation=$2
CWNamespace=$3

# Scratch directory for downloads, and the directory the config files are
# installed into.
tmpDir="/tmp/EMRCustomSparkCloudWatchSink"
installDir="/etc/amazon/EMRCustomSparkCloudWatchSink"
# Cluster (job flow) id as recorded in job-flow.json.
CLUSTERID=$(jq '.jobFlowId' -r /emr/instance-controller/lib/info/job-flow.json)
# installCW is a shell-style flag:
#   0 = the CloudWatch agent must be installed by this script
#   1 = the agent is already present, skip the installation
installCW=0
# -p: do not fail when the directory is left over from an earlier run.
mkdir -p "$tmpDir"

# EMR 7.x clusters may already ship with the CloudWatch agent configured.
# When the release label starts with "emr-7" and the job-flow state mentions
# the EMR-provided agent, flag the agent installation to be skipped.
jobFlowState="/emr/instance-controller/lib/info/job-flow-state.txt"
EMRRelease=$(grep "releaseLabel" "$jobFlowState" | cut -d'"' -f2)
case "$EMRRelease" in
  emr-7*)
    echo "$EMRRelease"
    if grep -q "emr-amazon-cloudwatch-agent" "$jobFlowState"; then
      echo "CW Agent already installed"
      installCW=1
    fi
    ;;
esac
# Download the jar and move it into Spark's jar directory.
# The jar's file name is not known here, so everything currently in the
# scratch directory is moved.
aws s3 cp "$jarLocation" "$tmpDir/" \
  || echo "ERROR: failed to download jar from ${jarLocation}" >&2
sudo mkdir -p /usr/lib/spark/jars/
sudo mv "$tmpDir"/* /usr/lib/spark/jars/

# Download the metric filter. The destination file name is given explicitly
# so the installed file is always called Metricfilter.json, whatever the S3
# object itself is named.
aws s3 cp "$configLocation" "$tmpDir/Metricfilter.json" \
  || echo "ERROR: failed to download metric filter from ${configLocation}" >&2
sudo mkdir -p "$installDir/"
sudo mv "$tmpDir/Metricfilter.json" "$installDir/"
sudo chmod 644 "$installDir/Metricfilter.json"

if [[ $installCW -eq 0 ]]; then
  # The agent is not already present: download, install and configure it.
  ARCH=$(uname -m)
  URLARCH="amd64"
  if [[ "$ARCH" == "aarch64" ]]; then
    URLARCH="arm64"
  fi

  # The archive is unpacked into the scratch directory and ./install.sh is run
  # from there, so a failed cd must not be ignored.
  cd "$tmpDir" || { echo "ERROR: cannot cd to ${tmpDir}" >&2; exit 1; }
  wget "https://s3.amazonaws.com/amazoncloudwatch-agent/linux/$URLARCH/latest/AmazonCloudWatchAgent.zip" -O AmazonCloudWatchAgent.zip
  unzip -o AmazonCloudWatchAgent.zip
  sudo ./install.sh

  # Configure CloudWatch Agent.
  # A here-document is used so the cluster id and namespace are inserted
  # verbatim, without word splitting or glob expansion.
  sudo tee "$installDir/amazon-cloudwatch-agent-conf.json" <<EOF_CONF
{
    "metrics": {
      "append_dimensions": {
        "ClusterID": "${CLUSTERID}"
      },
      "metrics_collected": {
        "statsd": {
          "metrics_collection_interval":1,
          "metrics_aggregation_interval":30
        }
      },
      "namespace": "${CWNamespace}/${CLUSTERID}"
    }
  }
EOF_CONF
  sudo chmod 644 "$installDir/amazon-cloudwatch-agent-conf.json"
  sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c "file:$installDir/amazon-cloudwatch-agent-conf.json" -s
  rm -rf -- "${tmpDir:?}"
else
  # The agent is already installed. Generate a helper script that waits for
  # node provisioning to report SUCCESSFUL and then adds the statsd collector
  # to the existing agent configuration. The helper is started in the
  # background so this script can return immediately.
  tee /tmp/emr-spark-cw-bootstrap.sh <<'EOF_MAIN'
#!/bin/bash
#------------------------------------------------------------------------------
# Main Execution Loop
#------------------------------------------------------------------------------
# Add timeout mechanism to prevent infinite loops
MAX_ATTEMPTS=60  # 10 minutes (10 seconds * 60)
ATTEMPT=0
tmpDir="/tmp/EMRCustomSparkCloudWatchSink"
agentConf="/opt/aws/amazon-cloudwatch-agent/etc/emr-amazon-cloudwatch-agent.json"

while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do
    # Pull the "status:" value of the nodeProvisionCheckinRecord found within
    # the localInstance section of the job-flow state file.
    NODEPROVISIONSTATE=$(sed -n '/localInstance [{]/,/[}]/{
    /nodeProvisionCheckinRecord [{]/,/[}]/ {
    /status: / { p }
    /[}]/a
    }
    /[}]/a
    }' /emr/instance-controller/lib/info/job-flow-state.txt | awk ' { print $2 }')

    if [ "$NODEPROVISIONSTATE" == "SUCCESSFUL" ]; then
        sleep 10
        echo "Beginning post-provision bootstrap process..."
        if [ -f "$agentConf" ]; then
          echo "CW Agent already exists"
          mkdir -p "$tmpDir"
          # Build the new configuration in a scratch file first and only
          # replace the live one when jq succeeded with non-empty output;
          # otherwise a jq failure would wipe the agent configuration.
          if jq '.metrics.metrics_collected.statsd = {"metrics_collection_interval":1,"metrics_aggregation_interval":30}' "$agentConf" > "$tmpDir/amazon-cloudwatch-agent-conf.json" \
              && [ -s "$tmpDir/amazon-cloudwatch-agent-conf.json" ]; then
            sudo tee "$agentConf" < "$tmpDir/amazon-cloudwatch-agent-conf.json"
            sudo chmod 644 "$agentConf"
            sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c "file:$agentConf" -s
          else
            echo "ERROR: could not add statsd settings to ${agentConf}; configuration left unchanged" >&2
            rm -rf -- "${tmpDir:?}"
            exit 1
          fi
        fi
        # Remove the scratch directory whether or not the agent config existed.
        rm -rf -- "${tmpDir:?}"
        exit 0
    fi
    ATTEMPT=$((ATTEMPT + 1))
    echo "Waiting for node provisioning to complete... Attempt ${ATTEMPT}/${MAX_ATTEMPTS}"
    sleep 10
done

# The loop only ends here when provisioning never reported SUCCESSFUL.
echo "ERROR: node provisioning not SUCCESSFUL after ${MAX_ATTEMPTS} attempts; CloudWatch agent was not reconfigured" >&2
exit 1
EOF_MAIN
  # Run the generated helper in the background, logging to /tmp.
  chmod u+x /tmp/emr-spark-cw-bootstrap.sh
  sudo bash /tmp/emr-spark-cw-bootstrap.sh &> /tmp/emr-spark-cw-bootstrap.log &
  exit 0
fi

