#!/bin/sh
# @description monitor agent health
# @man Check that rudder agent has no problem
# @man +
# @man *Options*:
# @man +
# @man *-n*: run in nrpe mode, print a single line and return 0,1 or 2
# @man  put this line in your nrpe.cfg to use it
# @man  command[check_rudder]=${RUDDER_DIR}/bin/rudder agent health -n

. "${BASEDIR}/../lib/common.sh"

# Paths consulted by the checks below. RUDDER_VAR and RUDDER_DIR are not set in
# this file; presumably they come from the sourced common.sh (to be confirmed).
CFE_DIR="${RUDDER_VAR}/cfengine-community"
CFE_DISABLE_FILE="${RUDDER_DIR}/etc/disable-agent"

# NRPE is empty by default (extra hint lines are printed); the -n option sets
# it to "y", which makes every check below print a single line only.
NRPE=

while getopts "n" opt
do
  case "${opt}" in
    n) NRPE=y ;;
  esac
done

# A present disable file means the agent was switched off: report a warning.
if [ -e "${CFE_DISABLE_FILE}" ]; then
  echo "Rudder agent disabled"
  # The hint is only useful to a human, so it is skipped in NRPE mode.
  if [ -z "${NRPE}" ]; then
    echo "Type 'rudder agent enable' to enable it"
  fi
  exit 1 # warning
fi

# test policy server
# The agent is considered unconfigured when policy_server.dat is missing, is
# empty (-s fails on a zero-length file, which -e would accept), or still
# contains a "%" character.
if [ ! -s "${CFE_DIR}/policy_server.dat" ] || grep "%" "${CFE_DIR}/policy_server.dat" > /dev/null
then
  echo "Rudder agent not configured"
  # The hint is only useful to a human, so it is skipped in NRPE mode.
  [ -z "${NRPE}" ] && echo "Edit ${CFE_DIR}/policy_server.dat to configure it"
  exit 2 # error
fi

# test policy server value if it's not an ip
# An IPv4 address is digits and dots only. An IPv6 address must contain at
# least two colons, so that hex-only host names ("cafe") and "name:port" style
# values ("cafe:5309") are still resolved instead of being taken for an address.
if ! grep -E '^[0-9.]+$|^[0-9a-fA-F]*:[0-9a-fA-F]*:[0-9a-fA-F:]*$' "${CFE_DIR}/policy_server.dat" > /dev/null; then
  # Keep only the part before the first ':' and try to resolve it, using
  # getent when it is available and host otherwise. The status of the
  # resolving pipeline is stored explicitly rather than read from the
  # enclosing if statement.
  if type getent > /dev/null 2>&1; then
    cut -d: -f1 "${CFE_DIR}/policy_server.dat" | xargs getent hosts > /dev/null
    RESOLVE_STATUS=$?
  else
    cut -d: -f1 "${CFE_DIR}/policy_server.dat" | xargs host > /dev/null 2>&1
    RESOLVE_STATUS=$?
  fi
  if [ "${RESOLVE_STATUS}" -ne 0 ]
  then
    echo "Unknown Rudder policy server"
    # The hint is only useful to a human, so it is skipped in NRPE mode.
    [ -z "${NRPE}" ] && echo "Rudder server name must be in the DNS or in /etc/hosts"
    exit 2 # error
  fi
fi

# test failsafe promises
# cf-promises is run on failsafe.cf with all output discarded; only its exit
# status matters. The command path is quoted so that a RUDDER_DIR containing
# whitespace or glob characters is not split or expanded.
if ! "${RUDDER_DIR}/bin/cf-promises" -f failsafe.cf > /dev/null 2>&1
then
  echo "Broken failsafe policies"
  # The hint is only useful to a human, so it is skipped in NRPE mode.
  [ -z "${NRPE}" ] && echo "The failsafe policies are broken, you can run 'rudder agent reset' to go back to the initial policies"
  exit 2 # error
fi

# Look for connection errors in the previous run output, but only when that
# file exists: without it we are before the first agent run or in bootstrap
# promises.
if [ -e "${CFE_DIR}/outputs/previous" ]; then
  grep -E "FATAL:|Fatal :|could not get an updated configuration" "${CFE_DIR}/outputs/previous" > /dev/null
  case $? in
    1)
      # grep selected no line: nothing to report.
      ;;
    *)
      # Any other status is reported, i.e. a matching line or a grep failure.
      echo "Connection errors in Rudder agent last run"
      if [ -z "${NRPE}" ]; then
        echo "See ${CFE_DIR}/outputs/previous for more details"
      fi
      exit 2 # error
      ;;
  esac
fi

# test promises
# The combined stdout/stderr of cf-promises is kept in CFPROMISES because it is
# inspected again further down. The command path is quoted so that a RUDDER_DIR
# containing whitespace or glob characters is not split or expanded.
if ! CFPROMISES=$("${RUDDER_DIR}/bin/cf-promises" -I 2>&1)
then
  echo "Broken policies"
  # The hint is only useful to a human, so it is skipped in NRPE mode.
  [ -z "${NRPE}" ] && echo "The policies are broken, you should fix them on the policy server or run 'rudder agent reset' to go back to the initial policies"
  exit 2 # error
fi

# Scan the text held in CFPROMISES for host name lookup failures; the matching
# lines are kept in RESOLUTION and a match is reported as a warning.
if RESOLUTION=$(printf '%s' "${CFPROMISES}" | grep "Unable to lookup hostname"); then
  echo "Failed name resolution"
  # Outside NRPE mode, list each distinct second single-quote-delimited field
  # of the matching lines.
  if [ -z "${NRPE}" ]; then
    printf '%s' "${RESOLUTION}" | awk -F"'" '{ print "could not resolve: " $2 }' | sort | uniq
  fi
  exit 1 # warning
fi

# Reached only when none of the checks above exited: report a healthy agent.
echo "OK"
exit 0 # success
