#!/bin/sh

. $LKP_SRC/lib/env.sh
. $LKP_SRC/lib/wait.sh

# Soft deadline for the job, in seconds. Defaults to 5 hours when the caller
# did not provide one; any fractional part is dropped so the value can be used
# in integer comparisons.
max_uptime=${max_uptime:-$((5 * 3600))}
max_uptime=${max_uptime%%.*}

# Non-empty once start_bmc_watchdog decides this script must reset the BMC
# timer itself.
should_touch_watchdog=
watchdog_reset_period=32
watchdog_initial_countdown=${boot_timeout:-300}

# Put this shell under SCHED_RR (priority 1) when schedtool is installed.
if [ -x /usr/bin/schedtool ]; then
	/usr/bin/schedtool -R -p 1 $$
fi

# Print the arguments on one line, prefixed with the current `date` output.
timed_echo()
{
	local stamp

	stamp=$(date)
	echo "$stamp $*"
}

# Wait for the job while resetting the BMC watchdog once per reset period.
#
# Exits the script with status 0 as soon as the job is seen to finish, either
# through wait_job_finished or through the $TMP/job-finished marker file.
# Returns to the caller once more than max_uptime seconds of waiting have
# been accumulated.
wait_touch_watchdog()
{
	local waited=0

	while true
	do
		if wait_job_finished --timeout $watchdog_reset_period; then
			exit 0
		fi
		if [ -f "$TMP/job-finished" ]; then
			exit
		fi

		# The job is still running: push the hardware timer back.
		bmc-watchdog --reset

		# Only the nominal period is accounted for, not wall-clock time.
		waited=$(( waited + watchdog_reset_period ))
		if [ $waited -gt $max_uptime ]; then
			return
		fi
	done
}

# Stop the BMC watchdog timer, discarding any error output.
# Returns 1 without doing anything when /usr/sbin/bmc-watchdog is not
# executable; otherwise returns the status of `bmc-watchdog --stop`.
stop_bmc_watchdog()
{
	if [ ! -x /usr/sbin/bmc-watchdog ]; then
		return 1
	fi

	bmc-watchdog --stop 2>/dev/null
}

# Try to arm the BMC (IPMI) hardware watchdog for this job.
#
# Reads:  watchdog_initial_countdown, watchdog_reset_period
# Writes: should_touch_watchdog=1 when the timer was armed directly (no
#         daemon), meaning this script has to keep resetting it;
#         /var/lock/bmc-watchdog (lock file) and, when the daemon is used,
#         /var/run/bmc-watchdog.pid.
#
# Every precondition failure simply returns, leaving should_touch_watchdog
# empty so the caller falls back to a plain timeout wait.
start_bmc_watchdog()
{
	# is_virt is provided by the sourced libs; nothing is armed when it succeeds.
	is_virt && return

	stop_bmc_watchdog

	[ -x /usr/sbin/dmidecode ] || return
	# The redirection belongs to dmidecode: its diagnostics are noise here,
	# only the presence of an "IPMI" entry in its output matters.
	dmidecode 2>/dev/null | grep "IPMI"
	if [ $? -ne 0 ]; then
		echo "IPMI BMC is not supported on this machine, skip bmc-watchdog setup!"
		return
	fi

	[ -x /usr/local/bin/ipmi-setup ] || return
	# $? is grep's status: setup counts as successful only if "BMC" shows up
	# in ipmi-setup's output.
	ipmi-setup 2> /dev/null | grep "BMC"
	if [ $? -ne 0 ]; then
		echo "ipmi setup fail, skip bmc-watchdog setup!"
		return
	fi

	# Back off when a watchdog pid file already exists.
	[ -f /var/run/watchdog.pid ] && return
	[ -f /var/run/bmc-watchdog.pid ] && return
	# With noclobber the redirection fails if the lock file already exists,
	# so only the first caller gets past this point. The subshell keeps the
	# option from leaking into the rest of the script.
	(
		set -o noclobber
		echo $$ > /var/lock/bmc-watchdog
	) 2>/dev/null || return

	# First choice: run bmc-watchdog as a daemon (-d) with the configured
	# initial countdown (-i) and reset period (-e). Accept it only if, after
	# a grace period, the process exists and the timer reports as running.
	bmc-watchdog -d -u 4 -p 0 -a 1 -F -P -L -S -O -i $watchdog_initial_countdown -e $watchdog_reset_period && {
		sleep 10
		local pid="$(pidof bmc-watchdog)"
		[ -n "$pid" ] &&
		bmc-watchdog --get | grep -q 'Timer:.*Running' && {
			echo $pid > /var/run/bmc-watchdog.pid
			return
		}
	}

	# Fallback: arm the timer directly; on failure make sure it is stopped.
	bmc-watchdog --set --start-after-set -i $watchdog_initial_countdown || {
		bmc-watchdog --stop
		return
	}

	# No daemon is resetting the timer, so this script must do it.
	should_touch_watchdog=1
}

# Supervise the running job and escalate when it overruns max_uptime.
#
# Reads: should_touch_watchdog, max_uptime, TMP, RESULT_ROOT, TMP_RESULT_ROOT
#
# Exits the script with status 0 as soon as the job is seen to finish.
# Otherwise it records a soft timeout, saves a process listing, calls
# kill_tests, and, if the job still has not finished, forces a reboot: first
# with `reboot`, then through /proc/sysrq-trigger.
start_watchdog()
{
	if [ -n "$should_touch_watchdog" ]; then
		wait_touch_watchdog
	else
		wait_job_finished --timeout "$max_uptime" && exit 0
	fi

	timed_echo 'detected soft_timeout'
	touch "$TMP/soft_timeout"

	# Bound the write to RESULT_ROOT when `timeout` is available so a stuck
	# touch cannot stall the escalation.
	if has_cmd 'timeout'; then
		timeout 10.11s touch "$RESULT_ROOT/soft_timeout"
	else
		touch "$RESULT_ROOT/soft_timeout"
	fi

	# Prefer the full process tree; fall back to plain `ps` if those options
	# are not supported.
	ps faux 2>/dev/null > "$TMP_RESULT_ROOT/watchdog-debug" || ps > "$TMP_RESULT_ROOT/watchdog-debug"

	# try to unblock the main shell, which will copy files to
	# $RESULT_ROOT, which helps debug the hang reason
	kill_tests

	timed_echo 'wait_job_finished --timeout 60'
	wait_job_finished --timeout 60 && exit 0

	# reboot the hard way, if the main shell failed to kill me
	# (an interrupted sleep aborts the escalation)
	timed_echo 'sleep 141'
	sleep 141 || exit

	test -f "$TMP/job-finished" && exit

	timed_echo 'try to force reboot'
	reboot

	sleep 144 || exit

	# Last resort: sysrq 'w', then 's', and after a short pause 'b'.
	echo w > /proc/sysrq-trigger
	echo s > /proc/sysrq-trigger
	sleep 10
	echo b > /proc/sysrq-trigger
}

# Entry point: try to arm the BMC watchdog, then supervise the job.
start_bmc_watchdog
start_watchdog # blocks until the job finishes or the timeout escalation runs
