#!/bin/sh

# Usage: run-lkp job.yaml
# The job file path is the only argument. It is exported to the environment
# under two names: job_file, and the legacy name job.
export job_file="$1"
if [ -z "$job_file" ]; then
	echo "Usage: run-lkp job.yaml" 1>&2
	exit 1
fi

# TODO: replace job with job_file completely in future
# Quoted on purpose: not every sh treats export's operands as assignments,
# so an unquoted value could be field-split.
export job="$job_file"

# Load the helper libraries shipped under $LKP_SRC/lib, in the original order.
# The paths are quoted so that an LKP_SRC containing whitespace or glob
# characters still resolves to a single file name.
. "$LKP_SRC/lib/common.sh"
. "$LKP_SRC/lib/wait.sh"
. "$LKP_SRC/lib/http.sh"
. "$LKP_SRC/lib/mount.sh"
. "$LKP_SRC/lib/job-init.sh"
. "$LKP_SRC/lib/ucode.sh"

# Fixed locations exported to the environment.
export BENCHMARK_ROOT=/lkp/benchmarks
export CGROUP_MNT=/cgroup

job_init
# Fall back to "lkp" when $user is unset or empty.
export LKP_USER="${user:-lkp}"

# Record our own PID; the path is quoted so a $TMP with whitespace still works.
echo $$ > "$TMP/run-lkp.pid"	# for use by monitors/watchdog

# If the result root cannot be set up, call job_done and exit with status 1.
setup_result_root || { job_done; exit 1; }

# the NMI warning messages are often in a mess
# When the debugfs file is present, write 100000000 into it.
if [ -f '/sys/kernel/debug/x86/nmi_longest_ns' ]; then
	echo 100000000 > /sys/kernel/debug/x86/nmi_longest_ns
fi

echo run-job $job
set_job_state 'running'

# A job may request a specific microcode version through $ucode. Report the
# requested version and, when it differs from the one currently in use, mark
# the job accordingly and stop with status 1.
# NOTE(review): unlike the setup_result_root failure path, this exit does not
# call job_done first - confirm that is intended.
if [ -n "$ucode" ]; then
	echo "target ucode: $ucode"
	if ! is_same_as_current_ucode_version "$ucode"; then
		echo "microcode is not matched, skip testing..."
		set_job_state 'microcode_is_not_matched'
		exit 1
	fi
fi

# Work out which script to execute for this job:
#   - "foo.yaml" -> run the companion "foo.sh"
#   - otherwise  -> the job file itself is the script, so make it executable
# A quoted case pattern replaces the unquoted `[ ... != ... ]` test, which
# misbehaved when $job was empty or contained whitespace/glob characters.
case "$job" in
*.yaml)
	job_script="${job%.yaml}.sh"
	;;
*)
	job_script="$job"
	chmod +x "$job"
	;;
esac

# set subprocess oom_score_adj to 0 (default value)
# The value is written to run-lkp's own /proc entry.
echo 0 > /proc/$$/oom_score_adj

if [ -x "$job_script" ]; then
	# disable oom killing entirely for run-lkp
	# sleep a while to make sure run_job starts earlier than setting oom_score_adj
	# $$ is expanded by this shell (double quotes), so the background helper
	# writes to run-lkp's /proc entry, not its own.
	sh -c "sleep 3 && echo -1000 > /proc/$$/oom_score_adj" &

	# LKP_DEBUG_PREFIX is left unquoted on purpose: it may be empty or hold a
	# command plus arguments that must be word-split.
	$LKP_DEBUG_PREFIX "$job_script" run_job

	# Pick up the exit code recorded in $RESULT_ROOT/last_state. exit_code is
	# cleared first so a stale value from the environment cannot be mistaken
	# for this run's result; a missing file or missing entry counts as 0.
	last_state_file=$RESULT_ROOT/last_state
	exit_code=
	[ -f "$last_state_file" ] &&
		exit_code=$(sed -ne 's/\(.*\)exit_code.\(.*\):\(.*\)/\2/p' "$last_state_file")
	run_job_exit_code=${exit_code:-0}
else
	echo "job shell script does not exist: $job_script"
	exit 1
fi

clean_job_resource

# Taken before post-run starts; appended to the job file further down.
end_time=$(date '+%s')

set_job_state 'post_run'
# LKP_DEBUG_PREFIX is left unquoted on purpose so it can be empty or expand
# to a command plus arguments.
$LKP_DEBUG_PREFIX "$LKP_SRC/bin/post-run"

# Unless the results live on cifs or virtfs, give the result tree to group
# "lkp" and make it group read/writable. Errors are deliberately discarded.
# ":lkp" is used instead of the deprecated ".lkp" group separator.
case "$result_fs" in
cifs|virtfs)
	;;
*)
	chown -R :lkp "$RESULT_ROOT" 2>/dev/null
	chmod -R g+rw "$RESULT_ROOT" 2>/dev/null
	;;
esac

# Append run metadata to the job file. time_delta and version are only
# assigned here when their source file exists and is non-empty.
[ -s "$TMP/time_delta" ] && time_delta="time_delta=$(cat "$TMP/time_delta")"
[ -s "$LKP_SRC/version" ] && version="version=$(cat "$LKP_SRC/version")"
jobfile_append_var	"loadavg=$(cat /proc/loadavg)" \
			"start_time=$(cat "$TMP/start_time")" \
			"end_time=$end_time" \
			"$version" \
			"$time_delta"

# Decide the final job state; the first matching condition wins:
#   OOM                 - $TMP/OOM exists or check_oom succeeds (also asks
#                         for a reboot before the next job)
#   soft_timeout        - $TMP/soft_timeout exists
#   incomplete / failed - run_job recorded a non-zero exit code
#   disturbed           - `last` shows a user still logged in on a pts/tty
#   finished            - none of the above
if [ -f "$TMP/OOM" ] || check_oom; then
	set_job_state "OOM"
	echo "reboot_for_next_job: 1" >> "$job"
elif [ -f "$TMP/soft_timeout" ]; then
	set_job_state 'soft_timeout'
elif [ "$run_job_exit_code" != 0 ]; then
	# Text before ".<name>.exit_code." in last_state, e.g. "setup" or "daemon".
	program_type=$(sed -ne 's/\(.*\)\..*\.exit_code\..*/\1/p' "$last_state_file")
	# "99" is a fixed number from lib/debug die() function
	if [ "$run_job_exit_code" = "99" ] ||
	   [ "$program_type" = "setup" ] ||
	   [ "$program_type" = "daemon" ]; then
		set_job_state 'incomplete'
	else
		set_job_state 'failed'
	fi

	# Load the job script's definitions and run its on_fail hook if it has
	# one. This script runs under /bin/sh, so the bash-only `source` and
	# `type -t` are replaced by their POSIX counterparts `.` and
	# `command -V`. LC_ALL=C keeps the output untranslated: shells print
	# either "is a function" or "is a shell function".
	. "$job_script"
	case "$(LC_ALL=C command -V on_fail 2>/dev/null)" in
	"on_fail is a function"*|"on_fail is a shell function"*)
		set_job_stage "on_fail" "$on_fail_timeout"
		$LKP_DEBUG_PREFIX "$job_script" on_fail
		;;
	esac
elif [ -f '/var/log/wtmp' ] && has_cmd last && last -n1 | grep -q -E '(pts/|tty)[0-9].*(still logged in)'; then
	set_job_state 'disturbed'
	disturbed=1
else
	set_job_state 'finished'
fi

sync
job_done
sleep 1  # hopefully help debug messages show up in serial output
