;; Owner: wolfson@readyforzero.com
;; implementation of "process" nodes. If the process has a truthy
;; :blocks? attribute, runs the process and waits for exit. Otherwise,
;; creates or updates (if necessary) a service directory and instructs
;; runit to start monitoring the process.
(ns borg.state.types.process
  (:require [borg.state.graph :as g]
            [borg.state.types.fs :as fs]
            [borg.state.types.provided :as provided]
            [borg.state.util :as u]
            [clojure.java.shell :as sh]
            [clojure.string :as str]
            [me.raynes.fs :as raynes-fs])
  (:use [borg.state.types.core :only [sh-result defop defnodefn node-path]]))

;;; should actually read this from a config?
(def borg-service-root "/etc/borg/service")

(def service-root "/etc/service")

(defn service-path [& elems]
  (str/join "/" (concat [borg-service-root] (map name elems))))

(defn root-path [& elems]
  (str/join "/" (concat [service-root] (map name elems))))

(defn service-provides-name [provides & suffix]
  (keyword (apply str (name provides)
                  (when suffix ["-" (str/join "-" suffix)]))))

(defn make-run-script [pathname args provides requires user group env exec-dir svc-dir]
  (let [env-preface (reduce (fn [acc [key val]] (conj acc (str (name key) "=" val))) [] env)
        chdir-preface (when exec-dir ["cd" exec-dir "&&"])
        start-dependencies (map #(str "sv up " (name %) " || exit 1") requires)]
;;; We create two scripts in the service directory.

;;; The "run" script is the one that runit runs; it tries to start
;;; services that this service depends on, and then executes "su -l -c ./actual-run [user]"
;;; in actual_run, we (optionally) change directories, then run the
;;; process of interest with additional environment variables set.

;;; The two-script approach means that we don't need to worry (as
;;; much) about shell escapes for the arguments (since su's -c
;;; argument would have needed to be wrapped in quotation marks).

;;; The group is ignored: we can't necessarily run chpst inside the
;;; actual_run script, because we won't have the required privileges
;;; if we've su-ed into an unprivileged user. And we can't run su from
;;; within chpst, because it will complain about not being run from a
;;; terminal.
    (g/merge-graphs
     (provided/file (service-path provides "actual_run")
                    {:user user
                     :group group
                     :template {:literal
                                (str/join "\n"
                                          (concat ["#!/bin/sh"
                                                   "### AUTOMATICALLY GENERATED"]
                                                  [(str/join " "
                                                             (concat chdir-preface env-preface ["exec"]
                                                                     [pathname] args))
                                                   ""]))}
                     :permissions "744"
                     :context-fn (constantly nil)
                     :provides :actual-run-script
                     :requires [svc-dir]})
     (provided/file (service-path provides "run")
                    {:user user
                     :group group
                     :template {:literal
                                (str/join "\n"
                                          (concat ["#!/bin/sh"
                                                   "### AUTOMATICALLY GENERATED"]
                                                  start-dependencies
                                                  [(str/join " "
                                                             (concat ["exec"]
                                                                     ["su -l -c"
                                                                      (service-path provides "actual_run")
                                                                      user]))
                                                   ""]))}
                     :permissions "744"
                     :context-fn (constantly nil)
                     :provides (service-provides-name provides "run-script")
                     :requires [svc-dir :actual-run-script]}))))

(defn is-process? [name nodemap]
  (let [node (get nodemap name)]
    (and (= "process" (g/node-type node))
         (not (g/node-attr node :blocks?)))))

(defn make-service-dir [{{:keys [user group env args pathname target exec-dir]} :attrs
                         provides :provides
                         requires :requires
                         :as node}
                        nodemap]
  (let [svc-dir (provided/directory target
                                    {:provides (service-provides-name provides "service-dir")
                                     :user user :group group :permissions "744"})
        process-depends (filter #(is-process? % nodemap) (u/transitive-dependencies node nodemap))
        run-script (make-run-script pathname args provides process-depends user group env
                                    exec-dir svc-dir)]
    run-script))

(defop service-down [service]
  ;; should check status first?
  (sh-result (sh/sh "sv" "down" service)))

(defop sighup [service]
  (sh-result (sh/sh "sv" "sighup" service)))

(defop ensure-running [service]
  (Thread/sleep 5000)
  ;; this is a no-op if it was already running, but if we brought it
  ;; down, we have to bring it back up explicitly.
  ;; if sv start gives us an error, explicitly try to bring the
  ;; service down so runit doesn't repeatedly try and fail to bring it up.
  (let [status (sh-result (sh/sh "sv" "start" service))]
    (when (not (u/ok? status))
      ((:fn (service-down service))))
    status))

(defn inspect-child-actions [action-map on-child-ops]
  (when on-child-ops
    (let [responses (->> on-child-ops
                         (keep (fn [[child-name child-response-spec]]
                                 (when-let [child-actions (get action-map child-name nil)]
                                   (let [[op action] (first child-response-spec)
                                         action (keyword action)]
                                     (when (or (and (= op :restart-first)
                                                    (:restart-first child-actions))
                                               (some #(= op (:op %)) (:actions child-actions)))
                                       action)))))
                         set)]
;;; responses can be any of :sighup, :restart, and :shutdown-first
;;; :restart should take priority over :sighup
;;; :shutdown-first should take priority over both of the others
      (cond
       (empty? responses) nil
       (contains? responses :shutdown-first) :shutdown-first
       (contains? responses :restart) :restart
       :otherwise :sighup))))

(defn check-service-process [node nodemap previous-actions]
  (let [service-dir-actions (->> (make-service-dir node nodemap)
                                 g/simulate-send
                                 g/check-nodes
                                 (mapcat vals)
                                 (mapcat :actions))
        on-child-ops (g/node-attr node :on-child-ops)
        child-response (inspect-child-actions (reduce merge previous-actions) on-child-ops)]
    (let [link (g/node-attr node :link)
          target (g/node-attr node :target)]
      (cond
       ;; for both of these first two options, we can ignore
       ;; child-response, since either
       ;;  1. we take the first branch, in which case we're changing
       ;;  the service directory and will be restarting if already
       ;;  running anyway, or
       ;;  2. we take the second branch, in which case we were never
       ;;  linked into the service directory in the first place so we
       ;;  must not be running yet.       
       (seq service-dir-actions) ((node-path (raynes-fs/directory? link) => service-down
                                             (raynes-fs/exists? link) => (fs/delete link)
                                             ~@service-dir-actions
                                             (fs/link target link)
                                             ensure-running)
                                  node)
       (not (raynes-fs/exists? link)) ((node-path (fs/link target link)
                                                  ensure-running)
                                       node)
       :else (case child-response
               nil [(node-ensure-running node)]
               ;; down then up
               :restart ((node-path service-down ensure-running) node)
               :sighup ((node-path sighup ensure-running) node)
               :shutdown-first {:actions [(node-ensure-running node)]
                                :shutdown-first (node-service-down node)})))))

(defop run-command [pathname args user group env exec-dir]
  (sh-result
   (sh/with-sh-env env
     (if exec-dir
       (sh/with-sh-dir exec-dir
         (apply sh/sh "chpst" "-u" (if group (str user ":" group) user)
                pathname args))
       (apply sh/sh "chpst" "-u" (if group (str user ":" group) user)
              pathname args)))))

(defmethod g/check-node "process"
  [node nodemap previous]
  (if-not (g/node-attr node :block?)
    (check-service-process node nodemap previous)
    ;; run these every time. 
    [(node-run-command node)]))

(defmethod g/from-wire "process"
  [node]
  (let [provides (:provides node)]
    (update-in node [:attrs] assoc
               :target (service-path provides)
               :link (root-path provides)
               :service (name provides))))

(defmethod g/to-wire "process"
  [node]
  (update-in node [:attrs :args] (partial map #(if (string? %) % (%)))))
