(ns com.timezynk.domus.mongo.queue.metrics
  (:require [clojure.tools.logging :as log]
            [somnium.congomongo :as mongo]
            [com.timezynk.useful.prometheus.core :as prom]))

(def ^{:private true :const true} POLLING_LOOP_INTERVAL
  "Pause, in milliseconds, between two consecutive polling cycles."
  1000)

(defn length
  "Counts the jobs currently stored in the collection of `queue`."
  [queue]
  (let [coll (.collection queue)]
    (mongo/fetch-count coll)))

(defn- fetch
  "Looks up `k` in the current value of the metrics atom of `queue`.
   Yields `nil` when nothing is stored under `k`."
  [queue k]
  (let [metrics @(.metrics queue)]
    (get metrics k)))

(defn report-job-stats
  "Publishes the processing statistics of `job` to Prometheus, labelled with
   the job's `:type`.

   `run-time` and `wait-time` are expected in milliseconds. The run counter is
   incremented in seconds (`run-time` / 1000), whereas both gauges are set to
   the raw millisecond values. Finally, the job counter is incremented by one."
  [queue job run-time wait-time]
  (let [kind (:type job)
        ;; Metrics are looked up lazily, right before each use.
        metric #(fetch queue %)
        labelled-gauge #(prom/gauge-with-labels (metric %) kind)]
    (log/debug "Job" kind "in" (.collection queue) "is ready to run."
               "Latency" wait-time "ms")
    (prom/inc-by! (metric :run-counter) (/ run-time 1000.0) kind)
    (.set (labelled-gauge :run-gauge) run-time)
    (.set (labelled-gauge :wait-gauge) wait-time)
    (prom/inc! (metric :job-counter) kind)))

(defn mark-drained
  "Signals to Prometheus that `queue` holds no more jobs, by clearing the
   run-time and wait-time gauges."
  [queue]
  (doseq [gauge-key [:run-gauge :wait-gauge]]
    (.clear (fetch queue gauge-key))))

(defmacro report
  "Wraps `handle-form` such as to log the event and record statistics.

   `queue` and `job` are forms which are evaluated exactly once each (`job`
   first), so it is safe to pass side-effecting expressions such as a call
   which pops the next job.

   When `job` is truthy, `handle-form` is evaluated and timed, after which the
   run time and the latency (now minus the job's `:run-at`, both in
   milliseconds) are passed to `report-job-stats`. When `job` is falsy,
   `handle-form` is not evaluated and the queue is marked as drained instead.

   Returns the value of `job`."
  [queue job handle-form]
  `(let [job# ~job
         queue# ~queue]
     (if job#
       (let [latency# (- (System/currentTimeMillis) (:run-at job#))
             start-time# (System/nanoTime)
             ;; Evaluated for its side effects only; the result is discarded.
             _# ~handle-form
             diff-ns# (-> (System/nanoTime) (- start-time#) (double))
             diff-ms# (/ diff-ns# 1000000.0)]
         (report-job-stats queue# job# diff-ms# latency#))
       (mark-drained queue#))
     job#))

(defn- reporting-loop
  "Publishes metrics of `queue` until the termination condition is met.

   Each cycle sets the size gauge to the current queue length and then sleeps
   for `POLLING_LOOP_INTERVAL` milliseconds. The loop runs for as long as the
   value behind `(.go-on? queue)` is truthy. Exceptions are logged and never
   terminate the loop."
  [queue]
  (while @(.go-on? queue)
    (try
      (-> queue
          (fetch :size-gauge)
          (prom/gauge-with-labels)
          (.set (length queue)))
      (catch Exception e
        (log/error e "Exception in reporting loop")))
    ;; The pause lives in its own `try` so that a failing metric update still
    ;; waits before the next attempt. Otherwise a persistent failure would make
    ;; the loop spin at full speed and flood the log.
    (try
      (Thread/sleep (long POLLING_LOOP_INTERVAL))
      (catch Exception e
        (log/error e "Exception in reporting loop")))))

(defn thread
  "Builds a thread named \"mq-reporter\" which runs the reporting loop of
   `queue`, starts it and returns it. The thread is non-daemon, runs at minimum
   priority and sees the thread bindings of the caller."
  [queue]
  (let [^Runnable task (bound-fn [] (reporting-loop queue))
        reporter (Thread. task "mq-reporter")]
    (.setDaemon reporter false)
    (.setPriority reporter Thread/MIN_PRIORITY)
    (.start reporter)
    reporter))

(defn acquire
  "Creates the Prometheus collectors of `queue` and stores them in its metrics
   atom under `:run-counter`, `:job-counter`, `:run-gauge`, `:size-gauge` and
   `:wait-gauge`. Collector names are prefixed with the name of the queue's
   metrics id. All collectors except the size gauge carry a `:type` label.

   The collectors are added to the atom in a single `swap!`, so readers never
   observe a partially populated metrics map. Returns `nil`."
  [queue]
  (let [str-id (name (.metrics-id queue))
        ;; Help texts consistently use `str-id`, so that a keyword id is not
        ;; rendered with its leading colon.
        collectors {:run-counter
                    (prom/counter (str str-id "_user_time_seconds")
                                  (str "Total user time of " str-id)
                                  :type)
                    :job-counter
                    (prom/counter (str str-id "_messages_total")
                                  (str "Messages processed in " str-id)
                                  :type)
                    :run-gauge
                    (prom/gauge (str str-id "_milliseconds_executing")
                                (str "Time to run a single " str-id " job (ms)")
                                :type)
                    :size-gauge
                    (prom/gauge (str str-id "_size")
                                (str "Size of the " str-id " queue"))
                    :wait-gauge
                    (prom/gauge (str str-id "_milliseconds_waiting")
                                (str "Latency of a single " str-id " job (ms)")
                                :type)}]
    ;; `merge` also copes with an atom holding `nil`.
    (swap! (.metrics queue) merge collectors)
    nil))

(defn release
  "Unregisters every collector held in the metrics atom of `queue`, then
   resets the atom to `nil`."
  [queue]
  (let [registry (.metrics queue)]
    (doseq [collector (vals @registry)]
      (prom/unregister collector))
    (reset! registry nil)))
