(ns com.timezynk.useful.mongo.queue
  (:require
   [clojure.tools.logging :as log]
   [com.timezynk.useful.env :as env]
   [com.timezynk.useful.prometheus.core :as prom]
   [com.timezynk.useful.string :as s]
   [com.timezynk.useful.time :as t]
   [com.timezynk.useful.timed-queue :refer [TimedQueue]]
   [somnium.congomongo :as mongo]))

;; Number of worker threads a queue gets when `create` is called without
;; `:num-workers`.
(def ^:const DEFAULT_NUM_WORKERS 1)

;; Fallback for `:min-interval`. The value is handed to `throttle` as the
;; minimum duration of one pass of the processing loop.
;; NOTE(review): presumably milliseconds — confirm against
;; `com.timezynk.useful.time/sleep-pad`.
(def ^:const DEFAULT_MIN_INTERVAL 250)

;; Fallback for `:min-sleep`. The value is handed to `throttle` as its
;; `min-sleep` argument.
;; NOTE(review): presumably milliseconds — confirm against
;; `com.timezynk.useful.time/sleep-pad`.
(def ^:const DEFAULT_MIN_SLEEP 100)

;; Fallback for `:max-attempts`. `on-error` re-enqueues a failing job while
;; its failure count is below this number and moves it to the failed-jobs
;; collection otherwise.
(def ^:const DEFAULT_MAX_ATTEMPTS 5)

(def ^:const DEFAULT_RETRY_INTERVAL_MS
  "Milliseconds to wait for before retrying a failed job."
  500)

;; Third argument given to `t/exponential-backoff-interval` in `on-error`.
;; Evaluates to 100000, i.e. 100 seconds when read as milliseconds.
(def ^:const MAX_RETRY_INTERVAL_MS (* 1000 10 10))

(def ^:private ^:const JOIN_TIMEOUT
  "Maximum number of milliseconds to wait for worker threads to finish."
  20000)

(def ^:const DEFAULT_THREAD_PRIORITY
  "Priority to assign to worker threads, if none specified at creation."
  (if env/test? Thread/MAX_PRIORITY Thread/MIN_PRIORITY))

;; Lifecycle and introspection operations of a job queue which is served by
;; background worker threads. Implemented in this file by `MongoQueue`.
;;
;; `thread-name` is the name given to the worker thread; `MongoQueue` appends
;; "-<index>" to it when it runs more than one worker.
(defprotocol WorkQueue
  (start-workers! [this thread-name] "Starts all worker threads.")
  (stop-workers! [this] "Stops all worker threads.")
  (length [this] "Number of currently enqueued jobs."))

(defn- reserve-job!
  "Claims the next runnable job of `queue` in a single find-and-modify call.

   A job is runnable when its `:run-at` is not in the future and it carries no
   `:locked` flag. Candidates are sorted by ascending `:run-at` and the chosen
   document gets `:locked` set to true. Returns whatever
   `mongo/fetch-and-modify` returns."
  [queue]
  (let [coll (.collection queue)
        runnable {:run-at {:$lte (System/currentTimeMillis)}
                  :locked nil}
        lock {:$set {:locked true}}]
    (mongo/fetch-and-modify coll runnable lock :sort {:run-at 1})))

(defn- report-job-stats
  "Publishes the statistics of one processed `job` to Prometheus.

   `run-time` and `wait-time` are expected in milliseconds. The run time is
   added, converted to seconds, to the run counter and set on the run gauge;
   the wait time is set on the wait gauge; the job counter is incremented.
   Every metric is labelled with the `:type` of `job`."
  [queue job run-time wait-time]
  (let [kind (:type job)]
    (log/debug "Job" kind "in" (.collection queue) "is ready to run."
               "Latency" wait-time "ms")
    (prom/inc-by! @(.run-counter queue) (/ run-time 1000.0) kind)
    (.set (prom/gauge-with-labels @(.run-gauge queue) kind) run-time)
    (.set (prom/gauge-with-labels @(.wait-gauge queue) kind) wait-time)
    (prom/inc! @(.job-counter queue) kind)))

(defn- mark-drained
  "Clears the run gauge and then the wait gauge of `queue`. Invoked when
   there was no job to process."
  [queue]
  (let [run-gauge @(.run-gauge queue)]
    (.clear run-gauge))
  (let [wait-gauge @(.wait-gauge queue)]
    (.clear wait-gauge)))

(defn failed-queue-collection-name
  "Keyword naming the collection where jobs of `queue` end up once they have
   failed for good: the name of the queue's own collection followed by
   `.failed`."
  [queue]
  (let [base (name (.collection queue))]
    (keyword (str base ".failed"))))

(defn- on-error
  "Records that the handler of `queue` threw `error` while running `job`.

   A copy of the job, without its `:_id`, is inserted with the failure
   appended to `:errors` and `:run-at` pushed into the future by an
   exponential backoff interval. The copy goes back into the queue's own
   collection while the number of failures is below the queue's
   `max-attempts`; otherwise it goes into the failed-jobs collection."
  [queue job error]
  (let [num-failures (inc (count (:errors job)))
        interval (t/exponential-backoff-interval num-failures
                                                 (.retry-interval queue)
                                                 MAX_RETRY_INTERVAL_MS)
        destination (if (< num-failures (.max-attempts queue))
                      (.collection queue)
                      (failed-queue-collection-name queue))
        failure {:stacktrace (s/from-throwable error)
                 :thrown-at (System/currentTimeMillis)}
        without-id (dissoc job :_id)
        with-failure (update without-id :errors conj failure)
        rescheduled (assoc with-failure
                           :run-at
                           (+ (System/currentTimeMillis) interval))]
    (mongo/insert! destination rescheduled)))

(defmacro ^:private report
  "Wraps `handle-form` such as to log the event and record statistics.

   The `job` form is evaluated exactly once. When its value is truthy,
   `handle-form` is evaluated, its duration is measured and handed, together
   with the queueing latency (current time minus the job's `:run-at`), to
   `report-job-stats`; both figures are in milliseconds. When the value is
   falsey, `handle-form` is not evaluated and `mark-drained` is called on
   `queue` instead. The expansion evaluates to the value of `job`."
  [queue job handle-form]
  ;; Bind the job once: splicing `~job` at every use site would re-evaluate
  ;; the caller's form each time, which is wrong for anything but a symbol.
  `(let [job# ~job]
     (if job#
       (let [latency# (- (System/currentTimeMillis) (:run-at job#))
             start-time# (System/nanoTime)
             _# ~handle-form
             diff-ns# (-> (System/nanoTime) (- start-time#) (double))
             diff-ms# (/ diff-ns# 1000000.0)]
         (report-job-stats ~queue job# diff-ms# latency#))
       (mark-drained ~queue))
     job#))

(defn- run-job!
  "Applies the handler of `queue` to `job`; does nothing and returns nil when
   `job` is falsey. An `Exception` thrown by the handler is logged and passed
   on to `on-error` instead of being propagated."
  [queue job]
  (if job
    (try
      (let [handle (.handler queue)]
        (handle job))
      (catch Exception cause
        (log/error cause "Error while handling" (:type job) "job")
        (on-error queue job cause)))
    nil))

(defn- process-job!
  "Runs `job` through `run-job!` inside the `report` wrapper, which records
   statistics. Returns the value of the `report` expansion."
  [queue job]
  (report queue job (run-job! queue job)))

(defn- delete-job!
  "Removes the document with the `:_id` of `job` from the collection of
   `queue`. Does nothing and returns nil when `job` is falsey."
  [queue job]
  (when-let [{job-id :_id} job]
    (let [coll (.collection queue)]
      (mongo/destroy! coll {:_id job-id}))))

(defn- add-payload-prefix
  "Reducing step: associates `v` into `acc` under the string key made of
   `payload.` followed by the name of `k`."
  [acc [k v]]
  (let [prefixed-key (str "payload." (name k))]
    (assoc acc prefixed-key v)))

(defmacro ^:private throttle
  "Expands to plain, undelayed execution of `body` when `env/test?` is truthy
   at macro-expansion time. Otherwise expands to a call of
   `com.timezynk.useful.time/sleep-pad` with `min-duration`, `min-sleep` and
   `body`."
  [min-duration min-sleep & body]
  (if env/test?
    `(do ~@body)
    `(t/sleep-pad ~min-duration ~min-sleep ~@body)))

(defn- processing-loop
  "Body of a worker thread. For as long as the `go-on?` atom of `queue` holds
   a truthy value it reserves a job, processes it and deletes it, all inside
   `throttle`. An `Exception` escaping one pass is logged and the loop moves
   on to the next pass."
  [queue]
  (let [coll (.collection queue)
        running? (.go-on? queue)
        interval (.min-interval queue)
        pause (.min-sleep queue)]
    (log/info "Starting worker" (.getName (Thread/currentThread))
              "for queue" coll)
    (while @running?
      (try
        (throttle interval pause
                  (let [reserved (reserve-job! queue)
                        processed (process-job! queue reserved)]
                    (delete-job! queue processed)))
        (catch Exception cause
          (log/error cause "Exception in queue" coll))))
    (log/info "Queue" coll "finished")))

(defn- acquire-metrics
  "Creates the four Prometheus metrics of `queue` (two counters, two gauges)
   and stores each of them in the corresponding atom of the queue. Metric
   names are derived from the queue's `metrics-id`; every metric has a single
   `:type` label."
  [queue]
  (let [id (.metrics-id queue)
        str-id (name id)
        metric-name (fn [suffix] (keyword (str str-id suffix)))]
    (reset! (.run-counter queue)
            (prom/counter (metric-name "_user_time_seconds")
                          (str "Total user time of " id)
                          :type))
    (reset! (.job-counter queue)
            (prom/counter (metric-name "_messages_total")
                          (str "Messages processed in " id)
                          :type))
    (reset! (.run-gauge queue)
            (prom/gauge (metric-name "_milliseconds_executing")
                        (str "Time to run a single " str-id " job (ms)")
                        :type))
    (reset! (.wait-gauge queue)
            (prom/gauge (metric-name "_milliseconds_waiting")
                        (str "Latency of a single " str-id " job (ms)")
                        :type))))

(defn- release-metrics
  "Unregisters every metric currently held by `queue` and empties the atom
   that held it. Atoms which hold no metric are left alone."
  [queue]
  (let [holders [(.run-counter queue)
                 (.job-counter queue)
                 (.run-gauge queue)
                 (.wait-gauge queue)]
        release! (fn [holder]
                   (when-let [metric (deref holder)]
                     (prom/unregister metric)
                     (reset! holder nil)))]
    (run! release! holders)))

(defn- acquire-threads
  "Creates and starts the worker threads of `queue`, adding each of them to
   the queue's `worker-threads` atom. Every thread is a non-daemon thread
   which runs `processing-loop` at the queue's thread priority. A lone worker
   is named `thread-name`; with any other worker count the zero-based index
   of the worker is appended after a dash."
  [queue thread-name]
  (let [worker-count (.num-workers queue)
        priority (.thread-priority queue)
        single? (= 1 worker-count)]
    (dotimes [index worker-count]
      (let [label (if single?
                    thread-name
                    (str thread-name "-" index))
            worker (Thread. (bound-fn [] (processing-loop queue)) label)]
        (.setDaemon worker false)
        (.setPriority worker priority)
        (.start worker)
        (swap! (.worker-threads queue) conj worker)))))

(defn- release-threads
  "Joins every worker thread of `queue`, waiting at most `JOIN_TIMEOUT`
   milliseconds for each, and then empties the queue's list of threads.
   Does nothing when there are no worker threads."
  [queue]
  (let [pool (.worker-threads queue)]
    (when-not (empty? @pool)
      (doseq [worker @pool]
        (.join worker JOIN_TIMEOUT))
      (reset! pool []))))

;; Work queue whose jobs are documents in the MongoDB collection `collection`.
;;
;; Fields which hold mutable state (`go-on?`, `worker-threads`, `run-counter`,
;; `job-counter`, `run-gauge`, `wait-gauge`) are atoms; see `create` for how
;; an instance is assembled. `handler` is called with each reserved job.
(deftype MongoQueue
         [collection handler num-workers thread-priority min-interval
          min-sleep go-on? worker-threads write-concern
          metrics-id run-counter job-counter run-gauge wait-gauge
          max-attempts retry-interval]

  WorkQueue
  ;; Registers the metrics and launches the worker threads. Returns true.
  (start-workers! [this thread-name]
    ;; Raise the flag BEFORE any thread exists. A worker tests `go-on?` as soon
    ;; as it starts, so after an earlier `stop-workers!` a freshly started
    ;; worker could otherwise read the stale `false` and quit at once.
    (reset! go-on? true)
    (acquire-metrics this)
    (acquire-threads this thread-name)
    true)

  ;; Asks the workers to finish, waits for them and unregisters the metrics.
  (stop-workers! [this]
    (reset! go-on? false)
    (release-threads this)
    (release-metrics this))

  ;; Number of documents in the collection.
  (length [_this]
    (mongo/fetch-count collection))

  TimedQueue
  ;; Inserts a new job of `type`, due at `run-at`, with an empty error list.
  (push-job! [_this type run-at payload]
    (mongo/insert! collection
                   {:run-at run-at
                    :type type
                    :payload payload
                    :errors []}
                   :write-concern write-concern))

  ;; Upserts the job of `type` whose payload matches `selector`; the keys of
  ;; `selector` are matched against the fields below `payload`.
  ;; NOTE(review): `update` is merged shallowly over the default modifier, so
  ;; a `:$set` entry in `update` replaces the default `:$set` (`:run-at`,
  ;; `:type`, `:errors`) wholesale — confirm callers account for that.
  (upsert-job! [_this type run-at selector update]
    (mongo/update! collection
                   (merge
                    {:type type}
                    (reduce add-payload-prefix {} selector))
                   (merge
                    {:$set
                     {:run-at run-at
                      :type type
                      :errors []}}
                    update)
                   :upsert true
                   :write-concern write-concern)))

(defn create
  "Builds a `MongoQueue` from keyword arguments.

   Recognised keys: `:id` (used to derive metric names), `:collection`,
   `:handler`, `:num-workers`, `:thread-priority`, `:min-interval`,
   `:min-sleep`, `:max-attempts` and `:retry-interval`. A falsey value for
   any of the last six is replaced by the matching `DEFAULT_*` constant.
   The write concern is `:acknowledged` when `env/test?` is truthy and
   `:unacknowledged` otherwise. Workers are not started."
  [& {:keys [id collection handler num-workers thread-priority
             min-interval min-sleep max-attempts retry-interval]}]
  (let [workers (or num-workers DEFAULT_NUM_WORKERS)
        priority (or thread-priority DEFAULT_THREAD_PRIORITY)
        interval (or min-interval DEFAULT_MIN_INTERVAL)
        pause (or min-sleep DEFAULT_MIN_SLEEP)
        attempts (or max-attempts DEFAULT_MAX_ATTEMPTS)
        retry-ms (or retry-interval DEFAULT_RETRY_INTERVAL_MS)
        concern (if env/test? :acknowledged :unacknowledged)
        go-on? (atom true)
        worker-threads (atom [])
        run-counter (atom nil)
        job-counter (atom nil)
        run-gauge (atom nil)
        wait-gauge (atom nil)]
    (MongoQueue. collection handler workers priority interval
                 pause go-on? worker-threads concern
                 id run-counter job-counter run-gauge wait-gauge
                 attempts retry-ms)))

(defn drain
  "Blocks the calling thread, by way of `t/wait-till`, until `length` reports
   zero for `queue`."
  [queue]
  (t/wait-till (zero? (length queue))))
