(ns signal.data_platform_telemetry.metrics
  (:require [iapetos.core :as prometheus]
            [iapetos.collector.jvm :as jvm]
            [iapetos.collector.ring :as ring]
            [java-time.convert :refer [to-millis-from-epoch]]))

(defn- ms->seconds
  "Converts a value expressed in milliseconds into fractional seconds."
  [time]
  (/ time 1000.0))

(defn take-while-and-n-more
  "Returns a lazy sequence of the leading items of `coll` for which `pred` is
  truthy, followed by up to `n` further items starting at the first item that
  failed `pred`."
  [pred n coll]
  (lazy-seq
   (let [items (seq coll)]
     (cond
       ;; input exhausted while every item still satisfied pred
       (nil? items) nil
       ;; still inside the matching prefix: keep the item and continue lazily
       (pred (first items)) (cons (first items)
                                  (take-while-and-n-more pred n (rest items)))
       ;; first failing item reached: emit it and the following items, n in total
       :else (take n items)))))

(defn- exponential-buckets
  "Builds a vector of buckets start * exponent^i for i = 0, 1, 2, ...
  It keeps every bucket that is <= max-value plus the first bucket above it."
  [start exponent max-value]
  (let [bucket-at   (fn [i] (* start (Math/pow exponent i)))
        within-max? (fn [bucket] (<= bucket max-value))]
    (vec (take-while-and-n-more within-max? 1 (map bucket-at (range))))))

;; the histogram keywords contain fake namespaces to make sure they are not prefixed by "default-"
;; note that counters must end with _total to be valid in future Prometheus versions (i.e. under OpenMetrics)
(defn init
  "Initialises a Prometheus metrics registry and returns it.

  Optional map keys:
    :registry  registry to start from (defaults to `prometheus/default-registry`)
    :jvm?      when truthy (default true), applies `jvm/initialize`
    :ring?     when truthy (default true), applies `ring/initialize`"
  ([]
   (init nil))
  ([{:keys [registry ring? jvm?]
     :or   {registry prometheus/default-registry
            jvm?     true
            ring?    true}}]
   (cond-> registry
     jvm?  jvm/initialize
     ring? ring/initialize)))

;; Base histogram buckets (0.01 to 2.0) for the processing-stage timings
;; registered in this namespace: read, write, publish and ack.
(def ^:private default-processing-stage-buckets-seconds [0.01 0.025 0.05 0.075 0.1 0.25 0.5 1.0 2.0])
;; Base histogram buckets (0.1 to 16.0) for the message process-time and
;; total-time histograms.
(def ^:private default-results-stage-buckets-seconds [0.1 0.25 0.5 0.75 1.0 2.0 4.0 8.0 16.0])


(defn add-exponential-buckets-past-maximum
  "Returns `base-buckets`, extended with doubling buckets when
  `max-time-seconds` is given and exceeds the last base bucket. The extension
  starts at twice the last base bucket and ends with the first bucket above
  `max-time-seconds`. With one argument, `base-buckets` is returned untouched."
  ([base-buckets]
   base-buckets)
  ([base-buckets max-time-seconds]
   (let [needs-extension? (and max-time-seconds
                               (> max-time-seconds (last base-buckets)))]
     (if-not needs-extension?
       base-buckets
       (let [largest-base (last base-buckets)
             extension    (exponential-buckets (* 2 largest-base) 2 max-time-seconds)]
         (concat base-buckets extension))))))


(comment
  ;; REPL scratch forms; nothing inside `comment` is evaluated on namespace load.

  ;; Doubling sequence 25.0, 50.0, ... ending at 51200.0, the first bucket above 50000.
  (exponential-buckets 25 2 50000)

  ;; The base buckets end at 2.0, so 4.0, 8.0 and 16.0 are appended.
  (add-exponential-buckets-past-maximum default-processing-stage-buckets-seconds 8))

(defn- queue-reader-metrics
  "Registers queue reader metrics on the given registry and returns the result
  of `prometheus/register`.

  Options:
    :max-batch-size                    selects the buckets of :message/batch-size
    :max-expected-stage-time-seconds   when above the last default processing-stage
                                       bucket, extends the :read/time buckets
    :max-expected-result-time-seconds  when above the last default results-stage
                                       bucket, extends the :message/process-time
                                       and :message/total-time buckets"
  [registry {:keys [max-batch-size
                    max-expected-stage-time-seconds
                    max-expected-result-time-seconds]}]
  (let [processing-stage-buckets (add-exponential-buckets-past-maximum default-processing-stage-buckets-seconds max-expected-stage-time-seconds)
        results-stage-buckets (add-exponential-buckets-past-maximum default-results-stage-buckets-seconds max-expected-result-time-seconds)
        batch-size-buckets (cond
                             ;; This branch used to test (= 1 0) and could never be taken.
                             ;; [0 2] only makes sense for single-document batches.
                             (= max-batch-size 1) [0 2]
                             ;; even steps 0, 2, 4, ... 50
                             (= max-batch-size 50) (vec (range 0 51 2))
                             :else (exponential-buckets 25 2 max-batch-size))]
    (-> registry
        (prometheus/register
         (prometheus/histogram :read/time
                               {:buckets processing-stage-buckets
                                :description "The time taken to read the data for the message from S3."})
         (prometheus/histogram :message/process-time
                               {:buckets results-stage-buckets
                                :description "The time taken to run the core process on the message in seconds."})
         (prometheus/histogram :message/total-time
                               {:buckets results-stage-buckets
                                :description "The total time for message processing in seconds."})
         (prometheus/histogram :message/batch-size
                               {:buckets batch-size-buckets
                                :description "The number of documents in the message."})
         (prometheus/gauge :queue-worker/messages-in-flight {:description "The number of messages in flight being processed."})
         (prometheus/counter :queue-worker/errors-total {:description "Counter of errors occurred when processing messages."})))))

(defn pipeline-queue-worker
  "Registers queue worker metrics on the given registry: the queue reader
  metrics plus the :write/time, :sqs/write-time and :ack/time histograms.

  Options:
    :max-batch-size                    forwarded to the queue reader metrics
    :max-expected-stage-time-seconds   extends the processing-stage buckets of
                                       every stage histogram, including :read/time
    :max-expected-result-time-seconds  optional; forwarded so the process/total
                                       time buckets can be extended as well"
  [registry {:keys [max-batch-size
                    max-expected-stage-time-seconds
                    max-expected-result-time-seconds]}]
  (let [processing-stage-buckets (add-exponential-buckets-past-maximum default-processing-stage-buckets-seconds max-expected-stage-time-seconds)]
    (-> registry
        ;; Forward the timing options too. Previously only :max-batch-size was
        ;; passed, so :read/time never received the extended buckets.
        (queue-reader-metrics {:max-batch-size max-batch-size
                               :max-expected-stage-time-seconds max-expected-stage-time-seconds
                               :max-expected-result-time-seconds max-expected-result-time-seconds})
        (prometheus/register
         (prometheus/histogram :write/time
                               {:buckets processing-stage-buckets
                                :description "The time taken to write the output to S3."})
         (prometheus/histogram :sqs/write-time
                               {:buckets processing-stage-buckets
                                :description "The time taken to publish the output message in seconds. Named sqs for legacy compatibility."})
         (prometheus/histogram :ack/time
                               {:buckets processing-stage-buckets
                                :description "The time taken to acknowledge a message in seconds."})))))

;; Supplies the :buckets value for the :pipeline/document-processed-age-seconds histogram.
;; NOTE(review): this returns an empty vector. Confirm the Prometheus client accepts a
;; histogram with no explicit buckets, or list the intended bounds here.
(defn- pipeline-document-processed-age-seconds-buckets [] [])

;; Map of mode -> largest received epoch time (seconds) recorded so far.
;; `pipeline-sink` resets it and installs a watch that copies each entry to a
;; gauge; `record-metrics-for-complete-document` updates it.
(def ^:private high-watermark-time-seconds
  (atom {}))

(defn pipeline-sink
  "Registers pipeline sink metrics on the given registry and returns the
  resulting registry.

  Besides the queue reader metrics it registers a processed-documents counter,
  a document-age histogram and a high-water-mark gauge. It also resets the
  shared high-water-mark atom and installs a watch that writes every
  mode/value pair of the atom to the gauge whenever the atom changes."
  [registry {:keys [max-batch-size]}]
  (let [reader-registry   (queue-reader-metrics registry {:max-batch-size max-batch-size})
        processed-counter (prometheus/counter
                           :pipeline-sink/processed-count-total
                           {:description "The number of processed documents."
                            :labels [:result]})
        age-histogram     (prometheus/histogram
                           :pipeline/document-processed-age-seconds
                           {:description "A histogram of the age of the document from when Signal received the data. (time it was received by Signal - time it was output by the Sink)."
                            :buckets (pipeline-document-processed-age-seconds-buckets)
                            :labels [:mode]})
        high-water-gauge  (prometheus/gauge
                           :pipeline/document-processed-high-water-mark-age-seconds
                           {:description "The received epoch time in seconds of the last outputted document."
                            :labels [:mode]})
        sink-registry     (prometheus/register reader-registry
                                               processed-counter
                                               age-histogram
                                               high-water-gauge)
        ;; watch callback: mirror the whole atom content into the gauge
        publish-marks     (fn [_key _ref _old marks]
                            (doseq [[mode mark] marks]
                              (prometheus/set sink-registry
                                              :pipeline/document-processed-high-water-mark-age-seconds
                                              {:mode mode}
                                              mark)))]
    (reset! high-watermark-time-seconds {})
    (add-watch high-watermark-time-seconds
               :record-pipeline-document-processed-high-water-mark-age-seconds
               publish-marks)
    sink-registry))

(defn ring-wrap-instrumentation
  "Wraps the ring handler `app` in Prometheus instrumentation backed by
  `registry`. Every request is reported under the path \"/\"."
  [registry app]
  (let [options {:path-fn (constantly "/")}]
    (ring/wrap-instrumentation app registry options)))

(defn ring-wrap-metrics-expose
  "Wraps the ring handler `app` so that the Prometheus metrics of `registry`
  are exposed on /internal/metrics."
  [registry app]
  (let [options {:path "/internal/metrics"}]
    (ring/wrap-metrics-expose app registry options)))


(defn observe-time-seconds
  "Given a metric name and start/end timestamps in milliseconds, observes the
  elapsed time between them, converted to seconds, on that metric of `registry`."
  [metric registry start-timestamp-ms end-timestamp-ms]
  (let [elapsed-ms      (- end-timestamp-ms start-timestamp-ms)
        elapsed-seconds (ms->seconds elapsed-ms)]
    (prometheus/observe registry metric elapsed-seconds)))


(defmacro read-time
  "Runs `body` inside `prometheus/with-duration` against the :read/time
  collector looked up on `registry`."
  [registry & body]
  `(prometheus/with-duration (~registry :read/time) ~@body))

(def observe-read-time
  "`observe-time-seconds` pre-bound to :read/time; call with registry,
  start-timestamp-ms and end-timestamp-ms."
  (partial observe-time-seconds :read/time))


(defmacro write-time
  "Runs `body` inside `prometheus/with-duration` against the :write/time
  collector looked up on `registry`."
  [registry & body]
  `(prometheus/with-duration (~registry :write/time) ~@body))

(def observe-write-time
  "`observe-time-seconds` pre-bound to :write/time; call with registry,
  start-timestamp-ms and end-timestamp-ms."
  (partial observe-time-seconds :write/time))

(defmacro publish-time
  "Runs `body` inside `prometheus/with-duration` against the :sqs/write-time
  collector looked up on `registry` (the publish histogram keeps its legacy
  sqs name)."
  [registry & body]
  `(prometheus/with-duration (~registry :sqs/write-time)
     ~@body))

;; The publish histogram is registered as :sqs/write-time. No :publish/time
;; metric is registered in this namespace, so the observer must target the
;; same key as the `publish-time` macro.
(def observe-publish-time
  "`observe-time-seconds` pre-bound to :sqs/write-time; call with registry,
  start-timestamp-ms and end-timestamp-ms."
  (partial observe-time-seconds :sqs/write-time))

(defmacro ack-time
  "Runs `body` inside `prometheus/with-duration` against the :ack/time
  collector looked up on `registry`."
  [registry & body]
  `(prometheus/with-duration (~registry :ack/time) ~@body))

(def observe-ack-time
  "`observe-time-seconds` pre-bound to :ack/time; call with registry,
  start-timestamp-ms and end-timestamp-ms."
  (partial observe-time-seconds :ack/time))


(defmacro message-process-time
  "Runs `body` inside `prometheus/with-duration` against the
  :message/process-time collector looked up on `registry`."
  [registry & body]
  `(prometheus/with-duration (~registry :message/process-time) ~@body))

(def observe-message-process-time
  "`observe-time-seconds` pre-bound to :message/process-time; call with
  registry, start-timestamp-ms and end-timestamp-ms."
  (partial observe-time-seconds :message/process-time))


(defmacro message-total-time
  "Runs `body` inside `prometheus/with-duration` against the
  :message/total-time collector looked up on `registry`."
  [registry & body]
  `(prometheus/with-duration (~registry :message/total-time) ~@body))

(def observe-message-total-time
  "`observe-time-seconds` pre-bound to :message/total-time; call with
  registry, start-timestamp-ms and end-timestamp-ms."
  (partial observe-time-seconds :message/total-time))


(defn increment-messages-in-flight
  "Increments the :queue-worker/messages-in-flight gauge of `registry` by 1."
  [registry]
  ;; No `~` here: this is a plain function, not a syntax-quoted macro body, so
  ;; `~registry` would read as a call to the unbound `clojure.core/unquote`.
  (prometheus/inc (registry :queue-worker/messages-in-flight) 1))

(defn decrement-messages-in-flight
  "Decrements the :queue-worker/messages-in-flight gauge of `registry` by 1."
  [registry]
  ;; No `~` here: this is a plain function, not a syntax-quoted macro body, so
  ;; `~registry` would read as a call to the unbound `clojure.core/unquote`.
  (prometheus/dec (registry :queue-worker/messages-in-flight) 1))


(defn observe-message-batch-size
  "Observes `size` on the :message/batch-size histogram of `registry`."
  [registry size]
  ;; No `~` here: this is a plain function, not a syntax-quoted macro body, so
  ;; `~registry` would read as a call to the unbound `clojure.core/unquote`.
  (prometheus/observe (registry :message/batch-size) size))

(defn observe-queue-worker-error
  "Increments the :queue-worker/errors-total counter of `registry`.
  The second argument is accepted and ignored."
  [registry _]
  ;; No `~` here: this is a plain function, not a syntax-quoted macro body, so
  ;; `~registry` would read as a call to the unbound `clojure.core/unquote`.
  (prometheus/inc (registry :queue-worker/errors-total)))


(defn- calc-age-seconds
  "Returns how many seconds lie between `time-seconds` (epoch seconds) and the
  current system time."
  [time-seconds]
  (let [now-seconds (ms->seconds (System/currentTimeMillis))]
    (- now-seconds time-seconds)))

(defn record-metrics-for-complete-document
  "Records the metrics for one completed document.

  `received-date-time` is converted to whole epoch seconds via
  `to-millis-from-epoch`. The document age (now minus that time) is observed on
  :pipeline/document-processed-age-seconds with the label {:mode mode}. Unless
  the received time lies in the future, the per-mode high-water mark is raised
  to it when it is larger than the stored value."
  [registry received-date-time mode]
  (let [start-time-epoch-seconds (quot (to-millis-from-epoch received-date-time)
                                       1000)]
    ;; registry/metric/labels/amount form, matching the `prometheus/set` call
    ;; that writes the high-water-mark gauge
    (prometheus/observe registry
                        :pipeline/document-processed-age-seconds
                        {:mode mode}
                        (calc-age-seconds start-time-epoch-seconds))
    ;; do not set a high watermark in the future
    (when (>= (/ (System/currentTimeMillis) 1000) start-time-epoch-seconds)
      ;; The atom starts as {} so the first value for a mode is nil, and
      ;; (max x nil) throws a NullPointerException; fnil seeds it with the new time.
      (swap! high-watermark-time-seconds
             update mode
             (fnil max start-time-epoch-seconds)
             start-time-epoch-seconds))))
