(ns taoensso.tukey.sstats
  "SummaryStats stuff.
  Private ns, implementation detail."
  {:author "Peter Taoussanis (@ptaoussanis)"}
  (:require
   [taoensso.encore :as enc :refer [have have? have!]]
   [taoensso.tukey.impl :as impl])

  #?(:clj (:import [java.util LinkedList])))

;;;; TODO
;; - Add SummaryStatsRolling (rolling by max len and/or date)?

;;;; SummaryStats

;; Value type holding summary statistics plus the intermediate sums needed
;; to merge two instances (see `summary-stats-merge`).
;;
;; Fields (positional, in constructor order):
;;   xlongs?            - truthy when min/max/sum should be rounded to
;;                        integers in the derefed map (set from
;;                        `impl/sorted-longs?` by `summary-stats`)
;;   nx                 - number of values summarized
;;   xmin, xmax         - smallest and largest value
;;   xsum, xmean        - sum and mean of the values
;;   xvar-sum, xmad-sum - running sums used for variance and mean absolute
;;                        deviation; presumably squared/absolute deviations
;;                        from the mean (TODO confirm against `impl`)
;;   xvar               - variance, nil when a merge cannot compute one
;;   xmad               - mean absolute deviation
;;   p25 ... p99        - percentiles
;;   as-map_            - derefable (a delay at the construction sites in
;;                        this file) yielding the stats as a map
(deftype SummaryStats
    ;; Field names chosen to avoid shadowing,
    ;; includes data to support merging
    [^boolean xlongs?
     ^long    nx
     ^double  xmin
     ^double  xmax
     ^double  xsum
     ^double  xmean
     ^double  xvar-sum
     ^double  xmad-sum
              xvar ; May be nil
     ^double  xmad
     ^double  p25
     ^double  p50
     ^double  p75
     ^double  p90
     ^double  p95
     ^double  p99
     as-map_]

  ;; Short printable form showing only the value count
  Object (toString [_] (str "SummaryStats[n=" nx "]"))

  ;; Dereffing a SummaryStats derefs `as-map_`, returning the stats map
  #?@(:clj  [clojure.lang.IDeref ( deref [this] @as-map_)]
      :cljs [             IDeref (-deref [this] @as-map_)]))

(defn summary-stats?
  "Predicate: true iff `x` is an instance of SummaryStats."
  [x]
  (instance? SummaryStats x))

(defn ^:public summary-stats
  "Summarizes a coll of numbers as a new mergeable ?SummaryStats.
  Returns nil when `nums` is nil, or when there are no numbers to summarize.

  Deref the result for a map of stats:
    (deref ss) => {:keys [n min max sum mean var-sum mad-sum var mad
                          p25 p50 p75 p90 p95 p99]}

  See also `summary-stats-merge`."
  {:added "v0.5.0 (2022-12-15)"}
  [nums]
  (when nums
    (let [sorted (impl/sorted-nums nums)
          n      (count sorted)]

      (when (pos? n)
        (let [total (double (reduce impl/rf-sum 0.0 sorted))
              mean  (/ total (double n))

              ;; Single pass over the sorted nums for both deviation sums
              [^double var-sum ^double mad-sum]
              (impl/multi-reduce
                (partial impl/rf-sum-variance      mean) 0.0
                (partial impl/rf-sum-abs-deviation mean) 0.0
                sorted)

              variance (/ var-sum n) ; Plain n divisor, no Bessel correction
              mad      (/ mad-sum n)

              [lo p25 p50 p75 p90 p95 p99 hi]
              (impl/percentiles sorted)

              longs? (impl/sorted-longs? sorted)

              ;; Map view is built lazily, on first deref of the SummaryStats
              as-map_
              (delay
                (let [fin (if longs?
                            (fn [x] (Math/round (double x)))
                            identity)]
                  {:n       n
                   :min     (fin lo)
                   :max     (fin hi)
                   :sum     (fin total)
                   :mean    mean
                   :var-sum var-sum
                   :mad-sum mad-sum
                   :var     variance
                   :mad     mad
                   :p25     p25
                   :p50     p50
                   :p75     p75
                   :p90     p90
                   :p95     p95
                   :p99     p99}))]

          (SummaryStats. longs?
            n lo hi total mean var-sum mad-sum variance mad
            p25 p50 p75 p90 p95 p99
            as-map_))))))

(comment @(summary-stats [1 2 3]))

(defn ^:public summary-stats-merge
  "Given one or two ?SummaryStats, returns a new ?SummaryStats such that:
    (summary-stats-merge
       (summary-stats nums1)
       (summary-stats nums2))

    is an approximation of the summary stats for nums1 and nums2 combined.

  When either argument is nil, the other argument is returned as-is.

  Useful when you want summary stats for a large coll of numbers for which
  it would be infeasible/expensive to keep all numbers for accurate merging."
  {:added "v0.5.0 (2022-12-15)"}
  ([ss1] ss1)
  ([ss1 ss2]
   (cond
     (not ss1) ss2
     (not ss2) ss1
     :else
     (let [^SummaryStats ss1 ss1
           ^SummaryStats ss2 ss2

           nx1 (.-nx ss1)
           nx2 (.-nx ss2)

           _ (assert (pos? nx1))
           _ (assert (pos? nx2))

           n       (+ nx1 nx2)
           weight1 (/ (double nx1) (double n))
           weight2 (/ (double nx2) (double n))

           ;; Count-weighted average of a stat from each side
           wavg (fn [v1 v2] (+ (* weight1 v1) (* weight2 v2)))

           ;; Integer rounding only applies when both inputs were all-longs
           longs? (and (.-xlongs? ss1) (.-xlongs? ss2))

           total (+ (.-xsum ss1) (.-xsum ss2))
           mean  (/ (double total) (double n))

           min1 (.-xmin ss1), min2 (.-xmin ss2)
           max1 (.-xmax ss1), max2 (.-xmax ss2)
           lo   (if (< min1 min2) min1 min2)
           hi   (if (> max1 max2) max1 max2)

           ;; Batched "online" calculation here is at least as good as the
           ;; standard Knuth/Welford method, Ref. http://goo.gl/QLSfOc,
           ;;                                     http://goo.gl/mx5eSK.
           ;; No apparent advantage in using the merged mean asap (?).
           var-sum (+ (.-xvar-sum ss1) (.-xvar-sum ss2))
           mad-sum (+ (.-xmad-sum ss1) (.-xmad-sum ss2))

           ;; These are pretty rough approximations. More sophisticated
           ;; approaches not worth the extra cost/effort in our case.
           p25 (wavg (.-p25 ss1) (.-p25 ss2))
           p50 (wavg (.-p50 ss1) (.-p50 ss2))
           p75 (wavg (.-p75 ss1) (.-p75 ss2))
           p90 (wavg (.-p90 ss1) (.-p90 ss2))
           p95 (wavg (.-p95 ss1) (.-p95 ss2))
           p99 (wavg (.-p99 ss1) (.-p99 ss2))

           ;; Variance is left nil unless there are more than 2 values
           variance (when (> n 2) (/ var-sum (impl/bessel-correction n -2.0)))
           mad      (/ mad-sum n)

           ;; Map view is built lazily, on first deref of the SummaryStats
           as-map_
           (delay
             (let [fin (if longs?
                         (fn [x] (Math/round (double x)))
                         identity)]
               {:n       n
                :min     (fin lo)
                :max     (fin hi)
                :sum     (fin total)
                :mean    mean
                :var-sum var-sum
                :mad-sum mad-sum
                :var     variance
                :mad     mad
                :p25     p25
                :p50     p50
                :p75     p75
                :p90     p90
                :p95     p95
                :p99     p99}))]

       (SummaryStats. longs?
         n lo hi total mean var-sum mad-sum variance mad
         p25 p50 p75 p90 p95 p99
         as-map_)))))

;;;; BufferedSummaryStats

(defn- buf-new
  "Returns a new mutable buffer: a `java.util.LinkedList` on Clojure,
  a JS array on ClojureScript.

  When `init` is a non-nil coll, the new buffer starts with a copy of
  its items. A nil `init` yields an empty buffer."
  ([    ] #?(:clj (LinkedList.) :cljs (cljs.core/array)))
  ([init]
   #?(:clj  (if init (LinkedList. init) (LinkedList.))
      ;; `(array init)` would produce a 1-element array holding the coll
      ;; itself; `into-array` copies the coll's items, matching the
      ;; `LinkedList` copy-constructor used on the JVM.
      :cljs (if init (into-array init) (cljs.core/array)))))

(defn- buf-add
  "Appends `x` to the end of mutable buffer `buf`, in place."
  [buf x]
  #?(:clj  (let [^LinkedList ll buf] (.add ll x))
     :cljs (.push buf x)))

(defn- buf-len
  "Returns the number of items currently held by buffer `buf`."
  ^long [buf]
  #?(:clj  (let [^LinkedList ll buf] (.size ll))
     :cljs (alength buf)))

;; Operations on a stateful, buffered summary-stats accumulator.
;; Implemented in this file by `SummaryStatsBuffered`.
(defprotocol ISummaryStatsBuffered
  ;; TODO Later generalize protocol for other SummaryStats types?

  ;; 1-arity deref is implemented by `SummaryStatsBuffered` as
  ;; `(ssb-deref this true)`, i.e. it flushes the buffer first.
  (ssb-deref [_] [_ flush-buffer?] "Returns current ?sstats.")
  (ssb-clear [_]   "Clears all internal state and returns nil.")
  (ssb-flush [_]   "Flushes internal buffer and returns newly merged sstats or nil.")
  (ssb-push  [_ n] "Adds given num to internal buffer."))

(deftype SummaryStatsBuffered [sstats_ buf_ buf-size merge-counter merge-cb]
  ;; Stateful accumulator that buffers pushed numbers and periodically
  ;; merges them into a running SummaryStats.
  ;;
  ;;   sstats_       - atom holding the ?SummaryStats merged so far
  ;;   buf_          - atom holding the current mutable buffer of pending nums
  ;;   buf-size      - ?max buffer length; `ssb-push` flushes once exceeded
  ;;   merge-counter - ?counter fn, called once per merge
  ;;   merge-cb      - ?(fn [this elapsed]) called after each merge, with
  ;;                   elapsed time as measured by `enc/now-nano*`

  Object
  (toString [_] ; e.g. "SummaryStatsBuffered[n=1, pending=8, merged=0]"
    (str
      "SummaryStatsBuffered[n="
      ;; `sstats_` holds a SummaryStats (or nil). SummaryStats is a plain
      ;; deftype without keyword lookup, so `(get ss :n 0)` would always
      ;; yield 0; read its `nx` field instead.
      (let [ss @sstats_]
        (cond
          (nil? ss)                   0
          (instance? SummaryStats ss) (.-nx ^SummaryStats ss)
          :else                       (get ss :n 0)))
      ", pending=" (buf-len @buf_)
      (when-let [mc merge-counter] (str ", merged=" @mc))
      "]"))

  ;; Deref => flush buffer, then return current ?SummaryStats
  #?@(:clj  [clojure.lang.IDeref ( deref [this] (ssb-deref this))]
      :cljs [             IDeref (-deref [this] (ssb-deref this))])

  ;; Invoking as a fn with a number => push that number
  #?@(:clj  [clojure.lang.IFn ( invoke [this n] (ssb-push this n))]
      :cljs [             IFn (-invoke [this n] (ssb-push this n))])

  ISummaryStatsBuffered
  (ssb-deref [this              ] (ssb-deref this true))
  (ssb-deref [this flush-buffer?]
    ;; `ssb-flush` returns nil when there was nothing to flush, in which
    ;; case we fall back to whatever has been merged so far.
    (or (and flush-buffer? (ssb-flush this)) @sstats_))

  (ssb-clear [_]
    (reset! buf_ (buf-new))
    (reset! sstats_ nil)
    (when-let [mc merge-counter] (mc :set 0))
    nil)

  (ssb-flush [this]
    ;; Swap in a fresh buffer and take ownership of the old one
    (let [[drained] (reset-vals! buf_ (buf-new nil))]
      (if (== (buf-len drained) 0)
        nil
        (let [t0             (when merge-cb (enc/now-nano*))
              _              (when-let [mc merge-counter] (mc))
              sstats-drained (summary-stats drained)

              sstats-merged ; Only drainer will update, so should be no contention
              (swap! sstats_ summary-stats-merge sstats-drained)]

          (when merge-cb ; Handy for profilers, etc.
            (merge-cb this (- (enc/now-nano*) ^long t0)))

          sstats-merged))))

  (ssb-push [this n]
    (let [buf @buf_]
      (buf-add buf n)

      ;; Flush only when a max size is configured and has been exceeded
      (when-let [^long nmax buf-size]
        (when (> (buf-len buf) nmax)
          (ssb-flush this)))

      nil)))

(defn ^:public summary-stats-buffered
  "Creates and returns a new stateful SummaryStatsBuffered (ssb):
    (ssb <num>) => Pushes the given number onto ssb's internal buffer.
    (deref ssb) => Flushes the buffer if necessary, then returns a
                   mergeable ?SummaryStats. Deref that in turn for a map
                   of summary stats over every number ever added to ssb:
                     {:keys [n min max p25 ... p99 mean var mad]}.

  Handy for summarizing a (possibly infinite) stream of numbers.
  Used by the Tufte profiling library, and the Carmine Redis library.

  Options:
    :buffer-size - Max count of numbers that may sit in the buffer
                   before the next (ssb <num>) call blocks to flush
                   the buffer and merge it into any existing summary
                   stats. Defaults to 1e5.

                   A larger buffer gives better performance and more
                   accurate stats, but uses more memory while
                   buffering.

    :buffer-init - Initial buffer content, useful for persistent ssb.
    :sstats-init - Initial summary stats,  useful for persistent ssb."

  {:added "v0.5.0 (2022-12-15)"}
  ([] (summary-stats-buffered nil))
  ([{:keys [buffer-size buffer-init sstats-init merge-cb]
     :or   {buffer-size 1e5}
     :as   opts}]

   (let [sstats_       (atom sstats-init)
         buf_          (atom (buf-new buffer-init))
         max-buffered  (long buffer-size)
         merge-counter (enc/counter)]

     ;; `merge-cb` is an undocumented option, passed through as-is
     (SummaryStatsBuffered. sstats_ buf_ max-buffered merge-counter merge-cb))))

(defn summary-stats-buffered-fast
  "Returns fastest possible SummaryStatsBuffered: starts with no stats,
  an empty buffer, and no merge counter."
  {:added "v0.5.0 (2022-12-15)"}
  [^long buffer-size merge-cb]
  (let [sstats_ (atom nil)
        buf_    (atom (buf-new))]
    (SummaryStatsBuffered. sstats_ buf_ buffer-size nil merge-cb)))

(comment
  (let [ssb (summary-stats-buffered {:buffer-size 10})] ; 266 qb
    [(enc/qb 1e6 (ssb (rand-int 1000))) (str ssb) @@ssb]))

(defn summary-stats-buffered?
  "Predicate: true iff `x` is an instance of SummaryStatsBuffered."
  [x]
  (instance? SummaryStatsBuffered x))

;;;; Print methods

#?(:clj
   ;; Print both types as "#<ns>.<toString>", with the ns captured at load time
   (let [prefix (str "#" *ns* ".")]
     (defmethod print-method SummaryStats
       [x ^java.io.Writer w]
       (.write w (str prefix x)))

     (defmethod print-method SummaryStatsBuffered
       [x ^java.io.Writer w]
       (.write w (str prefix x)))))
