(ns think-stats.cdf
  (:require (think-stats
              [constants :as c]
              [homeless :as h]
              [hist :as hist]
              [types :as types]))
  (:import org.apache.commons.math3.special.Erf))

; Forward declarations: the cdff methods and build-cdf-fn below refer to these
; vars before their defn forms appear later in this file.
(declare cdf cdf->value cdf->probability build-cdf-fn)

(defmulti cdff
  "Returns a function (f x) that computes the CDF(x) = p and its inverse from the data set s.

  The returned function takes a value and an optional direction keyword. With no
  direction (or any direction other than :value) it maps a value to its
  cumulative probability; with :value it maps a probability back to a value.

  (def cdf (cdff (range 1 101)))
  (cdf 10) => 0.1
  (cdf 10 :probability) => 0.1
  (cdf 0.1 :value) => 10

  "
  ; Dispatches on the class of the argument. NOTE(review): the methods below are
  ; registered under :types/seq and :types/map, which presumably rely on a
  ; `derive` hierarchy set up in think-stats.types -- confirm the keywords match.
  class)

(defmethod cdff :types/seq
  [s]
  ;; Build the empirical CDF of the raw data set, then split it into parallel
  ;; key/value vectors. Vectors (rather than the lazy seqs returned by
  ;; keys/vals) keep this method consistent with the :types/map method and give
  ;; constant-time `nth` for the positional lookups done by the returned fn.
  (let [m (cdf s)
        kys (vec (keys m))
        vls (vec (vals m))]
    (build-cdf-fn kys vls)))

(defmethod cdff :types/map
  [h]
  ;; Convert the map argument to a CDF via hist/hist->cdf, then hand its keys
  ;; and values (as parallel vectors) to build-cdf-fn.
  (let [cumulative (hist/hist->cdf h)]
    (build-cdf-fn (vec (keys cumulative))
                  (vec (vals cumulative)))))

(defn build-cdf-fn
  "Returns a fn of `x` and an optional `direction`, closed over the parallel
  collections `kys` and `vls`.

  When `direction` is :value the call is forwarded to cdf->value (x is treated
  as a probability); for any other direction, or none, it is forwarded to
  cdf->probability (x is treated as a value)."
  [kys vls]
  (fn [x & [direction]]
    ;; Pick the lookup at call time; only an explicit :value selects the inverse.
    (let [lookup (if (= direction :value)
                   cdf->value
                   cdf->probability)]
      (lookup kys vls x))))

(defn cdf
  "Builds the empirical CDF of the sequential collection `s` as a sorted map.

  Each distinct element of `s` maps to rank/count, where rank is the 1-based
  position of its last occurrence in sorted order. The fractions come from `/`
  on integers, so they are exact (ratios or integers). An empty `s` yields an
  empty sorted map. Asserts that `s` is sequential."
  [s]
  (assert (sequential? s) "Cannot compute the cdf on a non-seq data set.")
  (let [ordered (sort s)
        n (count ordered)]
    ;; Later (higher-ranked) duplicates overwrite earlier ones in the map.
    (into (sorted-map)
          (map-indexed (fn [i v] [v (/ (inc i) n)])
                       ordered))))

(defn cdf->pmf
  "Convert a CDF to a PMF.

  `cdf` must be a sorted map of value -> cumulative probability. The result is
  a sorted map (default ordering) with the same keys, where the first key keeps
  its cumulative value and every later key gets the difference between its
  cumulative value and that of the preceding key. An empty CDF yields an empty
  sorted map. Asserts that `cdf` is a sorted map."
  [cdf]
  (assert (and (map? cdf) (sorted? cdf)) "CDF must be a sorted map.")
  (if (empty? cdf)
    ;; Without this guard the seed entry below would be [nil nil].
    (sorted-map)
    (let [ks (keys cdf)
          vs (vals cdf)]
      (into (sorted-map (first ks) (first vs))
            ;; Pair each later key with the step from its predecessor.
            (map (fn [k [prev cur]] [k (- cur prev)])
                 (rest ks)
                 (partition 2 1 vs))))))


(defn pmf->cdf
  "Convert a PMF to a CDF.

  `pmf` must be a map of value -> probability. The entries are ordered by
  [probability value] (probability first, value as tie-breaker) and the
  probabilities are accumulated in that order. The result is a sorted map using
  that same ordering, mapping each value to its running total. Asserts that
  `pmf` is a map."
  [pmf]
  (assert (map? pmf) "PMF must be a map.")
  ; FIXME: from here we need to divide the value by the reductions
  ; NOTE(review): accumulation follows probability order, not key order --
  ; confirm this is the intended definition of the CDF.
  (let [by-mass (fn [k1 k2]
                  (compare [(get pmf k1) k1]
                           [(get pmf k2) k2]))
        ordered (into (sorted-map-by by-mass) pmf)
        running (reductions + (vals ordered))]
    (into (sorted-map-by by-mass)
          (map vector (keys ordered) running))))


(defn cdf->probability
  "Looks up the cumulative probability for the value `x`.

  `kys` and `vls` are parallel collections (values and their cumulative
  probabilities). Returns 0 when `x` is below the first key; otherwise returns
  the element of `vls` at the index h/bisect reports for `x` in `kys` (called
  with :left)."
  [kys vls x]
  (if (< x (first kys))
    0
    (nth vls (h/bisect kys x :left))))

(defn cdf->value
  "Inverse lookup: finds the value associated with the probability `prob`.

  `kys` and `vls` are parallel collections (values and their cumulative
  probabilities). Returns nil when `prob` is below 0 or above 1; otherwise
  returns the element of `kys` at the index h/bisect reports for `prob` in
  `vls` (called with :left)."
  [kys vls prob]
  (when-not (or (< prob 0) (> prob 1))
    (nth kys (h/bisect vls prob :left))))

(defn cdf->median
  "Returns the median of a distribution, given a cdf fn as produced by cdff:
  the value the fn reports for probability 0.5 in the :value direction."
  [cdf]
  (let [half 0.5]
    (cdf half :value)))

(defn cdf->interquartile
  "Given a cdf fn generated by cdff, returns a vector of the 25th percentile,
  the median (50th percentile) and the 75th percentile, in that order. Each
  entry is the fn's :value lookup for 0.25, 0.5 and 0.75 respectively."
  [cdf]
  (mapv (fn [p] (cdf p :value))
        [0.25 0.5 0.75]))


; FIXME: the order of params is not consistent with random/sample
(defn sample-cdf
  "Generate a lazy seq of `n` values chosen at random from the given cdf fn.
  See cdff above.

  Each element is produced by drawing a uniform random number with `rand` and
  asking the cdf fn for the corresponding value (:value direction).

  (def cdf (cdff (take 50 (repeatedly #(rand-int 10)))))
  (sample-cdf cdf 10)
  "
  [cdf n]
  ;; `repeatedly` expresses \"n independent draws\" directly, with no unused
  ;; loop index.
  (repeatedly n #(cdf (rand) :value)))


(defn cdf->probability-range
  "Given a CDF fn, compute the probability mass between `lower` and `upper`,
  i.e. |CDF(upper) - CDF(lower)|, which for lower <= upper is
  P(lower < x <= upper).

  `cdf-fn` is called with a single argument and must return a number. The
  result keeps the numeric type of the difference, so exact ratios stay exact."
  [cdf-fn lower upper]
  (let [delta (- (cdf-fn upper)
                 (cdf-fn lower))]
    ;; Math/abs only has primitive overloads (int/long/float/double), so it
    ;; cannot be applied to ratios; negate manually instead.
    (if (neg? delta)
      (- delta)
      delta)))

(defn normalcdf
  "CDF of the normal distribution with mean `mu` and standard deviation
  `sigma`, evaluated at `x`: 0.5 * (1 + erf((x - mu) / (sigma * sqrt2)))."
  [mu sigma x]
  (let [scaled (/ (- x mu)
                  (* sigma c/sqrt2))]
    (* 0.5 (+ 1 (Erf/erf scaled)))))

(defn normalicdf
  "Inverse CDF of the normal distribution with mean `mu` and standard
  deviation `sigma`, evaluated at probability `p`:
  mu + sigma * sqrt2 * erfInv(2p - 1)."
  [mu sigma p]
  (let [centered (- (* 2 p) 1)
        z (* c/sqrt2 (Erf/erfInv centered))]
    (+ (* sigma z)
       mu)))

(defn expocdf
  "CDF of the exponential distribution with rate `lambda`, evaluated at `x`:
  1 - e^(-lambda * x)."
  [lambda x]
  (let [exponent (* -1 lambda x)
        tail (Math/exp exponent)]
    (- 1.0 tail)))


