(ns summaries.core
  (:require [clojure.string :as str]))

(defn- get-strict
  "Strict variant of `get`: returns the value stored under key k in
  map m. When m has no entry for k, throws an ex-info whose message
  names the key and the map label m-label, and whose ex-data lists the
  keys that are present under :existing-keys."
  [m k m-label]
  ;; Guard first so the happy path reads as a plain lookup.
  (when-not (contains? m k)
    (throw (ex-info (str "Key '" k "' not found in map '" m-label "'.")
                    {:existing-keys (keys m)})))
  (get m k))

(defn- expand-starred-keyword
  "Expands a single keyword into a vector of keywords. A keyword whose
  name ends in * (star) becomes two entries: the same keyword with the
  star stripped (namespace preserved) followed by the marker keyword
  ::*. Any other keyword is returned unchanged inside a one-element
  vector."
  [kw]
  (let [kw-name (name kw)]
    (if-not (str/ends-with? kw-name "*")
      [kw]
      (let [unstarred (subs kw-name 0 (dec (count kw-name)))]
        [(keyword (namespace kw) unstarred) ::*]))))

(defn- explicitly-starred-path
  "Takes a sequence of keywords and returns a lazy sequence of
  keywords in which every keyword ending in * (star) is replaced by
  the keyword without the star followed by a separate ::* marker.

  Expansion stops at the first nil (or false) element, so an empty or
  nil path yields an empty sequence.

  eg.  (explicitly-starred-path [:foo* :bar])
       => (:foo ::* :bar)"
  [implicitly-starred-path]
  (lazy-seq
    (let [[k & r] implicitly-starred-path]
      (when k
        (concat (expand-starred-keyword k)
                (explicitly-starred-path r))))))

(defn- grow-specific-paths
  "Given a path and an associative piece of data, returns a lazy
  sequence of specific paths for that data that can be used with
  'get-in'. The path must be a vector of keywords. The names of
  keywords that lead to sequential data structures (vectors) in data
  must end in *.

  Branches whose next key cannot be resolved (the value at that point
  is not associative or does not contain the key) are dropped from the
  result instead of raising an error.

  Throws ex-info when a list is found at a visited location, when a
  sequential value is reached by a key without the * postfix, or when
  a * postfix is used at a location that is not sequential.

  eg. (grow-specific-paths [:foo* :bar*] {:foo [{:bar [{:id 1}]}
                                                {:bar [{:id 2}
                                                       {:id 3}]}]})
      => [[:foo 0 :bar 0] [:foo 1 :bar 0] [:foo 1 :bar 1]]"
  [starred-path data]
  (let [path (explicitly-starred-path starred-path)
        ;; A "shoot" is a pair [resolved-path-so-far keys-still-to-resolve].
        ;; grower works through a queue of shoots, depth first: new shoots
        ;; produced from the current one are placed in front of the rest.
        grower (fn grower [[shoot & shoots]]
                 (when shoot
                   (lazy-seq
                     (let [[shoot-path [key-to-resolve & remaining-keys]] shoot
                           value-at-shoot-path (get-in data shoot-path)]
                       ;; Lists are rejected outright; only vectors are
                       ;; supported as sequential containers.
                       (when (list? value-at-shoot-path)
                         (throw (ex-info "Data structures must not contain lists, path finding works with vectors only."
                                         {:starred-path starred-path
                                          :failing-shoot shoot-path
                                          :key-to-resolve key-to-resolve})))
                       (if key-to-resolve
                         (if (sequential? value-at-shoot-path)
                           (if (= key-to-resolve ::*)
                             ;; Fan out: one new shoot per index of the
                             ;; sequential value, each keeping the
                             ;; remaining keys.
                             (grower (concat (map (fn [vector-index]
                                                    [(conj shoot-path vector-index) remaining-keys])
                                                  (range (count value-at-shoot-path)))
                                             shoots))
                             (throw (ex-info "Path must explicitly contain * postfix to go down lists."
                                             {:starred-path starred-path
                                              :failing-shoot shoot-path
                                              :key-to-resolve key-to-resolve})))
                           (if (= key-to-resolve ::*)
                             (throw (ex-info "Path must not explicitly * postfix at non-sequential locations."
                                             {:starred-path starred-path
                                              :failing-shoot shoot-path
                                              :key-to-resolve key-to-resolve}))
                             (if (and (associative? value-at-shoot-path)
                                      (contains? value-at-shoot-path key-to-resolve))
                               ;; Key exists: descend one level.
                               (grower (cons [(conj shoot-path key-to-resolve) remaining-keys] shoots))
                               ;; Key missing: silently abandon this shoot.
                               (grower shoots))))
                         ;; No keys left: the shoot is a complete
                         ;; specific path, emit it.
                         (cons shoot-path (grower shoots)))))))]
    ;; Start with a single shoot at the root that still has the whole
    ;; path to resolve.
    (grower [[[] path]])))


(defn- evolve-xform-completion
  "Flushes the buffered values vs through the two-argument (step)
  arity of the reducing function rf, starting from result, and then
  calls rf's one-argument (completion) arity on the outcome.

  Early termination is honoured: as soon as rf returns a `reduced`
  value no further elements of vs are fed to it, and the unwrapped
  value (never the Reduced wrapper) is handed to the completion
  arity. Returns whatever the completion arity returns."
  [rf result vs]
  ;; `reduce` stops at the first `reduced` value and returns it
  ;; already unwrapped, which is exactly what the completion step needs.
  (rf (reduce rf result vs)))

(defn- create-collecting-transducer
  "Builds a stateful transducer that buffers inputs instead of passing
  them on immediately. For every input, value-calc-fn computes a
  value which is compared against the value of the inputs currently
  buffered:

  - the first input, or any input for which
    (decision-keep-new calculated current) is truthy, replaces the
    buffer and becomes the new current value;
  - otherwise an input for which
    (decision-keep-also calculated current) is truthy is appended to
    the buffer;
  - all other inputs are discarded.

  The buffered inputs are handed to the wrapped reducing function on
  completion."
  [{:keys [value-calc-fn decision-keep-new decision-keep-also]}]
  (fn [rf]
    (let [state (volatile! nil)]
      (fn
        ([] (rf))
        ([result]
         ;; Nothing was ever seen: just complete the downstream rf.
         (if-some [{:keys [values]} @state]
           (evolve-xform-completion rf result values)
           (rf result)))
        ([result input]
         (let [calculated (value-calc-fn input)
               current    @state]
           (if (or (nil? current)
                   (decision-keep-new calculated (:cur-val current)))
             (vreset! state {:cur-val calculated
                             :values  [input]})
             (when (decision-keep-also calculated (:cur-val current))
               (vswap! state update :values conj input))))
         ;; The step arity never emits; the accumulated result passes
         ;; through untouched.
         result)))))

(defn- max-by
  "Returns a stateful transducer that retains the inputs for which f
  yields the greatest result seen so far (as ordered by `compare`).
  All inputs sharing that greatest result are passed to the wrapped
  step function on completion."
  [f]
  (let [beats-current? (fn [candidate current]
                         (and candidate (pos? (compare candidate current))))
        ties-current?  (fn [candidate current]
                         (and candidate (zero? (compare candidate current))))]
    (create-collecting-transducer
      {:value-calc-fn      f
       :decision-keep-new  beats-current?
       :decision-keep-also ties-current?})))

(defn- min-by
  "Takes a function f and returns a stateful transducer that keeps
  values for which the function returns the minimum result seen (as
  ordered by `compare`). The values for which f returned this result
  are passed to the wrapped step function on completion.

  Inputs for which f returns nil never displace or join an existing
  minimum, and a nil minimum (left behind when the very first input
  had no result) is replaced by the first real result that follows."
  [f]
  (create-collecting-transducer
    {:value-calc-fn f
     ;; `compare` orders nil before every other value, so a plain
     ;; \"less than current\" test could never replace a nil current
     ;; value; treat nil as \"no minimum yet\" explicitly.
     :decision-keep-new (fn [candidate current]
                          (and candidate
                               (or (nil? current)
                                   (neg? (compare candidate current)))))
     :decision-keep-also (fn [candidate current]
                           (and candidate
                                (zero? (compare candidate current))))}))

(defn- abs
  "Calculates the absolute value of the number v."
  [v]
  (if (neg? v)
    (- v)
    v))

(defn- closest-to
  "Returns a stateful transducer that retains the inputs whose f
  result lies closest to target, measured as the absolute difference
  between target and that result. Inputs for which f returns nil (or
  false) get no distance. The retained inputs are passed to the
  wrapped step function on completion."
  [f target]
  (let [distance (fn [v]
                   (when-let [r (f v)]
                     (abs (- target r))))]
    (min-by distance)))

;; Operators for two-element filter vectors [op key]. Each entry maps
;; the operator symbol to a function that, given the key, returns a
;; stateful transducer; create-filtering-xform looks operators up here.
;; NOTE(review): closest-to takes two arguments (f and target), so a
;; two-element ['closest-to key] filter would fail with an arity error;
;; the three-element form is special-cased in create-filtering-xform.
(def ^:private default-transducing-fns
  {'max max-by
   'min min-by
   'closest-to closest-to})

(defn- contained-in
  "Returns true when v is one of the elements of the collection vs and
  false otherwise. Membership is tested with `contains?` on a set so
  that falsey members (nil and false) are found as well."
  [v vs]
  ;; Calling the set as a function returns the element itself, which
  ;; would report a contained nil/false as absent.
  (contains? (set vs) v))

;; Predicates for three-element filter vectors [op key value]. Each
;; predicate is called as (pred (key item) value) and the item is kept
;; when the result is truthy. The full set of ordering comparators is
;; offered so that every comparison can be expressed directly.
(def ^:private default-filtering-fns
  {'=      =
   '!=     not=
   '<      <
   '<=     <=
   '>      >
   '>=     >=
   'inside contained-in})

(defn- create-filtering-xform
  "Returns a transducer that implements the passed filter-exp.

  Two shapes are supported:

  - [op key value]: keeps the items for which the predicate
    registered under op in default-filtering-fns returns truthy for
    ((key item), value). The operator 'closest-to is handled
    separately and keeps the items whose (key item) is closest to
    value.
  - [op key]: uses the transducer factory registered under op in
    default-transducing-fns, applied to key.

  Throws ex-info when the operator is unknown or when filter-exp does
  not have two or three elements."
  [filter-exp]
  (case (count filter-exp)
    3 (let [[op k v] filter-exp]
        (if (= op 'closest-to)
          (closest-to k v)
          (let [filter-fn (get-strict default-filtering-fns op "default-filtering-fns")]
            (filter (fn [x] (filter-fn (k x) v))))))
    2 (let [[op k] filter-exp
            transducing-fn (get-strict default-transducing-fns op "default-transducing-fns")]
        (transducing-fn k))
    ;; Fail loudly here rather than returning nil, which would only
    ;; surface later as an obscure error when the transducer is used.
    (throw (ex-info "Filter expression must have 2 or 3 elements."
                    {:filter-exp filter-exp}))))

(defn path-parts
  "Splits a path of keywords and filter vectors into labelled parts.
  Each run of consecutive keywords becomes [:path [kw ...]] and every
  other element becomes [:filter element], unchanged. Returns nil for
  an empty path.

  The two-argument arity additionally takes acc, a [:path [...]] part
  under construction to which leading keywords of path are appended.

  eg.
  (path-parts [:go* :in :this ['<= :v 33] :find* :more ['max :ttt] :thingies* :id])

  => ([:path [:go* :in :this]]
      [:filter [<= :v 33]]
      [:path [:find* :more]]
      [:filter [max :ttt]]
      [:path [:thingies* :id]])"
  ([path]
   (when (seq path)
     (lazy-seq
       (let [[head & tail] path]
         (if (keyword? head)
           ;; Start accumulating a new run of keywords.
           (path-parts tail [:path [head]])
           (cons [:filter head] (path-parts tail)))))))
  ([path acc]
   (if (empty? path)
     [acc]
     (lazy-seq
       (let [[head & tail] path
             [_ collected] acc]
         (if (keyword? head)
           (path-parts tail [:path (conj collected head)])
           ;; A non-keyword ends the run: emit it and carry on with
           ;; the one-argument arity from the same position.
           (cons acc (path-parts path))))))))

(defn collect-values
  "Finds and returns the values in data that path leads to. Path is a
  vector of keywords and sub-vectors. Keywords are followed into the
  nested data; a keyword that leads to a vector must end in *, and
  from there on every item of that vector is followed. Sub-vectors are
  filter expressions: they do not navigate deeper but narrow down the
  values collected so far. Data that is not sequential is treated as a
  collection of one."
  [path data]
  (let [follow-path (fn [items starred-path]
                      (mapcat (fn [item]
                                (for [specific-path (grow-specific-paths starred-path item)]
                                  (get-in item specific-path)))
                              items))
        apply-part  (fn [items [kind spec]]
                      (if (= :path kind)
                        (follow-path items spec)
                        (sequence (create-filtering-xform spec) items)))
        ;; Only the initial data can be non-sequential; every part
        ;; application yields a sequence.
        initial     (if (sequential? data) data [data])]
    (reduce apply-part initial (path-parts path))))

(defn- collect-non-nil-values
  "Same as collect-values, but with every nil dropped from the
  resulting sequence."
  [path data]
  (remove nil? (collect-values path data)))

(defn- values
  "Collects the non-nil values a task refers to. When the task has
  :input-paths, the values of all those paths are concatenated in
  order; otherwise the single :input-path is used."
  [{:keys [input-path input-paths]} data]
  (if-not input-paths
    (collect-non-nil-values input-path data)
    (mapcat (fn [path] (collect-non-nil-values path data))
            input-paths)))

(defn- sum-values
  "Adds up all values the task (sprinkle) selects from data."
  [sprinkle data]
  (let [vs (values sprinkle data)]
    (apply + vs)))

(defn- count-values
  "Returns how many values the task (sprinkle) selects from data."
  [sprinkle data]
  (count (values sprinkle data)))

(defn- avg-values
  "Returns the arithmetic mean, as a double, of the values the task
  (sprinkle) selects from data, or nil when there are none."
  [sprinkle data]
  (let [vs (values sprinkle data)
        n  (count vs)]
    (when (pos? n)
      (double (/ (apply + vs) n)))))

(defn- any-values?
  "Returns true when the task (sprinkle) selects at least one value
  from data, false otherwise."
  [sprinkle data]
  (pos? (count-values sprinkle data)))

(defn- min-values
  "Returns the smallest of the values the task (sprinkle) selects from
  data, or nil when there are none."
  [sprinkle data]
  (let [vs (values sprinkle data)]
    (when (seq vs)
      (apply min vs))))

(defn- max-values
  "Returns the largest of the values the task (sprinkle) selects from
  data, or nil when there are none."
  [sprinkle data]
  (let [vs (values sprinkle data)]
    (when (seq vs)
      (apply max vs))))

(defn- unique-values
  "Returns a vector with the distinct values the task (sprinkle)
  selects from data. The order is that of the intermediate set."
  [sprinkle data]
  (let [distinct-vs (set (values sprinkle data))]
    (vec distinct-vs)))

(defn- count-unique-values
  "Returns the number of distinct values the task (sprinkle) selects
  from data."
  [sprinkle data]
  (count (unique-values sprinkle data)))

(defn- procent
  "Returns, as a double, the sum of the values at :numerator-path
  expressed as a percentage of the sum of the values at
  :denominator-path. Returns nil when the denominator sum is zero,
  whatever its numeric type (0, 0.0, 0N, ...)."
  [{:keys [numerator-path denominator-path]} data]
  (let [num (sum-values {:input-path numerator-path} data)
        den (sum-values {:input-path denominator-path} data)]
    ;; `zero?` rather than `(not= den 0)`: in Clojure (= 0.0 0) is
    ;; false, so a floating-point zero would slip through and the
    ;; division would produce NaN or Infinity.
    (when (and den (not (zero? den)))
      (* 100.0
         (/ (double num) (double den))))))

(defn- single-value
  "Returns the first value the task (sprinkle) selects from data, or
  nil when there is none."
  [sprinkle data]
  (->> (values sprinkle data)
       first))

;; Built-in summary operations, keyed by the symbol a task names under
;; :operation. calc-value looks the operation up here (merged with any
;; caller-supplied operations) and calls it with the task map and the
;; data; 'values is the operation used when a task names none.
(def ^:private default-operations
  {'procent      procent
   'avg          avg-values
   'sum          sum-values
   'count        count-values
   'unique-count count-unique-values
   'values       values
   'unique       unique-values
   'any?         any-values?
   'min          min-values
   'max          max-values
   'single       single-value})

(defn- calc-value
  "Calculates the summary value of task for data. The task's
  :operation (default 'values) is looked up among the built-in
  operations merged with the caller-supplied :operations; a
  caller-supplied operation is invoked with the sequence of values the
  task selects. When the task names a :formatter, the raw result is
  passed through the matching function from :formatters. Throws
  ex-info when the operation or formatter is not known."
  [{:keys [operation formatter] :as task} data {:keys [operations formatters]}]
  (let [wrap-custom (fn [[op-name f]] [op-name (comp f values)])
        available   (into default-operations (map wrap-custom) operations)
        op-fn       (get-strict available (or operation 'values) "operations")
        raw-val     (op-fn task data)]
    (if-not formatter
      raw-val
      (let [format-fn (get-strict formatters formatter "formatters")]
        (format-fn raw-val)))))

(defn- split-after-starred-keyword
  "Takes a path (a sequence whose elements are typically keywords) and
  returns a vector of two vectors. When no keyword in the path ends in
  '*', the first vector holds the whole path and the second is empty.
  Otherwise the first vector holds everything up to and including the
  first starred keyword, with its star removed and its namespace (if
  any) preserved, and the second vector holds the rest. Elements that
  are not keywords are never treated as starred.

  eg. [:keep :on :trucking]  => [[:keep :on :trucking] []]
      [:keep :on* :trucking] => [[:keep :on] [:trucking]]
      [:keep :ns/on* :trucking] => [[:keep :ns/on] [:trucking]]"
  [path]
  (let [starred? (fn [x]
                   (and (keyword? x)
                        (str/ends-with? (name x) "*")))
        ;; Keep the namespace so the result agrees with how starred
        ;; keywords are expanded elsewhere in this namespace.
        unstar   (fn [k]
                   (let [n (name k)]
                     (keyword (namespace k) (subs n 0 (dec (count n))))))
        [before [starred & after]] (split-with (complement starred?) path)]
    (if starred
      [(conj (vec before) (unstar starred)) (vec after)]
      [(vec before) []])))

(defn- assoc-in-*
  "Takes a map m, an insert path and a sequence of values vs, and
  returns m with vs associated at the insert path. When the insert
  path contains a keyword ending in *, each value is first wrapped in
  nested maps following the part of the path after that keyword, and
  the vector of wrapped values is stored at the path up to and
  including the starred keyword (with its star removed).

  eg. (assoc-in-* {:a 1} [:they :keep :on :trucking] [1 2 3])
      => {:a 1
          :they {:keep {:on {:trucking [1 2 3]}}}}

      (assoc-in-* {:a 1} [:they :keep* :on :trucking] [1 2 3])
      => {:a 1
          :they {:keep [{:on {:trucking 1}}
                        {:on {:trucking 2}}
                        {:on {:trucking 3}}]}}"
  [m insert-path vs]
  (let [[target-path wrapper-path] (split-after-starred-keyword insert-path)
        wrap    (fn [v] (assoc-in {} wrapper-path v))
        ;; Without a wrapper path the values are stored exactly as given.
        payload (if (empty? wrapper-path)
                  vs
                  (mapv wrap vs))]
    (assoc-in m target-path payload)))

(defn apply-summary
  "Takes a summarization task and a piece of data and returns the data
  with the task applied. The task's :start-path is expanded into the
  specific locations it matches in data; for each location the summary
  value is calculated from the original data found there and inserted
  at :insert-path relative to that location. Any further arguments
  (opts) are forwarded unchanged to the value calculation, which reads
  :operations and :formatters from them."
  [{:keys [start-path insert-path] :as task} data & opts]
  (let [summarize-at (fn [acc specific-start-path]
                       (let [subject (get-in data specific-start-path)
                             summary (calc-value task subject opts)
                             target  (concat specific-start-path insert-path)]
                         (assoc-in-* acc target summary)))]
    (reduce summarize-at data (grow-specific-paths start-path data))))
