(ns smx.eventstore.search.plan-node
  (:require [smx.eventstore.search.state :as state]
            [clojure.tools.logging :as log]
            [smx.eventstore.search.log :as slog]
            [qbits.alia :as alia]
            [qbits.hayt :as hayt]
            [clojure.core.async :as async]
            [clj-time.core :as time]
            [clj-time.coerce :as timec]
            [clj-time.format :as timef]
            [clj-time.periodic :as timep]
            [schema.core :as s]
            [clojure.tools.logging :as log]
            [smx.eventstore.search.glob :as glob])
  (:import (clojure.lang Keyword)
           (org.joda.time.format DateTimeFormatter)
           (smx.eventstore.search.glob Glob)))

;;;;;;;;;;;;;
;;; Schema

(defprotocol PlanNode
  (execute [^PlanNode this context]))

;;PredNode support

(declare Operator)

(defmulti predicator
  "Returns a function that matches the intent of `pred-cond`"
  first)

(defmethod predicator :and [[_ & children]]                 ;this should probably be truthy?
  (fn [result]
    (every? true? (map #((predicator %) result) children))))

(defmethod predicator :or [[_ & children]]                  ;this should probably be truthy?
  (fn [result]
    (some true? (map #((predicator %) result) children))))

;Given an collection `coll-field` do any items equal `oprd`
(defmethod predicator :any [[_ coll-field oprd & opts]]
  (if (= (type oprd) Glob)
    (fn [result]
      (if (coll-field result)
        (some? (some #(glob/matches? oprd %) (coll-field result)))))
    (if (and opts (:cased opts))
      (fn [result]
        (if (coll-field result)
          (some? (some #(= oprd (.toLowerCase %)) (coll-field result)))))
      (fn [result]
        (if (coll-field result)
          (.contains (coll-field result) oprd))))))

;Check string field `str-field`'s value matches glob
(defmethod predicator :contains [[_ str-field oprd & opts]]
  (if (= (type oprd) Glob)
    (fn [result]
      (log/trace "Filtering contains" str-field "with glob" oprd "against" (str \' (str-field result) \'))
      (if (str-field result)
        (glob/includes? oprd (str-field result))))
    (if (and opts (.contains opts :cased))
      (fn [result]
        (log/trace "Filtering contains" str-field oprd "against" (str \' (str-field result) \'))
        (if (str-field result)
          (.contains (str-field result) oprd)))
      (fn [result]
        (log/trace "Filtering contains" str-field oprd "against" (str \' (str-field result) \'))
        (if (str-field result)
          (.contains (.toLowerCase (str-field result)) oprd))))))

(defmethod predicator :equals [[_ field oprd & opts]]
  (predicator [= field oprd]))

(defmethod predicator = [[_ field oprd & opts]]
  (if (= (type oprd) Glob)
    (fn [result]
      (if (field result)
        (glob/matches? oprd (field result))))
    (if (and opts (:cased opts))
      (fn [result]
        (if (field result)
          (.equalsIgnoreCase (field result) oprd)))
      (fn [result]
        (if (field result)
          (= (field result) oprd))))))

; punting here on globs and cased for now

(defmethod predicator > [[_ field oprd & opts]]
  (fn [result]
    (if (result field)
      (pos? (compare (result field) oprd)))))

(defmethod predicator >= [[_ field oprd & opts]]
  (fn [result]
    (if (result field)
      (not (neg? (compare (result field) oprd))))))

(defmethod predicator < [[_ field oprd & opts]]
  (fn [result]
    (if (result field)
      (neg? (compare (result field) oprd)))))

(defmethod predicator <= [[_ field oprd & opts]]
  (fn [result]
    (if (result field)
      (not (pos? (compare (result field) oprd))))))

(defmethod predicator :default [[opr field oprd :as pred-cond] & opts]
  (throw (ex-info (str "Unknown predicate for " (first pred-cond)) {:cond pred-cond})))

(def Operator (s/enum (vec (keys (methods predicator)))))
(def PredCond s/Any)

(s/defn predicator-filter-xf
  "So this is a bit funkier than it looks.
  First, we go from data to fn with each pred-cond being processed
  by the predicator multmethod to produce an aggregration of preds.
  Second, we take these preds and produce a transducer for filtering values on a chan"
  [pred-conds :- PredCond]
  (filter (predicator pred-conds)))

;;;;;;;;;;;;;
;;; Impl

(def ^:const index-buffer-size 20)
(def ^:const index-fetch-size 20)
(def ^:const seq-scan-fetch-size 100)
(def ^:const seq-scan-timeout 10000)
(def ^:const index-timeout 2000)

; todo id -> sid resolution should be via model
(defn id-comparator [left right]
  (compare (:sid left) (:sid right)))

(defmacro go-try [& body]
  `(go (try ~@body
            (catch Throwable ex#
              ex#))))

(defn fail-node [search-id node t err-chan]
  (slog/error t search-id "Failing node" node " ex-data:" (ex-data t))
  (async/put! err-chan (ex-info (format "%s:%s failed" (:type node) (:id node)) {:search-id search-id :node node} t)))

(defn and-confluence
  "Takes a coll of ordered source channels and returns a channel which
  contains all values that appear in each channel in the order they were read.
  Optionally a conf-comparator can be supplied else `compare` will be used to determine
  value equality and ordering.
  The returned channel will be unbuffered by default, or a buf-or-n can be supplied.
  The channel will close after all the source channels have closed."
  ([chs buf-or-n conf-comparator]
   (let [chs-count       (count chs)
         out-chan        (async/chan buf-or-n)
         chs             (set chs)
         conf-comparator (or conf-comparator compare)]
     (if (empty? chs)
       (async/close! out-chan)
       (async/go
         (try
           (loop
            [[val ch] (async/alts! (vec chs))
             candidate       val
             matched-set   #{}
             unmatched-set chs]
             (log/tracef "\nchs %s \ncandidate %s \nval %s \nch %s \nmatched-set %s \nunmatched-set %s"
               chs candidate val ch matched-set unmatched-set)
             (if (nil? val)                                 ;in channels closed
               (async/close! out-chan)                      ;terminate out-chan
               (let [comparison (conf-comparator val candidate)]
                 (log/trace "AndComparison:" val candidate comparison "to" out-chan)
                 (if (zero? comparison)                     ;still valid val
                   (if (= chs-count (inc (count matched-set))) ;intersecting!
                     (do
                       (log/trace "outputting" candidate "to" out-chan)
                       (async/>! out-chan candidate)          ;parked put to out-chan
                       (let [[val ch] (async/alts! (vec chs))]
                         (recur [val ch] val #{} chs )))
                     ;more chans to check
                     (let [new-matched (conj matched-set ch)
                           new-unmatched (disj unmatched-set ch)
                           [val ch] (async/alts! (vec new-unmatched))]
                       (recur [val ch] candidate new-matched new-unmatched)))
                   (if (pos? comparison)                    ;skip   ;does this dep on order?
                     (let [[val ch] (async/alts! (vec unmatched-set))]
                       (recur [val ch] candidate matched-set unmatched-set))
                     (let [new-matched   #{ch}
                           new-unmatched (disj chs ch)
                           new-candidate val
                           [val ch] (async/alts! (vec new-unmatched))]
                       (recur [val ch] new-candidate new-matched new-unmatched)))))))
           (catch Throwable t
             (async/>! out-chan t)
             (async/close! out-chan)))))
     out-chan)))

(defn or-confluence
  "Takes a coll of ordered source channels and returns a channel which
  contains all values that appear in each channel in the order they were read.
  Optionally a comparator can be supplied else `compare` will be used to determine
  value equality and ordering.
  The returned channel will be unbuffered by default, or a buf-or-n can be supplied.
  The channel will close after all the source channels have closed."
  ([chs buf-or-n comparator]
   (let [out-chan (async/chan buf-or-n)]
     (if (empty? chs)
       (async/close! out-chan)
       (async/go
         (try
           (loop
            ;when n vals (minus dupes) we output the first val in order as per comparator
            [vals (sorted-map-by (or comparator compare))
             chs  (set chs)]
             (log/tracef "\nchs %s \nvals %s"  chs vals)
             (if (seq chs)
               (let [[val ch] (async/alts! (vec chs))
                     chs  (if (or (nil? val)                ;ch depleted
                                (not (vals val)))           ;not dupe
                            (disj chs ch) chs)
                     vals (if (and (not (nil? val))
                                (not (vals val)))
                            (assoc vals val ch) vals)]
                 (recur vals chs))
               (let [[next-val next-ch] (last vals)        ;n vals so no available chs so safe to choose
                     vals (dissoc vals next-val)
                     chs  (if next-ch (conj chs next-ch) chs)] ;ch selectable again
                 (when next-val
                   (log/trace "outputting " next-val)
                   (async/>! out-chan next-val)
                   (recur vals chs)))))
           (async/close! out-chan)
           (catch Throwable t
             (async/>! out-chan t)
             (async/close! out-chan)))))
     out-chan)))

(def ^DateTimeFormatter bin-formatter (.withZoneUTC (:basic-date timef/formatters)))

(defn get-bins [range bin-period]
  ;note commiting to 1 day utc bins here
  (if (= (first range) (second range))
    []
    (let [last-bin-date (.withTimeAtStartOfDay (second range))
          bin-dates     (doall (take-while (fn [bd]
                                             (and (time/before? bd (second range)) (not (time/after? bd last-bin-date))))
                                 (timep/periodic-seq (.withTimeAtStartOfDay (first range)) bin-period)))
          bins          (map #(timef/unparse bin-formatter %) bin-dates)
          desc          true]                               ;todo config?
      (if desc (vec (reverse bins)) (vec bins)))))

(defn get-bin [relation cust-ref date-str bin-period]
  (let [date (timef/parse bin-formatter date-str)]
    (timef/unparse bin-formatter date)))

(defmacro handle-or-err [search-id result-or-ex out-chan err-chan & body]

  `(let [err# (cond
                (ex-data ~result-or-ex) {:err :cql :exception ~result-or-ex}
                (instance? Throwable ~result-or-ex) {:err :unknown :exception ~result-or-ex}
                (nil? ~result-or-ex) {:err :timeout})]
     (if err#
       (try
         (async/put! ~err-chan (ex-info "Error in plan node" err# (:exception err#)))
         (catch Throwable t#
           (log/error t# (format "Couldn't put error: %s into chan" ~search-id err#)))
         (finally (async/close! ~out-chan)))
       (do ~@body))))

(defrecord SeqScan [type table-cf bins columns where-conds]
  PlanNode
  (execute [this context]
    (let [{:keys [search-id err-chan session]} context
          out-chan (async/chan 1)]
      (async/go
        (try
          (loop
           [bin  (first bins)
            bins (next bins)]
            (let [t                  (async/timeout seq-scan-timeout)
                  bin-chan           (async/chan index-fetch-size)
                  binned-where-conds (concat [[= :bin bin]] where-conds) ;todo bin walkthrough
                  query              (hayt/select
                                       table-cf
                                       (apply hayt/columns columns)
                                       (hayt/where binned-where-conds)
                                       (hayt/allow-filtering))
                  cql-chan           (alia/execute-chan session query {:consistency :quorum :fetch-size seq-scan-fetch-size})
                  [result _] (async/alts! [t cql-chan])]
              (slog/debug "SeqStatement:" (alia/query->statement query nil))
              (slog/debug "SeqResults:" result)
              (if (ex-data result)
                (do (slog/error (str "Unexpected error in seq plan " query))
                    (throw result))
                (do (async/>! out-chan bin-chan)
                    (async/onto-chan bin-chan (reverse (sort-by :sid result))) ;we block here till next bin needed; closed automatically by onto-chan
                    (if (nil? (first bins))
                      (async/close! out-chan)
                      (recur (first bins) (next bins)))))))
          (catch Throwable t (fail-node search-id this t err-chan))))
      out-chan)))                                           ;return this straight away for polling

;gotta to be a better way
(defn dedupe-xf [comparator]
  (fn [rf]
    (let [pv (volatile! ::none)]
      (fn
        ([] (rf))
        ([result] (rf result))
        ([result input]
         (let [prior @pv]
           (vreset! pv input)
           (if (zero? (comparator prior input))
             result
             (rf result input))))))))

(defrecord IndexScan [type index-cf index-field bins range where-conds filter-conds]
  PlanNode
  ;todo bin walkthrough
  ; With output chan of buffer size 1:
  ; for each bin:
  ; select [sid, msg-date] from index matching on where-conds
  ; create bin-chan
  ; sort by id and put seq onto bin-chan
  ; check ctrl-chan for abort
  (execute
    [this context]
    (let [{:keys [search-id session err-chan]} context
          out-chan            (async/chan 1)
          ranged-filter-conds [:and [>= :msg_date (first range)]
                               [< :msg_date (second range)]]
          ranged-filter-conds (if filter-conds (conj ranged-filter-conds filter-conds) ranged-filter-conds)
          predicator-filter   (when ranged-filter-conds
                                (slog/debug "IndexFilterConds:" ranged-filter-conds)
                                (predicator-filter-xf ranged-filter-conds))]
      (async/go
        (try
          (loop [bin             (first bins)
                 bins            (next bins)
                 extremity-label "Start"]
            (let [bin-chan           (async/chan index-fetch-size (if predicator-filter
                                                                    (comp predicator-filter (dedupe-xf id-comparator))
                                                                    (dedupe-xf id-comparator)))
                  binned-where-conds (into [[= :bin bin]] where-conds)
                  query              (hayt/select           ;could partly precompile todo?
                                       index-cf
                                       (hayt/columns :sid :msg_date index-field)
                                       (hayt/where binned-where-conds))]

              (if extremity-label (slog/debug (format "IndexStatement%s:" extremity-label)
                                    (alia/query->statement query nil)))
              (let [t        (async/timeout index-timeout)
                    cql-chan (alia/execute-chan session query {:consistency :quorum :fetch-size index-fetch-size})
                    [result chan] (async/alts! [t cql-chan])]
                (handle-or-err search-id result out-chan err-chan
                  (if extremity-label
                    (slog/debug (format "IndexBin%s:" extremity-label) index-cf "result" result))
                  (async/>! out-chan bin-chan)
                  ;we block here till next bin needed
                  (log/trace "result  from search is " result)
                  (log/trace "result  from search sorted is " (sort-by :sid result))
                  (log/trace "result  from search sorted and reverse is " (reverse (sort-by :sid result)))
                  (slog/trace "IndexResult:" (reverse (sort-by :sid result)))
                  (async/onto-chan bin-chan (reverse (sort-by :sid result)))
                  (if-not (first bins)
                    (async/close! out-chan)
                    (recur (first bins) (next bins) (when-not (next bins) "End")))))))
          (catch Throwable t (fail-node search-id this t err-chan))))
      ;return this straight away for polling
      out-chan)))

(defrecord MergeNode [type id children]
  PlanNode
  (execute [this context]
    "This currently relies on matching index bin sizes.
  With output chan of buffer size 1:
  Until nil child index for bin do:
  create bin-chan
  put onto bin-chan (confluence indexes)
  check ctrl-chan for abort "
    ;TODO support non matching (but aligned?  [t1..tn] [[t1..tf][tg..tn]] ) indexes
    (let [{:keys [search-id err-chan]} context
          out-chan  (async/chan 1)
          in-chans  (map #(execute % context) children)
          and-merge (= type "AndMergeNode")
          merge-fn  (if and-merge and-confluence or-confluence)] ;any value in :merge-type?
      ;walk idx-chans       /
      ; <! a bin chan off idx-chan
      ; if bin-chan is nil
      ;   close out-chan
      ; else if count(bin-chan) = count(idx-bin-chan)
      ;   output the merged confluence on out-chan
      ;   reset idx-bin-chans and rewalk
      ; else next bin-chan
      ;wont work with unaligned indexes
      (async/go
        (try
          (loop [ics in-chans
                 bcs []]
            (let [ic  (first ics)
                  nxt (next ics)
                  bc  (if ic (async/<! ic))]
              (if (instance? Throwable bc)
                (fail-node search-id this bc err-chan)
                (if (and (nil? bc)
                      ;terminate case depends on merge-fn bit ugh either fail fast for and or fail on nothing for or
                      (or and-merge (empty? bcs)))
                  (async/close! out-chan)
                  ;we'll always have a bin chan? todo what happens with a new index
                  (if-not nxt                               ;got all, now merge and reset
                    (do
                      (slog/trace "MergeInChans:" (vec (conj bcs bc)))
                      ;todo not totally happy with this, should we just chuck bc's through a buffering transducer?
                      (async/>! out-chan (merge-fn (if bc (conj bcs bc) bcs) nil #(compare (:sid %1) (:sid %2)))) ;merge-bin-chans
                      (recur in-chans []))
                    (recur nxt (if bc (conj bcs bc) bc))))))) ;still collecting ibc's before merge
          (catch Throwable t (fail-node search-id this t err-chan))))
      (slog/trace "MergeOutChan:" out-chan)
      out-chan)))

(defrecord FilterNode [type child bins table-cf columns where-conds]
  PlanNode
  ;With output chan of buffer size 1:
  ;For each merged bin chan -or index bin chan if 1 index and no merge-:
  ;check ctrl-chan for abort
  ;Get in chan

  (execute [this context]
    (let [{:keys [search-id session err-chan]} context
          out-chan     (async/chan 1)
          select-where (fn [idx-ptr]                        ;todo formalise idx-ptrs?
                         (let [bin                (get-bin table-cf nil (timef/unparse bin-formatter (:msg_date idx-ptr)) (state/default-bin-period)) ;todo cust-ref? bin-conds?
                               sid                (:sid idx-ptr)
                               binned-where-conds (concat [[= :bin bin]] where-conds)
                               query              (hayt/select table-cf
                                                    (apply hayt/columns columns)
                                                    (hayt/where (concat binned-where-conds [[= :sid sid]])))
                               _                  (slog/trace "FilterSelect:" (pr-str query))]
                           (let [result (alia/execute session query {:consistency :quorum :fetch-size index-fetch-size})]
                             (slog/trace "FilterResult:" (vec result))
                             (handle-or-err search-id result out-chan err-chan
                               (slog/trace (format "FilterBin: %s result is %s" bin (vec result)))
                               (if (ex-data result)
                                 (do (slog/error result "Unexpected error in filter plan")
                                     (throw result))
                                 result)))))                ;this will be nil if filter cond not met and filtered out on chan below
          ;if no indexes use we supply a chan of each bin
          select-all   (fn [bin]
                         (let [query (hayt/select table-cf
                                       (apply hayt/columns columns)
                                       (hayt/where (concat [[= :bin bin]] where-conds)))]
                           (slog/trace "FilterSelectAll:" (pr-str query))
                           (let [result (alia/execute-chan-buffered session query {:consistency :quorum :fetch-size index-fetch-size})]
                             (handle-or-err search-id result out-chan err-chan
                               (if (ex-data result)
                                 (do (slog/error result "Unexpected error in filter plan")
                                     (throw result))
                                 result)))))
          in-chan      (if child (execute child context)
                                 (let [in-chan (async/chan index-buffer-size)]
                                   (async/go-loop [bin (first bins)
                                                   bins (next bins)]
                                     (async/>! in-chan (select-all bin))
                                     (if-not bins
                                       (async/close! in-chan)
                                       (recur (first bins) (next bins))))
                                   in-chan))]
      (async/go
        (try
          (loop
           [in-bin-chan (async/<! in-chan)]
            (slog/trace "FilterInChan:" in-bin-chan)
            (if (nil? in-bin-chan)
              (async/close! out-chan)
              (let [out-bin-xchan (if child
                                    (async/chan index-buffer-size
                                      (comp (filter some?) (map select-where) (map first) (filter some?)))
                                    (async/chan index-buffer-size))]
                (async/>! out-chan out-bin-xchan)
                (loop []
                  (let [v (async/<! in-bin-chan)]
                    (slog/trace "FilterValue:" v)
                    (if (nil? v)
                      (async/close! out-bin-xchan)
                      (when (async/>! out-bin-xchan v)      ;todo do i err in such cases? or do i just assert
                        (recur)))))
                (recur (async/<! in-chan)))))
          (catch Throwable t (fail-node search-id this t err-chan))))
      out-chan)))

;;Filter Node conds doesnt work because of https://issues.apache.org/jira/browse/CASSANDRA-6377
;;   so for now do all filter conds as pred conds

(defrecord PredNode [type id child range pred-conds]
  PlanNode
  (execute [this context]
    (let [{:keys [search-id err-chan]} context
          ranged-pred-conds [:and [>= :received (first range)] [< :received (second range)]]
          pred-conds        (if (seq pred-conds) (conj ranged-pred-conds pred-conds) ranged-pred-conds)
          pred-filter       (predicator-filter-xf pred-conds)
          out-chan          (async/chan 1)
          in-chan           (execute child context)]
      (async/go
        (try
          (loop [first       true
                 in-bin-chan (async/<! in-chan)]
            (if (nil? in-bin-chan)
              (async/close! out-chan)
              (let [out-bin-xchan (async/chan index-fetch-size pred-filter)]
                (async/>! out-chan out-bin-xchan)
                ;this is basically c.c.a./pipe  (ditto in filter-node) but we need it complete before next bin
                (loop []
                  (let [v (async/<! in-bin-chan)]
                    (if first (slog/debug "PredConds:" pred-conds))
                    (slog/trace "PredInValue:" v)
                    (if (nil? v)
                      (async/close! out-bin-xchan)
                      (when (async/>! out-bin-xchan v)
                        (recur)))))
                (recur false (async/<! in-chan)))))
          ;at bottom else this prints before child thanks laziness
          (catch Throwable t (fail-node search-id this t err-chan))))
      out-chan)))

(defrecord EmptySearchNode []
  PlanNode
  (execute [this context]
    (let [out-chan (async/chan 1)]
      (async/close! out-chan)
      out-chan)))


;;;;;;;;;;;;;
;;; Public

(defn get-children [node]
  (if (:child node) [(:child node)] (:children node)))

(defn ^PlanNode seq-scan [table-cf bins columns where-conds]
  (map->SeqScan {:type        "SeqScan"
                 :table-cf    table-cf
                 :bins        bins
                 :columns     columns
                 :where-conds where-conds}))

(defn ^PlanNode index-scan [index-cf index-field partition-type bins range where-conds filter-conds]
  (map->IndexScan {:type          "IndexScan"
                   :index-cf      index-cf
                   :index-field   index-field
                   :partiton-type (or partition-type :cust-ref-day)
                   :bins          bins
                   :range         range
                   :where-conds   where-conds
                   :filter-conds  filter-conds}))            ;todo index-filters should be a IndexPred ?

(defn ^PlanNode and-merge-node [children]
  (map->MergeNode {:type     "AndMergeNode"
                   :children children}))

(defn ^PlanNode or-merge-node [children]
  (map->MergeNode {:type     "OrMergeNode"
                   :children children}))

(defn ^PlanNode filter-node [child table-cf bins columns where-conds]
  (map->FilterNode {:type        "FilterNode"
                    :child       child
                    :table-cf    table-cf
                    :bins        bins
                    :columns     columns
                    :where-conds where-conds}))

(defn ^PlanNode pred-node [child range pred-conds]
  (map->PredNode {:type       "PredNode"
                  :child      child
                  :range      range
                  :pred-conds pred-conds}))

(defn empty-search-node []
  (map->EmptySearchNode {:type "EmptySearchNode"}))
