(ns smx.eventstore.search.planner
  (:require [smx.eventstore.search.plan-node :as node]
            [smx.eventstore.search.state :as state]
            [smx.eventstore.search.model :as model]
            [smx.eventstore.search.dsl :as dsl]
            [schema.core :as s]
            [clojure.core.match :refer [match emit-pattern to-source]]
            [clojure.math.combinatorics :refer [combinations]]
            [clojure.tools.logging :refer [info error warn debug spy] :as log]
            [clojure.walk :as walk]
            [com.stuartsierra.component :as component]
            [smx.eventstore.search.glob :as glob]
            [clojure.string :as str]
            [clj-time.format :as timef]
            [smx.eventstore.search.log :as slog]
            [fipp.clojure :as fipp]
            [clj-time.coerce :as timec])
  (:import [clojure.lang Keyword Symbol]
           [org.joda.time DateTime]
           [smx.eventstore.search.model Model]
           [org.joda.time.format DateTimeFormatter DateTimeFormat]))

;;;;;;;;;;;;;
;;; Schema

;; Options given to a planner created through the zero-argument arity of
;; `new-planner`.
(def default-options {:max-index-scans 2 :timeout-ms 5000})

;; Planner component. Holds the search `model`, the `state` and the planner
;; `options`. Both lifecycle methods only log and return the record unchanged.
(defrecord Planner [model state options]
  component/Lifecycle
  (start [this]
    (info "Starting planner component")
    this)
  (stop [this]
    (info "Stopping planner component")
    this))

;; Search payload. Two accepted shapes:
;;   - a map carrying the query as a :dsl string, or
;;   - a structured map with :select and :where (and optionally :from).
;; Both shapes allow arbitrary extra keys.
;; TODO: schema does not express this alternative well here - consider herbert.
(def Data (s/either {:dsl  String
                     s/Any s/Any}
            {s/Any                    s/Any
             (s/required-key :select) [Keyword]
             (s/optional-key :from)   [Keyword]
             (s/required-key :where)  [s/Any]}))

;; Shape of a search request as accepted by `create-plan`. Every key is
;; optional and unknown keys are allowed.
(def Query
  {(s/optional-key :user)    String
   (s/optional-key :options) {Keyword s/Any}
   ;; either a list of scope pairs or a single flat list
   (s/optional-key :scopes)  (s/either [[s/Any]] [s/Any])
   ;; bounds may be longs, strings or datetimes; should probably just be inst
   (s/optional-key :range)   [s/Any]
   (s/optional-key :search)  Data
   ;; deprecated
   (s/optional-key :data)    Data
   s/Any                     s/Any})

;; Boolean connectives allowed at the head of a merged condition.
(def BooleanOp (s/enum :and :or))

;; A single condition: [operator field operand].
(def Cond
  [(s/one s/Any "opr") (s/one s/Any "field") (s/one s/Any "oprd")])

;; A boolean connective followed by one condition.
(def MergedConds
  [(s/one BooleanOp "bopr") (s/one Cond "cond")])

;; Upper bound on the number of whitespace-separated tokens of a tokenized
;; operand that are turned into index scans (see `es-cond->node-conds`).
(def max-index-scan-tokens 3)

;;;;;;;;;;;;;
;;; Operators -WIP?

;; Search operators keyed by name. Each entry names the operator that negates
;; it (:negator) and a selectivity function keyword (stored under the key
;; :selectivy-fn, spelled as-is).
;; NOTE(review): nothing in the visible code reads this map, and the negators
;; of :any (:not-contains) and :contains (:not-matches) do not mirror their
;; operator names - confirm the intended pairing before relying on it.
(def operators
  {;exact match, depending on type supports * as glob match
   :equals       {:negator      :not-equals
                  :selectivy-fn :equal-sel}
   ;element of list/set; key of map
   :any          {:negator      :not-contains
                  :selectivy-fn :contains-sel}
   ;text representation contains term
   :contains     {:negator      :not-matches
                  :selectivy-fn :matches-sel}
   ;value pattern - regex
   :regex        {:negator      :not-regex
                  :selectivy-fn :regex-sel}
   :less-than    {:negator      :greater-than-or-equals
                  :selectivy-fn :trans-sel}
   :greater-than {:negator      :less-than-or-equals
                  :selectivy-fn :trans-sel}})

;;;;;;;;;;;;;;;;;;;;;
;;; Query Normalizing

;; UTC formatter used for 8-character :range strings (clj-time's :basic-date).
(def ^DateTimeFormatter day-formatter (.withZoneUTC (:basic-date timef/formatters)))
;; UTC formatter used for 12-character :range strings (yyyyMMddHHmm).
(def ^DateTimeFormatter daytime-formatter (.withZoneUTC (DateTimeFormat/forPattern "yyyyMMddHHmm")))

(defn throw-invalid-field!
  "Throws a validation `ex-info` for the field `f`.

  The exception message and the :user-msg entry of its data are both
  \"Invalid field '<f>'\", followed by \",<msg>\" when `msg` is given.
  The data always carries `:error :validation`.

  `f` is normally a keyword, symbol or string; any other value is rendered
  with `pr-str` so that a malformed condition still yields a validation error
  instead of a ClassCastException from `name`."
  ([f]
   (throw-invalid-field! f nil))
  ([f msg]
   (let [field-name (if (or (string? f) (instance? clojure.lang.Named f))
                      (name f)
                      (pr-str f))
         base       (str "Invalid field '" field-name "'")
         ;; the detail used to be computed and then thrown away
         err        (if msg (str base "," msg) base)]
     (throw (ex-info err
              {:user-msg err :error :validation})))))

(defn throw-invalid-search!
  "Throws a validation `ex-info` whose message and :user-msg are
  \"Invalid search - \" followed by `msg`."
  [msg]
  (let [text (str "Invalid search - " msg)
        info {:user-msg text :error :validation}]
    (throw (ex-info text info))))

(defn throw-no-sid-glob!
  "Throws the validation `ex-info` raised when a glob is used on field 'sid'."
  []
  (let [text (str "Globs not supported for field: 'sid'")]
    (throw (ex-info text {:user-msg text :error :validation}))))

(defn validate-columns
  "Checks every column in `cols` against the model and throws an invalid-field
  validation error for the first one that is neither :cust-ref nor a key of
  the model's :fields. Returns nil."
  [model cols]
  (let [known-fields (:fields model)]               ;todo need scope conds in model
    (run! (fn [col]
            (when-not (or (= col :cust-ref) (col known-fields))
              (throw-invalid-field! col)))
          cols)))

(defn resolve-columns
  "Expands `fields-and-views` into a vector of concrete columns and validates it.

  Each element that names a view in the model's :views is replaced by that
  view's fields; every other element is kept as-is. The result preserves the
  order of the request (a view's fields appear where the view was named).
  Throws a validation error, via `validate-columns`, when an expanded column
  is unknown to the model."
  [model fields-and-views]
  (let [views         (:views model)
        ;; `into` keeps the accumulator a vector. The previous `concat` turned
        ;; it into a lazy seq, after which `conj` prepended and the column
        ;; order depended on whether a view had been seen earlier.
        expanded-cols (reduce (fn [cols f-or-v]
                                (if-let [view-fields (f-or-v views)]
                                  (into cols view-fields)
                                  (conj cols f-or-v)))
                        []
                        fields-and-views)]
    (validate-columns model expanded-cols)
    expanded-cols))

(defn ->es-query
  "Normalises a condition tree against `model`.

  Eventually a rewriter to simplify nots, reorder with commutativity, convert
  cidr etc. For now it:
    - recurses through :and / :or nodes, keeping the connective;
    - trims and lower-cases string operands of leaf conditions;
    - rewrites the :equals operator to the `=` function;
    - throws an invalid-field validation error when a leaf's field is neither
      :cust-ref nor a key of the model's :fields.

  An empty or nil `cond-tree` is returned untouched. Boolean nodes are built
  lazily, so leaf validation errors surface when the result is realised."
  [model cond-tree]
  (if (not-empty cond-tree)
    (letfn [(operator-clause? [clause]
              (contains? #{:or :and} (first clause)))
            (process [node]
              (if (operator-clause? node)
                (concat [(first node)] (map process (next node)))
                (let [[opr field oprd] node
                      oprd (if (string? oprd) (str/trim oprd) oprd)
                      ;; lower-cased regardless of the field's case sensitivity
                      ;; (the model/case-sensitive? check is currently disabled)
                      oprd (if (string? oprd) (.toLowerCase ^String oprd) oprd)]
                  (when-not (or (= field :cust-ref) (field (:fields model))) ;todo scope into model
                    (throw-invalid-field! field))
                  (if (= opr :equals)
                    [= field oprd]
                    [opr field oprd]))))]
      (process cond-tree))
    cond-tree))

(defn- ->date-range
  "Coerces the bounds of a search :range to DateTimes. The type of the first
  bound selects the strategy: strings are parsed as yyyyMMdd (8 chars) or
  yyyyMMddHHmm (12 chars) in UTC, integers are treated as epoch millis, and
  anything else goes through `timec/to-date-time`."
  [range]
  (let [parse-bound (fn [date-str]
                      (cond
                        (= 8 (count date-str))
                        (timef/parse day-formatter date-str)

                        (= 12 (count date-str))
                        (timef/parse daytime-formatter date-str)

                        :else
                        (throw-invalid-search! ":range should be an instant or yyyyMMdd(HHmm)")))]
    (cond
      (string? (first range))  (mapv parse-bound range)
      (integer? (first range)) (mapv #(timec/from-long %) range)
      :else                    (mapv timec/to-date-time range))))

(defn ->es-query-tree
  "Builds the ES query tree for a search.

  `this` is the planner (its :model is used), `range` the raw time bounds,
  `scopes` a list of [field value] pairs and `data` the search payload
  (:select, :where, :from and/or :dsl).

  Returns a map with :columns, :from (first requested table, defaulting to
  :msg-summary), :range (DateTimes), :scope-conds and :query-conds.
  Throws validation errors for unknown columns/fields or a malformed range."
  [this range scopes data]
  (let [model       (:model this)
        cols        (resolve-columns model (:select data))  ;validate columns before parsing dsl
        where       (:where data)
        where       (if (coll? (first where))
                      (if (second where)                    ;convert [[cond1][cond2]] to [:and [c1 [c2
                        (into [:and] where)
                        (first where))
                      where)
        all-where   (concat [] where (if (:dsl data) (dsl/parse (str/trim (:dsl data)))))
        range       (->date-range range)
        scope-conds (map #(vector = (first %) (second %)) (distinct scopes))
        ;; One scope -> a single normalised cond; several -> a list of them.
        ;; Each cond is normalised on its own: handing the whole list to
        ;; `->es-query` made it destructure the list as one [opr field oprd]
        ;; cond and fail while validating a vector as the field.
        scope-conds (if (second scope-conds)
                      (mapv #(->es-query model %) scope-conds)
                      (->es-query model (first scope-conds)))
        esqt        {:columns     cols
                     :from        (or (first (:from data)) :msg-summary) ;uggh todo
                     :range       range
                     :scope-conds scope-conds
                     :query-conds (->es-query model (distinct all-where)) ;this doesnt dedupe boolean trees
                     }]
    (debug "ES-tree:" esqt)
    esqt))

(defn ->cql_query_tree
  "Returns `query-tree` with every keyword passed through `model/cql_ize`,
  except the tree's own top-level keys and the glob/flag keywords listed
  below, which are left as they are."
  ;todo get rid of keyword fields so this walk is no longer needed
  [this query-tree]
  (let [untouched (into #{:left-qm :right-qm :left-star :right-star
                          :bookend-stars :bookend-qms
                          :case-sensitive
                          :qmark-glob :star-glob :range-glob}
                    (keys query-tree))
        rename    (fn [form]
                    (if (and (keyword? form) (not (contains? untouched form)))
                      (model/cql_ize form)
                      form))
        cqt       (walk/postwalk rename query-tree)]
    (debug "CQL-tree:" cqt)
    cqt))


;;;;;;;;;;;;;
;;; Seq Plans WIP

;until table scan support in cas no filter scans no seq plans..
;https://issues.apache.org/jira/browse/CASSANDRA-6377

(defn create-seq-plan
  "Builds a sequential-scan plan for `query-tree`: a seq scan restricted by the
  scope conds only, wrapped in a pred node over the query conds when there are
  any."
  ;todo match/any support
  [this query-tree]
  (let [{:keys [columns from range scope-conds query-conds]} query-tree
        bins       (node/get-bins range (state/default-bin-period))
        ;until table scan support in cas no filter scans
        ;https://issues.apache.org/jira/browse/CASSANDRA-6377 so every query
        ;cond is evaluated as a predicate and only scope conds reach the scan
        scan-conds (concat scope-conds nil)                 ;dont extract scope as pred
        scan       (node/seq-scan from bins columns scan-conds)]
    (if (seq query-conds)
      (node/pred-node scan range query-conds)
      scan)))

;;;;;;;;;;;;;
;;; Index Plans
(defn next-char
  "Returns the character whose code is one above `i`, capped at code 65535."
  [i]
  (-> i
      inc
      (min 65535)
      char))

;; Name of the index column family for `index` on table `from`:
;; :<from>_<index>_idx
(s/defn get-index-cf [from :- Keyword index :- Keyword]
  (->> [(name from) "_" (name index) "_idx"]
       (apply str)
       keyword))


;uggh how to better formalize
(defn- ->idx-opr
  "Operator to use in an index condition: :any and :contains become the `=`
  function, every other operator is returned unchanged."
  [opr]
  (if (contains? #{:any :contains} opr)
    =
    opr))

(defn- ->idx-filter-opr
  "Operator to use in an index filter condition: :any becomes the `=`
  function, every other operator is returned unchanged."
  [opr]
  (if (= :any opr)
    =
    opr))

(defn- ->idx-or-cond
  "Builds an :or condition with one branch per alternative in `ors`. Each
  alternative is spliced between `prefix` and `suffix`. With `multi-glob?` the
  branch is a range (>= term, < term followed by \\uffff), since filters will
  refine further; otherwise it is an equality on the term."
  [field prefix ors suffix multi-glob?]
  (let [->branch (if multi-glob?
                   (fn [term]
                     [:and
                      [>= field term]
                      [< field (str term \uffff)]])
                   (fn [term]
                     [= field term]))]
    (cons :or
      (map (fn [alt] (->branch (str prefix alt suffix))) ors))))

;; Translates one glob-matched condition into the conditions for an index scan.
;;
;; Returns nil when the index cannot help (the glob's :shortcut is :any, or the
;; glob has no text prefix), otherwise a map with
;;   :index-conds  - condition(s) evaluated against the index, and optionally
;;   :filter-conds - [opr field glob] to re-check index hits against the glob.
;; A :shortcut of :text yields a single condition on `oprd` using `->idx-opr`.
;; Only the glob element that directly follows the leading text (the second
;; :ast entry) decides the shape of the index condition.
(defn glob->index-scan-conds [glob [opr field oprd]]
  (let [prefix (glob/text-prefix glob)
        ;; the :recipients field is looked up under :recipient (hack, todo)
        field  (if (= field :recipients)                    ;hack todo
                 :recipient field)]
    (cond
      (= (:shortcut glob) :any) nil
      (= (:shortcut glob) :text) {:index-conds [(->idx-opr opr) field oprd]}
      (empty? prefix) nil
      :else (let [pattern     (:pattern glob)
                  glob-spec   (second (:ast glob))          ;what comes after the leading text
                  ;; NOTE(review): `get` with an index only works on vectors; if
                  ;; :ast is a seq (the samples below are printed with parens)
                  ;; this is always nil - confirm the type of :ast.
                  after-glob  (get (:ast glob) 2)
                  ;; literal text following the first glob element, if any
                  suffix      (if (= (first after-glob) :chars) (second after-glob))
                  wildcards   (remove nil? (glob/wildcards glob))
                  multi-glob? (second wildcards)            ;truthy when there is more than one wildcard
                  glob-idx    (count prefix)
                  filter-opr  (->idx-filter-opr opr)]
              ; we require filters against the index result if not tokenized and more than one token
              ;   since the index won't have exact value in that case
              ;   - if a glob has a text suffix or more than one glob
              ;       e.g  ab*d devolves to >=ab and <ac and filter= ab*d,
              ; NOTE tokenized fields need pred conds not filter conds because of spaces
              (case (first glob-spec)                       ;prob should be a protocol..
                ;; prefix* : range from the prefix up to (excluding) the prefix
                ;; with its last character incremented
                :star-glob (cond-> {:index-conds [:and [>= field prefix]
                                                  [< field (str (subs pattern 0 (dec glob-idx))
                                                             ;need to consume char else boxed
                                                             (next-char (int (.charAt pattern (dec glob-idx)))))]]}
                             after-glob
                             (assoc :filter-conds [filter-opr field glob]))
                ;; prefix? : strictly greater than the prefix, up to prefix + \uffff
                :qmark-glob {:index-conds  [:and [> field prefix]
                                            [<= field (str prefix \uffff)]]
                             :filter-conds [filter-opr field glob]} ;always needed since index for a? return abc as well as ab
                ;user=> (:ast (glob/->glob "ab[cd]ef" false))
                ;  ([:chars ab] [:range-glob [:chars cd]] [:chars ef])
                ;; prefix[cd]suffix : one :or branch per character of the range
                :range-glob (let [
                                  conds {:index-conds (->idx-or-cond field prefix
                                                        (seq (second (second glob-spec))) ;ugh todo
                                                        suffix multi-glob?)}]
                              (if multi-glob? (assoc conds :filter-conds [filter-opr field glob]) conds))
                ;user=> (:ast (glob/->glob "ab{c,d}ef" false))
                ;  ([:chars ab] [:brace-glob [:chars c] [:chars d]] [:chars ef])
                ;; prefix{c,d}suffix : one :or branch per brace alternative
                :brace-glob (let [conds {:index-conds (->idx-or-cond field prefix
                                                        (map second (rest glob-spec))
                                                        suffix multi-glob?)}]
                              (if multi-glob? (assoc conds :filter-conds [filter-opr field glob]) conds))
                (assert false (str "No valid glob type " (:ast glob))))))))

;; Builds the index-scan plan node(s) for one field.
;;   - index-conds headed by :or  -> an or-merge of one scan per alternative,
;;     each alternative conj'ed onto the scope conds;
;;   - index-conds headed by :and -> one scan over scope conds + the anded conds;
;;   - anything else              -> one scan over scope conds + that single cond.
;; Returns nil when there are neither index conds nor filter conds.
(s/defn ->index-scans [target :- s/Any
                       range :- [DateTime]
                       field :- Keyword
                       scope-conds :- [Cond]
                       index-conds :- (s/either Cond MergedConds)
                       filter-conds :- s/Any]
  (when (or (seq index-conds) filter-conds)
    (let [idx-field (if (= field :recipients)               ;todo rename index
                      :recipient
                      field)
          index-cf  (get-index-cf target idx-field)
          idx-bins  (node/get-bins range (state/default-bin-period))
          scan      (fn [conds]
                      (node/index-scan index-cf idx-field :cust-ref-day idx-bins range conds filter-conds))
          head      (first index-conds)]
      (cond
        (= :or head)
        (node/or-merge-node
          (map (fn [alternative] (scan (conj scope-conds alternative)))
            (rest index-conds)))

        (= :and head)
        (scan (concat scope-conds (rest index-conds)))

        :else
        (scan (concat scope-conds [index-conds]))))))

;; Turns one leaf condition [opr field oprd] into the pieces a plan node needs:
;;   :field            - the condition's field
;;   :index-scan-conds - a vector of {:index-conds .. :filter-conds ..} maps,
;;                       one per token that the index can serve
;;   :pred-conds       - (optional) a predicate to re-check results with
;; Throws a validation error when a glob with wildcards is used on :sid.
;; `search-id` is accepted but not used in this function.
(s/defn es-cond->node-conds :- {Keyword s/Any} [es-cond :- Cond
                                                model :- Model
                                                search-id :- s/Any]
  ;index-conds need index-filters and pred conds in concert/instead too if globbed/cased/tokenized
  ;
  ; if tokenized & (count tokens) > 1
  ;   need exact match for order and also because we may exceed max-index-scans
  ;      => pred-cond
  ; or if cased
  ;     => pred-cond to test case
  ; also; if glob
  ;   and not covered by index-restriction (e.g not  abc, abc*, abc?)
  ;     => index-filter-cond (to test rest of glob after >=,< for first glob)
  (let [[opr field oprd] es-cond
        ;; only compiled when the model says the field supports globs
        glob             (if (model/globbable? model field) (glob/->glob oprd))
        _                (if (and (= field :sid) (seq (remove nil? (glob/wildcards glob))))
                           (throw-no-sid-glob!))
        tokenized        (model/tokenized? model field)
        ;; todo need to space tokenized but not glob chars
        ;; tokenized operands are lower-cased, split on whitespace and capped at
        ;; max-index-scan-tokens; otherwise the operand is the only token
        tokens           (if tokenized (take max-index-scan-tokens (re-seq #"\S+" (.toLowerCase oprd))) [oprd])
        ;; one index-scan cond per token; glob tokens the index cannot serve
        ;; (glob->index-scan-conds returns nil) are skipped
        index-scan-conds (reduce
                           (fn [conds token]
                             (if glob
                               (if-let [gics (glob->index-scan-conds (glob/->glob token) [opr field token])]
                                 (conj conds gics)
                                 conds)
                               (conj conds
                                 {:index-conds [(->idx-opr opr) field (.toLowerCase token)]})))
                           [] tokens)
        pred-cond        (if (second tokens)
                           ; If multi tokens then pred to check ordering because indexes don't know token order
                           ; then is just a question whether to use glob  or not
                           (if (and glob (not= (:shortcut glob) :text))
                             [opr field glob]
                             [opr field oprd])
                           ; If  (* or ? with more)  e.g.  a* not a
                           ;   or (2nd wildcard )    e.g   a*b*
                           ;   or (leading [] )   e.g [ab]
                           ;  then need pred conds because indexes don't capture completely
                           ;
                           ; Improvements:
                           ;  - (2) this might catch some globs unnecessarily like a*?* if the glob compile
                           ;  - (3) is better as (or (= index a) (= index b))
                           ;
                           ; NOTE(review): the second test uses (filter true? ..) while the
                           ; other tests on glob/wildcards compare its elements to keywords
                           ; or use (remove nil? ..); if wildcards yields keywords this
                           ; branch can never match - confirm against glob/wildcards.
                           (if (and glob
                                 (or
                                   (and (.contains [:qmark-glob :star-glob] (first (glob/wildcards glob)))
                                     (second (:ast glob)))
                                   (second (filter true? (glob/wildcards glob)))
                                   (= (first (glob/wildcards glob)) :range-glob)))
                             [opr field glob]))
        ;if phrase need to check ordering
        node-conds       (cond-> {:field field :index-scan-conds index-scan-conds}
                           pred-cond (assoc :pred-conds pred-cond))]
    (slog/debug "NodeConds:" node-conds)
    node-conds))

;; Builds the single index-based plan for a CQL query tree.
;;
;; The query conds are walked recursively: every leaf becomes one or more index
;; scans (and-merged when a leaf yields several), every :and / :or node merges
;; the plans of its children with the matching merge node. Predicates that the
;; index scans cannot guarantee are collected on the way up and applied by the
;; outermost pred node. The result is
;;   pred-node <- filter-node (scope conds) <- merged index scans.
(s/defn create-index-plan [this :- Planner                  ;replace with lens?
                           cql_query_tree :- s/Any
                           search-id :- s/Any]

  (let [{:keys [model]} this
        cql_indexes (map model/cql_ize (:indexes model))
        {:keys [columns from range scope-conds query-conds]} cql_query_tree
        ;normalize scope conds as list of conds
        ; shall we just punt this back to api as [cond] (possibly anded) for scope conds?
        scope-conds (if (coll? (first scope-conds)) scope-conds [scope-conds])
        ;; seed passed into the recursion; only :level is read from it
        tree        {:plan nil :pred-conds nil :level 0}
        tree        (letfn
                     [;; orders sibling conds by the position of their field in
                      ;; the model's index list; bare keywords sort first
                      (sort-sibling-nodes-by-index
                        [nodes]
                        (sort-by (fn [node]
                                   (if (keyword? node)
                                     -1
                                     (.indexOf cql_indexes (second node)))) nodes))
                      ;; returns {:plan .. :level .. :pred-conds ..} for a query node
                      (query-node->plan-node
                        [node {:keys [level] :as tree*}]
                        (if (or (= (first node) :or) (= (first node) :and))
                          ;; boolean node: plan every child one level deeper, then merge
                          (let [merge-type (first node)
                                node-plans (mapcat
                                             (fn [cond]
                                               (let [node-plans (query-node->plan-node cond (update tree* :level inc))]
                                                 (if (map? node-plans) [node-plans] node-plans)))
                                             (sort-sibling-nodes-by-index (next node)))
                                node-pcs   (doall (remove empty? (map :pred-conds node-plans)))
                                ;; several child predicates are combined under the
                                ;; node's own connective, a single one is passed up as-is
                                pred-conds (if (second node-pcs)
                                             (into [merge-type] node-pcs)
                                             (if (first node-pcs)
                                               (first node-pcs)))
                                plans      (remove nil? (map :plan node-plans))]
                            {:plan       (if (second plans)
                                           (if (= merge-type :or)
                                             (node/or-merge-node plans)
                                             (node/and-merge-node plans))
                                           (first plans))
                             :level      level
                             :pred-conds pred-conds})
                          ;; leaf condition: one index scan per index-scan cond
                          (let [node-conds  (es-cond->node-conds node model search-id)
                                index-scans (map
                                              (fn [{:keys [index-conds filter-conds]}]
                                                (->index-scans from range (:field node-conds) scope-conds index-conds filter-conds))
                                              (:index-scan-conds node-conds))
                                index-pcs   (:pred-conds node-conds)]
                            (if (second index-scans)
                              {:plan       (node/and-merge-node index-scans)
                               :level      level
                               :pred-conds index-pcs}
                              {:plan       (first index-scans)
                               :level      level
                               :pred-conds index-pcs}))))]
                      (query-node->plan-node query-conds tree))
        filter-bins (node/get-bins range (state/default-bin-period))
        ; todo do we be paranoid and check all conds again because indexes are async,
        ; cql conds not used in index scans become filters on result stream in theory
        ;;BUT https://issues.apache.org/jira/browse/CASSANDRA-6377 so we move to pred-node
        filter-plan (node/filter-node
                      (:plan tree)
                      from
                      filter-bins
                      columns
                      scope-conds)]
    ;; `seq` turns an empty predicate collection into nil
    (node/pred-node filter-plan range (seq (:pred-conds tree)))))

;;;;;;;;;;;;;
;;; Planning

(defn create-plans
  "Returns a vector of candidate plans for `query-tree`.

  The intent is to consider, for each from-partition, a sequential (table)
  scan and the available index scans, applying the predicates that involve
  the base relation. Currently exactly one plan is produced: an empty-search
  node when the tree has no query conds, otherwise the index plan."
  ;No seq scans till ;https://issues.apache.org/jira/browse/CASSANDRA-6377
  ;Only generating 1 index plan by index order atm
  [this query-tree search-id]
  (let [plan (if (seq (:query-conds query-tree))
               (create-index-plan this query-tree search-id)
               (node/empty-search-node))]
    [plan]))

(defn label-plans
  "Walks each plan tree and labels its nodes with an :id.

  The root of every plan gets id 1. A node stored under :children gets its
  children numbered consecutively from the parent's id + 1; a node stored
  under :child gets its single child numbered parent id + 1. Returns a lazy
  seq of the labelled plans.

  NOTE(review): numbering restarts from each node's own id, so a grandchild
  can receive the same id as one of its parent's siblings - confirm whether
  ids are expected to be unique within a plan."
  [plans]
  (map
    (fn [plan]
      (letfn [(id-node [id node]
                (let [nodes (node/get-children node)]
                  (cond-> (assoc node :id id)

                    ;;uggh todo?
                    (contains? node :children)
                    (assoc :children
                           (mapv id-node
                             (range (inc id) (+ id 1 (count nodes)))
                             nodes))

                    (and (first nodes)
                      (contains? node :child))
                    (assoc :child
                           (id-node (inc id) (first nodes))))))]
        (id-node 1 plan)))
    plans))

;;;;;;;;;;
;;; Public

(s/defn new-planner
  "Creates a planner.
    - no args: a planner carrying only `default-options`;
    - one map: a planner built directly from that map (record fields such as
      :model, :state and :options);
    - a base planner and an options map: the base planner with `opts` merged
      over its :options (a nil `opts` leaves them unchanged)."
  ([] (map->Planner {:options default-options}))
  ([opts :- {Keyword s/Any}]
   (map->Planner opts))
  ([base-planner :- Planner
    opts :- (s/maybe {Keyword s/Any})]
   ;; `opts` is already the options map, so log it directly; looking up
   ;; :options inside it logged nil
   (debug "Planner options" opts)
   (update base-planner :options merge opts)))

(s/defn create-plan
  "Given a search, returns it annotated with:
      :_query  the resolved ES query tree built from the search's range,
               scopes and data
      :_plan   the chosen plan: the first of the labelled plans produced by
               `create-plans` for the CQL form of that tree
   The search's :options are merged over the planner's options for this call."
  [planner :- Planner
   search :- Query]
  ;; NOTE(review): only :data is read here although Query also declares
  ;; :search (and marks :data deprecated) - confirm callers normalise it.
  (let [{:keys [scopes range data search-id]} search
        ;todo protect certain opts
        search-planner (new-planner planner (:options search))
        es-query-tree  (->es-query-tree search-planner range scopes data)
        cql_query_tree (->cql_query_tree search-planner es-query-tree)
        cheapest-plan  (-> (create-plans search-planner cql_query_tree search-id)
                           label-plans
                           first)]
    (slog/debug "Choosen plan is :" (pr-str cheapest-plan))
    (assoc search
      :_query es-query-tree
      :_plan cheapest-plan)))







