(ns smx.eventstore.search.glob
  (:require [instaparse.core :as insta]
            [instaparse.failure :as insta-fail]
            [instaparse.transform :as insta-trans]
            [clojure.java.io :as io]
            [clojure.tools.logging :as log]
            [schema.core :as s]
            [clojure.string :as str])
  (:import [java.util.regex Pattern]))

(def parser (insta/parser (slurp (io/resource "glob.ebnf"))))
(def ^:const max-glob-length 30)
(def ^:const shortcuts-on true)
(def ^:const non-word #"\W")

(s/defn validate [pattern :- Boolean]
  (let [r (insta/parse parser pattern)
        f (insta/get-failure r)]
    (or f true)))

(def ParseTree [])                                          ;todo fill in
(def Shortcut (s/enum :text :left-star :left-qm :left-brace :right-star :right-qm :right-brace
                :bookend-stars :bookend-qms :any))

(s/defrecord Glob [pattern :- String
                   ast :- s/Any
                   case-sensitive :- Boolean
                   shortcut :- (s/maybe Shortcut)
                   shortcut-re :- (s/maybe Pattern)
                   globbed :- Boolean])

(defn parse [pattern case-sensitive]
  (let [parse-result
        (parser (if-not case-sensitive
                  (.toLowerCase pattern)
                  pattern))]
    (if (insta/failure? parse-result)
      (let [failure (with-out-str (insta-fail/pprint-failure parse-result))]
        (throw (ex-info "Bad glob pattern."
                 {:user-msg (str "Bad glob pattern. " failure)
                  :error    :validation
                  :detail   {:glob-parse-error failure}})))
      parse-result)))

(defn- re-esc [s]
  (str "\\Q" s "\\E"))

(defn- glob-re-choices [choices]
  (str
    (reduce
      (fn [chs ch]
        (str chs "|" (re-esc ch)))
      (str "(" (re-esc (first choices)))
      (rest choices))
    ")"))

(def ch->str-transformer
  {:chars (fn [& chs] [:chars (apply str chs)])})

(defn ->ast [pattern case-sensitive]
  (if (= pattern "")
    [[:chars ""]]
    (vec (insta-trans/transform
           ch->str-transformer
           (rest (parse pattern case-sensitive))))))

(defn- get-val [node ntype]
  (case ntype
    :chars (second node)
    :brace-glob (map second (rest node))
    :range-glob (second (second node))
    nil))

(defn- re-bound [& strs]
  (re-pattern (str "\\b" (apply str strs) "\\b")))

(defn- ->shortcut [type-peek val-peek]
  (case type-peek
    [:chars]
    [:text (if (= (first val-peek) "")
             (re-pattern "")
             (re-bound (re-esc (first val-peek))))]

    [:star-glob :chars]
    [:left-star (re-bound (str ".*" (re-esc (second val-peek))))]

    [:chars :star-glob]
    [:right-star (re-bound (str (re-esc (first val-peek)) ".*"))]

    [:qmark-glob :chars]
    [:left-qm (re-bound (str "." (re-esc (second val-peek))))]

    [:chars :qmark-glob]
    [:right-qm (re-bound (str (re-esc (first val-peek)) "."))]

    [:brace-glob]
    [:left-brace (re-bound (str (glob-re-choices (first val-peek))))]

    [:brace-glob :chars]
    [:left-brace (re-bound (str (glob-re-choices (first val-peek)) (re-esc (second val-peek))))]

    [:chars :brace-glob]
    [:right-brace (re-bound (str (re-esc (first val-peek)) (glob-re-choices (second val-peek))))]

    [:star-glob :chars :star-glob]
    [:bookend-stars (re-bound (str ".*" (re-esc (second val-peek)) ".*"))]

    [:qmark-glob :chars :qmark-glob]
    [:bookend-qms (re-bound (str "." (re-esc (second val-peek)) "."))]

    [:star-glob]
    [:any]
    nil))

(s/defn wildcards
  "Walks match tree, returning vec of glob type or nil
    such that wildcards[tree-position] = glob-type|nil
    e.g. \"abc*de?f\" => (nil, :star-glob, nil, qm-glob, nil)"
  [glob :- Glob]
  (map #(if-not (= :chars (first %)) (first %)) (:ast glob)))

(s/defn ->glob                                              ;should probably be defrecordss
  ([pattern :- String]
   (->glob pattern false))
  ([pattern :- String
    case-sensitive :- Boolean]
   ;todo this is cacheable
   (let [ast  (->ast pattern case-sensitive)
         [shortcut-type shortcut-re] (let [type-peek (take 4 (map first ast)) ;take extra so we know its exact count if match
                                           val-peek  (map get-val ast type-peek)]
                                       (->shortcut type-peek val-peek))
         glob (map->Glob
                {:shortcut       shortcut-type
                 :shortcut-re    shortcut-re
                 :globbed        (not= shortcut-type :text)
                 :pattern        pattern
                 :case-sensitive case-sensitive
                 :ast            ast})]
     (if (and (seq (remove nil? (wildcards glob))) (< max-glob-length (count pattern)))
       (throw (ex-info (format "Too long glob pattern (max. %s chars)" max-glob-length)
                {:user-msg (format "Too long glob pattern (max. %s chars)" max-glob-length)
                 :error    :validation
                 :detail   {:glob-too-long-error pattern}}))
       glob))))

(s/defn text-prefix [glob :- Glob]
  (let [[type match] (first (:ast glob))]
    (if (= type :chars)
      match)))

(defn- check [glob text matches?]                           ;no schema on critical path
  (log/tracef "Checking glob: '%s' against '%s'" (:pattern glob) text)
  (let [text (if (:case-sensitive glob) text (.toLowerCase text))]
    (case (:shortcut glob)                                  ;todo busted without lucene
      :any true
      (if (and shortcuts-on (:shortcut-re glob))
        (some? ((if matches? re-matches re-find) (:shortcut-re glob) text))
        (letfn [(next-check                                 ;todo faster & no stack overflow as loop recur but too many branches?trampoline?
                  [target tree*]
                  (let [thing (first tree*)
                        type  (first thing)
                        match (get-val thing type)]
                    (log/trace "|type:" type "|match:" match "|target:" target "|tree:" tree*)
                    (if-not type
                      (= target "")
                      (case type                            ;ditch for regexs? todo at least compare pathologicals
                        :chars (and (.startsWith target match)
                                 (next-check (subs target (count match)) (next tree*)))
                        :star-glob (if (= target "")
                                     (next-check "" (next tree*))
                                     (if (re-matches non-word (subs target 0 1))
                                       (next-check target (next tree*))
                                       (or (next-check (subs target 1) (next tree*))
                                         (next-check (subs target 1) tree*)
                                         (next-check target (next tree*)))))
                        :qmark-glob (if (next tree*)
                                      (next-check (if (= target "") "" (subs target 1))
                                        (next tree*))
                                      (= (.length target) 1))
                        :range-glob (if (and (not= target "") (.contains match (subs target 0 1)))
                                      (if (next tree*)
                                        (next-check (if (= target "") "" (subs target 1))
                                          (next tree*))
                                        true)
                                      false)
                        :brace-glob (if (some true? (map #(.startsWith target %) match))
                                      (if (next tree*)
                                        (next-check (if (= target "") "" (subs target 1))
                                          (next tree*))
                                        true)
                                      false)
                        (assert false (str "Bad glob parse unknown type " type " for glob " glob))))))]
          (next-check text (:ast glob)))))))                ;todo lucene standardize tokenizing and return matched token

(defn matches? [glob text]
  (check glob text true))

(defn includes? [glob text]
  (let [glob-words (re-seq #"\S+" (:pattern glob))          ;lucene todo
        tokens     (partition (max 1 (count glob-words)) 1 (re-seq #"\S+" text))]
    ;todo is the " " dodgy?
    (some #(check glob (str/join " " %) false) tokens)))



;defn containing  => return token matched?




