(ns nl.jomco.spider
  (:require [babashka.http-client :as http]
            [clojure.data.json :as json]
            [clojure.set :as set]
            [clojure.string :as string]
            [ring.middleware.params :as params])
  (:import java.net.URL))

(defn placeholder?
  [x]
  (and (simple-symbol? x)
       (string/starts-with? (name x) "?")))

(defn entries
  "Return key => value pairs for each entry in coll. For vectors,
  returns index => value pairs."
  [coll]
  (cond
    (map? coll)
    (seq coll)

    (vector? coll)
    (map-indexed vector coll)))

(defn select
  "Select `query` from `data`.  A query is a list of terms navigating
  into the data.  Query terms which are symbols starting with a \\?
  character are considered to be placeholders.

  Returns a `clojure.set` compatible relation result:

  - Empty set when no matches are found
  - #{{}} (a set containing an empty map) for an exact match
  - A set of maps with bindings for every match with placeholders.

  Bindings are unified; if a placeholder appears multiple times, the
  value for that placeholder must be the same for every appearance in
  a match.

  Examples:

    (select {:name \"fred\",
             :friends [{:name \"barney\"}]}
            '[:name ?name])
    ;; => #{{?name \"fred\"}}

    (select {:name \"fred\",
             :friends [{:name \"barney\"
                        :name \"dino\"}]}
            '[:friends ?i :name ?name])
    ;; => #{{?i 0, ?name \"barney\"}
            {?i 1, ?name \"dino\"}}"
  [data query]
  (let [[term & sub-query] query]
    (cond
      ;; match placeholder
      (placeholder? term)
      (if sub-query
        ;; if query continues, we expect data to be a collection,
        ;; where placeholder matches all of the keys / indexes in
        ;; collection for which the sub-query matches the
        ;; corresponding value.
        (into #{}
              (mapcat (fn [[k descendant]]
                        (set/join #{{term k}} (select descendant sub-query)))
                      (entries data)))
        ;; no sub-query; placeholder matches data
        #{{term data}})

      ;; literal match for rest of data
      (nil? sub-query)
      (if (= term data)
        #{{}}
        #{})

      ;; term is key into rest of data, run sub-query against
      ;; descendant.
      :else
      (if-let [[_ descendant] (and (coll? data) (find data term))]
        (select descendant sub-query)
        #{}))))

(defn multi-select
  "Run rules (a collection of queries for `select` against `data` and
  join the results.

  Like select, returns a (possibly empty, for no matches) relation of
  bindings"
  [data rules]
  (->> rules
       (map #(select data %))
       (reduce set/join #{{}})))

(defn evaluate
  "Evaluate parsed `expr` with given `env`.

  Apart from the values and functions given in `env`, special
  forms `(if PRED TRUE-FN FALSE-FN)`, `(and PRED-1 PRED-2 ..)`
  and `(or PRED-1 PRED-2 ..)` are also interpreted.  Both `and` and
  `or` return a boolean.

  Examples:

    (evaluate '(+ 1 2) {'+ +})
    ;; => 3
    (evaluate '(+ 1 2 x) {'+ +, 'x 3})
    ;; => 6
    (evaluate '(if (or (< 0 x 5) (< 10 x 15)) \"yes\" \"no\") {'< <, 'x 3})
    ;; => \"yes\""
  [expr env]
  (cond
    (or (boolean? expr) (number? expr) (string? expr))
    expr

    (symbol? expr)
    (let [v (get env expr ::lookup-failed)]
      (when (= ::lookup-failed v)
        (throw (ex-info "lookup failed" {:expr expr})))
      v)

    (list? expr)
    (let [[oper & args] expr]
      (condp = oper
        'if
        (let [[test effect alternative] args]
          (if (evaluate test env)
            (evaluate effect env)
            (evaluate alternative env)))

        'and
        (every? identity (map #(evaluate % env) args))

        'or
        (boolean (some #(evaluate % env) args))

        ;; else
        (apply (evaluate oper env)
               (map #(evaluate % env) args))))

    :else
    (throw (ex-info "unexpected expression" {:expr expr}))))

(defn apply-templ
  "Apply template expressions in `val` string with given `env`.
  Expression are place within `{` and `}` characters, parsed using
  `read-string` and evaluated using `evaluate` and `env`.

  Example:

    (apply-templ \"1 + {x} = {(+ 1 x)}\" {'+ +, 'x 2})
    ;; -> \"1 + 2 = 3\""
  [val env]
  (if (string? val)
    (string/replace val
                    #"\{([^}]+)\}"
                    (fn [[_ expr]]
                      (str (evaluate (read-string expr) env))))
    val))

(defn- apply-templs
  "Apply `templs` with merge of `env` and each in `matches`."
  [templs env matches]
  (mapcat (fn [match]
            (let [env (merge env match)]
              (set
               (for [templ templs]
                 (update-vals templ #(apply-templ % env))))))
          matches))

(defn generate
  "Generate values from `templs` using `matches` (a result from
  `multi-select`) and `env` (for `evaluate`)."
  [templs matches env]
  (set (apply-templs templs env matches)))

(defn- assoc-parsed-body
  "Assoc parsed body when its content type is JSON, otherwise leave
  untouched."
  [{:keys [headers body] :as res}]
  (let [ctype (get headers "content-type")]
    (cond
      (re-matches #"application/json(;.*)?" ctype)
      (assoc res :body (json/read-str body))

      :else
      res)))

(defn- ringify-response
  [response]
  (-> response
      (assoc :headers
             (reduce-kv #(assoc %1 (string/lower-case (name %2)) %3)
                        {}
                        (:headers response)))
      (dissoc :request)))

(defn- assoc-ringified-from-url
  "Assoc `:scheme`, `:server-name`, `:server-port`, `:uri` and
  `:query-string` from `url` unto `req`.  For convenience `:path` is
  also added."
  [req url]
  (let [url   (URL. url)
        proto (.getProtocol url)
        host  (.getHost url)
        port  (.getPort url)
        path  (.getPath url)
        query (.getQuery url)]
    (-> req
        (assoc :scheme (keyword proto))
        (assoc :server-name host)
        (assoc :server-port (if (= port -1)
                              ({"http" 80, "https" 443} proto)
                              port))
        (assoc :path path) ;; not ring but for convenience
        (assoc :uri path)
        (assoc :query-string query))))

(defn- ringify-request
  "Fix up request to match ring spec for requests."
  [{:keys [method url] :as req}]
  (cond-> req
    url (assoc-ringified-from-url url)

    ;; note that :method is preserved because it is user input
    method (assoc :request-method
                  (cond
                    (keyword? method) method
                    (string? method)  (keyword (string/lower-case method))))

    :and (params/assoc-query-params "UTF-8")))

(def client
  (delay (http/client (assoc http/default-client-opts
                             :follow-redirects :never))))

(defn interact
  "Make given HTTP `request` and return an interaction of format:

    {:request {:method :get, :headers { .. }, :body { .. }}
     :response {:status 200, :headers { .. }, :body { .. }}"
  [request]
  {:request  (ringify-request request)
   :response (-> request
                 (assoc :client @client
                        :throw false
                        :uri (:url request))
                 (http/request)
                 (ringify-response)
                 (assoc-parsed-body))})

(defn make-url
  "Navigate on `base-url` with `path`."
  [base-url path]
  (.toString (URL. (URL. base-url) path)))

(defn harvest
  "Returns a set of requests generated from the given
  `interaction` (created by `interact`) and `rules` using `select` and
  `generate`."
  [interaction rules base-url env]
  (->> rules
       (mapcat (fn [rule]
                 (let [matches (multi-select interaction (:match rule))]
                   (when-not (empty? matches)
                     (generate (:generates rule) matches env)))))
       (map (fn [{:keys [path uri] :as req}]
              (-> req
                  (dissoc :path :uri)
                  (assoc :url (make-url base-url (or path uri))))))
       set))

(def default-env {'inc inc, 'dec dec, '+ +, '- -, '= =, 'not not})

(defn step
  "Take a spider step with `pool` being a set of requests to consider
  one of, `seen` the requests already probed, return a map containing
  new versions of `pool` and `seen`, and the interaction just taken or
  `nil` when no more requests left to consider.

  Use option `interact` to provide an alternative to the `interact`
  function, and option `env` to provide an alternative environment for
  `evaluate` (see also `default-env`).  If `follow?` option is
  provided, only consider generated requests for which `(follow?
  request)` is true."
  [{:keys [pool seen]} rules
   & {:keys [interact env follow?]
      :or   {interact interact
             follow?  any?
             env      default-env}}]
  (when-let [{:keys [url] :as req} (first (set/difference pool seen))]
    (let [interaction (interact req)
          yield       (filter follow? (harvest interaction rules url env))]
      {:interaction interaction
       :seen        (conj seen req)
       :pool        (into pool yield)})))
