(ns nl.jomco.spider
  (:require [clojure.set :as set]
            [clojure.string :as string]))

(defn placeholder?
  "Returns true when `x` is a placeholder: a symbol without a namespace
  whose name starts with a `?` character.  Returns false for anything
  else."
  [x]
  (if (simple-symbol? x)
    (string/starts-with? (name x) "?")
    false))

(defn entries
  "Return a seq of [key value] pairs for `coll`.

  - map: its entries (nil when the map is empty)
  - vector: lazy seq of [index value] pairs
  - anything else: nil"
  [coll]
  (if (map? coll)
    (seq coll)
    (when (vector? coll)
      (map-indexed (fn [idx item] [idx item]) coll))))

(defn select
  "Select `query` from `data`.  A query is a list of terms navigating
  into the data.  Query terms which are symbols starting with a \\?
  character are considered to be placeholders.

  Returns a `clojure.set` compatible relation result:

  - Empty set when no matches are found
  - #{{}} (a set containing an empty map) for an exact match
  - A set of maps with bindings for every match with placeholders.

  Bindings are unified; if a placeholder appears multiple times, the
  value for that placeholder must be the same for every appearance in
  a match.

  Only maps and vectors are navigated.  A literal key term applied to
  any other kind of data (lists, sets, scalars, nil) yields no match.

  Examples:

    (select {:name \"fred\",
             :friends [{:name \"barney\"}]}
            '[:name ?name])
    ;; => #{{?name \"fred\"}}

    (select {:name \"fred\",
             :friends [{:name \"barney\"}
                       {:name \"dino\"}]}
            '[:friends ?i :name ?name])
    ;; => #{{?i 0, ?name \"barney\"}
            {?i 1, ?name \"dino\"}}"
  [data query]
  (let [[term & sub-query] query]
    (cond
      ;; match placeholder
      (placeholder? term)
      (if sub-query
        ;; if query continues, we expect data to be a collection,
        ;; where placeholder matches all of the keys / indexes in
        ;; collection for which the sub-query matches the
        ;; corresponding value.  The join unifies this binding with
        ;; any binding of the same placeholder deeper in the query.
        (into #{}
              (mapcat (fn [[k descendant]]
                        (set/join #{{term k}} (select descendant sub-query)))
                      (entries data)))
        ;; no sub-query; placeholder matches data
        #{{term data}})

      ;; literal match for rest of data
      (nil? sub-query)
      (if (= term data)
        #{{}}
        #{})

      ;; term is key into rest of data, run sub-query against
      ;; descendant.  `find` throws on collections that are not
      ;; associative (lists, sets, lazy seqs), so guard with
      ;; `associative?` instead of `coll?`; such data is simply not
      ;; a match.
      :else
      (if-let [[_ descendant] (and (associative? data) (find data term))]
        (select descendant sub-query)
        #{}))))

(defn multi-select
  "Run `rules` (a collection of queries for `select`) against `data`
  and join the results of all queries with `clojure.set/join`.

  Like `select`, returns a (possibly empty, for no matches) relation
  of bindings.  With no rules at all the result is #{{}}."
  [data rules]
  (reduce (fn [joined query]
            (set/join joined (select data query)))
          #{{}}
          rules))

(defn evaluate
  "Evaluate parsed `expr` with given `env`.

  - booleans, numbers and strings evaluate to themselves
  - symbols are looked up in `env`; a missing symbol throws an
    `ex-info` with message \"lookup failed\"
  - lists are calls: the operator is evaluated and applied to the
    evaluated arguments
  - anything else throws an `ex-info` with message
    \"unexpected expression\"

  Apart from the values and functions given in `env`, special
  forms `(if PRED TRUE-FN FALSE-FN)`, `(and PRED-1 PRED-2 ..)`
  and `(or PRED-1 PRED-2 ..)` are also interpreted.  Both `and` and
  `or` return a boolean.

  Examples:

    (evaluate '(+ 1 2) {'+ +})
    ;; => 3
    (evaluate '(+ 1 2 x) {'+ +, 'x 3})
    ;; => 6
    (evaluate '(if (or (< 0 x 5) (< 10 x 15)) \"yes\" \"no\") {'< <, 'x 3})
    ;; => \"yes\""
  [expr env]
  (let [eval-in-env #(evaluate % env)]
    (cond
      (symbol? expr)
      (let [value (get env expr ::lookup-failed)]
        (if (= ::lookup-failed value)
          (throw (ex-info "lookup failed" {:expr expr}))
          value))

      (list? expr)
      (let [[oper & args] expr]
        (case oper
          if  (let [[test then else] args]
                (if (eval-in-env test)
                  (eval-in-env then)
                  (eval-in-env else)))

          ;; stops at the first falsey argument
          and (every? eval-in-env args)

          ;; stops at the first truthy argument
          or  (boolean (some eval-in-env args))

          ;; regular call: operator is looked up / evaluated like any
          ;; other expression
          (apply (eval-in-env oper) (map eval-in-env args))))

      (or (string? expr) (number? expr) (boolean? expr))
      expr

      :else
      (throw (ex-info "unexpected expression" {:expr expr})))))

(defn apply-templ
  "Apply template expressions in `val` with given `env`.

  Templates are datastructures that can contain:

   - maps and vectors; values are evaluated (not keys)
   - lists; evaluated as expressions
   - symbols starting with `?` evaluated as expressions
   - strings; evaluated for expressions within `{` and `}` characters.

  Any other value is returned unchanged.

  Expressions inside strings are parsed with the Clojure reader with
  `*read-eval*` disabled, so `#=(...)` forms in a template are
  rejected by the reader instead of being executed.

  Example:

    (apply-templ \"1 + {x} = {(+ 1 x)}\" {'+ +, 'x 2})
    ;; -> \"1 + 2 = 3\""
  [val env]
  (cond
    (string? val)
    (string/replace val
                    #"\{([^}]+)\}"
                    (fn [[_ expr]]
                      ;; `read-string` runs arbitrary code for `#=`
                      ;; forms unless `*read-eval*` is false; template
                      ;; text never needs that, so switch it off.
                      (str (evaluate (binding [*read-eval* false]
                                       (read-string expr))
                                     env))))
    (map? val)
    (update-vals val #(apply-templ % env))

    (vector? val)
    (into (empty val) (map #(apply-templ % env)) val)

    (or (list? val)
        (and (symbol? val)
             (= \? (first (name val)))))
    (evaluate val env)

    :else
    val))

(defn- apply-templs
  "Apply every template in `templs` once per match in `matches`, each
  time using `env` merged with that match (bindings from the match
  take precedence).  Returns a lazy seq of the resulting values;
  results are de-duplicated per match."
  [templs env matches]
  (let [expand (fn [match]
                 (let [bindings (merge env match)]
                   (into #{}
                         (map #(apply-templ % bindings))
                         templs)))]
    (mapcat expand matches)))

(defn generate
  "Generate the set of values produced by applying `templs` to every
  match in `matches` (a result from `multi-select`), using `env` as
  the base environment for `evaluate`."
  [templs matches env]
  (into #{} (apply-templs templs env matches)))

(defn harvest
  "Returns a set of requests generated from the given `interaction`.

  An interaction (map containing :request and :response) is matched
  against every rule in `rules`.  A rule is a map with:

    - `:match` - queries passed to `multi-select`
    - `:generates` - templates passed to `generate`

  Rules without any match contribute nothing.  `env` is the base
  environment used when evaluating the templates."
  [interaction rules env]
  (into #{}
        (mapcat (fn [rule]
                  (let [matches (multi-select interaction (:match rule))]
                    (when (seq matches)
                      (generate (:generates rule) matches env)))))
        rules))

(def default-env
  "Default environment for evaluating rule expressions: maps symbols
  to the `clojure.core` functions of the same name."
  {'inc inc
   'dec dec
   '+   +
   '-   -
   '=   =
   'not not})

(defn interactions
  "Returns lazy seq of interactions from `seed-requests`.

  `exec-request` is called once for every request and should return
  either:

    - a response, resulting in an interaction
    - :nl.jomco.spider/skip if the request should be skipped
    - nil to stop spidering

  interactions are maps containing a :request and :response. Skipped
  requests do not result in an interaction.

  `seed-requests` is a collection of request maps.  Every seed is
  executed, in order, unless spidering stops first.

  `generate-requests` takes an interaction and returns a collection of
  requests to be executed.  Generated requests are executed before the
  remaining queued requests.  A generated request is ignored when it
  was already executed, skipped or queued (including as a seed).

  `seen-requests` is an initial set of requests to ignore when
  generated by `generate-requests`.

  No request is executed until the returned seq is realized, and
  realizing an interaction executes only the requests needed to
  produce it."
  ([exec-request generate-requests seed-requests]
   (interactions exec-request generate-requests seed-requests #{}))
  ([exec-request generate-requests seed-requests seen-requests]
   (letfn [;; `known` holds every request that must not be queued
           ;; again: the initial seen set plus everything that has
           ;; ever been put on the queue.
           (crawl [queue known]
             (lazy-seq
              ;; if exec-request returns ::skip, try the next request
              (loop [queue (seq queue)]
                (when queue
                  (let [[request & pending] queue
                        response            (exec-request request)]
                    (cond
                      (= ::skip response)
                      (recur pending)

                      (nil? response)
                      nil

                      :else
                      (let [interaction {:request  request
                                         :response response}
                            fresh       (set/difference
                                         (set (generate-requests interaction))
                                         known)]
                        (cons interaction
                              ;; `into` on a seq (or nil) prepends, so
                              ;; fresh requests run before `pending`
                              (crawl (into pending fresh)
                                     (into known fresh))))))))))]
     (crawl seed-requests
            (into (set seen-requests) seed-requests)))))

(defn spider
  "Return a lazy seq of interactions by executing the `seeds` requests
  and every request generated from the resulting interactions.

  Interactions are maps of {:request .. :response ...}

  Response is the result of `(exec-request request)`.  After a request
  is executed, the resulting interaction is used to generate new
  requests given `rules`.

  Options (keys of the single map argument):
   - `exec-request` - function executing a request
   - `rules` - rules used to generate new requests
   - `seeds` - initial requests
   - `env` - the environment for evaluation rules. Default is `default-env`

  See also [[harvest]] and [[interactions]]."
  [{:keys [env exec-request rules seeds]
    :or   {env default-env}}]
  (let [generate-requests (fn [interaction]
                            (harvest interaction rules env))]
    (interactions exec-request generate-requests seeds)))
