(ns diglett
  (:refer-clojure :exclude [def])
  (:require [clojure.spec :as s])
  (:import [org.jsoup Jsoup]
           [org.jsoup.nodes Element Node]
           [org.jsoup.select Elements]))

;; `dig*` is defined further down but is called from the `dig` methods that
;; precede it, so it is forward-declared here.
(declare dig*)

;; Private registry of extractions: an atom holding a map from key to the
;; extraction registered for it through `def`; `dig*` reads it by key.
;; `defonce` means re-evaluating this form does not reset the atom.
(defonce ^:private extractions-ref (atom {}))

;; Operations the extraction pipeline applies to a selected value.
(defprotocol Extractable
  ;; Return the textual content of x.
  (text [x])
  ;; Coerce x to a single node.
  (->node [x]))

;; Extractable implementations, one `extend-type` per participating type.

;; nil: both operations yield nil, so an empty selection flows through quietly.
(extend-type nil
  Extractable
  (text [_] nil)
  (->node [_] nil))

;; String: a string is its own text. `->node` is intentionally not provided.
(extend-type java.lang.String
  Extractable
  (text [s] s))

;; Node: text comes from the node's own `.text`; a node is already a node.
(extend-type Node
  Extractable
  (text [node] (.text node))
  (->node [node] node))

;; Elements: collapse a selection to its first member. `text` is not provided.
(extend-type Elements
  Extractable
  (->node [nodes] (first nodes)))

(defn- parse
  "Parse `html` as a body fragment with Jsoup and return the children of the
  resulting document."
  [^String html]
  (let [document (Jsoup/parseBodyFragment html)]
    (.children document)))

(defn- select
  "Run `css-selector` against `node`.

  Returns the matching Elements, or nil when nothing matched. When
  `css-selector` is nil, `node` is returned unchanged.

  `node` is deliberately left unhinted: `extract` passes the collection
  returned by `parse`, while the 'coll-checker method passes the individual
  items of a selection, so no single class hint is correct for every caller
  and the `.select` call is resolved at runtime on whatever is passed."
  [node ^String css-selector]
  (if css-selector
    (let [^Elements result (.select node css-selector)]
      ;; Normalise an empty selection to nil so callers can treat it as falsey.
      (when-not (.isEmpty result)
        result))
    node))

(defn vectorize
  "Coerce `x` to a vector.

  Sequential collections (lists, vectors, seqs) are poured into a vector; any
  other value, including nil, maps and sets, is wrapped in a one-element
  vector."
  [x]
  (if-not (sequential? x)
    (vector x)
    (vec x)))

(defn sym->fn
  "Map a spec predicate symbol to the extraction function it implies.

  Returns `text` for the symbol `string?` and nil for anything else."
  [sym]
  ;; `case` does not evaluate its test constants. A quoted 'string? would be
  ;; read as the list (quote string?), which `case` treats as \"match either
  ;; `quote` or `string?`\". The bare symbol matches exactly `string?`.
  (case sym
    string? text
    nil))

(defn attr
  "Build an extraction function that reads the attribute named by `kw`.

  The returned function takes a node and looks the attribute up in the node's
  `.attributes`. A falsey node is returned as-is, and an empty attribute value
  yields nil."
  [kw]
  (fn [node]
    (if-not node
      node
      (let [value (.get (.attributes node) (name kw))]
        ;; Treat an empty attribute value the same as a missing one.
        (not-empty value)))))

;; ::sel -- a selector. Accepts either a CSS selector string or the keyword
;; :self and conforms it to a one-argument function of a node:
;;   * string -> (fn [node] (select node s))
;;   * :self  -> identity
;; Both `s/or` branches share the tag :sel; the trailing `(s/conformer last)`
;; takes the last element of the conformed `s/or` result so that only the
;; function remains.
(s/def ::sel (s/and (s/or :sel (s/and string?
                                      (s/conformer (fn [s]
                                                     (fn [node]
                                                       (select node s)))))
                          :sel (s/and #{:self}
                                      (s/conformer (fn [_] identity))))
                    (s/conformer last)))

;; ::extr -- an extraction definition. Three input shapes are accepted:
;;   * nil                                -> tagged :none, conformed to {}
;;   * a bare ::sel value                 -> wrapped as {:sel <selector-fn>}
;;   * a sequence of a ::sel followed by
;;     any number of functions            -> matched by `s/cat` under the keys
;;                                           :sel and :fns
;; The final conformer receives the [tag value] pair from `s/or`; for the
;; non-:none tags it returns the value itself (`vectorize` + `last` is a no-op
;; unwrap for a non-sequential value).
(s/def ::extr (s/and (s/or :none nil?
                           :extr (s/and ::sel
                                        (s/conformer (partial hash-map :sel)))
                           :extr (s/and (s/cat :sel ::sel
                                               :fns (s/* fn?))))
                     (s/conformer (fn [[k extr]]
                                    (if (= k :none)
                                      {}
                                      (last (vectorize extr)))))))

;; ::spec -- the spec side of a digging. Accepts a spec object (replaced by
;; its `s/describe` form), a symbol, or nil. All branches share the tag :spec;
;; the final conformer keeps only the last element of the conformed `s/or`
;; result, with `some->` guarding against a nil input.
(s/def ::spec (s/and (s/or :spec (s/and s/spec?
                                        (s/conformer s/describe))
                           :spec symbol?
                           :spec nil?)
                     (s/conformer #(some-> % last))))

;; ::digging -- the [extraction spec] pair handed to `dig`.
;; Conforms to {:extr {:sel <selector-fn> :fns <fns>} :spec <conformed spec>}.
(s/def ::digging
  (s/and (s/cat :extr ::extr
                :spec ::spec)
         (s/conformer (fn [{:keys [extr spec]}]
                        ;; An empty extraction is only acceptable when the spec
                        ;; is a `keys` form; any other combination is rejected.
                        (if (and (empty? extr)
                                 (not= (first (vectorize spec)) 'keys))
                          ::s/invalid
                          (let [{:keys [sel fns]} extr
                                ;; Append the function implied by the spec
                                ;; symbol (see `sym->fn`) after the explicitly
                                ;; supplied functions, when there is one.
                                fns (cond-> (or fns [])
                                      (sym->fn spec) (conj (sym->fn spec)))]
                            ;; With no functions at all, fall back to `text`.
                            {:extr {:sel sel :fns (or (seq fns) [text])}
                             :spec spec}))))))

(defn def
  "Register `extr` as the extraction for key `x` in the extractions registry,
  replacing any previous entry. Returns the updated registry map, i.e. the
  result of the `swap!`."
  [x extr]
  (swap! extractions-ref #(assoc % x extr)))

(defn pull
  "Thread `node` through `fns` from left to right: the first function receives
  `node`, each following function receives the previous result, and the last
  result is returned. With no functions, `node` is returned unchanged."
  [node fns]
  (loop [value     node
         remaining (seq fns)]
    (if remaining
      (recur ((first remaining) value) (next remaining))
      value)))

;; Extract a value from a node according to a conformed digging.
;; Dispatches on the head of the digging's :spec: the spec itself when it is
;; not sequential, otherwise its first element. A nil digging or nil :spec
;; dispatches on nil.
(defmulti dig
  (fn [_ digging]
    (when-some [spec (:spec digging)]
      (first (vectorize spec)))))

;; Collection specs: select the nodes with the extraction's selector, then dig
;; each one using the item spec, which is the last element of the described
;; spec form.
;;   * keyword item spec -> resolved through `dig*` for every node
;;   * anything else     -> dug in place against each node via the :self selector
;; Returns a lazy sequence with one entry per selected node.
(defmethod dig 'coll-checker [node {{:keys [sel]} :extr spec :spec}]
  (let [nodes     (sel node)
        item-spec (last spec)]
    (if (keyword? item-spec)
      (map #(dig* item-spec %) nodes)
      ;; The item digging does not depend on the node, so conform it once
      ;; instead of once per selected node.
      (let [item-digging (s/conform ::digging [:self item-spec])]
        (map #(dig % item-digging) nodes)))))

;; Map specs: build a map by digging every key listed under :req and :opt in
;; the described `keys` form against the same node. Optional keys whose value
;; is falsey are left out; required keys are always present, and win over an
;; optional entry with the same key.
(defmethod dig 'keys [node {spec :spec}]
  (let [{:keys [req opt]} (apply hash-map (rest spec))
        optional (into {}
                       (for [k opt
                             :let [v (dig* k node)]
                             :when v]
                         [k v]))
        required (zipmap req (map #(dig* % node) req))]
    (merge optional required)))

;; Fallback for scalar specs: apply the selector, reduce the selection to a
;; single node and run the extraction functions over it.
(defmethod dig :default [node {{:keys [sel fns]} :extr}]
  (let [selected (sel node)
        target   (->node selected)]
    (pull target fns)))

(defn dig*
  "Dig the value registered under key `k` out of `node`.

  Looks up the extraction registered for `k` via `def` and the spec registered
  for `k` in the spec registry, conforms the pair with ::digging and dispatches
  to `dig`.

  Throws ex-info (with `:key` in the data) when the pair does not conform,
  e.g. when nothing usable is registered for `k`."
  [k node]
  (let [digging (s/conform ::digging [(@extractions-ref k)
                                      ((s/registry) k)])]
    ;; An invalid conform result carries no selector function, so dispatching
    ;; on it would only fail later with an opaque NullPointerException. Fail
    ;; here with the offending key instead.
    (when (= ::s/invalid digging)
      (throw (ex-info (str "No usable extraction/spec registered for " (pr-str k))
                      {:key k})))
    (dig node digging)))

(defn extract
  "Parse the `html` string and dig out the value registered under key `k`."
  [k html]
  (let [root (parse html)]
    (dig* k root)))
