(ns diglett
  (:import [clojure.lang PersistentVector PersistentArrayMap]
           [java.lang String]
           [org.jsoup Jsoup]
           [org.jsoup.nodes Element Node]
           [org.jsoup.select Elements]
           [us.codecraft.xsoup Xsoup]
           [us.codecraft.xsoup.xevaluator DefaultXPathEvaluator]
           ))

(defn dig
  "Builds a specification map that pairs a `selector` with the nested
  `extractions` to apply to whatever that selector matches.

  The result is an array map; the `extract` multimethod in this namespace
  dispatches on the concrete map type, so the map type is kept as-is."
  [selector extractions]
  (array-map ::selector selector
             ::extractions extractions))

(defn run
  "Builds a specification map that attaches a function to `extractions`.

  `fs` are combined with `comp`, so the right-most function is applied first;
  with no functions at all the attached function is the identity.

  The result is an array map; the `extract` multimethod in this namespace
  dispatches on the concrete map type, so the map type is kept as-is."
  [extractions & fs]
  (let [composed (apply comp fs)]
    (array-map ::function composed
               ::extractions extractions)))

(defn xpath
  "Compiles `selector` with `Xsoup/compile` and returns the compiled result,
  which can then be used as an extraction specification."
  [selector]
  (. Xsoup compile selector))

(defn attr
  "Returns a one-argument function that reads the attribute named by `kw`.

  The returned function hands back a falsy argument unchanged. Otherwise it
  looks the attribute up via the argument's `.attributes` and returns the
  value, or nil when the value is empty."
  [kw]
  (fn [element]
    (if-not element
      element
      (let [value (.get (.attributes element) (name kw))]
        ;; An empty value is reported as nil rather than as "".
        (when (seq value)
          value)))))

(defn parse
  "Parses `html` as a body fragment with jsoup and returns the children of
  the resulting document."
  [^String html]
  (-> html
      Jsoup/parseBodyFragment
      .children))

(defn select
  "Runs `css-selector` against `node` and returns the matches, or nil when
  nothing matched.

  `node` may be nil (for example when an enclosing selector matched nothing);
  in that case nil is returned instead of throwing, so a missing parent simply
  yields missing children. This matches how `text` and `attr` treat nil.

  `node` is deliberately left un-hinted: callers in this namespace pass both
  the parsed children collection and individual elements, so the call is
  resolved against the runtime class."
  [node ^String css-selector]
  (when (some? node)
    (let [^java.util.Collection result (.select node css-selector)]
      ;; Normalise \"no matches\" to nil so callers can use plain truthiness.
      (when-not (.isEmpty result)
        result))))

(defprotocol Extractable
  "Things from which a textual value can be extracted."
  (text [x]
    "Returns the text of `x`; what that means is defined per implementing type."))

;; A missing node has no text: nil in, nil out.
(extend-type nil
  Extractable
  (text [_]
    nil))

;; For a jsoup Element, delegate to its own `.text` method.
(extend-type Element
  Extractable
  (text [element]
    (.text element)))

(defn- dispatch-type
  "Returns the value `extract` dispatches on for one piece of a specification.

  Any map is reported as `PersistentArrayMap` and any vector as
  `PersistentVector`, because those are the classes the methods are registered
  under. Without this, a map with more than eight entries (a hash map) or a
  subvector would find no matching method. An explicit `:type` in the value's
  metadata still wins, as it does with plain `type`."
  [x]
  (let [t (type x)]
    (cond
      (not= t (class x)) t
      (map? x)           PersistentArrayMap
      (vector? x)        PersistentVector
      :else              t)))

(defmulti extract
  "Extracts data from a node according to a specification.

  Takes a single vector argument `[node extractions]`, optionally followed by
  a third element holding a function. Dispatches on a pair: the kind of
  `extractions`, and, when `extractions` is sequential, the kind of its first
  element (nil otherwise)."
  (fn [[_ extractions]]
    [(dispatch-type extractions)
     (when (sequential? extractions)
       (dispatch-type (first extractions)))]))

;; XPath extraction: evaluates the compiled XPath `extractions` against a
;; single root element and converts the evaluation result with `f`, or with
;; `.get` when no function was supplied.
;;
;; `node` is either a collection of elements, in which case its first element
;; is the root, or a single Element, which is what the vector-of-maps method
;; passes for each row. A single Element is used directly instead of being
;; treated as a collection.
(defmethod extract [DefaultXPathEvaluator nil]
  [[node extractions f]]
  (let [root    (if (instance? Element node)
                  node
                  (first node))
        convert (or f #(.get %))]
    (convert (.evaluate extractions root))))

;; Map specification. Three shapes are handled, checked in this order:
;;
;; 1. A `dig` map (truthy ::selector): narrow `node` with the selector, then
;;    extract the nested specification from the matches.
;; 2. A `run` map (truthy ::function): extract the nested specification with
;;    the attached function.
;; 3. Any other map: extract every value against `node`, keeping the keys.
;;
;; An incoming function `f` (present when this map was itself wrapped in
;; `run`) is carried into shapes 1 and 2 instead of being dropped: it is
;; passed along unchanged through a selector, and composed after the map's own
;; function for a nested `run`. For shape 3 the function is not applied.
(defmethod extract [PersistentArrayMap nil]
  [[node extractions f]]
  (let [selector (::selector extractions)
        function (::function extractions)
        nested   (::extractions extractions)]
    (cond
      selector
      (extract [(select node selector) nested f])

      function
      (extract [node nested (if f (comp f function) function)])

      :else
      (into {}
            (map (fn [[k spec]]
                   [k (extract [node spec])]))
            extractions))))

;; Single-value extraction for a bare CSS selector string: take the first
;; match of the selector under `node` and convert it with `f`, or with `text`
;; when no function was supplied.
(defmethod extract [String nil]
  [[node css f]]
  (let [convert (or f text)
        hit     (first (select node css))]
    (convert hit)))

;; Multi-value extraction for a vector whose first element is a CSS selector
;; string: convert every match of that selector under `node` with `f`, or with
;; `text` when no function was supplied. Only the first element of the vector
;; is used. The result is a lazy sequence.
(defmethod extract [PersistentVector String]
  [[node [css] f]]
  (let [convert (or f text)]
    (->> (select node css)
         (map convert))))

;; Multi-value extraction for a vector whose first element is a map. Only the
;; first element of the vector is used. Three shapes, checked in this order:
;;
;; 1. A `dig` map (truthy ::selector): narrow `node` with the selector, then
;;    extract the nested specification, wrapped in a vector, from the matches.
;; 2. A `run` map (truthy ::function): extract the nested specification,
;;    wrapped in a vector, with the attached function.
;; 3. Any other map: extract that map once per item of `node`, returning a
;;    vector of results.
;;
;; An incoming function `f` (present when this vector was itself wrapped in
;; `run`) is carried into shapes 1 and 2 instead of being dropped: it is
;; passed along unchanged through a selector, and composed after the map's own
;; function for a nested `run`. For shape 3 the function is not applied.
(defmethod extract [PersistentVector PersistentArrayMap]
  [[node [extractions] f]]
  (let [selector (::selector extractions)
        function (::function extractions)
        nested   (::extractions extractions)]
    (cond
      selector
      (extract [(select node selector) [nested] f])

      function
      (extract [node [nested] (if f (comp f function) function)])

      :else
      (into []
            (map (fn [item] (extract [item extractions])))
            node))))

(defn scrape
  "Parses `html` with `parse` and runs `extract` over the parsed result using
  the `extractions` specification. Returns whatever `extract` returns."
  [html extractions]
  (let [root (parse html)]
    (extract [root extractions])))
