(ns backend-shared.converters.mercury
  (:require [cljs.nodejs :as node]
            [clojure.string :as str]))

;; Handles to the Node.js modules used by this namespace.
;; `to-text` is the "html-to-text" module (used by `html->text`).
(def to-text (node/require "html-to-text"))
;; `URL` is Node's "url" module (used by `parse-url` via its `parse` function).
(def URL (node/require "url"))
;; `nlp` is the "compromise" module (used by `collect-tags` to pull out nouns).
(def nlp (node/require "compromise"))

;; Maps a URL hostname to the resource type it should be treated as.
;; Hostnames that are not listed here are treated as :html by `parse-url`.
(def hostname->type
  {"www.youtube.com" :video})

;; Maps a URL hostname to a provider keyword.
;; A lookup for a hostname that is not listed here yields nil.
(def hostname->provider
  {"www.youtube.com" :youtube})

(defn remove-nil-fields
  "Returns a map holding only the entries of `record` whose value is not nil.
  Values that are `false` are kept; only nil is dropped."
  [record]
  (into {}
        (remove (fn [[_ value]] (nil? value)))
        record))

(defn html->text
  "Converts the HTML string `content` to plain text using html-to-text's
  `fromString`, with line wrapping turned off.

  The option key must be `wordwrap`: `clj->js` turns a keyword key into its
  name verbatim, so `:word-wrap` would become \"word-wrap\", which
  html-to-text does not recognise and therefore ignores (leaving its default
  wrapping in place)."
  [content]
  (.fromString to-text content (clj->js {:wordwrap false})))

(defn filter-tags
  "Picks the most frequent tags out of `all-tags`.

  Tags of 20 characters or more are discarded first. The remaining tags are
  counted, ordered from most to least frequent, and the first five are
  returned as a set."
  [all-tags]
  (let [short-tags (filter (fn [tag] (< (count tag) 20)) all-tags)
        ;; Ascending sort followed by reverse (rather than a descending
        ;; comparator) so that ties end up in the same order as before.
        ascending  (sort-by val (frequencies short-tags))
        top-five   (take 5 (reverse ascending))]
    (set (map key top-five))))

(defn collect-tags
  "Extracts candidate tags from the text `content`.

  Content shorter than 200 characters yields an empty set. Otherwise the
  text is handed to `nlp`, and the result of calling `nouns` on it and then
  `out` with \"array\" is returned."
  [content]
  (if (>= (count content) 200)
    (let [document (nlp (clj->js content))
          nouns    (.nouns document)]
      (.out nouns "array"))
    #{}))

(defn text->tags
  "Turns plain text into a set of tags: `collect-tags` gathers the
  candidates and `filter-tags` narrows them down."
  [content]
  (filter-tags (collect-tags content)))

(defn html->tags
  "Turns an HTML string into a set of tags by converting it to plain text
  with `html->text` and passing that text to `text->tags`."
  [html]
  (let [plain-text (html->text html)]
    (text->tags plain-text)))


(defn parse-url
  "Parses `url` with the Node `url` module and returns a map with:
    :hostname      - the parsed hostname
    :provider      - the `hostname->provider` entry for that hostname, or nil
    :query         - the `query` property of the parsed URL
    :resource-type - the `hostname->type` entry for that hostname, or :html"
  [url]
  (let [parsed (.parse URL url)
        host   (.-hostname parsed)]
    {:hostname      host
     :provider      (get hostname->provider host)
     :query         (.-query parsed)
     :resource-type (or (hostname->type host) :html)}))

(defn url->video-content
  "Builds the content map for a video URL: `{:provider ... :id ...}`.

  The id is read from the URL's query string. The `v` parameter is preferred;
  when there is no `v` parameter the value of the first parameter is used.
  `:id` is nil when no usable value is found.

  The query is parsed parameter by parameter: splitting the whole query on
  \"=\" would leak the next parameter's name into the id (\"v=abc&t=10\"
  would give \"abc&t\")."
  [url]
  (let [{:keys [provider query]} (parse-url url)
        ;; \"a=1&b=2\" -> ([\"a\" \"1\"] [\"b\" \"2\"]); the limit of 2 keeps
        ;; any further \"=\" inside the value.
        pairs  (->> (str/split (or query "") #"&")
                    (map #(str/split % #"=" 2)))
        ;; Only complete name/value pairs can go into the lookup map.
        params (into {} (filter #(= 2 (count %))) pairs)
        id     (or (not-empty (get params "v"))
                   (not-empty (second (first pairs))))]
    {:provider provider
     :id       id}))

(defn determine-type
  "Returns the :resource-type that `parse-url` reports for `url`."
  [url]
  (-> url
      parse-url
      :resource-type))

;; Converts a raw resource map into a resource record.
;; Dispatches on the resource type that `determine-type` derives from the
;; map's :url.
(defmulti to-resource (fn [{:keys [url]}] (determine-type url)))


;; Video resources: :content is the provider/id map built from the URL,
;; while tags are still derived from the raw `content` HTML.
(defmethod to-resource :video [{:keys [url title excerpt content]}]
  (-> {:resource-url  url
       :title         title
       :resource-type (determine-type url)
       :content       (url->video-content url)
       :description   excerpt
       :tags          (html->tags content)}
      remove-nil-fields))

;; HTML resources: :content is passed through unchanged and tags are
;; derived from it.
(defmethod to-resource :html [{:keys [url title excerpt content]}]
  (-> {:resource-url  url
       :title         title
       :resource-type (determine-type url)
       :content       content
       :description   excerpt
       :tags          (html->tags content)}
      remove-nil-fields))

(defn ->resource
  "Converts `raw-resource` with `to-resource` and strips any nil-valued
  fields from the result."
  [raw-resource]
  (remove-nil-fields (to-resource raw-resource)))
