(ns backend-shared.models.payload.to-resource.tags
  (:require [cljs.nodejs :as node]))

;; Handle to the "html-to-text" Node module, loaded through cljs.nodejs/require.
;; Used by `html->text` below via its `fromString` method.
(def to-text (node/require "html-to-text"))

(defn html->text
  "Converts the HTML string `content` to plain text by calling the
  html-to-text module's `fromString`, with line wrapping turned off.

  Returns whatever `fromString` returns for the given input."
  [content]
  ;; html-to-text spells this option `wordwrap`. `clj->js` keeps keyword names
  ;; verbatim, so the earlier `:word-wrap` key became the JS property
  ;; \"word-wrap\", which the library does not recognise -- wrapping was never
  ;; actually disabled.
  (.fromString to-text content #js {:wordwrap false}))

;; Handle to the "compromise" Node module, loaded through cljs.nodejs/require.
;; `collect-tags` calls it as a function on the text to analyse.
(def nlp (node/require "compromise"))

(defn filter-tags
  "Picks the most frequent short tags out of `all-tags`.

  Tags whose `count` is 20 or more are discarded, the remaining ones are
  tallied, and at most five tags with the highest tallies are returned as a
  set."
  [all-tags]
  (let [short-tags (filter (fn [tag] (< (count tag) 20)) all-tags)
        tallies    (frequencies short-tags)
        ;; Sort ascending by tally, then flip, so the highest tallies lead.
        ranked     (reverse (sort-by val tallies))]
    (set (map key (take 5 ranked)))))

(defn collect-tags
  "Extracts noun phrases from `content` using the compromise module.

  When `content` has fewer than 200 items by `count`, no analysis is run and
  an empty set is returned. Otherwise the result of calling `.out \"array\"`
  on the document's `.nouns` is returned as-is, so note that the two branches
  yield different collection types."
  [content]
  (if (>= (count content) 200)
    (let [doc   (nlp (clj->js content))
          nouns (.nouns doc)]
      (.out nouns "array"))
    #{}))

(defn text->tags
  "Derives a tag set from plain text: runs `collect-tags` on `content` and
  narrows the result with `filter-tags`."
  [content]
  (filter-tags (collect-tags content)))

(defn html->tags
  "Derives a tag set from an HTML string: converts `html` to plain text with
  `html->text`, then hands that text to `text->tags`."
  [html]
  (text->tags (html->text html)))
