(ns storm-commons.util
  (:require [net.cgrand.enlive-html :as html]
            [clj-http.client :as client]
            [clojure.string :as s]
            [cheshire.core :refer [parse-string]]
            [cheshire.factory :as factory]
            [backtype.storm.config :refer :all]
            [clojure.java.io :refer [resource]])
  (:import (backtype.storm StormSubmitter)
           (java.util.concurrent TimeoutException TimeUnit FutureTask))
  (:gen-class))

;; Default HTTP request headers used by the fetch-* functions in this
;; namespace. The User-Agent value is a desktop Firefox identifier string.
(def ^:private headers {"User-Agent" "Mozilla/5.0 (Windows NT 6.1;) Gecko/20100101 Firefox/13.0.1"})

;; Cache of the word dictionary. Starts as an empty map; `translate` fills it
;; with the result of `get-dicts` the first time it finds the cache empty.
(def dicts (atom {}))

(defn fetch-node
  "GETs `url` with the default headers and returns the response body parsed
  by enlive's `html-resource`.

  `encoding` is the charset name used to decode the response stream
  (defaults to \"utf-8\").

  The body is requested as a stream, so the reader wrapping it is closed once
  parsing has finished; otherwise the underlying HTTP connection would stay
  open."
  ([url]
     (fetch-node url "utf-8"))
  ([url encoding]
     (let [response (client/get url {:headers headers :as :stream})]
       (with-open [reader (java.io.InputStreamReader. (:body response) encoding)]
         ;; Force the result while the reader is still open, in case the
         ;; parser hands back a lazy sequence.
         (doall (html/html-resource reader))))))

(defn fetch-url
  "GETs `url` and returns the response body.

  `encoding` is handed to clj-http as the `:as` option (defaults to
  \"utf-8\"). `headers-opts` is merged over the default headers, so its
  entries win on key clashes."
  ([url]
     (fetch-url url "utf-8"))
  ([url encoding]
     (fetch-url url encoding {}))
  ([url encoding headers-opts]
     (let [request {:headers (merge headers headers-opts)
                    :as encoding}
           response (client/get url request)]
       (:body response))))

(defn fetch-json
  "GETs `url` with the default headers, asking clj-http to coerce the
  response with `:as :json`, and returns the response body."
  [url]
  (let [response (client/get url {:headers headers :as :json})]
    (:body response)))

(defn parse-jsonp
  "Strips a JSONP wrapper of the form `jsonp<digits>( ... )` from `string`
  and returns the text between the parentheses.

  Leading whitespace before the callback name, and trailing whitespace or
  semicolons after the closing parenthesis, are removed as well."
  [string]
  (-> string
      ;; Anchored to the start of the input: s/replace substitutes every
      ;; match, so without the anchor a `jsonpN(` occurring inside the payload
      ;; itself would be deleted too.
      (s/replace #"^[\r\n\t ]*jsonp\d*\(" "")
      (s/replace #"\)[\r\n\t ;]*$" "")))

(defn- unquote-handle
  "Wraps bare numeric object keys in double quotes. Two positions are
  handled: digits directly after `:{`, and digits sitting between `,` and
  `:`."
  [str]
  (let [after-open-brace (s/replace str #":\{(\d+)" ":{\"$1\"")]
    (s/replace after-open-brace #",(\d+):" ",\"$1\":")))

(defn fetch-jsonp
  "Fetches `url` via `fetch-url`, removes the JSONP wrapper, quotes bare
  numeric keys and parses the remainder with cheshire's `parse-string`.

  Parsing runs with a JSON factory that allows unquoted field names, single
  quotes and unquoted control characters, with backslash escaping of any
  character switched off. `headers` is forwarded to `fetch-url`."
  ([url encoding]
     (fetch-jsonp url encoding {}))
  ([url encoding headers]
     (let [lenient-factory (factory/make-json-factory
                            {:allow-unquoted-field-names true
                             :allow-single-quotes true
                             :allow-backslash-escaping false
                             :allow-unquoted-control-chars true})]
       (binding [factory/*json-factory* lenient-factory]
         (let [raw (fetch-url url encoding headers)
               json-text (unquote-handle (parse-jsonp raw))]
           (parse-string json-text))))))

(defn id-text
  "Returns the trimmed text of the first node in `node` matched by the enlive
  `selector`."
  [node selector]
  (-> (html/select node selector)
      first
      html/text
      s/trim))

(defn class-texts
  "For every node in `node` matched by `selector`, returns the value found at
  the `keys` path inside that node (via `get-in`)."
  [node selector keys]
  (let [matched (html/select node selector)]
    (map (fn [element] (get-in element keys)) matched)))

(defn href-kvs
  "Selects nodes with `selector` and returns a flat sequence alternating
  between the value at `href-keys` of each node and the concatenated items
  found at `content-keys` of the nodes."
  [node selector href-keys content-keys]
  (let [matched (html/select node selector)
        hrefs (map (fn [element] (get-in element href-keys)) matched)
        contents (mapcat (fn [element] (get-in element content-keys)) matched)]
    (interleave hrefs contents)))

(defn split-pair
  "Splits `str` into a two-element vector `[key value]` around the first run
  of ASCII or full-width colons. Both halves are trimmed.

  When `str` contains no colon the result is `[trimmed-str \"\"]`; previously
  this case threw a NullPointerException because the missing delimiter was
  passed to `re-pattern`."
  [str]
  (let [;; NOTE(review): the original replaced a character that renders as a
        ;; plain space with a plain space; presumably that was a non-breaking
        ;; space (which s/trim does not strip). Both are normalised here.
        clean (fn [part] (s/trim (s/replace part #"[\u00A0 ]" " ")))]
    (if-let [delimiter (re-find #"[:：]+" str)]
      ;; The delimiter is known to occur, so a limit-2 split always yields
      ;; exactly two parts (possibly empty strings).
      (mapv clean (s/split str (re-pattern delimiter) 2))
      [(clean str) ""])))

(defn- meta-extractor
  "Returns the first element of `m` whose `[:attrs :name]` value equals
  `attr`, or nil when none matches."
  [m attr]
  (let [named? (fn [tag] (= (get-in tag [:attrs :name]) attr))]
    (->> m
         (filter named?)
         first)))

(defn- metas
  "Selects every `:meta` element of `node` using enlive."
  [node]
  (-> node
      (html/select [:meta])))

(defn get-meta-keywords
  "Returns the `content` attribute of the meta tag in `node` whose `name`
  attribute equals `keywords` (defaults to \"keywords\"), or nil when there is
  no such tag."
  ([node]
     (get-meta-keywords node "keywords"))
  ([node keywords]
     (let [tag (meta-extractor (metas node) keywords)]
       (get-in tag [:attrs :content]))))

(defn get-meta-description
  "Returns the `content` attribute of the meta tag in `node` whose `name`
  attribute equals `description` (defaults to \"description\"), or nil when
  there is no such tag."
  ([node]
     (get-meta-description node "description"))
  ([node description]
     (let [tag (meta-extractor (metas node) description)]
       (get-in tag [:attrs :content]))))

(defn sf
  "Runs `re-find` with `pattern` on `resource` and returns the second item of
  the result, i.e. the first capture group when the pattern has groups.
  Returns nil when nothing matches."
  [pattern resource]
  (let [[_ captured] (re-find pattern resource)]
    captured))

(defn get-domain
  "Returns the scheme-plus-host prefix (e.g. \"http://example.com\") of the
  first http(s) URL found in `url`, or nil when there is none."
  [url]
  (->> url
       (sf #"(http[s]*://[\w.-]+)")))

(defn simple-diff
  "Returns a map of the entries of `map1` whose value differs from the value
  `map2` holds under the same key. Keys that exist only in `map2` are
  ignored."
  [map1 map2]
  (reduce (fn [diff k]
            (if (= (map1 k) (map2 k))
              diff
              (assoc diff k (map1 k))))
          (hash-map)
          (keys map1)))

(defn- get-resource
  "Looks up `path` with `clojure.java.io/resource` and returns its whole
  content as a string."
  [path]
  (-> path
      clojure.java.io/resource
      slurp))

(defn get-dicts
  "Loads the `dict.txt` resource and returns it as a map.

  Each line is expected to hold two tab-separated fields, `key<TAB>value`.
  Lines are split on both `\\n` and `\\r\\n`, so CRLF files do not leave a
  stray carriage return in the values. Lines that do not contain exactly two
  fields (blank lines, for instance) are skipped instead of making the map
  construction throw."
  []
  (->> (get-resource "dict.txt")
       clojure.string/split-lines
       (map #(clojure.string/split % #"\t"))
       ;; `into {}` only accepts two-element vectors as entries.
       (filter #(= 2 (count %)))
       (into {})))

(defn translate
  "Looks `word` up in the dictionary and returns its entry, or `word` itself
  when there is none. The dictionary is loaded with `get-dicts` and stored in
  the `dicts` atom whenever the cached map is empty."
  [word]
  (let [cached @dicts
        table (if (seq cached)
                cached
                (reset! dicts (get-dicts)))]
    (get table word word)))

(defn run-local!
  "Calls the zero-argument `topology` function and submits its result to
  `cluster` under `storm-name` with the configuration `opts`."
  [cluster storm-name topology opts]
  (let [built (topology)]
    (.submitTopology cluster storm-name opts built)))

(defn submit-topology!
  "Calls the zero-argument `topology` function and submits its result through
  `StormSubmitter/submitTopology` under `storm-name` with the configuration
  `opts`."
  [storm-name topology opts]
  (let [built (topology)]
    (StormSubmitter/submitTopology storm-name opts built)))

(def uglify-time-unit
  "Map from keyword aliases (:ns, :nanoseconds, :us, :microseconds, :ms,
  :milliseconds, :s, :sec, :seconds) to the matching
  java.util.concurrent.TimeUnit constant."
  (reduce-kv (fn [lookup unit aliases]
               ;; Register every alias of this unit.
               (into lookup (map (fn [alias] [alias unit]) aliases)))
             {}
             {TimeUnit/NANOSECONDS [:ns :nanoseconds]
              TimeUnit/MICROSECONDS [:us :microseconds]
              TimeUnit/MILLISECONDS [:ms :milliseconds]
              TimeUnit/SECONDS [:s :sec :seconds]}))

(defn thunk-timeout
  "Runs the zero-argument function `thunk` on a new thread and waits up to
  `time` for it to finish, returning its result.

  `unit` is one of the keywords known to `uglify-time-unit` (:ns, :us, :ms,
  :s and their long forms); any other value is passed to the future as-is, so
  a TimeUnit works too. With two arguments the time is in milliseconds. `tg`,
  when given, is the ThreadGroup the worker thread is created in; the group
  is stopped once the call completes, whatever the outcome.

  Throws TimeoutException when the time runs out. Any other exception raised
  while waiting is rethrown after the task has been cancelled.

  Thread.stop throws UnsupportedOperationException on recent JDKs. That
  exception is ignored here so it cannot replace the real failure; on such
  JVMs the worker is only interrupted (by cancelling the task) and may keep
  running if it ignores interruption."
  ([thunk ms]
     (thunk-timeout thunk ms :ms nil)) ; Default to milliseconds, because that's pretty common.
  ([thunk time unit]
     (thunk-timeout thunk time unit nil))
  ([thunk time unit tg]
     (let [task (FutureTask. thunk)
           thr (if tg (Thread. tg task) (Thread. task))
           ;; Cancel (interrupt) the task, then try to force-stop its thread.
           abort (fn []
                   (.cancel task true)
                   (try
                     (.stop thr)
                     (catch UnsupportedOperationException _ nil)))]
       (try
         (.start thr)
         (.get task time (or (uglify-time-unit unit) unit))
         (catch TimeoutException e
           (abort)
           (throw (TimeoutException. "Execution timed out.")))
         (catch Exception e
           (abort)
           (throw e))
         (finally
           (when tg
             (try
               (.stop tg)
               (catch UnsupportedOperationException _ nil))))))))

(defmacro with-timeout
  "Evaluates `body` through `thunk-timeout`, with `time` passed as the
  two-argument form's time limit."
  [time & body]
  (let [thunk `(fn [] ~@body)]
    `(thunk-timeout ~thunk ~time)))
