(ns grapeshot.feed
  "Read the Grapeshot feed"
  (:require [kixipipe.protocols         :as kixi]
            [kixipipe.misc              :as misc]
            [kixipipe.digest            :as digest]
            [kixipipe.transport.ftp     :as ftp]
            [clojure.java.io            :as io]
            [clojure.set                :as set]
            [clj-time.core              :as t]
            [clj-time.format            :as tf]
            [clojure.string             :as str]
            [com.stuartsierra.component :as component]
            [schema.core                :as s]
            [clojure.tools.logging      :as log]
            [potemkin]))

;; Makes kixipipe.transport.ftp's `with-session` macro available from this namespace.
(potemkin/import-macro kixipipe.transport.ftp/with-session)

;; Value placed under :src-name on every feed item built in this namespace.
(def ^:private SRC_NAME "grapeshot")

; maps "our name" -> "data vendor name"
(def ^:private FEED_NAME_OVERRIDES {"grapeshot" "anymedia"})

;; Inverse of FEED_NAME_OVERRIDES: maps "data vendor name" -> "our name".
(def ^:private REV_FEED_NAME_OVERRIDES (set/map-invert FEED_NAME_OVERRIDES))

;; Formatter for the 8-digit yyyyMMdd date embedded in remote file names.
(def ^:private grapeshot-date-formatter (tf/formatter "yyyyMMdd"))

;; Set of feed names (in "our name" form). Not referenced elsewhere in the
;; visible part of this file -- presumably consumed by callers; TODO confirm.
(def KNOWN_FEEDS  #{"grapeshot"})

(defn- name-override-remote->local
  "Returns item with its :feed-name translated from the data vendor's name to
   our own name. Names without an entry in REV_FEED_NAME_OVERRIDES are kept
   as they are."
  [item]
  (let [remote-name (:feed-name item)
        local-name  (get REV_FEED_NAME_OVERRIDES remote-name remote-name)]
    (assoc item :feed-name local-name)))

(defn- name-override-local->remote
  "Returns item with its :feed-name translated from our own name to the data
   vendor's name. Names without an entry in FEED_NAME_OVERRIDES are kept as
   they are."
  [item]
  (let [{:keys [feed-name]} item
        ;; FEED_NAME_OVERRIDES is keyed by our name; the reversed map is only
        ;; correct for the remote->local direction.
        new-name (get FEED_NAME_OVERRIDES feed-name feed-name)]
    (assoc item :feed-name new-name)))

(defn remove-db-sensitive-symbols
  "Strips every ${word} placeholder from each cell of row and returns the
   cleaned cells as a lazy sequence."
  [row]
  (for [cell row]
    (str/replace cell #"\$\{(\w+)\}" "")))

(defn discard-bad-ids
  "Cleans the two id columns (indexes 0 and 1) of row. Each cell is replaced
   by the first run of digits it contains; a cell without any digit is
   replaced by the integer 0."
  [row]
  (reduce (fn [cleaned idx]
            (let [digits (re-find #"\d+" (nth cleaned idx))]
              (assoc cleaned idx (or digits 0))))
          row
          [0 1]))

(defn append-columns
  "Pads row with empty strings until it has 8 columns. A row that already has
   8 or more columns is returned untouched; a padded row is returned as a
   vector."
  [row]
  (let [deficit (- 8 (count row))]
    (if-not (pos? deficit)
      row
      (vec (concat row (repeat deficit ""))))))

(defn truncate-urls
  "Shortens the url held in column 3 of row. When that url is longer than 512
   characters it is cut down to its first 450 characters; otherwise row is
   returned unchanged."
  [row]
  (let [url (nth row 3)]
    (if (<= (count url) 512)
      row
      (assoc row 3 (subs url 0 450)))))

(declare download-grapeshot-item!)
;; A single remote feed file. Downloading is delegated to
;; download-grapeshot-item!; the record also coerces to a file location built
;; from its dir and filename fields.
(defrecord GrapeshotFeedItem [src-name feed-name dir filename checksum metadata]
  kixi/FeedItem
  (download! [item session]
    (download-grapeshot-item! session item))
  io/Coercions
  ;; dir/filename are the record's own fields.
  (as-file [_] (io/file dir filename))
  ;; NOTE(review): .toURI yields a java.net.URI rather than a java.net.URL --
  ;; confirm that callers of as-url expect this.
  (as-url [this] (.toURI (io/as-file this))))

(declare grapeshot-feed-details)
;; Session component for the Grapeshot FTP feed.
;;   url          - FTP url of the remote server
;;   download-dir - local directory downloads are written to
;; After `start` the session also carries :ftp-client, an atom that holds the
;; FTP client (initially nil).
(defrecord GrapeshotSession [url download-dir]
  component/Lifecycle
  (start [this]
    (println "Starting Grapeshot Session")
    (assoc this :ftp-client (atom nil) :url url))
  (stop [this]
    (println "Stopping Grapeshot Session")
    ;; :ftp-client only exists once the component has been started. Guard the
    ;; deref so that stopping a never-started (or already stopped) session is
    ;; a no-op instead of a NullPointerException.
    (when-let [ftp-client (some-> (:ftp-client this) deref)]
      (.disconnect ftp-client))
    (dissoc this :ftp-client))
  kixi/FeedSession
  (feed-details [this name options]
    (grapeshot-feed-details this name options)))

(defn- valid?
  "Parses date with grapeshot-date-formatter. Returns the parsed value, or nil
   when parsing throws an IllegalArgumentException."
  [date]
  (try
    (tf/parse grapeshot-date-formatter date)
    (catch IllegalArgumentException _
      nil)))

(defn mk-session
  "Builds a GrapeshotSession from the supplied config map: the FTP url is
   derived from the config via ftp/to-ftp-url and the download directory is
   read from :download-dir. Pass the resulting session to all grapeshot feed
   calls."
  [item]
  (let [ftp-url    (ftp/to-ftp-url item)
        target-dir (:download-dir item)]
    (->GrapeshotSession ftp-url target-dir)))

;; Schema for a raw feed-item map: string :dir, :filename and :timestamp.
;; Within the visible code it is only mentioned by a commented-out
;; s/validate call, so it is currently not enforced.
(def ^:private FeedItem
  {:dir s/Str
   :filename s/Str
   :timestamp s/Str})

(defn length-equal
  "Returns a schema predicate satisfied by any value whose count equals l."
  [l]
  (s/pred #(= l (count %))))

;; Schema for a three-element sequence: a filename string, a name string and
;; a "date" element whose count must be exactly 8.
(def ^:private MatchSchema
  [(s/one s/Str "filename")
   (s/one s/Str "name")
   (s/one (length-equal 8) "date")])


(defn- ->grapeshot-feed-item
  "Turns a raw listing entry (a map with :dir, :filename and :timestamp) into
   a GrapeshotFeedItem.

   The filename must look like <name>-<yyyyMMdd>.csv.gz. Returns nil when the
   filename does not match that pattern or when the date part cannot be
   parsed. On success the item carries the source name, gzip encoding, a tab
   delimiter, the parsed :date, and the original timestamp under
   [:metadata :timestamp]; the vendor feed name is translated to our own name."
  [{:keys [dir filename timestamp] :as feed-item}]
  ;; (s/validate FeedItem feed-item)
  ;; The dots are escaped so that only a literal \".csv.gz\" suffix is accepted;
  ;; an unescaped `.` would match any character.
  (when-let [match (re-matches #"(\w+)-(\d{8})\.csv\.gz" filename)]
    (s/validate MatchSchema match)
    (let [[filename remote-name date-str] match]
      (when-let [date (valid? date-str)]
        (map->GrapeshotFeedItem
         (name-override-remote->local {:src-name SRC_NAME
                                       :encoding :gzip
                                       :feed-name remote-name
                                       :date date
                                       :dir dir
                                       :filename filename
                                       :delimiter \tab
                                       :metadata {:timestamp timestamp}}))))))

(defn- reverse-sort-by-timestamp
  "Sorts xs by the value found at [:metadata :timestamp], largest first.
   Timestamps are compared with their own .compareTo."
  [xs]
  (let [timestamp-of (fn [x] (:timestamp (:metadata x)))
        descending   (fn [a b] (.compareTo b a))]
    (sort-by timestamp-of descending xs)))

(defn- filter-dates
  "Lazily keeps the files whose :date falls within the interval that starts
   at date and ends at today at 00:00."
  [files date]
  (for [file  files
        :let  [window (t/interval date (t/today-at 00 00))]
        :when (t/within? window (:date file))]
    file))

(defn- grapeshot-feed-details
  "Lists the feed items available on the remote server.

   Everything below \"/\" except the first listing entry is converted to a
   feed item (entries that do not convert are dropped), ordered newest
   timestamp first, optionally narrowed to feed-name when one is given, and
   finally limited to items dated from (:date options) up to today at 00:00.
   Without a :date option the start date is five days before today at 00:00."
  [session feed-name options]
  (let [date (or (:date options)
                 (t/minus (t/today-at 00 00) (t/days 5)))]
    (log/info "Retrieving details for feed " feed-name " with options " options)
    (let [listing (rest (ftp/ftp-file-seq session "/"))
          items   (reverse-sort-by-timestamp (keep ->grapeshot-feed-item listing))
          wanted  (if feed-name
                    (filter #(= feed-name (:feed-name %)) items)
                    items)]
      (filter-dates wanted date))))

(defn- download-grapeshot-item!
  "Downloads item through the session's FTP client into the session's
   :download-dir and returns whatever digest/copy-stream! returns for it.

   The remote path is <dir>/<filename> of the item; the local file name comes
   from misc/local-filename-of."
  [session item]
  (log/info "Downloading " item)
  (let [client      @(:ftp-client session)
        remote-path (str (:dir item) "/" (:filename item))
        local-name  (misc/local-filename-of item)
        target-dir  (:download-dir session)]
    (with-open [src (digest/md5-input-stream (ftp/client-get-stream client remote-path))]
      (let [downloaded (digest/copy-stream! src target-dir local-name item)]
        ;; The pending FTP command is completed while `src` is still open,
        ;; i.e. before with-open closes it.
        ;; NOTE(review): confirm this ordering is what the FTP client expects.
        (ftp/client-complete-pending-command client)
        downloaded))))
