(ns appnexus.feed
  "Read the Appnexus feed"
  (:require [kixipipe.misc :as misc]
            [kixipipe.protocols :as kixi]
            [kixipipe.digest :as digest]
            [kixipipe.ioplus :as ioplus]
            [kixipipe.transport.sftp :as sftp]
            [kixipipe.data.scrub :as scrub]
            [appnexus.http :as http]
            [cheshire.core :as json]
            [clj-time.core :as t]
            [clj-time.format :as tf]
            [clojure.java.io :as io]
            [clojure.set :as set]
            [clojure.string :as str]
            [clojure.tools.logging :as log]
            [slingshot.slingshot :refer [try+ throw+]]
            [schema.core :as s]
            [com.stuartsierra.component :as component]))

;; Source name reported by feed items in this namespace (see the `src-name`
;; protocol methods and the `:src-name` key set when items are built).
(def ^:private SRC_NAME "appnexus")

;; FIXME - a better way for things to discover feeds.
;; Feed names grouped by the retrieval path used for them; AppNexusSession's
;; feed-details dispatches on membership of these sets.
(def SIPHON_FEEDS #{ "standard_feed" "segment_feed" "bid_landscape_feed"})
(def READ_ONLY_FEEDS #{"brand" "category" "segment" "media_type" "media_subtype" "platform_member"})
(def XAXIS_FEEDS #{"xaxis_conversions" "gdm_conversions" "groupmuk_conversions"})
;; Union of every feed name this namespace knows about.
(def KNOWN_FEEDS (set/union SIPHON_FEEDS READ_ONLY_FEEDS XAXIS_FEEDS))


;; Maps a read-only feed name to the keyword stored as :results-key on its feed
;; items; that key is later used to select the results out of the service
;; response.
(def ^:private READ_ONLY_FEED_RESULTS_KEY_MAP {"brand" :brands
                                               "category" :categories
                                               "segment" :segments
                                               "media_type" :media-types
                                               "media_subtype" :media-subtypes
                                               "platform_member" :platform-members})

; Maps "our name" -> "data vendor name" for the feeds whose names differ.
; Feeds that are not listed here use the same name on both sides (lookups fall
; back to the feed name itself).
(def ^:private FEED_NAME_OVERRIDES {"xaxis_conversions" "Xaxis_conversions"
                          "media_type" "media-type"
                          "media_subtype" "media-subtype"
                          "platform_member" "platform-member"})

;; Inverse of FEED_NAME_OVERRIDES: "data vendor name" -> "our name".
(def ^:private REV_FEED_NAME_OVERRIDES (set/map-invert FEED_NAME_OVERRIDES))
;; Date format used in xaxis file names (pattern d_M_yyyy: day and month
;; without zero padding, then a four digit year).
(def ^:private xaxis-date-formatter (tf/formatter "d_M_yyyy"))




(declare download-siphon-item!)
(defrecord
    ^{:doc "Item indicating an entry in an APIServices/ReportingServices/LogLevelDataService.
Typically an entry is a large (tens of MB) file download.

Besides the record fields, the item is expected to carry :dir and :filename
keys, which the io/Coercions implementation uses to locate the local file."}
    AppNexusSiphonFeedItem [src-name feed-name date metadata submission-time]
    kixi/FeedItem
    (src-name [_] SRC_NAME)
    (download! [item session]
      (download-siphon-item! session item))
    io/Coercions
    (as-file [item] (io/file (:dir item) (:filename item)))
    ;; as-url must yield a java.net.URL; delegate to the File coercion rather
    ;; than returning the intermediate java.net.URI.
    (as-url [item] (io/as-url (io/as-file item))))

(declare download-read-only-item!)
(defrecord
    ^{:doc "Item indicating data from an APIServices/ReadOnlyService.
Typically this will be a page of json data, starting at start-element."}
    AppNexusReadOnlyFeedItem [feed-name start-element metadata]
    kixi/FeedItem
    ;; Use the shared constant so every item type reports the same source name.
    (src-name [_] SRC_NAME)
    (download! [item session]
      (download-read-only-item! session item)))

(declare download-xaxis-item!)
(defrecord
    ^{:doc "Item indicating a file in the xaxis SFTP directory.
Downloading the item fetches that file over SFTP."}
    AppNexusXaxisFeedItem [feed-name metadata]
    kixi/FeedItem
    ;; Use the shared constant so every item type reports the same source name.
    (src-name [_] SRC_NAME)
    (download! [item session]
      (download-xaxis-item! session item)))

;; Forward declarations: AppNexusSession's feed-details refers to these
;; functions before they are defined further down the file.
(declare siphon-feed-details)
(declare read-only-feed-details)
(declare xaxis-feed-details)

;; Session record handed to the appnexus service calls. As a component it only
;; announces start/stop; as a FeedSession it routes a feed name to the details
;; function of the feed family the name belongs to.
(defrecord AppNexusSession [config]
  component/Lifecycle
  (start [component]
    (println "Starting AppNexusSession")
    component)
  (stop [component]
    (println "Stopping AppNexusSession")
    component)
  kixi/FeedSession
  (feed-details [session feed-name options]
    ;; condp has no default clause, so a feed name outside the three sets
    ;; raises IllegalArgumentException.
    (condp contains? feed-name
      SIPHON_FEEDS    (siphon-feed-details session feed-name options)
      READ_ONLY_FEEDS (read-only-feed-details session feed-name options)
      XAXIS_FEEDS     (xaxis-feed-details session feed-name options))))

;; Format of the hour strings exchanged with the siphon service
;; (pattern yyyy_MM_dd_HH).
(def ^:private appnexus-hour-formatter (tf/formatter "yyyy_MM_dd_HH"))

(defn- to-date-time-no-hours
  "Builds a new date-time from only the year, month and day of dt, dropping
the time-of-day part. Returns nil when dt is nil."
  [dt]
  (when dt
    (t/date-time (t/year dt) (t/month dt) (t/day dt))))

(defn- ->appnexus-hour
  "Renders datetime as an hour string using appnexus-hour-formatter."
  [datetime]
  (-> appnexus-hour-formatter
      (tf/unparse datetime)))

(defn- appnexus-hour->dt
  "Parses an hour string (appnexus-hour-formatter format) and returns the
date-time of that day with the time-of-day dropped."
  [hour-str]
  (let [parsed (tf/parse appnexus-hour-formatter hour-str)]
    (to-date-time-no-hours parsed)))

(defn siphon
  "Calls the \"siphon\" service for feed-name. The :hour parameter is only sent
when an hour is supplied in opts. Returns whatever http/service-get returns."
  [session {:keys [feed-name hour] :as opts}]
  (log/debug "Retrieving siphon details:" opts)
  (let [params (cond-> {:siphon_name feed-name}
                 hour (assoc :hour hour))]
    (http/service-get session "siphon" params)))

(defn- siphon-download-as-stream
  "Requests one split part of a siphon from the \"siphon-download\" service via
http/service-get-stream. The third argument is a map with :hour, :timestamp
and :part identifying the part."
  [session feed-name {:keys [hour timestamp part]}]
  (let [params {:siphon_name feed-name
                :hour        hour
                :timestamp   timestamp
                :split_part  part}]
    (http/service-get-stream session "siphon-download" params)))

; copied from clojure.core
; Helper for validating macro arguments. Takes alternating
; <test-form> <message> pairs and expands into code that throws an
; IllegalArgumentException for the first test that is falsey. It is meant to be
; used inside another macro's body: the expansion refers to that macro's &form
; to report the macro name and the line number of the offending call.
(defmacro ^:private assert-args
  [& pairs]
  `(do (when-not ~(first pairs)
         (throw (IllegalArgumentException.
                  (str (first ~'&form) " requires " ~(second pairs) " in " ~'*ns* ":" (:line (meta ~'&form))))))
     ; recurse over the remaining pairs, if any
     ~(let [more (nnext pairs)]
        (when more
          (list* `assert-args more)))))

(defmacro with-session
  "Authenticates if necessary and invokes body. Retries authentication
once if body fails with an unauth or noauth status and then invokes body again.

binding is a two element vector `[name session-expr]`. session-expr is
evaluated exactly once; while body runs, http/*session* is bound to its value
and sftp/*ssh-session* to a new ssh session made from the session's :xaxis
entry. name is bound to the session for use in body. The ssh session is always
disconnected when body completes, normally or with an exception.

Note that the bindings only apply while body is being evaluated: lazy
sequences returned from body must be realised inside it."
  [binding & body]
  (assert-args
   (vector? binding) "a vector for binding"
   (= 2 (count binding)) "exactly 2 forms in binding vector")
  ;; Evaluate the session expression once and reuse the value; splicing the
  ;; form in twice would run any side effects it has twice.
  `(let [session# ~(binding 1)]
     (binding [http/*session* session#
               sftp/*ssh-session* (sftp/mk-ssh-session (:xaxis session#))]
       (let [~(binding 0) http/*session*
             ;; Wrap body in a thunk so it appears once in the expansion
             ;; instead of once per branch.
             run-body# (fn [] ~@body)
             reauth-and-run# (fn []
                               (http/authenticate http/*session*)
                               (run-body#))]
         (try
           ;; A non-nil token means we have authenticated before: try body
           ;; first and re-authenticate only if the service rejects the token.
           (if (deref (:token-store http/*session*))
             (try+
              (run-body#)
              (catch [:type :appnexus.http/unauth] _#
                (reauth-and-run#))
              (catch [:type :appnexus.http/noauth] _#
                (reauth-and-run#)))
             (reauth-and-run#))
           (finally
             (sftp/disconnect sftp/*ssh-session*)))))))

(defn- resolve-downloads
  "Expands item into a lazy sequence of AppNexusSiphonFeedItem records, one per
entry in its :splits. Each record is item without :splits, :hour and
:timestamp, plus the source name, a :date (the item's own date, or else one
derived from its hour), the split's checksum and status, and a :metadata map
holding the part, timestamp and hour."
  [{:keys [date hour timestamp splits] :as item}]
  (let [base (dissoc item :splits :hour :timestamp)]
    (for [{:keys [status part checksum]} splits]
      (map->AppNexusSiphonFeedItem
       (assoc base
              :src-name SRC_NAME
              :date (or date (appnexus-hour->dt hour))
              :metadata {:part part :timestamp timestamp :hour hour}
              :checksum checksum
              :appnexus-status status)))))

(defn- download-siphon-item!
  "Downloads the siphon part described by item into the session's
:download-dir and returns the item as enriched by digest/copy-stream!.
Returns nil (and does nothing) when item is nil.

Can be mapped over the results of siphon-feed-details:

```
(doall (map (partial download-siphon-item! session) (siphon-feed-details ...)))
```"
  [session item]
  (when item
    (log/info "Downloading " item)
    (let [{:keys [feed-name metadata]} item
          {:keys [download-dir]} session
          ;; Delayed so the request is only made if copy-stream! derefs it.
          in-stream (delay (siphon-download-as-stream session feed-name metadata))
          filename (misc/local-filename-of item)]
      (digest/copy-stream! in-stream download-dir filename item session))))

(defn- find-latest-modified
  "Returns the greatest :last-modified value (by `compare`) found among the
maps in data, or (t/date-time 0) when none compares greater than that."
  [data]
  (let [pick-later (fn [latest {candidate :last-modified}]
                     (if (pos? (compare candidate latest))
                       candidate
                       latest))]
    (reduce pick-later (t/date-time 0) data)))

(defn- extract-results-as-stream
  "Keeps only the results-key entry of data, pretty-prints it as JSON and
returns that text wrapped in an input stream."
  [results-key data]
  (-> data
      (select-keys [results-key])
      (json/generate-string {:pretty true})
      ioplus/string-input-stream))

(defn- apply-name-overrides
  "Replaces item's :feed-name with its entry in REV_FEED_NAME_OVERRIDES, leaving
the name as it is when there is no entry for it."
  [item]
  (let [current (:feed-name item)]
    (assoc item :feed-name (get REV_FEED_NAME_OVERRIDES current current))))

(defn- download-read-only-item!
  "Fetches the page of read-only data described by item, keeps only the item's
:results-key entry and copies it to the session's :download-dir. Returns the
item produced by digest/copy-stream! with name overrides applied and :dir and
:filename set; returns nil when item is nil."
  [session item]
  (when item
    (log/info "Downloading " item)
    (let [{:keys [feed-name start-element results-key]} item
          download-dir (:download-dir session)
          filename (misc/local-filename-of item)
          service-name (get FEED_NAME_OVERRIDES feed-name feed-name)
          ;; Delayed so the request is only made if copy-stream! derefs it.
          src (delay (->> (http/service-get session service-name
                                            {:start_element start-element})
                          (extract-results-as-stream results-key)
                          digest/md5-input-stream))
          copied (digest/copy-stream! src download-dir filename item session)]
      (assoc (apply-name-overrides copied)
             :dir download-dir
             :filename filename))))

(defn- connect-ssh-session!
  "Connects the ssh session currently bound to sftp/*ssh-session* unless it is
already connected."
  []
  (let [ssh-session sftp/*ssh-session*]
    (when-not (sftp/connected? ssh-session)
      (sftp/connect ssh-session))))

(defn- download-xaxis-item!
  "Ensures the ssh session is connected, then fetches item (with name
overrides applied) via sftp/sftp-get-to-file."
  [session item]
  (connect-ssh-session!)
  (->> item
       apply-name-overrides
       (sftp/sftp-get-to-file session)))

(defn- hours-as-strs
  "Returns a lazy sequence of hour strings, one for each offset (in hours,
may be negative) added to date."
  [date hour-offsets]
  (for [offset hour-offsets]
    (->appnexus-hour (t/plus date (t/hours offset)))))

(defn- assoc-siphon-date
  "Adds a :date to siphon, derived from its :hour string."
  [siphon]
  (assoc siphon :date (appnexus-hour->dt (:hour siphon))))

(defn get-all-parts-of-siphons
  "Queries the siphon service for feed-name without an hour and returns the
:siphons entry of the response."
  [session feed-name]
  (-> (siphon session {:feed-name feed-name})
      :siphons))

(defn- siphon-query-hours
  "Returns the hour strings to query the siphon service for:
- with a date: the 24 hours starting at date;
- otherwise with a date-offset: the 24 hours of today shifted by that many days;
- otherwise: the hours from 12 hours ago up to 3 hours ago, relative to now."
  [date date-offset]
  (cond
    date        (hours-as-strs date (range 0 24))
    date-offset (let [today (to-date-time-no-hours (t/now))]
                  (hours-as-strs (t/plus today (t/days date-offset))
                                 (range 0 24)))
    :else       (hours-as-strs (t/now) (range -12 -2))))

(defn- resolve-hrs-to-download
  "Queries the siphon service once per hour chosen by siphon-query-hours and
returns a lazy sequence of all the :siphons entries, each with a :date added."
  [session feed-name date date-offset]
  (->> (siphon-query-hours date date-offset)
       (mapcat (fn [hour]
                 (:siphons (siphon session {:feed-name feed-name :hour hour}))))
       (map assoc-siphon-date)))

(defn- siphon-feed-details
  "Using the given session retrieves the siphons for the feed with the given
name and returns a sequence of AppNexusSiphonFeedItem records, one per
downloadable part of that feed.

options may contain :date or :date-offset to choose which hours are queried.
If no :date is supplied only items whose appnexus status is NOT \"completed\"
are returned, and their :appnexus-status key is removed.

The result is fully realised before returning so that every service call is
made inside with-session."
  [session feed-name options]
  (with-session [session session]
    (let [{:keys [date date-offset]} options
          siphons (resolve-hrs-to-download session feed-name date date-offset)
          process-siphon (fn [{:keys [name timestamp splits hour]}]
                           (resolve-downloads
                            (merge options
                                   {:date (appnexus-hour->dt hour)
                                    :hour hour
                                    :feed-name name
                                    :filename (str name ".csv.gz")
                                    :encoding :gzip
                                    :timestamp timestamp
                                    :splits splits})))
          select-item (fn [item]
                        (if date
                          item
                          (when (not= (:appnexus-status item) "completed")
                            (dissoc item :appnexus-status))))]
      (log/info "Retrieving details for feed " feed-name " with options " options)
      ;; The pipeline is lazy and the service calls happen as it is consumed.
      ;; Force it here: otherwise they would run after with-session has
      ;; unwound, outside its bindings and its re-authentication retry.
      (doall (keep select-item (mapcat process-siphon siphons))))))

;; Schema that mk-session validates its config argument against.
;; :download-dir must name an existing directory and the xaxis :ssh-key an
;; existing file; :port is the only optional entry.
(def ^:private Config {:auth {:username String
                              :password String}
                       :base-url String
                       :download-dir (s/pred ioplus/exists-as-dir? "<a directory>")
                       :xaxis {:host String
                               :user String
                               :ssh-key (s/pred ioplus/exists-as-file? "<a file>")
                               :dir String ;; can't validate remote dir.
                               (s/optional-key :port) s/Num
                               }
                       :validate-checksum? Boolean})

(defn mk-session
  "Creates an appnexus session with the given config. This session should be
passed to all appnexus service calls.

config is validated against the Config schema first. The resulting session is
the config with :base-url normalised to end in a single \"/\", a fresh
:token-store atom holding nil, and :feeds set to KNOWN_FEEDS."
  [config]
  (s/validate Config config)
  (let [normalised-url (str/replace (:base-url config) #"\b/* *$" "/")
        session-map (assoc config
                           :base-url normalised-url
                           :token-store (atom nil)
                           :feeds KNOWN_FEEDS
                           :xaxis (:xaxis config))]
    (map->AppNexusSession session-map)))

(defn read-only-feed-details
  "Get a lazy sequence of read-only feed items, handling paging. Each element
describes one page worth of data from the feed.

The service is queried once up front to read the total :count; the number of
pages follows from that and the :page-size option (default 100). All items of
one call share the same timestamp in their :metadata."
  [session feed-name & [options]]
  (let [service-name (get FEED_NAME_OVERRIDES feed-name feed-name)
        resp (http/service-get session service-name options)
        page-size (get options :page-size 100)
        total (:count resp)
        num-pages (inc (quot (dec total) page-size))
        timestamp (str (System/currentTimeMillis))
        results-key (get READ_ONLY_FEED_RESULTS_KEY_MAP feed-name)
        page->item (fn [page]
                     (map->AppNexusReadOnlyFeedItem
                      {:src-name SRC_NAME
                       :feed-name feed-name
                       :start-element (* page page-size)
                       :filename (str feed-name ".json")
                       :metadata {:part page
                                  :timestamp timestamp}
                       :results-key results-key}))]
    (map page->item (range num-pages))))

(defn- filename->item
  "Parses a file name of the form <feed>_<d>_<M>_<yyyy>.csv into a map with
:src-name, :filename, :feed-name (vendor name mapped back to our name) and
:date. Returns nil when f does not match that form."
  [f]
  (when-let [[_ vendor-name date-str]
             (re-matches #"(\w+)_(\d{1,2}_\d{1,2}_\d{4})\.csv" f)]
    {:src-name  SRC_NAME
     :filename  f
     :feed-name (get REV_FEED_NAME_OVERRIDES vendor-name vendor-name)
     :date      (tf/parse xaxis-date-formatter date-str)}))

(defn- xaxis-feed-details
  "Lists the session's xaxis directory over SFTP and returns a realised
sequence of AppNexusXaxisFeedItem records for the files belonging to feed-name.

Files are kept when their name starts with the vendor name of the feed and,
if options contains a :date, when the name is exactly the file name for that
date. Names that do not parse as xaxis feed files are skipped."
  [session feed-name & [options]]
  (let [{:keys [date]} options
        dir (-> session :xaxis :dir)
        feed-name (get FEED_NAME_OVERRIDES feed-name feed-name)
        filename (when date
                   (format "%s_%s.csv" feed-name (tf/unparse xaxis-date-formatter date)))
        entry->item (fn [entry]
                      (when-let [item (filename->item (:filename entry))]
                        (map->AppNexusXaxisFeedItem (assoc item :dir dir))))]
    (connect-ssh-session!)
    (let [;; the first element of the listing is dropped
          entries (rest (sftp/sftp-file-seq session dir))
          by-feed (if feed-name
                    (filter #(-> % :filename (.startsWith feed-name)) entries)
                    entries)
          by-date (if filename
                    (filter #(= filename (:filename %)) by-feed)
                    by-feed)]
      (doall (keep entry->item by-date)))))

(defmulti scrub
  "Scrubs the data of in-item into out-item; dispatches on the :feed-name of
in-item."
  (fn [in-item _out-item]
    (:feed-name in-item)))

;; Fallback for feeds without a dedicated method: every cell goes through
;; scrub-zero-dot and then scrub-null.
(defmethod scrub :default [in-item out-item]
  (let [clean-cell (comp scrub/scrub-null scrub/scrub-zero-dot)]
    (scrub/scrub-csv in-item out-item (scrub/each-cell clean-cell))))

;; standard_feed: the cell at index 58 (control_pct) becomes "0" when it
;; matches 0\.0* ; all other cells get the same cleaning as the default method
;; (scrub-zero-dot, then scrub-null).
(defmethod scrub "standard_feed" [in-item out-item]
  (let [control-pct-idx 58
        clean-cell (fn [idx cell]
                     (if (and (= idx control-pct-idx)
                              (re-matches #"0\.0*" cell))
                       "0"
                       (scrub/scrub-null (scrub/scrub-zero-dot cell))))
        clean-row (fn [row] (map-indexed clean-cell row))]
    (scrub/scrub-csv in-item out-item (scrub/each-row clean-row))))
