(ns kixipipe.storage.s3
  "Store files in s3"
  (:require [kixipipe.protocols                   :as kixi]
            [kixipipe.misc                        :as misc]
            [kixipipe.ioplus                      :as ioplus]
            [aws.sdk.s3                           :as s3]
            [clojure.set                          :as set]
            [clojure.java.io                      :as io]
            [clojure.string                       :as str]
            [clojure.tools.logging                :as log]
            [clj-time.core                        :as t]
            [clj-time.format                      :as tf]
            [clj-time.coerce                      :as tc]
            [kixipipe.storage                     :as storage]
            [kixipipe.storage.s3.status           :as status]
            [kixipipe.string                      :as kstr]
            [schema.core                          :as s]
            [com.stuartsierra.component           :as component]
            [pantomime.mime                       :refer [mime-type-of]]
            [kixipipe.storage.s3.multipart-upload :as mpu]))

(defn paged-results-seq
  "Call `src-fn` for one page of an AWS listing and return that response
   with the value under `:results-key` extended into a lazy sequence that
   retrieves the next page only when necessary.

   The options map MUST contain:
        :src-fn      - a function of [cred bucket options] that returns an
                       AWS paged result (a map carrying a `:truncated?` flag)
        :results-key - the response key under which the desired data is
                       stored (typically the response also contains meta
                       data about the request)
        :marker-map  - a map of the form
                           {:next-key1 :key1
                            :next-key2 :key2}
                       which details which response keys identify paging
                       parameters and under which option key each should be
                       passed in the _next page_ request

   The whole options map (including the three keys above) is handed to
   `src-fn` on every call. The first page is fetched eagerly; the returned
   map is the first page's response with `:results-key` replaced by the
   lazy concatenation of all pages when the response is truncated."
  [cred bucket options]
  (let [{:keys [src-fn results-key marker-map]} options
        resp         (src-fn cred bucket options)
        ;; Pull the paging markers out of this response and rename them to
        ;; the option keys the next request expects.
        next-markers (set/rename-keys (select-keys resp (keys marker-map))
                                      marker-map)
        next-options (merge options next-markers)]
    (if (:truncated? resp)
      (assoc resp
        results-key
        ;; lazy-cat defers the recursive call until the current page has
        ;; been consumed.
        (lazy-cat (get resp results-key)
                  (get (paged-results-seq cred bucket next-options)
                       results-key)))
      resp)))

(defn list-objects-seq
  "Lazily list the objects in the session's `:file-bucket`.

   Runs `s3/list-objects` through `paged-results-seq` and returns the
   `:objects` of the result, so _next page_ calls are made only as the
   sequence is consumed. `options` are passed to the underlying
   `list-objects` call, e.g.
       `(list-objects-seq session {:max-keys 100 :prefix \"foo\"})`
   returns a sequence of all objects with key matching the prefix \"foo\"
   with page size of 100."
  [{:keys [auth file-bucket]} options]
  (let [paging  {:src-fn      s3/list-objects
                 :results-key :objects
                 :marker-map  {:next-marker :marker}}
        listing (paged-results-seq auth file-bucket (merge options paging))]
    (:objects listing)))

(defn list-versions-seq
  "Make a call to list-versions and return `:versions` from the result
   as a lazy seq, making _next page_ calls as appropriate. `options`
   are passed to the underlying `list-versions` call
   e.g.
       `(list-versions-seq session {:max-keys 100 :prefix \"foo\"})`
   lists the versions stored under the prefix \"foo\" using a page size
   of 100.

   Paging continues from the `:next-key-marker` / `:next-version-id-marker`
   of each response, passed back as `:key-marker` / `:version-id-marker`."
  [{:keys [auth file-bucket]} options]
  (:versions
   (paged-results-seq auth
                      file-bucket
                      (merge options
                             {:src-fn      s3/list-versions
                              :results-key :versions
                              :marker-map  {:next-key-marker        :key-marker
                                            :next-version-id-marker :version-id-marker}}))))

;; Builds the S3 key for an item, dispatching on the item's :src-name so
;; that individual sources can provide their own key layout.
(defmulti s3-key-from :src-name)

;; An item without a :src-name has no key.
(defmethod s3-key-from nil [_] nil)

;; Default layout: <src-name>/<feed-name>/<formatted date>/<remote filename>
(defmethod s3-key-from :default [item]
  (let [date-part (tf/unparse storage/date-formatter (:date item))
        file-part (misc/remote-filename-of item)]
    (str/join \/ [(:src-name item) (:feed-name item) date-part file-part])))

(defn- src-name-from-key
  "Return the first `/`-separated segment of an S3 key.

   The two-argument arity ignores its first argument so that this function
   can serve as the dispatch fn of `item-from-s3-key`, whose methods are
   invoked with [session key]."
  ([key] (first (str/split key #"/")))
  ([_session key] (src-name-from-key key)))

;; Rebuilds an item map from an S3 key; dispatches on the key's first
;; segment so individual sources can override the default parsing.
(defmulti item-from-s3-key src-name-from-key)

;; Default parsing: the key is read as
;;   <src-name>/<feed-name>/<date>/<filename>
;; The split is limited to 4 parts so any further `/` stays in :filename.
;; The object's S3 metadata is merged into :metadata and :dir is set to the
;; session's download-dir.
;; NOTE(review): :date is left as the raw key segment (a string) here,
;; whereas `list-items` parses it with storage/date-from-string; confirm
;; which form callers expect.
(defmethod item-from-s3-key :default default-item-from-s3-key [{:keys [download-dir auth file-bucket]} key]
  (let [s3-meta (s3/get-object-metadata auth file-bucket key)]
    (-> (zipmap [:src-name :feed-name :date :filename] (str/split key #"/" 4))
        (assoc :dir download-dir)
        (update-in [:metadata] merge s3-meta))))

(defn- merge-user-metadata
  "Return `version` with the `:user` part of the S3 object metadata for
   its `:key` merged into its `:metadata` map."
  [session version]
  (let [{:keys [auth file-bucket]} session
        merge-user (fn [existing]
                     (merge existing
                            (:user (s3/get-object-metadata auth
                                                           file-bucket
                                                           (:key version)))))]
    (update-in version [:metadata] merge-user)))

(defn item-exists?
  "Ask S3 whether an object is stored under `s3-key` in the session's
   `:file-bucket`."
  [session s3-key]
  (s3/object-exists? (:auth session) (:file-bucket session) s3-key))

(defn version-exists-pred?
  "Truthy when an object exists at `s3-key` and `pred` holds for at least
   one of the versions listed under that prefix before the first delete
   marker. Each candidate version has its user metadata merged in before
   `pred` is applied."
  [session s3-key pred]
  (and (item-exists? session s3-key)
       (->> (list-versions-seq session {:prefix s3-key})
            (take-while (complement :delete-marker?))
            (map #(merge-user-metadata session %))
            (some pred))))

(defn with-metadata-value
  "Build a predicate over version maps. The predicate holds when the
   version is non-nil, is not a delete marker, and `value` is non-nil and
   equals (ignoring case) the entry stored under `key` in the version's
   `:metadata`."
  [key value]
  (let [present? (fn [version] (not (nil? version)))
        live?    (fn [version] (not (:delete-marker? version)))
        matches? (fn [version]
                   (when value
                     (.equalsIgnoreCase value (get-in version [:metadata key]))))]
    (every-pred present? live? matches?)))

(defn put-large-if-no-existing-matching-version
  "Multipart-upload the item's file to `s3-key` unless a matching version
  is already stored.

  The item's :checksum is compared with the :large-file-processed-checksum
  metadata of the versions in S3. When one matches, the put is skipped and
  only logged. Otherwise the file at dir/filename is uploaded and the
  checksum is recorded under :large-file-processed-checksum in the uploaded
  metadata.

  (By this point the caller has already decided the source file differed,
  downloaded it and exploded it.)"
  [session item s3-key]
  (let [checksum        (:checksum item)
        same-checksum?  (with-metadata-value :large-file-processed-checksum checksum)
        already-stored? (version-exists-pred? session s3-key same-checksum?)]
    (if already-stored?
      (log/info "Skipping put of " item)
      (let [{:keys [auth file-bucket]}      session
            {:keys [dir filename metadata]} item
            upload-metadata                 (assoc metadata :large-file-processed-checksum checksum)]
        (log/info "putting to " s3-key item)
        (mpu/put-multipart-object auth
                                  file-bucket
                                  s3-key
                                  (io/file dir filename)
                                  {:metadata upload-metadata})))))

(defn put-if-no-existing-matching-version
  "Put the item's file to `s3-key` unless a stored version already has an
   :etag equal (ignoring case) to the item's :checksum, in which case the
   put is skipped and only logged."
  [session item s3-key]
  (let [same-etag?      (with-metadata-value :etag (:checksum item))
        already-stored? (version-exists-pred? session s3-key same-etag?)]
    (if already-stored?
      (log/info "Skipping put of " item)
      (let [{:keys [auth file-bucket]}      session
            {:keys [dir filename metadata]} item
            local-file                      (io/file dir filename)]
        (log/info "putting to " s3-key item)
        (s3/put-object auth file-bucket s3-key local-file metadata)))))

(defmulti to-metadata-string
  "Render a value as the string stored for it in object metadata.
  Dispatches on the value's class; classes without a dedicated method
  use the `::unknown` fallback."
  class :default ::unknown)

;; Fallback: plain `str` of the value.
(defmethod to-metadata-string ::unknown [value]
  (str value))

;; Dates are rendered with clj-time's coerce `to-string`.
(defmethod to-metadata-string java.util.Date [date]
  (tc/to-string date))

;; Calendars are turned into their Date first, then rendered the same way.
(defmethod to-metadata-string java.util.Calendar [calendar]
  (-> calendar .getTime tc/to-string))

(def ^:private metadata-reserved-keys
  "Metadata keys that `prepend-src-name` passes through untouched instead
   of prefixing with the source name."
  #{:cache-control
    :content-disposition
    :content-encoding
    :content-length
    :content-md5
    :content-type
    :server-side-encryption})

(defn- prepend-src-name
  "Transform one metadata entry `[k v]` for storage.

   AWS has a known list of metadata keys; entries using one of those
   (see `metadata-reserved-keys`) are returned as they are. Any other key
   is prefixed with `src-name` and a dash, so we can track where it came
   from, and its value is converted with `to-metadata-string`. Non-reserved
   keys end up in the user metadata."
  [src-name [k v :as e]]
  (if-not (metadata-reserved-keys k)
    (let [sourced-key (keyword (str src-name "-" (name k)))]
      [sourced-key (to-metadata-string v)])
    e))

(defn- with-sourced-metadata
  "Return `item` with its :metadata rewritten for storage:
     - :source-checksum is kept under its own key,
     - every other entry goes through `prepend-src-name`,
     - :content-type is the one already in the metadata, or else the
       mime type derived from the item's filename."
  [item]
  (let [{:keys [src-name metadata filename]} item
        checksum-entry (select-keys metadata [:source-checksum])
        sourced        (->> (dissoc metadata :source-checksum)
                            (map #(prepend-src-name src-name %))
                            (into {}))
        content-type   (or (:content-type metadata)
                           (mime-type-of filename))]
    (assoc item
      :metadata
      (merge checksum-entry sourced {:content-type content-type}))))

(defn- delete-file!
  "Delete the local file named by the item's :filename inside its :dir.
   Returns the result of `java.io.File.delete`."
  [item]
  (-> (io/file (:dir item) (:filename item))
      (.delete)))

(defn copy-bucket
  "Copy the objects of src-bucket into dest-bucket under the same keys,
   optionally restricted to keys starting with prefix. Returns a vector
   holding the result of each individual copy."
  [{session ::session} src-bucket dest-bucket & [prefix]]
  (log/infof "Copying bucket \"%s\" to bucket \"%s\", prefix is %s" src-bucket dest-bucket prefix)
  (let [auth        (:auth session)
        ;; list from the source bucket rather than the session's own bucket
        src-session (assoc session :file-bucket src-bucket)
        copy-one    (fn [{:keys [key]}]
                      (s3/copy-object auth src-bucket key dest-bucket key))]
    (mapv copy-one (list-objects-seq src-session {:prefix prefix}))))

(defn store-file
  "Store the file described by item at its key in S3 (see `s3-key-from`),
   unless a matching version is already stored there.

   The item's metadata is rewritten with `with-sourced-metadata` before
   the put; the key is derived from the item as passed in. `session` is
   handed on to `put-if-no-existing-matching-version`, which reads its
   :auth and :file-bucket."
  [session item]
  (let [sourced-item (with-sourced-metadata item)
        s3-key       (s3-key-from item)]
    (put-if-no-existing-matching-version session sourced-item s3-key)))

(defn store-large-file
  "Store the large file described by item at its key in S3 (see
  `s3-key-from`) using multipart upload, unless a matching version is
  already stored there.

  The item's metadata is rewritten with `with-sourced-metadata` before the
  put; the key is derived from the item as passed in. `session` is handed
  on to `put-large-if-no-existing-matching-version`, which reads its :auth
  and :file-bucket."
  [session item]
  (let [sourced-item (with-sourced-metadata item)
        s3-key       (s3-key-from item)]
    (put-large-if-no-existing-matching-version session sourced-item s3-key)))

(defmacro with-retries
  "Evaluate `body`, retrying when it throws.

   `timeouts` must be a literal collection of sleep times in milliseconds
   (it is inspected at macro-expansion time). `body` is attempted once per
   timeout; after each failure the error is logged and the thread sleeps
   for the corresponding time before the next attempt. Once the timeouts
   are used up, `body` is evaluated one final time with no handler, so an
   exception from that last attempt propagates to the caller.

   Returns the value of the first attempt that completes."
  [timeouts & body]
  (if-let [[sleep-ms & more-timeouts] (seq timeouts)]
    `(try
       ~@body
       (catch Throwable t#
         ;; Passing the throwable first makes the cause part of the log entry.
         (log/errorf t#
                     "[%02d] Error executing: %s, sleeping for %s ms before retrying"
                     ~(count timeouts)
                     '~(if (next body) (cons 'do body) (first body))
                     ~sleep-ms)
         (Thread/sleep ~sleep-ms)
         (with-retries ~(vec more-timeouts) ~@body)))
    ;; Final attempt. Must be wrapped in `do`: emitting the bare body list
    ;; would call the result of the body as a function.
    `(do ~@body)))

;; Rename an object inside the session's :file-bucket: copy src-key to
;; dest-key, then delete src-key. Each of the two S3 calls is wrapped in
;; its own `with-retries` using the timeouts 1000, 5000 and 30000 ms.
;; Note: `with-retries` quotes the wrapped form into its error log message,
;; so the local names used inside those forms are part of the logged text.
(defn rename-file [session src-key dest-key]
  (let [{:keys [auth file-bucket]} session]
    (log/debugf "Renaming %s to %s" src-key dest-key)
    ;; step 1: copy to the new key
    (with-retries [1000 5000 30000]
      (s3/copy-object auth file-bucket src-key dest-key))
    ;; step 2: remove the old key
    (with-retries [1000 5000 30000]
      (s3/delete-object auth file-bucket src-key))))

(defn get-object-by-metadata
  "Fetch the object whose key is the `:key` of `metadata` (the raw data
   returned from a query to s3) and return its `:content`.
   You MUST close the stream after using it."
  [session metadata]
  (-> (s3/get-object (:auth session) (:file-bucket session) (:key metadata))
      :content))


(defn- get-user-metadata
  "Return the `:user` part of the S3 object metadata stored for `s3-key`
   in the session's `:file-bucket`."
  [session s3-key]
  (-> (s3/get-object-metadata (:auth session) (:file-bucket session) s3-key)
      :user))

(defn get-user-metadata-from-s3-object
  "Look up the user metadata of an object, identified by the `:key` of the
   raw data returned from a query to s3."
  [session s3-object]
  (let [s3-key (:key s3-object)]
    (get-user-metadata session s3-key)))

(defn get-user-metadata-from-item
  "Look up the user metadata of the object stored for `item`, whose key is
   derived with `s3-key-from`."
  [session item]
  (let [s3-key (s3-key-from item)]
    (get-user-metadata session s3-key)))

;; Session record holding the S3 credentials map and bucket names.
;; Implements the component lifecycle (start/stop only print a message and
;; return the record unchanged) and the kixi StorageSession protocol.
(defrecord StorageSession [auth file-bucket status-bucket]
  component/Lifecycle
  (start [this]
    (println "Starting StorageSession")
    this)
  (stop [this]
    (println "Stopping StorageSession")
    this)

  kixi/StorageSession
  ;; Lists the objects stored under <src-name>/<feed-name>/<date> (nil parts
  ;; are left out of the prefix) and turns each into an item map: the listed
  ;; object without :metadata, plus :src-name, :feed-name and :date parsed
  ;; from the key and :checksum taken from the metadata's :etag.
  (list-items [session src-name feed-name options]
    (let [date-part (storage/date-as-string (:date options))
          prefix    (->> [src-name feed-name date-part]
                         (remove nil?)
                         (str/join \/))
          ->item    (fn [obj]
                      (let [[src feed date-str] (str/split (:key obj) #"/")]
                        (-> obj
                            (dissoc :metadata)
                            (assoc :src-name  src
                                   :feed-name feed
                                   :date      (storage/date-from-string date-str)
                                   :checksum  (get-in obj [:metadata :etag])))))]
      (map ->item (list-objects-seq session {:prefix prefix})))))

(def ^:private Config
  "Schema that `mk-session` validates its config map against."
  {;; required entries
   :access-key   String
   :secret-key   String
   :file-bucket  String
   :download-dir (s/pred ioplus/exists-as-dir? "<a directory>")
   ;; optional entries
   (s/optional-key :status-bucket)              String
   (s/optional-key :delete-local-after-s3-put?) Boolean})

(defn mk-session
  "Validate `config` against the `Config` schema and build a
   StorageSession from it. The session's auth is the :access-key and
   :secret-key entries of the config; the buckets are its :file-bucket and
   :status-bucket."
  [config]
  (s/validate Config config)
  (let [{:keys [file-bucket status-bucket]} config
        auth (select-keys config [:access-key :secret-key])]
    (->StorageSession auth file-bucket status-bucket)))

(defn delete-version
  "Delete one specific version of an object from the session's
   :file-bucket. `version` supplies the :key and :version-id to delete;
   the deletion is logged at debug level afterwards."
  [{session ::session} version]
  (let [s3-key     (:key version)
        version-id (:version-id version)]
    (s3/delete-version (:auth session) (:file-bucket session) s3-key version-id)
    (log/debugf "deleted %s [%s]\n" s3-key version-id)))

;; TODO think about only deleting old duplicates. Handle delete-marker?
(defn delete-old-versions
  "For every key listed under `prefix`, delete every version except the
   first one listed for that key, logging each deletion at debug level.
   Keys with a single version are left alone. Returns nil.

   NOTE(review): this relies on the version listing returning the version
   to keep first for each key (presumably newest first); confirm against
   the S3 list-versions ordering."
  [{session ::session} prefix]
  (let [{:keys [auth file-bucket]} session
        versions-by-key (group-by :key (list-versions-seq session {:prefix prefix}))]
    (doseq [[key versions]       versions-by-key
            ;; skip the first listed version; everything after it is "old"
            {:keys [version-id]} (next versions)]
      (s3/delete-version auth file-bucket key version-id)
      ;; debugf, not debug: the message is a format string
      (log/debugf "deleted %s [%s]\n" key version-id))))

(defn build-aws-creds-arg-from-template
  "Fill in a credentials template from the session's :auth map.
  The template should contain {access-key} and {secret-key} placeholders,
  which will be replaced with the appropriate values."
  [session template]
  (let [auth (:auth session)]
    (kstr/map-replace template auth)))

(defn key-uri-base
  "Return the `s3://` URI of the session's :file-bucket."
  [session]
  (let [{:keys [file-bucket]} session]
    (str "s3://" file-bucket)))

(defn generate-presigned-url
  "Ask S3 for a presigned URL for `s3-key` in the session's :file-bucket,
   using the session's :auth."
  [session s3-key]
  (let [{:keys [auth file-bucket]} session]
    (s3/generate-presigned-url auth file-bucket s3-key)))

(defn generate-plain-uri
  "Build the plain https URI for `s3-key` in the session's :file-bucket by
   string concatenation; the key is inserted as given."
  [session s3-key]
  (let [{:keys [file-bucket]} session]
    (str "https://" file-bucket ".s3.amazonaws.com/" s3-key)))
