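(ns kixipipe.data.mutate
  "Transformations applied to pipeline file items on disk: gzip/gunzip,
  header stripping and insertion, stream merging, JSON-to-TSV conversion,
  and building item maps from file names."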
  (:require [kixipipe.ioplus  :as ioplus]
            [clojure.java.io  :as io]
            [cheshire.core    :as json]
            [clojure.data.csv :as csv]
            [clj-time.format  :as tf]
            [kixipipe.digest  :as digest]
            [clojure.string   :as str]
            [clojure.tools.logging      :as log]))

;; clj-time formatter for the underscore-separated hour stamp
;; (year_month_day_hour) that filename->item extracts from file names.
(def ^:private appnexus-hour-formatter
  (tf/formatter "yyyy_MM_dd_HH"))

(defn update-checksums
  "Return `item` with its :checksum replaced by the MD5 string obtained from
  `md5-source` (via `digest/md5-checksum-as-string`).

  `orig-checksum` is recorded under [:metadata :source-checksum], but only
  when no value is stored there yet; an existing source checksum is kept."
  [item orig-checksum md5-source]
  (let [fresh-checksum  (digest/md5-checksum-as-string md5-source)
        ;; identity on an existing value, orig-checksum when the slot is nil
        keep-or-default (fnil identity orig-checksum)
        with-source     (update-in item [:metadata :source-checksum] keep-or-default)]
    (assoc with-source :checksum fresh-checksum)))

(defn gzip-item?
  "True when `item` declares :gzip as its :encoding, false otherwise."
  [item]
  (-> item
      :encoding
      (= :gzip)))

(defn gzip-item!
  "Gzip the file described by `item` into `<filename>.gz` in the same `dir`.

  Returns `item` unchanged when it is already gzip-encoded. Otherwise returns
  the item with :filename pointing at the new file, :encoding set to :gzip and
  the checksums refreshed through `update-checksums`. The uncompressed source
  file is left in place.

  The source is copied as raw bytes. Going through a reader/writer pair would
  decode and re-encode the content as UTF-8 text (the clojure.java.io
  default), silently altering any byte sequence that is not valid UTF-8."
  [item]
  (let [{:keys [dir filename checksum]} item
        gzipped-filename (str filename ".gz")
        dest (io/file dir gzipped-filename)]
    (if-not (gzip-item? item)
      (with-open [in (io/input-stream (io/file dir filename))
                  out-md5 (digest/md5-output-stream dest)]
        ;; The gzip stream is closed before the checksum is taken so that
        ;; everything it buffers has been written through out-md5.
        (with-open [out (-> out-md5
                            ioplus/gzip-output-stream
                            io/output-stream)]
          (io/copy in out))
        (-> item
            (update-checksums checksum out-md5)
            (assoc :filename gzipped-filename :encoding :gzip)))
      item)))

(defn gunzip-item!
  "Decompress the gzip file described by `item` into a sibling file whose name
  is `filename` with a trailing `.gz` removed.

  `opts` is an optional map:
    :append?      when truthy, append to the destination instead of truncating
    :buffer-size  passed through to `clojure.java.io/copy`
    :encoding     passed through to `clojure.java.io/copy`

  Returns `item` unchanged when it is not gzip-encoded. Otherwise returns the
  item with :filename set to the decompressed file and :encoding removed.
  NOTE: :checksum is not recomputed here and still describes the gzipped file."
  [item & [opts]]
  (if (gzip-item? item)
    (let [{:keys [dir filename]} item
          unzipped-filename (str/replace filename #"\.gz$" "")
          ;; io/copy takes its options as trailing key/value arguments; handing
          ;; it the map itself (or nil) makes it throw IllegalArgumentException.
          copy-opts (select-keys opts [:buffer-size :encoding])]
      (with-open [input  (ioplus/gzip-input-stream (io/file dir filename))
                  output (io/writer (io/file dir unzipped-filename) :append (:append? opts))]
        (apply io/copy input output (mapcat identity copy-opts)))
      (-> item
          (assoc :filename unzipped-filename)
          (dissoc :encoding)))
    item))

(defn- do-strip-header!
  "Copy every line of `in` except the first one to `out`, terminating each
  copied line with a newline character. `in` must be acceptable to `line-seq`
  and `out` must offer a `write` method taking a string. Returns nil."
  [in out]
  ;; `rest` drops the header line; an empty input yields nothing to write.
  (doseq [data-line (rest (line-seq in))]
    (doto out
      (.write data-line)
      (.write "\n"))))

(defn strip-header!
  "Write a copy of the item's file without its first line.

  The copy is placed in the same `dir` under the name produced by
  `(ioplus/append-suffix filename \"noheader\")`. For gzip-encoded items the
  input is read through a gzip input stream and the output is written through
  a gzip output stream. Returns the item with refreshed checksums (see
  `update-checksums`) and :filename set to the new file name."
  [item]
  (let [{:keys [dir filename checksum]} item
        gzipped?        (gzip-item? item)
        output-filename (ioplus/append-suffix filename "noheader")
        source          (io/file dir filename)
        dest            (io/file dir output-filename)]
    (with-open [in      (io/reader (if gzipped?
                                     (ioplus/gzip-input-stream source)
                                     source))
                out-md5 (digest/md5-output-stream dest)]
      ;; The writer is closed before the checksum is taken from out-md5.
      (with-open [out (io/writer (if gzipped?
                                   (ioplus/gzip-output-stream out-md5)
                                   out-md5))]
        (do-strip-header! in out))
      (assoc (update-checksums item checksum out-md5)
             :filename output-filename))))

(defn add-header!
  "Rewrite the item's file so that it starts with `header` followed by a
  newline, then the previous content.

  The existing file is first renamed to a temporary sibling
  (`<filename>-<current-millis>`); when there is no existing file an empty
  temporary file is created instead. The target is then written afresh from
  the header plus the temporary file's content, going through gzip streams on
  both sides for gzip-encoded items.

  Returns the item with refreshed checksums (see `update-checksums`).
  Throws java.io.IOException when the existing file cannot be moved aside.
  The temporary file is deleted only after a successful rewrite, so the
  previous content survives a failure (it is also marked delete-on-exit)."
  [item header]
  (let [{:keys [dir filename checksum]} item
        file (io/file dir filename)
        tmp (io/file dir (str filename "-" (System/currentTimeMillis)))]

    (if (ioplus/exists-as-file? file)
      ;; renameTo reports failure through its return value only.
      (when-not (.renameTo file tmp)
        (throw (java.io.IOException.
                (str "Could not move " file " to " tmp))))
      (.createNewFile tmp))
    (.deleteOnExit tmp)

    (let [updated-item
          (with-open [in (-> tmp
                             (cond-> (gzip-item? item) (ioplus/gzip-input-stream))
                             io/reader)
                      out-md5 (digest/md5-output-stream file)]
            ;; The writer is closed before the checksum is taken from out-md5.
            (with-open [out (-> out-md5
                                (cond-> (gzip-item? item) (ioplus/gzip-output-stream))
                                io/writer)]
              (.write out (str header "\n"))
              (io/copy in out))
            (-> item
                (update-checksums checksum out-md5)
                (assoc :filename filename)))]
      (.delete tmp)
      ;; Return the updated item, not the boolean result of the delete.
      updated-item)))

(defn merge-streams
  "Concatenate the decompressed content of `streams` into the item's file.

  Each element of `streams` is `force`d (so delays are accepted), wrapped in a
  gzip input stream, copied to the output and then closed. The output is
  gzip-compressed when the item is gzip-encoded. Returns the item with
  refreshed checksums (see `update-checksums`).

  NOTE(review): every stream is gunzipped regardless of the item's own
  encoding - presumably the sources are always gzip data; confirm with callers."
  [item streams]
  (let [{:keys [dir filename checksum]} item]
    (with-open [out-md5 (digest/md5-output-stream (io/file dir filename))]
      (with-open [out (-> out-md5
                          (cond-> (gzip-item? item) (ioplus/gzip-output-stream))
                          io/output-stream)]
        (doseq [stream streams]
          (with-open [in (-> (force stream)
                             (ioplus/gzip-input-stream))]
            (io/copy in out))))
      ;; Take the checksum only after `out` has been closed, so buffered bytes
      ;; and any gzip trailer have been written through out-md5 first.
      (-> item
          (update-checksums checksum out-md5)
          (assoc :filename filename)))))

(defn json->tsv
  "Convert the JSON file described by `item` into a tab-separated file.

  The JSON document is parsed with keyword keys and the collection stored
  under the item's :results-key is taken as the rows. For every row the values
  of `columns` (option :columns, default [:id :name]) are written, in that
  order, as one TSV line. The output file is the item's file name with its
  extension replaced via `(ioplus/with-ext \"tsv\" ...)`, in the same :dir.

  Returns the item with refreshed checksums (see `update-checksums`) and
  :filename set to the TSV file name."
  [item & [options]]
  (let [{:keys [columns] :or {columns [:id :name]}} options
        ;; Open the source in with-open so the file handle is released, and
        ;; realise all rows (mapv) before the reader is closed.
        data (with-open [in (io/reader (io/file (:dir item) (:filename item)))]
               (let [xs (get (json/parse-stream in keyword)
                             (:results-key item))]
                 (mapv (fn [x] (mapv #(get x %) columns)) xs)))
        filename (ioplus/with-ext "tsv" (:filename item))]
    (with-open [out-md5 (digest/md5-output-stream (io/file (:dir item) filename))]
      ;; The writer is closed before the checksum is taken from out-md5.
      (with-open [out (io/writer out-md5)]
        (csv/write-csv out data :separator \tab))
      (-> item
          (update-checksums (:checksum item) out-md5)
          (assoc :filename filename)))))

(defn filename->item
  "Build an item map from the name of `file`, or return nil when the name does
  not match the expected pattern.

  The name must look like `<feed>_<yyyy_MM_dd_HH>[_<digits>]_<digits>.<ext>`.
  The optional digit group is stored as [:metadata :timestamp] and the
  mandatory one as [:metadata :part]. :date is the hour stamp parsed with
  `appnexus-hour-formatter`. :encoding :gzip is added when the extension ends
  with `gz`. `dir` and `src-name` are copied into the item as given."
  [dir src-name file]
  (when-let [[filename feed-name hour timestamp part ext]
             (re-matches #"(\w+)_(\d{4}_\d{2}_\d{2}_\d{2})(?:_(\d+))?_(\d+)\.(.+)"
                         (.getName file))]
    (let [base-item (hash-map :src-name src-name
                              :feed-name feed-name
                              :date (tf/parse appnexus-hour-formatter hour)
                              :metadata {:hour hour :timestamp timestamp :part part}
                              :dir dir
                              :filename filename)]
      (if (.endsWith ext "gz")
        (assoc base-item :encoding :gzip)
        base-item))))

(defn local-files-as-items
  "Return a lazy sequence of items for the entries found under the item's :dir.

  Every entry produced by `file-seq` whose name fully matches the item's
  :regex is handed to `filename->item` together with :dir and :src-name;
  entries for which that returns nil are dropped."
  [item]
  (let [{:keys [dir regex src-name]} item
        name-matches? (fn [f] (re-matches regex (.getName f)))
        ->item        (fn [f] (filename->item dir src-name f))]
    (->> (io/file dir)
         file-seq
         (filter name-matches?)
         (keep ->item))))
