(ns kixipipe.data.mutate
  "Various fn to mutate items (and the things they refer to)."
  (:require [cheshire.core :as json]
            [clj-time.format :as tf]
            [clojure.data.csv :as csv]
            [clojure.java.io :as io]
            [clojure.string :as str]
            [clojure.tools.logging :as log]
            [kixipipe.digest :as digest]
            [kixipipe.ioplus :as ioplus]
            [kixipipe.misc :as misc]))

;; Formatter for the hour component embedded in feed file names
;; (pattern "yyyy_MM_dd_HH"); used when turning a file name into an item.
(def ^:private appnexus-hour-formatter
  (tf/formatter "yyyy_MM_dd_HH"))

(defn update-checksums
  "Returns `item` with :checksum set to the md5 string obtained from
  `md5-source`. [:metadata :source-checksum] is set to `orig-checksum`
  only when it is currently nil; an existing value is left untouched."
  [item orig-checksum md5-source]
  (let [fresh-checksum (digest/md5-checksum-as-string md5-source)
        recorded       (get-in item [:metadata :source-checksum])
        ;; only nil is replaced; any other recorded value wins
        source-sum     (if (nil? recorded) orig-checksum recorded)]
    (assoc (assoc-in item [:metadata :source-checksum] source-sum)
           :checksum fresh-checksum)))

(defn gzip-item?
  "True when the item's :encoding is :gzip."
  [item]
  (-> item
      (get :encoding)
      (= :gzip)))

(defn gzip-item!
  "Gzips the file `dir`/`filename` of `item` into `filename`.gz in the same
  directory and returns the item updated with the new :filename,
  :encoding :gzip and refreshed checksums (see `update-checksums`).
  Items that are already gzip-encoded are returned unchanged.

  The copy is done on raw bytes so that the compressed payload is a
  byte-exact image of the source, whatever its character encoding."
  [item]
  (if (gzip-item? item)
    item
    (let [{:keys [dir filename checksum]} item
          gzipped-filename (str filename ".gz")
          dest (io/file dir gzipped-filename)]
      (with-open [in (io/input-stream (io/file dir filename))
                  out-md5 (digest/md5-output-stream dest)]
        ;; The gzip stream is closed before the checksum is read so that the
        ;; gzip trailer has passed through the md5 stream.
        (with-open [out (-> out-md5
                            ioplus/gzip-output-stream
                            io/output-stream)]
          (io/copy in out))
        (-> item
            (update-checksums checksum out-md5)
            (assoc :filename gzipped-filename :encoding :gzip))))))

(defn gunzip-item!
  "Decompresses the gzip file of `item` into a file of the same name with the
  trailing \".gz\" removed, in the same directory.

  `opts` is an optional map. :append? makes the output file be opened in
  append mode; the whole map is also forwarded to `clojure.java.io/copy` as
  key/value options (e.g. :encoding, :buffer-size).

  Returns the item pointing at the decompressed file (:filename updated,
  :encoding removed). The :checksum entry is NOT recomputed here. Items that
  are not gzip-encoded are returned unchanged."
  [item & [opts]]
  (if (gzip-item? item)
    (let [{:keys [dir filename]} item
          gunzipped-filename (str/replace filename #"\.gz$" "")]
      (with-open [input  (ioplus/gzip-input-stream (io/file dir filename))
                  output (io/writer (io/file dir gunzipped-filename) :append (:append? opts))]
        ;; io/copy takes its options as trailing key/value pairs, so the opts
        ;; map has to be spliced in; passing the map (or nil) as a single
        ;; argument makes io/copy throw.
        (apply io/copy input output (mapcat identity opts)))
      (-> item
          (assoc :filename gunzipped-filename)
          (dissoc :encoding)))
    item))

(defn- do-strip-header!
  "Copies every line of reader `in` except the first one to writer `out`,
  terminating each copied line with a newline character."
  [in out]
  ;; `rest` discards the header line; the remaining lines are streamed lazily.
  (doseq [data-line (rest (line-seq in))]
    (.write out data-line)
    (.write out "\n")))

(defn strip-header!
  "Writes a copy of the item's file without its first line to a sibling file
  whose name is produced by `ioplus/append-suffix` with \"noheader\". Gzip
  items are read and written through gzip streams. Returns the item with
  refreshed checksums and :filename pointing at the new file."
  [item]
  (let [dir           (:dir item)
        source-name   (:filename item)
        stripped-name (ioplus/append-suffix source-name "noheader")
        target        (io/file dir stripped-name)]
    (with-open [rdr        (io/reader
                            (cond-> (io/file dir source-name)
                              (gzip-item? item) (ioplus/gzip-input-stream)))
                digest-out (digest/md5-output-stream target)]
      ;; The writer is closed before the checksum is read from digest-out.
      (with-open [wtr (io/writer
                       (cond-> digest-out
                         (gzip-item? item) (ioplus/gzip-output-stream)))]
        (do-strip-header! rdr wtr))
      (assoc (update-checksums item (:checksum item) digest-out)
             :filename stripped-name))))

(defn add-header!
  "Rewrites the item's file so that it starts with `header` followed by a
  newline, then the previous contents of the file.

  The existing file is first renamed to a temporary sibling
  (`filename`-<current millis>); when no file exists an empty temporary file
  is created instead. The temporary file is deleted once the rewrite has
  succeeded (and is also registered for deletion on JVM exit).

  Returns the item with refreshed checksums (see `update-checksums`)."
  [item header]
  (let [{:keys [dir filename checksum]} item
        file (io/file dir filename)
        tmp (io/file dir (str filename "-" (System/currentTimeMillis)))]

    (if (ioplus/exists-as-file? file)
      (.renameTo file tmp)
      (.createNewFile tmp))
    (.deleteOnExit tmp)

    ;; Capture the updated item so that it, and not the boolean result of
    ;; deleting the temporary file, is what the caller receives.
    (let [updated (with-open [in (-> tmp
                                     (cond-> (gzip-item? item) (ioplus/gzip-input-stream))
                                     io/reader)
                              out-md5 (digest/md5-output-stream file)]
                    ;; Close the writer before reading the checksum so all
                    ;; bytes have passed through the md5 stream.
                    (with-open [out (-> out-md5
                                        (cond-> (gzip-item? item) (ioplus/gzip-output-stream))
                                        (io/writer :append? true))]
                      (.write out (str header "\n"))
                      (io/copy in out))
                    (update-checksums item checksum out-md5))]
      (.delete tmp)
      updated)))

(defn merge-streams
  "Concatenates the decompressed contents of `streams` into the item's file.

  Each element of `streams` is `force`d (so delays are accepted) and read
  through `ioplus/gzip-input-stream`. The output is gzip-compressed when the
  item is a gzip item. Returns the item with refreshed checksums
  (see `update-checksums`)."
  [item streams]
  (let [{:keys [dir filename checksum]} item]
    (with-open [out-md5 (digest/md5-output-stream (io/file dir filename))]
      (with-open [out (-> out-md5
                          (cond-> (gzip-item? item) (ioplus/gzip-output-stream))
                          (io/output-stream :append? true))]
        (doseq [stream streams]
          (with-open [in (-> (force stream)
                             (ioplus/gzip-input-stream))]
            (io/copy in out))))
      ;; Read the checksum only after `out` has been closed: until then the
      ;; buffered bytes (and the gzip trailer, for gzip items) have not yet
      ;; reached the md5 stream.
      (update-checksums item checksum out-md5))))

(defn json->tsv
  "Converts the JSON file of `item` into a tab-separated file with the same
  name but a \"tsv\" extension (via `ioplus/with-ext`).

  The rows are the elements found under the item's :results-key in the parsed
  JSON document; each row holds the values of `columns` in order.
  `options` may contain :columns (defaults to [:id :name]).

  Returns the item with refreshed checksums and :filename pointing at the
  TSV file."
  [item & [options]]
  (let [{:keys [columns] :or {columns [:id :name]}} options
        ;; Rows are extracted eagerly (mapv) inside with-open so the reader
        ;; can be closed as soon as parsing is done.
        data (with-open [in (io/reader (io/file (:dir item) (:filename item)))]
               (mapv (fn [x] (mapv #(get x %) columns))
                     (get (json/parse-stream in keyword)
                          (:results-key item))))
        filename (ioplus/with-ext "tsv" (:filename item))]
    (with-open [out-md5 (digest/md5-output-stream (io/file (:dir item) filename))]
      ;; Close the writer before reading the checksum from out-md5.
      (with-open [out (io/writer out-md5)]
        (csv/write-csv out data :separator \tab))
      (-> item
          (update-checksums (:checksum item) out-md5)
          (assoc :filename filename)))))

(defn filename->item
  "Builds an item map from `file` when its name has the shape
  <feed>_<yyyy_MM_dd_HH>[_<timestamp>]_<part>.<ext>; returns nil otherwise.

  The item carries :src-name, :feed-name, :date (the hour parsed with
  `appnexus-hour-formatter`), :metadata (:hour, :timestamp, :part as matched
  strings; :timestamp is nil when absent), :dir and :filename. :encoding
  :gzip is added when the extension ends with \"gz\"."
  [dir src-name file]
  (when-let [[whole-name feed hour-str stamp part-str extension]
             (re-matches #"(\w+)_(\d{4}_\d{2}_\d{2}_\d{2})(?:_(\d+))?_(\d+)\.(.+)" (.getName file))]
    (cond-> (hash-map :src-name src-name
                      :feed-name feed
                      :date (tf/parse appnexus-hour-formatter hour-str)
                      :metadata {:hour hour-str :timestamp stamp :part part-str}
                      :dir dir
                      :filename whole-name)
      (.endsWith extension "gz") (assoc :encoding :gzip))))

(defn local-files-as-items
  "Walks `dir` (via `file-seq`) and returns a lazy sequence of items for every
  file whose name fully matches `regex` and that `filename->item` can turn
  into an item. `dir`, `regex` and `src-name` are read from `item`."
  [item]
  (let [{:keys [dir regex src-name]} item
        name-matches? (fn [f] (re-matches regex (.getName f)))]
    (->> (io/file dir)
         file-seq
         (filter name-matches?)
         (keep #(filename->item dir src-name %)))))

(defn- safe-println
  "Writes `x` followed by a newline byte to the :out stream of the given
  output map. Strings are encoded as UTF-8, matching the default encoding
  `clojure.java.io/reader` uses to decode the lines being written, instead of
  whatever the platform default charset happens to be. Non-string values are
  passed to `.write` as-is.

  If writing throws, the stream is closed (when present) and the original
  throwable is rethrown."
  [{:keys [out]} x]
  (try
    (.write out (if (string? x)
                  (.getBytes ^String x "UTF-8")
                  x))
    (.write out (int \newline))
    (catch java.lang.Throwable t
      (when out
        (.close out) ;; can throw exception, but can't do anything
                     ;; about it so let it propagate
        )
      (throw t))))

(defn- new-split-item-output
  "Opens the output for a split part: an md5 output stream on the item's
  file, wrapped in a gzip output stream when the item is a gzip item.
  Returns a map with the md5 stream under :md5 and the stream to write to
  under :out."
  [item]
  (let [digest-stream (digest/md5-output-stream
                       (io/file (:dir item) (:filename item)))
        sink          (if (gzip-item? item)
                        (ioplus/gzip-output-stream digest-stream)
                        digest-stream)]
    {:md5 digest-stream
     :out sink}))

(defn- write-md5-file-for-split-and-close-output
  "Closes the :out stream of the output map, then writes the md5 checksum
  taken from its :md5 stream to the md5 file for the item's dir/filename."
  [output item]
  (let [{:keys [md5 out]} output]
    ;; Closing may throw; nothing useful can be done here, so it propagates.
    (.close out)
    (let [checksum (digest/md5-checksum-as-string md5)]
      (digest/write-md5-to-md5-file! (:dir item) (:filename item) checksum))))

(defn- new-file-required?
  "True when the line number reported by `src` is positive and an exact
  multiple of `max-lines`."
  [src max-lines]
  (let [lines-read (.getLineNumber src)]
    (if (pos? lines-read)
      (zero? (mod lines-read max-lines))
      false)))

(defn- update-item
  "Advances the item to its next part: increments [:metadata :part] (a
  missing part becomes 0) and recomputes :filename from the updated item via
  `misc/local-filename-of`."
  [item]
  (let [next-part (update-in item [:metadata :part] (fnil inc -1))]
    (assoc next-part :filename (misc/local-filename-of next-part))))

(defn- new-item?
  "When the current line count of `src` calls for a new part, finishes the
  current output (close + md5 file) and returns the item for the next part;
  otherwise returns nil and leaves the output untouched."
  [src max-lines out item]
  (if (new-file-required? src max-lines)
    (do
      (write-md5-file-for-split-and-close-output out item)
      (update-item item))
    nil))

(defn write-empty-marker-file
  "Creates an empty file named after the item's :filename with the \"marker\"
  suffix applied by `ioplus/append-suffix`, writes its md5 file, and returns
  the marker item with refreshed checksums."
  [item]
  (let [marker-name (ioplus/append-suffix (get item :filename) "marker")
        marker-item (assoc item :filename marker-name)
        output      (new-split-item-output marker-item)]
    ;; Nothing is written: the output is opened and closed right away.
    (write-md5-file-for-split-and-close-output output marker-item)
    (update-checksums marker-item (:checksum marker-item) (:md5 output))))

;; Splits the file dir/filename of `item` into consecutive part files of
;; `max-lines` lines each (the last part may hold fewer lines).
;;
;; The source is read line by line (through a gzip input stream for gzip
;; items). Each part is described by an item produced by `update-item`; when a
;; part is finished its output is closed and its md5 file is written.
;;
;; Returns a vector of the part items, in order, followed by the marker item
;; returned by `write-empty-marker-file` for the original item.
;;
;; NOTE: a new part is opened as soon as the line count reaches a multiple of
;; `max-lines`, before it is known whether more lines follow. A source whose
;; line count is an exact multiple of `max-lines` (or an empty source)
;; therefore ends with a part that has had no lines written to it.
(defn split-item [{:keys [dir filename] :as item} max-lines]
  (let [original-item item]
    ;; LineNumberReader supplies the running line count used by new-item?.
    (with-open [src (-> (io/file dir filename)
                        (cond-> (gzip-item? item) (ioplus/gzip-input-stream))
                        io/reader
                        java.io.LineNumberReader.)]
      ;; Loop bindings are sequential: `out` is opened for the item already
      ;; advanced by update-item, i.e. the first part.
      (loop [item  (update-item item)
             out   (new-split-item-output item)
             items []]
        (if-let [line (.readLine src)]
          (do
            (safe-println out line)
            ;; new-item? closes `out` and writes its md5 file when it rolls
            ;; over, so a fresh output must be opened for the new item.
            (if-let [new-item (new-item? src max-lines out item)]
              (recur new-item
                     (new-split-item-output new-item)
                     (conj items item))
              (recur item out items)))
          ;; End of input: finish the part that is currently open, then
          ;; append the marker item for the original (unsplit) item.
          (do
            (write-md5-file-for-split-and-close-output out item)
            (-> items
                (conj item)
                (conj (write-empty-marker-file original-item)))))))))
