(ns csv-export-bolt.storm
  (:require [backtype.storm
             [clojure :refer :all]
             [config :refer :all]
             [log :refer :all]]
            [clojure.string :as s]
            [clojure.java.io :as io]
            [csv-export-bolt.fields :refer [csv-export-output-fields]])
  (:import [backtype.storm Constants]))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Helpers
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defn dissoc-in
  "Removes the entry found at the nested key path [k & ks] from the
  associative structure m and returns the updated structure.

  Only the final key of the path is dissoc'ed; the intermediate maps are
  kept even if they end up empty. When an intermediate key is absent (or
  m is nil) there is nothing to remove, so m is returned unchanged."
  [m [k & ks]]
  (cond
    ;; Last key of the path: remove it from this level.
    (not ks) (dissoc m k)
    ;; Descend only into keys that exist. `get` is used instead of calling
    ;; the map as a function so nil and non-IFn maps do not throw.
    (contains? m k) (assoc m k (dissoc-in (get m k) ks))
    ;; Missing intermediate key: leave m alone rather than adding {k nil}.
    :else m))

(defn hive-escape
  "Returns (str raw) with the special characters rewritten for the
  Hive-style csv output.

  A comma, semicolon, double quote or single quote is prefixed with a
  backslash. A newline, carriage return or tab is replaced by the
  two-character sequence backslash-n, backslash-r or backslash-t.
  Because raw goes through str, nil yields the empty string and
  non-string values are escaped in their string form."
  [raw]
  ;; NOTE(review): a literal backslash in raw is passed through unescaped,
  ;; so it cannot be told apart from an escape sequence downstream -
  ;; confirm whether the consumer needs it escaped as well.
  (s/escape (str raw) {\, "\\,"
                       \newline "\\n"
                       \return "\\r"
                       \tab "\\t"
                       \; "\\;"
                       \" "\\\""
                       \' "\\'"}))

(defn v->hive-vec
  "Renders one csv line: every item of v is passed through hive-escape,
  the results are joined with commas, and a newline is appended."
  [v]
  (let [line (->> v
                  (map hive-escape)
                  (s/join ","))]
    ;; The trailing newline terminates the row, since the output is
    ;; written to a file.
    (str line \newline)))

(defn coll->csv
  "Converts coll to a csv string.

  When b is falsey, coll is treated as a single line (a vector of values).
  When b is truthy, coll is treated as a vector of such lines. Each line
  is rendered with v->hive-vec and the rendered lines are concatenated."
  [coll b]
  (->> (if b
         coll
         ;; Not in batch mode: wrap the single line so both modes share
         ;; the same per-line rendering.
         (vector coll))
       (map v->hive-vec)
       s/join))

(defn gen-tmp-file
  "Generates a temporary file with the given prefix and suffix and
   returns its absolute path as a string.
   Prefix should have a trailing underscore.

   root-path is optional: when it is omitted or nil the file is created
   in the jvm default temp directory. Passing it is useful for storing
   temp files on another device or secure location.

   The file is created through java.io.File/createTempFile, so that
   method's restrictions on prefix and directory apply.

   Example:
   (gen-tmp-file \"retweets_\" \".csv\")
   (gen-tmp-file \"retweets_\" \".csv\" \"/tmp/files/here\")"
  ([prefix suffix]
   ;; Two-argument form: fall back to the jvm default temp directory.
   (gen-tmp-file prefix suffix nil))
  ([prefix suffix root-path]
   (let [;; A nil directory makes createTempFile use the default temp dir.
         directory (when root-path (java.io.File. root-path))
         tmp-file (java.io.File/createTempFile prefix suffix directory)]
     (.getAbsolutePath tmp-file))))

(defn get-or-create-file
  "Looks up the temp file registered for partition-key under :tmp-files
   in the state atom.

   When a path is already registered, returns [path false]. Otherwise a
   new temp file is generated (under the directory given by the
   \"CSV_EXPORT_TEMP_FILE_PATH\" entry of conf, if any), recorded in the
   state under partition-key, and [path true] is returned."
  [state partition-key conf]
  (let [known-path (get-in @state [:tmp-files partition-key])]
    (if known-path
      [known-path false]
      (let [fresh-path (gen-tmp-file "batches_"
                                     ".csv"
                                     (get conf "CSV_EXPORT_TEMP_FILE_PATH"))]
        ;; Remember the new file so later tuples for this key reuse it.
        (swap! state assoc-in [:tmp-files partition-key] fresh-path)
        [fresh-path true]))))

(defn tick-tuple?
  "Returns true when the source stream id of tuple equals
  Constants/SYSTEM_TICK_STREAM_ID."
  [tuple]
  (let [stream-id (.getSourceStreamId tuple)]
    (= Constants/SYSTEM_TICK_STREAM_ID stream-id)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Bolt
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Takes [partition-key coll] and
;; serializes it to csv in Hive format.
;; Accumulates results in temporary files based on partition-key.
;; Emits the csv-content on receiving a tick from a time interval spout.

;;  Args:
;;  - batch: a boxed Java boolean (must be converted!) of whether this is a single or batch write

(defn csv-export-body
  "Builds the bolt that accumulates csv content in temp files and emits
  it on tick tuples.

  Args:
  - conf: passed to get-or-create-file, which reads the temp file root
    path from it
  - context: not used here
  - collector: used to emit the accumulated content and to ack tuples
  - batch: a boxed Java boolean saying whether each content tuple carries
    several lines (true) or a single line (false)

  Per tuple:
  - tick tuple: records the tick time, then for every registered temp
    file with content emits [partition-key csv-content], deletes the file
    and removes it from the state
  - any other tuple: reads partition-key and coll from it, converts coll
    to csv and appends it to the temp file of that partition-key
  Every tuple is acked afterwards."
  [conf context collector batch]
  (let [state (atom {:last-tick (System/currentTimeMillis)
                     :tmp-files {}})
        ;; batch is a boxed Java boolean and must be converted, otherwise
        ;; it would always be treated as true in Clojure.
        batch? (boolean batch)

        ;; Emit the content of one temp file (anchored to the tick tuple)
        ;; and forget the file. Empty files are left in place.
        flush-file!
        (fn [tick [partition-key path]]
          (log-message "Iterating through tmp-file " partition-key)
          (let [csv-content (slurp path)]
            (when (seq csv-content)
              (log-message (format "Emitting content of temp file %s" partition-key))
              (emit-bolt! collector [partition-key csv-content] :anchor tick)
              (log-message "Deleting temp file: " partition-key)
              (io/delete-file path)
              (log-message "Removing temp file from state")
              (swap! state dissoc-in [:tmp-files partition-key]))))

        ;; Tick handling: update :last-tick, then flush every temp file.
        ;; The original author assumes the IO is safe because each bolt
        ;; handles only one tuple at a time with this state.
        on-tick!
        (fn [tick]
          (let [now (System/currentTimeMillis)]
            (log-message "Recieved clock tick: " now)
            (log-message "Last tick: " (:last-tick @state))
            (swap! state assoc :last-tick now)
            (log-message "Temp files: " (or (keys (:tmp-files @state)) "None"))
            (doseq [entry (:tmp-files @state)]
              (flush-file! tick entry))))

        ;; Content handling: append the csv rendering of coll to the temp
        ;; file that accumulates results for the tuple's partition-key.
        on-content!
        (fn [content-tuple]
          (let [{:keys [partition-key coll]} content-tuple
                [file-path created] (get-or-create-file state partition-key conf)
                csv-content (coll->csv coll batch?)]
            (log-message "Temp file created? " created)
            (log-message "Appending to file " file-path)
            (spit file-path csv-content :append true)
            (log-message "State: " @state)))]
    (bolt
     (execute
      [tuple]
      ;; This bolt can take a tick tuple or a 'content' tuple
      (if (tick-tuple? tuple)
        (on-tick! tuple)
        (on-content! tuple))
      (ack! collector tuple)))))

(defmacro defcsvexport
  "Defines a csv-export bolt named var-name (via defbolt) whose tick tuple
  frequency, in seconds, is tick-tuple-freq.

  The defined bolt uses csv-export-output-fields as its output fields and
  declares a single parameter, which is handed to csv-export-body as its
  batch argument together with the conf, context and collector."
  [var-name tick-tuple-freq]
  `(defbolt ~var-name csv-export-output-fields
     {:prepare true
      ;; The one bolt parameter; forwarded below as csv-export-body's batch.
      :params [batch#]
      ;; Sets the bolt's tick tuple frequency (seconds) to the macro argument.
      :conf {"topology.tick.tuple.freq.secs" ~tick-tuple-freq}}
     [conf# context# collector#]
     (csv-export-body conf# context# collector# batch#)))

;; To maintain backwards compatibility, define a default bolt named
;; csv-export with a 300-second tick tuple frequency.
(defcsvexport csv-export 300)

