(ns invoice-analysis.core (:gen-class)
  (:require [cascalog.tap :refer [hfs-tap lfs-tap]]
            [cascalog.api :refer [<- ?- stdout select-fields hfs-textline lfs-textline with-job-conf compile-flow]]
            [cascalog.ops :refer [sum]]
            [cascalog.more-taps :refer [lfs-delimited hfs-delimited]]
            [clojure.tools.cli :refer [cli]])
  (:import [cascading.avro AvroScheme]))

(defn parse-int
  "Parses `string` as a signed base-10 integer via `Integer/parseInt`.
  Throws NumberFormatException when `string` is not a valid integer."
  [string]
  ;; Radix 10 is what the single-argument overload uses implicitly.
  (Integer/parseInt string 10))

(defn query
  "Builds a Cascalog query that totals the numeric column of `input-tap`.

  `input-tap` must emit two fields per tuple: a letter and a number encoded
  as a string. The number is parsed with `parse-int` and all parsed values
  are summed into the single output field `?sum`.

  `trap-tap` is attached as the query's trap via the `:trap` predicate, so
  tuples that raise an exception during processing (for example a value
  `parse-int` cannot parse) are written to the trap instead of failing the
  whole flow."
  [input-tap trap-tap]
  (<- [?sum]
      (input-tap :> ?letter ?number-string)
      ;; Previously trap-tap was accepted but never used; wire it in here.
      (:trap trap-tap)
      (parse-int :< ?number-string :> ?number)
      (sum :< ?number :> ?sum)))

;; Query over the HDFS input directory, with its trap file also on HDFS.
(def remote-query
  (let [source (hfs-delimited "hdfs:///user/luser/inputs/")
        trap   (hfs-textline "hdfs:///user/luser/TRAPFILE.trap")]
    (query source trap)))

;; Query over the bundled local sample file, with a local trap file.
(def local-query
  (let [source (lfs-delimited "resources/local/input.tsv")
        trap   (lfs-textline "TRAPFILE.trap")]
    (query source trap)))

(defn -main
  "Command-line entry point.

  Switches:
    -h / --help     print the usage banner and exit with status 0
    -d / --diagram  compile the local flow and write it to \"flow.dot\"
    -l / --local    run the job against local data, writing \"OUTPUT.tsv\"
    -r / --remote   run the job against HDFS data with 16 reduce tasks

  At least one of --local, --remote or --diagram is required; otherwise a
  message is printed and the process exits with status 1. When several are
  given, precedence is remote, then local, then diagram."
  [& cmd-args]
  (let [[options args banner]
        (cli cmd-args
             ["-h" "--help" "Show help" :default false :flag true]
             ["-d" "--diagram" "Draw flow diagram." :default false :flag true]
             ["-l" "--local" "Use local data." :default false :flag true]
             ;; Was "-h", which collided with the short switch for --help.
             ["-r" "--remote" "Use remote data." :default false :flag true])]
    (when (:help options)
      (println banner)
      (System/exit 0))

    (when-not (or (:local options) (:remote options) (:diagram options))
      (println "Either local, remote, or diagram must be specified.")
      (System/exit 1))

    (cond
      (:remote options)
      (with-job-conf {"mapred.reduce.tasks" 16}
        (?- "JOB NAME"
            (hfs-textline "hdfs:///user/luser/OUTPUT.tsv")
            remote-query))

      (:local options)
      (?- "JOB NAME"
          (lfs-textline "OUTPUT.tsv")
          local-query)

      ;; Only compiles the flow and dumps its graph; the job is not executed.
      (:diagram options)
      (.writeDOT
        (compile-flow
          (lfs-textline "OUTPUT.tsv")
          local-query)
        "flow.dot"))))
