(ns midje.cascalog.impl
  (:use midje.sweet
        [clojure.set :only (difference)]
        [cascalog.api :only (with-job-conf <- ??-)])
  (:require cascalog.cascading.types
            [cascalog.cascading.io :as io]
            [cascalog.cascading.flow :as flow]
            [midje.checking.core :as checking])
  (:import [cascalog.cascading.types ClojureFlow]))

(defn- multifn?
  "Returns true if `x` is an instance of `clojure.lang.MultiFn` (the
  class backing multimethods), false otherwise."
  [x]
  (instance? clojure.lang.MultiFn
             x))

;; Head symbols of the forms that `mocking-form?` treats as midje
;; mocking clauses.
(def ^:private mocking-forms
  (hash-set 'against-background 'provided))

(defn- mocking-form?
  "Returns true if `x` is a collection whose first element is one of
  the symbols in `mocking-forms` (`provided` or `against-background`),
  false if `x` is a collection with any other first element, and nil
  if `x` is not a collection at all."
  [x]
  (if (coll? x)
    (let [head (first x)]
      (contains? mocking-forms head))
    nil))

(defn- extract-mockers
  "Splits `coll` into a 2-vector `[mockers others]`: the first item is
  the lazy sequence of elements satisfying `mocking-form?`, the second
  the lazy sequence of all remaining elements. Relative order is kept
  within each sequence."
  [coll]
  (let [mockers (filter mocking-form? coll)
        others  (remove mocking-form? coll)]
    [mockers others]))

;; Log level used by `pop-log-level` and `execute` when the caller
;; does not supply one.
(def ^:private default-log-level
  :fatal)

(defn pop-log-level
  "Accepts a sequence of forms that may contain a log-level key (any
  element for which `io/log-levels` returns a truthy value) and
  returns a 2-vector `[log-level remaining]`.

  `log-level` is the first such key found, or `default-log-level` when
  none is present. `remaining` holds the other elements of `bindings`
  in their original order, duplicates included; every occurrence of
  the chosen log-level key is dropped. When no log level is found,
  `bindings` is returned untouched."
  [bindings]
  (if-let [ll (first (filter io/log-levels bindings))]
    ;; Callers consume the remainder positionally (as result/query
    ;; pairs), so it must stay an ordered sequence. Going through a
    ;; set here would scramble the order and collapse repeated forms.
    [ll (remove #(= ll %) bindings)]
    [default-log-level bindings]))

(defn execute
  "Runs `query` and returns its result.

  If `query` is a `ClojureFlow` it is handed to `flow/to-memory`;
  otherwise it is run through `??-` and the first element of the
  result is returned. Execution happens inside `io/with-log-level`
  using the optional `:log-level` keyword argument (defaulting to
  `default-log-level`) and with the job conf `{\"io.sort.mb\" 10}`."
  [query & {:keys [log-level] :or {log-level default-log-level}}]
  (let [flow? (instance? ClojureFlow query)]
    (io/with-log-level log-level
      (with-job-conf {"io.sort.mb" 10}
        (if flow?
          (flow/to-memory query)
          (first (??- query)))))))

;; ## Midje-Style Checker Helpers

;; Set of every key of `io/log-levels`.
(def log-level-set
  (into #{} (keys io/log-levels)))

(defn mk-opt-set
  "Accepts a sequence of options and returns a set of those options
  with every member of `log-level-set` removed."
  [opts]
  (reduce disj (set opts) log-level-set))

(defn valid-options?
  "Returns true when every item of `supplied-opts` is a member of
  either `log-level-set` or `permitted-opts` (trivially true for an
  empty `supplied-opts`), false otherwise."
  [permitted-opts supplied-opts]
  (let [permitted (set permitted-opts)
        allowed?  (fn [opt]
                    (or (contains? log-level-set opt)
                        (contains? permitted opt)))]
    (every? allowed? supplied-opts)))

(def split-forms
  "Accepts a sequence of arguments to a collection-checker-generator
  and returns a vector containing two sequences:

  [<fn arguments> <keyword arguments>]

  The split happens at the first keyword: fn-arguments are the leading
  non-keyword items, meant to pass through untouched into the checker;
  keyword arguments are everything from the first keyword onward and
  are optionally parsed by the wrapping checker."
  (fn [args]
    (split-with (complement keyword?) args)))

;; ## Cascalog-style Checker Helpers

(defn- cascalog-checker
  "Returns the value stored under `:cascalog-checker` in the metadata
  of `x`, or nil when `x` carries no such entry."
  [x]
  (-> x meta :cascalog-checker))

(defn data-laden-falsehood-hidden-from-midje?
  "Returns the value stored under `:cascalog-data-laden-falsehood` in
  the metadata of `result`, or nil when there is no such entry."
  [result]
  (-> result meta :cascalog-data-laden-falsehood))

(defn as-data-laden-falsehood-hidden-from-midje
  "Passes `falsehood` through midje's private
  `data-laden-falsehood-to-map` and tags the result with the metadata
  entry `:cascalog-data-laden-falsehood true`, which is the marker
  `data-laden-falsehood-hidden-from-midje?` looks for."
  [falsehood]
  (let [as-map (#'checking/data-laden-falsehood-to-map falsehood)]
    (vary-meta as-map assoc :cascalog-data-laden-falsehood true)))

(defmacro fact-line-result-generator
  "Expands to code that checks a query against `expected-form` and
  yields the outcome of that check.

  `expected-form` is evaluated exactly once and then dispatched on:

    * if its metadata carries `:cascalog-checker`, it is called with
      the value of `query-form`;
    * if it is a function or multimethod, it is called with the value
      of `tuple-seq-maker`;
    * otherwise it is wrapped in `(just ... :in-any-order)` and that
      checker is called with the value of `tuple-seq-maker`.

  Only the form needed by the selected branch (`query-form` or
  `tuple-seq-maker`) is evaluated. If the outcome is a midje
  data-laden falsehood it is converted with
  `as-data-laden-falsehood-hidden-from-midje`; any other outcome is
  returned as is."
  [expected-form query-form tuple-seq-maker]
  ;; Bind the expected value once: splicing `~expected-form` into
  ;; every test and call site would re-evaluate the user's expression
  ;; several times per fact.
  `(let [expected# ~expected-form
         actual-result-of-check#
         (cond (#'cascalog-checker expected#)
               (expected# ~query-form)

               (or (fn? expected#) (#'multifn? expected#))
               (expected# ~tuple-seq-maker)

               :else
               ((just expected# :in-any-order) ~tuple-seq-maker))]
     ;; Strip chatty failures of their chattiness so that
     ;; they can escape another layer of Midje => checking.
     (if (#'checking/data-laden-falsehood? actual-result-of-check#)
       (as-data-laden-falsehood-hidden-from-midje actual-result-of-check#)
       actual-result-of-check#)))

(defmacro cascalog-check
  "Expands to a one-argument function. When its argument is marked
  per `data-laden-falsehood-hidden-from-midje?`, the function returns
  it passed through `checking/as-data-laden-falsehood`; otherwise it
  returns the argument unchanged. The macro's own argument is not used
  in the expansion."
  [_ignored_just_here_for_error_output_]
  `(fn [result#]
     (if-not (data-laden-falsehood-hidden-from-midje? result#)
       result#
       (checking/as-data-laden-falsehood result#))))

(defn- fact-line
  "Returns a syntax-quoted 3-vector, `[<check-form> => <checker-form>]`,
  representing the guts of a midje fact for the supplied cascalog
  query and expected result.

  `<check-form>` is a `fact-line-result-generator` call that receives
  `expected-form`, `query-form`, and an `execute` call on `query-form`
  using `ll` as the `:log-level`. `<checker-form>` is a
  `cascalog-check` call on `expected-form`."
  [expected-form query-form ll]
  (let [run-form     `(execute ~query-form :log-level ~ll)
        check-form   `(fact-line-result-generator ~expected-form ~query-form ~run-form)
        checker-form `(cascalog-check ~expected-form)]
    `[~check-form => ~checker-form]))

(defn build-fact?-
  "Accepts a sequence of fact?- bindings and a midje \"factor\" --
  `fact`, or `future-fact`, for example -- and returns a syntax-quoted
  form `(factor ...)` in which every expected-result/query pair has
  been replaced by the three forms produced by `fact-line`.

  A log level, if present, is first pulled out with `pop-log-level`
  and handed to each `fact-line`. Strings and mocking forms
  (`provided` / `against-background`) are copied through unchanged.
  Processing stops at the first falsey element."
  [bindings factor]
  (let [[ll bindings] (pop-log-level bindings)
        passthrough?  (some-fn string? mocking-form?)
        fact-forms    (loop [[x y & more :as forms] bindings
                             acc []]
                        (cond
                          (not x)
                          acc

                          ;; Single forms that are not part of a pair.
                          (passthrough? x)
                          (recur (rest forms) (conj acc x))

                          ;; `x` is an expected result, `y` its query.
                          :else
                          (recur more (into acc (#'fact-line x y ll)))))]
    `(~factor ~@fact-forms)))

(defn build-fact?<-
  "Similar to `build-fact?-`; `args` must contain a result sequence, a
  query return arg vector, and any number of predicates, optionally
  preceded by a keyword. The last forms can be midje provided or
  background clauses.

  Strings in `args` are dropped. The leading form (or the leading two,
  when the first is a keyword) is kept as is; of the remaining forms,
  the non-mocking ones are wrapped in `(<- ...)` and the mocking ones
  are appended after it, all inside a `(factor ...)` form."
  [args factor]
  (let [forms                (remove string? args)
        head-size            (if (keyword? (first forms)) 2 1)
        [head tail]          (split-at head-size forms)
        [mockers predicates] (extract-mockers tail)]
    `(~factor ~@head (<- ~@predicates) ~@mockers)))
