(ns cascalog.logic.vars
  "This namespace deals with all Cascalog variable
  transformations."
  (:require [clojure.set :refer (intersection difference)]
            [clojure.walk :refer (postwalk)]
            [jackknife.seq :as s]))

;; # Var Generation
;;
;; This first section contains functions that allow Cascalog to
;; generate logic variables. There are three types of logic variables:
;; nullable (prefixed by !), non-nullable (prefixed by ?), and
;; ungrounding (prefixed by !!).

(defn uniquify-var
  "Returns a string made of `v` followed by a freshly generated
  unique suffix (the name of a new gensym)."
  [v]
  (let [suffix (gensym)]
    (str v suffix)))

(defn gen-var-fn
  "Builds a variable generator for `prefix`: the returned function
  takes no arguments and, on every call, yields a new unique string
  that starts with `prefix`."
  [prefix]
  #(uniquify-var prefix))

(defn gen-non-nullable-var
  "Returns a unique non-nullable logic variable: a fresh string
  prefixed by \"?\". Takes no arguments."
  []
  (uniquify-var "?"))

(defn gen-nullable-var
  "Returns a unique nullable logic variable: a fresh string prefixed
  by \"!\". Takes no arguments."
  []
  (uniquify-var "!"))

(defn gen-ungrounding-var
  "Returns a unique ungrounding logic variable: a fresh string
  prefixed by \"!!\". Takes no arguments."
  []
  (uniquify-var "!!"))

(defn gen-nullable-vars
  "Returns a lazy sequence of `amt` freshly generated nullable logic
  variables.

  Example:
  (let [var-seq (gen-nullable-vars n)]
    (?<- (hfs-textline out-path)
         var-seq
         (in :>> var-seq)))"
  [amt]
  (repeatedly amt gen-nullable-var))

(defn gen-non-nullable-vars
  "Returns a lazy sequence of `amt` freshly generated non-nullable
  logic variables.

  Example:
  (let [var-seq (gen-non-nullable-vars n)]
    (?<- (hfs-textline out-path)
         var-seq
         (in :>> var-seq)))"
  [amt]
  (repeatedly amt gen-non-nullable-var))

;; ## Reserved Keywords
;;
;; Certain keywords are reserved by Cascalog.

(def selectors
  "The set of keywords that Cascalog treats specially when they
  appear inside a predicate."
  #{:< :>
    :<< :>>
    :#>})

(defn selector?
  "Returns true if `kw` is one of Cascalog's reserved selector
  keywords (that is, a member of `selectors`), false otherwise."
  [kw]
  (contains? selectors kw))

(def logic-prefixes
  "Prefixes reserved by Cascalog for use within predicates. Any symbol
  or string that starts with one of these prefixes is interpreted as
  a logic variable."
  #{"!" "!!" "?"})

(def wildcards
  "Wildcards reserved by Cascalog: the underscore, as either a symbol
  or a string."
  #{"_" '_})

(defn prefixed-by?
  "Returns true if the string form of `v` begins with `prefix`, false
  otherwise. Any exception raised during the check (for example when
  `prefix` is nil) is swallowed and reported as false."
  [prefix v]
  (try
    ;; The conversion stays inside the `try` so that a failing
    ;; `toString` is also reported as false.
    (let [^String text (str v)]
      (.startsWith text prefix))
    (catch Exception _
      false)))

(defn non-nullable-var?
  "Returns true if the string form of `sym-or-str` starts with \"?\",
  the prefix that marks a non-nullable logic variable; false
  otherwise."
  [sym-or-str]
  (let [prefix "?"]
    (prefixed-by? prefix sym-or-str)))

(defn nullable-var?
  "Returns true if the supplied symbol (or string) is NOT a
  non-nullable logic variable, false otherwise. For logic variables
  this means the ones prefixed by ! or !!. Note that the check is
  simply the negation of `non-nullable-var?`, so any value that is
  not prefixed by ? also yields true."
  [sym-or-str]
  (not (non-nullable-var? sym-or-str)))

(defn unground-var?
  "Returns true if the string form of `sym-or-str` starts with \"!!\",
  the prefix that marks an ungrounding logic variable; false
  otherwise."
  [sym-or-str]
  (let [prefix "!!"]
    (prefixed-by? prefix sym-or-str)))

(defn ground-var?
  "Returns true if the supplied var is NOT an ungrounding variable,
  false otherwise. For logic variables this means the ones prefixed
  by a single ! or by ?, which are capable of triggering a join. Note
  that the check is simply the negation of `unground-var?`, so any
  value that is not prefixed by !! also yields true."
  [sym-or-str]
  (not (unground-var? sym-or-str)))

(defn fully-ground?
  "Returns true if every var in the supplied collection satisfies
  `ground-var?` (vacuously true for an empty collection), false
  otherwise."
  [vars]
  (every? ground-var? vars))

(defn cascalog-var?
  "Returns true if the string form of `obj` starts with one of the
  reserved `logic-prefixes`, i.e. if `obj` names a Cascalog logic
  variable; false otherwise."
  [obj]
  (let [matches? (fn [prefix] (prefixed-by? prefix obj))]
    (boolean (some matches? logic-prefixes))))

(def reserved?
  "Returns true if the supplied value is reserved by Cascalog: a logic
  variable, a wildcard, or an ampersand (as a symbol or a string).
  Returns false otherwise."
  (let [ampersand? #{"&" '&}
        special?   (some-fn cascalog-var? wildcards ampersand?)]
    ;; `some-fn` yields the first truthy result (or nil); coerce it so
    ;; callers always receive a real boolean.
    (comp boolean special?)))

(defn logic-sym?
  "Returns true if every supplied argument is a symbol that is
  reserved by Cascalog (a logic variable, & or _), false otherwise.
  Normally called with a single argument."
  [& candidates]
  (every? (fn [x] (and (symbol? x) (reserved? x)))
          candidates))

(defmacro with-logic-vars
  "Wraps `body` in a `let` that binds every distinct logic symbol
  found anywhere inside it to that symbol's string form, so the user
  can write bare symbols. For example:

  (with-logic-vars
    (str ?a ?b :see))
  ;=>  \"?a?b:see\""
  [& body]
  (let [logic-syms (distinct (filter logic-sym? (s/flatten body)))
        ;; Flat binding vector of the form [sym \"sym\" sym2 \"sym2\" ...].
        bindings   (vec (mapcat (juxt identity str) logic-syms))]
    `(let ~bindings
       ~@body)))

;; # Sanitizing
;;
;; The following code serves to 'sanitize' a query by converting its
;; logic variables to strings.

(defn sanitize-fn
  "Returns a one-argument function that sanitizes a single element:

  - a wildcard is replaced by the result of calling `anon-gen`,
  - a logic variable or an ampersand is replaced by its string form,
  - anything else is returned untouched."
  [anon-gen]
  (fn [x]
    (if (contains? wildcards x)
      (anon-gen)
      ;; For an ampersand `(str x)` is exactly \"&\", so both cases
      ;; share the same conversion.
      (if (or (cascalog-var? x)
              (= "&" (str x)))
        (str x)
        x))))

(defn sanitize
  "Accepts a (potentially nested) data structure and returns a
  sanitized copy in which every wildcard and logic variable has been
  replaced by a string. Wildcards become fresh ungrounding vars when
  the predicate already mentions an ungrounding var, and fresh
  nullable vars otherwise."
  [pred]
  (let [has-unground? (some unground-var? (s/flatten pred))
        anon-gen      (if has-unground?
                        gen-ungrounding-var
                        gen-nullable-var)
        clean         (sanitize-fn anon-gen)]
    (postwalk clean pred)))

(defn replace-dups
  "Accepts a collection (normalized with `s/collectify`) and returns a
  2-vector

    [replacements cleaned]

  where `cleaned` is a vector of the input items in order, with the
  second and later occurrences of any item swapped for a fresh
  `uniquify-var` version of that item, and `replacements` is the set
  of those freshly generated stand-ins."
  [coll]
  (loop [pending (seq (s/collectify coll))
         seen    #{}
         cleaned []]
    (if pending
      (let [item (first pending)
            dup? (contains? seen item)]
        (recur (next pending)
               (if dup? seen (conj seen item))
               (conj cleaned (if dup? (uniquify-var item) item))))
      ;; Everything in `cleaned` that was never recorded as seen must
      ;; be one of the generated replacements.
      [(difference (set cleaned) seen)
       cleaned])))
