(ns bloom.omni.eav-infer
  (:require
    [clojure.set :as clojure.set]))

(defn- unnest
  "Flattens nesting in `records` (a collection of maps).

  Returns a lazy seq holding every record immediately followed by the
  records nested inside it, depth-first. A value counts as nested records
  when it is a map, or a vector whose first element is a map (in which case
  every element of that vector is treated as a record)."
  [records]
  (letfn [;; Records reachable through a single value of a record.
          (nested-in [v]
            (cond
              (map? v)
              (unnest [v])

              ;; only the first element is inspected to decide whether the
              ;; vector holds records
              (and (vector? v) (map? (first v)))
              (unnest v)

              :else
              []))]
    (mapcat (fn [record]
              (cons record
                    (mapcat (fn [[_ v]] (nested-in v)) record)))
            records)))

(defn unwrap
  "Flattens `r` (a map, or a vector of such values) into a seq of [k v]
  pairs in which every v is a primitive (neither a map nor a vector of maps).

  - a map value is replaced by the pairs of that nested map;
  - a vector whose first element is a map is replaced by the pairs of all
    of its elements;
  - any other vector contributes one [k element] pair per element;
  - anything else contributes a single [k v] pair.

  Returns nil when `r` is neither a vector nor a map."
  [r]
  (letfn [;; Pairs contributed by one map entry.
          (entry->pairs [[k v]]
            (cond
              (map? v)
              (unwrap v)

              (not (vector? v))
              [[k v]]

              ;; from here on v is a vector
              (map? (first v))
              (mapcat unwrap v)

              :else
              (for [item v]
                [k item])))]
    (cond
      (vector? r) (mapcat unwrap r)
      (map? r)    (mapcat entry->pairs r))))

(defn recs->ids
  "Given a vector of (possibly nested) records, returns the set of keys
  inferred to be ids.

  All records, including nested ones (as flattened by `unnest`), are pooled
  together. A key is inferred to be an id when it has at least one primitive
  value (neither a map nor a vector) and all of its primitive values are
  distinct, i.e. no two records share the same [k v] pair. Map and vector
  values under a key are ignored for this check.

  Returns a (possibly empty) set of keys. Has no side effects."
  [records]
  (let [primitive? (fn [v]
                     (not (or (vector? v)
                              (map? v))))

        ; [{:a 1 :b 2} {:a 3 :c 2}]
        ; ->
        ; {:a [1 3] :b [2] :c [2]}
        k->vs (reduce (fn [memo [k v]]
                        ; fnil starts a fresh vector the first time k is seen
                        (update memo k (fnil conj []) v))
                      {}
                      (mapcat identity (unnest records)))]
    ; ids are unique
    ; ie. no two records have the same kv pair
    (->> k->vs
         (filter (fn [[_ vs]]
                   (let [primitives (filter primitive? vs)]
                     ; `distinct?` needs at least one argument, hence the
                     ; `seq` guard; keys with no primitive values are dropped
                     (when (seq primitives)
                       (apply distinct? primitives)))))
         (map first)
         set)))

(defn recs->rels
  "Given a vector of records, returns a map from key to the inferred
  relationship type for that key.

  The set of known ids is the non-nil `:id` values of the top-level records.
  Each key of each top-level record is classified by its value:

    :embed-many      a vector whose first element is a map
    :reference-many  a vector whose first element is a known id
    :many            any other vector
    :embed-one       a map
    :reference-one   a known id that is not the record's own :id

  Keys with any other value are omitted. Nested maps are not descended into.
  When several records disagree about a key, the later record wins (the
  per-record results are combined with `merge`). Returns nil when there is
  nothing to merge (e.g. `records` is empty)."
  [records]
  (let [; `keep` instead of `map`: a record without :id must not add nil to
        ; the id set, otherwise nil values and empty vectors (whose `first`
        ; is nil) would be classified as references
        ids (set (keep :id records))
        ->rels (fn ->rels [record]
                 (cond
                   (map? record)
                   (apply merge
                          (map (fn [[k v]]
                                 (cond
                                   (vector? v)
                                   (cond
                                     ; only the first element is inspected
                                     (map? (first v))
                                     {k :embed-many}
                                     (contains? ids (first v))
                                     {k :reference-many}
                                     :else
                                     {k :many})

                                   (map? v)
                                   {k :embed-one}

                                   ; a record's own :id is not a reference
                                   (and
                                     (not= v (:id record))
                                     (contains? ids v))
                                   {k :reference-one}

                                   :else
                                   {}))
                               record))

                   (vector? record)
                   (apply merge
                          (map ->rels record))))]
    (->rels records)))

(defn recs->schema
  "Given a vector of records, is meant to return the inferred schema, that
  is, for each key, whether it is an id, a relationship, or just a value.

  Not implemented yet: the body is empty, so this always returns nil."
  [records]
  ; TODO: implement. NOTE(review): presumably this should combine the results
  ; of recs->ids and recs->rels -- confirm the intended return shape.
  nil)
