(ns idm.graph.alg
  "Algorithms for traversing the graph"
  (:require
    [clojure.tools.logging :as log]
    [idm.graph.multipath :as mp]
    [idm.graph.relation :as rel]
    [idm.graph.relation.type :as rel.type]
    [idm.graph.util :refer [e->str]]
    [idm.log :refer [passf]]
    [ubergraph.core :as uber]))

(defn add-default-params
  "Standardize the options map for least-cost-path"
  [g opts]
  ;{:pre [(some #{:start-node :start-nodes} opts)]}
  (letfn [(default [opts k & defaults]
            (if (contains? opts k)
              opts
              (assoc opts k (some identity defaults))))
          (+targets [opts]
            (assoc opts :targets (set (:end-nodes opts))))

          (+multi-dest? [{:keys [targets traverse?] :as opts}]
            (log/trace "targets:" targets "traverse?:" traverse?)
            (assoc opts :multi-dest? (and (not-empty targets) traverse?)))

          (+final-node [{:keys [targets traverse?] :as opts}]
            (update opts :final-node?
                    #(or % (when (not traverse?) targets) (constantly false))))

          (+no-targets? [opts]
            (assoc opts :no-targets? (empty? (:end-nodes opts))))]
    (-> opts
        (default :start-nodes #{(:start-node opts)})
        (default :end-nodes (some-> opts :end-node list))
        +targets
        +multi-dest?
        +no-targets?
        (default :traverse? false)
        +final-node
        (default :may-final-edge? (constantly true))
        (default :cost-fn 
          (when-let [attr (:cost-attr opts)]
            #(uber/attr g % attr))
          (constantly 1))
        (default :node-filter (constantly true))
        (default :edge-filter (constantly true))
        (default :make-path?  true))))

(defn- compare-by
  "Apply f to x and y and compare the result"
  [f x y]
  (compare (f x) (f y)))

(defn- priority-queue
  "Create a sorted set to be used as a priority queue for entries of the form
  [edge prev-edge cost]"
  []
  ; see https://clojuredocs.org/clojure.core/sorted-set-by#example-542692d5c026201cdc327096
  ; for more on sorting functions
  (let [compare-edges (fn [e1 e2]
                        ;(let [c (compare (:total-cost e1) (:total-cost e2))]
                        (let [c (compare-by peek e1 e2)]
                          (if (not= c 0)
                            c
                            (compare-by hash e1 e2))))]
    (sorted-set-by compare-edges)))

(defn- update!
  "`update` for a transient collection, since one is not provided."
  ([m k f]
   (assoc! m k (f (get m k)))))

(defn least-cost-path
  "Finds the cheapest path satisfying the given constraints. Respects mult-edge
  relations (idm.graph.relation/[one->one, many->one, one->many])

  Arguments:
  `g`: graph
  `params`: map of parameters, with defaults provided by `add-default-params`
    - `start-nodes`: set of initial, known nodes
    - `end-nodes`: set of nodes to end at
    - `traverse?`: when true, requires all `end-nodes` be reached before
      terminating; otherwise, terminates on first node
    - `may-final-edge?` function indicating whether nor not a given edge may be
      the last one in the path
    - `cost-fn`: function providing cost of a given edge
    - `node-filter`: function returning true for allowed nodes
    - `edge-filter`: function returning true for allowed edges
    - `final-node?`: function instructing immediate termination for a node
      returning true, provided its edge passes `may-final-edge?`

  If `end-nodes` not provided:
    - and `final-node?` is provided, will exit on first matching node.
      `final-node?` is otherwise not used.
    - otherwise, `traverse?` is assumed and all possible edges will be visited
      before exiting
  Otherwise, if `traverse?` is true, will attempt to visit all nodes in
  `end-nodes` before exiting; if false, will exit on first node in `end-nodes`."
  ([g start-node end-node]
   (least-cost-path g {:start-node start-node :end-node end-node}))
  ([g start-node end-node cost-attr]
   (least-cost-path g {:start-node start-node :end-node end-node :cost-attr cost-attr}))
  ([g params]
   (let [params (if (map? params) params {:start-node params})
         params (add-default-params g params)
         {:keys [start-nodes final-node? targets multi-dest? no-targets?]} params
         {:keys [may-final-edge? cost-fn node-filter edge-filter]} params
         _ (log/debug "multi-dest?:" multi-dest?
                      "no-targets?:" no-targets?
                      "targets:" targets
                      "final-node?:" final-node?)
         ;; helper to reduce overhead from calls to relation-for
         relation-for (let [cache (atom {})]
                        (fn [edge]
                          (or (@cache edge)
                              (when-let [rel (rel/from-edge g edge)]
                                ;; update cache for all edges in rel, not just requested.
                                ;; this means any others from the rel will just hit cache
                                (swap! cache conj (zipmap (::rel/edges rel) (repeat rel)))
                                rel))))
         ;; lazy version of `comp`ing the transducers and wrapping in `into`
         init-edges (eduction (filter node-filter)
                              (mapcat (partial uber/out-edges g))
                              start-nodes)

         ;; take a transducible producing edges and return a transducible
         ;; providing edge definitions suitable for use in queue
         queue-entries (fn queue-entries
                         ([prev-item edge-producer]
                          (queue-entries prev-item 0 edge-producer))
                         ([prev-item base-cost edge-producer]
                          (eduction 
                            (filter edge-filter)
                            (map #(vector % prev-item (+ base-cost (cost-fn %))))
                            edge-producer)))]
     (log/debug "Beginning pathfinding algorithm")
     (loop [;; priority queue; initial edges have no prev
            queue      (into (priority-queue) (queue-entries () init-edges))
            ;; map from edge/relation to previous in path
            backlinks  (transient {})
            ;; lowest cost to traverse *through* the given edge/relation
            costs      (transient {})
            ;; keeping track of visited nodes for termination checks
            target-nodes  targets]
       (if-let [entry (first queue)]
         (let [;; prev is the edge prior to this one in traversal
               [edge prev $start->dest] entry
               ;; node this edge points to
               dest (uber/dest edge)
               ;; won't need this entry again
               tail (disj queue entry)]
           (log/tracef "Considering edge %s; prev: %s" (e->str edge) prev)
           (cond
             ;; We don't visit an edge more than once; first time is cheapest.
             (contains? backlinks edge)
             (-> (recur tail backlinks costs target-nodes)
                 (passf :trace "already visited, skipping"))
             ;; No need to consider edges that point to unusable nodes.
             (not (node-filter dest))
             (-> (recur tail backlinks costs target-nodes)
                 (passf :trace "dest does not qualify, skipping"))

             :else
             (let [;; get the relation associated with this edge
                   {::rel/keys [type edges] :as rel} (relation-for edge)
                   ;; Update backlinks based on relation type. Note that this
                   ;; ensures that an edge is always linked to either a single
                   ;; edge or a relation id.
                   backlinks' (case type
                                ;; For one->one, add a link from this edge to the previous edge.
                                ::rel.type/one->one  (assoc! backlinks edge prev)

                                ;; For one->many, add a link from this edge to the relation, and,
                                ;; when the relation has no link, add one from the relation to
                                ;; the previous edge, as this is the first time we are traversing
                                ;; this relation, and thus it is the cheapest path to it.
                                ::rel.type/one->many (-> (assoc! backlinks edge rel)
                                                         (update! rel #(or % prev)))

                                ;; For many->one, add a link from this edge to the previous edge,
                                ;; and add a link from this relation to all of its child edges.
                                ::rel.type/many->one (assoc! backlinks edge prev, rel edges))

                   ;; Always indicate current edge cost; true no matter rel type
                   costs' (assoc! costs edge $start->dest)

                   ;; Relation traversal cost is simply cost thru child edge for one->[one,many]
                   ;; When many->one, cost is sum of cost of all inputs, or nil if not all traversed.
                   rel-cost (if (not= type ::rel.type/many->one)
                              $start->dest
                              (or (costs' rel) ; we'll take a non-nil previously calculated cost for relation
                                  ; nil-safe addition in case of missing edges
                                  (reduce #(and %1 %2 (+ %1 %2)) (map costs' edges))))

                   ;; Update this relation's cost only if missing or nil.
                   costs' (update! costs' rel #(or % rel-cost))]
               (if (nil? rel-cost) ; true when many->one rel w/missing edges
                 ;; All edges in a many->one point to the same destination; as
                 ;; such, we neither queue outbound edges nor terminate on the
                 ;; destination node until all inbound edges are traversed
                 (recur tail backlinks' costs' target-nodes)
                 (let [prev-ref (if (= type ::rel.type/many->one) rel edge)
                       edge-may-term (may-final-edge? edge)
                       backlinks'    (if edge-may-term
                                       (update! backlinks' dest #(or % prev-ref))
                                       backlinks')
                       target-nodes  (if edge-may-term
                                       (-> (disj target-nodes dest)
                                           (passf :trace "Removing %s from %s" dest target-nodes))
                                       ;(disj target-nodes dest)
                                       target-nodes)]
                   (if (and edge-may-term ; only exit when edge qualifies
                            (or (final-node? dest) ; and this is a final node
                              (and multi-dest? ; and we need a subset
                                     (empty? target-nodes)))) ; and we have it
                     (mp/multipath g
                                   (persistent! backlinks')
                                   (persistent! costs')
                                   (if multi-dest?
                                     targets
                                     #{dest}))
                     (recur (->> (uber/out-edges g dest)
                                 (queue-entries prev-ref (costs' prev-ref))
                                 (into tail))
                            backlinks'
                            costs'
                            target-nodes)))))))
         ; edge is nil; just yield what we have
         (-> (mp/multipath g (persistent! backlinks) (persistent! costs) nil)
             (passf :debug "Terminating after exhausting all edges")))))))
