(ns idm.graph.alg
  "Algorithms for traversing the graph"
  (:require
    ;[clojure.data.priority-map :refer [priority-map-keyfn]]
    [clojure.tools.logging :as log]
    [idm.graph.alg.kosaraju :as kosaraju]
    [idm.log :refer [passf]]
    [ubergraph.core :as uber]
    [ubergraph.alg :as alg]
    [ubergraph.protocols :as uber.proto]))

(defprotocol Multipath
  "Functions going beyond the Ubergraph IPath protocol to deal with extended
  requirements, such as retrieving lists of relations etc."
  (relations [path] "A list of relations in this path")
  (edge-sets [path] 
    "A list of edge sets comprising a full traversal of this multipath. Even
    single edges will be wrapped in a set.")
  (path [path] "Get a vector of edges and relations in this multipath. For a
               path with only one->one edges, equivalent to [[alg/edges-in-path]]"))

; TODO: terrible idea?
(def ^:private ns-aliases-internal
  '{idm.graph               g
    idm.graph.relation      rel
    idm.graph.relation.type rel-type})

(doseq [[ns-sym ns-alias] ns-aliases-internal]
  (log/warnf "Aliasing %s to %s with some ridiculous custom thing." ns-sym ns-alias)
  (when-not (find-ns ns-sym)
    (log/warnf "Namespace %s does not exist; CREATING IT TO ALIAS" ns-sym)
    (create-ns ns-sym))
  (alias ns-alias ns-sym))

(defn- relation?
  "Tests whether or not x is a relation"
  [x]
  (and (associative? x)
       (every? #(contains? x %) [::rel/id ::rel/edges ::rel/type])))

; TODO: necessary?
(def no-goal 
  "Value indicating no actual goal"
  (with-meta #{} {:no-goal true}))

(defn e->str
  "Debug representation of an edge. Ignores attributes."
  [e]
  (if (uber/edge? e)
    (->> ((juxt uber/src uber/dest) e)
         (mapv #(cond-> (str %)
                  (keyword? %) (subs 1)))
         (apply format "%s->%s"))
    (str e)))

(defn queue-entry->str
  "Convert an entry in the priority queue into a string"
  [[edge prev-edge cost]]
  (str \[ cost "] "
       (e->str edge)
       "; prev edge: "
       (e->str prev-edge)))


(defn- compare-by
  "Apply f to x and y and compare the result"
  [f x y]
  (compare (f x) (f y)))

(defn- edge-queue
  "Create a sorted set to be used as a priority queue for entries of the form
  [edge prev-edge cost]"
  []
  ; see https://clojuredocs.org/clojure.core/sorted-set-by#example-542692d5c026201cdc327096
  ; for more on sorting functions
  (let [compare-edges (fn [e1 e2]
                        ;(let [c (compare (:total-cost e1) (:total-cost e2))]
                        (let [c (compare-by peek e1 e2)]
                          (if (not= c 0)
                            c
                            (compare-by hash e1 e2))))]
    (sorted-set-by compare-edges)))

(defn edge->vector
  "Convert an Ubergraph edge to a vector of the form [src dest attrs], using
  the graph, which is stored on a given edge as metadata (usually) to retrieve
  the attributes."
  [edge]
  [(uber/src edge)
   (uber/dest edge)
   (when-let [graph (not-empty (meta edge))]
     (uber/attrs graph edge))])

(defn pprint-multipath
  "Pretty-print a multipath"
  [multipath]
  (doall
    (map-indexed
      (fn [layer edges]
        (println "Layer" layer)
        (mapv #(println (e->str %)) edges))
      (edge-sets multipath)))
  multipath)

(defn- gen-relation-id
  "Generate a relation id for an edge based on its src, dest, and internal
  ubergraph id. Do not use for edges that have an ID; this will not produce
  that."
  [edge]
  (keyword (str (e->str edge) \# (:id edge))))

(defn- rel-id-for
  "get a relation id for a given edge"
  [graph edge]
  (or (uber/attr graph edge :idm.graph.relation/id)
      (and (= :idm.graph.relation.type/one->one 
              ; TODO: need a centralized way of doing this defaulting
              (or (uber/attr graph edge :idm.graph.relation/type)
                  :idm.graph.relation.type/one->one))
           (keyword (str (e->str edge) \# (:id edge))))
      (throw 
        (ex-info
          (format "Edge %s has no id but is not one->one" (e->str edge))
          {:idm.graph.relation/type (uber/attr graph edge :idm.graph.relation/type)}))))

(defn relation-for
  "Get the relation an edge belongs to. Relation will include type, id, and
  member edges.

  When `graph` is not provided, assumes you know this is a one->one edge and
  completes as such."
  ([edge]
   #:idm.graph.relation{:type   :idm.graph.relation.type/one->one
                        :id     (gen-relation-id edge)
                        :edges  #{edge}})
  ([graph edge]
   (let [{:idm.graph.relation/keys [type id] :as info}
         (-> (uber/attrs graph edge)
             (select-keys [:idm.graph.relation/type :idm.graph.relation/id])
             (update :idm.graph.relation/type
                     #(if (some? %) % :idm.graph.relation.type/one->one)))]
     (log/trace info)
     (log/trace (uber/attrs graph edge))
     (assert (or (some? id)
                 (= type :idm.graph.relation.type/one->one))
             (str "Edges must be either one->one or declare a relation ID"
                  \space info))
     (case type
       :idm.graph.relation.type/one->one
       (if id
         ;; already have id; just make sure edges are specified
         (assoc info :idm.graph.relation/edges #{edge})
         ;; otherwise just use 1-arity of relation-for
         (relation-for edge))
       
       :idm.graph.relation.type/many->one
       (assoc info :idm.graph.relation/edges
              (->> (uber/dest edge)
                   (uber/in-edges graph)
                   (into #{} (filter #(= id (uber/attr graph % :idm.graph.relation/id))))
                   (log/spyf :trace
                             (str "Sibling edges for " (e->str edge) ": %s"))))

       :idm.graph.relation.type/one->many
       (assoc info :idm.graph.relation/edges
              (->> (uber/src edge)
                   (uber/out-edges graph)
                   (into #{} (filter #(= id (uber/attr graph % :idm.graph.relation/id))))
                   (log/spyf :trace
                             (str "Sibling edges for " (e->str edge) ": %s"))))))))

(defn find-multipath
  "Given the final *relation id* in a path, return a sequence of relation ids
  or sets of relation ids from the initial node(s) to the final relation."
  [edge-or-rel backlinks]
  (log/debugf "Beginning reverse pathfinding for %s" edge-or-rel)
  (log/trace "Backlinks map:" backlinks)
  (loop [queue (conj clojure.lang.PersistentQueue/EMPTY edge-or-rel)
         path ()
         ; one->many edges to be tracked
         one->many-edge-sets {}]
    (if-let [curr (peek queue)]
      (let [tail (pop queue)
            prev (backlinks curr)]
        (log/tracef "Current: %s; prev: %s" curr prev)
        (cond
          ;; this path has terminated; just drop and continue
          (= curr ()) 
          (recur tail path one->many-edge-sets)

          ;; When prev is a one->many relation, then curr is a child edge and should be cached
          ;; to later ensure we know where to put it in the sequence of transitions.
          (= ::rel-type/one->many (::rel/type prev))
          (recur (conj tail prev)
                 ; insert the relation into the path, to be removed later (first instance used)
                 (cons prev path)
                 ; note that this edge was traversed for this one->many relation
                 (update one->many-edge-sets prev (fnil conj #{}) curr))

          ;; The only time a set is the previous in the backlinks is when
          ;; curr is a many->one relation. In that case, we queue all of its
          ;; edges to have their backlinks considered, and add the set to the
          ;; path as a single step.
          (set? prev)
          (recur (into tail (map backlinks) prev)
                 (cons curr path)
                 one->many-edge-sets)

          :else
          (recur (conj tail prev) (cons curr path) one->many-edge-sets)))
      ;; At this point, one->many relations may be sprinkled throughout the
      ;; path, and none of their child edges are in the path. We want to find
      ;; the first instance of such a relation and make note of its child edges
      ;; that were actually traversed in this path.
      (into []
            (comp (distinct) ; this ensures we only keep the first one
                  ; if this is a one->many rel, add the collected child edges
                  (map (fn [maybe-rel]
                         ; only one->many rels are in this map
                         (if-some [edges (one->many-edge-sets maybe-rel)]
                           ; so we know here that this is such a rel
                           (assoc maybe-rel ::rel/traversed-edges edges)
                           ; not a applicable in this case
                           maybe-rel))))
            path))))

(defn multipath
  "Create an object satisfying Ubergraph's IPath as well as IMultipath to
  represent a multipath."
  [backlinks costs last-edge last-rel]
  (let [;; When a many->one relation was last, we must start with its id.
        ;; For others, we should use the last edge.
        last-id (if (= ::rel-type/many->one (::rel/type last-rel))
                  last-rel
                  last-edge)
        path (delay (find-multipath last-id backlinks))]
        ;edge-set-path (delay (find-multipath last-id backlinks))]
    (reify
      Multipath
      (relations [this]
        ; wrap any single edges in a relation
        (into [] (map #(if (uber/edge? %) (relation-for %) %)) @path))
      (edge-sets [this]
        ; wrap single edges in a set and get the edges from relations
        (into [] 
              (map #(if (uber/edge? %) 
                         #{%} 
                         (or (::rel/traversed-edges %) ; one->many
                             (::rel/edges %))) ; many->one
                         @path)))
      (path [this] @path)
      uber.proto/IPath
      (edges-in-path [this]
        (into [] cat (edge-sets this)))
      (nodes-in-path [this]
        (let [edges (edge-sets this)
              ; distinct at the level of the step, not overall
              srcs  (into [] (mapcat (partial into #{} (map uber/src))) edges)]
          (->> (last edges)
               (into #{} (map uber/dest))
               (concat srcs)
               (into []))))
      (cost-of-path [_] (costs last-id))
      (end-of-path  [_] last-id) ; TODO
      (start-of-path [this]
        (log/warn "start-of-path may not be accurate for multipaths")
        (first (alg/nodes-in-path this)))
      (last-edge-of-path [this]
        (log/warn "last-edge-of-path may not be accurate for multipaths")
        (last (alg/edges-in-path this))))))

(defn all-multipaths
  "Prepare an ubergraph-compatible representation of all possible multipaths
  based on a priority map of visited edges and a map of edge->siblings"
  [backlinks costs]
  (let [edges     (into #{} (filter uber/edge?) (keys backlinks))
        all-dest  (into #{} (map uber/dest) edges)
        cheapest  (reduce
                    (fn [c edge]
                      (let [dest (uber/dest edge)]
                        (if (not (contains? c dest))
                          (assoc c dest edge)
                          (if (> (costs (c dest)) (costs edge))
                            (assoc c dest edge)
                            c))))
                    {}
                    edges)]
    (reify uber.proto/IAllPathsFromSource
      (path-to [this dest]
        (log/debug "Getting path to" dest)
        (if-let [edge (cheapest dest)]
          (multipath backlinks costs edge nil)
          (log/warn "No edge found for" dest)))
      (all-destinations [_] all-dest))))

(defn- update!
  "`update` for a transient collection, since one is not provided."
  ([m k f]
   (assoc! m k (f (get m k)))))

(defn least-cost-path
  "Finds the cheapest path satisfying the given constraints. Respects mult-edge
  relations (idm.graph.relation/[one->one, many->one, one->many])

  Arguments:
  `g`: graph
  `starting-nodes`: set of initial, known nodes
  `goal?`: function returning truthy value if a node is the target
  `may-final-edge?`: function indicating whether or not a given edge may be final
  `cost-fn`: function providing cost of a given edge
  `node-filter`: function returning true when a given node is allowed
  `edge-filter`: function returning true when a given edge is allowed"
  ; TODO: maybe make edge-filter accept more context and then dispose of may-final-edge?
  [g starting-nodes goal? may-final-edge? cost-fn node-filter edge-filter]
  {:pre [#(every? (partial uber/has-node?) starting-nodes)]}
  (log/tracef "Starting nodes: %s; goal? %s; may-final-edge?: %s; cost-fn: %s; node-filter: %s; edge-filter: %s"
              starting-nodes goal? may-final-edge? cost-fn node-filter edge-filter)
  (let [;; helper to reduce overhead from calls to relation-for
        relation-for (let [cache (atom {})]
                       (fn [edge]
                         (or (@cache edge)
                             (when-let [rel (relation-for g edge)]
                               ;; update cache for all edges in rel, not just requested.
                               ;; this means any others from the rel will just hit cache
                               (swap! cache conj (zipmap (::rel/edges rel) (repeat rel)))
                               rel))))
        ;; lazy version of `comp`ing the transducers and wrapping in `into`
        init-edges (eduction (filter node-filter)
                             (mapcat (partial uber/out-edges g))
                             starting-nodes)

        ;; take a transducible producing edges and return a transducible
        ;; providing edge definitions suitable for use in queue
        queue-entries (fn queue-entries
                        ([prev-item edge-producer]
                         (queue-entries prev-item 0 edge-producer))
                        ([prev-item base-cost edge-producer]
                         (eduction 
                           (filter edge-filter)
                           (map #(vector % prev-item (+ base-cost (cost-fn %))))
                           edge-producer)))]
    (log/debug "Beginning pathfinding algorithm")
    (loop [;; priority queue; initial edges have no prev
           queue      (into (edge-queue) (queue-entries () init-edges))
           ;; map from edge/relation to previous in path
           backlinks  (transient {})
           ;; lowest cost to traverse *through* the given edge/relation
           costs      (transient {})]
      (if-let [entry (first queue)]
        (let [;; prev is the edge prior to this one in traversal
              [edge prev $start->dest] entry
              ;; node this edge points to
              dest (uber/dest edge)
              ;; won't need this entry again
              tail (disj queue entry)]
          (log/tracef "Considering edge %s; prev: %s" (e->str edge) prev)
          (cond
            ;; We don't visit an edge more than once; first time is cheapest.
            (contains? backlinks edge)
            (-> (recur tail backlinks costs)
                (passf :trace "already visited, skipping"))
            ;; No need to consider edges that point to unusable nodes.
            (not (node-filter dest))
            (-> (recur tail backlinks costs)
                (passf :trace "dest does not qualify, skipping"))

            :else
            (let [;; get the relation associated with this edge
                  {::rel/keys [type edges] :as rel} (relation-for edge)
                  ;; Update backlinks based on relation type. Note that this
                  ;; ensures that an edge is always linked to either a single
                  ;; edge or a relation id.
                  backlinks' (case type
                               ;; For one->one, add a link from this edge to the previous edge.
                               ::rel-type/one->one  (assoc! backlinks edge prev)

                               ;; For one->many, add a link from this edge to the relation, and,
                               ;; when the relation has no link, add one from the relation to
                               ;; the previous edge, as this is the first time we are traversing
                               ;; this relation, and thus it is the cheapest path to it.
                               ::rel-type/one->many (-> (assoc! backlinks edge rel)
                                                        (update! rel #(or % prev)))

                               ;; For many->one, add a link from this edge to the previous edge,
                               ;; and add a link from this relation to all of its child edges.
                               ::rel-type/many->one (assoc! backlinks edge prev, rel edges))

                  ;; Always indicate current edge cost; true no matter rel type
                  costs' (assoc! costs edge $start->dest)

                  ;; Relation traversal cost is simply cost thru child edge for one->[one,many]
                  ;; When many->one, cost is sum of cost of all inputs, or nil if not all traversed.
                  rel-cost (if (not= type ::rel-type/many->one)
                             $start->dest
                             (or (costs' rel) ; we'll take a non-nil previously calculated cost for relation
                                 ; nil-safe addition in case of missing edges
                                 (reduce #(and %1 %2 (+ %1 %2)) (map costs' edges))))

                  ;; Update this relation's cost only if missing or nil.
                  costs' (update! costs' rel #(or % rel-cost))

                  ;; Edges out of many->one rels point back to the rel; otherwise 
                  ;; the previous to an edge in the queue is another edge.
                  prev-ref (if (= type ::rel-type/many->one) rel edge)]
              (if (nil? rel-cost) ; only true when many->one with untraversed inputs
                ;; All edges in a many->one point to the same destination; as such,
                ;; we don't queue outbound edges until all inbound edges are traversed.
                (recur tail backlinks' costs')

                ;; Either a complete many->one or any form of one->one/one->many
                (if (and (goal? dest) (may-final-edge? edge))
                  ;; Qualified for early termination
                  {:backlinks (persistent! backlinks')
                   :costs     (persistent! costs')
                   :last-edge edge
                   :last-rel  rel}

                  ;; Must queue additional edges
                  (recur (->> (uber/out-edges g dest) ;; edges outbound from this one
                              (queue-entries prev-ref (costs' prev-ref)) ;; filter and make queue entries
                              (into tail)) ;; put into queue
                         backlinks' costs'))))))
        ; edge is nil
        (if (identical? no-goal goal?)
          {:backlinks (persistent! backlinks)
           :costs     (persistent! costs)}
          (log/spyf :warn "Uh oh: %s" backlinks))))))

(defn least-cost-path-wrapper
  "Wrapper function for least-cost-path and similar to work with and
  standardize the many options available."
  ([g opts]
   ; for some reason if I use map destructuring for more than 8 keys it breaks
   ; so I have to do it in chunks
   (let [{:keys [start-node start-nodes]
          :or {start-nodes  [start-node]}} opts
         ; start-nodes usable

         {:keys [end-node? end-node end-nodes]} opts
         goal? (or end-node?
                   (and end-nodes (into #{} end-nodes))
                   (and end-node #{end-node})
                   no-goal)
         ; goal? usable

         {:keys [may-final-edge?]
          :or {may-final-edge? (constantly true)}} opts
         ; may-final-edge? usable

         {:keys [cost-attr cost-fn]
          :or {cost-fn (if (some? cost-attr)
                         #(uber/attr g % cost-attr)
                         (constantly 1))}} opts
         ; cost-fn usable

         {:keys [node-filter edge-filter]
          :or {node-filter      (constantly true)
               edge-filter      (constantly true)}} opts
         ; node-filter & edge-filter usable

         {:keys [make-path?]
          :or {make-path? true}} opts
         res (least-cost-path
               g start-nodes goal? may-final-edge? cost-fn node-filter edge-filter)
         {:keys [backlinks costs last-edge last-rel]} res]
     (if make-path?
       (if (or last-edge last-rel)
         (multipath backlinks costs last-edge last-rel)
         (all-multipaths backlinks costs))
       res)))
  ([g start-node end-node]
   (least-cost-path-wrapper g {:start-node start-node :end-node end-node}))
  ([g start-node end-node cost-attr]
   (least-cost-path-wrapper g {:start-node start-node :end-node end-node :cost-attr cost-attr})))

(defn arborescence
  ; TODO: broken at the moment
  "Calculate an arborescence in `D` rooted at `r` with cost fn `w`.
  Minimum spanning tree algorithm for a directed graph.
  See https://en.wikipedia.org/wiki/Edmonds%27_algorithm

  `w` should be a function of the edge attributes map"
  [G r w]
  (let [w' (fn [[_src _dest attrs]]
             (if-some [cost (::cost attrs)]
               ; return previously specified value
               cost
               ; otherwise just call w on edge
               (w attrs)))
        ; set of edges that are cheapest for each node
        P (into #{}
                (map #(->> (uber/in-edges G %)
                           (mapv (fn [e-obj]
                                   (conj (uber/edge-with-attrs G e-obj) e-obj)))
                           (into (sorted-set-by (fn [e1 e2] (compare (w' e1) (w' e2)))))
                           first))
                (disj (set (uber/nodes G)) r))]
        ;P (into #{}
        ;        ; we don't care about losing duplicates; tie-breaking is arbitrary
        ;        ; though maybe this should just be a reduce; TODO
        ;        (map #(->> % (uber/in-edges G) (into (sorted-set-by w')) first))
        ;        (disj (set (uber/nodes G)) r))]
    (if-some [C_edges (val (first (kosaraju/find-cycle G P)))]
      (let [_ (log/trace "Edges:" C_edges)
            C (into #{} (map uber/src) C_edges)
            _ (log/trace "Nodes in cycle:" C)
            P-by-dest (into {} (map (juxt uber/dest identity)) P)
            ;Cn (into #{} (map uber/src) C)
            _ (log/trace "P by dest:" P-by-dest)
            vC (keyword "idm.graph.alg" (str (gensym "v#")))
            ; TODO: yeah this definitely doesn't work. Kills all inbound and outbound edges as well.
            G' (-> G
                   (uber/remove-edges* C_edges)
                   (uber/add-nodes vC))
            ; TODO: maybe ::prev needs to be generated to avoid somehow overlapping?
            G' (reduce
                 (fn [G' e]
                   (let [u (uber/src e)
                         v (uber/dest e)]
                     (cond
                       ; edge coming into cycle
                       (contains? C v) 
                       ; add an edge to E' whose cost is original cost minus
                       ; cost of cheapest edge to same destination
                       (-> G'
                           (uber/remove-edges e)
                           (uber/add-edges [u vC (assoc (uber/attrs G' e)
                                                        ::cost (- (w' e) (w' (P-by-dest v)))
                                                        ::prev e)]))
                       ; edge going away from cycle
                       (contains? C u)
                       ; add an edge to E' whose cost is same as before
                       (-> G'
                           (uber/remove-edges e)
                           (uber/add-edges [vC v (assoc (uber/attrs G' e)
                                                        ::cost (w' e)
                                                        ::prev e)]))

                       ; edge not related to cycle; unmodified
                       :else G')))
                 G' 
                 (mapv (partial uber/edge-with-attrs G')
                       (uber/edges G')))
            _ (do (println "G'")
                  (uber/pprint G'))
            ; recur with original cost function
            A' (arborescence G' r w)
            A'-by-dest (into {} (map (juxt uber/dest identity)) (uber/edges A'))
            ; this edge corresponds with some edge u->v in E with v in C
            u->vC (A'-by-dest vC)
            ; edge was present in G' so we can retrieve corresponding original edge
            u->v (uber/attr G' u->vC ::prev)
            C_edges' (disj C_edges (P-by-dest (uber/dest u->v)))
            G'' (-> G ; original graph
                    ; remove all edges
                    (uber/remove-edges* (uber/edges G))
                    (uber/add-edges* C_edges'))]
        (reduce
          (fn [G'' e]
            (uber/add-edges G'' (uber/attr G' e ::prev)))
          G''
          (uber/edges A')))
      P)))
               
(defn msp-directed
  [graph root cost-fn]
  (arborescence [(set (uber/nodes graph))
                 (set (uber/edges graph))]
                root
                cost-fn))

