(ns idm.graph.multipath
  "Multipath record and protocol, with functions to produce a multipath from
  a backlinks map."
  (:require
    [clojure.tools.logging :as log]
    [idm.graph.relation :as rel]
    [idm.graph.relation.type :as rel.type]
    [idm.graph.util :refer [e->str]]
    [idm.log :refer [passf]]
    [ubergraph.core :as uber]
    [ubergraph.protocols :as uber.proto])
  (:import [clojure.lang PersistentQueue]))
    
(defprotocol IMultipath
  "Functions going beyond the Ubergraph IPath protocol to deal with extended
  requirements, such as retrieving lists of relations etc."
  (end-nodes [path] "A set of the nodes this path goes to.")
  (relations [path] "A list of relations in this path")
  (edge-sets [path] 
    "A list of edge sets comprising a full traversal of this multipath. Even
    single edges will be wrapped in a set.")
  (path [path] 
    "Get a vector of edges and relations in this multipath. For a
    path with only one->one edges, equivalent to [[alg/edges-in-path]]")
  (multipath-to [path dests]
    "Get a multipath to `dests`, using the same backlinks and costs as this
    one. If `dests` is not sequential, assumes single node and wraps in a
    vector."))

(defn find-multipath
  "Given the final desintations of a path, returns the sequence of edges or
  relations from the initial node(s) to the final targets."
  [backlinks targets]
  (log/debug "Beginning reverse pathfinding for" targets)
  (log/trace "Backlinks map:" backlinks)
  (loop [queue (into PersistentQueue/EMPTY (map backlinks) targets)
         path ()
         ; one->many edges to be tracked
         one->many-edge-sets {}]
    (if-let [curr (peek queue)]
      (let [tail (pop queue)
            prev (backlinks curr)]
        (log/tracef "Current: %s; prev: %s" curr prev)
        (cond
          ;; this path has terminated; just drop and continue
          (= curr ()) 
          (recur tail path one->many-edge-sets)

          ;; When prev is a one->many relation, then curr is a child edge and should be cached
          ;; to later ensure we know where to put it in the sequence of transitions.
          (= ::rel.type/one->many (::rel/type prev))
          (recur (conj tail prev)
                 ; insert the relation into the path, to be removed later (first instance used)
                 (cons prev path)
                 ; note that this edge was traversed for this one->many relation
                 (update one->many-edge-sets prev (fnil conj #{}) curr))

          ;; The only time a set is the previous in the backlinks is when
          ;; curr is a many->one relation. In that case, we queue all of its
          ;; edges to have their backlinks considered, and add the set to the
          ;; path as a single step.
          (set? prev)
          (recur (into tail (map backlinks) prev)
                 (cons curr path)
                 one->many-edge-sets)

          :else
          (recur (conj tail prev) (cons curr path) one->many-edge-sets)))
      ;; At this point, one->many relations may be sprinkled throughout the
      ;; path, and none of their child edges are in the path. We want to find
      ;; the first instance of such a relation and make note of its child edges
      ;; that were actually traversed in this path.
      (into []
            (comp (distinct) ; this ensures we only keep the first one
                  ; if this is a one->many rel, add the collected child edges
                  (map (fn [maybe-rel]
                         ; only one->many rels are in this map
                         (if-some [edges (one->many-edge-sets maybe-rel)]
                           ; so we know here that this is such a rel
                           (assoc maybe-rel ::rel/traversed-edges edges)
                           ; not a applicable in this case
                           maybe-rel))))
            path))))

(defrecord Multipath [graph backlinks costs targets cache]
  IMultipath
  (end-nodes [_] targets)
  (path [this]
    (if-some [cached (@cache targets)]
      (passf cached :debug "Got cached path to %s" targets)
      (let [p (find-multipath backlinks targets)]
        (log/debugf "Calculated path to %s" targets)
        (swap! cache assoc targets p)
        p)))
  (relations [this]
    ; wrap any single edges in a relation
    (->> (path this)
         (eduction (map #(if (uber/edge? %) (rel/from-edge graph %) %)))
         (into [])))
  (edge-sets [this]
    ; wrap single edges in a set and get the edges from relations
    (into []
          (map #(if (uber/edge? %)
                  #{%}
                  (or (::rel/traversed-edges %) ; one->many
                      (::rel/edges %)))) ; many->one
          (path this)))
  (multipath-to [this dests]
    (log/debug "Getting path to" dests)
    (let [dests (if (sequential? dests) dests (list dests))]
      (if (every? (uber.proto/all-destinations this) dests)
        ;; using a shared cache means that, if the new multipath is then asked
        ;; to produce the original path again, it's free.
        (->Multipath graph backlinks costs dests cache)
        (log/warn "Unable to locate backlinks for every dest in" dests))))

  uber.proto/IPath
  (edges-in-path [this]
    (into [] cat (edge-sets this)))
  (nodes-in-path [this]
    (let [edges (edge-sets this)
          ; distinct at the level of the step, not overall
          srcs  (into [] (mapcat (partial into #{} (map uber/src))) edges)]
      (->> (last edges)
           (into #{} (map uber/dest))
           (concat srcs)
           (into []))))
  ;; may produce unusual results compared to typical uber.alg/end-of-path
  (end-of-path [_] targets)
  (cost-of-path [_] (reduce + (eduction (map backlinks) (map costs) targets)))

  uber.proto/IAllPathsFromSource
  (all-destinations [_]
    (into #{} 
          (comp (filter uber/edge?)
                (map uber/dest))
          (keys backlinks)))
  (path-to [this dest] (multipath-to this dest)))

(defn multipath
  "Construct a multipath from a graph, backlinks map, costs map, and optional
  set of target nodes. If nodes are provided, they should be explicitly
  traversed by the given backlinks."
  ([g backlinks costs] (multipath g backlinks costs nil))
  ([g backlinks costs targets]
   (->Multipath g backlinks costs targets (atom {}))))

(defn valid?
  "Validate that the sequence of steps indicated by a multipath is actually
  traversable"
  ;; TODO: basically throwing away information. May not really be that useful.
  [start-nodes mp]
  ; get edge sets without unwrapping
  (let [edge-sets (edge-sets mp)
        res
        (reduce
          (fn [seen-nodes next-edges]
            (if-let [missing (first (remove #(contains? seen-nodes %)
                                            (map uber/src next-edges)))]
              (reduced missing)
              (into seen-nodes (map uber/dest) next-edges)))
          (set start-nodes)
          edge-sets)]
    (set? res)))

(defn pprint
  "Pretty-print a multipath"
  [mp]
  (doall
    (map-indexed
      (fn [layer edges]
        (println "Layer" layer)
        (mapv #(println (e->str %)) edges))
      (edge-sets mp))))
