(ns idm.graph.alg.kosaraju
  "Kosaraju algorithm implementation"
  (:require
    [clojure.tools.logging :as log]
    [ubergraph.core :as uber]))

(defn visit
  "Visits all nodes in a graph, producing an ordered sequence used by
  `assign` to locate strongly-connected components.

  Continuing the example from [[kosaraju]] (look at the graph there first):
  There isn't any requirement of initial node order; suppose we start with
  `(:a :b :c :d :e)`. `*` denotes start of `visit` call.
  ```
  * node: `:a`; visited: `#{}`; ordered: `()`
    - Not yet visited so recurse on children, namely `(:b :c)`.
    - Note that we mark `:a` as visited for recursion but do not yet add it to
      `ordered`.
    * node: `:b`; visited: `#{:a}`; ordered: `()`
      - Not yet visited so recurse on children, `(:c)`
      * node: `:c`; visited: `#{:a :b}`; ordered: `()`
        - Not yet visited so recurse on chilren, `(:a)`
        * node: `:a`; visited: `#{:a :b :c}`; ordered: `()`
          - Visited already! Return `ordered` unmodified: `()`
        - With recursive step complete, add `:c` to result of recursive step.
          Yields `(:c)`
      - Recursive step complete; add `:b` to result: `(:b :c)`.
    - Because `:a` has two children, feed result from `:b` into second
      recursive call.
    * node: `:c`; visited: #{:a :b :c}`; ordered: `(:b :c)`
      - Visited already! Return `ordered` unmodified: `(:b :c)`
    - With all children visited, now add `:a` to resulting list: `(:a :b :c)`
  * node: `:b`; visited: #{:a :b :c}`; ordered: `(:a :b :c)`
    - Visited; return input unmodified
  * node: `:c`; visited: #{:a :b :c}`; ordered: `(:a :b :c)`
    - Visited; return input unmodified
  * node: `:d`; visited: #{:a :b :c}`; ordered: `(:a :b :c)`
    - Not yet visited; recursive call on children `(:a)`
    * node: `:a`; visited: #{:a :b :c :d}`; ordered: `(:a :b :c)`
      - Visited already; return input unmodified
    - Recursive step complete; add `:d` to output: `(:d :a :b :c)`.
  * node: `:e`; visited: #{:a :b :c :d}`; ordered: `(:d :a :b :c)`
    - Not yet visited; recursive call on children `(:d)`
    * node: `:d`; visited: #{:a :b :c :d :e}`; ordered: `(:d :a :b :c)`
      - Visited already; return input unmodified
    - Recursive step complete; add `:e` to output: `(:e :d :a :b :c)`.
  - Original list exhausted; result is `(:a :d :a :b :c)`.
  ```
  For why this is useful, see `assign`."
  [graph]
  (letfn [(visit-recursive [ordered v]
            (if (contains? (:visited (meta ordered)) v)
              ; node is visited; skip it
              ordered
              (as-> ordered $
                ; mark this node as visited
                (vary-meta $ update :visited conj v)
                ; recurse on children of this node
                (reduce visit-recursive $ (uber/successors graph v))
                ; add this node to order immediately AFTER children
                (conj $ v))))]
    (reduce visit-recursive
            (with-meta () {:visited #{}})
            (uber/nodes graph))))

(defn visit-edges
  [graph]
  (letfn [(visit-recursive [ordered e]
            (log/trace e)
            (if (contains? (:visited (meta ordered)) e)
              ; edge is visited; skip
              ordered
              (as-> ordered $
                (vary-meta $ update :visited conj e)
                (reduce visit-recursive $ (uber/out-edges graph (uber/dest e)))
                (conj $ e))))]
    (reduce visit-recursive (with-meta () {:visited #{}}) (uber/edges graph))))


(defn assign
  "Iterates over the ordered nodes produced by `visit` to produce a
  map of pairs of the form [node root-node], where `node` belongs to the
  component rooted at `root-node`. For a graph with no cycles, all entries will
  be [node node]; i.e., every node belongs to a single-node compoment of which
  it is the root.

  The idea behind this part of the algorithm is that a node is the root of its
  own component (as encapsulated in the loop variable derived from `ordered`),
  unless we have reason to think otherwise. Specifically, if the first time we
  encounter a node is in the original `ordered` items, then it is not a
  predecessor of any node yet processed and is thus a root.

  Continuing example from `visit`; `ordered` is `(:e :d :a :b :c)`.
  We start by pairing each item in `ordered` with itself:
  `([:e :e] [:d :d] [:a :a] [:b :b] [:c :c])`
  ```
  * node: :e; root: :e; tail: ([:d :d] [:a :a] [:b :b] [:c :c]); components: {}
    - not yet in components, so we prepend predecessors to queue, add to
      components, and recur. :e has no predecessors so queue is unchanged.
  * node: :d; root: :d; tail: ([:a :a] [:b :b] [:c :c]); components: {:e :e}
    - not yet in components; add and prepend predecessors (:e) to queue paired
      with :d as their root
  * node: :e; root: :d; tail: ([:a :a] [:b :b] [:c :c]); components: {:e :e, :d :d}
    - already in components so simply recur with tail and components
  * node: :a; root: :a; tail: ([:b :b] [:c :c]); components: {:e :e, :d :d}
    - add to components and prepend predecessors (:c) to queue paired with :a
  * node: :c; root: :a; tail: ([:b :b] [:c :c]); components: {:e :e, :d :d, :a :a}
    - add to components; prepend predecessors (:b) to queue still paired with :a
  * node: :b; root: :a; tail: ([:b :b] [:c :c]); components: {:e :e, :d :d, :a :a, :c :a}
    - add to components; prepend predecessors (:a) to queue still paired with :a
  * node: :a; root: :a; tail: ([:b :b] [:c :c]); components: {:e :e, :d :d, :a :a, :c :a, :b :a}
    - already present; continue
  * node: :b; root: :b; tail: ([:c :c]); components: {:e :e, :d :d, :a :a, :c :a, :b :a}
    - already present; continue
  * node: :c; root: :c; tail: (); components: {:e :e, :d :d, :a :a, :c :a, :b :a}
    - already present; continue
  ```
  And we're done!
  ```clojure
  {:e :e
   :d :d
   :a :a
   :c :a
   :b :a}
  ```
  This map contains an entry for each node, where the value is the root node of
  the component it belongs to. As seen here, several nodes belong to a
  component rooted in `:a`, and this indicates a loop."
  [graph ordered]
  (loop [[[v root] & tail] (map #(vector % %) ordered)
         components {}]
    (if (nil? v)
      components
      (if (contains? components v)
        (recur tail components)
        (recur (into tail (map #(vector % root)) (uber/predecessors graph v))
               (assoc components v root))))))

(defn assign-edges
  [graph ordered]
  (loop [[[e root] & tail] (map #(vector % %) ordered)
         components {}]
    (if (nil? e)
      components
      (if (contains? components e)
        (recur tail components)
        (recur (into tail 
                     (map #(vector % root))
                     (uber/in-edges graph (uber/src e)))
               (assoc components e root))))))


(defn kosaraju
  "Finds all strongly-connected components in a graph. This is actually not
  ideal for the use case because we only need one at a time, and the graph will
  not be the same at each iteration.

  https://en.wikipedia.org/wiki/Kosaraju%27s_algorithm

  Consider a simple graph, consisting of 5 nodes:
    `:a :b :c :d :e`
  and 5 edges:
  ```clojure
    :a -> :b
    :b -> :c
    :c <-> :a
    :d -> :a
    :e -> :d
  ```
  This contains one loop, `:a -> :b -> :c <-> :a`. To detect it (and all 
  others), we will first order the nodes via [[visit]], then use that order to 
  produce a component map via [[assign]]. Example continued in `visit`."
  [graph]
  (->> (visit graph)
       (log/spyf :trace "Nodes in visited order: %s")
       (assign graph)))

(defn find-cycle
  "Wraps `kosaraju` by putting edges into a graph first."
  [orig-graph edges]
  (let [graph (uber/add-edges* (uber/remove-all orig-graph) edges)
        components
        (->> (visit graph)
             (assign graph)
             (log/spyf :trace "Edges post-Kosaraju: %s")
             (into {} (map (fn [kv] ; both edges
                             (mapv (partial uber/edge-with-attrs graph) kv))))
             (log/spyf :trace "Converted edges: %s")
             (reduce-kv
               (fn [components edge group]
                 (update components group (fnil conj #{}) edge))
               {})
             (log/spyf :trace "Component groups: %s")
             (into {} (remove #(-> % val count (= 1)))))]

    (uber/pprint graph)
    components))
