(ns de.uni-koblenz.funtg.xmltg
  (:use de.uni-koblenz.funtg.core)
  (:require [clojure.string :as str])
  (:import
   (org.xml.sax Attributes SAXException)
   (org.xml.sax.helpers DefaultHandler)
   (javax.xml XMLConstants)
   (javax.xml.parsers SAXParser SAXParserFactory)
   (javax.xml.validation SchemaFactory)
   (de.uni_koblenz.jgralab Graph Vertex Edge)
   (de.uni_koblenz.jgralab.schema Schema)))

(def ^:dynamic *graph*)
(def ^:dynamic *stack*)
(def ^:dynamic *current*)
(def ^:dynamic *state*)   ;; :element :chars :between
(def ^:dynamic *sb*)
(def ^:dynamic *attr-type-fn*)
(def ^:dynamic *id2elem*) ;; map from ID to Element vertex
(def ^:dynamic *attr2refd-ids*)  ;; map from Attribute vertex to a collection
                                 ;; of referenced element IDs (an attr can
                                 ;; reference multiple elements in terms of a
                                 ;; IDREFS attr type)

(defn- handle-attributes
  [elem ^Attributes attrs]
  (if (== 0 (.getLength attrs))
    nil
    (loop [i 0, l (.getLength attrs), as []]
      (if (== i l)
        (loop [a as]
          (when (seq a)
            (let [[n v t] (first a)
                  t (if (= t "CDATA")
                      (*attr-type-fn* (value elem :name) n)
                      t)
                  av (create-vertex! *graph* 'Attribute)]
              (set-value! av :name n)
              (set-value! av :value v)
              (create-edge! 'HasAttribute elem av)
              (cond
               (= t "ID")     (set! *id2elem* (assoc *id2elem* v elem))
               (= t "IDREF")  (set! *attr2refd-ids*
                                    (update-in *attr2refd-ids* [av]
                                               #(conj % v)))
               (= t "IDREFS") (set! *attr2refd-ids*
                                   (update-in *attr2refd-ids* [av]
                                              #(set (concat % (str/split v #" ")))))))
            (recur (rest a))))
        (recur (inc i)
               l
               (conj as [(.getLocalName attrs i)
                         (.getValue attrs i)
                         (.getType attrs i)]))))))
(defn- resolve-refs
  "Create References edges for ID/IDREF[S]s collected while parsing."
  []
  (loop [a2rs *attr2refd-ids*]
    (when (seq a2rs)
      (let [[attr refs] (first a2rs)]
        (loop [ref refs]
          (when (seq ref)
            (create-edge! 'References attr
                          (or (*id2elem* (first ref))
                              (throw (RuntimeException.
                                      (str "No element for id " (first ref))))))
            (recur (rest ref)))))
      (recur (rest a2rs)))))

(defn- content-handler
  []
  (letfn [(push-text []
            (when (and (= *state* :chars)
                       (some (complement #(Character/isWhitespace (char %)))
                             (str *sb*)))
              (let [t (create-vertex! *graph* 'Text)]
                (set-value! t :content (str/trim (str *sb*)))
                (create-edge! 'HasText *current* t))))]
    (proxy [DefaultHandler] []
      ;; ContentHandler
      (startElement [uri local-name qname ^Attributes attrs]
        (let [type (if (seq *stack*) 'Element 'RootElement)
              e (create-vertex! *graph* type)]
          (set-value! e :name qname)
          (handle-attributes e attrs)
          (when *current*
            (push-text)
            (create-edge! 'HasChild *current* e))
          (set! *stack* (conj *stack* *current*))
          (set! *current* e)
          (set! *state* :element))
        nil)
      (endElement [uri local-name qname]
        (push-text)
        (set! *current* (peek *stack*))
        (set! *stack* (pop *stack*))
        (set! *state* :between)
        nil)
      (characters [^chars ch start length]
        (when-not (= *state* :chars)
          (set! *sb* (new StringBuilder)))
        (let [^StringBuilder sb *sb*]
          (.append sb ch (int start) (int length))
          (set! *state* :chars))
        nil)
      (setDocumentLocator [locator])
      (startDocument [])
      (endDocument []
        (resolve-refs))
      (startPrefixMapping [prefix uri])
      (endPrefixMapping [prefix])
      (ignorableWhitespace [ch start length])
      (processingInstruction [target data])
      (skippedEntity [name])
      ;; ErrorHandler
      (error [^org.xml.sax.SAXParseException ex]
        (println "ERROR:" (.getMessage ex)))
      (fatalError [^org.xml.sax.SAXParseException ex]
        (println "FATAL ERROR:" (.getMessage ex)))
      (warning [^org.xml.sax.SAXParseException ex]
        (println "WARNING:" (.getMessage ex))))))

(defn- startparse-sax
  [^String uri ^DefaultHandler ch]
  (let [pfactory (SAXParserFactory/newInstance)]
    (.setNamespaceAware pfactory true)
    (-> pfactory
        .newSAXParser
        (.parse uri ch))))

(defn xml2graph
  "Parse the XML file f into a TGraph conforming the generic XML schema.
  IDREF resolving, which is needed for creating References edges, works
  automatically only for XML files containing a DTD describing them.  If you
  want IDREFs resolved anyway, you have to provide a attr-type-fn that takes 2
  arguments, an element's qname and an attribute name, and then returns that
  attribute's type as string: ID, IDREF, IDREFS, or nil (meaning CDATA)."
  ([f]
     (xml2graph f false))
  ([f attr-type-fn]
     (binding [*graph* (create-graph (load-schema "schemas/xml-schema.tg") f)
               *stack*   nil
               *current* nil
               *state*   :between
               *sb*      nil
               *id2elem* {}
               *attr2refd-ids* {}
               *attr-type-fn* (or attr-type-fn
                                  (constantly nil))]
       (startparse-sax f (content-handler))
       *graph*)))
