;;----------------------------------------------------------------------------
;; Copyright 2011 Factual, Inc.
;; All Rights Reserved.
;;
;; This is UNPUBLISHED PROPRIETARY SOURCE CODE of Factual, Inc.
;; Factual, Inc. reserves all rights in the source code as
;; delivered. The source code and all contents of this file, may not be
;; used, copied, modified, distributed, sold, disclosed to third parties
;; or duplicated in any form, in whole or in part, for any purpose,
;; without the prior written permission of Factual, Inc.
;;----------------------------------------------------------------------------

(ns leafgrabber.xpath
  (:require [clojure.string :as str]
            [clojure.contrib.singleton :as sing])
  (:import [javax.xml.xpath XPathConstants XPathExpression]
           [javax.xml.transform TransformerFactory dom.DOMSource stream.StreamResult]
           [java.io StringWriter]
           [org.w3c.dom Node]))


;; http://download.oracle.com/javase/7/docs/api/org/w3c/dom/Node.html
;; http://download.oracle.com/javase/7/docs/api/javax/xml/xpath/XPath.html


;;-------------------------------------------------------------
;; adapted from:
;; https://github.com/kyleburton/clj-xpath/blob/master/src/org/clojars/kyleburton/clj_xpath.clj

(def xpath-factory (org.apache.xpath.jaxp.XPathFactoryImpl.))
(def xpath-compiler (.newXPath xpath-factory))

(defn node-list->seq [node-list]
  (loop [length (.getLength node-list)
         idx    0
         res    []]
    (if (>= idx length)
      (reverse res)
      (recur length
             (inc idx)
             (cons (.item node-list idx) res)))))

(defn throwf [& args]
  (throw (RuntimeException. (apply format args))))

(defmulti xp-compile class)

(defmethod xp-compile String          [xp] (.compile xpath-compiler xp))
(defmethod xp-compile XPathExpression [xp] xp)
(defmethod xp-compile :default        [xp]
  (throwf "xpath: can't compile xpath expr '%s'\nof type: %s" xp (class xp)))

(defn xpath [xp node]
  (node-list->seq
   (.evaluate (xp-compile xp) node XPathConstants/NODESET)))

;;-------------------------------------------------------------


;; functions that vaguely emulate Nokogiri, e.g.
;; http://nokogiri.org/Nokogiri/XML/Node.html

(defn css
  "Provides a very limited emulation of CSS selectors via Xpath expressions. The emulation is currently
   limited to class selectors of the form 'dot followed by class name'. This works even when the class
   attribute value is a list of tokens."
  [cs node]
  (let [class-name (second
                    (re-find #"^\.([^.\s]*)$" cs))]
    (xpath (str ".//*[contains(concat(' ', normalize-space(@class), ' '), ' " class-name " ')]")
           node)))

(defn at [xp node]
  (first (xpath xp node)))

(defn at-css [cs node]
  (first (css cs node)))

(defn content [node]
  (.getTextContent node))

(defn setContent [node s]
  (.setTextContent node s))

(defn node-name [node]
  (.getNodeName node))

(defn node-value [node]
  (.getNodeValue node))

(defn node-type [node]
  (.getNodeType node))

(defn text? [node]
  (= Node/TEXT_NODE (node-type node)))

(defn attr [attr-name node]
  (if-let [attr-node (.getNamedItem (.getAttributes node)
                                    attr-name)]
    (node-value attr-node)))

(defn children [node]
  (node-list->seq (.getChildNodes node)))

(def xml-transformer
  (sing/per-thread-singleton
   #(.. (TransformerFactory/newInstance)
        (newTransformer))))

(defn remove-xml-decl [xml]
  (let [m (re-find #"\s*(?i:<\?xml.*\?>)?(?s)(.*)"
                   xml)]
    (m 1)))

(defn to-html [node]
  (let [source (DOMSource. node)
        out (StringWriter.)
        result (StreamResult. out)]
    (.transform (xml-transformer) source result)
    (remove-xml-decl (str out))))

(defn inner-html
  [node]
  (str/join
   (map to-html (children node))))

(defn unlink [node]
  (.removeChild (.getParentNode node) node))

(defn before?
  "true iff node1 is before node2"
  [node1 node2]
  (= (.compareDocumentPosition node1 node2)
     Node/DOCUMENT_POSITION_PRECEDING))

(defn after?
  "true iff node1 is after node2"
  [node1 node2]
  (= (.compareDocumentPosition node1 node2)
     Node/DOCUMENT_POSITION_FOLLOWING))
