(ns speccy.mzml
  (:require [clojure.java.io :as io]
            [clojure.data.xml :refer [parse]]
            [clojure.zip :refer [node xml-zip]]
            [clojure.data.zip.xml :refer
             [xml-> attr xml1-> text attr]]
            [clj-biosequence.core :refer [index-biosequence-file]]
            [clj-biosequence.indexing :as ind]
            [clojure.string :refer [blank?]]
            [speccy.core :as pr]))

;;;;;;;;;;;;;;;;;;
;; ref
;;;;;;;;;;;;;;;;;;

;; A typed pointer to another element of the mzML document. `ref` holds
;; the value of a reference attribute taken from a record's info map and
;; `type` is a keyword naming the kind of element referred to (for
;; example :spectrum or :software).
(defrecord mzmlReference [ref type])

(defn- get-reference
  "Wraps the value found under `tag` in the info map of `this` in a
  mzmlReference of kind `param-tag`. Returns nil when the info map holds
  no truthy value for `tag`."
  [this tag param-tag]
  (when-let [target (tag (:info this))]
    (->mzmlReference target param-tag)))

;;;;;;;;;;;;;;;;;;
;; cv params
;;;;;;;;;;;;;;;;;;

;; Accessors shared by the records in this namespace. The records do not
;; implement the protocol inline; they are extended with the function
;; map `mzml-info-default`.
(defprotocol mzmlInfo
  ;; Returns the info map of the record.
  (mzml-info [this])
  (external-spectrum-ref [this]
    "Returns the id of a spectrum from an external file.")
  (source-file-ref [this]
    "Returns a mzmlReference to a source file.")
  (spectrum-ref [this]
    "Returns a mzmlReference to a spectrum.")
  (instrument-config-ref [this]
    "Returns a mzmlReference to an instrument config.")
  (data-processing-ref [this]
    "Returns a mzmlReference to a data processing record.")
  (software-ref [this]
    "Returns a mzmlReference to a software record.")
  (scan-settings-ref [this]
    "Returns a mzmlReference to a scan settings record.")
  ;; Looks `accession` up in the info map of the record.
  (get-cv-param [this accession]))

(defn- cv-params
  "Collects the attribute maps of the cvParam children of the zipper
  location `loc` into a map keyed by :accession, or by :unitAccession
  for entries that have no :accession.

  When `params` is truthy, the children of every group named by a
  referenceableParamGroupRef under `loc` are looked up in
  (:ref-params params) and included too. Returns nil when `loc` is nil
  or false."
  ([loc] (cv-params loc false))
  ([loc params]
     (when loc
       (let [direct (map :attrs (xml-> loc :cvParam node))
             grouped (when params
                       (->> (xml-> loc :referenceableParamGroupRef (attr :ref))
                            (mapcat (fn [group-id]
                                      (:content (get (:ref-params params)
                                                     group-id))))
                            (map :attrs)))
             keyed (fn [param]
                     [(or (:accession param) (:unitAccession param))
                      param])]
         (into {} (map keyed (concat direct grouped)))))))

(def mzml-info-default
  "Function map implementing the mzmlInfo protocol for any record that
  keeps its attributes and cv params in an :info map. Every *-ref
  function returns a mzmlReference, or nil when the record carries no
  such reference."
  {:mzml-info
   (fn [this]
     (:info this))
   :external-spectrum-ref
   (fn [this]
     (:externalSpectrumID (:info this)))
   :source-file-ref
   (fn [this]
     (get-reference this :sourceFileRef :source-files))
   :spectrum-ref
   (fn [this]
     (get-reference this :spectrumRef :spectrum))
   :instrument-config-ref
   (fn [this]
     (get-reference this :instrumentConfigurationRef
                    :instrument-config))
   :data-processing-ref
   (fn [this]
     (get-reference this :dataProcessingRef
                    :processing))
   :software-ref
   (fn [this]
     ;; The reference can sit on the record itself under :software or be
     ;; a softwareRef attribute merged into the info map. Like the other
     ;; *-ref functions this yields nil, not a reference wrapping nil,
     ;; when neither is present.
     (if-let [r (or (:software this)
                    (:softwareRef (:info this)))]
       (->mzmlReference r :software)))
   :scan-settings-ref
   (fn [this]
     (get-reference this :scanSettingsRef
                    :scan-settings))
   :get-cv-param
   (fn [this accession]
     (get (mzml-info this) accession))})

;;;;;;;;;;;;;;;;;;
;; ion
;;;;;;;;;;;;;;;;;;

;; A selected ion of a precursor; `info` is its cv-param map.
(defrecord mzmlSelectedIon [info]

  pr/Ion

  ;; Value of cv param MS:1000744 as a BigDecimal, nil when absent.
  (mz [this]
    (when-let [v (:value (get-cv-param this "MS:1000744"))]
      (bigdec v)))

  ;; Value of cv param MS:1000041 as an integer, nil when absent.
  (charge [this]
    (when-let [v (:value (get-cv-param this "MS:1000041"))]
      (Integer/parseInt v)))

  ;; Value of cv param MS:1000042 as a BigDecimal, nil when absent.
  (intensity [this]
    (when-let [v (:value (get-cv-param this "MS:1000042"))]
      (bigdec v))))

(extend mzmlSelectedIon mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; precursor
;;;;;;;;;;;;;;;;;;

;; A precursor of a spectrum: its attribute map plus the activation,
;; isolation window and selected ions built from its child elements.
(defrecord mzmlPrecursor [info activation window selected-ions]

  pr/Precursor

  ;; Each accessor simply hands back the corresponding record field.
  (selected-ions [this]
    (get this :selected-ions))

  (isolation-window [this]
    (get this :window))

  (activation [this]
    (get this :activation)))

(extend mzmlPrecursor mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; activation
;;;;;;;;;;;;;;;;;;

;; Activation of a precursor; `info` is the cv-param map built from the
;; precursor's activation element.
(defrecord mzmlActivation [info])

;; Give the record the default mzmlInfo implementations.
(extend mzmlActivation mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; scanlist
;;;;;;;;;;;;;;;;;;

;; Scan list of a spectrum: `info` is the cv-param map of the scanList
;; element and `scans` a sequence of mzmlScan records.
(defrecord mzmlScanList [info scans])

;; Give the record the default mzmlInfo implementations.
(extend mzmlScanList mzmlInfo mzml-info-default)

(defn scans
  "Returns the scans held by a mzmlScanList record."
  [mzmlscanlist]
  (get mzmlscanlist :scans))

;;;;;;;;;;;;;;;;;;
;; scan
;;;;;;;;;;;;;;;;;;

;; A single scan: `info` merges the scan's cv params with its XML
;; attributes and `windows` holds its mzmlScanWindow records.
(defrecord mzmlScan [info windows]

  pr/Info

  ;; Returns [time unit-name] built from cv param MS:1000826, with the
  ;; time as a BigDecimal. Returns nil when the scan has no such value
  ;; instead of failing inside bigdec.
  (elution-time [this]
    (let [e (get (:info this) "MS:1000826")]
      (when-let [t (:value e)]
        (vector (bigdec t) (:unitName e))))))

(extend mzmlScan mzmlInfo mzml-info-default)

(defn scan-windows
  "Returns a list of mzmlScanWindow records from a mzmlScan record
  (the record type that carries the :windows field)."
  [mzmlscanlist]
  (:windows mzmlscanlist))

;;;;;;;;;;;;;;;;;;
;; scan window
;;;;;;;;;;;;;;;;;;

;; A scan window of a scan; `info` is its cv-param map. Each numeric
;; accessor returns a BigDecimal, or nil when the corresponding cv param
;; is missing (rather than failing inside bigdec).
(defrecord mzmlScanWindow [info]

  pr/Window

  (upper-limit [this]
    (when-let [v (:value (get (:info this) "MS:1000500"))]
      (bigdec v)))

  (lower-limit [this]
    (when-let [v (:value (get (:info this) "MS:1000501"))]
      (bigdec v)))

  (mz-step [this]
    (when-let [v (:value (get (:info this) "MS:1000040"))]
      (bigdec v)))

  ;; Scan windows carry no target m/z.
  (target-mz [this]
    nil))

(extend mzmlScanWindow mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; product
;;;;;;;;;;;;;;;;;;

;; A product of a spectrum or chromatogram; `window` holds its isolation
;; window(s). The record has no :info field, so the default mzml-info
;; implementation returns nil for it.
(defrecord mzmlProduct [window])

;; Give the record the default mzmlInfo implementations.
(extend mzmlProduct mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; IsolationWindow
;;;;;;;;;;;;;;;;;;

;; Isolation window of a precursor or product; `info` is its cv-param
;; map. Every accessor returns a BigDecimal, or nil when the cv param is
;; missing.
;;
;; PSI-MS accessions used here: MS:1000827 is the isolation window
;; target m/z, MS:1000828 the lower offset and MS:1000829 the upper
;; offset. The values are returned as stored in the file.
;; NOTE(review): the two limits are offsets relative to the target m/z
;; in the vocabulary - confirm whether callers expect absolute limits.
(defrecord mzmlIsolationWindow [info]

  pr/Window

  (upper-limit [this]
    (if-let [ul (:value (get (:info this) "MS:1000829"))]
      (bigdec ul)))

  (lower-limit [this]
    (if-let [ll (:value (get (:info this) "MS:1000828"))]
      (bigdec ll)))

  (target-mz [this]
    (if-let [tm (:value (get (:info this) "MS:1000827"))]
      (bigdec tm)))

  ;; Isolation windows carry no m/z step.
  (mz-step [this]
    nil))

(extend mzmlIsolationWindow mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; binary array
;;;;;;;;;;;;;;;;;;

;; A binary data array: `info` merges the element's attributes with its
;; cv params and `data` is the text of its binary child element.
(defrecord mzmlBinaryArray [info data]

  pr/Binary

  ;; Views the byte buffer `b` as a typed buffer, picking the type from
  ;; whichever data-type accession is present in the info map. Throws
  ;; IllegalArgumentException when none of the supported accessions is
  ;; present. The throw has to be the trailing default expression:
  ;; condp has no :else keyword and would treat it as one more clause.
  (get-buffer [this b]
    (condp (fn [x y] (contains? y x)) (mzml-info this)
      "MS:1000519" (.asIntBuffer b)
      "MS:1000521" (.asFloatBuffer b)
      "MS:1000523" (.asDoubleBuffer b)
      "MS:1000522" (.asLongBuffer b)
      (throw (IllegalArgumentException. "Data type not supported"))))

  ;; Integer value of the arrayLength attribute, nil when absent.
  (array-length [this]
    (if-let [l (:arrayLength (:info this))]
      (Integer/parseInt l)))

  ;; Integer value of the encodedLength attribute, nil when absent.
  (encoded-length [this]
    (if-let [el (:encodedLength (:info this))]
      (Integer/parseInt el)))

  (binary-data [this]
    (:data this))

  ;; True unless the info map contains accession MS:1000576.
  (compressed? [this]
    (false? (contains? (:info this) "MS:1000576"))))

(extend mzmlBinaryArray mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; instrument config
;;;;;;;;;;;;;;;;;;

;; An instrument configuration: `info` merges its attributes with its cv
;; params; `source`, `analyzer` and `detector` hold sequences of the
;; component records defined below.
(defrecord mzmlInstrumentConfig [info source analyzer detector])

(extend mzmlInstrumentConfig mzmlInfo mzml-info-default)

;; The three component records share one shape: a single `info` map that
;; merges the component's attributes with its cv params.
(defrecord mzmlSource [info])

(extend mzmlSource mzmlInfo mzml-info-default)

(defrecord mzmlAnalyzer [info])

(extend mzmlAnalyzer mzmlInfo mzml-info-default)

(defrecord mzmlDetector [info])

(extend mzmlDetector mzmlInfo mzml-info-default)

(defn ion-source
  "Returns the mzmlSource records held by a mzmlInstrumentConfig
  record."
  [mzmlinstrument]
  (get mzmlinstrument :source))

(defn analyzer
  "Returns the mzmlAnalyzer records held by a mzmlInstrumentConfig
  record."
  [mzmlinstrument]
  (get mzmlinstrument :analyzer))

(defn detector
  "Returns the mzmlDetector records held by a mzmlInstrumentConfig
  record."
  [mzmlinstrument]
  (get mzmlinstrument :detector))

;;;;;;;;;;;;;;;;;;
;; file content
;;;;;;;;;;;;;;;;;;

;; File content description; `info` is the cv-param map of the
;; fileContent element of the file description.
(defrecord mzmlFileContent [info])

;; Give the record the default mzmlInfo implementations.
(extend mzmlFileContent mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; contact
;;;;;;;;;;;;;;;;;;

;; A contact from the file description; `info` is the cv-param map of
;; one contact element.
(defrecord mzmlContact [info])

;; Give the record the default mzmlInfo implementations.
(extend mzmlContact mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; source file
;;;;;;;;;;;;;;;;;;

;; A source file entry; `info` merges its cv params with its XML
;; attributes.
(defrecord mzmlSourceFile [info]

  pr/File

  ;; Builds a java.io.File from the :location and :name attributes.
  (file-path [this]
    (let [{parent :location child :name} (mzml-info this)]
      (io/file parent child)))

  pr/ID

  (accession [this]
    (get (mzml-info this) :id))

  (definition [this]
    (get (mzml-info this) :name)))

(extend mzmlSourceFile mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; spectra 
;;;;;;;;;;;;;;;;;;

;; A spectrum. `info` merges the spectrum's cv params with its XML
;; attributes; `scans` is a mzmlScanList and `precursors`, `products`
;; and `binaries` are sequences of the matching records.
(defrecord mzmlSpectra [info scans precursors products binaries]

  pr/Spectra

  ;; True when cv param MS:1000127 is present with a :name.
  (centroid? [this]
    (not (nil? (:name (get-cv-param this "MS:1000127")))))

  (precursors [this]
    (:precursors this))

  (products [this]
    (:products this))

  (scan-list [this]
    (:scans this))

  (binary-arrays [this]
    (:binaries this))

  ;; Attribute maps keep the XML attribute names (compare :arrayLength
  ;; and :sourceFileRef elsewhere in this file), so the spot id lives
  ;; under :spotID. :spot-id is still consulted for records built by
  ;; hand with that key.
  (spot-id [this]
    (let [i (mzml-info this)]
      (or (:spotID i) (:spot-id i))))

  ;; Decodes the first binary array tagged MS:1000514, nil when none.
  (mz-array [this]
    (if-let [b (first (filter #(contains? (mzml-info %) "MS:1000514")
                              (pr/binary-arrays this)))]
      (pr/decode-binary b)))

  ;; Decodes the first binary array tagged MS:1000515, nil when none.
  (intensity-array [this]
    (if-let [b (first (filter #(contains? (mzml-info %) "MS:1000515")
                              (pr/binary-arrays this)))]
      (pr/decode-binary b)))

  ;; Returns the cv param map for MS:1000130 or, failing that,
  ;; MS:1000129; nil when neither is present.
  (polarity [this]
    (or (get-cv-param this "MS:1000130")
        (get-cv-param this "MS:1000129")))

  ;; Integer value of cv param MS:1000511, nil when absent.
  (msn [this]
    (if-let [m (:value (get-cv-param this "MS:1000511"))]
      (Integer/parseInt m)))

  ;; Integer value of the index attribute, nil when absent.
  (index [this]
    (if-let [i (:index (mzml-info this))]
      (Integer/parseInt i)))

  pr/ID

  (accession [this]
    (:id (:info this)))

  (definition [this]
    (:id (mzml-info this))))

(extend mzmlSpectra mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; chromatograms
;;;;;;;;;;;;;;;;;;

;; A chromatogram exposed through the same pr/Spectra interface as a
;; spectrum. Accessors that have no chromatogram counterpart in this
;; record return nil.
(defrecord mzmlChromatogram [info precursors products binaries]

  pr/Spectra

  (centroid? [this] nil)

  (precursors [this]
    (get this :precursors))

  (products [this]
    (get this :products))

  (scan-list [this] nil)

  (binary-arrays [this]
    (get this :binaries))

  (spot-id [this] nil)

  (mz-array [this] nil)

  ;; Decodes the first binary array tagged MS:1000515, nil when none.
  (intensity-array [this]
    (when-let [arr (some #(when (contains? (mzml-info %) "MS:1000515") %)
                         (pr/binary-arrays this))]
      (pr/decode-binary arr)))

  (polarity [this] nil)

  ;; Integer value of the index attribute, nil when absent.
  (index [this]
    (when-let [idx (:index (mzml-info this))]
      (Integer/parseInt idx)))

  (msn [this] nil)

  pr/ID

  (accession [this]
    (get (:info this) :id))

  (definition [this] "")

  pr/Chromatogram

  ;; Decodes the first binary array tagged MS:1000595, nil when none.
  (time-array [this]
    (when-let [arr (some #(when (contains? (mzml-info %) "MS:1000595") %)
                         (pr/binary-arrays this))]
      (pr/decode-binary arr))))

(extend mzmlChromatogram mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; sample
;;;;;;;;;;;;;;;;;;

;; A sample; `info` merges its cv params with its XML attributes.
(defrecord mzmlSample [info]

  pr/ID

  ;; The :id attribute of the sample.
  (accession [this]
    (get (:info this) :id))

  ;; The :name attribute of the sample.
  (definition [this]
    (get (:info this) :name)))

(extend mzmlSample mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; software
;;;;;;;;;;;;;;;;;;

;; A software entry; `info` merges its cv params with its XML
;; attributes.
(defrecord mzmlSoftware [info]

  pr/ID

  (accession [this]
    (get (mzml-info this) :id))

  ;; Joins with "|" the :name of every info value that has one; values
  ;; without a :name contribute nothing.
  (definition [this]
    (->> (mzml-info this)
         (keep (fn [[_ param]] (:name param)))
         (interpose "|")
         (apply str)))

  pr/Software

  (version [this]
    (get (:info this) :version)))

(extend mzmlSoftware mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; processing
;;;;;;;;;;;;;;;;;;

;; A data processing entry: `info` merges its attributes with its cv
;; params and `methods` holds its mzmlProcessingMethod records.
(defrecord mzmlDataProcessing [info methods]

  pr/ID

  (accession [this]
    (get (mzml-info this) :id))

  ;; Data processing entries have no definition.
  (definition [this] nil))

(defn processing-methods
  "Returns a list of mzmlProcessingMethod records from a
  mzmlDataProcessing record."
  [dataprocessing]
  (:methods dataprocessing))

;; Give mzmlDataProcessing the default mzmlInfo implementations.
(extend mzmlDataProcessing mzmlInfo mzml-info-default)

;; One processing method of a data processing entry; `info` merges its
;; attributes with its cv params.
(defrecord mzmlProcessingMethod [info])

(defn order
  "Returns the index of a mzmlProcessingMethod, mzmlSource,
  mzmlDetector or mzmlAnalyzer record as an integer, parsed from the
  :order entry of its info map. Returns nil when there is no such
  entry."
  [this]
  (if-let [o (:order (mzml-info this))]
    (Integer/parseInt o)))

;; Give mzmlProcessingMethod the default mzmlInfo implementations.
(extend mzmlProcessingMethod mzmlInfo mzml-info-default)

;;;;;;;;;;;;;;;;;;
;; parameters
;;;;;;;;;;;;;;;;;;

(defn- zip-records
  "Turns the id -> XML element map stored under `key` in `params` into
  an id -> record map. Each record is built by calling `func` on the
  element's cv params merged with its attributes (attributes win on a
  key clash)."
  [params key func]
  (let [elements (key params)
        build (fn [element]
                (func (merge (cv-params (xml-zip element) params)
                             (:attrs element))))]
    (zipmap (keys elements)
            (map build (vals elements)))))

;; Wraps the parameter map of a mzML file (`src`) and builds records
;; from the XML elements stored in it on demand.
(defrecord mzmlParameters [src]

  pr/Parameter

  ;; Map of instrument configuration id -> mzmlInstrumentConfig. When a
  ;; configuration has a softwareRef child, its ref is attached to the
  ;; record under :software, which is the key the default software-ref
  ;; implementation reads from the record.
  (instrument-config [this]
    (let [p (:src this)
          k (keys (:instrument-config p))]
      (zipmap k
              (map (fn [id]
                     (let [i (xml-zip (get (:instrument-config p) id))
                           s (xml-> i :componentList :source)
                           a (xml-> i :componentList :analyzer)
                           d (xml-> i :componentList :detector)
                           sr (xml1-> i :softwareRef (attr :ref))
                           ;; Builds one component record per location.
                           f (fn [l f] (map #(f (merge (:attrs (node %))
                                                       (cv-params % p)))
                                            l))
                           config (->mzmlInstrumentConfig
                                   (merge (:attrs (node i))
                                          (cv-params i p))
                                   (f s ->mzmlSource)
                                   (f a ->mzmlAnalyzer)
                                   (f d ->mzmlDetector))]
                       (if sr
                         (assoc config :software sr)
                         config)))
                   k))))

  ;; Map of source file id -> mzmlSourceFile.
  (source-files [this]
    (zip-records (:src this) :source-files ->mzmlSourceFile))

  ;; mzmlFileContent built from the fileContent child of the file
  ;; description.
  (file-content [this]
    (->mzmlFileContent
     (cv-params (xml1-> (xml-zip (:file-description (:src this)))
                        :fileContent)
                (:src this))))

  ;; One mzmlContact per contact child of the file description.
  (contacts [this]
    (map #(->mzmlContact (cv-params % (:src this)))
         (xml-> (xml-zip (:file-description (:src this)))
                :contact)))

  ;; Map of sample id -> mzmlSample.
  (samples [this]
    (zip-records (:src this) :samples ->mzmlSample))

  ;; Map of software id -> mzmlSoftware.
  (software [this]
    (zip-records (:src this) :software ->mzmlSoftware))

  ;; Map of data processing id -> mzmlDataProcessing, each holding one
  ;; mzmlProcessingMethod per processingMethod child.
  (processing [this]
    (let [p (:src this)
          k (keys (:processing p))]
      (zipmap k
              (map
               (fn [id]
                 (let [i (xml-zip (get (:processing p) id))
                       m (xml-> i :processingMethod)
                       f (fn [l f] (map #(f (merge (:attrs (node %))
                                                   (cv-params % p)))
                                        l))]
                   (->mzmlDataProcessing (merge (:attrs (node i))
                                                (cv-params i p))
                                         (f m ->mzmlProcessingMethod))))
               k)))))

;;;;;;;;;;;;;;;;;;
;; reader
;;;;;;;;;;;;;;;;;;

(defn- parse-mzml
  "Parses the XML read from `r`. When the root element is tagged
  :indexedmzML its first child is returned; otherwise the root itself."
  [r]
  (let [root (parse r)
        wrapped? (= :indexedmzML (:tag root))]
    (if wrapped?
      (first (:content root))
      root)))

(defn- get-run
  "Returns the first child of `xml` tagged :run, or nil when there is
  none."
  [xml]
  (some (fn [child]
          (when (= :run (:tag child))
            child))
        (:content xml)))

(defn- scan-list-generation
  "Builds the mzmlScanList of the XML element `this`. The list's info
  is the cv-param map of its scanList child; every scan under it
  becomes a mzmlScan whose info merges cv params with attributes and
  whose windows are mzmlScanWindow records. `p` is the parameter map
  handed to cv-params."
  [this p]
  (let [root (xml-zip this)
        make-window (fn [window-loc]
                      (->mzmlScanWindow (cv-params window-loc p)))
        make-scan (fn [scan-loc]
                    (->mzmlScan
                     (merge (cv-params scan-loc p)
                            (:attrs (node scan-loc)))
                     (map make-window
                          (xml-> scan-loc :scanWindowList :scanWindow))))]
    (->mzmlScanList (cv-params (xml1-> root :scanList) p)
                    (map make-scan (xml-> root :scanList :scan)))))

(defn- precursor-list-generation
  "Returns one mzmlPrecursor per precursor under the precursorList of
  the XML element `this`. Each record gets the precursor's attribute
  map as info, plus an activation, an isolation window and a sequence
  of selected ions built from the matching children. `p` is the
  parameter map handed to cv-params."
  [this p]
  (let [make-ion (fn [ion-loc]
                   (->mzmlSelectedIon (cv-params ion-loc p)))
        make-precursor (fn [loc]
                         (->mzmlPrecursor
                          (:attrs (node loc))
                          (->mzmlActivation
                           (cv-params (xml1-> loc :activation) p))
                          (->mzmlIsolationWindow
                           (cv-params (xml1-> loc :isolationWindow) p))
                          (map make-ion
                               (xml-> loc :selectedIonList :selectedIon))))]
    (map make-precursor
         (xml-> (xml-zip this) :precursorList :precursor))))

(defn- product-gen
  "Returns one mzmlProduct per product under the productList of the XML
  element `this`. Each product holds a one-element list containing the
  mzmlIsolationWindow built from its isolationWindow child. `p` is the
  parameter map handed to cv-params."
  [this p]
  (let [make-product (fn [loc]
                       (let [window (->mzmlIsolationWindow
                                     (cv-params (xml1-> loc :isolationWindow)
                                                p))]
                         (->mzmlProduct (list window))))]
    (map make-product
         (xml-> (xml-zip this) :productList :product))))

(defn- array-gen
  "Returns one mzmlBinaryArray per binaryDataArray under the
  binaryDataArrayList of the XML element `this`, or nil when there are
  none. Each record's info merges the array's attributes with its cv
  params and its data is the text of the binary child. `p` is the
  parameter map handed to cv-params."
  [this p]
  (when-let [array-locs (seq (xml-> (xml-zip this)
                                    :binaryDataArrayList
                                    :binaryDataArray))]
    (map (fn [loc]
           (->mzmlBinaryArray (merge (:attrs (node loc))
                                     (cv-params loc p))
                              (xml1-> loc :binary text)))
         array-locs)))

(defn- get-xml-spec
  "Builds the field map for a spectrum or chromatogram record from the
  XML element `xml`: its info (cv params merged with attributes), scan
  list, precursors, products and binary arrays. `params` is the
  parameter map handed to cv-params."
  [xml params]
  (let [info (merge (cv-params (xml-zip xml) params)
                    (:attrs xml))]
    (hash-map :info info
              :scans (scan-list-generation xml params)
              :precursors (precursor-list-generation xml params)
              :products (product-gen xml params)
              :binaries (array-gen xml params))))

;; Reader over an indexed mzML file. `index` maps spectrum ids (plus the
;; key :parameters) to index entries and `chromat` maps chromatogram ids
;; to index entries. The open index reader is expected under :strm.
;;
;; NOTE(review): the methods pass (:parameters this) to get-xml-spec,
;; but nothing visible in this file assocs :parameters onto this record,
;; so that lookup may be nil - confirm against the callers.
(defrecord mzmlIndexedSpectraReader [index chromat]

  pr/spectraReader

  ;; Every indexed spectrum; the :parameters entry is left out.
  (spectra-seq [this]
    (map (fn [x] (map->mzmlSpectra
                  (get-xml-spec x (:parameters this))))
         (ind/object-seq (:strm this)
                         (vals
                          (dissoc (:index this) :parameters)))))

  ;; The spectrum indexed under `acc`, or nil for an unknown accession.
  ;; The guard is on the index entry itself: wrapping the lookup in
  ;; `list` first is always truthy and would query the index with nil.
  (get-spectra
    [this acc]
    (if-let [i (get (:index this) acc)]
      (first
       (map (fn [x] (map->mzmlSpectra
                     (get-xml-spec x (:parameters this))))
            (ind/object-seq (:strm this) (list i))))))

  ;; The chromatogram indexed under `acc`, or nil when unknown.
  (get-chromatogram [this acc]
    (if-let [i (get (:chromat this) acc)]
      (first
       (map (fn [x] (map->mzmlChromatogram
                     (get-xml-spec x (:parameters this))))
            (ind/object-seq (:strm this) (list i))))))

  ;; Every indexed chromatogram. The entries live in the `chromat`
  ;; field, the same map get-chromatogram consults.
  (chromatogram-seq [this]
    (map (fn [x] (map->mzmlChromatogram
                  (get-xml-spec x (:parameters this))))
         (ind/object-seq (:strm this)
                         (vals (:chromat this)))))

  ;; A realised sequence of mzmlParameters built from the :parameters
  ;; index entry, or nil when the index has no such entry.
  (parameters [this]
    (if-let [i (:parameters (:index this))]
      (doall
       (map #(->mzmlParameters %)
            (ind/object-seq (:strm this) (list i))))))

  pr/ID

  (accession [this]
    (:accession (:mzml-info (:parameters this))))

  (definition [this] "")

  java.io.Closeable

  (close [this]
    (ind/close-index-reader (:strm this))))

;; Reader over a non-indexed mzML file. `strm` is the reader the XML is
;; parsed from and `parameters` the parameter map of the file.
;;
;; NOTE(review): get-spectra and get-chromatogram return the filtered
;; sequence of matches here, whereas mzmlIndexedSpectraReader returns a
;; single record - confirm which shape callers rely on.
(defrecord mzmlSpectraReader [strm parameters]

  pr/spectraReader

  ;; Parses the stream and maps every child of the run's first
  ;; spectrumList to a mzmlSpectra record.
  (spectra-seq [this]
    (let [params (:parameters this)]
      (->> (parse-mzml (:strm this))
           get-run
           :content
           (filter (fn [el] (= (:tag el) :spectrumList)))
           first
           :content
           (map (fn [el] (map->mzmlSpectra (get-xml-spec el params)))))))

  ;; Parses the stream and maps every child of the run's first
  ;; chromatogramList to a mzmlChromatogram record.
  (chromatogram-seq [this]
    (let [params (:parameters this)]
      (->> (parse-mzml (:strm this))
           get-run
           :content
           (filter (fn [el] (= (:tag el) :chromatogramList)))
           first
           :content
           (map (fn [el] (map->mzmlChromatogram (get-xml-spec el params)))))))

  (get-chromatogram [this acc]
    (filter (fn [chromatogram] (= (pr/accession chromatogram) acc))
            (pr/chromatogram-seq this)))

  (get-spectra [this accession]
    (filter (fn [spectrum] (= (pr/accession spectrum) accession))
            (pr/spectra-seq this)))

  (parameters [this]
    (->mzmlParameters (:parameters this)))

  pr/ID

  (accession [this]
    (get-in this [:parameters :mzml-info :accession]))

  (definition [this] "")

  java.io.Closeable

  (close [this]
    (.close ^java.io.BufferedReader (:strm this))))

(defn cv-list
  "Returns the controlled vocabulary definitions of the reader's file
  as a list of hashes."
  [mzmlreader]
  (get-in mzmlreader [:parameters :cvlist]))

(defn mzml-file-id
  "Returns the optional id of the mzML document, which external files
  use to refer to it."
  [mzmlreader]
  (get-in mzmlreader [:parameters :mzml-info :id]))

(defn referenceable-param-groups
  "Returns a hash of every referenceable param group of the mzml file,
  keyed by group id. Each value is the cv-param map of that group."
  [mzmlreader]
  (let [params (:parameters mzmlreader)]
    (into {}
          (for [[group-id group] (:ref-params params)]
            [group-id (cv-params (xml-zip group) params)]))))

;;;;;;;;;;;;;;;;;;
;; file
;;;;;;;;;;;;;;;;;;

(defn- reference-hash
  "Returns a map from the :id attribute of every `tag` child of `xml`
  to the child element itself. Children without an :id are skipped.

  Ids and elements are taken from the same traversal so a child that
  lacks an :id cannot shift the pairing of the children after it."
  [xml tag]
  (into {}
        (for [element (xml-> (xml-zip xml) tag node)
              :let [id (:id (:attrs element))]
              :when id]
          [id element])))

(defn- create-indexes
  "Hands the children of the run's first `tag` element in `xml` to
  ind/index-objects on `writer`, keyed by each child's :id attribute,
  and returns whatever ind/index-objects returns."
  [xml writer tag]
  (let [list-element (->> (get-run xml)
                          :content
                          (filter (fn [el] (= (:tag el) tag)))
                          first)
        id-of (fn [el] (:id (:attrs el)))]
    (ind/index-objects writer (:content list-element) id-of)))

(defn- mzml-parameters
  "Builds the parameter map of a parsed mzML document `xml`.

  Elements are looked up among the children that precede the first :run
  element. The id-keyed lists (param groups, samples, software, scan
  settings, instrument configurations, source files, data processing)
  are stored as id -> element maps. :default-spectra-processing is the
  defaultDataProcessingRef attribute of the run's spectrumList, and
  :run is the run element with its content cut off at the spectrumList."
  [xml]
  (let [with-tag (fn [tag elements]
                   (first (filter #(= (:tag %) tag) elements)))
        header (take-while #(not= (:tag %) :run) (:content xml))
        get-1 (fn [tag] (with-tag tag header))
        file-description (get-1 :fileDescription)
        run (get-run xml)]
    {:mzml-info (:attrs xml)
     :cvlist (map :attrs (:content (get-1 :cvList)))
     :file-description file-description
     :ref-params (reference-hash (get-1 :referenceableParamGroupList)
                                 :referenceableParamGroup)
     :samples (reference-hash (get-1 :sampleList) :sample)
     :software (reference-hash (get-1 :softwareList) :software)
     :scan-settings (reference-hash (get-1 :scanSettingsList)
                                    :scanSettings)
     :instrument-config (reference-hash (get-1 :instrumentConfigurationList)
                                        :instrumentConfiguration)
     :source-files (reference-hash
                    (with-tag :sourceFileList (:content file-description))
                    :sourceFile)
     :processing (reference-hash (get-1 :dataProcessingList)
                                 :dataProcessing)
     :default-spectra-processing (-> (with-tag :spectrumList (:content run))
                                     :attrs
                                     :defaultDataProcessingRef)
     :run (assoc run :content
                 (take-while #(not= (:tag %) :spectrumList)
                             (:content run)))}))

;; A mzML file on disk. `file` is the path to the file and `indexing`
;; tells spectra-reader whether to build an index when none exists yet.
(defrecord mzmlFile [file indexing]

  pr/spectraFile

  ;; Returns a reader for the file, chosen in three steps:
  ;; 1. pr/is-indexed? is truthy: load the saved index and return a
  ;;    mzmlIndexedSpectraReader on it.
  ;; 2. otherwise, when :indexing is set: parse the file, index its
  ;;    spectra, parameters and chromatograms, save the index and return
  ;;    a mzmlIndexedSpectraReader.
  ;; 3. otherwise: read the parameters in one pass and return a
  ;;    mzmlSpectraReader over a freshly opened reader.
  ;; The returned reader owns an open stream; see the close methods of
  ;; the reader records.
  (spectra-reader [this]
    (if (pr/is-indexed? this)
      (let [i (ind/load-indexed-file (pr/file-path this))
            r (->mzmlIndexedSpectraReader (:index i)
                                          (:chromat i))]
        (assoc r :strm (ind/index-reader (pr/file-path this))))
      (if (:indexing this)
        ;; Both the XML reader and the index writer are closed when this
        ;; form exits.
        (with-open [r (io/reader (pr/file-path this))
                    w (ind/index-writer file)]
          ;; `r` is rebound below from the XML reader to the new record.
          (let [xml (parse-mzml r)
                r (->mzmlIndexedSpectraReader
                   ;; The parameter map is indexed next to the spectra
                   ;; under the key :parameters.
                   (merge 
                    (create-indexes xml w :spectrumList)
                    (ind/index-objects w
                                       (list (mzml-parameters xml))
                                       (fn [_] :parameters)))
                   (create-indexes xml w :chromatogramList))]
            (ind/save-index (pr/file-path this)
                            (hash-map :index (:index r)
                                      :chromat (:chromat r)))
            (assoc r :strm
                   (ind/index-reader (pr/file-path this)))))
        ;; The file is opened twice: once (and closed again) to extract
        ;; the parameters, once for the stream the reader keeps.
        (let [params (with-open [r (io/reader (pr/file-path this))]
                       (mzml-parameters (parse-mzml r)))]
          (->mzmlSpectraReader (io/reader (pr/file-path this))
                               params)))))

  pr/File

  ;; The :file value as a java.io.File.
  (file-path [this]
    (io/file (:file this))))

(defn init-mzml-file
  "Initializes a mzmlFile record for `file`. If indexing is true the
  file will be indexed and an indexed reader returned when a spectra
  reader is requested. Indexing defaults to false."
  [file & {:keys [indexing] :or {indexing false}}]
  (map->mzmlFile {:file file :indexing indexing}))
