(in-ns 'speccy.core)

;; Precursor ion of an MGF spectrum. Holds the values taken from the
;; PEPMASS and CHARGE header fields of an ion block.
(defrecord mgfPrecursor [mass charge]

  Ion

  ;; The stored mass field is reported as the m/z value.
  (mz [_]
    mass)

  ;; Inside a record method the field name shadows the protocol
  ;; function, so this returns the stored charge.
  (charge [_]
    charge)

  ;; :intensity is not a declared field; this yields nil unless the key
  ;; has been assoc'd onto the record.
  (intensity [this]
    (get this :intensity)))

;; A single MGF ion block.
;;
;; peaks - vector of peak vectors whose first element is m/z and whose
;;         second element is intensity.
;; other - map of header fields of the block keyed by upper-case field
;;         name ("PEPMASS", "CHARGE", "TITLE", "ID", ...).
(defrecord mgfSpectra [peaks other]

  Spectra

  (centroid? [this]
    (throw (Throwable. "Not defined for MGF spectra.")))

  (products [this]
    nil)

  (scan-list [this]
    nil)

  (binary-arrays [this]
    nil)

  (spot-id [this]
    nil)

  (polarity [this]
    nil)

  (index [this]
    nil)

  ;; Always reports MS level 2.
  (msn [this]
    2)

  ;; Returns a one-element list holding the precursor built from the
  ;; PEPMASS and CHARGE fields.
  ;;
  ;; CHARGE is parsed leniently: MGF files normally write the sign
  ;; after the digits ("2+", "3-"), which Integer/parseInt rejects, and
  ;; the field is optional. The first integer in the value is used, a
  ;; minus sign on either side makes it negative, and a missing or
  ;; unparsable CHARGE yields a nil charge instead of an exception.
  (precursors [this]
    (let [info (:other this)
          raw  (get info "CHARGE")
          z    (cond
                 (nil? raw)    nil
                 (number? raw) (int raw)
                 :else
                 (when-let [[_ lead digits trail]
                            (re-find #"([+-]?)(\d+)([+-]?)" (str raw))]
                   (let [n (Integer/parseInt digits)]
                     (if (or (= "-" lead) (= "-" trail))
                       (- n)
                       n))))]
      (list (->mgfPrecursor (bigdec (get info "PEPMASS")) z))))

  (mz-array [this]
    (into [] (map first (:peaks this))))

  (intensity-array [this]
    (into [] (map second (:peaks this))))

  Info

  ;; Returns [time "seconds"] from RTINSECONDS, or nil when the block
  ;; has no RTINSECONDS field (previously this threw).
  (elution-time [this]
    (when-let [rt (get (:other this) "RTINSECONDS")]
      (vector (bigdec rt)
              "seconds")))

  ID

  (accession [this]
    (get (:other this) "ID"))

  (definition [this]
    (get (:other this) "TITLE")))

;;;;;;;;;;;;;;;
;; reader
;;;;;;;;;;;;;;;

(defn- tokenise
  "Extracts the next ion block from the lines under :remaining in `m`.

  Returns {:yield block-lines :remaining lines-after-END-IONS}, where
  block-lines are the lines strictly between a BEGIN IONS line and the
  following END IONS line, or {:end true} when no further non-empty
  block exists.

  Blocks with no lines between BEGIN IONS and END IONS are skipped;
  previously such a block was mistaken for end of input and every
  spectrum after it was lost."
  [m]
  (let [begin? #(re-find #"BEGIN IONS" %)
        end?   #(re-find #"END IONS" %)]
    (loop [lines (:remaining m)]
      (let [block (drop-while (complement begin?) lines)]
        (if (empty? block)
          {:end true}
          (let [after (rest block)
                body  (take-while (complement end?) after)
                tail  (rest (drop-while (complement end?) after))]
            (if (seq body)
              {:yield body :remaining tail}
              ;; empty block: keep scanning after its END IONS
              (recur tail))))))))

(defn- get-info
  "Builds the header map of one ion block.

  `l` is the seq of lines of the block and `c` is the identifier stored
  under \"ID\" when the block has no ID field of its own. Only the first
  run of consecutive lines containing \"=\" is read. Keys are trimmed
  and upper-cased; values are trimmed.

  \"PEPMASS\" is replaced by its first whitespace-separated token as a
  BigDecimal. A second token, when present, is stored as a BigDecimal
  under \"PEPINTENSITY\": in MGF the optional value after the precursor
  m/z is its intensity, not its charge, so \"CHARGE\" is left as given.
  (The previous code called `int` on that string, which always threw.)
  A block without PEPMASS is returned without a \"PEPMASS\" entry."
  [l c]
  (let [fields  (->> l
                     (drop-while #(not (re-find #"=" %)))
                     (take-while #(re-find #"=" %))
                     (map #(split % #"=" 2))
                     (map (fn [[k & vs]]
                            [(upper-case (trim k))
                             (apply str (map trim vs))]))
                     (into {}))
        info    (if (get fields "ID")
                  fields
                  (assoc fields "ID" c))
        pepmass (get info "PEPMASS")]
    (if (nil? pepmass)
      info
      (let [[mass inten] (split pepmass #"\s+")
            with-mass    (assoc info "PEPMASS" (bigdec mass))]
        (if inten
          (assoc with-mass "PEPINTENSITY" (bigdec inten))
          with-mass)))))

(defn- parse-peaks
  "Parses one whitespace-separated peak line.

  Returns [mz intensity] with both values as BigDecimal. When the line
  has exactly three tokens the third is parsed as an int and appended,
  giving [mz intensity n]."
  [l]
  (let [[mz inten :as tokens] (split l #"\s+")
        pair                  [(bigdec mz) (bigdec inten)]]
    (if (= 3 (count tokens))
      (conj pair (Integer/parseInt (last tokens)))
      pair)))

(defn- parse-mgf
  "Lazily turns a seq of MGF lines into a seq of mgfSpectra records.

  Ion blocks are pulled off one at a time with `tokenise`. Within a
  block the peak list starts at the first line beginning with a digit;
  the header map comes from `get-info`, which is handed a running
  counter (starting at 1) as the fallback identifier."
  [l]
  (let [counter    (atom 0)
        peak-line? (fn [line] (re-find #"^\d" line))
        ->spectrum (fn [block]
                     (->mgfSpectra
                      (mapv parse-peaks
                            (drop-while (complement peak-line?) block))
                      (get-info block (swap! counter inc))))]
    (->> (iterate tokenise {:remaining l})
         (take-while (fn [state] (not (contains? state :end))))
         ;; the seed state carries no :yield and is dropped here
         (keep :yield)
         (map ->spectrum))))

;; Sequential (non-indexed) reader over an MGF stream.
;;
;; strm   - the reader the lines are pulled from; `close` treats it as
;;          a java.io.BufferedReader.
;; header - value returned by `parameters`.
(defrecord mgfReader [strm header]

  spectraReader

  ;; Lazy seq of spectra parsed from the stream, starting at the first
  ;; BEGIN IONS line.
  (spectra-seq [this]
    (parse-mgf (drop-while #(not (re-find #"BEGIN IONS" %))
                           (line-seq (:strm this)))))

  ;; Lazy seq of the spectra whose accession equals `acc`.
  ;; The predicate must test each spectrum; it previously took no
  ;; argument and queried the reader itself, so it threw an arity error
  ;; as soon as the seq was realised.
  (get-spectra
    [this acc]
    (filter #(= acc (accession %))
            (spectra-seq this)))

  (chromatogram-seq [this]
    nil)

  (get-chromatogram [this acc]
    nil)

  (parameters [this]
    (:header this))

  java.io.Closeable

  (close [this]
    (.close ^java.io.BufferedReader (:strm this))))

;; Reader backed by an index map.
;;
;; index - map from accession to an index entry, plus a :parameters
;;         entry that is excluded from `spectra-seq`.
;; The underlying index reader is expected under the :strm key, which
;; is assoc'd onto the record after construction.
(defrecord mgfIndexedReader [index]

  spectraReader

  (spectra-seq [this]
    (ind/object-seq (:strm this) (vals (dissoc (:index this) :parameters))))

  ;; Returns the spectrum stored under `acc`, or nil when the index has
  ;; no entry for it. The lookup result is checked before wrapping it
  ;; in a list: a one-element list is always truthy, so the old guard
  ;; never fired and `(nil)` was handed to ind/object-seq.
  (get-spectra
    [this acc]
    (when-let [entry (get (:index this) acc)]
      (first (ind/object-seq (:strm this) (list entry)))))

  (chromatogram-seq [this]
    nil)

  (get-chromatogram [this acc]
    nil)

  ;; NOTE(review): this record has no :header field, so this returns
  ;; nil unless a :header key has been assoc'd by the caller.
  (parameters [this]
    (:header this))

  java.io.Closeable

  (close [this]
    (ind/close-index-reader (:strm this))))

;;;;;;;;;;;;;;;
;; file
;;;;;;;;;;;;;;;

(defn- parse-header
  "Builds a map from the KEY=VALUE lines in `l`; lines without \"=\"
  are ignored.

  Each line is split at the first \"=\" only, so values that contain
  \"=\" or are empty still produce a key/value pair. An unlimited split
  yields three or more pieces (or a single one) for such lines, which
  `into {}` rejects."
  [l]
  (->> l
       (filter #(re-find #"=" %))
       (map #(split % #"=" 2))
       (into {})))

(defn- create-indexes
  "Passes the objects in `l` to ind/index-objects with writer `w`,
  keying each one by the \"ID\" entry of its :other map."
  [l w]
  (letfn [(object-id [obj]
            (get-in obj [:other "ID"]))]
    (ind/index-objects w l object-id)))

;; An MGF file on disk.
;;
;; file     - path of the file.
;; indexing - when truthy, `spectra-reader` indexes the file and
;;            returns an indexed reader.
(defrecord mgfFile [file indexing]

  spectraFile

  ;; Returns a reader over the file:
  ;;  - an mgfIndexedReader when the file is already indexed,
  ;;  - otherwise, when :indexing is set, the file is indexed first and
  ;;    an mgfIndexedReader over the new index is returned,
  ;;  - otherwise a plain mgfReader on a fresh stream.
  (spectra-reader [this]
    (cond
      (is-indexed? this)
      (let [in (ind/load-indexed-file (file-path this))]
        (assoc (->mgfIndexedReader in) :strm
               (ind/index-reader (file-path this))))

      (:indexing this)
      (with-open [r (reader (file-path this))
                  w (ind/index-writer (file-path this))]
        ;; Read the line seq once and share it between the header and
        ;; the spectra. Calling line-seq twice on one reader loses the
        ;; first BEGIN IONS line (consumed while scanning the header),
        ;; so the first spectrum would never be indexed.
        (let [lines  (line-seq r)
              header (parse-header (take-while
                                    #(not (re-find #"BEGIN IONS" %))
                                    lines))
              ions   (parse-mgf (drop-while
                                 #(not (re-find #"BEGIN IONS" %))
                                 lines))
              in     (merge
                      (create-indexes ions w)
                      (if header
                        (ind/index-objects w (list header)
                                           (fn [_] :parameters))
                        {:parameters nil}))
              indexed (->mgfIndexedReader in)]
          (ind/save-index (file-path this) in)
          (assoc indexed :strm (ind/index-reader
                                (file-path this)))))

      :else
      ;; No index writer is opened on this path; it is not needed.
      ;; The header is parsed eagerly from a short-lived reader, and
      ;; the returned mgfReader owns its own stream.
      (with-open [r (reader (file-path this))]
        (->mgfReader (reader (file-path this))
                     (parse-header (take-while
                                    #(not (re-find #"BEGIN IONS" %))
                                    (line-seq r)))))))

  File

  (file-path [this]
    (:file this)))

;; functions

(defn init-mgf-file
  "Creates an mgfFile record for `path`. Pass `:indexing true` to have
  the file indexed and an indexed reader returned when a reader is
  requested; indexing defaults to false."
  [path & {:keys [indexing] :or {indexing false}}]
  (map->mgfFile {:file     path
                 :indexing indexing}))


(defn ->mgf
  "Takes a spectrum of any defined spectra record and returns it as an
  MGF ion block string (BEGIN IONS ... END IONS), or nil when the
  spectrum has no selected precursor ion.

  `c`, when supplied and non-nil, is an atom holding a counter; it is
  incremented and the new value written as the ID field. With one
  argument no ID line is written. (The one-argument arity used to pass
  the core function `count` as the counter, which made `swap!` throw.)"
  ([s] (->mgf s nil))
  ([s c]
   (when-let [i (-> (precursors s) first selected-ions first)]
     (let [base   (str "BEGIN IONS" "\n"
                       "TITLE=" (definition s) "\n"
                       "CHARGE="  (or (charge i) "") "\n"
                       "PEPMASS=" (mz i) "\n")
           header (if c
                    (str base "ID=" (swap! c inc) "\n")
                    base)
           ;; one "mz<TAB>intensity" line per peak
           peaks  (->> (map (fn [m inten] (str m "\t" inten))
                            (mz-array s)
                            (intensity-array s))
                       (interpose "\n")
                       (apply str))]
       (str header
            peaks
            "\n"
            "END IONS")))))

(defn file->mgf
  "Writes the spectra of a spectra file (e.g. mzML) out in MGF format
  and returns the result wrapped by `init-mgf-file`. Each spectrum is
  given a sequential ID starting at 1. When no outfile is supplied the
  output path is the input file's path with \".mgf\" appended."
  ([mzml]
   (file->mgf mzml (str (file-path mzml) ".mgf")))
  ([mzml outfile]
   (with-open [rdr (spectra-reader mzml)]
     (let [id-counter (atom 0)
           blocks     (map (fn [spectrum] (->mgf spectrum id-counter))
                           (spectra-seq rdr))
           written    (biosequence->file blocks
                                         outfile
                                         :append false
                                         :func identity)]
       (init-mgf-file written)))))

