(ns morri.unique-tss.gencode
  (:require [honeysql.core :as sql]
            [honeysql.helpers :refer :all]
            [morri.lib :as lib]
            [morri.meth450k.common.ucsc-db :as ucsc-db]
            [clojure.pprint :refer [pprint]]))

(defn is-ref-seq?
  "Returns true when looking up `transcript` in wgEncodeGencodeRefSeqV17
  (matching on transcriptId, selecting rnaAcc) yields a non-empty first
  row, and false otherwise.

  Presumably `ucsc-db/ucsc-query` returns a sequence of row maps --
  TODO confirm against morri.meth450k.common.ucsc-db."
  [transcript]
  (let [sqlvec    (sql/format (-> (select :rnaAcc)
                                  (from :wgEncodeGencodeRefSeqV17)
                                  (where [:= :transcriptId transcript])))
        first-row (first (ucsc-db/ucsc-query sqlvec))]
    ;; No rows => first-row is nil => (seq nil) is nil => false.
    (boolean (seq first-row))))

;; (is-ref-seq? "ENST00000379370.2") => true
;; (is-ref-seq? "ENST00000477585.1") => false
;; (is-ref-seq? "ENST00000379339.1") => false

(defn support-level
  "Looks up `transcript` (by transcriptId) in
  wgEncodeGencodeTranscriptionSupportLevelV17 and returns the :level
  value of the first row returned, or nil when there is no first row."
  [transcript]
  (->> (sql/format (-> (select :level)
                       (from :wgEncodeGencodeTranscriptionSupportLevelV17)
                       (where [:= :transcriptId transcript])))
       ucsc-db/ucsc-query
       first
       :level))

;; (support-level "ENST00000379370.2") => 1
;; (support-level "ENST00000379339.1") => 2 (refseq false)
;; (support-level "ENST00000477585.1") => 3 (refseq false)

(defn annot-method
  "Looks up `transcript` (by transcriptId) in wgEncodeGencodeAttrsV17 and
  returns the :level value of the first row returned, or nil when there
  is no first row."
  [transcript]
  (let [level-query (-> (select :level)
                        (from :wgEncodeGencodeAttrsV17)
                        (where [:= :transcriptId transcript]))
        rows        (ucsc-db/ucsc-query (sql/format level-query))
        top-row     (first rows)]
    (get top-row :level)))

;; Hierarchy for ranking GENCODE transcripts:
;; 1. Is it RefSeq?
;; 2. Transcript support level:
;;    wgEncodeGencodeTranscriptionSupportLevelV17.level via name = transcriptId
;;    (1 best, 5 weakest, -1 NA)
;; 3. Annotation method:
;;    wgEncodeGencodeAttrsV17.level via name = transcriptId
;;    (1 = experimental, 2 = manual, 3 = automatic)

(defn which-tx-is-better
  "Chooses between transcripts `tx-a` and `tx-b` by trying three criteria
  in order and returning the first truthy answer from
  `lib/which-is-better`:

    1. `is-ref-seq?`
    2. `support-level`  (called with :larger-is-better? false)
    3. `annot-method`   (called with :larger-is-better? false)

  When none of the comparisons yields a truthy value, `tx-a` is returned.

  Presumably `lib/which-is-better` returns the preferred transcript, or
  nil/false when the criterion cannot separate them -- TODO confirm in
  morri.lib."
  [tx-a tx-b]
  ;; `or` short-circuits, so later criteria are only evaluated when the
  ;; earlier ones produced nil/false.
  (or (lib/which-is-better tx-a tx-b is-ref-seq?)
      (lib/which-is-better tx-a tx-b support-level
                           :larger-is-better? false)
      (lib/which-is-better tx-a tx-b annot-method
                           :larger-is-better? false)
      tx-a))

;; (which-tx-is-better "ENST00000379370.2" "ENST00000477585.1")
;; (which-tx-is-better "ENST00000379339.1" "ENST00000477585.1")
;; In the second example neither transcript is RefSeq;
;; ENST00000379339.1 has the better (lower) support level.

(def good-txs
  "Delay wrapping a query for wgEncodeGencodeBasicV17.name, joined to
  wgEncodeGencodeAttrsV17 on name = transcriptId and restricted to rows
  whose transcriptClass is 'coding'. The query runs on first deref
  (e.g. `@good-txs`) and the result is cached by the delay."
  (delay
    (let [coding-tx-sql "
SELECT wgEncodeGencodeBasicV17.name from wgEncodeGencodeBasicV17
join wgEncodeGencodeAttrsV17 on
wgEncodeGencodeBasicV17.name = wgEncodeGencodeAttrsV17.transcriptId
where  wgEncodeGencodeAttrsV17.transcriptClass = 'coding'"]
      (ucsc-db/ucsc-query [coding-tx-sql]))))

;; (count @good-txs)

;; below here, some helper and obsolete functions

(defn tx-info
  "Runs one query that gathers, for `transcript`, its name from
  wgEncodeGencodeBasicV17 together with:

    - attrs.level    (aliased :annot-method)  from wgEncodeGencodeAttrsV17
    - support.level  (aliased :support_level) from
                     wgEncodeGencodeTranscriptionSupportLevelV17
    - refseq.rnaAcc                           from wgEncodeGencodeRefSeqV17

  Each table is joined on name = transcriptId. Returns whatever
  `ucsc-db/ucsc-query` returns for that query.

  NOTE(review): these are plain (inner) joins, so a transcript missing
  from any of the three joined tables should yield no rows -- confirm
  this is intended."
  [transcript]
  (let [columns [:name
                 [:attrs.level :annot-method]
                 [:support.level :support_level]
                 :refseq.rnaAcc]
        info-query (-> (apply select columns)
                       (from :wgEncodeGencodeBasicV17)
                       ;; One variadic join: table/condition pairs in the
                       ;; same order as the original join + merge-joins.
                       (join [:wgEncodeGencodeRefSeqV17 :refseq]
                             [:= :name :refseq.transcriptId]
                             [:wgEncodeGencodeTranscriptionSupportLevelV17 :support]
                             [:= :name :support.transcriptId]
                             [:wgEncodeGencodeAttrsV17 :attrs]
                             [:= :name :attrs.transcriptId])
                       (where [:= :name transcript]))]
    (ucsc-db/ucsc-query (sql/format info-query))))

;; (tx-info "ENST00000379370.2")

(def some-txs
  "Sample transcript ids for REPL experiments such as
  `(map tx-info some-txs)`. The final ten entries repeat ids that already
  appear earlier in the vector."
  ["ENST00000000233.5" "ENST00000000412.3"
   "ENST00000000442.6" "ENST00000001008.4"
   "ENST00000001146.2" "ENST00000002125.4"
   "ENST00000002165.5" "ENST00000002501.6"
   "ENST00000002596.5" "ENST00000002829.3"
   "ENST00000003084.6" "ENST00000003100.8"
   "ENST00000003302.4" "ENST00000003583.8"
   "ENST00000003912.3" "ENST00000004103.3"
   ;; repeated ids start here
   "ENST00000002165.5" "ENST00000002501.6"
   "ENST00000002596.5" "ENST00000002829.3"
   "ENST00000003084.6" "ENST00000003100.8"
   "ENST00000003302.4" "ENST00000003583.8"
   "ENST00000003912.3" "ENST00000004103.3"])

;; (pprint (map tx-info some-txs))

;; (annot-method "ENST00000379370.2") => 2

;; (annot-method "ENST00000347370.2") => 3
