(ns clinicaltrials-gov.core
  (:require [clj-http.client :as client]
            [clojure.java.io]
            [clojure.string]
            [clojure.xml :as xml]
            [clojure.zip :as zip]
            [com.climate.claypoole :as cp])
  (:gen-class)
  (:import (java.io InputStream)
           (java.util.zip ZipFile)))

; Example download request: https://clinicaltrials.gov/ct2/results/download?down_stds=all&down_typ=fields&down_flds=shown&down_fmt=plain&term=acute+myeloid+leukemia&recr=Open&no_unk=Y

;; Root URL of the clinicaltrials.gov site; the request functions below build
;; their URLs by appending a path to this value with `str`.
(def api-base "https://clinicaltrials.gov")

(defn ^:private temp-file
  "Creates an empty temporary file (prefix \"clinical-trials-gov-\", suffix
  \".zip\") and returns it as a java.io.File.

  The file is registered for deletion at JVM exit so that downloaded archives
  do not pile up in the temp directory across runs."
  []
  (doto (java.io.File/createTempFile "clinical-trials-gov-" ".zip")
    (.deleteOnExit)))

(defn ^:private process-status
  "Translates a status selector into a map of clinicaltrials.gov query
  parameters.

  status may be :open, :exclude-unknown, or a collection of those (nested
  collections are handled by recursion). nil or an empty collection yields an
  empty map. Any other value throws IllegalArgumentException naming the
  offending value, rather than failing while trying to iterate over it."
  [status]
  (cond
    (= :open status) {:recr "Open"}
    (= :exclude-unknown status) {:no_unk "Y"}
    (nil? status) {}
    ;; a collection of selectors: translate each one and merge the results
    (coll? status) (reduce merge {} (map process-status status))
    :else (throw (IllegalArgumentException.
                   (str "Unsupported status: " (pr-str status)
                        " (expected :open, :exclude-unknown, or a collection of them)")))))

(defn ^ZipFile search*
  "Executes a search request against `<api-base>/search`, saves the response
  body (expected to be a ZIP archive) to a temp file, and returns an open
  java.util.zip.ZipFile on that file. The caller is responsible for closing
  the returned ZipFile.

  Arguments:

  terms  - search terms as a string (passed straight through to the API)
  status - :open, :exclude-unknown, or a vector of both"
  [^String terms, status]
  (let [status-params (process-status status)
        params (merge status-params {:studyxml "true"
                                     :term     terms})
        ^java.io.File output-file (temp-file)
        response (client/get (str api-base "/search")
                             {:query-params params, :as :stream, :debug false})]
    ;; copy does not close the stream it is handed, and with :as :stream the
    ;; body has to be closed by us; otherwise the HTTP connection is leaked.
    (with-open [^InputStream body (:body response)]
      (clojure.java.io/copy body output-file))
    (ZipFile. output-file)))

(defn ^:private entries
  "Returns a seq over the entries of zip-file, built from the enumeration
  returned by its `.entries` method."
  [zip-file]
  (-> zip-file
      (.entries)
      (enumeration-seq)))

(defn ^:private ^InputStream input-stream
  "Opens and returns the stream for `entry` by calling `.getInputStream` on
  zip-file."
  [zip-file entry]
  (. zip-file (getInputStream entry)))

(defn ^:private unzipfile-results
  "Reads every entry of `file` (a closeable zip handle, as returned by
  search*) into a string with `slurp`, applies f to each string, and returns
  the results as a vector. The handle is closed before returning.

  f is optional and defaults to identity."
  ([file]
   (unzipfile-results file identity))
  ([file f]
   (with-open [z file]
     ;; reduce is eager, so every entry is read before the zip is closed
     (reduce (fn [acc entry]
               (conj acc (f (slurp (input-stream z entry)))))
             []
             (entries z)))))

(defn xml-input-streams
  "Given a ZipFile handle (as returned by search*), returns a vector holding
  one input stream per entry in the archive. The zip file is not closed here;
  closing it is left to the caller."
  [^ZipFile z]
  (vec (for [entry (entries z)]
         (input-stream z entry))))

(defn zip-str
  "Takes XML as a string and returns a map holding the original XML under
  :xml and, under :zipper, a clojure.zip zipper (zipper, not zipfile) over the
  tree produced by clojure.xml/parse."
  [^String s]
  {:xml    s
   :zipper (zip/xml-zip
             ;; Encode explicitly: a bare .getBytes uses the platform default
             ;; charset, which corrupts non-ASCII text on JVMs whose default
             ;; is not UTF-8. NOTE(review): assumes the document is UTF-8 (or
             ;; declares no other encoding) -- confirm for this data source.
             (xml/parse (java.io.ByteArrayInputStream. (.getBytes s "UTF-8"))))})

(defn ^:private results-zipper
  "Applies zip-str to every element of results and returns the outcomes as a
  vector."
  [results]
  (reduce (fn [acc xml-string]
            (conj acc (zip-str xml-string)))
          []
          results))

(defn search
  "Runs search* for the given terms and status, converts every XML document
  in the returned archive with zip-str, and returns the results as a vector.

  The optional f is applied to each zip-str result; it defaults to identity."
  ([^String terms, status]
   (search terms status identity))
  ([^String terms, status, f]
   (let [archive (search* terms status)
         transform (fn [xml-string] (f (zip-str xml-string)))]
     (unzipfile-results archive transform))))

(defn get-by-id
  "Requests `<api-base>/show/<nct-id>?displayxml=true` and returns the
  zip-str result for the response body."
  [^String nct-id]
  (let [url (str api-base "/show/" nct-id "?displayxml=true")
        body (:body (client/get url))]
    (zip-str body)))

(defn ^ZipFile search-ids-only*
  "Requests `<api-base>/ct2/results/download` asking for the NCT-ID field in
  TSV format, saves the response body to a temp file, and returns an open
  java.util.zip.ZipFile on that file. The caller is responsible for closing
  the returned ZipFile.

  Arguments:

  params - map of extra query parameters (e.g. {:term \"...\"}); these are
           merged last, so they override the defaults set here
  status - :open, :exclude-unknown, or a vector of both"
  [params, status]
  (let [status-params (process-status status)
        params (merge status-params
                      {:down_stds "all"
                       :down_typ  "fields"
                       :down_flds "shown"
                       :flds      "k"                       ; k is NCT-ID
                       :down_fmt  "tsv"                     ; options include "csv", "plain", "xml"
                       }
                      params)
        ^java.io.File output-file (temp-file)
        response (client/get (str api-base "/ct2/results/download")
                             {:query-params params, :as :stream, :debug false})]
    ;; copy does not close the stream it is handed, and with :as :stream the
    ;; body has to be closed by us; otherwise the HTTP connection is leaked.
    (with-open [^InputStream body (:body response)]
      (clojure.java.io/copy body output-file))
    (ZipFile. output-file)))

(defn extract-nct-id
  "Splits tsv-str into lines, skips the first (header) line, and returns a
  lazy seq containing the second tab-separated column of each remaining line.
  A line with fewer than two columns contributes nil."
  [tsv-str]
  (let [lines (clojure.string/split tsv-str #"[\r\n]+")
        data-rows (rest lines)]                             ; drop header row
    (for [row data-rows]
      (second (clojure.string/split row #"\t")))))

(defn search-ids-only
  "Runs search-ids-only* with `terms` as the :term parameter, reads the first
  file of the returned archive, and returns the ids that extract-nct-id pulls
  from it."
  [^String terms, status]
  (-> (search-ids-only* {:term terms} status)
      (unzipfile-results)
      (first)
      (extract-nct-id)))

(defn search-ids-of-conditions-only
  "Runs search-ids-only* with `terms` as the :cond parameter, reads the first
  file of the returned archive, and returns the ids that extract-nct-id pulls
  from it."
  [^String terms, status]
  (-> (search-ids-only* {:cond terms} status)
      (unzipfile-results)
      (first)
      (extract-nct-id)))

(defn parallel-search
  "Searches for a collection of NCT ids in batches.

  The ids are split into groups of up to 100, each group is joined with
  \" OR \" into a single search term, and every term is passed to `search`
  (with an empty status vector) through claypoole's pmap, called with 2 as
  its pool argument. f is handed on to `search` and defaults to identity.
  Returns the per-group results concatenated into one sequence."
  ([nct-ids] (parallel-search nct-ids identity))
  ([nct-ids f]
   (let [groups (partition-all 100 nct-ids)
         ;; Joining a handful of strings is trivial work, so a plain `map` is
         ;; used; core `pmap` would dispatch futures for no benefit.
         search-groups (map #(apply str (interpose " OR " %)) groups)]
     (->> search-groups
          (cp/pmap 2 #(search % [] f))
          (apply concat)))))

(defn -main
  "Command-line entry point. Ignores its arguments and prints a greeting."
  [& _args]
  (println "Hello, World!"))
