(ns scratch
  (:require [notespace.v2.note :refer [note note-void note-hiccup note-md]]
            [notespace.v2.live-reload]
            [clojisr.v1.r :refer [r r->clj r+] :as r]
            [clojisr.v1.applications.plotting :refer [plot->svg plot->file plot->buffered-image]]
            [clojisr.v1.require :refer [require-r]]
            [tech.ml.dataset :as dataset
             :refer [->dataset rename-columns mapseq-reader]]
            [notespace.v2.table :as table]))

;; Background:
;; https://rviews.rstudio.com/2020/03/05/covid-19-epidemiology-with-r/
;; by Tim Churches

;; Make the R packages tidyr, dplyr and ggplot2 available from Clojure under
;; the aliases `tidyr`, `dplyr` and `gg`, referring the listed functions
;; directly into this namespace.
(require-r '[tidyr :as tidyr :refer [pivot_longer]]
           '[dplyr :as dplyr :refer [mutate]]
           '[ggplot2 :as gg :refer [ggplot geom_point geom_line xlab ylab aes]])

;; Evaluate `library(lubridate)` in the R session. The quoted R expression
;; used when building `data` calls `parse_date_time`, which is presumably
;; provided by lubridate -- TODO confirm.
(r '(library lubridate))




;; Absolute path of the CSV file that `raw-data` is loaded from
;; (time_series_covid19_confirmed_global.csv).
;; NOTE(review): the path is hard-coded and presumably points at a local
;; checkout of the upstream COVID-19 data repository -- confirm before
;; running on another machine.
(def filename
  "/workspace/data/covid-19/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")


;; The dataset read from `filename`, with the two slash-containing column
;; names replaced by keywords. `defonce` keeps the file from being re-read
;; when this namespace is re-evaluated.
(defonce raw-data
  (let [column-renames {"Province/State" :province
                        "Country/Region" :country_region}
        loaded         (->dataset filename)]
    (rename-columns loaded column-renames)))

;; Scratch exploration, discarded by the `#_` reader macro so it is never
;; evaluated when the file loads. When run by hand it counts how many rows
;; of `raw-data` carry each :country_region value and lists the
;; [country count] pairs from highest count to lowest.
#_
(let [countries (map :country_region (mapseq-reader raw-data))]
  (reverse (sort-by val (frequencies countries))))


;; `raw-data` reshaped for plotting:
;;   1. tidyr's pivot_longer is applied to every column except :province,
;;      :country_region, :Lat and :Long, putting the former column names in
;;      "Date" and their values in "cumulative_cases";
;;   2. dplyr's mutate replaces Date with the result of
;;      parse_date_time(Date, "%m/%d/%y").
;; `defonce` avoids recomputing the value on namespace reload.
(defonce data
  (let [long-format (tidyr/pivot_longer raw-data
                                        '(- [:province :country_region :Lat :Long])
                                        :names_to "Date"
                                        :values_to "cumulative_cases")]
    (dplyr/mutate long-format
                  :Date '(parse_date_time Date "%m/%d/%y"))))

;; Notespace hiccup note holding an SVG rendering of a ggplot2 chart:
;; cumulative_cases against Date for four selected countries, one colour per
;; country_region, drawn as points joined by lines, with scale_y_log10
;; applied to the y axis.
(note-hiccup
 (plot->svg
  (-> data
      ;; Use the `dplyr` alias declared in the `require-r` call (as
      ;; `dplyr/mutate` does elsewhere in this file) rather than spelling
      ;; out the generated `r.dplyr` namespace.
      (dplyr/filter '(%in% country_region ["Indonesia" "Iran" "Italy" "Israel"]))
      (gg/ggplot (gg/aes :x 'Date
                         :y 'cumulative_cases
                         :color 'country_region))
      ;; NOTE(review): the axis labels "x" and "y" look like placeholders;
      ;; consider more descriptive text.
      (r+ (gg/geom_point)
          (gg/geom_line)
          (gg/xlab "x")
          (gg/ylab "y")
          (gg/scale_y_log10)))))


