(ns vura.timezones.parser
  (:refer-clojure :exclude [second])
  (:require [vura.core :refer :all]
            [instaparse.core :as insta]))

;; Names of tzdb region files as strings.
;; NOTE(review): nothing in the visible source refers to this var; `timezone-data`
;; below uses its own keyword list, which does not contain "pacificnew" — confirm
;; whether the two lists are meant to agree.
(def zones ["europe" "africa" "northamerica" "pacificnew" "southamerica" "asia" "australasia"])

(defn zone-definition
  "Returns the text of the classpath resource \"tzdb-2018e/<zone>\" as a string.
  `zone` may be anything accepted by `name` (keyword, string or symbol)."
  [zone]
  (-> (str "tzdb-2018e/" (name zone))
      clojure.java.io/resource
      slurp))

;; Three-letter English month abbreviation -> month number (1 = Jan ... 12 = Dec).
(def months-mapping
  (zipmap ["Jan" "Feb" "Mar" "Apr" "May" "Jun"
           "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"]
          (range 1 13)))

;; Three-letter English weekday abbreviation -> weekday number (1 = Mon ... 7 = Sun).
(def days-mapping
  (zipmap ["Mon" "Tue" "Wed" "Thu" "Fri" "Sat" "Sun"]
          (range 1 8)))


;; Instaparse grammar (as a string) describing three kinds of entries:
;;   * `rule-definition` - a line starting with `Rule`
;;   * `zone-definition` - a line starting with `Zone` followed by one or more `zone-row`s
;;   * `link-definition` - a line starting with `Link`
;; `timezone` is a sequence of one or more of those entries and is the start rule
;; used by `read-zone`.
;; Names and literals wrapped in angle brackets are hidden from the parse tree.
;; NOTE(review): the second `newline` alternative is LF followed by CR ('\n\r');
;; a CRLF line ending would be '\r\n' — confirm intent. `read-zone` re-joins lines
;; with "\n" before parsing, so this alternative is not reached from there.
;; NOTE(review): the `comment` and `empty-space` rules are not referenced by any
;; other rule in this grammar.
(def zone-grammar
  "number = #'[0-9]+'
   <comment> = #'#.*'
   <word> = #'[a-zA-Z]+'
   <space> = (' ' | '\\t')+
   <newline>= '\\n' | '\\n\\r'
   year = number
   month-name = 'Jan' | 'Feb' | 'Mar' | 'Apr' | 'May' | 'Jun' | 'Jul' | 'Aug' | 'Sep' | 'Oct' | 'Nov' | 'Dec'
   month = month-name
   day = number
   floating-day = #'last\\w+' | #'\\w+>=\\d+|\\w+<=\\d+'
   second = number
   minute = number
   hour = number 
   time-suffix = #'s|g|u|z'
   time = hour <':'> minute (<':'> second)? time-suffix?

   <zone-start> =  'Zone'
   sign = '+' | '-'
   gmtoff = sign? hour <':'> minute (<':'> second)? | hour 
   <zone-name> = #'[a-zA-Z/_\\-0-9]+'
   zone-rule = #'[a-zA-Z\\-_]+'  | gmtoff
   zone-format = #'[A-Z\\-/%s\\+0-9a-z]+'
   zone-until = <space> year (<space> month)? (<space> day | <space> floating-day)? (<space> time)?
   zone-offset = gmtoff
   zone-row = <space> zone-offset <space> zone-rule <space> zone-format (zone-until / <space>?) <space>? (<newline> | Epsilon)
   zone-definition = <zone-start> <space> zone-name zone-row+
   
   link-alias = zone-name 
   link-canonical = zone-name
   link-definition = <'Link'> <space>+ link-canonical <space> link-alias <space>? (<newline> | Epsilon)

   rule-name = #'[a-zA-Z\\-_]+'
   rule-type = '-' | word
   rule-from = year
   rule-to = year | 'only' | 'max'
   rule-in = month
   rule-on = day | floating-day 
   rule-at = time
   rule-save = gmtoff | number
   rule-letters = #'[a-zA-Z\\+\\-0-9]+' | '-'
   rule-definition = <'Rule'> <space> rule-name <space> rule-from <space> rule-to <space> 
                     rule-type <space> rule-in <space> rule-on <space> rule-at <space> 
                     rule-save <space> rule-letters <space>? (<newline> | Epsilon)


   <empty-space> = (<space> <newline> | <newline>)+

   timezone = (rule-definition | zone-definition | link-definition)+")

;; Instaparse parser built once from `zone-grammar`; `read-zone` invokes it with
;; `:start :timezone`.
(def zone-parser (insta/parser zone-grammar))

(defn read-zone
  "Loads the tzdb file for `zone` (see `zone-definition`), strips `#` comments and
  blank lines, parses the remaining text with `zone-parser` starting at `:timezone`
  and post-processes the parse tree:

    :number          -> integer
    :month-name      -> month number via `months-mapping`
    :time            -> [:time {<child-tag> <value> ...}]
    :gmtoff          -> signed sum of `hours`, `minutes` and `seconds` of its parts
    :zone-until      -> [:until <map of children>], or [:until nil] when there are none
    :zone-row        -> {:offset .. :rule .. :format .. :until ..}
    :rule-definition -> [:rule [<rule-name> {<child-tag> <value> ...}]]
    :link-definition -> [:link {<child-tag> <value> ...}]"
  [zone]
  (let [;; every transformed child is a [tag value] pair, so children fold into a map
        pairs->map (fn [pairs] (into {} pairs))
        source (->> (clojure.string/replace (zone-definition zone) #"#.*" "")
                    clojure.string/split-lines
                    (remove (comp empty? clojure.string/trim))
                    (clojure.string/join "\n"))
        transformers
        {:number (fn [digits] (Integer/parseInt digits))
         :month-name months-mapping
         :time (fn [& parts] [:time (pairs->map parts)])
         :gmtoff (fn [& parts]
                   (let [{:keys [hour minute sign second]
                          :or {sign "+"
                               hour 0
                               minute 0
                               second 0}} (pairs->map parts)
                         direction (if (= sign "-") -1 1)
                         magnitude (+ (hours hour)
                                      (minutes minute)
                                      (seconds second))]
                     (* direction magnitude)))
         :zone-until (fn [& parts]
                       [:until (not-empty (pairs->map parts))])
         :zone-row (fn [& columns]
                     ;; columns arrive in grammar order; a missing UNTIL yields nil
                     (let [[offset rule format until] (map clojure.core/second columns)]
                       {:offset offset
                        :rule rule
                        :format format
                        :until until}))
         :rule-definition (fn [[_ rule-name] & fields]
                            [:rule [rule-name (pairs->map fields)]])
         :link-definition (fn [& fields]
                            [:link (pairs->map fields)])}]
    (insta/transform transformers (zone-parser source :start :timezone))))

(defn extract-zones
  "Builds a map keyed by zone name from transformed parser output.

  The first element of `parsed-data` (the root tag) is skipped. Every
  `[:zone-definition name & rows]` entry maps `name` to its rows; every
  `[:link {:link-alias a :link-canonical c}]` entry maps `a` to the string `c`.
  Links are added after zones, so a link overrides a zone with the same name."
  [parsed-data]
  (let [entries (rest parsed-data)
        tagged (fn [tag] (filter #(= tag (first %)) entries))
        zone-rows (into {}
                        (map (fn [[_ zone-name & rows]] [zone-name rows]))
                        (tagged :zone-definition))]
    (into zone-rows
          (map (fn [[_ {:keys [link-alias link-canonical]}]]
                 [link-alias link-canonical]))
          (tagged :link))))

(defn extract-rules
  "Groups `[:rule [rule-name body]]` entries of transformed parser output by rule
  name. Returns a map of rule-name -> vector of bodies in order of appearance.
  The first element of `parsed-data` (the root tag) is skipped."
  [parsed-data]
  (->> (rest parsed-data)
       (filter #(= :rule (first %)))
       (reduce (fn [by-name [_ [rule-name body]]]
                 (update by-name rule-name (fnil conj []) body))
               {})))

(defn extract-data
  "Returns {:zones (extract-zones parsed-data) :rules (extract-rules parsed-data)}."
  [parsed-data]
  (zipmap [:zones :rules]
          ((juxt extract-zones extract-rules) parsed-data)))

(defn- until-date
  "Resolves the parsed UNTIL map of a zone row to a date.

  `until` is a map with `:year` and optionally `:month` (default 1), `:day`
  (default 1), `:floating-day` (a string such as \"lastSun\", \"Sun>=8\" or
  \"Sun<=25\") and `:time` (a map whose `:hour` / `:minute` default to 0).
  Returns nil when `until` is nil.

  Floating days are resolved against the `calendar-frame` of the given month:
    lastXxx - last day in the frame whose weekday is Xxx
    Xxx>=N  - first day with weekday Xxx and day-in-month >= N
    Xxx<=N  - last day with weekday Xxx and day-in-month <= N
  The hour and minute of `:time` are added to the resolved day. When no day of
  the frame satisfies the constraint, nil is returned.

  NOTE(review): `:second` of `:time` is not used, matching the fixed-day branch."
  [{:keys [year month day floating-day] :as until
    :or {day 1
         month 1}}]
  (when until
    (let [{:keys [hour minute]
           :or {hour 0 minute 0}} (:time until)]
      (if-not floating-day
        (date year month day hour minute)
        (let [frame (-> (date year month)
                        time->value
                        (calendar-frame :month))
              on-weekday (fn [weekday]
                           (filter #(= weekday (:day %)) frame))
              match (if (clojure.string/starts-with? floating-day "last")
                      ;; "lastSun": weekday name follows the "last" prefix
                      (last (on-weekday (days-mapping (subs floating-day 4))))
                      ;; "Sun>=8" / "Sun<=25": weekday, comparison and day-of-month limit
                      (let [[_ day-name operator limit] (re-matches #"(\w+)(>=|<=)(\d+)"
                                                                    floating-day)
                            limit (Integer/parseInt limit)
                            candidates (on-weekday (days-mapping day-name))]
                        (case operator
                          ">=" (first (filter #(>= (:day-in-month %) limit) candidates))
                          "<=" (last (filter #(<= (:day-in-month %) limit) candidates)))))]
          (when match
            ;; keep the time of day, which the fixed-day branch also honours
            (value->date (+ (:value match)
                            (hours hour)
                            (minutes minute)))))))))

(defn process-zone
  "Turns the ordered rows of one zone into
  {:current <last row without :until, plus :from> :history [<earlier rows>]}.

  Every row's `:until` map is resolved with `until-date` and then converted with
  `date->value` (nil stays nil). The resolution runs with `*offset*` bound
  according to the time suffix of the row's UNTIL time:
    \"s\"           - `:offset` of the previous row (0 when there is none)
    \"u\" \"g\" \"z\"   - 0
    anything else - `*offset*` is left untouched
  `:from` of `:current` is the resolved `:until` of the last history row.

  The first row is resolved exactly like the following ones, so its time of day,
  time suffix and floating day are honoured instead of being reduced to
  year/month/day.

  NOTE(review): for the \"s\" suffix the previous row's offset is used; confirm
  whether the offset of the row itself was intended."
  [rules]
  (let [resolve-until
        (fn [previous row]
          (let [{previous-offset :offset
                 :or {previous-offset 0}} previous
                resolved (case (-> row :until :time :time-suffix)
                           "s" (binding [*offset* previous-offset]
                                 (update row :until until-date))
                           ("u" "g" "z") (binding [*offset* 0]
                                           (update row :until until-date))
                           (update row :until until-date))]
            ;; conversion to a value happens outside of the offset binding
            (update resolved :until #(some-> % date->value))))
        timeline (reduce
                   (fn [processed row]
                     (conj processed (resolve-until (peek processed) row)))
                   []
                   rules)
        current (dissoc (peek timeline) :until)
        history (vec (butlast timeline))]
    {:current (assoc current :from (:until (peek history)))
     :history history}))

;; Built when the namespace loads: reads each listed tzdb file and merges the
;; results into {:zones {name -> processed zone | canonical-name string}
;;               :rules {rule-name -> vector of rule bodies}}.
;; Zone entries that are strings (links) are kept as they are; all other entries
;; go through `process-zone`.
(def timezone-data
  (letfn [(resolve-entry [entry]
            (if (string? entry)
              entry
              (process-zone entry)))
          (absorb [acc zone-file]
            (let [{zone-entries :zones
                   rule-sets :rules} (extract-data (read-zone zone-file))
                  processed (reduce-kv
                              (fn [result zone-name entry]
                                (assoc result zone-name (resolve-entry entry)))
                              {}
                              zone-entries)]
              (-> acc
                  (update :zones merge processed)
                  (update :rules merge rule-sets))))]
    (reduce absorb
            {}
            [:europe
             :africa
             :northamerica
             :southamerica
             :asia
             :australasia])))

(defn get-rule
  "Returns the entry stored under `rule` in the `:rules` map of `timezone-data`,
  or nil when there is none."
  [rule]
  (-> timezone-data
      :rules
      (get rule)))

(defn get-zone
  "Returns the entry stored under `zone` in the `:zones` map of `timezone-data`,
  or nil when there is none. For a link alias the entry is the canonical zone
  name as a string."
  [zone]
  (-> timezone-data
      :zones
      (get zone)))

;; REPL scratchpad; nothing inside `comment` is evaluated when the namespace loads.
(comment
  ;; Sample zone text for experimenting with the `:zone-definition` start rule.
  (def test-zone 
    "Zone	Europe/Dublin	-0:25:00 -	LMT	1880 Aug  2
     -0:25:21 -	DMT	1916 May 21  2:00s
     -0:25:21 1:00	IST	1916 Oct  1  2:00s
     0:00	GB-Eire	%s	1921 Dec  6 
     0:00	GB-Eire	GMT/IST	1940 Feb 25  2:00s
     0:00	1:00	IST	1946 Oct  6  2:00s
     0:00	-	GMT	1947 Mar 16  2:00s
     0:00	1:00	IST	1947 Nov  2  2:00s
     0:00	-	GMT	1948 Apr 18  2:00s
     0:00	GB-Eire	GMT/IST	1968 Oct 27")


  ;; Evaluating this form replaces the sample defined above.
  (def test-zone
    "Zone	Europe/Minsk	1:50:16 -	LMT	1880
     1:50	-	MMT	1924 May  2 
     2:00	-	EET	1930 Jun 21
     3:00	-	MSK	1941 Jun 28
     1:00	C-Eur	CE%sT	1944 Jul  3
     3:00	Russia	MSK/MSD	1990
     3:00	-	MSK	1991 Mar 31  2:00s
     2:00	Russia	EE%sT	2011 Mar 27  2:00s
     3:00	-	+03")
  (extract-zones [:timezone (zone-parser test-zone :start :zone-definition)])
  (def europe (read-zone :europe))
  (zone-definition :europe)
  (extract-zones europe)
  (extract-rules europe)
  (def data (extract-data europe))
  (get-zone "Europe/Belgrade")
  (get-zone "Africa/Algiers")
  (get-zone "Europe/Riga")
  (get-rule "C-Eur")
  (set
    (map
      (comp :rule-at)
      (reduce
        concat
        (-> timezone-data :rules vals))))
  (-> timezone-data :rules clojure.pprint/pprint)
  (filter #(clojure.string/starts-with? (key %) "Europe") (-> timezone-data :zones ))
  (doseq [[zone rules] (:zones timezone-data)]
    (println {zone (when-not (string? rules) (mapv :until rules))}))
  ;; Raw parse of the europe file without the transformations of `read-zone`.
  ;; The pipeline starts from the file text: `europe` above is already parsed
  ;; data and cannot be passed to `clojure.string/replace`.
  (->
    (zone-definition :europe)
    (clojure.string/replace #"#.*" "")
    clojure.string/split-lines
    (#(remove (comp empty? clojure.string/trim) %))
    (#(clojure.string/join "\n" %))
    (zone-parser :start :timezone)))
