(ns antistock.wikipedia.edits
  (:refer-clojure :exclude [distinct group-by replace update])
  (:require [antistock.config.core :refer [config]]
            [antistock.db :as db]
            [antistock.db.system :refer [with-db]]
            [antistock.util :refer [crc-path indexed]]
            [antistock.db.wikipedia :as wikipedia]
            [antistock.wikipedia :refer [update-page-title]]
            [antistock.time :as time]
            [clj-http.client :as http]
            [clj-time.coerce :refer [to-date to-sql-time]]
            [clojure.string :refer [replace]]
            [clojure.tools.logging :as log]
            [commandline.core :refer [print-help with-commandline]]
            [datumbazo.core :refer :all :exclude [with-db run]]
            [environ.core :refer [env]]
            [net.cgrand.enlive-html :as enlive]
            [no.en.core :refer [parse-integer]]))

(defn make-request
  "Build the clj-http request map for the Wikipedia history page of
  `title`. The `:limit` entry of `opts` sets the number of history
  entries to request; it falls back to 100 when missing or nil."
  [title & [opts]]
  (let [limit (or (:limit opts) 100)
        query {:action "history"
               :title title
               :limit limit}]
    {:as :stream
     :method :get
     :url "http://en.wikipedia.org/w/index.php"
     :query-params query}))

(defn history-document
  "Fetch the Wikipedia history page for `page` and return it as an
  Enlive resource. `opts` is passed on to `make-request`. Returns nil
  when `page` has no `:title`."
  [page & [opts]]
  (when-let [title (:title page)]
    (let [response (http/request (make-request title opts))]
      ;; The response body is closed again once the document is parsed.
      (with-open [stream (:body response)]
        (enlive/html-resource stream)))))

(defn parse-history-size
  "Parse the Wikipedia history size `s`. Parentheses and commas are
  stripped first, then a trailing whitespace + \"bytes\" suffix, and
  the remainder is handed to `parse-integer`."
  [s]
  (let [without-punctuation (replace (str s) #"[(),]" "")
        without-unit (replace without-punctuation #"\s+bytes" "")]
    (parse-integer without-unit)))

(defn select-history-time
  "Select the Wikipedia history time from `document`. Reads the first
  content item of the first `a.mw-changeslist-date` element, parses it
  with `time/parse-wikipedia-time` and converts the result with
  `to-date`."
  [document]
  (let [link (first (enlive/select document [:a.mw-changeslist-date]))
        text (first (:content link))]
    (to-date (time/parse-wikipedia-time text))))

(defn select-history-user
  "Select the Wikipedia history user from `document`. Returns the first
  content item of the first link inside a `span.history-user` element,
  or nil when no such link is found."
  [document]
  (let [link (first (enlive/select document [:span.history-user :a]))]
    (first (:content link))))

(defn select-history-size
  "Select the Wikipedia history size from `document`. Reads the first
  content item of the first `span.history-size` element and passes it
  to `parse-history-size`."
  [document]
  (let [span (first (enlive/select document [:span.history-size]))
        text (first (:content span))]
    (parse-history-size text)))

(defn select-page-edit
  "Select the Wikipedia page edit from `document`.

  Returns a map with the keys `:author`, `:edited-at` and `:size`, or
  nil when any of the three values could not be selected."
  [document]
  (let [author (select-history-user document)
        edited-at (select-history-time document)
        size (select-history-size document)]
    (when (and author edited-at size)
      ;; Reuse the values selected above instead of running every
      ;; selector (and the time/size parsing) a second time.
      {:author author
       :edited-at edited-at
       :size size})))

(defn select-page-edits
  "Select the Wikipedia page edits from `document`. Every `li` below
  `ul#pagehistory` is passed to `select-page-edit`; entries for which
  that returns nil are left out."
  [document]
  (let [entries (enlive/select document [:ul#pagehistory :li])]
    (keep select-page-edit entries)))

(defn fetch-page-edits
  "Fetch the Wikipedia edits for `page`. Requests the history document
  with a limit of 1000 entries and selects the page edits from it."
  [page]
  (-> page
      (history-document {:limit 1000})
      (select-page-edits)))

(defn drop-tmp-page-edits
  "Drop the `edits` table used for staging page edits, if it exists."
  [db]
  (deref
   (drop-table db [:edits]
     (if-exists true))))

(defn create-tmp-page-edits
  "Create the temporary `edits` table used for staging page edits. It
  has the columns `page-id`, `author`, `edited-at` and `size`."
  [db]
  (deref
   (create-table db :edits
     (column :page-id :integer)
     (column :author :text)
     (column :edited-at :timestamp-with-time-zone)
     (column :size :integer)
     (temporary true))))

(defn prepare-edit
  "Prepare `edit` for insertion: set `:page-id` to the `:id` of `page`
  and convert `:edited-at` with `to-sql-time`."
  [page edit]
  (let [page-id (:id page)
        edited-at (to-sql-time (:edited-at edit))]
    (assoc edit
           :page-id page-id
           :edited-at edited-at)))

(defn insert-tmp-page-edits
  "Insert `edits` as rows into the `edits` staging table."
  [db edits]
  (deref
   (insert db :edits []
     (values edits))))

(defn insert-page-edits
  "Insert the page edits from the temporary `edits` table into the
  `wikipedia.page-edits` table.

  Only rows of `edits` that have no row in `wikipedia.page-edits` with
  the same `page-id` and `edited-at` are inserted. The statement is
  dereferenced, so it is executed as part of this call."
  [db]
  @(insert db :wikipedia.page-edits [:page-id :author :edited-at :size]
     (select db [:edits.page-id :edits.author :edits.edited-at :edits.size]
       (from :edits)
       ;; Left join against the existing page edits on (page-id, edited-at) ...
       (join :wikipedia.page-edits
             '(on (and (= :wikipedia.page-edits.page-id :edits.page-id)
                       (= :wikipedia.page-edits.edited-at :edits.edited-at)))
             :type :left)
       ;; ... and keep only the staged rows that found no match.
       (where `(is-null :wikipedia.page-edits.id)))))

(defn update-page-edits
  "Update the Wikipedia page edits for `page`. Fetches the edits of the
  page, then in one transaction recreates the `edits` staging table,
  fills it with the prepared edits and inserts them into the page edits
  table via `insert-page-edits`."
  [db page]
  (let [edits (->> (fetch-page-edits page)
                   (map #(prepare-edit page %)))]
    (with-transaction [db db]
      (drop-tmp-page-edits db)
      (create-tmp-page-edits db)
      (insert-tmp-page-edits db edits)
      (insert-page-edits db))))

(defn run
  "Update the page edits for all Wikipedia pages.

  Opens a database connection from the `:db` entry of `config`, loads
  all Wikipedia pages and updates title and edits page by page. A
  failure on one page is logged and does not stop the remaining pages.
  `opts` is accepted but not used."
  [config & [opts]]
  (with-db [db (:db config)]
    (let [pages (wikipedia/wikipedia-pages db)
          total (count pages)]
      (log/infof "Updating Wikipedia page edits for %s pages." total)
      (doseq [[n page] (indexed pages)]
        (try
          (let [page (update-page-title db page)]
            (update-page-edits db page)
            (log/infof "[%s/%s] Updated %s" (inc n) total (:url page)))
          (catch Exception e
            ;; Hand the exception itself to the logger so the exception
            ;; class and stack trace are kept; the message alone can be
            ;; nil and says nothing about where the failure happened.
            (log/errorf e "Can't update %s: %s" (:url page) (.getMessage e))))))))

(defn -main
  "Command line entry point. With the help option set, prints the usage
  and exits with status 0; otherwise runs the update with the
  configuration built from the environment."
  [& args]
  (with-commandline [[opts args] args]
    [[h help "Print this help."]]
    (if (:help opts)
      (do
        (print-help "as wikipedia edits [OPTIONS]")
        (System/exit 0))
      (run (config env)))))
