(ns antistock.wikipedia
  (:refer-clojure :exclude [distinct extend group-by replace update])
  (:import java.io.BufferedReader
           java.io.InputStreamReader
           java.net.URL
           java.util.zip.GZIPInputStream)
  (:require [antistock.db :as db]
            [antistock.db.wikipedia :as wikipedia]
            [antistock.json :as json]
            [antistock.time :as time :refer :all :exclude [second]]
            [antistock.util :refer [indexed]]
            [clj-http.client :as http]
            [clojure.java.io :as io]
            [clojure.java.jdbc :as jdbc]
            [clojure.string :refer [replace split]]
            [clojure.tools.logging :refer [infof warnf]]
            [datumbazo.core :refer :all]
            [no.en.core :refer [parse-integer]]
            [net.cgrand.enlive-html :as enlive]))

(defn find-company
  "Queries the Wikipedia API for the page titled with the `:name` of
  `company`. Returns the page info maps found under `:query :pages`
  of the parsed response, or nil when that map has a `:-1` entry."
  [company]
  (let [params {:action "query"
                :format "json"
                :prop "info"
                :inprop "url"
                :titles (:name company)}
        response (http/get "http://en.wikipedia.org/w/api.php"
                           {:query-params params})
        parsed (json/read-json (:body response))
        pages (:pages (:query parsed))]
    ;; A `:-1` key is treated as "no such page" for the requested title.
    (when-not (:-1 pages)
      (vals pages))))

(defn company-url-duck-duck-go
  "Find the Wikipedia url of `company` using the Duck Duck Go API.
  Searches for the `:name` of `company` followed by the word
  Wikipedia and returns the `:AbstractURL` of the response body when
  it matches wikipedia.org (case-insensitive), otherwise nil."
  [company]
  (let [url (-> (http/get
                 "http://api.duckduckgo.com"
                 {:as :json
                  :query-params
                  {:format "json"
                   ;; The keyword lookup must be closed before the suffix:
                   ;; a second argument to `:name` is the lookup default,
                   ;; so the suffix would never be appended to the name.
                   :q (str (:name company) " Wikipedia")}})
                :body :AbstractURL)]
    ;; Only a yes/no answer is needed, so the first match is enough.
    (when (re-find #"(?i)wikipedia\.org" (str url))
      url)))

(defn company-url-wikipedia
  "Find the Wikipedia url of `company` using the Wikipedia API.
  Returns the `:fullurl` of the first page returned by `find-company`,
  or nil when there is none."
  [company]
  (-> company
      find-company
      first
      :fullurl))

(defn company-url
  "Returns the URL for `company` in Wikipedia. The Wikipedia API is
  asked first; Duck Duck Go is only consulted when that lookup
  yields nothing."
  [company]
  (if-let [wikipedia-url (company-url-wikipedia company)]
    wikipedia-url
    (company-url-duck-duck-go company)))

(defn update-wikipedia-urls
  "Update the wikipedia urls of all companies.

  Walks the companies returned by `db/companies-by-updated-at-asc`.
  For each company with a resolvable Wikipedia url, saves a wikipedia
  page for that url and stores the page id on the company as
  `:wikipedia-page-id`. Progress is logged per company; companies
  without a url are logged as warnings and left unchanged."
  [db]
  (let [companies (db/companies-by-updated-at-asc db)
        ;; Counted once up front: the total does not change while looping,
        ;; and counting a seq walks it in full.
        total (count companies)]
    (doseq [[n company] (indexed companies)
            ;; 1-based position for the progress messages.
            :let [position (inc n)]]
      (if-let [url (company-url company)]
        (let [page (db/save-wikipedia-page db {:url url})]
          (db/update-company db (assoc company :wikipedia-page-id (:id page)))
          (infof "[%s/%s] Updated %s from Wikipedia."
                 position total (:name company)))
        (warnf "[%s/%s] Can't find %s in Wikipedia."
               position total (:name company))))))

(defn select-title
  "Returns the first content item of the first `span` selected inside
  the `h1#firstHeading` element of `document`, or nil when nothing
  matches."
  [document]
  (let [spans (enlive/select document [:h1#firstHeading :span])
        heading (first spans)]
    (first (:content heading))))

(defn update-page-title
  "Fetches the `:url` of `page` as a stream, reads the title from the
  parsed HTML and, when one is found, passes `page` with that `:title`
  to `db/update-wikipedia-page`. Returns nil when no title is found.
  The response stream is closed on exit."
  [db page]
  (let [response (http/get (:url page) {:as :stream})]
    (with-open [stream (:body response)]
      (when-let [title (select-title (enlive/html-resource stream))]
        (db/update-wikipedia-page db (assoc page :title title))))))

(defn page-ids-by-title
  "Returns a map from the `:title` to the `:id` of every page returned
  by `wikipedia/wikipedia-pages` for `db`. When several pages share a
  title, the last one wins."
  [db]
  (reduce (fn [ids page]
            (assoc ids (:title page) (:id page)))
          {}
          (wikipedia/wikipedia-pages db)))
