(ns ^{:doc "Upper Confidence Bound algorithm"
      :author "Paul Ingles"}
  bandit.algo.ucb
  (:use [bandit.arms :only (exploit unpulled total-pulls)]
        [clojure.math.numeric-tower :only (sqrt)]))

(defn bonus-value
  "Exploration bonus of the UCB1 rule:

      sqrt((2 * ln total-pulls) / arm-pulls)

  total-pulls - pull count passed in for the whole set of arms.
  arm-pulls   - pull count of the arm being scored.

  Returns a double. When arm-pulls is zero the bonus is positive infinity
  instead of a \"Divide by zero\" ArithmeticException, so an arm that has
  never been pulled always carries the largest possible bound."
  [total-pulls arm-pulls]
  (if (zero? arm-pulls)
    Double/POSITIVE_INFINITY
    ;; Math/log yields a double, so the quotient is always a double and
    ;; Math/sqrt produces the same value numeric-tower's sqrt would.
    (Math/sqrt (/ (* 2 (Math/log total-pulls))
                  arm-pulls))))

(defn ucb-value
  "Returns `arm` with an added :ucb-value entry.

  The entry is the arm's :value plus the bonus computed by `bonus-value`
  from `(total-pulls arms)` and the arm's own :pulls."
  [{:keys [value pulls] :as arm} arms]
  (let [overall (total-pulls arms)
        bonus   (bonus-value overall pulls)
        bound   (+ value bonus)]
    (assoc arm :ucb-value bound)))

(defn select-arm
  "Picks the next arm to play from `arms`.

  If `unpulled` yields any arm, the first of those is returned as-is.
  Otherwise every arm is annotated via `ucb-value` and the annotated
  collection is passed to `exploit`, keyed on :ucb-value."
  [arms]
  (if-let [untried (first (unpulled arms))]
    untried
    (let [scored (for [candidate arms]
                   (ucb-value candidate arms))]
      (exploit :ucb-value scored))))
