begin
    using ReinforcementLearning
    using Flux
    using Statistics
    using Plots
    using Distributions
end

begin
    # Probability that a coin flip comes up heads (the gambler wins the stake).
    const pₕ = 0.4
    # The gambler wins once their capital reaches this amount.
    const WinCapital = 100

    # Internal states are 1-based: state s corresponds to capital s - 1.
    decode_state(s::Int) = s - 1
    encode_state(s::Int) = s + 1

    # Return the list of (reward, is_terminated, next_state) => probability
    # pairs for placing stake `a` in state `s`.
    function nextstep(s::Int, a::Int)
        s = decode_state(s)
        a = min(s, a)  # a stake can never exceed the current capital
        if s == WinCapital || s == 0
            # Absorbing states: capital 0 (ruin) and WinCapital (goal reached).
            [(0., false, encode_state(s)) => 1.0]
        else
            [
                # Heads: the reward is 1 only when the goal is reached.
                ((s + a >= WinCapital ? 1.0 : 0.), false, encode_state(min(s + a, WinCapital))) => pₕ,
                # Tails: the stake is lost.
                (0., false, encode_state(max(s - a, 0))) => 1 - pₕ
            ]
        end
    end

    # A tabular model of the gambler's problem: all transitions are
    # precomputed and cached in a dictionary keyed by (state, action).
    struct GamblerProblemEnvModel <: AbstractEnvironmentModel
        cache
    end

    function GamblerProblemEnvModel()
        GamblerProblemEnvModel(
            Dict(
                (s, a) => nextstep(s, a)
                for s in 1:(WinCapital+1) for a in 1:WinCapital
            )
        )
    end

    RLBase.state_space(m::GamblerProblemEnvModel) = Base.OneTo(WinCapital + 1)
    RLBase.action_space(m::GamblerProblemEnvModel) = Base.OneTo(WinCapital)

    (m::GamblerProblemEnvModel)(s, a) = m.cache[(s, a)]
end
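
# Not part of the original notebook: a quick sanity check of the cached model.
# With the 1-based state encoding above, capital 50 with a stake of 10 moves to
# capital 60 with probability pₕ and to capital 40 otherwise; the reward is 0
# because the goal has not been reached:
GamblerProblemEnvModel()(encode_state(50), 10)
# [(0.0, false, 61) => 0.4, (0.0, false, 41) => 0.6]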

# One value estimate per state (WinCapital + 1 states, counting capital 0).
V = TabularVApproximator(; n_state=1+WinCapital, opt=Descent(1))

# Run value iteration over the model (γ = 1, i.e. undiscounted).
RLZoo.value_iteration!(V=V, model=GamblerProblemEnvModel(), γ=1.0, max_iter=1000)
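
# Not part of the original notebook: a sketch of recovering a greedy policy
# from the converged value table. `greedy_policy` is a hypothetical helper,
# not an RLZoo function; for each capital level it does a one-step lookahead
# Σ p · (r + γ V(s′)) through the model and returns the best stake.
function greedy_policy(V, model; γ=1.0)
    policy = zeros(Int, WinCapital - 1)
    for capital in 1:(WinCapital-1)
        s = encode_state(capital)
        q = [sum(p * (r + γ * V.table[s′]) for ((r, _, s′), p) in model(s, a))
             for a in 1:capital]
        policy[capital] = argmax(q)  # stake = index; ties break toward smaller stakes
    end
    policy
end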

# Plot the value estimates for capital 1..99, skipping the two absorbing
# states at capital 0 and WinCapital.
plot(V.table[2:end-1])
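
# Again a sketch, building on the hypothetical greedy_policy above: the
# companion plot of the final policy (stake as a function of capital).
plot(greedy_policy(V, GamblerProblemEnvModel()); xlabel="Capital", ylabel="Stake")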