CrimsonSkyline.jl
trace
To a large extent, Nodes have local control over the behavior of inference algorithms via their interpretation. There are a variety of Interpretations. The type hierarchy is flat:
abstract type Interpretation end
struct Nonstandard <: Interpretation end
struct Standard <: Interpretation end
struct Replayed <: Interpretation end
struct Conditioned <: Interpretation end
struct Blocked <: Interpretation end
struct Deterministic <: Interpretation end
struct Proposed <: Interpretation endCrimsonSkyline.ParametricNode — Typemutable struct ParametricNode{A, D, T} <: Node
address :: A
dist :: D
value :: Maybe{T}
logprob :: Float64
logprob_sum :: Float64
observed :: Bool
pa :: Array{Node, 1}
ch :: Array{Node, 1}
interpretation :: Union{Interpretation, Vector{Interpretation}}
last_interpretation :: Union{Interpretation, Vector{Interpretation}}
endA Node that can be used with arbitrary code for which rand and Distributionss.logpdf are defined.
CrimsonSkyline.SampleableNode — Typemutable struct SampleableNode{A, T} <: Node
address :: A
dist :: Sampleable
value :: Maybe{T}
logprob :: Float64
logprob_sum :: Float64
observed :: Bool
pa :: Array{Node, 1}
ch :: Array{Node, 1}
interpretation :: Union{Interpretation, Vector{Interpretation}}
last_interpretation :: Union{Interpretation, Vector{Interpretation}}
endA Node that is restricted to be used with any Sampleable from Distributions.jl.
CrimsonSkyline.Trace — Typeabstract type Trace endBase type for all traces. Traces support the following Base methods: setindex!, getindex, keys, values, and length.
CrimsonSkyline.TypedTrace — Typemutable struct TypedTrace{A, T} <: Trace
trace :: OrderedDict{A, SampleableNode{A, T}}
logprob_sum :: Float64
endTrace that can hold nodes of the specific address (A) and value (T) types.
CrimsonSkyline.UntypedTrace — Typemutable struct UntypedTrace
trace :: OrderedDict{Any, Node}
logprob_sum :: Float64
endTrace that can hold nodes with all address and value types.
CrimsonSkyline.connect_pa_ch! — Methodfunction connect_pa_ch!(t :: Trace, pa, a)Connects parent and child nodes. Adds child nodes to parent's ch and parent nodes to child's pa.
CrimsonSkyline.input — Methodinput(t :: Trace, a, d)Track a model input. Used only in graph intermediate representation and factor graph.
CrimsonSkyline.interpret_latent! — Methodfunction interpret_latent!(t :: Trace, i :: Interpretation)Changes the interpretation of all latent nodes in t to have interpretation == i
CrimsonSkyline.loglikelihood — Methodfunction loglikelihood(t :: Trace)Computes and returns the log likelihood of the observed data under the model:
\[\ell(t) = \sum_{a:\ [a \in \text{keys}(t)] \wedge [\text{interpretation}(a) = \text{Standard}]} \log p(t[a])\]
CrimsonSkyline.logprob! — Methodfunction logprob!(t :: Trace)Computes the joint log probability to the trace and assigns it to t.logprob_sum.
CrimsonSkyline.logprob — Methodfunction logprob(t :: Trace)Computes and returns the joint log probability of the trace:
\[\log p(t) = \sum_{a \in \text{keys}(t)}\log p(t[a])\]
CrimsonSkyline.node — Methodfunction node(value, address :: A, dist :: D, is_obs :: Bool, i :: Interpretation) where {A, D}Outer constructor for Node where data is passed during construction. Data type is inferred from the passed data.
CrimsonSkyline.node — Methodfunction node(value, address :: A, dist :: D, is_obs :: Bool, i :: Interpretation) where {A, D}Outer constructor for Node where data is passed during construction. Data type is inferred from the passed data.
CrimsonSkyline.node — Methodfunction node(T :: DataType, address :: A, dist :: D, is_obs :: Bool, i :: Interpretation) where {A, D}Outer constructor for Node where no data is passed during construction.
CrimsonSkyline.node — Methodfunction node(T :: DataType, address :: A, dist :: D, is_obs :: Bool, i :: Interpretation) where {A, D}Outer constructor for Node where no data is passed during construction.
CrimsonSkyline.observe — Methodfunction observe(t :: Trace, a, d, s; pa = ())If d is not nothing an alias for calling sample with standard interpretation. Otherwise, an alias for calling sample with nonstandard interpretation.
CrimsonSkyline.pa_from_trace — Methodfunction pa_from_trace(t :: Trace, pa)Collects nodes in trace corresponding to an iterable of parent addresses pa.
CrimsonSkyline.plate — Methodfunction plate(t::Trace, op::F, a, d, s::Int64, i::Conditioned; pa = ()) where F<:FunctionPlate over conditioned variables.
CrimsonSkyline.plate — Methodfunction plate(t::Trace, op::F, a, d, s::Int64, i::Blocked; pa = ()) where F<:FunctionPlate over blocked variables.
CrimsonSkyline.plate — Methodfunction plate(t::Trace, op::F, a, d, s::Int64, i::Nonstandard; pa = ()) where F<:FunctionPlate over latent variables.
CrimsonSkyline.plate — Methodfunction plate(t::Trace, op::F, a, d, s::Int64, i::Replayed; pa = ()) where F<:FunctionPlate over replayed variables. Note that this method assumes and does not check that the value to be replayed v satisfies length(v) == s.
CrimsonSkyline.plate — Methodfunction plate(t::Trace, op::F, a, d, s::Int64; pa = ()) where F<:FunctionSample or observe a vector of random variables at a single site instead of multiple. This can speed up inference since the number of sites in the model will no longer scale with dataset size (though numerical value computation is still linear in dataset size).
Example usage: instead of
vals = [sample(t, "val $i", Geometric()) for i in 1:N]we can write
vals = plate(t, sample, "val", Geometric(), N)Mathematically, this is equivalent to the product $p(z) = \prod_n p(z_n)$ and treating it as the single object $p(z)$ instead of the $N$ objects $p(z_n)$.
CrimsonSkyline.plate — Methodfunction plate(t::Trace, op::F, a, d, v::Vector{T}, params; pa = ()) where {T, F<:Function}Plate over observed variables with different values but identical distribution, i.e., $p(x|z) = \prod_n p(x_n | z_n)$. This is as opposed to plate(t::Trace, op::F, a, d, v::Vector{T}; pa = ()), which is equivalent to $p(x|z) = \prod_n p(x_n | z)$.
params must have the same length as v. Each element of params corresponds to a vector of that particular component of the params, i.e., $z = (z_1, ..., z_D)$ where each $z_d$ has length $N$, the number of observed datapoints, and $D$ is the cardinality of the parameterization of the distribution.
E.g., replace
locs = sample(t, "locs", MvNormal(D, 1.0))
for (i, (loc, d)) in enumerate(zip(locs, data))
observe(t, "data $i", Normal(loc, 1.0), d)
endwith
locs = sample(t, "locs", MvNormal(D, 1.0))
plate(t, observe, "data", Normal, data, (locs, ones(D)))CrimsonSkyline.plate — Methodfunction plate(t::Trace, op::F, a, d, v::Vector{T}; pa = ()) where {T, F<:Function}Plate over observed variables, i.e., a plated component of model likelihood. v is the vector of observations, while op is likely observe.
Example usage: instead of
for (i, d) in enumerate(data)
observe(t, "data $i", Normal(loc, scale), d)
endwe can write
plate(t, observe, "data", Normal(loc, scale), data)CrimsonSkyline.propose — Methodpropose(t :: Trace, a, d)Propose a value for the address a in trace t from the distribution d.
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, params, i :: Blocked; pa = ())Samples from d passing the optional arguments params, deletes the node stored at address a from trace t, and returns the sampled value.
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, f, v, i :: Deterministic; pa = ())Creates a deterministic node mapping the tuple of data v through function f, storing the value in trace t at address a.
- Infers input type from
v - Maps tuple of data
vthrough functionf, yieldingr = f(v...) - Creates a deterministic node and stores it in
tat addressa - Optionally adds nodes corresponding to the addresses in
paas parent nodes - Returns
r
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, params, i :: Nonstandard; pa = ())Samples from distribution d into trace t at address a.
- Samples a value from
dpassing the optional argumentsparams - Creates a sample node
- Adds the sample node to trace
tat valuea - Optionally adds nodes corresponding to the addresses in
paas parent nodes - Returns the sampled value
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, params, i :: Replayed; pa = ())Replays the sampled node through the trace.
- If
ais not int's address set, callssample(t, a, d, NONSTANDARD; pa = pa). - Creates a sample node that copies the value from the last node stored in the trace at address
a. - Adds the sample node to trace
tat valuea - Optionally adds nodes corresponding to the addresses in
paas parent nodes - Resets the node's interpretation to the original interpretation
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, s, i :: Standard; pa = ())Scores an observed value s against the distribution d, storing the value in trace t at address a and optionally adds nodes corresponding to the addresses in pa as parent nodes.
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, params, ii :: Array{Interpretation, 1}; pa = ())Sequentially apply sample statements with interpretations as given in ii. This is used to depth-first traverse the interpretation graph.
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, params; pa = ())If a is in the set of trace addresses, calls sample using t[a]'s interpretation. Otherwise, calls sample using nonstandard interpretation.
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, i :: Blocked; pa = ())Samples from d, deletes the node stored at address a from trace t, and returns the sampled value.
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, i :: Nonstandard; pa = ())Samples from distribution d into trace t at address a.
- Samples a value from
d - Creates a sample node
- Adds the sample node to trace
tat valuea - Optionally adds nodes corresponding to the addresses in
paas parent nodes - Returns the sampled value
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, i :: Replayed; pa = ())Replays the sampled node through the trace.
- If
ais not int's address set, callssample(t, a, d, NONSTANDARD; pa = pa). - Creates a sample node that copies the value from the last node stored in the trace at address
a. - Adds the sample node to trace
tat valuea - Optionally adds nodes corresponding to the addresses in
paas parent nodes - Resets the node's interpretation to the original interpretation
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, i :: Union{Standard,Conditioned}; pa = ())Scores an observed value against the distribution d, storing the value in trace t at address a and optionally adds nodes corresponding to the addresses in pa as parent nodes.
This method is used by the condition effect. It will probably not be used by most users.
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d, ii :: Array{Interpretation, 1}; pa = ())Sequentially apply sample statements with interpretations as given in ii. This is used to depth-first traverse the interpretation graph.
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, d; pa = ())If a is in the set of trace addresses, calls sample using t[a]'s interpretation. Otherwise, calls sample using nonstandard interpretation.
CrimsonSkyline.trace — Methodtrace(A, T)This is the recommended way to construct a new typed trace. A is the address type, T is the value type.
CrimsonSkyline.trace — Methodtrace()This is the recommended way to construct a new trace.
CrimsonSkyline.transform — Methodfunction transform(t :: Trace, a, f :: F, v; pa = ()) where F <: FunctionAlias for sample(t, a, f, v, DETERMINISTIC; pa = pa).
field
CrimsonSkyline also includes methods for sampling from undirected models.
Using Metropolis-Hastings
Here is an example (taken directly from the tests!). Suppose we have three variables $a$, $b$, and $c$, with field structure a - b - c and continuous factors $\psi_{a,b}(a, b) = \mathrm{MvNormal}(0, \Sigma_{a,b})$ with $\Sigma_{a,b} = \begin{pmatrix} 1.0 & 0.5 \\ 0.5 & 1.0 \end{pmatrix}$ and $\psi_{b,c}(b, c) = \mathrm{MvNormal}(\begin{pmatrix} 2.0 & 2.0 \end{pmatrix}, \Sigma_{b,c})$ with $\Sigma_{b,c} = \begin{pmatrix} 2.0 & -1.0 \\ -1.0 & 2.0 \end{pmatrix}$. We can construct the factors using two ordinary functions:
factor_ab(x) = logpdf(MvNormal(PDMat([1.0 0.5; 0.5 1.0])), x)
factor_bc(x) = logpdf(MvNormal([2.0, 2.0], PDMat([2.0 -1.0; -1.0 2.0])), x)Suppose that we observe the data b = 3.0. We can instantiate the RandomField object with the factors and this evidence as follows:
factors = Dict(["a", "b"] => factor_ab, ["b", "c"] => factor_bc)
evidence = Dict("b" => 3.0)
field = RandomField(factors, evidence)In general one cannot sample from random fields directly and instead must use a method such as belief propagation or approximate inference methods. Here we use Metropolis Hastings. We define a proposal kernel that can (a) propose new values and (b) score those values in comparison with old sampled values. This functionality can be implemented as a function, but we will implement a struct to capture some proposal kernel state (namely, the standard deviation of a proposal distribution). We define the proposal kernel as follows:
struct FactorProposal
addresses :: Vector{String}
last :: Vector{String}
std :: Float64
end
FactorProposal(addresses) = FactorProposal(addresses, [addresses[1]], 0.5)
function (fp::FactorProposal)(x::Dict)
new = deepcopy(x)
address = rand(fp.addresses)
new[address] = randn() * fp.std + new[address]
fp.last[1] = address
new
end
function (fp::FactorProposal)(x_prime::Dict, x::Dict)
address = fp.last[1]
logpdf(Normal(x[address], fp.std), x_prime[address]) - log(length(fp.addresses))
endThis is pretty straighforward: calling fp(x) generates a proposal x_prime i.e., $x' \sim q(x)$ (which in this case is just a random walk step away from the current point) and calling fp(x_prime, x) computes the transition probability $q(x' | x)$. Once we have the proposal in hand, we can sample from the posterior $p(a, c | b = 3.0)$ by calling mh(...) just as in the case of trace-based models:
addresses = ["a", "c"]
proposal = FactorProposal(addresses)
initial_values = Dict("a" => 0.0, "b" => 3.0, "c" => 0.0)
samples = mh(field, [proposal], initial_values; burn=1000, thin=100, num_iterations=11000)Because RandomFields aren't generative models, we have to seed the MH algorithm with an initial value for each address. (In this case, the value used to seed "b" isn't used by mh because we've posted evidence to that address; if we just wanted to sample from $p(a, b, c)$ without posting evidence, the initiaizer "b" => 3.0 would be used.) We can visualize the marginal posteriors $p(a|b=3.0)$ and $p(c | b = 3.0)$ below (using the built-in plot_marginal):

In combination with a proposal kernel and MH initialization point, random fields can be used in CrimsonSkyline's trace-based PPL. There is convenience wrapper to bundle these components together called GenerativeField. Here is an example of using it in the trace-based PPL:
t = trace()
upstream_value = sample(t, "a", Normal())
# to pass upstream values as parameters to the random field
# just post them as evidence
field = RandomField(factors, Dict("a" => upstream_value))
proposal = FactorProposal(["b", "c"])
init = Dict("a" => 0.0, "b" => 0.0, "c" => 0.0)
gf = GenerativeField(field, proposal, init)
sample(t, "field", gf)CrimsonSkyline.GenerativeField — Typestruct GenerativeField{T}
field::RandomField
proposal::T
val::Dict
...
endA generative field is a three-tuple of a random field field, a proposal kernel proposal, and an initialization value val used to start MH sampling.
CrimsonSkyline.RandomField — Typestruct RandomField
names::Set{String}
factors::Dict{Vector{String},Function}
evidence::Dict{String,Any}
endA representation of a random field by a collection of factors: $\log p(x) = \sum_{f \in \mathcal F} \log \psi_f(x_f)$, where $\mathcal F$ is the set of (log) factors and $x_f$ is the set of variables incident on that (log) factor. factors should be properly normalized log mass or density functions. There is no restriction on the state space of the variables involved as long as the factor functions can evaluate the log probability of the variables. For example,
factor_ab(x) = logpdf(MvNormal(PDMat([1.0 0.5; 0.5 1.0])), x)
factor_bc(x) = logpdf(MvNormal([2.0, 2.0], PDMat([2.0 -1.0; -1.0 2.0])), x)are two valid (log) factor functions – the first corresponding to the factor $\log \psi_{a,b}(x_a,x_b)$ and the second corresponding to the factor $\log \psi_{b,c}(x_b, x_c)$. Posting evidence is done using a dictionary mapping an address to value, e.g., evidence = Dict("b" => 3.0).
Calling a random field corresponds to evaluating its log probability with the passed argument, e.g.,
my_rf = RandomField(...)
x = Dict("a" => 1.0, "b" => -2.1)
my_lp = my_rf(x) # corresponds to logprob(my_rf, x)CrimsonSkyline.RandomField — Methodfunction RandomField(factors::Dict{Vector{String},Function}, evidence::Dict)Outer constructor for RandomField that requires a dict of factors and allows posting evidence when the field is created (instead of manually doing so later).
CrimsonSkyline.RandomField — Methodfunction RandomField(factors::Dict{Vector{String},Function})Outer constuctor for RandomField that requires only a dict of factors.
Base.rand — Methodfunction Distributions.rand(gf::GenerativeField; num_iterations=2500)Sample from a generative fied using num_iterations of MH sampling. The value generated by MH at the end of num_iterations of sampling is returned. To customize burn-in, you can just overload this function, e.g. rand(gf::GenerativeField) = rand(gf; num_iterations=10000).
CrimsonSkyline.logprob — Methodfunction logprob(rf::RandomField, x::Dict)Evaluates the log probability of a set of values against the density described by the random field rf. The values x should have the format address => value.
io
Saving and loading traces and SamplingResults is possible using the save and load functions. This functionality depends on JuliaDB.
Examples:
# simple example model
function io_model(t::Trace, dim::Int64, data::Int64)
z = sample(t, "z", Dirichlet(ones(dim)))
observe(t, "x", Categorical(z), data)
end
# saving a trace
t = trace()
dim = 10
data = 7
io_model(t, dim, data)
testpath = joinpath(@__DIR__, "TESTIO")
db_file = joinpath(testpath, "test.jdb")
save(t, db_file)
# loading a trace
loaded_trace = load(db_file)
# creating and saving some results
results = mh(io_model; params = (dim, data), burn = 0, thin = 1, num_iterations=10)
results_file = joinpath(testpath, "io_model.csm")
save(results, results_file)
# loading saved results and use to serve model
loaded_results = load(joinpath(testpath, "io_model.csm"))
updated_model = update(io_model, loaded_results) # update effect, see effects.jl
new_t, _ = updated_model(trace(), dim, data)CrimsonSkyline.load — Methodfunction load(f)Load a Trace or SamplingResults object from file. The file must be saved in JuliaDB format with ending .jdb, which will be interpreted as a single saved trace, or must be a directory with ending .csm, which will be interpreted as a SamplingResults object.
CrimsonSkyline.load_csm — Methodfunction load_csm(f) :: SamplingResultsLoads a SamplingResults from a directory. The directory contains metadata.txt, which currently stores the interpretation of the SamplingResults (i.e., what kind of algorithm generated those results), and a file results.jdb, which is a JuliaDB table of the results.
CrimsonSkyline.load_jdb — Methodfunction load_jdb(f) :: TraceLoads a serialized juliadb table from file f and converts it into a trace.
CrimsonSkyline.save — Methodfunction save(r :: SamplingResults, f)Saves a SamplingResults to disk in the directory f.
CrimsonSkyline.save — Methodfunction save(t :: Trace, f)Saves a trace to disk at the filepath f.
CrimsonSkyline.to_table — Methodfunction to_table(t :: Trace)Turns a trace into a juliadb table. Does not store parent / child relationships.
effects
A library of functions that change the interpretation of some or all nodes in a trace.
CrimsonSkyline.block — Methodblock(f :: F, t :: Trace) where F <: FunctionConverts all traced randomness into untraced randomness.
CrimsonSkyline.block — Methodfunction block(f, t :: T, addresses) where T <: TraceGiven a stochastic function f, a trace t, and an iterable of addresses, converts traced randomness into untraced randomness.
Returns a tuple (t_new, g), where t_new is a trace and g is a function. The function signature of g is the same as that of f with the first argument removed; that is, if f(t :: Trace, params...), then g(params...). Computation is delayed, so each of the latent nodes in t_new has interpretation = BLOCKED. Calling g(params...) executes the computation and each latent node in t_new with an address in addresses is removed.
CrimsonSkyline.condition — Methodfunction condition(f, evidence :: Dict)Condition a trace modified by f on evidence, which maps addresses to observed evidence associated with that address. Returns a function with call signature identical to that of f and return signature (t :: Trace, rtype) where rtype is the return type of f.
CrimsonSkyline.replace — Methodfunction replace(f, r :: Dict)Given a mapping r from addresses to distribution-like (currently Distributions or Array{Any, 1}s), replaces the current distributions at that set of addresses with this set of distributions. Returns a function g that has return signature (t :: Trace, rval) where rval is a return value of f.
CrimsonSkyline.replace — Methodfunction replace(t :: Trace, r :: Dict)Given a mapping r from addresses to distribution-like (currently Distributions or Array{Any, 1}s), replaces the current distributions at that set of addresses with this set of distributions. Returns the modified trace.
CrimsonSkyline.replay — Methodfunction replay(f, t :: T) where T <: TraceGiven a stochastic function f and a trace t, makes sample calls behave as though they had sampled the values in t at the corresponding addresses.
Returns a tuple (t_new, g), where t_new is a trace and g is a function. The function signature of g is the same as that of f with the first argument removed; that is, if f(t :: Trace, params...), then g(params...). Computation is delayed, so each of the latent nodes in t_new has interpretation = REPLAYED. Calling g(params...) executes the computation and each latent node in t_new reverts to its original interpretation.
CrimsonSkyline.rewrite — Methodfunction rewrite(f, t :: T, r :: Dict) where T <: TraceRewrites the history of the trace to make it appear as if the values in the trace were sampled at the addresses in the keys of r from the corresponding distributions in the values of r. Returns a function with call signature g(params...) that returns (t :: Trace, rval), where rval is the return type of f.
CrimsonSkyline.update — Methodfunction update(f, r :: SamplingResults{I}) where I <: InferenceTypeGiven a stochastic function f and a SamplingResults r, update the prior predictive to the posterior predictive by jointly replacing all latent sample sites with the joint empirical posterior. Returns a stochastic function g with the same call signature as f. This function will modify in place the trace passed into it as the first argument.
basic sampling methods
Simple samplers such as forward and rejection sampling can be done "by hand", but convenience methods are implemented to facilitate postprocessing of results.
CrimsonSkyline.forward_sampling — Methodfunction forward_sampling(f; params = (), num_iterations = 1)Draws samples from the model's joint density. Equivalent to calling f in a loop num_iterations times, but results are collected in a NonparametricSamplingResults for easier postprocessing.
CrimsonSkyline.rejection — Methodfunction rejection(f, log_l :: Float64, params...)Samples from the prior with the hard likelihood constraint $\log L_k >$ log_l.
Args:
fstochastic function. Must have signaturef(t :: Trace, params...)log_l: current log likelihood thresholdparams: any additional arguments to pass tof
importance
Importance sampling algorithms and utilities. Currently the following importance sampling algorithms are implemented:
- Likelihood weighting
- Generic user-defined proposal
CrimsonSkyline.importance_sampling — Methodfunction importance_sampling(f, q, types::Tuple{DataType,DataType}; params = (), nsamples :: Int = 1)Given a stochastic function f, a proposal function q, and a tuple of params to pass to f and q, compute nsamples iterations of importance sampling. q must have the same input signature as f. Returns a SamplingResults instance.
CrimsonSkyline.importance_sampling — Methodfunction importance_sampling(f, q; params = (), nsamples :: Int = 1)Given a stochastic function f, a proposal function q, and a tuple of params to pass to f and q, compute nsamples iterations of importance sampling. q must have the same input signature as f. Returns a SamplingResults instance.
CrimsonSkyline.is_step — Methodfunction is_step(f, q, types::Tuple{DataType,DataType}; params = ())Perform one step of importance sampling – draw a single sample from the proposal q, replay it through f, and record the log weight as $\log W_n = \log p(x, z_n) - \log q(z_n)$. Returns a tuple (log weight, rval, trace).
CrimsonSkyline.is_step — Methodfunction is_step(f, q; params = ())Perform one step of importance sampling – draw a single sample from the proposal q, replay it through f, and record the log weight as $\log W_n = \log p(x, z_n) - \log q(z_n)$. Returns a tuple (log weight, rval, trace).
CrimsonSkyline.likelihood_weighting — Methodfunction likelihood_weighting(f, types::Tuple{DataType, DataType}, params...; nsamples :: Int = 1)Given a stochastic function f and arguments to the function params..., executes nsamples iterations of importance sampling by using the prior as a proposal distribution. The importance weights are given by $\log W_n = \ell(t_n)$. Returns an SamplingResults instance.
CrimsonSkyline.likelihood_weighting — Methodfunction likelihood_weighting(f, params...; nsamples :: Int = 1)Given a stochastic function f and arguments to the function params..., executes nsamples iterations of importance sampling by using the prior as a proposal distribution. The importance weights are given by $\log W_n = \ell(t_n)$. Returns an SamplingResults instance.
CrimsonSkyline.likelihood_weighting_results — Methodlikelihood_weighting_results()Outer constructor for SamplingResults.
CrimsonSkyline.log_evidence — Methodfunction log_evidence(r :: SamplingResults{LikelihoodWeighting})Computes the log evidence (log partition function),
$\log Z \equiv \log p(x) \approx -\log N_{\text{samples}} + \log \sum_{n=1}^{N_{\text{samples}}} W_n.$
CrimsonSkyline.lw_step — Methodfunction lw_step(f, types::Tuple{DataType, DataType}, params...)Perform one step of likelihood weighting – draw a single proposal from the prior and compute the log weight as equal to the likelihood. Returns a tuple (log weight, rval, trace).
CrimsonSkyline.lw_step — Methodfunction lw_step(f, params...)Perform one step of likelihood weighting – draw a single proposal from the prior and compute the log weight as equal to the likelihood. Returns a tuple (log weight, rval, trace).
CrimsonSkyline.normalized_weights — Methodfunction normalized_weights(r :: SamplingResults{LikelihoodWeighting})Computes the normalized weights $w_n$ from unnormalized weights $W_n$:
$w_n = W_n / p(x) = \exp\{\ell(t_n) - \log Z\}.$
CrimsonSkyline.sample — Methodfunction sample(r :: SamplingResults{LikelihoodWeighting}, k, n :: Int)Draws n samples from the empirical marginal posterior at address k.
metropolis
Metropolis algorithm and utilities. Currently the following algorithms are implemented for both FOPPL and HOPPL programs:
- Independent prior proposal
- Arbitrary single- or multi-site proposal
Here are two examples of inference using the arbitrary MH step interface. Consider the following generative function:
function normal_model(t :: Trace, data :: Vector{Float64})
loc = sample(t, :loc, Normal(0.0, 10.0))
scale = sample(t, :scale, LogNormal())
for i in 1:length(data)
observe(t, (:obs, i), Normal(loc, scale), data[i])
end
endTo learn an approximate posterior for :loc and :scale, we will introduce two proposal kernels:
loc_proposal(old_t :: Trace, new_t :: Trace, data) = propose(new_t, :loc, Normal(old_t[:loc].value, 0.25))
scale_proposal(old_t :: Trace, new_t :: Trace, data) = propose(new_t, :scale, truncated(Normal(old_t[:scale].value, 0.25), 0.0, Inf))Note that while loc_proposal is symmetric, scale_proposal is not. To perform inference, we pass these kernels to mh_step in a loop after first drawing a random trace:
t = trace()
normal_model(t, data)
for i in 1:niter
t = mh_step(t, normal_model, loc_proposal; params = (data,))
t = mh_step(t, normal_model, scale_proposal; params = (data,))
endIn this case, inference was fairly successful:
[ Info: True loc = 4.0
[ Info: True std = 1.0
[ Info: inferred E[loc] = 4.022688081613082
[ Info: inferred E[scale] = 0.9696559373495869
[ Info: approximate p(x) = sum_z p(x|z) = -138.63530736205144As a less trivial (but still contrived!) inference example, we can infer the posterior distribution of a latent discrete random variable in an open-universe model:
function random_sum_model(t :: Trace, data)
n = sample(t, :n, Geometric(0.1))
loc = 0.0
for i in 1:(n + 1)
loc += sample(t, (:loc, i), Normal())
end
obs = Array{Float64, 1}()
for j in 1:length(data)
o = observe(t, (:data, j), Normal(loc, 1.0), data[j])
push!(obs, o)
end
obs
end(N.B.: we write the model is this form for pedagogic reasons; there is a far more efficient way to express the latent structure of this model, namely $n \sim \text{Geometric}(0.1)$, $\text{loc} \sim \text{Normal}(0, n)$.) We are interested in learning the posterior distribution of :n. We introduce two proposal distributions, one for the latent discrete rv and another generic proposal for the location increments:
function random_n_proposal(old_trace, new_trace, params...)
old_n = float(old_trace[:n].value)
if old_n > 0
propose(new_trace, :n, Poisson(old_n))
else
propose(new_trace, :n, Poisson(1.0))
end
end
gen_loc_proposal(old_trace, new_trace, ix, params...) = propose(new_trace, (:loc, ix), Normal(old_trace[(:loc, ix)].value, 0.25))We again conduct inference by simply applying proposals within a loop. This time, the number of location increment proposals we need to construct is dependent on the sampled values of the latent random variable. We can either create these proposals on the fly as they're needed or create what is nearly guaranteed to be enough of them before any inference is performed, e.g., loc_proposals = [(o, n, params...) -> gen_loc_proposal(o, n, i, params...) for i in 1:100]. Now that we have what we need, we can conduct inference:
t = trace()
random_sum_model(t, data)
...
for i in 1:niter
t = mh_step(t, random_sum_model, random_n_proposal; params=(data,))
for j in 1:(t[:n].value + 1)
t = mh_step(t, random_sum_model, loc_proposals[j]; params=(data,))
end
push!(ns, t[:n].value)
endOur inference results look promising:
[ Info: True :n = 9
[ Info: Posterior E[:n] = 7.581604598850287For more examples, check back soon.
CrimsonSkyline.accept — Methodfunction accept(t :: Trace, new_t :: Trace, log_a :: Float64)Stochastic function that either returns new_t if accepted or returns t if not accepted.
CrimsonSkyline.copy_common! — Methodfunction copy_common!(old_t :: Trace, new_t :: Trace)Copies nodes from old_t into new_t for all addresses in the intersection of their address sets.
CrimsonSkyline.log_acceptance_ratio — Methodfunction log_acceptance_ratio(t :: Trace, t_proposed :: Trace, p :: Prior)Computes the log acceptance ratio of a Metropolis step when using the independent prior proposal algorithm:
$\log \alpha = \ell(t_{\text{proposed}}) - \ell(t_{\text{original}})$
CrimsonSkyline.loglatent — Methodfunction loglatent(t :: Trace)Computes the joint log probability of all latent variables in a trace, $\log p(t) - \ell(t)$.
CrimsonSkyline.logprob — Methodfunction logprob(t0 :: Trace, t1 :: Trace)Computes the proposal log probability $q(t_1 | t_0)$.
This expression has two parts: log probability that is generated at the proposed site(s), and log probability that is generated at the sites that are present in t1 but not in t0.
CrimsonSkyline.mh — Methodfunction mh(f, types::Tuple{DataType, DataType}; params = (), burn = 1000, thin = 50, num_iterations = 10000)Generic Metropolis algorithm using draws from the prior.
Args:
f: stochastic function. Must have call signaturef(t :: Trace, params...)params: addditional arguments to pass tofand each of the proposal kernels.burn: number of samples to discard at beginning of markov chainthin: keep only everythin-th draw. E.g., ifthin = 100, only every 100-th trace will be kept.num_iterations: total number of steps to take in the markov chain
CrimsonSkyline.mh — Methodfunction mh(f; params = (), burn = 1000, thin = 50, num_iterations = 10000)Generic Metropolis algorithm using draws from the prior.
Args:
f: stochastic function. Must have call signaturef(t :: Trace, params...)params: addditional arguments to pass tofand each of the proposal kernels.burn: number of samples to discard at beginning of markov chainthin: keep only everythin-th draw. E.g., ifthin = 100, only every 100-th trace will be kept.num_iterations: total number of steps to take in the markov chain
CrimsonSkyline.mh — Methodfunction mh(f, qs :: A, addresses; params = (), burn = 100, thin = 10, num_iterations = 10000, inverse_verbosity = 100) where A <: AbstractArrayGeneric Metropolis algorithm using user-defined proposal kernels, returning only a requested subset of addresses.
Args:
f: stochastic function. Must have call signaturef(t :: Trace, params...)qs: array-like of proposal kernels. Proposal kernels are applied sequentially in the order that they appear in this array. Proposal kernels must have the signatureq(old_t :: Trace, new_t :: Trace, params...)where it must take in at least the same number of arguments inparamsasf.addresses: only values sampled at these addresses will be saved in thevaluesfield of theBareResultsstruct returned.params: addditional arguments to pass tofand each of the proposal kernels.burn: number of samples to discard at beginning of markov chainthin: keep only everythin-th draw. E.g., ifthin = 100, only every 100-th trace will be kept.num_iterations: total number of steps to take in the markov chaininverse_verbosity: everyinverse_verbosityiterations, a stattus report will be logged.
CrimsonSkyline.mh — Methodfunction mh(f, qs :: A, types::Tuple{DataType,DataType}; params = (), burn = 100, thin = 10, num_iterations = 10000, inverse_verbosity = 100) where A <: AbstractArrayGeneric Metropolis algorithm using user-defined proposal kernels.
Args:
f: stochastic function. Must have call signaturef(t :: Trace, params...)qs: array-like of proposal kernels. Proposal kernels are applied sequentially in the order that they appear in this array. Proposal kernels must have the signatureq(old_t :: Trace, new_t :: Trace, params...)where it must take in at least the same number of arguments inparamsasf.params: addditional arguments to pass tofand each of the proposal kernels.burn: number of samples to discard at beginning of markov chainthin: keep only everythin-th draw. E.g., ifthin = 100, only every 100-th trace will be kept.num_iterations: total number of steps to take in the markov chaininverse_verbosity: everyinverse_verbosityiterations, a stattus report will be logged.
CrimsonSkyline.mh — Methodfunction mh(f, qs :: A; params = (), burn = 100, thin = 10, num_iterations = 10000, inverse_verbosity = 100) where A <: AbstractArrayGeneric Metropolis algorithm using user-defined proposal kernels.
Args:
f: stochastic function. Must have call signaturef(t :: Trace, params...)qs: array-like of proposal kernels. Proposal kernels are applied sequentially in the order that they appear in this array. Proposal kernels must have the signatureq(old_t :: Trace, new_t :: Trace, params...)where it must take in at least the same number of arguments inparamsasf.params: addditional arguments to pass tofand each of the proposal kernels.burn: number of samples to discard at beginning of markov chainthin: keep only everythin-th draw. E.g., ifthin = 100, only every 100-th trace will be kept.num_iterations: total number of steps to take in the markov chaininverse_verbosity: everyinverse_verbosityiterations, a stattus report will be logged.
CrimsonSkyline.mh — Methodfunction mh(rf::RandomField, qs::Vector{T}, val; burn=1000, thin=100, num_iterations=11000) where TMetropolis Hastings algorithm for sampling from a random field using arbitrary proposal kernels.
Args:
rf: the random field from which to sampleqs: vector of proposal kernel callables; see documentation ofmh_stepfor specification of proposal kernelsval: initial guess with which to initialize MH, must be a dict with formataddress => value.burn: number of samples to discard at beginning of markov chainthin: keep only everythin-th draw. E.g., ifthin = 100, only every 100-th trace will be kept.num_iterations: total number of steps to take in the markov chain
CrimsonSkyline.mh_step — Methodfunction mh_step(rf::RandomField, q, x, log_prob_rf_x)A Metropolis step to sample from a random field using an arbitrary proposal kernel.
Args:
rf: aRandomFieldfrom which to sampleq: a proposal kernel. This must be a callable that satisfies the following requirements:q(x)returns a new valuex_primethat is generated using the input parametersx, i.e., $x' \sim q(x'|x)$. For example,q(x) = rand(Normal(x, 0.25))q(x_prime, x)scores (computes the log probability of)x_primeagainstx, i.e., computes $\log q(x' | x)$
x: a dict with formataddress => value, the current sampled value.log_prob_rf_x: the log probability ofxunder the random field.
CrimsonSkyline.mh_step — Methodfunction mh_step(t :: Trace, f, q, types::Tuple{DataType,DataType}; params = (), return_val :: Bool = false)A generic Metropolis step using an arbitrary proposal kernel.
Given a trace t, a stochastic function f with signature f(t :: Trace, params...) a stochastic function q with signature q(old_trace :: Trace, new_trace :: Trace, params...), generates a proposal from q and accepts based on the log acceptance probability:
$\log \alpha = \log p(t_{\text{new}}) - \log q(t_{\text{new}}|t_{\text{old}}) - [\log p(t_{\text{old}}) - \log q(t_{\text{old}} | t_{\text{new}})].$
CrimsonSkyline.mh_step — Methodfunction mh_step(t :: Trace, f, q; params = (), return_val :: Bool = false)A generic Metropolis step using an arbitrary proposal kernel.
Given a trace t, a stochastic function f with signature f(t :: Trace, params...) a stochastic function q with signature q(old_trace :: Trace, new_trace :: Trace, params...), generates a proposal from q and accepts based on the log acceptance probability:
$\log \alpha = \log p(t_{\text{new}}) - \log q(t_{\text{new}}|t_{\text{old}}) - [\log p(t_{\text{old}}) - \log q(t_{\text{old}} | t_{\text{new}})].$
CrimsonSkyline.mh_step — Methodfunction mh_step(t :: Trace, f, types::Tuple{DataType, DataType}; params = (), return_val :: Bool = false)An independent prior sample Metropolis step.
Given a trace t and stochastic function f depending on params..., generates proposals from prior draws and accepts based on the likelihood ratio.
CrimsonSkyline.mh_step — Methodfunction mh_step(t :: Trace, f; params = (), return_val :: Bool = false)An independent prior sample Metropolis step.
Given a trace t and stochatic function f depending on params..., generates proposals from prior draws and accepts based on the likelihood ratio.
nested
CrimsonSkyline.nested — Methodfunction nested(f, replace_fn; params = (), num_points :: Int64 = 1)Generic implementation of nested sampling (Skilling, Nested sampling for general Bayesian computation, Bayesian Analysis, 2006). The number of sampling iterations is a function of num_points aka $N$, and the empirical entropy of the sampling distribution, given at the $n$-th iteration by $H_n \approx \sum_k \hat p_k \log \hat p_k^{-1}$, where $\hat p_k = L_k w_k / Z_k$, $L_k$ is the likelihood value, $w_k$ is the difference in prior mass, and $Z_k$ is the current estimate of the partition function. The number of sampling iterations is equal to $\min_n \{n > 0: n > NH_n\}$.
Args:
f: stochastic function. Must have signaturef(t :: Trace, params...)replace_fn: function that returns a tuple(new_trace :: Trace, new_log_likelihood :: Float64). The input signature of this function must bereplace_fn(f :: F, log_likelihood :: Float64, params...) where F <: Function. It must guarantee thatnew_log_likelihood > log_likelihood.params: any parameters to pass tofnum_points: the number of likelihood points to keep track of
CrimsonSkyline.nested — Methodnested(f; params = (), num_points :: Int64 = 1)Run nested sampling using internal rejection method.
results
CrimsonSkyline.NonparametricSamplingResults — Typestruct NonparametricSamplingResults{I} <: SamplingResults{I}
interpretation :: I
log_weights :: Array{Float64, 1}
return_values :: Array{Any, 1}
traces :: Array{Trace, 1}
endWrapper for results of sampling. Implements the following methods from Base: getindex, length, keys. Intepretation of log weights is dependent on I.
CrimsonSkyline.ParametricSamplingResults — Typestruct ParametricSamplingResults{I} <: SamplingResults{I}
interpretation :: I
log_weights :: Array{Float64, 1}
return_values :: Array{Any, 1}
traces :: Array{Trace, 1}
distributions :: Dict
enddistributions maps from addresses to distributions, $a \mapsto \pi^{(a)}_\psi(z)$, where $\pi^{(a)}_\psi(z)$ solves
\[\max_\psi E_{z \sim p(z|x)}[\log \pi^{(a)}_\psi(z)].\]
The distributions are not used to generate values but only to score sampled values; values are still sampled from the posterior traces. Right now, the parametric approximation is very simple: values with support over the negative orthant of $\mathbb R^D$ are approximated by (multivariate) normal distributions, while values with support over only the positive orthant of $\mathbb{R}^D$ are approximated by (multivariate) lognormal distributions. This behavior is expected to change in the future.
CrimsonSkyline.addresses — Methodfunction addresses(r::SamplingResults{I}) where I <: InferenceTypeGet all addresses associated with the SamplingResults object, $A = \bigcup_{t\in \text{traces}}\mathcal A_t$
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, r :: NonparametricSamplingResults{I}; pa = ()) where I <: InferenceTypeTreat a marginal site of a SamplingResults as a distribution, sampling from it into a trace.
CrimsonSkyline.sample — Methodfunction sample(t :: Trace, a, r::ParametricSamplingResults{I}; pa = ()) where I <: InferenceTypeTreat a marginal site of a SamplingResults as a distribution, sampling from it into a trace.
CrimsonSkyline.to_parametric — Methodfunction to_parametric(r::NonparametricSamplingResults{I}) where I<:InferenceTypeConverts a nonparametric sampling results object into one that additionally contains a mapping from addresses to distributions.
Distributions.logpdf — Methodfunction Distributions.logpdf(r :: A, v) where A <: AbstractArrayInterprets an array of objects as a delta distribution over those objects. If v is in the support set, returns $-log |r|$. Otherwise, returns $-\infty$.
statistics
CrimsonSkyline.aic — Methodfunction aic(t :: Trace)Computes the Akaike Information Criterion for a single trace (thus replacing the definition) with "maximum likelihood" by one with "likelihood". The formula is
\[\text{AIC}(t)/2 = |\text{params}(t)| - \ell(t),\]
where $\text{params}(t)|$ is the sum of the dimensionalities of non-observed and non-deterministic sample nodes.
CrimsonSkyline.aic — Methodfunction aic(r :: SamplingResults{I}) where I <: InferenceTypeComputes an empirical estimate of the Akaike Information Criterion from a SamplingResults. The formula is
\[\text{AIC}(r)/2 = \min_{t \in \text{traces}(r)}|\text{params}(t)| - \hat\ell(t),\]
where $\text{params}(t)|$ is the sum of the dimensionalities of non-observed and non-deterministic sample nodes and $\hat\ell(t)$ is the empirical maximum likelihood.
CrimsonSkyline.hpdi — Methodfunction hpdi(r::SamplingResults{I}, pct::Float64, addresses::AbstractArray{T}) where {I <: InferenceType, T}Computes the highest posterior density interval(s) for a univariate variable. Does not check that the data corresponding to each address in addresses is actually univariate; if in doubt, use hpds instead.
CrimsonSkyline.hpds — Methodfunction hpds(r::SamplingResults{I}, pct::Float64) where I <: InferenceTypeComputes the highest posterior density set (HPDS) of the SamplingResults object. Let $\mathcal T$ be the set of traces. The $100\times Q \%$-percentile HPDS is defined as the set that satisfies $\sum_{t \in \mathrm{HPDS}} p(t) = Q$ and, for all $t \in \mathrm{HPDS}$, $p(t) > p(s)$ for every $s \in \mathcal T - \mathrm{HPDS}$. It is possible to compute the HPDS using the full joint density $p(t) \equiv p(x, z)$, where $x$ is the set of observed rvs and $z$ is the set of latent rvs, since $p(z|x) \propto p(x, z)$.
pct should be a float in (0.0, 1.0). E.g., pct = 0.95 returns the 95% HPDS.
Index
CrimsonSkyline.GenerativeFieldCrimsonSkyline.NonparametricSamplingResultsCrimsonSkyline.ParametricNodeCrimsonSkyline.ParametricSamplingResultsCrimsonSkyline.RandomFieldCrimsonSkyline.RandomFieldCrimsonSkyline.RandomFieldCrimsonSkyline.SampleableNodeCrimsonSkyline.TraceCrimsonSkyline.TypedTraceCrimsonSkyline.UntypedTraceBase.randCrimsonSkyline.acceptCrimsonSkyline.addressesCrimsonSkyline.aicCrimsonSkyline.aicCrimsonSkyline.blockCrimsonSkyline.blockCrimsonSkyline.conditionCrimsonSkyline.connect_pa_ch!CrimsonSkyline.copy_common!CrimsonSkyline.forward_samplingCrimsonSkyline.hpdiCrimsonSkyline.hpdsCrimsonSkyline.importance_samplingCrimsonSkyline.importance_samplingCrimsonSkyline.inputCrimsonSkyline.interpret_latent!CrimsonSkyline.is_stepCrimsonSkyline.is_stepCrimsonSkyline.likelihood_weightingCrimsonSkyline.likelihood_weightingCrimsonSkyline.likelihood_weighting_resultsCrimsonSkyline.loadCrimsonSkyline.load_csmCrimsonSkyline.load_jdbCrimsonSkyline.log_acceptance_ratioCrimsonSkyline.log_evidenceCrimsonSkyline.loglatentCrimsonSkyline.loglikelihoodCrimsonSkyline.logprobCrimsonSkyline.logprobCrimsonSkyline.logprobCrimsonSkyline.logprob!CrimsonSkyline.lw_stepCrimsonSkyline.lw_stepCrimsonSkyline.mhCrimsonSkyline.mhCrimsonSkyline.mhCrimsonSkyline.mhCrimsonSkyline.mhCrimsonSkyline.mhCrimsonSkyline.mh_stepCrimsonSkyline.mh_stepCrimsonSkyline.mh_stepCrimsonSkyline.mh_stepCrimsonSkyline.mh_stepCrimsonSkyline.nestedCrimsonSkyline.nestedCrimsonSkyline.nodeCrimsonSkyline.nodeCrimsonSkyline.nodeCrimsonSkyline.nodeCrimsonSkyline.normalized_weightsCrimsonSkyline.observeCrimsonSkyline.pa_from_traceCrimsonSkyline.plateCrimsonSkyline.plateCrimsonSkyline.plateCrimsonSkyline.plateCrimsonSkyline.plateCrimsonSkyline.plateCrimsonSkyline.plateCrimsonSkyline.proposeCrimsonSkyline.rejectionCrimsonSkyline.replaceCrimsonSkyline.replaceCrimsonSkyline.replayCrimsonSkyline.rewriteCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.sampleCrimsonSkyline.saveCrimsonSkyline.saveCrimsonSkyline.to_parametricCrimsonSkyline.to_tableCrimsonSkyline.traceCrimsonSkyline.traceCrimsonSkyline.transformCrimsonSkyline.updateDistributions.logpdf