-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest1_STAN_sim_Gaussian.R
70 lines (60 loc) · 2.34 KB
/
test1_STAN_sim_Gaussian.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# prompt:
# after the prompts of test_Stan2, ask "Do the same but for a Gaussian with mean of 0.5 and variance of 0.1"
# alright it's confusing variance and std so I'll do it myself
# new functionality I will try now is to simulate from the STAN code itself
# Load required libraries
library(rstan)
library(ggplot2)
# Set a seed for reproducibility
set.seed(123)
# Specify the Stan model
stan_code <- '
data {
int<lower=0> N; // Number of data points
real y[N]; // Observed data
int<lower = 0, upper = 1> run_estimation; // a switch to evaluate the likelihood
}
parameters {
real mu; // Mean parameter
real<lower=0> sigma; // Standard deviation parameter
}
model {
mu ~ normal(0, 1); // Prior for mean
sigma ~ cauchy(0, 1); // Prior for standard deviation
if(run_estimation==1){
y ~ normal(mu, sigma); // Likelihood
}
}
generated quantities{
vector[N] y_sim;
for(i in 1:N) {
y_sim[i] = normal_rng(mu, sigma); //copy the likelihood line, but!! need to change to _rng for generating data
}
}
'
# Compile the Stan model
compiled_model <- stan_model(model_code = stan_code)
#=============do the simulations===============
# Simulate data from a Gaussian distribution; sample values from prior?
n_sim <- 1000
n_draws_from_prior <- 5
sim_out <- sampling(compiled_model, data = list(N = n_sim,
y = sample(1:2, n_sim, replace = T),
run_estimation = 0),
chain=3,iter = 100, warmup = 60)
sim_params <- sim_out %>%
as.data.frame %>%
dplyr::select(mu, sigma)
sim_results <- sim_out %>%
as.data.frame %>%
dplyr::select(contains("y_sim"))
#okay, not sure what is determining the number of draws this sampling() will get from the prior??
#Hmm I get it now: iter = 100, but we have 4 chains; every chain will be used, but burn-in will not be used
#so, we get n_chain*(n_iter-n_warm_up) simulations
#now see if the mean & sd of the simulated data are like the parameters that generated them:
sim_params$mu_sim <- apply(sim_results, 1, mean)
sim_params$sigma_sim <- apply(sim_results, 1, sd)
ggarrange(
ggplot(sim_params,aes(mu,mu_sim))+geom_point()+theme_classic(),
ggplot(sim_params,aes(sigma,sigma_sim))+geom_point()+theme_classic()
)