-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSEL-strata-one-case.R
80 lines (68 loc) · 3.92 KB
/
SEL-strata-one-case.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
rm(list=ls())
source("EL/scelcount.R")
source("EL/scelcount2.R")
source("functions.R")
# Most important switch: do you want to estimate coefficients and shares, or coefficients only?
shares <- TRUE
# Set to FALSE if you are planning to run this code on one multi-core machine; TRUE if you are working on a cluster
multimachine <- FALSE
##########################
# MAIN
##########################
cat(as.character(start.time <- Sys.time() ), "\n")
# Create variables to pass to stratSampleLinearQSEL and call the function
# Unique for all simulations
beta0 <- 1 # Linear model intercept
beta1 <- 1 # Linear model slope
sdXs <- 1 # Regressor standard deviation (for log-normal distribution)
sdUs <- 1 # Error variance
MC <- 1000 # Number of Monte-Carlo simulations
boundary <- 1.4 # Boundary between two strata
Pl <- c(0.9, 0.3) # Probability of being included in the sample, per stratum
params <- c(beta0, beta1, sdXs, sdUs)
powersize <- TRUE # TRUE because empirical size and power need to be computed
wald <- TRUE
verbose <- TRUE
N <- 50 # 50, 150 or 500 was used in the paper
# Naïve bandwidths used in the paper for untransformed X: should be 0.3 for N=50, 0.4 for N=150 and 0.8 for N=500
# band <- if (N<100) 0.3 else if (N<200) 0.4 else 0.8
# Naïve bandwidths used in the paper for log-transformed X: should be 0.5 for N=50, 0.45 for N=150 and 0.35 for N=500
band <- if (N<100) 0.6 else if (N<200) 0.5 else 0.4
# band <- -1 # For rule-of-thumb bandwidths
heteroskedastic <- TRUE
strat.var <- "Y"
optmethod <- "Nelder-Mead" # Passed to optim()
gridtransform <- "log" # "log" drastically improves the performance of the SEL estimator under log-normally distributed X; otherwise use 'NULL'
# Estimates taken from OLS-GMM-strata-all-cases.R; they are not used in this script
Q.hom <- 0.2707248
Q.het <- 0.2839481
Q.X <- plnorm(boundary)
# print("Estimated theoretical aggregate shares, starting simulations...")
filename <- paste0("SEL", if(shares)"Qmu-" else "", "N", N, strat.var, ifelse(heteroskedastic, "-het", "-hom"), "-MC", MC, "-bw", band, gridtransform, ".RData")
cat("Output will be in ", filename, "\n")
if (shares) {
SELofseed <- function(sid) stratSampleLinearQSEL(N=N, params=params, boundary=boundary, Pl=Pl, strat.var=strat.var, heteroskedastic=heteroskedastic, band=band, powersize=powersize, wald=wald, verbose=verbose, optmethod=optmethod, gridtransform=gridtransform, seed=sid)
} else {
SELofseed <- function(sid) stratSampleLinearSEL(N=N, params=params, boundary=boundary, Pl=Pl, strat.var=strat.var, heteroskedastic=heteroskedastic, band=band, powersize=powersize, wald=wald, verbose=verbose, optmethod=optmethod, gridtransform=gridtransform, seed=sid)
}
if (multimachine) { # This part applies only for snow::parLapply implementation
library(Rmpi)
library(snow)
cl <- makeMPIcluster(length(readLines(Sys.getenv("OAR_NODE_FILE"))))
clusterExport(cl, c("generate.data", "SELofseed", "cemplik", "cemplik2",
"smoothEmplik", "linearSmoothEmplik", "linearSmoothEmplikTest",
"smoothEmplik2", "linearMuSmoothEmplik", "linearMuSmoothEmplikTest", "linearSharesSmoothEmplik", "linearSharesSmoothEmplikTest",
"linearMuSmoothEmplikWald", "linearSharesSmoothEmplikWald",
"maximise.SEL.noshares", "stratSampleLinearSEL", "stratSampleLinearQSEL",
"params", "N", "strat.var", "heteroskedastic", "boundary", "Pl", "band",
"powersize", "wald", "verbose", "optmethod", "gridtransform", "Q.hom", "Q.het"))
res <- parLapply(cl, 1:MC, SELofseed)
stopCluster(cl)
} else { # This part applies only for parallel::mclapply implementation
library(parallel)
num.workers <- if (.Platform$OS.type=="windows") 1 else detectCores()
res <- mclapply(1:MC, SELofseed, mc.cores=num.workers)
}
save(res, file=filename)
cat("It took", round(difftime(Sys.time(), start.time, units="hours"), 2), "hours to do", MC, "replications\n")
if (multimachine) mpi.quit()