Skip to content

Commit

Permalink
evidence with NAs for imputation
Browse files Browse the repository at this point in the history
  • Loading branch information
mnwright committed Feb 9, 2024
1 parent 7cead3a commit 5b7f8b1
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 10 deletions.
47 changes: 38 additions & 9 deletions R/forge.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,17 @@ forge <- function(

# Prep evidence
conj <- FALSE
to_sim <- params$meta$variable
if (!is.null(evidence)) {
evidence <- prep_evi(params, evidence)
if (!all(c('f_idx', 'wt') %in% colnames(evidence))) {
conj <- TRUE
to_sim <- setdiff(params$meta$variable, evidence[relation == '==', variable])
to_sim <- evidence[relation == '==', setdiff(params$meta$variable, unique(variable)), by = row_idx]
setnames(to_sim, 'V1', 'variable')
}
} else {
to_sim <- data.table(row_idx = 1, variable = params$meta$variable)
}
factor_cols <- params$meta[variable %in% to_sim, family == 'multinom']
factor_cols <- params$meta[variable %in% to_sim$variable, family == 'multinom']

# Prepare the event space
if (is.null(evidence)) {
Expand Down Expand Up @@ -120,8 +122,11 @@ forge <- function(
synth_cnt <- synth_cat <- NULL
if (any(!factor_cols)) {
fam <- params$meta[family != 'multinom', unique(family)]
psi <- merge(omega, params$cnt[variable %in% to_sim], by = 'f_idx',
merge(omega, params$cnt, by = 'f_idx',
sort = FALSE, allow.cartesian = TRUE)
psi <- merge(omega, params$cnt, by = 'f_idx',
sort = FALSE, allow.cartesian = TRUE)
psi <- merge(psi, to_sim, by = c("row_idx", "variable"))
if (isTRUE(conj)) {
if (any(evidence$relation %in% c('<', '<=', '>', '>='))) {
for (k in evidence[, which(grepl('<', relation))]) {
Expand All @@ -146,8 +151,9 @@ forge <- function(

# Simulate categorical data
if (any(factor_cols)) {
psi <- merge(omega, params$cat[variable %in% to_sim], by = 'f_idx',
psi <- merge(omega, params$cat, by = 'f_idx',
sort = FALSE, allow.cartesian = TRUE)
psi <- merge(psi, to_sim, by = c("row_idx", "variable"))
psi[prob == 1, dat := val]
if (isTRUE(conj)) {
if (any(evidence[, relation == '!='])) {
Expand All @@ -164,10 +170,33 @@ forge <- function(

# Combine, optionally impose constraint(s)
x_synth <- cbind(synth_cnt, synth_cat)
if (length(to_sim) != params$meta[, .N]) {
tmp <- evidence[relation == '==']
add_on <- dcast(tmp, row_idx ~ variable, value.var = 'value')
x_synth <- merge(x_synth, add_on, by = "row_idx")
if (nrow(to_sim)/to_sim[, max(row_idx)] != params$meta[, .N]) {
add_on_cnt <- add_on_cat <- NULL
if (any(!params$meta[, family == 'multinom'])) {
tmp_cnt <- merge(evidence[relation == '=='], params$meta[family != "multinom", ], by = "variable")[, .(row_idx, variable, value)]
if (nrow(tmp_cnt) > 0) {
tmp_cnt[, value := as.numeric(value)]
add_on_cnt <- dcast(tmp_cnt, row_idx ~ variable, value.var = 'value')
}
}
if (any(params$meta[, family == 'multinom'])) {
tmp_cat <- merge(evidence[relation == '=='], params$meta[family == "multinom", ], by = "variable")[, .(row_idx, variable, value)]
if (nrow(tmp_cat) > 0) {
add_on_cat <- dcast(tmp_cat, row_idx ~ variable, value.var = 'value')
}
}
if (!is.null(add_on_cnt) & !is.null(add_on_cat)) {
add_on <- merge(add_on_cnt, add_on_cat, by = "row_idx")
} else if (!is.null(add_on_cnt)) {
add_on <- add_on_cnt
} else if (!is.null(add_on_cat)) {
add_on <- add_on_cat
}
if (any(!(colnames(x_synth) %in% colnames(add_on)))) {
x_synth <- cbind(x_synth, add_on)
} else {
x_synth <- dplyr::rows_patch(add_on, x_synth, by = "row_idx")
}
}

# Clean up, export
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ prep_evi <- function(params, evidence) {
}
evidence[, row_idx := .I]
evidence <- suppressWarnings(
melt(evidence, id.vars = "row_idx", variable.factor = FALSE)
melt(evidence, id.vars = "row_idx", variable.factor = FALSE, na.rm = TRUE)
)
evidence[, relation := '==']
conj <- TRUE
Expand Down

0 comments on commit 5b7f8b1

Please sign in to comment.