-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcustom_functions.R
202 lines (175 loc) · 6.54 KB
/
custom_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#' Summarize Stochastic Node with Key Columns
#'
#' @param mcnode A Monte Carlo node object
#' @param data A dataframe containing key columns
#' @param keys Vector of column names to use as keys (default: NULL)
#'
#' @return A dataframe with stochastic node summary and key columns
#'
#' @importFrom stats summary
#' @importFrom utils unlist
mc_keys_summary <- function(mcnode, data, keys = NULL, agg_by = NULL) {
# If keys is NULL, use factor columns from data as keys
if (is.null(keys)) {
keys <- names(data[sapply(data, is.factor)])
}
#if aggregated by
if(!is.null(agg_by)){
mcnode<-agg_totals_mc(mcnode,data,by=agg_by)
data<-summarise(data,.by=all_of(agg_by))
keys<-agg_by
}
# Access summary data frame (provided in a list)
summary_l <- summary(mcnode)[[1]]
# If it's a single line mcnode, save single df summary in list for compatibility
if (!is.list(summary_l)) {
summary_l <- list(summary_l)
}
# Extract column names
summary_names <- colnames(summary_l[[1]])
# Summarize all mcnode variates in a dataframe
summary_df <- data.frame(matrix(unlist(summary_l), nrow = length(summary_l), byrow = TRUE))
# Include column names
names(summary_df) <- paste(summary_names)
# Merge keys and summary
summary_df <- cbind(data[keys], summary_df)
return(summary_df)
}
#' Convert Monte Carlo Node to Long Format
#'
#' @param mcnode A Monte Carlo node object
#' @param data A dataframe containing key columns
#' @param keys Vector of column names to use as keys (default: NULL)
#'
#' @return A long format dataframe of the Monte Carlo node
#'
#' @importFrom tidyr pivot_longer unite
#' @importFrom dplyr %>%
long_mc <- function(mcnode, data, keys = NULL) {
# If keys is NULL, use factor columns from data as keys
if (is.null(keys)) {
keys <- names(data[sapply(data, is.factor)])
}
# Use mean as central measure (for ordering etc.)
node_summary <- mc_keys_summary(mcnode, data)
central_col <- node_summary$mean
# Convert mcnode to long data frame
mcnode_long <- as.data.frame(t(unmc(mcnode)))
# Merge keys and summary
results <- cbind(data[keys], central_col, mcnode_long) %>%
pivot_longer(cols = starts_with("V", ignore.case = FALSE)) %>%
unite(key, all_of(keys), remove = FALSE)
return(results)
}
#' Create Boxplot for Long Format Monte Carlo Data
#'
#' @param data A long format dataframe of Monte Carlo data
#' @param value Column name for values (default: "value")
#' @param key_label Column name for key labels (default: "key")
#' @param central_col Column name for central measure (default: "central_col")
#'
#' @return A ggplot object representing the boxplot
#'
#' @importFrom ggplot2 ggplot aes geom_point geom_boxplot scale_color_gradient scale_fill_gradient scale_x_discrete theme_minimal coord_flip theme element_text
#' @importFrom dplyr %>% group_by_at summarise
long_mc_boxplot <- function(data, value = "value", key_label = "key", central_col = "central_col") {
my_labels <- data %>%
group_by_at(c("key", key_label)) %>%
summarise(central_col = mean(!!sym(value)), .groups = "keep")
my_labels <- my_labels[order(my_labels$central_col), ][[key_label]]
data %>%
ggplot(aes(x = reorder(key, !!sym(value)), y = !!sym(value), fill = !!sym(value), colour = !!sym(value))) +
geom_point(aes(x = reorder(key, !!sym(value)), y = !!sym(value)),
position = position_jitter(width = 0.15, height = 0),
size = 0.5, alpha = 0.2) +
geom_boxplot(alpha = 0.4, aes(colour = !!sym(central_col)), fill = "white", outlier.alpha = 0) +
scale_color_gradient(low = "#119da4", high = "#fe6847", guide = NULL) +
scale_fill_gradient(low = "#119da4", high = "#fe6847", guide = NULL) +
scale_x_discrete(labels = my_labels) +
theme_minimal() +
coord_flip() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y = element_text(size = 12),
legend.position = "left",
plot.title = element_text(size = 12)
)
}
#' Replace NA and Infinite Values in mcnode
#'
#' This function replaces NA and infinite values in an mcnode object with a specified value.
#'
#' @param mcnode An mcnode object potentially containing NA or infinite values.
#' @param na_value The value to replace NA and infinite values with. Default is 0.
#'
#' @return An mcnode object with NA and infinite values replaced by na_value.
#'
#' @export
#'
#' @examples
#' # Create an mcnode with some NA values
#' x <- mcdata(c(1, 2, NA, 4, Inf, -Inf), type="U")
#'
#' # Replace NA and infinite values with 0
#' result <- mcnode_na_rm(x)
#' print(result)
#'
#' # Replace NA and infinite values with -1
#' result_custom <- mcnode_na_rm(x, na_value = -1)
#' print(result_custom)
mcnode_na_rm <- function(mcnode, na_value = 0) {
replace(mcnode, is.na(mcnode) | is.infinite(mcnode), na_value)
}
#' Aggregate Totals for Monte Carlo Nodes
#'
#' @param mcnode A multivariate Monte Carlo node
#' @param data A data frame containing the data
#' @param by A character vector of column names to group by
#' @param keep_dim Logical, whether to return aggregated variates keeping original dimensions
#'
#' @return An aggregated Monte Carlo node
#'
#' @examples
#' # Add examples here
#'
#' @export
agg_totals_mc <- function(mcnode, data, by, keep_dim = FALSE) {
# Extract each variate (row) of the multivariate mcnode in a different mcnode
variates_list <- list()
inv_variates_list <- list()
for (i in 1:dim(mcnode)[3]) {
variates_list[[i]] <- extractvar(mcnode, i)
inv_variates_list[[i]] <- 1 - extractvar(mcnode, i)
}
# Create index to filter variates by keys
key_col <- data %>%
select(all_of(by)) %>%
unite(everything(), col = "key", sep = ", ", remove = FALSE)
key_levels <- unique(key_col$key)
for (i in 1:length(key_levels)) {
index <- key_col$key %in% key_levels[i]
# Aggregate filtered variates "Probability at least one of the events happening"
total_lev <- 1 - Reduce("*", inv_variates_list[index])
# Aggregate level to total_agg (multivariate node)
if (keep_dim) {
# One row per original variate
agg_index <- mcdata(index, type = "0", nvariates = length(index))
if (exists("total_agg")) {
total_agg <- total_agg + agg_index * total_lev
} else {
total_agg <- agg_index * total_lev
}
key_data <- data
} else {
# One row per result
if (exists("total_agg")) {
total_agg <- addvar(total_agg, total_lev)
} else {
total_agg <- total_lev
}
new_by <- by
key_data <- unique(key_col)
}
}
return(total_agg)
}