From b071e727b9b4de3e92742c44e5beb2ab61f77547 Mon Sep 17 00:00:00 2001 From: Muluh <127390183+Daenarys8@users.noreply.github.com> Date: Wed, 13 Nov 2024 10:38:27 +0100 Subject: [PATCH] suppress non informative output in ml chapter (#636) Signed-off-by: Daena Rys Co-authored-by: TuomasBorman --- DESCRIPTION | 6 ++--- inst/pages/machine_learning.qmd | 47 +++++++++++++++------------------ 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8ed2a338..67f10f74 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: OMA Title: Orchestrating Microbiome Analysis with Bioconductor -Version: 0.98.29 +Version: 0.98.30 Date: 2024-10-04 Authors@R: c(person("Leo", "Lahti", role = c("aut"), @@ -81,6 +81,7 @@ Suggests: patchwork, phyloseq, plotly, + plotROC, purrr, qgraph, RColorBrewer, @@ -88,7 +89,6 @@ Suggests: reshape2, reticulate, rgl, - ROCR, scales, scater, sechm, @@ -112,7 +112,7 @@ Remotes: github::GraceYoon/SPRING, github::himelmallick/IntegratedLearner VignetteBuilder: knitr -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 BiocType: Book BiocBookTemplate: 1.0.5 SystemRequirements: quarto diff --git a/inst/pages/machine_learning.qmd b/inst/pages/machine_learning.qmd index 3ffed514..84629943 100644 --- a/inst/pages/machine_learning.qmd +++ b/inst/pages/machine_learning.qmd @@ -196,7 +196,8 @@ model <- train( tuneGrid = tune_grid, trControl = train_control, weights = class_weights, - max_delta_step = 1 + max_delta_step = 1, + verbosity = 0 ) # Get predictions @@ -211,41 +212,37 @@ technique for binary classification problems. 
```{r} #| label: ROC -library(ROCR) +library(plotROC) -# Get positive class -pos_class <-levels(res[["obs"]])[[1]] -# Create ROC plot -pred <- prediction(res[[pos_class]], ifelse(res[["obs"]] == pos_class, 1, 0)) -perf <- performance(pred, measure = "tpr", x.measure = "fpr") -p <- plot(perf) +# Prepare data for ROC +roc_data <- data.frame( + observed_class = as.numeric(res[["obs"]] == "healthy"), + predicted_probability = res[["healthy"]] + ) + +# Plot ROC curve +p <- ggplot(roc_data, aes(m = predicted_probability, d = observed_class)) + + geom_roc() + + style_roc(theme = theme_minimal()) p ``` -XGBoost model returns also feature importances that can be visualized with bar +XGBoost model also returns feature importance that can be visualized with a bar plot. ```{r} #| label: xgboost_feat library(xgboost) +library(miaViz) + +# Get feature importance and convert to matrix +df <- xgb.importance(model = model$finalModel) |> as.data.frame() +rownames(df) <- df[["Feature"]] +df <- as.matrix(df[, "Gain", drop = FALSE]) -# Get feature importance -df <- xgb.importance(model = model$finalModel) -# Take top 20 features -df <- df[seq_len(20), ] -# Factorize to preserve order -df[["Feature"]] <- factor(df[["Feature"]], levels = df[["Feature"]]) -# Round values, add percentage symbol -df[["Percentage"]] <- paste0(round(df[["Gain"]], 3)*100, "%") - -# Create a plot -p <- ggplot(df, aes(x = Feature, y = Gain)) + - geom_bar(stat = "identity") + - geom_text(aes(label = Percentage), hjust = -0.1, size = 2.5) + - expand_limits(y = max(df[["Gain"]]) + 0.01) + - scale_y_continuous(labels = scales::percent) + - coord_flip() +# Create plot for top 20 features +p <- plotLoadings(df, ncomponents = 1, n = 20, show.color = FALSE) p ```