score_list <- scores$scores[dat$P_Suitable == "Yes"]
n_scorable <- length(score_list)
# pattern "P" matches all indicators
scores_all <- get_indicators(sc=score_list, pattern="P")
#table(names(scores_all))
scores_all$category <- NA
scores_all$category[str_detect(scores_all$indicator, "Data")] <- "Open Data"
scores_all$category[str_detect(scores_all$indicator, "Prereg")] <- "Preregistration"
scores_all$category[str_detect(scores_all$indicator, "ReproducibleScripts|IndependentVerification")] <- "Reproducible Code \n& Verification"
scores_all$category[str_detect(scores_all$indicator, "FormalModeling")] <- "Formal\nModeling"
scores_all$category[str_detect(scores_all$indicator, "OpenMaterials")] <- "Open Materials"
rigor_category_names <- c("Open Data", "Preregistration", "Reproducible Code \n& Verification", "Formal\nModeling", "Open Materials")
scores_all_aggr <- scores_all %>%
group_by(category) %>%
summarise(
rel_score = mean(rel_score),
overall_points = sum(max)
)
# = mean of the mean scores (with equal weighting of each research output)
# Note (TODO): In the webform, it's the *weighted* mean (each output weighted by its maximum points; see the overall rigor score below)
overall_mean_rel_score <- scores_all %>%
group_by(output) %>%
summarise(
scores = sum(value),
max = sum(max),
rel_score = scores/max
)
# overall rigor score as in webform:
sum(overall_mean_rel_score$scores)/sum(overall_mean_rel_score$max)
# equally weighted overall score:
overall_score <- mean(scores_all_aggr$rel_score)
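# A minimal illustration (made-up numbers) of the difference between the two aggregation
# strategies: pooling points before dividing (webform-style) vs. averaging the
# per-unit relative scores with equal weights.
toy_scores <- c(2, 18)   # points achieved by two hypothetical outputs
toy_max    <- c(4, 20)   # maximum achievable points for each output
sum(toy_scores) / sum(toy_max)   # pooled (webform-style): 20/24 ≈ 0.83
mean(toy_scores / toy_max)       # equally weighted mean: (0.5 + 0.9)/2 = 0.70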
categories_present <- nrow(scores_all_aggr)
radar_dat <- tibble(
dimension = factor(scores_all_aggr$category),
max_points = scores_all_aggr$overall_points,
rel_score = scores_all_aggr$rel_score,
#xstart = c(0, cumsum(max_points)[1:(length(max_points)-1)]),
#xend = cumsum(max_points),
xstart = 0:(categories_present-1),
xend = 1:categories_present,
xmid = (xend-xstart)/2 + xstart
)
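# With, e.g., 5 categories this yields unit-width wedges on the polar x-axis
# (hypothetical count; xmid is used later to place the category labels):
data.frame(xstart = 0:4, xend = 1:5, xmid = (1:5 - 0:4) / 2 + 0:4)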
max_points <- sum(radar_dat$max_points)
# Standardized and categorized rigor scores:
# Standardized within category
# Quantiles in Franka's study were:
# 0.63 (top 1%), 0.54 (top 10%), 0.50 (top 20%), 0.30 (top 50%)
# radar_dat$norm_rigor <- cut(radar_dat$rel_score, breaks=c(0, 0.30, .50, .54, 1),
# labels=c("low50", "top50", "top20", "top10"))
p1 <- radar_dat %>% ggplot() +
geom_rect(aes(xmin=xstart, xmax=xend, ymin=0, ymax=rel_score, fill=dimension)) +
coord_polar("x", start=0) +
geom_hline(yintercept=0.30, col="grey30") + # top 50%
geom_hline(yintercept=0.54, col="grey50") + # top 10%
geom_hline(yintercept=0.63, col="grey60") + # top 1%
geom_text(x=max_points*0, y=0.30, label = "Top 50%", col="grey30", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.54, label = "Top 10%", col="grey50", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.63, label = "Top 1%" , col="grey50", size=3, vjust=-0.2) +
xlab("") + ylab("") + ggtitle(paste0("Rigor profile for ", meta$FullName), subtitle = paste0("Overall score = ", round(overall_score, 2))) +
scale_x_continuous(labels = NULL, breaks = NULL) + scale_y_continuous(labels = NULL, breaks = NULL, limits=c(0, 1)) + theme_void() +
guides(fill=guide_legend("Quality Dimension")) +
scale_fill_brewer(palette="Set3") +
geom_text(aes(x=xmid, y=0.85, label = dimension), vjust = -0.5)
p1
# TODO: add visual guidance lines (geom_hline) at meaningful percentiles
# TODO: Compute rough norm values (e.g., 5 categories) per category! (Not overall, as it is at the moment)
p1
radar_dat
params <- list(jsonpath="/Users/felix/Documents/Github/RESQUE/profile/data/resque_Felix.json")
library(jsonlite)
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
library(scales)
library(forcats)
library(wordcloud)
library(knitr)
library(openalexR)
library(tibble)
source("score.R")
source("profile_helpers.R")
# Felix: /Users/felix/Documents/Github/RESQUE/profile/data/resque_Felix.json
# Anne: /Users/felix/Documents/Github/RESQUE/profile/data/resque_1697454489129.json
# Daniel: /Users/felix/Documents/Github/RESQUE/profile/data/resque_1696782133298.json
#params <- list(jsonpath="/Users/felix/Documents/Github/RESQUE/profile/data/resque_1696782133298.json")
#params <- list(jsonpath="/Users/felix/Downloads/resque_1701950141044.json")
#params <- list(jsonpath="/Users/felix/Documents/Github/RESQUE/profile/data/resque_1697454489129.json")
#params <- list(jsonpath="/Users/felix/Documents/Github/RESQUE/profile/data/resque_Felix.json")
dat0 <- read_json(params$jsonpath, simplifyVector = TRUE)
meta <- dat0[1, ]
dat <- dat0[-1, ]
# clean and create some fields
meta$FullName <- paste0(meta$FirstName, " ", meta$LastName)
dat$Title <- clean_title(dat$Title)
# remove unnecessary column that breaks the structure of dat (nested data frame)
dat$queryConfig <- NULL
# read the scores
scores <- score_all_from_file(path=params$jsonpath)
# remove the first element: This is the meta-information which has no scores
# Now each list entry is one publication, in the same order as in `dat`
scores$scores <- scores$scores[-1]
# Create nice factor labels
dat$type <- factor(dat$type, levels=c("pub", "data", "software"), labels=c("Publication", "Data set", "Research software"))
dat <- unCamel(dat, "P_TypePublication")
dat <- unCamel(dat, "P_ReproducibleScripts")
dat$dois_normalized <- str_extract(dat$DOI, pattern="10\\.\\d{4,9}/[-._;()/:a-z0-9A-Z]+")
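# Quick check of the DOI pattern on a hypothetical raw entry (illustrative only):
str_extract("doi: 10.5281/zenodo.123456", pattern = "10\\.\\d{4,9}/[-._;()/:a-z0-9A-Z]+")
# -> "10.5281/zenodo.123456"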
# Split the research outputs into types, reduce to suitable submissions
pubs <- dat %>% filter(type == "Publication", P_Suitable == "Yes")
credit <- dat %>%
select(contains("CRediT")) %>%
pivot_longer(everything(), names_prefix = "P_CRediT_")
colnames(credit) <- c("Role", "Degree")
credit$Degree <- factor(credit$Degree, levels = rev(c("Lead", "Equal", "Support", "NoRole", "NA")), labels = rev(c("Lead", "Equal", "Support", "NoRole", "not applicable")))
# add space to camelCase; make nice labels
credit <- unCamel(credit, "Role")
credit$Role[credit$Role == "Writing Review Editing"] <- "Writing: Review & Editing"
credit$Role[credit$Role == "Writing Original Draft"] <- "Writing: Original draft"
credit_tab <- table(credit$Role, credit$Degree)
ct_ordered <- as.data.frame.matrix(credit_tab) %>%
mutate(
LeadEqual = Lead + Equal,
Sum = Lead + Equal + Support + NoRole,
# normalized weight: All "Lead" (=max) would be 1
weight = (Lead * 4 + Equal * 3 + Support * 1) / (Sum * 4),
Role = rownames(.)
) %>%
arrange(-LeadEqual, -Support)
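# Worked example of the normalized weight (hypothetical role counts):
# 2 Lead, 1 Equal, 1 Support across 4 outputs gives (2*4 + 1*3 + 1*1) / (4*4) = 0.75;
# a role held as "Lead" on every output would reach the maximum of 1.
(2 * 4 + 1 * 3 + 1 * 1) / (4 * 4)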
credit$Role <- factor(credit$Role, levels = rev(rownames(ct_ordered)))
# The "CRediT involvement" categories
credit_inv <- dat %>% select(contains("CRediT"))
roles <- colnames(credit_inv) |> str_replace("P_CRediT_", "") |> unCamel0()
roles[roles == "Writing Review Editing"] <- "Writing: Review & Editing"
roles[roles == "Writing Original Draft"] <- "Writing: Original draft"
main_roles <- rep("", nrow(credit_inv))
for (i in 1:nrow(credit_inv)) {
leads <- credit_inv[i, ] == "Lead"
equals <- credit_inv[i, ] == "Equal"
main_roles[i] <- paste0(
ifelse(sum(leads)>0, paste0(
"<b>Lead:</b> ",
paste0(roles[leads], collapse=", ")), ""),
ifelse(sum(equals)>0, paste0(
"<br><b>Equal:</b> ",
paste0(roles[equals], collapse=", ")), "")
)
}
credit_inv$sum_lead <- apply(credit_inv[, 1:14], 1, function(x) sum(x=="Lead"))
credit_inv$sum_equal <- apply(credit_inv[, 1:14], 1, function(x) sum(x=="Equal"))
credit_inv$sum_leadequal <- apply(credit_inv[, 1:14], 1, function(x) sum(x %in% c("Lead", "Equal")))
credit_inv$sum_support <- apply(credit_inv[, 1:14], 1, function(x) sum(x=="Support"))
# define the categories
credit_inv$CRediT_involvement <- factor(rep("Low", nrow(credit_inv)), levels=c("Low", "Medium", "High", "Very High"), ordered=TRUE)
credit_inv$CRediT_involvement[credit_inv$sum_lead >= 3] <- "Very High"
credit_inv$CRediT_involvement[credit_inv$sum_leadequal >= 5] <- "Very High"
credit_inv$CRediT_involvement[credit_inv$sum_lead %in% c(1, 2)] <- "High"
credit_inv$CRediT_involvement[credit_inv$sum_leadequal %in% c(3, 4) & credit_inv$CRediT_involvement != "Very High"] <- "High"
credit_inv$CRediT_involvement[credit_inv$sum_equal %in% c(1, 2) & credit_inv$sum_lead == 0] <- "Medium"
credit_inv$CRediT_involvement[credit_inv$sum_support >= 5 & credit_inv$CRediT_involvement <= "Medium"] <- "Medium"
dat$CRediT_involvement <- credit_inv$CRediT_involvement
dat$CRediT_involvement_roles <- main_roles
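# Sanity check of the involvement rules on one hypothetical publication
# (condensed restatement of the assignments above, with made-up counts):
ex_lead <- 1; ex_equal <- 3; ex_support <- 2
ex_leadequal <- ex_lead + ex_equal
if (ex_lead >= 3 || ex_leadequal >= 5) {
  "Very High"
} else if (ex_lead %in% 1:2 || ex_leadequal %in% 3:4) {
  "High"   # 1 Lead + 3 Equal roles -> "High"
} else if (ex_equal %in% 1:2 || ex_support >= 5) {
  "Medium"
} else {
  "Low"
}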
kable(table(dat$type, dat$P_Suitable))
kable(table(dat[dat$type == "Publication", "P_TypePublication"]))
dat_tM <- dat %>% select(contains("P_TypeMethod"))
# add missing columns
expected_columns <- c(
P_TypeMethod_Empirical = FALSE,
P_TypeMethod_MetaAnalysis = FALSE,
P_TypeMethod_Theoretical = FALSE,
P_TypeMethod_Simulation = FALSE,
P_TypeMethod_OtherMethod = FALSE
)
# add the missing columns to dat_tM
dat_tM <- add_column(dat_tM, !!!expected_columns[setdiff(names(expected_columns), names(dat_tM))])
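# Illustration of the !!! splice on a toy data frame (hypothetical columns):
# only the expected columns that are still missing get added, each filled with FALSE.
toy_tM <- tibble(P_TypeMethod_Empirical = c(TRUE, FALSE))
add_column(toy_tM, !!!expected_columns[setdiff(names(expected_columns), names(toy_tM))])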
# remove the free text field for this table
dat_tM$P_TypeMethod_Other <- NULL
dat_tM_tab <- pivot_longer(dat_tM, everything()) %>%
group_by(name) %>%
summarise(paper_count=sum(value, na.rm=TRUE))
dat_tM_tab$name <- str_replace(dat_tM_tab$name, "P_TypeMethod_", "")
dat_tM_tab <- unCamel(df=dat_tM_tab, cname="name")
colnames(dat_tM_tab) <- c("Type of method", "# papers")
kable(dat_tM_tab)
#| results: "asis"
all_pubs <- dat[dat$type == "Publication", ]
# clean the dois:
dois <- all_pubs$DOI
dois <- dois %>%
str_replace_all("doi: ", "") %>%
str_replace_all(" ", "") %>%
str_trim()
all_pubs$doi_links <- paste0("https://doi.org/", all_pubs$dois_normalized)
all_pubs$doi <- paste0("[", all_pubs$doi_links, "](", all_pubs$doi_links, ")")
all_papers <- oa_fetch(entity = "works", doi = all_pubs$doi_links)
cat(paste0(nrow(all_papers), " out of ", nrow(all_pubs), " submitted publications could be automatically retrieved with OpenAlex.\n"))
if (nrow(all_papers) < nrow(all_pubs)) {
cat('\n::: {.callout-caution collapse="true"}\n
## The following papers could *not* be retrieved by OpenAlex:\n\n')
all_pubs[!all_pubs$doi_links %in% all_papers$doi, ] %>%
select(Title, Year, doi, P_TypePublication) %>%
kable() %>%
print()
cat("\n:::\n")
}
all_papers$n_authors <- sapply(all_papers$author, nrow)
all_papers$team_category <- cut(all_papers$n_authors, breaks=c(0, 1, 5, 15, Inf), labels=c("Single authored", "Small team (<= 5 co-authors)", "Large team (6-15 co-authors)", "Big Team (> 15 co-authors)"))
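# cut() uses right-closed intervals here, e.g. for hypothetical author counts:
cut(c(1, 5, 6, 20), breaks = c(0, 1, 5, 15, Inf),
    labels = c("Single authored", "Small team (<= 5 co-authors)", "Large team (6-15 co-authors)", "Big Team (> 15 co-authors)"))
# -> Single authored, Small team, Large team, Big Team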
team_tab <- table(all_papers$team_category) |> as.data.frame()
team_tab$perc <- paste0(round(team_tab$Freq*100 / nrow(all_papers)), "%")
colnames(team_tab) <- c("Team category", "Frequency", "%")
kable(team_tab, align=c("l", "r", "r"))
ggplot(credit, aes(x = Role, fill = Degree)) +
geom_bar(stat = "count") +
coord_flip() +
scale_fill_manual(values = rev(c("grey90", "indianred1", "khaki2", "green3", "green4")), breaks = rev(c("not applicable", "NoRole", "Support", "Equal", "Lead"))) +
theme_minimal() + xlab("") + ylab("# of publications") +
theme(axis.text.y = element_text(size = 14)) +
# force whole integers on x-axis
scale_y_continuous(breaks = function(x) seq(floor(min(x)), ceiling(max(x)), by = 1))
if (any(ct_ordered$weight > 0)) {
wordcloud(ct_ordered$Role, freq = ct_ordered$weight, scale = c(2, .1), min.freq = 0.4, random.order = FALSE)
}
credit_inv_tab <- data.frame(
"Involvement Level" = c("Very High", "High", "Medium", "Low"),
"Definition" = c(
"(>=3 roles as *lead*) OR (>= 5 roles as (*lead* OR *equal*))",
"(1-2 roles as *lead*) OR (3-4 roles as *equal*)",
"(1-2 roles as *equal*) OR (>= 5 roles as *support*)",
"All other combinations"
)
)
t1 <- table(dat$CRediT_involvement) %>% as.data.frame()
# this is an ugly way of merging ...
credit_inv_tab$Publications <- 0
credit_inv_tab$Publications[1] <- t1[t1$Var1 == "Very High", 2]
credit_inv_tab$Publications[2] <- t1[t1$Var1 == "High", 2]
credit_inv_tab$Publications[3] <- t1[t1$Var1 == "Medium", 2]
kable(credit_inv_tab)
# Call BIP! API
library(curl)
doi_csv <- paste0(dat$dois_normalized, collapse=",") |> URLencode(reserved=TRUE)
req <- curl_fetch_memory(paste0("https://bip-api.imsi.athenarc.gr/paper/scores/batch/", doi_csv))
BIP <- jsonlite::fromJSON(rawToChar(req$content))
BIP$pop_class <- factor(BIP$pop_class, levels=paste0("C", 1:5), ordered=TRUE)
BIP$inf_class <- factor(BIP$inf_class, levels=paste0("C", 1:5), ordered=TRUE)
BIP$imp_class <- factor(BIP$imp_class, levels=paste0("C", 1:5), ordered=TRUE)
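# Because the classes are ordered C1 < ... < C5, a filter like pop_class <= "C4"
# (used below) keeps every paper that is at least in the top 10%. Quick check:
factor("C3", levels = paste0("C", 1:5), ordered = TRUE) <= "C4"   # TRUE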
#| results: "asis"
pop_sel <- BIP %>%
filter(pop_class <= "C4") %>%
arrange(pop_class) %>%
select(doi, "3_year_cc", cc, pop_class)
pop_sel$Label <- factor(pop_sel$pop_class, levels=paste0("C", 1:5), labels=c("Top 0.01%", "Top 0.1%", "Top 1%", "Top 10%", "Average (Bottom 90%)"))
pop_sel$pop_class <- NULL
pop_sel <- left_join(pop_sel, dat %>% select(doi=dois_normalized, Title, CRediT_involvement, CRediT_involvement_roles), by="doi") %>%
relocate(Title)
colnames(pop_sel) <- c("Title", "doi", "3 year citation count", "Overall citation count", "Popularity", "Candidates' CRediT involvement", "Candidates' CRediT main roles")
pop_sel$doi <- NULL
# add some emojis:
pop_sel$Title[pop_sel$Popularity == "Top 0.01%"] <- paste0("🚀", pop_sel$Title[pop_sel$Popularity == "Top 0.01%"])
pop_sel$Title[pop_sel$Popularity == "Top 0.1%"] <- paste0("️🌟", pop_sel$Title[pop_sel$Popularity == "Top 0.1%"])
pop_sel$Title[pop_sel$Popularity == "Top 1%"] <- paste0("️✨", pop_sel$Title[pop_sel$Popularity == "Top 1%"])
if (nrow(pop_sel) > 0) {
cat("The highly popular papers are: \n\n")
kable(pop_sel, format = "html", escape = FALSE)
}
# Which outputs should be scored?
score_list <- scores$scores[dat$P_Suitable == "Yes"]
n_scorable <- length(score_list)
# pattern "P" matches all indicators
scores_all <- get_indicators(sc=score_list, pattern="P")
#table(names(scores_all))
scores_all$category <- NA
scores_all$category[str_detect(scores_all$indicator, "Data")] <- "Open Data"
scores_all$category[str_detect(scores_all$indicator, "Prereg")] <- "Preregistration"
scores_all$category[str_detect(scores_all$indicator, "ReproducibleScripts|IndependentVerification")] <- "Reproducible Code \n& Verification"
scores_all$category[str_detect(scores_all$indicator, "FormalModeling")] <- "Formal\nModeling"
scores_all$category[str_detect(scores_all$indicator, "OpenMaterials")] <- "Open Materials"
rigor_category_names <- c("Open Data", "Preregistration", "Reproducible Code \n& Verification", "Formal\nModeling", "Open Materials")
scores_all_aggr <- scores_all %>%
group_by(category) %>%
summarise(
rel_score = mean(rel_score),
overall_points = sum(max)
)
# = mean of the mean scores (with equal weighting of each research output)
# Note (TODO): In the webform, it's the *weighted* mean (each output weighted by its maximum points; see the overall rigor score below)
overall_mean_rel_score <- scores_all %>%
group_by(output) %>%
summarise(
scores = sum(value),
max = sum(max),
rel_score = scores/max
)
# overall rigor score as in webform:
sum(overall_mean_rel_score$scores)/sum(overall_mean_rel_score$max)
# equally weighted overall score:
overall_score <- mean(scores_all_aggr$rel_score)
categories_present <- nrow(scores_all_aggr)
radar_dat <- tibble(
dimension = factor(scores_all_aggr$category),
max_points = scores_all_aggr$overall_points,
rel_score = scores_all_aggr$rel_score,
#xstart = c(0, cumsum(max_points)[1:(length(max_points)-1)]),
#xend = cumsum(max_points),
xstart = 0:(categories_present-1),
xend = 1:categories_present,
xmid = (xend-xstart)/2 + xstart
)
#radar_dat$rel_score <- c()
max_points <- sum(radar_dat$max_points)
# Standardized and categorized rigor scores:
# Standardized within category
# Quantiles in Franka's study were:
# 0.63 (top 1%), 0.54 (top 10%), 0.50 (top 20%), 0.30 (top 50%)
# radar_dat$norm_rigor <- cut(radar_dat$rel_score, breaks=c(0, 0.30, .50, .54, 1),
# labels=c("low50", "top50", "top20", "top10"))
p1 <- radar_dat %>% ggplot() +
geom_rect(aes(xmin=xstart, xmax=xend, ymin=0, ymax=rel_score, fill=dimension)) +
coord_polar("x", start=0) +
geom_hline(yintercept=0.30, col="grey30") + # top 50%
geom_hline(yintercept=0.54, col="grey50") + # top 10%
geom_hline(yintercept=0.63, col="grey60") + # top 1%
geom_text(x=max_points*0, y=0.30, label = "Top 50%", col="grey30", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.54, label = "Top 10%", col="grey50", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.63, label = "Top 1%" , col="grey50", size=3, vjust=-0.2) +
xlab("") + ylab("") + ggtitle(paste0("Rigor profile for ", meta$FullName), subtitle = paste0("Overall score = ", round(overall_score, 2))) +
scale_x_continuous(labels = NULL, breaks = NULL) + scale_y_continuous(labels = NULL, breaks = NULL, limits=c(0, 1)) + theme_void() +
guides(fill=guide_legend("Rigor Dimension")) +
scale_fill_brewer(palette="Set3") +
geom_text(aes(x=xmid, y=0.85, label = dimension), vjust = -0.5)
p1
p1
radar_dat$rel_score
radar_dat$rel_score <- c(0.15, 0.8, 0.6, 0.25, 0.3)
max_points <- sum(radar_dat$max_points)
# Standardized and categorized rigor scores:
# Standardized within category
# Quantiles in Franka's study were:
# 0.63 (top 1%), 0.54 (top 10%), 0.50 (top 20%), 0.30 (top 50%)
# radar_dat$norm_rigor <- cut(radar_dat$rel_score, breaks=c(0, 0.30, .50, .54, 1),
# labels=c("low50", "top50", "top20", "top10"))
p1 <- radar_dat %>% ggplot() +
geom_rect(aes(xmin=xstart, xmax=xend, ymin=0, ymax=rel_score, fill=dimension)) +
coord_polar("x", start=0) +
geom_hline(yintercept=0.30, col="grey30") + # top 50%
geom_hline(yintercept=0.54, col="grey50") + # top 10%
geom_hline(yintercept=0.63, col="grey60") + # top 1%
geom_text(x=max_points*0, y=0.30, label = "Top 50%", col="grey30", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.54, label = "Top 10%", col="grey50", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.63, label = "Top 1%" , col="grey50", size=3, vjust=-0.2) +
xlab("") + ylab("") + ggtitle(paste0("Rigor profile for ", meta$FullName), subtitle = paste0("Overall score = ", round(overall_score, 2))) +
scale_x_continuous(labels = NULL, breaks = NULL) + scale_y_continuous(labels = NULL, breaks = NULL, limits=c(0, 1)) + theme_void() +
guides(fill=guide_legend("Rigor Dimension")) +
scale_fill_brewer(palette="Set3") +
geom_text(aes(x=xmid, y=0.85, label = dimension), vjust = -0.5)
p1
# TODO: add visual guidance lines (geom_hline) at meaningful percentiles
radar_dat$rel_score <- c(0.15, 0.8, 0.6, 0.25, 0.36)
max_points <- sum(radar_dat$max_points)
# Standardized and categorized rigor scores:
# Standardized within category
# Quantiles in Franka's study were:
# 0.63 (top 1%), 0.54 (top 10%), 0.50 (top 20%), 0.30 (top 50%)
# radar_dat$norm_rigor <- cut(radar_dat$rel_score, breaks=c(0, 0.30, .50, .54, 1),
# labels=c("low50", "top50", "top20", "top10"))
p1 <- radar_dat %>% ggplot() +
geom_rect(aes(xmin=xstart, xmax=xend, ymin=0, ymax=rel_score, fill=dimension)) +
coord_polar("x", start=0) +
geom_hline(yintercept=0.30, col="grey30") + # top 50%
geom_hline(yintercept=0.54, col="grey50") + # top 10%
geom_hline(yintercept=0.63, col="grey60") + # top 1%
geom_text(x=max_points*0, y=0.30, label = "Top 50%", col="grey30", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.54, label = "Top 10%", col="grey50", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.63, label = "Top 1%" , col="grey50", size=3, vjust=-0.2) +
xlab("") + ylab("") + ggtitle(paste0("Rigor profile for ", meta$FullName), subtitle = paste0("Overall score = ", round(overall_score, 2))) +
scale_x_continuous(labels = NULL, breaks = NULL) + scale_y_continuous(labels = NULL, breaks = NULL, limits=c(0, 1)) + theme_void() +
guides(fill=guide_legend("Rigor Dimension")) +
scale_fill_brewer(palette="Set3") +
geom_text(aes(x=xmid, y=0.85, label = dimension), vjust = -0.5)
p1
p1
radar_dat$rel_score <- c(0.15, 0.8, 0.6, 0.25, 0.42)
max_points <- sum(radar_dat$max_points)
# Standardized and categorized rigor scores:
# Standardized within category
# Quantiles in Franka's study were:
# 0.63 (top 1%), 0.54 (top 10%), 0.50 (top 20%), 0.30 (top 50%)
# radar_dat$norm_rigor <- cut(radar_dat$rel_score, breaks=c(0, 0.30, .50, .54, 1),
# labels=c("low50", "top50", "top20", "top10"))
p1 <- radar_dat %>% ggplot() +
geom_rect(aes(xmin=xstart, xmax=xend, ymin=0, ymax=rel_score, fill=dimension)) +
coord_polar("x", start=0) +
geom_hline(yintercept=0.30, col="grey30") + # top 50%
geom_hline(yintercept=0.54, col="grey50") + # top 10%
geom_hline(yintercept=0.63, col="grey60") + # top 1%
geom_text(x=max_points*0, y=0.30, label = "Top 50%", col="grey30", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.54, label = "Top 10%", col="grey50", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.63, label = "Top 1%" , col="grey50", size=3, vjust=-0.2) +
xlab("") + ylab("") + ggtitle(paste0("Rigor profile for ", meta$FullName), subtitle = paste0("Overall score = ", round(overall_score, 2))) +
scale_x_continuous(labels = NULL, breaks = NULL) + scale_y_continuous(labels = NULL, breaks = NULL, limits=c(0, 1)) + theme_void() +
guides(fill=guide_legend("Rigor Dimension")) +
scale_fill_brewer(palette="Set3") +
geom_text(aes(x=xmid, y=0.85, label = dimension), vjust = -0.5)
p1
p1 <- radar_dat %>% ggplot() +
geom_rect(aes(xmin=xstart, xmax=xend, ymin=0, ymax=rel_score, fill=dimension)) +
coord_polar("x", start=0) +
geom_hline(yintercept=0.30, col="grey30") + # top 50%
geom_hline(yintercept=0.54, col="grey50") + # top 10%
geom_hline(yintercept=0.63, col="grey60") + # top 1%
geom_text(x=max_points*0, y=0.30, label = "Top 50%", col="grey30", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.54, label = "Top 10%", col="grey50", size=3, vjust=-0.2) +
geom_text(x=max_points*0, y=0.63, label = "Top 1%" , col="grey50", size=3, vjust=-0.2) +
xlab("") + ylab("") + ggtitle(paste0("Rigor profile for ", meta$FullName), subtitle = paste0("Overall score = ", round(overall_score, 2))) +
scale_x_continuous(labels = NULL, breaks = NULL) + scale_y_continuous(labels = NULL, breaks = NULL, limits=c(0, 1)) + theme_void() +
guides(fill=guide_legend("Rigor Dimension")) +
scale_fill_brewer(palette="Set3") +
geom_text(aes(x=xmid, y=0.75, label = dimension), vjust = -0.5, size=3)
p1
p1