diff --git a/Code/DHARMaPackageSupport/glmmTMB/glmmTMB.Rmd b/Code/DHARMaPackageSupport/glmmTMB/glmmTMB.Rmd index 1dc52ef8..7b79f976 100644 --- a/Code/DHARMaPackageSupport/glmmTMB/glmmTMB.Rmd +++ b/Code/DHARMaPackageSupport/glmmTMB/glmmTMB.Rmd @@ -212,11 +212,12 @@ plot(res, asFactor = T) Example from the help ```{r} -m <- glmmTMB(count~spp + mined + (1|site), zi=~spp + mined, family=nbinom2, Salamanders) +m <- glmmTMB(count~spp + mined + (1|site), zi=~spp + mined, + family=nbinom2, Salamanders) summary(m) res = simulateResiduals(m) -plot(res, rank = T) +plot(res) ### creating new data based on the fitted models. Residuals should now be perfect diff --git a/Code/DHARMaPerformance/.gitignore b/Code/DHARMaPerformance/.gitignore new file mode 100644 index 00000000..aab52d90 --- /dev/null +++ b/Code/DHARMaPerformance/.gitignore @@ -0,0 +1 @@ +*.png \ No newline at end of file diff --git a/Code/DHARMaPerformance/Power/TestPower.Rmd b/Code/DHARMaPerformance/Power/TestPower.Rmd index b14c5b81..ebb708d2 100644 --- a/Code/DHARMaPerformance/Power/TestPower.Rmd +++ b/Code/DHARMaPerformance/Power/TestPower.Rmd @@ -57,10 +57,11 @@ legend("bottomright", tests, col = 1:4, lty = 1:4) ``` -## Benchmarking DHARMa dispersiontests against pearson dispersiontest for glmms +## Benchmarking DHARMa dispersiontests against AER dispersiontest for Poisson GLM ```{r} + overdisp_fun <- function(model) { ## number of variance parameters in ## an n-by-n variance-covariance matrix @@ -73,11 +74,43 @@ overdisp_fun <- function(model) { Pearson.chisq <- sum(rp^2) prat <- Pearson.chisq/rdf pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE) - c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval) + list(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval) +} + +doCalculations <- function(control = 0){ + testData = createData(sampleSize = 200, family = poisson(), overdispersion = control, randomEffectVariance = 1) + fittedModel <- glmer(observedResponse ~ Environment1 + (1|group), data = testData, family = poisson()) + + out = list() + + res <- simulateResiduals(fittedModel = fittedModel, n = 250) + out$uniformTest = testUniformity(res)$p.value + out$Dispersion = testDispersion(res, plot = F)$p.value + out$DispersionAER = overdisp_fun(fittedModel)$pval + + res <- simulateResiduals(fittedModel = fittedModel, n = 250, refit = T) + out$DispersionRefitted = testDispersion(res, plot = F)$p.value + return(unlist(out)) } + +# testing a single return +doCalculations(control = 0.3) + +dispValues = seq(0,1.2, len = 5) + +# running benchmark +out = runBenchmarks(doCalculations, controlValues = dispValues , nRep = 10, parallel = F) + +tests = c("uniformity", "DHARMa disp basic" , "GLMER dispersiontest", "DHARMa disp refit") + + +matplot(dispValues, t(out$summaries[,1,]), type = "l") +legend("bottomright", tests, col = 1:4, lty = 1:4) + ``` + # Autocorrelation tests diff --git a/DHARMa/R/simulateResiduals.R b/DHARMa/R/simulateResiduals.R index 71a23255..36d0dba7 100644 --- a/DHARMa/R/simulateResiduals.R +++ b/DHARMa/R/simulateResiduals.R @@ -181,8 +181,18 @@ simulateResiduals <- function(fittedModel, n = 250, refit = F, integerResponse = ########### Wrapup ############ out$scaledResidualsNormal = qnorm(out$scaledResiduals + 0.00 ) - + # outliers = list() + # outliers$fracLowerThanAllSimulated = sum(out$scaledResiduals == 0) / out$nSim + # outliers$fracHigherThanAllSimulated = sum(out$scaledResiduals == 1) / out$nSim + # + # # see http://www.di.fc.ul.pt/~jpn/r/prob/range.html + # # 1−(1−x)n + # outliers$samplingDist = ecdf(replicate(10000, min(runif(out$nSim)))) + # + # outliers$plowerThanAllSimulated = 1-outliers$samplingDist(fracLowerThanAllSimulated) + # outliers$plowerThanAllSimulated = 1-outliers$samplingDist(fracLowerThanAllSimulated) + out$time = proc.time() - ptm out$randomState = randomState @@ -301,6 +311,7 @@ recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = s if(!is.null(simulationOutput$original)) simulationOutput = simulationOutput$original out = list() + out$original = simulationOutput if(is.null(group)) return(simulationOutput) else group =as.factor(group) @@ -311,14 +322,11 @@ recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = s out$observedResponse = aggregateByGroup(simulationOutput$observedResponse) out$fittedPredictedResponse = aggregateByGroup(simulationOutput$fittedPredictedResponse) out$simulatedResponse = apply(simulationOutput$simulatedResponse, 2, aggregateByGroup) - out$scaledResiduals = rep(NA, out$nGroups) - + if (simulationOutput$refit == F){ - if(simulationOutput$integerResponse == T){ - for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$simulatedResponse[i,] + runif(out$nGroups, -0.5, 0.5))(out$observedResponse[i] + runif(1, -0.5, 0.5)) - } else { - for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$simulatedResponse[i,])(out$observedResponse[i]) - } + + out$scaledResiduals = getQuantile(simulations = out$simulatedResponse , observed = out$observedResponse , n = out$nGroups, nSim = simulationOutput$nSim, integerResponse = simulationOutput$integerResponse) + ######## refit = T ################## } else { @@ -326,22 +334,23 @@ recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = s out$fittedResiduals = aggregateByGroup(simulationOutput$fittedResiduals) out$refittedResiduals = apply(simulationOutput$refittedResiduals, 2, aggregateByGroup) out$refittedPearsonResiduals = apply(simulationOutput$refittedPearsonResiduals, 2, aggregateByGroup) - - if(simulationOutput$integerResponse == T){ - for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$refittedResiduals[i,] + runif(out$nGroups, -0.5, 0.5))(out$fittedResiduals[i] + runif(1, -0.5, 0.5)) - } else { - for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$refittedResiduals[i,])(out$fittedResiduals[i]) - } + + out$scaledResiduals = getQuantile(simulations = out$refittedResiduals , observed = out$fittedResiduals , n = out$nGroups, nSim = simulationOutput$nSim, integerResponse = simulationOutput$integerResponse) + } + # hack - the c here will result in both old and new outputs to be present resulting output, but a named access should refer to the new, grouped calculations + # question to myself - what's the use of that, why not erase the old outputs? they are anyway saved in the old object + out$aggregateByGroup = aggregateByGroup out = c(out, simulationOutput) - out$original = simulationOutput class(out) = "DHARMa" return(out) } - +#' Quantile calculations +#' +#' @keywords internal getQuantile <- function(simulations, observed, n, nSim, integerResponse){ scaledResiduals = rep(NA, n) diff --git a/DHARMa/R/tests.R b/DHARMa/R/tests.R index 4eaea78a..119bbfb4 100644 --- a/DHARMa/R/tests.R +++ b/DHARMa/R/tests.R @@ -211,6 +211,8 @@ testGeneric <- function(simulationOutput, summary, alternative = c("two.sided", #' @example inst/examples/testTemporalAutocorrelationHelp.R #' @export testTemporalAutocorrelation <- function(simulationOutput, time = NULL , alternative = c("two.sided", "greater", "less"), plot = T){ + # actually not sure if this is neccessary for dwtest, but seems better to aggregate + if(any(duplicated(time))) stop("testing for temporal autocorrelation requires unique time values - if you have several observations per location, use the recalculateResiduals function to aggregate residuals per location") alternative <- match.arg(alternative) @@ -250,6 +252,8 @@ testTemporalAutocorrelation <- function(simulationOutput, time = NULL , alternat #' @export testSpatialAutocorrelation <- function(simulationOutput, x = NULL, y = NULL, distMat = NULL, alternative = c("two.sided", "greater", "less"), plot = T){ + if(any(duplicated(cbind(x,y)))) stop("testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, use the recalculateResiduals function to aggregate residuals per location") + alternative <- match.arg(alternative) if( !is.null(x) & !is.null(distMat) ) warning("coordinates and distMat provided, coordinates will only be used for plotting") diff --git a/DHARMa/inst/examples/createDharmaHelp.R b/DHARMa/inst/examples/createDharmaHelp.R index 44e7baf7..e92fb1d9 100644 --- a/DHARMa/inst/examples/createDharmaHelp.R +++ b/DHARMa/inst/examples/createDharmaHelp.R @@ -1,3 +1,32 @@ +## READING IN HAND-CODED SIMULATIONS + +testData = createData(sampleSize = 50, randomEffectVariance = 0) +fittedModel <- glm(observedResponse ~ Environment1, data = testData, family = "poisson") + +# in DHARMA, using the simulate.glm function of glm +sims = simulateResiduals(fittedModel) +plot(sims, quantreg = FALSE) + +# Doing the same with a handcode simulate function. +# of course this code will only work with a 1-par glm model +simulateMyfit <- function(n=10, fittedModel){ + int = coef(fittedModel)[1] + slo = coef(fittedModel)[2] + pred = exp(int + slo * testData$Environment1) + predSim = replicate(n, rpois(length(pred), pred)) + return(predSim) +} + +sims = simulateMyfit(250, fittedModel) + +dharmaRes <- createDHARMa(simulatedResponse = sims, + observedResponse = testData$observedResponse, + fittedPredictedResponse = predict(fittedModel, type = "response"), + integer = TRUE) +plot(dharmaRes, quantreg = FALSE) + +## A BAYESIAN EXAMPLE + \dontrun{ # This example shows how to check the residuals for a diff --git a/DHARMa/inst/examples/testSpatialAutocorrelationHelp.R b/DHARMa/inst/examples/testSpatialAutocorrelationHelp.R index f7e23cc4..762aad29 100644 --- a/DHARMa/inst/examples/testSpatialAutocorrelationHelp.R +++ b/DHARMa/inst/examples/testSpatialAutocorrelationHelp.R @@ -11,3 +11,43 @@ testSpatialAutocorrelation(res) # Alternatively, one can provide a distance matrix dM = as.matrix(dist(cbind(testData$x, testData$y))) testSpatialAutocorrelation(res, distMat = dM) + +# carefull with clustered data and conditional / unconditional simulations +# this originates from https://github.com/florianhartig/DHARMa/issues/81 + +# Assume our data is divided into clusters, and we use a RE to take out cluster effects + +clusters = 100 +subsamples = 10 +size = clusters * subsamples + +testData = createData(sampleSize = size, family = gaussian(), numGroups = clusters ) +testData$x = rnorm(clusters)[testData$group] + rnorm(size, sd = 0.01) +testData$y = rnorm(clusters)[testData$group] + rnorm(size, sd = 0.01) + +library(lme4) +fittedModel <- lmer(observedResponse ~ Environment1 + (1|group), data = testData) + +# DHARMa default is to re-simulted REs - this means spatial pattern remains +# because residuals are still clustered + +res = simulateResiduals(fittedModel) +testSpatialAutocorrelation(res, x = testData$x, y = testData$y) + +# However, it should disappear if you just calculate an aggregate residuals per cluster +# Because at least how the data are simualted, cluster are spatially independent + +res2 = recalculateResiduals(res, group = testData$group) +testSpatialAutocorrelation(res2, + x = aggregate(testData$x, list(testData$group), mean)$x, + y = aggregate(testData$y, list(testData$group), mean)$x) + +# For lme4, possible to simulated residuals conditional on fitted REs (re.form) +# This takes out most of the RSA - a remainder is probably due the shrinkage +# of the REs + +res = simulateResiduals(fittedModel, re.form = NULL) +testSpatialAutocorrelation(res, x = testData$x, y = testData$y) + + + diff --git a/DHARMa/man/createDHARMa.Rd b/DHARMa/man/createDHARMa.Rd index 640a4dcc..2152f8a5 100644 --- a/DHARMa/man/createDHARMa.Rd +++ b/DHARMa/man/createDHARMa.Rd @@ -29,6 +29,35 @@ The use of this function is to convert simulated residuals (e.g. from a point es Either scaled residuals or (simulatedResponse AND observed response) have to be provided } \examples{ +## READING IN HAND-CODED SIMULATIONS + +testData = createData(sampleSize = 50, randomEffectVariance = 0) +fittedModel <- glm(observedResponse ~ Environment1, data = testData, family = "poisson") + +# in DHARMA, using the simulate.glm function of glm +sims = simulateResiduals(fittedModel) +plot(sims, quantreg = FALSE) + +# Doing the same with a handcode simulate function. +# of course this code will only work with a 1-par glm model +simulateMyfit <- function(n=10, fittedModel){ + int = coef(fittedModel)[1] + slo = coef(fittedModel)[2] + pred = exp(int + slo * testData$Environment1) + predSim = replicate(n, rpois(length(pred), pred)) + return(predSim) +} + +sims = simulateMyfit(250, fittedModel) + +dharmaRes <- createDHARMa(simulatedResponse = sims, + observedResponse = testData$observedResponse, + fittedPredictedResponse = predict(fittedModel, type = "response"), + integer = TRUE) +plot(dharmaRes, quantreg = FALSE) + +## A BAYESIAN EXAMPLE + \dontrun{ # This example shows how to check the residuals for a diff --git a/DHARMa/man/getQuantile.Rd b/DHARMa/man/getQuantile.Rd new file mode 100644 index 00000000..f99026dd --- /dev/null +++ b/DHARMa/man/getQuantile.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulateResiduals.R +\name{getQuantile} +\alias{getQuantile} +\title{Quantile calculations} +\usage{ +getQuantile(simulations, observed, n, nSim, integerResponse) +} +\description{ +Quantile calculations +} +\keyword{internal} diff --git a/DHARMa/man/testSpatialAutocorrelation.Rd b/DHARMa/man/testSpatialAutocorrelation.Rd index cac8f344..3dd95236 100644 --- a/DHARMa/man/testSpatialAutocorrelation.Rd +++ b/DHARMa/man/testSpatialAutocorrelation.Rd @@ -46,6 +46,46 @@ testSpatialAutocorrelation(res) # Alternatively, one can provide a distance matrix dM = as.matrix(dist(cbind(testData$x, testData$y))) testSpatialAutocorrelation(res, distMat = dM) + +# carefull with clustered data and conditional / unconditional simulations +# this originates from https://github.com/florianhartig/DHARMa/issues/81 + +# Assume our data is divided into clusters, and we use a RE to take out cluster effects + +clusters = 100 +subsamples = 10 +size = clusters * subsamples + +testData = createData(sampleSize = size, family = gaussian(), numGroups = clusters ) +testData$x = rnorm(clusters)[testData$group] + rnorm(size, sd = 0.01) +testData$y = rnorm(clusters)[testData$group] + rnorm(size, sd = 0.01) + +library(lme4) +fittedModel <- lmer(observedResponse ~ Environment1 + (1|group), data = testData) + +# DHARMa default is to re-simulted REs - this means spatial pattern remains +# because residuals are still clustered + +res = simulateResiduals(fittedModel) +testSpatialAutocorrelation(res, x = testData$x, y = testData$y) + +# However, it should disappear if you just calculate an aggregate residuals per cluster +# Because at least how the data are simualted, cluster are spatially independent + +res2 = recalculateResiduals(res, group = testData$group) +testSpatialAutocorrelation(res2, + x = aggregate(testData$x, list(testData$group), mean)$x, + y = aggregate(testData$y, list(testData$group), mean)$x) + +# For lme4, possible to simulated residuals conditional on fitted REs (re.form) +# This takes out most of the RSA - a remainder is probably due the shrinkage +# of the REs + +res = simulateResiduals(fittedModel, re.form = NULL) +testSpatialAutocorrelation(res, x = testData$x, y = testData$y) + + + } \seealso{ \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}} diff --git a/DHARMa/tests/testthat/testRecalculateResiduals.R b/DHARMa/tests/testthat/testRecalculateResiduals.R new file mode 100644 index 00000000..c56b1eb0 --- /dev/null +++ b/DHARMa/tests/testthat/testRecalculateResiduals.R @@ -0,0 +1,22 @@ + + +testData = createData(sampleSize = 200, family = poisson(), numGroups = 20, randomEffectVariance = 0) + +#len = sum(testData$group == 1) +#testData$group[testData$group == 1] = sample(c(1,21),20, replace = T) + +fittedModel <- glm(observedResponse ~ Environment1, family = "poisson", data = testData) + +simulationOutput = simulateResiduals(fittedModel) +plot(simulationOutput) +dim(simulationOutput$simulatedResponse) + +simulationOutput = recalculateResiduals(simulationOutput, group = testData$group) +plot(simulationOutput) + + +testData = createData(sampleSize = 200, family = gaussian()) +fittedModel <- glm(observedResponse ~ Environment1, family = "gaussian", data = testData) +simulationOutput = recalculateResiduals(simulationOutput, group = testData$group) + + diff --git a/DHARMa/vignettes/DHARMa.Rmd b/DHARMa/vignettes/DHARMa.Rmd index ed15b885..a088d586 100644 --- a/DHARMa/vignettes/DHARMa.Rmd +++ b/DHARMa/vignettes/DHARMa.Rmd @@ -266,7 +266,7 @@ A few general rules of thumb This this is how **overdispersion** looks like in the DHARMa residuals ```{r} -testData = createData(sampleSize = 500, overdispersion = 2, family = poisson()) +testData = createData(sampleSize = 500, overdispersion = 0.6, family = poisson()) fittedModel <- glmer(observedResponse ~ Environment1 + (1|group) , family = "poisson", data = testData) simulationOutput <- simulateResiduals(fittedModel = fittedModel) @@ -663,6 +663,53 @@ The overdispersion looks better, but you can see that the residuals look a bit i Likely, the reason is the steep increase in the beginning that one can see in the raw data plot. One would probably need to apply another transformation or a nonlinear function to completely fit this away. +## Owl example + +The next examples uses the fairly well known Owl dataset which is provided in glmmTMB (see ?Owls for more info about the data). + +The following shows a sequence of models, all checked with DHARMa. The example is discussed in a talk at ISEC 2018, see slides [here](https://www.slideshare.net/florianhartig/mon-c5hartig2493). + +```{r} + +m1 <- glm(SiblingNegotiation ~ FoodTreatment*SexParent + offset(log(BroodSize)), data=Owls , family = poisson) +res <- simulateResiduals(m1) +plot(res) + + +m2 <- glmer(SiblingNegotiation ~ FoodTreatment*SexParent + offset(log(BroodSize)) + (1|Nest), data=Owls , family = poisson) +res <- simulateResiduals(m2) +plot(res) + + +m3 <- glmmTMB(SiblingNegotiation ~ FoodTreatment*SexParent + offset(log(BroodSize)) + (1|Nest), data=Owls , family = nbinom1) +res <- simulateResiduals(m3) +plot(res) +summary(m3) + +plotResiduals(Owls$FoodTreatment, res$scaledResiduals) + +testDispersion(res) +testZeroInflation(res) + +m4 <- glmmTMB(SiblingNegotiation ~ FoodTreatment*SexParent + offset(log(BroodSize)) + (1|Nest), ziformula = ~ FoodTreatment + SexParent, data=Owls , family = nbinom1) +summary(m4) + +library(DHARMa) +res <- simulateResiduals(m4) +plot(res) + +plotResiduals(Owls$FoodTreatment, res$scaledResiduals) + +testDispersion(res) +testZeroInflation(res) + +m5 <- glmmTMB(SiblingNegotiation ~ FoodTreatment*SexParent + offset(log(BroodSize)) + (1|Nest), dispformula = ~ FoodTreatment , ziformula = ~ FoodTreatment + SexParent, data=Owls , family = nbinom1) +summary(m5) + +``` + + + ## Beetlecount / Poisson example