diff --git a/Code/DHARMaManualTests/Calibration.Rmd b/Code/DHARMaManualTests/Calibration.Rmd
new file mode 100644
index 00000000..1a3b0411
--- /dev/null
+++ b/Code/DHARMaManualTests/Calibration.Rmd
@@ -0,0 +1,46 @@
+---
+title: "Calibration of p-values"
+author: "Florian Hartig"
+date: "6/3/2020"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+library(DHARMa) # createData, simulateResiduals and testOutliers come from DHARMa
+```
+
+
+```{r}
+  reps = 200
+
+  pVals = matrix(ncol = 6, nrow = reps)
+
+  for(i in 1:reps){
+    testData = createData(sampleSize = 10000, overdispersion = 0, pZeroInflation = 0, randomEffectVariance = 0, family = gaussian())
+    fittedModel <- lm(observedResponse ~ Environment1 , data = testData)
+    simulationOutput <- simulateResiduals(fittedModel = fittedModel, n = 100)
+
+
+    pVals[i,1] = testOutliers(simulationOutput, plot = F, alternative = "two.sided")$p.value
+    pVals[i,2] = testOutliers(simulationOutput, plot = F, alternative = "greater")$p.value
+    pVals[i,3] = testOutliers(simulationOutput, plot = F, alternative = "less")$p.value
+
+    pVals[i,4] = testOutliers(simulationOutput, plot = F, alternative = "two.sided", margin = "upper")$p.value
+    pVals[i,5] = testOutliers(simulationOutput, plot = F, alternative = "greater", margin = "upper")$p.value
+    pVals[i,6] = testOutliers(simulationOutput, plot = F, alternative = "less", margin = "upper")$p.value
+  }
+
+```
+
+
+
+```{r}
+par(mfrow = c(2,3))
+for(i in 1:6) hist(pVals[,i], breaks = 50)
+
+
+```
+
+
+
diff --git a/DHARMa/DESCRIPTION b/DHARMa/DESCRIPTION
index 21f3c003..de1dad7d 100644
--- a/DHARMa/DESCRIPTION
+++ b/DHARMa/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: DHARMa
 Title: Residual Diagnostics for Hierarchical (Multi-Level / Mixed) Regression Models
-Version: 0.3.1
+Version: 0.3.1.1
 Date: 2020-05-10
 Authors@R: c(person("Florian", "Hartig", email = "florian.hartig@biologie.uni-regensburg.de", role = c("aut", "cre"), comment=c(ORCID="0000-0002-6255-9059")))
 Description: The 'DHARMa' package uses a simulation-based approach to create
diff --git a/DHARMa/R/tests.R b/DHARMa/R/tests.R
index 60a55afb..6608f31d 100644
--- a/DHARMa/R/tests.R
+++ b/DHARMa/R/tests.R
@@ -145,54 +145,60 @@ testQuantiles <- function(simulationOutput, predictor = NULL, quantiles = c(0.25
 
 #' Test for outliers
 #'
-#' This function tests if the number of observations that are strictly greater / smaller than all simulations are larger than expected
+#' This function tests if the number of observations outside the simulation envelope is larger or smaller than expected
 #'
 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
-#' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis
+#' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" (default) compared to the simulated null hypothesis
+#' @param margin whether to test for outliers only at the lower margin, only at the upper margin, or at both sides (default) of the simulated data distribution
 #' @param plot if T, the function will create an additional plot
 #' @details DHARMa residuals are created by simulating from the fitted model, and comparing the simulated values to the observed data. It can occur that all simulated values are higher or smaller than the observed data, in which case they get the residual value of 0 and 1, respectively. I refer to these values as simulation outliers, or simply outliers.
 #'
-#' Because no data was simulated in the range of the observed value, we actually don't know "how much" these values deviate from the model expecation, so the term "outlier" should be used with a grain of salt - it's not a judgement about the probability of a deviation from an expectation, but denotes that we are outside the simulated range. The number of outliers would usually decrease if the number of DHARMa simulations is increased.
+#' Because no data was simulated in the range of the observed value, we don't know "how strongly" these values deviate from the model expectation, so the term "outlier" should be used with a grain of salt - it's not a judgment about the magnitude of a deviation from an expectation, but simply that we are outside the simulated range, and thus cannot say anything more about the location of the residual.
+#'
+#' Note also that the number of outliers will decrease as we increase the number of simulations. Under the null hypothesis that the model is correct, we expect nData / (nSim + 1) outliers at each margin of the distribution. To see why, consider that if the data and the model distribution are identical, the probability that a given observation is higher than all simulations is 1/(nSim + 1).
 #'
-#' The probability of an outlier depends on the number of simulations (in fact, it is 1/(nSim +1) for each side), so whether the existence of outliers is a reason for concern depends also on the number of simulations. The expected number of outliers is therefore binomially distributed, and we can calculate a p-value from that
+#' Based on this null expectation, we can test for an excess or lack of outliers. By default, testOutliers() looks for both, so if you get a significant p-value, you have to check whether you have too many or too few outliers. An excess of outliers means that too many values fall outside the simulation envelope, which could be caused by overdispersion, or by what we would classically call outliers. A lack of outliers would be caused, for example, by underdispersion.
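+#'
+#' As a worked example of this null expectation (numbers purely illustrative): with nObs = 1000 observations and nSim = 250 simulations, we would expect 1000 / 251, i.e. roughly 4 outliers per margin under H0, and thus roughly 8 outliers in total for margin = "both".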
 #'
 #'
 #' @author Florian Hartig
 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
-#' @example inst/examples/testsHelp.R
+#' @example inst/examples/testOutliersHelp.R
 #' @export
-testOutliers <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), plot = T){
-
-  out = list()
-  out$data.name = deparse(substitute(simulationOutput))
+testOutliers <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), margin = c("both", "upper", "lower"), plot = T){
+
+  # check inputs
+  alternative = match.arg(alternative)
+  margin = match.arg(margin)
+  data.name = deparse(substitute(simulationOutput)) # remember: needs to be called before ensureDHARMa
 
   simulationOutput = ensureDHARMa(simulationOutput, convert = "Model")
 
-  alternative <- match.arg(alternative)
-
-  out$alternative = alternative
-  out$method = "DHARMa outlier test based on exact binomial test"
+  # calculation of outliers
+  if(margin == "both") outliers = sum(simulationOutput$scaledResiduals %in% c(0, 1))
+  if(margin == "upper") outliers = sum(simulationOutput$scaledResiduals == 1)
+  if(margin == "lower") outliers = sum(simulationOutput$scaledResiduals == 0)
 
-  outLow = sum(simulationOutput$scaledResiduals == 0)
-  outHigh = sum(simulationOutput$scaledResiduals == 1)
+  # calculation of trials and H0
+  outFreqH0 = 1/(simulationOutput$nSim + 1) * ifelse(margin == "both", 2, 1)
   trials = simulationOutput$nObs
-  outFreqH0 = 1/(simulationOutput$nSim +1)
 
-  out$testlow = binom.test(outLow, trials, p = outFreqH0, alternative = "less")
-  out$testhigh = binom.test(outHigh, trials, p = outFreqH0, alternative = "greater")
+  out = binom.test(outliers, trials, p = outFreqH0, alternative = alternative)
 
-  out$statistic = c(outLow = outLow, outHigh = outHigh, nobs = trials, freqH0 = outFreqH0)
+  # overwrite information in binom.test
 
-  if(alternative == "greater") p = out$testlow$p.value
-  if(alternative == "less") p = out$testhigh$p.value
-  if(alternative == "two.sided") p = min(min(out$testlow$p.value, out$testhigh$p.value) * 2,1)
+  out$data.name = data.name
+  out$margin = margin
+  out$method = "DHARMa outlier test based on exact binomial test"
 
-  out$p.value = p
-  class(out) = "htest"
+  names(out$statistic) = paste("outliers at", margin, "margin(s)")
+  names(out$parameter) = "simulations"
+  names(out$estimate) = paste("frequency of outliers (expected:", out$null.value,")")
 
   if(plot == T) {
-    hist(simulationOutput, main = "", max(round(simulationOutput$nSim / 5), 20))
+    hist(simulationOutput, main = "")
 
     main = ifelse(out$p.value <= 0.05,
                   "Outlier test significant",
@@ -467,12 +474,12 @@ testTemporalAutocorrelation <- function(simulationOutput, time = NULL , alternat
 #' @export
 testSpatialAutocorrelation <- function(simulationOutput, x = NULL, y = NULL, distMat = NULL, alternative = c("two.sided", "greater", "less"), plot = T){
 
+  alternative <- match.arg(alternative)
+  data.name = deparse(substitute(simulationOutput)) # needs to be before ensureDHARMa
   simulationOutput = ensureDHARMa(simulationOutput, convert = T)
 
   if(any(duplicated(cbind(x,y)))) stop("testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, either use the recalculateResiduals function to aggregate residuals per location, or extract the residuals from the fitted object, and plot / test each of them independently for spatially repeated subgroups (a typical scenario would repeated spatial observation, in which case one could plot / test each time step separately for temporal autocorrelation). Note that the latter must be done by hand, outside testSpatialAutocorrelation.")
 
-  alternative <- match.arg(alternative)
-
   if( (!is.null(x) | !is.null(y)) & !is.null(distMat) ) message("both coordinates and distMat provided, calculations will be done based on the distance matrix, coordinates will only be used for plotting")
 
   # if not provided, fill x and y with random numbers (Null model)
   if(is.null(x)){
@@ -498,7 +505,7 @@ testSpatialAutocorrelation <- function(simulationOutput, x = NULL, y = NULL, di
   out$method = "DHARMa Moran's I test for spatial autocorrelation"
   out$alternative = "Spatial autocorrelation"
   out$p.value = MI$p.value
-  # out$data.name = deparse(substitute(simulationOutput))
+  out$data.name = data.name
 
   class(out) = "htest"
diff --git a/DHARMa/inst/examples/testOutliersHelp.R b/DHARMa/inst/examples/testOutliersHelp.R
new file mode 100644
index 00000000..9a9c9ad9
--- /dev/null
+++ b/DHARMa/inst/examples/testOutliersHelp.R
@@ -0,0 +1,24 @@
+set.seed(123)
+
+testData = createData(sampleSize = 200, overdispersion = 1, randomEffectVariance = 0)
+fittedModel <- glm(observedResponse ~ Environment1 , family = "poisson", data = testData)
+simulationOutput <- simulateResiduals(fittedModel = fittedModel)
+
+# default outlier test (with plot)
+testOutliers(simulationOutput)
+
+# note that the default is to test outliers at both margins, for both an excess and a
+# lack of outliers. Here we see that we mostly have an excess of outliers at the upper
+# margin. You can see that it is an excess because the observed frequency of outliers
+# is 0.055, while the expected frequency is 0.008
+
+# Let's see what would have happened if we had only checked the lower margin
+testOutliers(simulationOutput, margin = "lower", plot = FALSE)
+
+# OK, now the frequency of outliers is 0, so we have too few, but this is not
+# significant against the expectation
+
+# just for completeness, what would have happened if we had checked both
+# margins, but only for a lack of outliers (i.e. underdispersion)
+
+testOutliers(simulationOutput, alternative = "less", plot = FALSE)
diff --git a/DHARMa/man/testOutliers.Rd b/DHARMa/man/testOutliers.Rd
index 11d3762f..1cc4283f 100644
--- a/DHARMa/man/testOutliers.Rd
+++ b/DHARMa/man/testOutliers.Rd
@@ -5,75 +5,54 @@
 \title{Test for outliers}
 \usage{
 testOutliers(simulationOutput, alternative = c("two.sided", "greater",
-  "less"), plot = T)
+  "less"), margin = c("both", "upper", "lower"), plot = T)
 }
 \arguments{
 \item{simulationOutput}{an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa}
 
-\item{alternative}{a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis}
+\item{alternative}{a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" (default) compared to the simulated null hypothesis}
+
+\item{margin}{whether to test for outliers only at the lower margin, only at the upper margin, or at both sides (default) of the simulated data distribution}
 
 \item{plot}{if T, the function will create an additional plot}
 }
 \description{
-This function tests if the number of observations that are strictly greater / smaller than all simulations are larger than expected
+This function tests if the number of observations outside the simulation envelope is larger or smaller than expected
 }
 \details{
 DHARMa residuals are created by simulating from the fitted model, and comparing the simulated values to the observed data. It can occur that all simulated values are higher or smaller than the observed data, in which case they get the residual value of 0 and 1, respectively. I refer to these values as simulation outliers, or simply outliers.
 
-Because no data was simulated in the range of the observed value, we actually don't know "how much" these values deviate from the model expecation, so the term "outlier" should be used with a grain of salt - it's not a judgement about the probability of a deviation from an expectation, but denotes that we are outside the simulated range. The number of outliers would usually decrease if the number of DHARMa simulations is increased.
+Because no data was simulated in the range of the observed value, we don't know "how strongly" these values deviate from the model expectation, so the term "outlier" should be used with a grain of salt - it's not a judgment about the magnitude of a deviation from an expectation, but simply that we are outside the simulated range, and thus cannot say anything more about the location of the residual.
+
+Note also that the number of outliers will decrease as we increase the number of simulations. Under the null hypothesis that the model is correct, we expect nData / (nSim + 1) outliers at each margin of the distribution. To see why, consider that if the data and the model distribution are identical, the probability that a given observation is higher than all simulations is 1/(nSim + 1).
 
-The probability of an outlier depends on the number of simulations (in fact, it is 1/(nSim +1) for each side), so whether the existence of outliers is a reason for concern depends also on the number of simulations. The expected number of outliers is therefore binomially distributed, and we can calculate a p-value from that
+Based on this null expectation, we can test for an excess or lack of outliers. By default, testOutliers() looks for both, so if you get a significant p-value, you have to check whether you have too many or too few outliers. An excess of outliers means that too many values fall outside the simulation envelope, which could be caused by overdispersion, or by what we would classically call outliers. A lack of outliers would be caused, for example, by underdispersion.
 }
 \examples{
-testData = createData(sampleSize = 200, overdispersion = 0.5, randomEffectVariance = 0)
+set.seed(123)
+
+testData = createData(sampleSize = 200, overdispersion = 1, randomEffectVariance = 0)
 fittedModel <- glm(observedResponse ~ Environment1 , family = "poisson", data = testData)
 simulationOutput <- simulateResiduals(fittedModel = fittedModel)
 
-# the plot function runs 4 tests
-# i) KS test i) Dispersion test iii) Outlier test iv) quantile test
-plot(simulationOutput, quantreg = TRUE)
-
-# testResiduals tests distribution, dispersion and outliers
-testResiduals(simulationOutput)
-
-####### Individual tests #######
-
-# KS test for correct distribution of residuals
-testUniformity(simulationOutput)
-
-# Dispersion test
-testDispersion(simulationOutput) # tests under and overdispersion
-testDispersion(simulationOutput, alternative = "less") # only underdispersion
-testDispersion(simulationOutput, alternative = "less") # only underdispersion
-
-# if model is refitted, a different test will be called
-simulationOutput2 <- simulateResiduals(fittedModel = fittedModel, refit = TRUE, seed = 12)
-testDispersion(simulationOutput2)
-
-# often useful to test dispersion per group (e.g. binomial data, see vignette)
-simulationOutput3 = recalculateResiduals(simulationOutput, group = testData$group)
-testDispersion(simulationOutput3)
-
-# Outlier test (number of observations outside simulation envelope)
-testOutliers(simulationOutput)
-
-# testing zero inflation
-testZeroInflation(simulationOutput)
-
-# testing generic summaries
-countOnes <- function(x) sum(x == 1) # testing for number of 1s
-testGeneric(simulationOutput, summary = countOnes) # 1-inflation
-testGeneric(simulationOutput, summary = countOnes, alternative = "less") # 1-deficit
-
-means <- function(x) mean(x) # testing if mean prediction fits
-testGeneric(simulationOutput, summary = means)
+# default outlier test (with plot)
+testOutliers(simulationOutput)
 
-spread <- function(x) sd(x) # testing if mean sd fits
-testGeneric(simulationOutput, summary = spread)
+# note that the default is to test outliers at both margins, for both an excess and a
+# lack of outliers. Here we see that we mostly have an excess of outliers at the upper
+# margin. You can see that it is an excess because the observed frequency of outliers
+# is 0.055, while the expected frequency is 0.008
+# Let's see what would have happened if we had only checked the lower margin
+testOutliers(simulationOutput, margin = "lower", plot = FALSE)
+# OK, now the frequency of outliers is 0, so we have too few, but this is not
+# significant against the expectation
+# just for completeness, what would have happened if we had checked both
+# margins, but only for a lack of outliers (i.e.
underdispersion) +testOutliers(simulationOutput, alternative = "less", plot = FALSE) } \seealso{ \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} diff --git a/DHARMa/tests/testthat/testTests.R b/DHARMa/tests/testthat/testTests.R index 194b6bbc..e5a6db6d 100644 --- a/DHARMa/tests/testthat/testTests.R +++ b/DHARMa/tests/testthat/testTests.R @@ -3,13 +3,13 @@ context("DHARMa tests") # erase? library(stringr) test_that("overdispersion recognized", { - + set.seed(123) - + testData = createData(sampleSize = 200, overdispersion = 3, pZeroInflation = 0.4, randomEffectVariance = 0) fittedModel <- glm(observedResponse ~ Environment1 , family = "poisson", data = testData) simulationOutput <- simulateResiduals(fittedModel = fittedModel) - + x = testUniformity(simulationOutput, plot = F) expect_true( x$p.value < 0.05) x = testOutliers(simulationOutput, plot = F) @@ -17,168 +17,179 @@ test_that("overdispersion recognized", { x = testDispersion(simulationOutput, alternative = "greater", plot = F) expect_true( x$p.value < 0.05) x = testZeroInflation(simulationOutput, alternative = "greater", plot = F) - expect_true( x$p.value < 0.05) - + expect_true( x$p.value < 0.05) + }) test_that("tests work", { - + # creating test data - + testData = createData(sampleSize = 200, overdispersion = 0.5, pZeroInflation = 0, randomEffectVariance = 0) fittedModel <- glm(observedResponse ~ Environment1 , family = "poisson", data = testData) simulationOutput <- simulateResiduals(fittedModel = fittedModel) - + ###### Distribution tests ##### - + testUniformity(simulationOutput) testUniformity(simulationOutput, alternative = "less") testUniformity(simulationOutput, alternative = "greater") - + ###### Dispersion tests ####### - + testDispersion(simulationOutput) testDispersion(simulationOutput, alternative = "less") testDispersion(simulationOutput, alternative = "greater") testDispersion(simulationOutput, alternative = "two.sided", plot = FALSE) - + testOverdispersion(simulationOutput) testOverdispersionParametric(simulationOutput) - + ###### Both together########### - + testResiduals(simulationOutput) testSimulatedResiduals(simulationOutput) - + ###### zero-inflation ########## - + # testing zero inflation testZeroInflation(simulationOutput) testZeroInflation(simulationOutput, alternative = "less") - + # testing generic summaries countOnes <- function(x) sum(x == 1) # testing for number of 1s testGeneric(simulationOutput, summary = countOnes) # 1-inflation testGeneric(simulationOutput, summary = countOnes, alternative = "less") # 1-deficit - + means <- function(x) mean(x) # testing if mean prediction fits - testGeneric(simulationOutput, summary = means) - + testGeneric(simulationOutput, summary = means) + spread <- function(x) sd(x) # testing if mean sd fits - testGeneric(simulationOutput, summary = spread) - + testGeneric(simulationOutput, summary = spread) + ################################################################## - + # grouped simulationOutput <- recalculateResiduals(simulationOutput, group = testData$group) - + ###### Distribution tests ##### - + testUniformity(simulationOutput) testUniformity(simulationOutput, alternative = "less") testUniformity(simulationOutput, alternative = "greater") - + ###### Dispersion tests ####### - + testDispersion(simulationOutput) testDispersion(simulationOutput, alternative 
= "less") testDispersion(simulationOutput, alternative = "greater") testDispersion(simulationOutput, alternative = "two.sided", plot = T) - + testOverdispersion(simulationOutput) testOverdispersionParametric(simulationOutput) - + ###### Both together########### - + testResiduals(simulationOutput) testSimulatedResiduals(simulationOutput) - + ###### zero-inflation ########## - + # testing zero inflation testZeroInflation(simulationOutput) testZeroInflation(simulationOutput, alternative = "less") - + # testing generic summaries countOnes <- function(x) sum(x == 1) # testing for number of 1s testGeneric(simulationOutput, summary = countOnes) # 1-inflation testGeneric(simulationOutput, summary = countOnes, alternative = "less") # 1-deficit - + means <- function(x) mean(x) # testing if mean prediction fits - testGeneric(simulationOutput, summary = means) - + testGeneric(simulationOutput, summary = means) + spread <- function(x) sd(x) # testing if mean sd fits - testGeneric(simulationOutput, summary = spread) - - - + testGeneric(simulationOutput, summary = spread) + + + ################################################################ - + ###### Refited ############## - + # if model is refitted, a different test will be called - + simulationOutput <- simulateResiduals(fittedModel = fittedModel, refit = T) testDispersion(simulationOutput) - - + + # Standard use testSpatialAutocorrelation(simulationOutput, x = testData$x, y = testData$y) testSpatialAutocorrelation(simulationOutput, x = testData$x, y = testData$y, alternative = "two.sided") - + # If x and y is not provided, random values will be created testSpatialAutocorrelation(simulationOutput) - + # Alternatively, one can provide a distance matrix dM = as.matrix(dist(cbind(testData$x, testData$y))) testSpatialAutocorrelation(simulationOutput, distMat = dM) - - + + # Standard use testTemporalAutocorrelation(simulationOutput, time = testData$time) testTemporalAutocorrelation(simulationOutput, time = testData$time, alternative = "greater") - + # If no time is provided, random values will be created testTemporalAutocorrelation(simulationOutput) - + ################################################################## - + # grouped simulationOutput <- recalculateResiduals(simulationOutput, group = testData$group) - + # if model is refitted, a different test will be called - + simulationOutput <- simulateResiduals(fittedModel = fittedModel, refit = T) testDispersion(simulationOutput) - - + + # Standard use testSpatialAutocorrelation(simulationOutput, x = testData$x, y = testData$y) testSpatialAutocorrelation(simulationOutput, x = testData$x, y = testData$y, alternative = "two.sided") - + # If x and y is not provided, random values will be created testSpatialAutocorrelation(simulationOutput) - + # Alternatively, one can provide a distance matrix dM = as.matrix(dist(cbind(testData$x, testData$y))) testSpatialAutocorrelation(simulationOutput, distMat = dM) - - + + # Standard use testTemporalAutocorrelation(simulationOutput, time = testData$time) testTemporalAutocorrelation(simulationOutput, time = testData$time, alternative = "two.sided") - + # If no time is provided, random values will be created testTemporalAutocorrelation(simulationOutput) - + }) +# Test Outliers +test_that("testOutliers", { + testData = createData(sampleSize = 1000, overdispersion = 0, pZeroInflation = 0, randomEffectVariance = 0) + fittedModel <- glm(observedResponse ~ Environment1 , family = "poisson", data = testData) + simulationOutput <- simulateResiduals(fittedModel = fittedModel) 
+  x = testOutliers(simulationOutput, plot = T, alternative = "two.sided")
+  x
+  testOutliers(simulationOutput, plot = T, margin = "lower")
+  testOutliers(simulationOutput, plot = T, alternative = "two.sided", margin = "lower")
+  testOutliers(simulationOutput, plot = T, margin = "upper")
+})
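
Note for review (not part of the patch): the new implementation reduces testOutliers() to a single exact binomial test. The sketch below reproduces that computation by hand for margin = "both" and alternative = "two.sided", using only fields the patch itself accesses (scaledResiduals, nSim, nObs); it should agree with testOutliers(simulationOutput, plot = FALSE).

```r
library(DHARMa)

# a correctly specified model, as in the new testthat block
testData = createData(sampleSize = 1000, overdispersion = 0, pZeroInflation = 0, randomEffectVariance = 0)
fittedModel <- glm(observedResponse ~ Environment1, family = "poisson", data = testData)
simulationOutput <- simulateResiduals(fittedModel = fittedModel)

# outliers = scaled residuals of exactly 0 or 1, i.e. observations outside the simulation envelope
outliers = sum(simulationOutput$scaledResiduals %in% c(0, 1))

# H0 outlier frequency: 1/(nSim + 1) per margin, doubled because both margins are counted
outFreqH0 = 2 / (simulationOutput$nSim + 1)

binom.test(outliers, simulationOutput$nObs, p = outFreqH0, alternative = "two.sided")
```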