Skip to content

Commit

Permalink
Merge pull request #83 from florianhartig/0.2.0.1
Browse files Browse the repository at this point in the history
0.2.0.1
  • Loading branch information
florianhartig authored Aug 20, 2018
2 parents 7127674 + 02c90cb commit 831c66b
Show file tree
Hide file tree
Showing 12 changed files with 288 additions and 21 deletions.
5 changes: 3 additions & 2 deletions Code/DHARMaPackageSupport/glmmTMB/glmmTMB.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,12 @@ plot(res, asFactor = T)
Example from the help

```{r}
m <- glmmTMB(count~spp + mined + (1|site), zi=~spp + mined, family=nbinom2, Salamanders)
m <- glmmTMB(count~spp + mined + (1|site), zi=~spp + mined,
family=nbinom2, Salamanders)
summary(m)
res = simulateResiduals(m)
plot(res, rank = T)
plot(res)
### creating new data based on the fitted models. Residuals should now be perfect
Expand Down
1 change: 1 addition & 0 deletions Code/DHARMaPerformance/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.png
37 changes: 35 additions & 2 deletions Code/DHARMaPerformance/Power/TestPower.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,11 @@ legend("bottomright", tests, col = 1:4, lty = 1:4)
```


## Benchmarking DHARMa dispersiontests against pearson dispersiontest for glmms

## Benchmarking DHARMa dispersiontests against AER dispersiontest for Poisson GLM

```{r}
overdisp_fun <- function(model) {
## number of variance parameters in
## an n-by-n variance-covariance matrix
Expand All @@ -73,11 +74,43 @@ overdisp_fun <- function(model) {
Pearson.chisq <- sum(rp^2)
prat <- Pearson.chisq/rdf
pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
list(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}
doCalculations <- function(control = 0){
testData = createData(sampleSize = 200, family = poisson(), overdispersion = control, randomEffectVariance = 1)
fittedModel <- glmer(observedResponse ~ Environment1 + (1|group), data = testData, family = poisson())
out = list()
res <- simulateResiduals(fittedModel = fittedModel, n = 250)
out$uniformTest = testUniformity(res)$p.value
out$Dispersion = testDispersion(res, plot = F)$p.value
out$DispersionAER = overdisp_fun(fittedModel)$pval
res <- simulateResiduals(fittedModel = fittedModel, n = 250, refit = T)
out$DispersionRefitted = testDispersion(res, plot = F)$p.value
return(unlist(out))
}
# testing a single return
doCalculations(control = 0.3)
dispValues = seq(0,1.2, len = 5)
# running benchmark
out = runBenchmarks(doCalculations, controlValues = dispValues , nRep = 10, parallel = F)
tests = c("uniformity", "DHARMa disp basic" , "GLMER dispersiontest", "DHARMa disp refit")
matplot(dispValues, t(out$summaries[,1,]), type = "l")
legend("bottomright", tests, col = 1:4, lty = 1:4)
```



# Autocorrelation tests


Expand Down
41 changes: 25 additions & 16 deletions DHARMa/R/simulateResiduals.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,18 @@ simulateResiduals <- function(fittedModel, n = 250, refit = F, integerResponse =
########### Wrapup ############

out$scaledResidualsNormal = qnorm(out$scaledResiduals + 0.00 )


# outliers = list()
# outliers$fracLowerThanAllSimulated = sum(out$scaledResiduals == 0) / out$nSim
# outliers$fracHigherThanAllSimulated = sum(out$scaledResiduals == 1) / out$nSim
#
# # see http://www.di.fc.ul.pt/~jpn/r/prob/range.html
# # 1−(1−x)n
# outliers$samplingDist = ecdf(replicate(10000, min(runif(out$nSim))))
#
# outliers$plowerThanAllSimulated = 1-outliers$samplingDist(fracLowerThanAllSimulated)
# outliers$plowerThanAllSimulated = 1-outliers$samplingDist(fracLowerThanAllSimulated)

out$time = proc.time() - ptm
out$randomState = randomState

Expand Down Expand Up @@ -301,6 +311,7 @@ recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = s
if(!is.null(simulationOutput$original)) simulationOutput = simulationOutput$original

out = list()
out$original = simulationOutput

if(is.null(group)) return(simulationOutput)
else group =as.factor(group)
Expand All @@ -311,37 +322,35 @@ recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = s
out$observedResponse = aggregateByGroup(simulationOutput$observedResponse)
out$fittedPredictedResponse = aggregateByGroup(simulationOutput$fittedPredictedResponse)
out$simulatedResponse = apply(simulationOutput$simulatedResponse, 2, aggregateByGroup)
out$scaledResiduals = rep(NA, out$nGroups)


if (simulationOutput$refit == F){
if(simulationOutput$integerResponse == T){
for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$simulatedResponse[i,] + runif(out$nGroups, -0.5, 0.5))(out$observedResponse[i] + runif(1, -0.5, 0.5))
} else {
for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$simulatedResponse[i,])(out$observedResponse[i])
}

out$scaledResiduals = getQuantile(simulations = out$simulatedResponse , observed = out$observedResponse , n = out$nGroups, nSim = simulationOutput$nSim, integerResponse = simulationOutput$integerResponse)

######## refit = T ##################
} else {

out$refittedPredictedResponse <- apply(simulationOutput$refittedPredictedResponse, 2, aggregateByGroup)
out$fittedResiduals = aggregateByGroup(simulationOutput$fittedResiduals)
out$refittedResiduals = apply(simulationOutput$refittedResiduals, 2, aggregateByGroup)
out$refittedPearsonResiduals = apply(simulationOutput$refittedPearsonResiduals, 2, aggregateByGroup)

if(simulationOutput$integerResponse == T){
for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$refittedResiduals[i,] + runif(out$nGroups, -0.5, 0.5))(out$fittedResiduals[i] + runif(1, -0.5, 0.5))
} else {
for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$refittedResiduals[i,])(out$fittedResiduals[i])
}

out$scaledResiduals = getQuantile(simulations = out$refittedResiduals , observed = out$fittedResiduals , n = out$nGroups, nSim = simulationOutput$nSim, integerResponse = simulationOutput$integerResponse)

}

# hack - the c here will result in both old and new outputs to be present resulting output, but a named access should refer to the new, grouped calculations
# question to myself - what's the use of that, why not erase the old outputs? they are anyway saved in the old object

out$aggregateByGroup = aggregateByGroup
out = c(out, simulationOutput)
out$original = simulationOutput
class(out) = "DHARMa"
return(out)
}


#' Quantile calculations
#'
#' @keywords internal
getQuantile <- function(simulations, observed, n, nSim, integerResponse){

scaledResiduals = rep(NA, n)
Expand Down
4 changes: 4 additions & 0 deletions DHARMa/R/tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ testGeneric <- function(simulationOutput, summary, alternative = c("two.sided",
#' @example inst/examples/testTemporalAutocorrelationHelp.R
#' @export
testTemporalAutocorrelation <- function(simulationOutput, time = NULL , alternative = c("two.sided", "greater", "less"), plot = T){
# actually not sure if this is neccessary for dwtest, but seems better to aggregate
if(any(duplicated(time))) stop("testing for temporal autocorrelation requires unique time values - if you have several observations per location, use the recalculateResiduals function to aggregate residuals per location")

alternative <- match.arg(alternative)

Expand Down Expand Up @@ -250,6 +252,8 @@ testTemporalAutocorrelation <- function(simulationOutput, time = NULL , alternat
#' @export
testSpatialAutocorrelation <- function(simulationOutput, x = NULL, y = NULL, distMat = NULL, alternative = c("two.sided", "greater", "less"), plot = T){

if(any(duplicated(cbind(x,y)))) stop("testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, use the recalculateResiduals function to aggregate residuals per location")

alternative <- match.arg(alternative)

if( !is.null(x) & !is.null(distMat) ) warning("coordinates and distMat provided, coordinates will only be used for plotting")
Expand Down
29 changes: 29 additions & 0 deletions DHARMa/inst/examples/createDharmaHelp.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
## READING IN HAND-CODED SIMULATIONS

testData = createData(sampleSize = 50, randomEffectVariance = 0)
fittedModel <- glm(observedResponse ~ Environment1, data = testData, family = "poisson")

# in DHARMA, using the simulate.glm function of glm
sims = simulateResiduals(fittedModel)
plot(sims, quantreg = FALSE)

# Doing the same with a handcode simulate function.
# of course this code will only work with a 1-par glm model
simulateMyfit <- function(n=10, fittedModel){
int = coef(fittedModel)[1]
slo = coef(fittedModel)[2]
pred = exp(int + slo * testData$Environment1)
predSim = replicate(n, rpois(length(pred), pred))
return(predSim)
}

sims = simulateMyfit(250, fittedModel)

dharmaRes <- createDHARMa(simulatedResponse = sims,
observedResponse = testData$observedResponse,
fittedPredictedResponse = predict(fittedModel, type = "response"),
integer = TRUE)
plot(dharmaRes, quantreg = FALSE)

## A BAYESIAN EXAMPLE

\dontrun{

# This example shows how to check the residuals for a
Expand Down
40 changes: 40 additions & 0 deletions DHARMa/inst/examples/testSpatialAutocorrelationHelp.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,43 @@ testSpatialAutocorrelation(res)
# Alternatively, one can provide a distance matrix
dM = as.matrix(dist(cbind(testData$x, testData$y)))
testSpatialAutocorrelation(res, distMat = dM)

# carefull with clustered data and conditional / unconditional simulations
# this originates from https://github.com/florianhartig/DHARMa/issues/81

# Assume our data is divided into clusters, and we use a RE to take out cluster effects

clusters = 100
subsamples = 10
size = clusters * subsamples

testData = createData(sampleSize = size, family = gaussian(), numGroups = clusters )
testData$x = rnorm(clusters)[testData$group] + rnorm(size, sd = 0.01)
testData$y = rnorm(clusters)[testData$group] + rnorm(size, sd = 0.01)

library(lme4)
fittedModel <- lmer(observedResponse ~ Environment1 + (1|group), data = testData)

# DHARMa default is to re-simulted REs - this means spatial pattern remains
# because residuals are still clustered

res = simulateResiduals(fittedModel)
testSpatialAutocorrelation(res, x = testData$x, y = testData$y)

# However, it should disappear if you just calculate an aggregate residuals per cluster
# Because at least how the data are simualted, cluster are spatially independent

res2 = recalculateResiduals(res, group = testData$group)
testSpatialAutocorrelation(res2,
x = aggregate(testData$x, list(testData$group), mean)$x,
y = aggregate(testData$y, list(testData$group), mean)$x)

# For lme4, possible to simulated residuals conditional on fitted REs (re.form)
# This takes out most of the RSA - a remainder is probably due the shrinkage
# of the REs

res = simulateResiduals(fittedModel, re.form = NULL)
testSpatialAutocorrelation(res, x = testData$x, y = testData$y)



29 changes: 29 additions & 0 deletions DHARMa/man/createDHARMa.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions DHARMa/man/getQuantile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions DHARMa/man/testSpatialAutocorrelation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions DHARMa/tests/testthat/testRecalculateResiduals.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@


testData = createData(sampleSize = 200, family = poisson(), numGroups = 20, randomEffectVariance = 0)

#len = sum(testData$group == 1)
#testData$group[testData$group == 1] = sample(c(1,21),20, replace = T)

fittedModel <- glm(observedResponse ~ Environment1, family = "poisson", data = testData)

simulationOutput = simulateResiduals(fittedModel)
plot(simulationOutput)
dim(simulationOutput$simulatedResponse)

simulationOutput = recalculateResiduals(simulationOutput, group = testData$group)
plot(simulationOutput)


testData = createData(sampleSize = 200, family = gaussian())
fittedModel <- glm(observedResponse ~ Environment1, family = "gaussian", data = testData)
simulationOutput = recalculateResiduals(simulationOutput, group = testData$group)


Loading

0 comments on commit 831c66b

Please sign in to comment.