From 8d8c1c45e24e03d0411fd69f4b95ee69d843ab2a Mon Sep 17 00:00:00 2001 From: jessexknight Date: Tue, 9 Nov 2021 19:55:20 -0500 Subject: [PATCH] code: clean up glm, add effects plot, remove p-values, part of #1 & #2 --- code/analysis/data.r | 4 ++- code/analysis/meta.r | 30 +++++++++++++++++++++ code/analysis/numeric.r | 25 ++++++++++-------- code/analysis/plot.r | 58 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 102 insertions(+), 15 deletions(-) diff --git a/code/analysis/data.r b/code/analysis/data.r index a781f21..9299925 100644 --- a/code/analysis/data.r +++ b/code/analysis/data.r @@ -276,6 +276,7 @@ clean.api.common = function(X){ X$api.inc = X$api.inc * 1000 X$api.prev.cat = cut(X$api.prev,c( 0, 1,10,100),labels=c('Low','Mid','High')) X$api.inc.cat = cut(X$api.inc, c(00,10,20,100),labels=c('Low','Mid','High')) + X$api.prev.cat = relevel(X$api.prev.cat,ref=3) X = make.api.phase(X) return(X) } @@ -311,7 +312,8 @@ make.api.data = function(XA,which='chi'){ names_pattern=name.pat, values_drop_na=TRUE) %>% mutate(t=as.numeric(name)+api.dt) %>% - mutate(t.cat=cut(t,c(tcut,100),as.character(tcut),right=FALSE)) + mutate(t.cat=cut(t,c(tcut,100),as.character(tcut),right=FALSE)) %>% + mutate(art.cd4=relevel(factor(art.cd4),ref='symp')) return(XA) } diff --git a/code/analysis/meta.r b/code/analysis/meta.r index fb78c22..83df9ea 100644 --- a/code/analysis/meta.r +++ b/code/analysis/meta.r @@ -152,6 +152,32 @@ D = list(# definitions act.HR.pr = 'Ratio of highest female to highest male activity group sizes' ) R = list(# Rename stuff + vars = list( + '(Intercept)'='(Intercept)', + 'Time Horizon (years)' = 't.cat', + 'HIV Prevalence' = 'api.prev.cat', + 'HIV Incidence Trend' = 'api.phase', + 'Partnership Types' = 'pt.def', + 'CD4 Threshold for ART' = 'art.cd4', + 'RR Trans. on ART' = 'art.rbeta.cat', + 'Activity Mixing' = 'act.mix', + 'Age Mixing' = 'age.mix', + 'Acivity & KP' = 'act.kp', + 'Acute HIV' = 'hiv.x.acute', + 'Late Stage HIV' = 'hiv.x.late', + 'HIV Morbidity' = 'hiv.morb.any', + 'Trans. Drug Resist.' = 'art.tdr', + 'Any ART Failure' = 'art.fail.any', + 'Any ART Dropout' = 'art.drop.any', + 'HTC Behav. Change' = 'bc.any', + 'Sex Stratification' = 'act.def.sex', + 'Activity Turnover' = 'act.turn.any', + 'Risk Definition' = 'Risk'), + t.cat = list( + '0-10'='0', + '11-20'='10', + '21-30'='20', + '31+'='30'), api.prev.cat = list( 'Low (<1%)'='Low', 'Middle (1-10%)'='Mid', @@ -173,6 +199,10 @@ R = list(# Rename stuff '500'='500', 'Any'='All', 'Symptomatic'='symp'), + art.rbeta.cat = list( + '0.0-0.039'='0', + '0.04-0.099'='0.04', + '0.1+'='0.1'), act.mix = list( 'Proportionate'='prop', 'Assortative'='asso'), diff --git a/code/analysis/numeric.r b/code/analysis/numeric.r index b3deab4..9dcac05 100644 --- a/code/analysis/numeric.r +++ b/code/analysis/numeric.r @@ -188,7 +188,7 @@ numeric.api = function(XA){ rm.vars = c('art.init.cat','act.kp','diff.any.kp.cat','art.fail.any','art.drop.any', 'api.phase','art.cov.cat') # TODO: impute NA via MICE ? vars.all = setdiff(M$api$table,rm.vars) - m.fun = function(vars){ + m.fun = function(XA.,vars){ for (var in vars){ XA.[[var]] = factor(XA.[[var]]) } # HACK: avoid empty levels f = formula(paste('value ~',paste(vars,collapse='+'))) geeglm(f,'gaussian',XA.,id=factor(XA.$bib),corstr='i') } @@ -201,34 +201,37 @@ numeric.api = function(XA){ } distr.funs = c('q2','q1','q3') dir = tex.dir('api') + model = list() for (which in c('inc','chi')){ dir = tex.dir(file.path('api',which)) XA. = make.api.data(XA,which=which)[c('bib','value',M$api$table)] - model.all = m.fun(vars.all) - coef.all = summary(model.all)$coef - # print(summary(model.all)) # DEBUG - # print(coefplot(model.all,title='',color='black') + theme_light()) # TODO + model[[which]] = m.fun(XA.,vars.all) + coef.all = summary(model[[which]])$coef + # print(summary(model[[which]])) # DEBUG for (var in M$api$table){ x = XA.[[var]] - for (level in levels(x)){ + for (level in levels(factor(x))){ varlevel = paste0(var,level) coef.vl = coef.all[varlevel,] nums = c(sum(x==level,na.rm=TRUE), 100 * quantile(XA.[['value']][x==level],c(.5,.25,.75),na.rm=TRUE), - 100 * (coef.vl[1,1] + c(0,-1.959964,+1.959964) * coef.vl[1,2])) - pstr = print.p(coef.vl[1,4]) + 100 * (coef.vl[1,1] + c(0,qnorm(.025),+qnorm(.975)) * coef.vl[1,2])) + pstr = print.p(coef.vl[1,4]) # NOTE: removed strfun = function(fmt,...){ do.call(sprintf,c(list(fmt),...)) } if (!is.na(nums[5])) { - tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & %.0f & ( %.0f , %.0f ) & %s ',nums,pstr) + tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & %.0f & ( %.0f , %.0f ) ',nums) } else if (nums[1]==0 | !var %in% vars.all){ - tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & & & ',nums[1:4]) + tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & & ',nums[1:4]) } else { - tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & \\textsc{ref} & & ',nums[1:4]) + tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & \\textsc{ref} & ',nums[1:4]) } save.tex(tabstr,namefun(var,level,'xtab'),dir=dir) } } } + names(model) = c('IR','CIA') + g = plot.effects(model,group='Outcome') + save.plot(g,'effects',dir='api',w=6,h=10) } save.distr = function(x,name,dir='',d=2,funs=NULL,lt=NULL){ fun.list = list( diff --git a/code/analysis/plot.r b/code/analysis/plot.r index e88b5ba..9f558d3 100644 --- a/code/analysis/plot.r +++ b/code/analysis/plot.r @@ -25,7 +25,7 @@ plot.api = function(XAi,which='chi',drop=FALSE,...){ size.lims = 4*c(ifelse(length(s),min(s),1),ifelse(length(s),max(s),1)) clr.args = list(option='inferno',begin=.1,end=.75) if (!is.numeric(clr)){ clr.args = c(clr.args,list(discrete=TRUE,drop=drop)) } - g = XAi %>% rename(args) %>% + g = XAi %>% rename.lvls(args) %>% ggplot(aes_string(x='t',y='value',...,size=3)) + geom_hline(yintercept=0,color='gray') + geom_point(alpha=.6,position='jitter') + @@ -73,7 +73,7 @@ plot.distr = function(X,var){ args = list(alpha=.4) } clr.args = list(option='inferno',discrete=TRUE,begin=.1,end=.85,na.value='gray') - g = X %>% rename(var) %>% + g = X %>% rename.lvls(var) %>% ggplot(aes_string(x=var,color=var,fill=var)) + do.call(geom,args) + do.call(scale_color_viridis,clr.args) + @@ -84,6 +84,49 @@ plot.distr = function(X,var){ return(g) } +gen.effects = function(model,name=NULL,group=''){ + coef = summary(model)$coef + coef$low = coef$Estimate - 1.96 * coef$Std.err + coef$high = coef$Estimate + 1.96 * coef$Std.err + dc = dummy.coef(model) + E = do.call(rbind,lapply(names(dc),function(var){ + lvls = names(dc[[var]]) + varlvls = paste0(var,lvls) + if (var=='(Intercept)'){ varlvls = var; i = TRUE; } else { i = FALSE; } + E.var = coef[varlvls,c('Estimate','low','high')] + lvl.ref = paste0('\n REF = ',rename.lvl(lvls[is.na(E.var$Estimate)],var)) + E.var$var = paste0(rename.lvl(var,'vars'),ifelse(i,'',lvl.ref)) + E.var$lvl = rename.lvl(lvls,var) + return(E.var) + })) + E[[group]] = name + E = E[!is.na(E$Estimate),] + return(E) +} + +plot.effects = function(model,group='.'){ + if ('coefficients' %in% names(model)){ + E = gen.effects(model,name='',group=group) + } else { + E = do.call(rbind,lapply(names(model),function(name){ + gen.effects(model[[name]],name=name,group=group) })) + } + pos = position_dodge(width=.6) + g = ggplot(E,aes_string(x='Estimate',y='lvl',xmin='low',xmax='high',color=group)) + + geom_point(position=pos) + + geom_linerange(position=pos) + + scale_color_viridis(option='inferno',discrete=TRUE,begin=.3,end=.7) + + facet_grid(rows=vars(E$var),scales='free_y',space='free_y',switch='y') + + labs(x='Effect',y='Factor') + + theme_light() + + theme(legend.position='top', + panel.spacing = unit(.4,'lines'), + strip.placement='outside', + strip.background=element_blank(), + strip.text.y.left=element_text(color='grey30',angle=0,vjust=1,hjust=0)) + return(g) +} + detex = function(name){ subs = list('~'=' ','\\$'='','\\\\'='','\\_'='') for (s in names(subs)){ @@ -96,7 +139,16 @@ decat = function(name){ return(gsub('\\.cat','',name)) } -rename = function(X,...){ +rename.lvl = function(x,name){ + rmap = R[[name]] + if (!is.null(rmap)){ + return(sapply(x,function(xi){ names(rmap)[xi==rmap] })) + } else { + return(x) + } +} + +rename.lvls = function(X,...){ names = c(...) for (name in names){ if (!is.null(R[[name]])){