Skip to content

Commit

Permalink
code: clean up glm, add effects plot, remove p-values, part of #1 & #2
Browse files Browse the repository at this point in the history
  • Loading branch information
jessexknight committed Nov 10, 2021
1 parent 22a81a9 commit 8d8c1c4
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 15 deletions.
4 changes: 3 additions & 1 deletion code/analysis/data.r
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ clean.api.common = function(X){
X$api.inc = X$api.inc * 1000
X$api.prev.cat = cut(X$api.prev,c( 0, 1,10,100),labels=c('Low','Mid','High'))
X$api.inc.cat = cut(X$api.inc, c(00,10,20,100),labels=c('Low','Mid','High'))
X$api.prev.cat = relevel(X$api.prev.cat,ref=3)
X = make.api.phase(X)
return(X)
}
Expand Down Expand Up @@ -311,7 +312,8 @@ make.api.data = function(XA,which='chi'){
names_pattern=name.pat,
values_drop_na=TRUE) %>%
mutate(t=as.numeric(name)+api.dt) %>%
mutate(t.cat=cut(t,c(tcut,100),as.character(tcut),right=FALSE))
mutate(t.cat=cut(t,c(tcut,100),as.character(tcut),right=FALSE)) %>%
mutate(art.cd4=relevel(factor(art.cd4),ref='symp'))
return(XA)
}

Expand Down
30 changes: 30 additions & 0 deletions code/analysis/meta.r
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,32 @@ D = list(# definitions
act.HR.pr = 'Ratio of highest female to highest male activity group sizes'
)
R = list(# Rename stuff
vars = list(
'(Intercept)'='(Intercept)',
'Time Horizon (years)' = 't.cat',
'HIV Prevalence' = 'api.prev.cat',
'HIV Incidence Trend' = 'api.phase',
'Partnership Types' = 'pt.def',
'CD4 Threshold for ART' = 'art.cd4',
'RR Trans. on ART' = 'art.rbeta.cat',
'Activity Mixing' = 'act.mix',
'Age Mixing' = 'age.mix',
'Acivity & KP' = 'act.kp',
'Acute HIV' = 'hiv.x.acute',
'Late Stage HIV' = 'hiv.x.late',
'HIV Morbidity' = 'hiv.morb.any',
'Trans. Drug Resist.' = 'art.tdr',
'Any ART Failure' = 'art.fail.any',
'Any ART Dropout' = 'art.drop.any',
'HTC Behav. Change' = 'bc.any',
'Sex Stratification' = 'act.def.sex',
'Activity Turnover' = 'act.turn.any',
'Risk Definition' = 'Risk'),
t.cat = list(
'0-10'='0',
'11-20'='10',
'21-30'='20',
'31+'='30'),
api.prev.cat = list(
'Low (<1%)'='Low',
'Middle (1-10%)'='Mid',
Expand All @@ -173,6 +199,10 @@ R = list(# Rename stuff
'500'='500',
'Any'='All',
'Symptomatic'='symp'),
art.rbeta.cat = list(
'0.0-0.039'='0',
'0.04-0.099'='0.04',
'0.1+'='0.1'),
act.mix = list(
'Proportionate'='prop',
'Assortative'='asso'),
Expand Down
25 changes: 14 additions & 11 deletions code/analysis/numeric.r
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ numeric.api = function(XA){
rm.vars = c('art.init.cat','act.kp','diff.any.kp.cat','art.fail.any','art.drop.any',
'api.phase','art.cov.cat') # TODO: impute NA via MICE ?
vars.all = setdiff(M$api$table,rm.vars)
m.fun = function(vars){
m.fun = function(XA.,vars){
for (var in vars){ XA.[[var]] = factor(XA.[[var]]) } # HACK: avoid empty levels
f = formula(paste('value ~',paste(vars,collapse='+')))
geeglm(f,'gaussian',XA.,id=factor(XA.$bib),corstr='i') }
Expand All @@ -201,34 +201,37 @@ numeric.api = function(XA){
}
distr.funs = c('q2','q1','q3')
dir = tex.dir('api')
model = list()
for (which in c('inc','chi')){
dir = tex.dir(file.path('api',which))
XA. = make.api.data(XA,which=which)[c('bib','value',M$api$table)]
model.all = m.fun(vars.all)
coef.all = summary(model.all)$coef
# print(summary(model.all)) # DEBUG
# print(coefplot(model.all,title='',color='black') + theme_light()) # TODO
model[[which]] = m.fun(XA.,vars.all)
coef.all = summary(model[[which]])$coef
# print(summary(model[[which]])) # DEBUG
for (var in M$api$table){
x = XA.[[var]]
for (level in levels(x)){
for (level in levels(factor(x))){
varlevel = paste0(var,level)
coef.vl = coef.all[varlevel,]
nums = c(sum(x==level,na.rm=TRUE),
100 * quantile(XA.[['value']][x==level],c(.5,.25,.75),na.rm=TRUE),
100 * (coef.vl[1,1] + c(0,-1.959964,+1.959964) * coef.vl[1,2]))
pstr = print.p(coef.vl[1,4])
100 * (coef.vl[1,1] + c(0,qnorm(.025),+qnorm(.975)) * coef.vl[1,2]))
pstr = print.p(coef.vl[1,4]) # NOTE: removed
strfun = function(fmt,...){ do.call(sprintf,c(list(fmt),...)) }
if (!is.na(nums[5])) {
tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & %.0f & ( %.0f , %.0f ) & %s ',nums,pstr)
tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & %.0f & ( %.0f , %.0f ) ',nums)
} else if (nums[1]==0 | !var %in% vars.all){
tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & & & ',nums[1:4])
tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & & ',nums[1:4])
} else {
tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & \\textsc{ref} & & ',nums[1:4])
tabstr = strfun('%.0f & %.0f & ( %.0f , %.0f ) & \\textsc{ref} & ',nums[1:4])
}
save.tex(tabstr,namefun(var,level,'xtab'),dir=dir)
}
}
}
names(model) = c('IR','CIA')
g = plot.effects(model,group='Outcome')
save.plot(g,'effects',dir='api',w=6,h=10)
}
save.distr = function(x,name,dir='',d=2,funs=NULL,lt=NULL){
fun.list = list(
Expand Down
58 changes: 55 additions & 3 deletions code/analysis/plot.r
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ plot.api = function(XAi,which='chi',drop=FALSE,...){
size.lims = 4*c(ifelse(length(s),min(s),1),ifelse(length(s),max(s),1))
clr.args = list(option='inferno',begin=.1,end=.75)
if (!is.numeric(clr)){ clr.args = c(clr.args,list(discrete=TRUE,drop=drop)) }
g = XAi %>% rename(args) %>%
g = XAi %>% rename.lvls(args) %>%
ggplot(aes_string(x='t',y='value',...,size=3)) +
geom_hline(yintercept=0,color='gray') +
geom_point(alpha=.6,position='jitter') +
Expand Down Expand Up @@ -73,7 +73,7 @@ plot.distr = function(X,var){
args = list(alpha=.4)
}
clr.args = list(option='inferno',discrete=TRUE,begin=.1,end=.85,na.value='gray')
g = X %>% rename(var) %>%
g = X %>% rename.lvls(var) %>%
ggplot(aes_string(x=var,color=var,fill=var)) +
do.call(geom,args) +
do.call(scale_color_viridis,clr.args) +
Expand All @@ -84,6 +84,49 @@ plot.distr = function(X,var){
return(g)
}

gen.effects = function(model,name=NULL,group=''){
coef = summary(model)$coef
coef$low = coef$Estimate - 1.96 * coef$Std.err
coef$high = coef$Estimate + 1.96 * coef$Std.err
dc = dummy.coef(model)
E = do.call(rbind,lapply(names(dc),function(var){
lvls = names(dc[[var]])
varlvls = paste0(var,lvls)
if (var=='(Intercept)'){ varlvls = var; i = TRUE; } else { i = FALSE; }
E.var = coef[varlvls,c('Estimate','low','high')]
lvl.ref = paste0('\n REF = ',rename.lvl(lvls[is.na(E.var$Estimate)],var))
E.var$var = paste0(rename.lvl(var,'vars'),ifelse(i,'',lvl.ref))
E.var$lvl = rename.lvl(lvls,var)
return(E.var)
}))
E[[group]] = name
E = E[!is.na(E$Estimate),]
return(E)
}

plot.effects = function(model,group='.'){
if ('coefficients' %in% names(model)){
E = gen.effects(model,name='',group=group)
} else {
E = do.call(rbind,lapply(names(model),function(name){
gen.effects(model[[name]],name=name,group=group) }))
}
pos = position_dodge(width=.6)
g = ggplot(E,aes_string(x='Estimate',y='lvl',xmin='low',xmax='high',color=group)) +
geom_point(position=pos) +
geom_linerange(position=pos) +
scale_color_viridis(option='inferno',discrete=TRUE,begin=.3,end=.7) +
facet_grid(rows=vars(E$var),scales='free_y',space='free_y',switch='y') +
labs(x='Effect',y='Factor') +
theme_light() +
theme(legend.position='top',
panel.spacing = unit(.4,'lines'),
strip.placement='outside',
strip.background=element_blank(),
strip.text.y.left=element_text(color='grey30',angle=0,vjust=1,hjust=0))
return(g)
}

detex = function(name){
subs = list('~'=' ','\\$'='','\\\\'='','\\_'='')
for (s in names(subs)){
Expand All @@ -96,7 +139,16 @@ decat = function(name){
return(gsub('\\.cat','',name))
}

rename = function(X,...){
rename.lvl = function(x,name){
rmap = R[[name]]
if (!is.null(rmap)){
return(sapply(x,function(xi){ names(rmap)[xi==rmap] }))
} else {
return(x)
}
}

rename.lvls = function(X,...){
names = c(...)
for (name in names){
if (!is.null(R[[name]])){
Expand Down

0 comments on commit 8d8c1c4

Please sign in to comment.