🎓 Tidy tools for academics
Install the development version from Github with:
## install devtools if not already
if (!requireNamespace("devtools", quietly = TRUE)) {
install.packages("devtools")
}
## install tidyversity from Github
devtools::install_github("mkearney/tidyversity")
Load the package (it, of course, plays nicely with tidyverse).
## load tidyverse
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────────────────── tidyverse 1.2.1 ──
#> ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
#> ✔ tibble 1.4.2 ✔ dplyr 0.7.4
#> ✔ tidyr 0.8.0 ✔ stringr 1.3.0
#> ✔ readr 1.1.1 ✔ forcats 0.3.0
#> ── Conflicts ────────────────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
## load tidyversity
library(tidyversity)
Conduct an Ordinary Least Squares (OLS) regression analysis.
polcom %>%
tidy_regression(follow_trump ~ news_1 + ambiv_sexism_1) %>%
tidy_summary()
#> # A tidy model
#> Model formula : follow_trump ~ news_1 + ambiv_sexism_1
#> Model type : Ordinary Least Squares (OLS) regression
#> Model pkg::fun : stats::lm()
#> Model data : 243 (observations) X 3 (variables)
#> $fit
#> fit_stat n df estimate p.value stars
#> F 243 2 3.831 0.023 *
#> R^2 243 - 0.031 -
#> Adj R^2 243 - 0.023 -
#> RMSE 243 - 0.409 -
#> AIC 243 - 260.148 -
#> BIC 243 - 274.121 -
#>
#> $coef
#> term est s.e. est.se p.value stars std.est
#> (Intercept) 0.745 0.097 7.692 <.001 *** <.001
#> news_1 0.022 0.012 1.811 0.071 + 0.048
#> ambiv_sexism_1 -0.038 0.021 -1.870 0.063 + -0.050
Conduct a logistic regression analysis for binary (dichotomous) outcomes.
polcom %>%
tidy_regression(follow_trump ~ news_1 + ambiv_sexism_1, type = "logistic") %>%
tidy_summary()
#> # A tidy model
#> Model formula : follow_trump ~ news_1 + ambiv_sexism_1
#> Model type : Logistic regression
#> Model pkg::fun : stats::glm()
#> Model data : 243 (observations) X 3 (variables)
#> $fit
#> fit_stat n df estimate p.value stars
#> χ2 243 240 247.442 0.357
#> Δχ2 243 2 7.466 0.024 *
#> Nagelkerke R^2 243 - 0.030 -
#> McFadden R^2 243 - 0.029 -
#> RMSE 243 - 2.540 -
#> AIC 243 - 253.442 -
#> BIC 243 - 263.921 -
#>
#> $coef
#> term est s.e. est.se p.value stars std.est
#> (Intercept) 1.133 0.553 2.049 0.040 * <.001
#> news_1 0.127 0.070 1.808 0.071 + 0.195
#> ambiv_sexism_1 -0.229 0.122 -1.872 0.061 + -0.201
Conduct a poisson regression analysis for count data.
polcom %>%
mutate(polarize = abs(therm_1 - therm_2)) %>%
tidy_regression(polarize ~ news_1 + ambiv_sexism_1, type = "poisson") %>%
tidy_summary()
#> # A tidy model
#> Model formula : polarize ~ news_1 + ambiv_sexism_1
#> Model type : Poisson regression
#> Model pkg::fun : stats::glm()
#> Model data : 242 (observations) X 3 (variables)
#> $fit
#> fit_stat n df estimate p.value stars
#> χ2 242 239 6549.419 <.001 ***
#> Δχ2 242 2 399.077 <.001 ***
#> Nagelkerke R^2 242 - 0.808 -
#> McFadden R^2 242 - 0.057 -
#> RMSE 242 - 0.760 -
#> AIC 242 - 7725.222 -
#> BIC 242 - 7735.689 -
#>
#> $coef
#> term est s.e. est.se p.value stars std.est
#> (Intercept) 3.798 0.038 99.448 <.001 *** <.001
#> news_1 0.045 0.005 9.358 <.001 *** 0.881
#> ambiv_sexism_1 -0.126 0.008 -15.852 <.001 *** -2.230
Conduct a negative binomial regression analysis for overdispersed count data.
polcom %>%
mutate(polarize = abs(therm_1 - therm_2)) %>%
tidy_regression(polarize ~ news_1 + ambiv_sexism_1, type = "negbinom") %>%
tidy_summary()
#> # A tidy model
#> Model formula : polarize ~ news_1 + ambiv_sexism_1
#> Model type : Negative binomial regression
#> Model pkg::fun : MASS::glm.nb()
#> Model data : 242 (observations) X 3 (variables)
#> $fit
#> fit_stat n df estimate p.value stars
#> χ2 242 239 293.328 0.009 **
#> Δχ2 242 2 8.440 0.015 *
#> Nagelkerke R^2 242 - 0.034 -
#> McFadden R^2 242 - 0.028 -
#> RMSE 242 - 0.761 -
#> AIC 242 - 2312.391 -
#> BIC 242 - 2326.347 -
#>
#> $coef
#> term est s.e. est.se p.value stars std.est
#> (Intercept) 3.741 0.258 14.510 <.001 *** 3.752
#> news_1 0.053 0.032 1.632 0.103 0.113
#> ambiv_sexism_1 -0.123 0.054 -2.273 0.023 * -0.158
polcom %>%
mutate(polarize = abs(therm_1 - therm_2)) %>%
tidy_regression(polarize ~ news_1 + ambiv_sexism_1,
type = "quasipoisson", robust = TRUE) %>%
tidy_summary()
#> # A tidy model
#> Model formula : polarize ~ news_1 + ambiv_sexism_1
#> Model type : [Robust] Poisson regression
#> Model pkg::fun : robust::glmRob()
#> Model data : 242 (observations) X 3 (variables)
#> $fit
#> fit_stat n df estimate p.value stars
#> χ2 242 239 6989.543 <.001 ***
#> Δχ2 242 2 58782.937 <.001 ***
#> Nagelkerke R^2 242 - 1.000 -
#> McFadden R^2 242 - 0.894 -
#> RMSE 242 - 31.865 -
#> AIC 242 - 2245.147 -
#> BIC 242 - 2259.103 -
#>
#> $coef
#> term est s.e. est.se p.value stars std.est
#> (Intercept) 3.705 0.071 51.968 <.001 *** <.001
#> news_1 0.079 0.010 8.325 <.001 *** 1.279
#> ambiv_sexism_1 -0.241 0.022 -11.179 <.001 *** -2.086
Conduct an analysis of variance (ANOVA).
polcom %>%
mutate(sex = ifelse(sex == 1, "Male", "Female"),
vote_choice = case_when(
vote_2016_choice == 1 ~ "Clinton",
vote_2016_choice == 2 ~ "Trump",
TRUE ~ "Other")) %>%
tidy_anova(pp_party ~ sex * vote_choice) %>%
tidy_summary()
#> # A tidy model
#> Model formula : pp_party ~ sex * vote_choice
#> Model type : Analysis of variance (ANOVA)
#> Model pkg::fun : stats::aov()
#> Model data : 243 (observations) X 3 (variables)
#> $fit
#> fit_stat n df estimate p.value stars
#> F 243 5 53.327 <.001 ***
#> R^2 243 - 0.529 -
#> Adj R^2 243 - 0.519 -
#> RMSE 243 - 1.238 -
#> AIC 243 - 801.115 -
#> BIC 243 - 825.567 -
#>
#> $coef
#> term est s.e. est.se statistic p.value stars std.est
#> sex 1.000 19.238 19.238 12.561 <.001 *** 2.000
#> vote_choice 2.000 388.606 194.303 126.867 <.001 *** 2.000
#> sex:vote_choice 2.000 0.519 0.259 0.169 0.844 2.000
#> Residuals 237.000 362.978 1.532 - - 237.000
polcom %>%
tidy_ttest(pp_ideology ~ follow_trump) %>%
tidy_summary()
#> # A tidy model
#> Model formula : pp_ideology ~ follow_trump
#> Model type : T-test
#> Model pkg::fun : stats::t.test()
#> Model data : 244 (observations)
#> $fit
#> group df mean diff lo.95 hi.05
#> FALSE 76.911 4.185 0.922 0.308 1.536
#> TRUE 76.911 3.263 -0.922 -0.308 -1.536
#>
#> $coef
#> est t p.value stars
#> 0.922 2.992 0.004 **
Conduct latent variable analysis using structural equation modeling.
## mutate data and then specify and estimate model
sem1 <- polcom %>%
mutate(therm_2 = therm_2 / 10,
therm_1 = 10 - therm_1 / 10) %>%
tidy_sem_model(news =~ news_1 + news_2 + news_3 + news_4 + news_5 + news_6,
ambiv_sexism =~ ambiv_sexism_1 + ambiv_sexism_2 + ambiv_sexism_3 +
ambiv_sexism_4 + ambiv_sexism_5 + ambiv_sexism_6,
partisan =~ a*therm_1 + a*therm_2,
ambiv_sexism ~ age + sex + hhinc + edu + news + partisan) %>%
tidy_sem()
## print model summary
sem1 %>%
tidy_summary()
#> # A tidy model
#> Model formula : news =~ news_1 + news_2 + news_3 + news_4 + news_5 + news_6
#> ambiv_sexism =~ ambiv_sexism_1 + ambiv_sexism_2 + ambiv_sexism_3 + ambiv_sexism_4 +
#> ambiv_sexism_5 + ambiv_sexism_6
#> partisan =~ a * therm_1 + a * therm_2
#> ambiv_sexism ~ age + sex + hhinc + edu + news + partisan
#> Model type : Structural Equation Model (SEM)
#> Model pkg::fun : lavaan::sem()
#> Model data : 235 (observations) X 18 (variables)
#> $fit
#> fit_stat n df estimate p.value stars
#> chisq 235 127 239.579 <.001 ***
#> aic 235 - 0.907 -
#> bic 235 - 0.892 -
#> cfi 235 - 16138.684 -
#> tli 235 - 16256.310 -
#> rmsea 235 - 0.061 -
#> srmr 235 - 0.073 -
#> R^2:ambiv_sexism 235 - 0.379 -
#>
#> $coef
#> term est se est.se p.value stars std.est
#> news =~ news_1 1.000 <.001 - - 0.173
#> news =~ news_2 1.592 0.722 2.204 0.028 * 0.340
#> news =~ news_3 5.069 2.095 2.419 0.016 * 0.781
#> news =~ news_4 5.587 2.312 2.417 0.016 * 0.851
#> news =~ news_5 3.493 1.485 2.353 0.019 * 0.520
#> news =~ news_6 1.255 0.683 1.838 0.066 + 0.196
#> ambiv_sexism =~ ambiv_sexism_1 1.000 <.001 - - 0.825
#> ambiv_sexism =~ ambiv_sexism_2 0.942 0.067 14.043 <.001 *** 0.801
#> ambiv_sexism =~ ambiv_sexism_3 0.795 0.067 11.844 <.001 *** 0.706
#> ambiv_sexism =~ ambiv_sexism_4 0.743 0.064 11.647 <.001 *** 0.697
#> ambiv_sexism =~ ambiv_sexism_5 0.902 0.062 14.644 <.001 *** 0.825
#> ambiv_sexism =~ ambiv_sexism_6 0.904 0.064 14.185 <.001 *** 0.807
#> partisan =~ therm_1 1.000 <.001 - - 0.577
#> partisan =~ therm_2 1.000 <.001 - - 0.592
#> ambiv_sexism ~ age -0.004 0.005 -0.824 0.410 -0.051
#> ambiv_sexism ~ sex -0.271 0.130 -2.089 0.037 * -0.130
#> ambiv_sexism ~ hhinc -0.021 0.023 -0.878 0.380 -0.057
#> ambiv_sexism ~ edu -0.088 0.069 -1.279 0.201 -0.083
#> ambiv_sexism ~ news 0.130 0.215 0.607 0.544 0.047
#> ambiv_sexism ~ partisan 0.347 0.069 5.032 <.001 *** 0.592
Estimate multilevel (mixed effects) models.
lme4::sleepstudy %>%
tidy_mlm(Reaction ~ Days + (Days | Subject)) %>%
summary()
#> Linear mixed model fit by REML ['lmerMod']
#> Formula: Reaction ~ Days + (Days | Subject)
#> Data: .data
#>
#> REML criterion at convergence: 1743.6
#>
#> Scaled residuals:
#> Min 1Q Median 3Q Max
#> -3.954 -0.463 0.023 0.463 5.179
#>
#> Random effects:
#> Groups Name Variance Std.Dev. Corr
#> Subject (Intercept) 612.1 24.74
#> Days 35.1 5.92 0.07
#> Residual 654.9 25.59
#> Number of obs: 180, groups: Subject, 18
#>
#> Fixed effects:
#> Estimate Std. Error t value
#> (Intercept) 251.41 6.82 36.84
#> Days 10.47 1.55 6.77
#>
#> Correlation of Fixed Effects:
#> (Intr)
#> Days -0.138
Comes with one data set.
Consists of survey responses to demographic, background, and likert-type attitudinal items about political communication.
print(tibble::as_tibble(polcom), n = 5)
#> # A tibble: 244 x 63
#> follow_trump news_1 news_2 news_3 news_4 news_5 news_6 ambiv_sexism_1 ambiv_sexism_2
#> * <lgl> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1 TRUE 8 1 1 1 1 6 3 3
#> 2 TRUE 1 1 1 1 1 1 5 5
#> 3 TRUE 8 1 1 1 8 1 5 4
#> 4 TRUE 8 1 1 1 1 6 2 2
#> 5 TRUE 6 1 2 1 1 3 4 4
#> # ... with 239 more rows, and 54 more variables: ambiv_sexism_3 <int>, ambiv_sexism_4 <int>,
#> # ambiv_sexism_5 <int>, ambiv_sexism_6 <int>, img1_hrc_1 <int>, img1_hrc_2 <dbl>,
#> # img1_hrc_3 <int>, img1_hrc_4 <dbl>, img1_hrc_5 <int>, img1_hrc_6 <int>, img1_hrc_7 <int>,
#> # img1_hrc_8 <int>, img1_hrc_9 <int>, img2_hrc_10 <int>, img2_hrc_11 <int>, img2_hrc_12 <dbl>,
#> # img2_hrc_13 <int>, img2_hrc_14 <int>, img2_hrc_15 <dbl>, img1_djt_1 <int>, img1_djt_2 <dbl>,
#> # img1_djt_3 <int>, img1_djt_4 <dbl>, img1_djt_5 <int>, img1_djt_6 <int>, img1_djt_7 <int>,
#> # img1_djt_8 <int>, img1_djt_9 <int>, img2_djt_10 <int>, img2_djt_11 <int>, img2_djt_12 <dbl>,
#> # img2_djt_13 <int>, img2_djt_14 <int>, img2_djt_15 <dbl>, pie_1 <int>, pie_2 <int>, pie_3 <int>,
#> # pie_4 <int>, vote_2016 <int>, vote_2016_choice <int>, pp_ideology <int>, pp_party <int>,
#> # pp_party_lean <int>, therm_1 <int>, therm_2 <int>, therm_3 <int>, therm_4 <int>, therm_5 <int>,
#> # age <int>, sex <int>, gender <int>, race <int>, edu <int>, hhinc <int>
Return summary statistics in the form of a data frame (not yet added).
## summary stats for social media use (numeric) variables
summarize_numeric(polcom_survey, smuse1:smuse3)
## summary stats for respondent sex and race (categorical) variables
summarize_categorical(polcom_survey, sex, race)
Estimate Cronbach’s alpha for a set of variables.
## reliability of social media use items
cronbachs_alpha(polcom, ambiv_sexism_1:ambiv_sexism_6)
#> items alpha alpha.std
#> 1 ambiv_sexism_1:ambiv_sexism_6 0.904609 0.904600
#> 2 -ambiv_sexism_1 0.882322 0.882225
#> 3 -ambiv_sexism_2 0.884272 0.884121
#> 4 -ambiv_sexism_3 0.896061 0.896218
#> 5 -ambiv_sexism_4 0.897127 0.897411
#> 6 -ambiv_sexism_5 0.883554 0.883420
#> 7 -ambiv_sexism_6 0.881595 0.881855