前提:残差正态性、残差满足方差齐性、协变量与响应变量关系为线性且其在分类变量每个组的斜率相等
方法1:
data(mtcars)
library(dplyr)
library(ggpubr)
mtcars$cyl <- factor(mtcars$cyl,levels = c("4", "6", "8"))
mtcars %>%
group_by(cyl) %>%
get_summary_stats(mpg, type = "mean_sd")
#可视化
#分类变量"cyl"与响应变量"mpg"
ggboxplot(
mtcars, x = "cyl", y = "mpg")
#线性
#连续变量“wt”与响应变量"mpg"
ggplot(data = mtcars,aes(x=wt,y=mpg,color=cyl))+
geom_point()+
stat_smooth(method = "lm")+
theme_classic()
#组间斜率相等
#构建模型
mod2 <- lm(mpg~cyl+wt,data = mtcars)
mod3 <- lm(mpg~cyl*wt,data = mtcars)
anova(mod2,mod3)#交互作用不显著
#残差正态性
shapiro.test(resid(mod2))
#残差方差齐性
leveneTest(.resid~cyl,data = augment(mod2))#augment将模型结果整理成data.frame
Anova(mod2)

summary(mod2)

因交互作用不显著(即cyl
不同水平下的斜率相等),所以只关注截距即可,整理后应该如下图所示,wt
直接取均值带入即可获得调整后均值,用于后续写结果时进行表述(如:组A均值为···,显著高于或者低于组B和组C):
如果觉得手动提取太麻烦,可直接用emmeans
包提取emmeans(mod2,pairwise~cyl|wt)

#可视化
library(RColorBrewer)
ggplot()+
geom_point(data = mtcars,aes(x=wt,y=mpg,color=cyl))+
scale_color_manual(values = colorRampPalette(brewer.pal(3,"Set1"))(3))+
geom_abline(intercept=c(33.99,33.99-4.2556,33.99-6.0709),
slope=-3.2,color=colorRampPalette(brewer.pal(3,"Set1"))(3))+
theme_bw()+theme(text = element_text(face = "bold",size = 16))

方法2(基于rstatix
包):
library(ggpubr)
library(rstatix)
#前提条件
#线性
ggscatter(
mtcars, x = "wt", y = "mpg",
color = "cyl", add = "reg.line")+
stat_regline_equation(aes(label = paste(..eq.label..,
..rr.label..,
sep = "~~~~"), color = cyl))
#组间斜率相等
mtcars %>% anova_test(mpg~cyl*wt)
#残差正态性
mod2 <- lm(mpg~cyl+wt,data = mtcars)
shapiro_test(mod2$residuals)
#方差齐性
augment(mod2) %>% levene_test(.resid~cyl)
#检验
mtcars %>% anova_test(mpg~cyl+wt) %>% get_anova_table()
#多重比较
(mc <- mtcars %>%
emmeans_test(
mpg ~ cyl, covariate = wt,
p.adjust.method = "bonferroni"
))
#调整后均值
get_emmeans(mc)
结果描述:在控制协变量wt
后,通过ANCOVA分析得出cyl
对mpg
有显著影响,F(2,28) = 7.286, p = 0.003。事后多重比较并使用bonferroni
对p值进行校正,得出调整后均值(23.7±1.04)在cyl=4时,显著高于cyl等于6和8时的调整后均值,p<0.05。
参考资料:
[1].https://www./en/lessons/ancova-in-r/
[2].用R语言做协方差分析