1 Linear hypothesis test challenge

1.1 Setup

library(tidyverse)

pvc <- read_csv("r-linear-files/pvc.csv")
pvc$operator <- factor(pvc$operator)
pvc$resin <- factor(pvc$resin)

pvcfit1 <- lm(psize ~ operator + resin, data=pvc)

1.2 Solution

library(multcomp)

K2 <- rbind(                   # R2 R3 R4 R5 R6 R7 R8
    R8_vs_R4      = c(0,  0, 0,   0, 0,-1, 0, 0, 0, 1),
    R2_vs_R1      = c(0,  0, 0,   1, 0, 0, 0, 0, 0, 0))

result2 <- glht(pvcfit1, K2)

summary(result2)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lm(formula = psize ~ operator + resin, data = pvc)
## 
## Linear Hypotheses:
##               Estimate Std. Error t value Pr(>|t|)    
## R8_vs_R4 == 0    6.000      0.661   9.077   <1e-10 ***
## R2_vs_R1 == 0   -1.033      0.661  -1.563    0.235    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
confint(result2)
## 
##   Simultaneous Confidence Intervals
## 
## Fit: lm(formula = psize ~ operator + resin, data = pvc)
## 
## Quantile = 2.3255
## 95% family-wise confidence level
##  
## 
## Linear Hypotheses:
##               Estimate lwr     upr    
## R8_vs_R4 == 0  6.0000   4.4628  7.5372
## R2_vs_R1 == 0 -1.0333  -2.5706  0.5039
par(mar=c(2,10,2,2))
plot(result2)

2 Gene expression challenge

2.1 Setup

library(tidyverse)

teeth <- read_csv("r-linear-files/teeth.csv")

teeth$tooth <- factor(teeth$tooth, levels=c("lower","upper"))
teeth$mouse <- factor(teeth$mouse)

# A convenience to examine different model fits
more_data <- expand.grid(
    day=seq(14.3,18.2,by=0.01),
    tooth=factor(c("lower","upper"), levels=c("lower","upper")))

look <- function(y, fit=NULL) {
    p <- ggplot(teeth,aes(x=day,group=tooth))
    if (!is.null(fit)) {
        more_ci <- cbind(
            more_data, 
            predict(fit, more_data, interval="confidence"))
        p <- p + 
            geom_ribbon(aes(ymin=lwr,ymax=upr), data=more_ci, alpha=0.1) + 
            geom_line(aes(y=fit,color=tooth), data=more_ci)
    }
    p + geom_point(aes(y=y,color=tooth)) +
        labs(y=deparse(substitute(y)))
}

2.2 Possible solution

library(splines)

look(log2(teeth$gene_wnt2))

wnt2_additive <- lm(log2(gene_wnt2) ~ tooth + day, data=teeth)
look(log2(teeth$gene_wnt2), wnt2_additive)

wnt2_interaction <- lm(log2(gene_wnt2) ~ tooth * day, data=teeth)
look(log2(teeth$gene_wnt2), wnt2_interaction)

# We can reject wnt2_additive using wnt2_interaction
anova(wnt2_additive, wnt2_interaction)
## Analysis of Variance Table
## 
## Model 1: log2(gene_wnt2) ~ tooth + day
## Model 2: log2(gene_wnt2) ~ tooth * day
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1     13 5.5087                                
## 2     12 2.9862  1    2.5225 10.137 0.007865 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
wnt2_poly <- lm(log2(gene_wnt2) ~ tooth * poly(day,2), data=teeth)
look(log2(teeth$gene_wnt2), wnt2_poly)

# We can reject wnt2_interaction using wnt2_poly
anova(wnt2_interaction, wnt2_poly)
## Analysis of Variance Table
## 
## Model 1: log2(gene_wnt2) ~ tooth * day
## Model 2: log2(gene_wnt2) ~ tooth * poly(day, 2)
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     12 2.9862                              
## 2     10 1.2687  2    1.7175 6.7686 0.01384 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
wnt2_ns <- lm(log2(gene_wnt2) ~ tooth * ns(day,2), data=teeth)
look(log2(teeth$gene_wnt2), wnt2_ns)

# wnt2_poly and wnt2_ns don't nest.
# Not valid to compare using anova(), but can use AIC().

AIC(wnt2_additive, wnt2_interaction, wnt2_poly, wnt2_ns)
##                  df      AIC
## wnt2_additive     4 36.34595
## wnt2_interaction  5 28.54865
## wnt2_poly         7 18.85275
## wnt2_ns           7 17.81323
# Further models possible...

Home