# GLM1/ MASS: Venables & Ripley, Modern Applied Statistics with S-PLUS, Springer # 1999, s. 218 (1997,1994) library(MASS) # Aineisto: Collett, Modellin Binary Data, Chapman & Hall, 1991 & 1999, s. 75 #################### # Binomiaalinen data #################### ldose <- rep(0:5, 2) numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16) number <-rep(20,12) # painomuuttuja y <- numdead/number # vastemuuttuja budworm.lg <- glm(y ~ ldose,weight=number, family=binomial) budworm0.lg <- glm(y ~ 1,weight=number, family=binomial) budworm.lg summary(budworm.lg) anova(budworm.lg,test="Chisq") # Analysis of Deviance Table names(budworm.lg) # AIC= -2*L_M + k*npar, L_M = (-AIC + k*npar)/2 # AIC= -2*L_M + 2*npar # dbinom(x, size, prob, log = FALSE) dbinom(numdead, 20, y) sum(log(dbinom(numdead, 20, y))) # L_f = -15.0552, täyden mallin uf:n maksimi budworm.lg$coefficients # -2.766087 1.006807 lk<- -2.766087+1.006807*ldose e<-exp(lk) pihat<- e/(1+e) sum(log(dbinom(numdead, 20, pihat))) # -23.54722 = L_M # D(M) = 2*(-15.0552)+2*23.54722 = , devianssi # AIC = -2*(-23.54722)+2*2 = 51.09444 budworm.lg$aic # AIC = 51.09443; AIC= -2*L_M + 2*2 # L_M = (-AIC + 2*2)/2 = -23.54722 # L_f = (D(M)+2*L_M)/2 = -15.0552 anova(budworm.lg,test="Chisq") # Df Deviance Resid. Df Resid. Dev P(>|Chi|) # NULL 11 124.876 # ldose 1 107.892 10 16.984 2.839e-25 # -2*log(l_0/l_m) = 2*log(l_M/l_0) = 2*L_M-2*L_0 ja toisaalta # D(M_0)-D(M) = 2*(L_f-L_0) - 2*(L_f-L_M) = 2*L_M-2*L_0 # Merk. log(l_M/l_0) = R_n; # 2*R_n ~ Chisq(df_0-df_M), asymptoottisesti pchisq(107.892, 1,lower.tail = FALSE) # 2.838307e-25 ################# # Graafinen esitys ################# # plot(2^ldose, numdead/20) plot(c(1,32), c(0,1), type="n", xlab="dose", ylab="prob", log="x") text(2^ldose, numdead/20, labels=as.character("x")) # text(2^ldose, numdead/20, labels=as.character("x")) #f<-budworm.lg$fitted.values #plot(2^ldose, f) ld <- seq(0, 5, 0.1) lines(2^ld, predict(budworm.lg, data.frame(ldose=ld), type="response", col=3)) ################ # Otetaan selittäjäksi pelkästään sukupuoli sex <- factor(rep(c("M", "F"), c(6, 6))) # sex <- rep(c(1,0),c(6,6)) budworm.lg <- glm(y ~ sex,weight=number, family=binomial) budworm.lg anova(budworm.lg,test="Chisq") exp(-0.4754199)/(1+exp(-0.4754199)) # 0.3833342, naaraat e<-exp(-0.4754199+0.6424740) e/(1+e) # 0.5416667, urokset; eroavatko merkitsevästi # 0.5416667-0.3833342 = 0.1583325 sqrt(0.3833342*(1-0.3833342)/120 + 0.5416667*(1-0.5416667)/120) # 0.06355136 # 0.1583325/0.06355136 = 2.49141 "tavanomainen testisuure" ################ # Otetaan selittäjäksi sukupuoli ja ldose sex <- factor(rep(c("M", "F"), c(6, 6))) # sexn <- rep(c(1,0),c(6,6)) # ldose =log_2(dose), koska annos menee kahden potensseissa budworm.lg <- glm(y ~ sex + ldose,weight=number, family=binomial) # budworm2.lg <- glm(y ~ sexn,weight=number, family=binomial) # sex:in numeerinen koodaus budworm.lg <- glm(SF ~ sex + ldose,weight=number, family=binomial) budworm.lg; summary(budworm.lg) anova(budworm.lg,test="Chisq") # Kummallekin sukupuolelle oma käyrä ############ plot(c(1,32), c(0,1), type="n", xlab="dose", ylab="prob", log="x") text(2^ldose, numdead/20, labels=as.character(sex)) ld <- seq(0, 5, 0.1) lines(2^ld, predict(budworm.lg, data.frame(ldose=ld, sex=factor(rep("M", length(ld)), levels=levels(sex))), type="response"), col=3) lines(2^ld, predict(budworm.lg, data.frame(ldose=ld, sex=factor(rep("F", length(ld)), levels=levels(sex))), type="response"), lty=2, col=2) ########### # 2. tapa esittää vastemuuttuja SF <- cbind(numdead, numalive=20-numdead) budworm.lg <- glm(SF ~ sex*ldose,weight=number, family=binomial) ########### ########### # Sukupuolen ja annoksen yhdysvaikutus mukaan budworm.lg <- glm(SF ~ sex*ldose, family=binomial)