# 8. harjoitukset # 8.1 kyphosis <- read.table("C:\\Kurssit\\Glim\\Glim02\\Datat\\kyphosis.txt", header = TRUE, sep = ",") attach(kyphosis) kyphosis kyph.glm1 <- glm(Kyphosis ~ Age + Number + Start+.^2, family=binomial,data=kyphosis) summary(kyph.glm1) kyph.glm2 <- glm(Kyphosis ~ Age + Number + Start+Age:(Number+Start), family=binomial,data=kyphosis) summary(kyph.glm2) kyph.glm3 <- glm(Kyphosis ~ Age + Number + Start+Age:Number, family=binomial,data=kyphosis) summary(kyph.glm3) kyph.glm4 <- glm(Kyphosis ~ Age + Number + Start, family=binomial,data=kyphosis) summary(kyph.glm4) kyph.glm5 <- glm(Kyphosis ~ Number + Start, family=binomial,data=kyphosis) summary(kyph.glm5) kyph.glm6 <- glm(Kyphosis ~ Start, family=binomial,data=kyphosis) summary(kyph.glm6) ####################################### anova(kyph.glm1,kyph.glm2,kyph.glm3,kyph.glm4,kyph.glm5,kyph.glm6,test="Chisq") Analysis of Deviance Table Model 1: Kyphosis ~ Age + Number + Start + .^2 Model 2: Kyphosis ~ Age + Number + Start + Age:(Number + Start) Model 3: Kyphosis ~ Age + Number + Start + Age:Number Model 4: Kyphosis ~ Age + Number + Start Model 5: Kyphosis ~ Number + Start Model 6: Kyphosis ~ Start Resid. Df Resid. Dev Df Deviance P(>|Chi|) 1 74 56.994 2 75 57.001 -1 -0.007 0.934 3 76 59.453 -1 -2.452 0.117 4 77 61.380 -1 -1.927 0.165 5 78 64.536 -1 -3.157 0.076 6 79 68.072 -1 -3.536 0.060 ########################## library(MASS) stepAIC(kyph.glm1, direction= "backward") Start: AIC= 70.99 Kyphosis ~ Age + Number + Start + Age:Number + Age:Start + Number:Start Df Deviance AIC - Number:Start 1 57.001 69.001 56.994 70.994 - Age:Start 1 59.034 71.034 - Age:Number 1 60.356 72.356 Step: AIC= 69 Kyphosis ~ Age + Number + Start + Age:Number + Age:Start Df Deviance AIC 57.001 69.001 - Age:Start 1 59.453 69.453 - Age:Number 1 60.443 70.443 Call: glm(formula = Kyphosis ~ Age + Number + Start + Age:Number + Age:Start, family = binomial, data = kyphosis) Coefficients: (Intercept) Age Number Start Age:Number Age:Start -7.875979 0.072189 1.260596 -0.022333 -0.008456 -0.002231 Degrees of Freedom: 80 Total (i.e. Null); 75 Residual Null Deviance: 83.23 Residual Deviance: 57 AIC: 69 ############################### # 8.2 kyph.glm2 <- glm(Kyphosis ~ Age + Number + Start+Age:(Number+Start), family=binomial,data=kyphosis) kyph.glm7 <- glm(Kyphosis ~ poly(Age,2) + I((Start>12)*(Start-12)), family=binomial,data=kyphosis) anova(kyph.glm2,kyph.glm7) summary(kyph.glm7) summary(kyph.glm7) Call: glm(formula = Kyphosis ~ poly(Age, 2) + I((Start > 12) * (Start - 12)), family = binomial, data = kyphosis) Deviance Residuals: Min 1Q Median 3Q Max -1.42301 -0.50141 -0.13277 -0.01416 2.11657 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -0.6849 0.4574 -1.498 0.13426 poly(Age, 2)1 5.7720 4.1348 1.396 0.16272 poly(Age, 2)2 -10.3247 4.9585 -2.082 0.03732 * I((Start > 12) * (Start - 12)) -1.3512 0.5113 -2.643 0.00823 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 83.234 on 80 degrees of freedom Residual deviance: 51.953 on 77 degrees of freedom AIC: 59.953 ######################## # Vaikuttavat havainnot cooks.distance(kyph.glm7) plot(cooks.distance(kyph.glm7)) identify(cooks.distance(kyph.glm7)) kyph.glmp11<-update(kyph.glm7, subset = -11) # Havainto 11 poistetaan aineistosta plot(cooks.distance(kyph.glmp11));identify(cooks.distance(kyph.glmp11)) plot(dfbetas(kyph.glm7));identify(dfbetas(kyph.glm7)) b1<- dfbetas(kyph.glm4)[,2] plot(b1) ###################### # 8.3 smoking.cdeath <- read.table("C:\\Kurssit\\Glim\\Glim02\\Datat\\smoking.cdeath.txt", header = TRUE, sep = ",") # smoking.cdeath names(smoking.cdeath) [1] "age" "agec" "agecsq" "smokeage" "smoking" [6] "deaths" "person.years" smoking.cd0 <- glm(deaths ~ smoke + agec, family = poisson, data = smoking.cdeath) smoking.cd1 <- glm(deaths ~ offset(log(person.years)) + smoke + agec, family = poisson, data = smoking.cdeath) smoking.cd0 smoking.cd1 predict(smoking.cd0,type = "response") predict(smoking.cd1,type = "response") ts<- matrix(data = predict(smoking.cd0,type = "response"), nrow = 5, ncol = 2) ts<- matrix(data = predict(smoking.cd1,type = "response"), nrow = 5, ncol = 2) # 8.4 smoking.cd <- glm(deaths ~ offset(log(person.years)) + smoke + agec + agecsq + smokeage, family = poisson, data = smoking.cdeath) smoking.cd ########## Call: glm(formula = deaths ~ offset(log(person.years)) + smoke + agec + agecsq + smokeage, family = poisson, data = smoking.cdeath) Coefficients: (Intercept) smoke agec agecsq smokeage -10.7918 1.4410 2.3765 -0.1977 -0.3075 Degrees of Freedom: 9 Total (i.e. Null); 5 Residual Null Deviance: 935.1 Residual Deviance: 1.635 AIC: 66.7 ########## # summary(smoking.cd) summary(smoking.cd) Call: glm(formula = deaths ~ offset(log(person.years)) + smoke + agec + agecsq + smokeage, family = poisson, data = smoking.cdeath) Deviance Residuals: 1 2 3 4 5 6 7 8 0.43820 -0.27329 -0.15265 0.23393 -0.05700 -0.83049 0.13404 0.64107 9 10 -0.41058 -0.01275 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -10.79176 0.45004 -23.979 < 2e-16 *** smoke 1.44097 0.37217 3.872 0.000108 *** agec 2.37648 0.20794 11.429 < 2e-16 *** agecsq -0.19768 0.02737 -7.223 5.08e-13 *** smokeage -0.30755 0.09704 -3.169 0.001527 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 935.0673 on 9 degrees of freedom Residual deviance: 1.6354 on 5 degrees of freedom AIC: 66.703 ########### ts<- matrix(data = predict(smoking.cd,type = "response"), nrow = 5, ncol = 2) ts ts<- matrix(data = smoking.cdeath[,7], nrow = 5, ncol = 2); ts ts<- matrix(data = residuals(smoking.cd,type = "pearson"), nrow = 5, ncol = 2); ts # Vaikuttavin havainto plot(cooks.distance(smoking.cd)) identify(cooks.distance(smoking.cd)) # 1, 6 ############### # 8.5 ############################ # ks. 5.7 # Uudet AIDS-tapaukset # (a) y <- scan() 0 0 3 0 1 1 1 2 2 4 2 8 0 3 4 5 2 2 2 5 4 3 15 12 7 14 6 10 14 8 19 10 7 20 10 19 i <- 1:36 aids.reg <- glm(y ~ i,family=poisson) # poisson pienellä vaikka nimi aids.reg # aids.reg$coefficients aids.reg1 <- glm(y ~ i,family=poisson(link=sqrt)) # poisson pienellä vaikka nimi aids.reg1 ########### aids.reg1 Call: glm(formula = y ~ i, family = poisson(link = sqrt)) Coefficients: (Intercept) i 0.55251 0.09443 Degrees of Freedom: 35 Total (i.e. Null); 34 Residual Null Deviance: 190.2 Residual Deviance: 59.79 AIC: 175.1 > aids.reg Call: glm(formula = y ~ i, family = poisson) Coefficients: (Intercept) i 0.03966 0.07957 Degrees of Freedom: 35 Total (i.e. Null); 34 Residual Null Deviance: 190.2 Residual Deviance: 62.36 AIC: 177.7 ############## # Plottaus plot(i,y) f <- aids.reg$fitted.values;lines(f,lty=1) f1 <- aids.reg1$fitted.values;lines(f1,lty=2) ####################### #Degrees of Freedom: 35 Total (i.e. Null); 34 Residual #Null Deviance: 190.2 #Residual Deviance: 62.36 AIC: 177.7 anova(aids.reg,test="Chisq") # Df Deviance Resid. Df Resid. Dev P(>|Chi|) # NULL 35 190.17 # i 1 127.81 34 62.36 1.238e-29 pchisq(62.36, 34 , ncp=0, lower.tail = F) #[1] 0.002131935 ########### # # 8.7 aids.reg <- glm(y ~ i,family=poisson) w<-aids.reg$weights # painot z<- aids.reg$linear.predictors + aids.reg$residuals # z-vektori a<-sum(w) # Matriisin (X^T)WX=XWT alkiot a,b ja c b<-sum(i*w) c<-sum(i^2*w) zv1<-sum(w*z) # z-vektorin alkiot zv2<-sum(i*w*z) XWX<-matrix(data=c(a,b,b,c),nrow=2) XWz<-matrix(data=c(zv1,zv2),nrow=2) b<-matrix(data=aids.reg$coefficients,nrow=2) XWX<-matrix(data=c(a,b,b,c),nrow=2) b [,1] [1,] 0.03966095 [2,] 0.07956924 > XWX%*%b [,1] [1,] 476.2771 [2,] 13769.9384 > XWz [,1] [1,] 476.2771 [2,] 13769.9384 > XWX%*%b-XWz #################