# Exercise set 2, 23 Jan 2003
#
# This file was a flattened console transcript. It is restored here as a
# runnable script: commands are code, and the output recorded in the original
# session is kept in "#>" comments.
# In printed loadings, small values are shown as blanks; those blanks were
# lost when the transcript was flattened, so rows with fewer values than
# components are reproduced as recorded and are NOT column-aligned.

#################
# 2.1
#################

# a) P is the 2x2 rotation matrix for an angle of 20 degrees; apply it to the
#    column vectors a and b.
kulma <- (20 / 180) * pi
P <- rbind(
  c(cos(kulma), sin(kulma)),
  c(-sin(kulma), cos(kulma))
)
P
#>            [,1]      [,2]
#> [1,]  0.9396926 0.3420201
#> [2,] -0.3420201 0.9396926

a <- cbind(a = c(3, -2))
a
#>       a
#> [1,]  3
#> [2,] -2
P %*% a
#>              a
#> [1,]  2.135038
#> [2,] -2.905446

b <- cbind(b = c(5, 1))
b
#>      b
#> [1,] 5
#> [2,] 1
P %*% b
#>               b
#> [1,]  5.0404832
#> [2,] -0.7704081

# b) Solve the equations
#      3.63 =  5*cos t + 2*sin t
#      3.93 = -5*sin t + 2*cos t
#    for sin t, and from that obtain the angle t.
# NOTE(review): with the constants as written, 2*3.63 - 5*3.93 = -12.39, not
# -12.27; one of the numbers may be mistyped -- confirm against the exercise
# sheet. The recorded computation is kept unchanged.
t_rad <- asin(-12.27 / 29)
t_rad
#> [1] -0.4368677
# Convert to degrees from the full-precision value instead of a pasted,
# rounded constant.
t_rad / pi * 180
#> [1] -25.03068

# H2.3
#########
# The original loaded the old multivariate package here (library(Mva)).
# princomp() is provided by the always-attached 'stats' package in current R,
# so no library() call is needed.

# a) Equicorrelation matrix with r = 0.7.
#    Built explicitly rather than by recycling c(1, r, r, r, r) into 16 cells,
#    which gives the same matrix but triggers a data-length warning.
r <- 0.7
Rho <- matrix(r, nrow = 4, ncol = 4)
diag(Rho) <- 1
Rho
#>      [,1] [,2] [,3] [,4]
#> [1,]  1.0  0.7  0.7  0.7
#> [2,]  0.7  1.0  0.7  0.7
#> [3,]  0.7  0.7  1.0  0.7
#> [4,]  0.7  0.7  0.7  1.0

Rho.pka <- princomp(covmat = Rho, cor = TRUE)
Rho.pka
#> Call:
#> princomp.default(cor = T, covmat = Rho)
#>
#> Standard deviations:
#>    Comp.1    Comp.2    Comp.3    Comp.4
#> 1.7606817 0.5477226 0.5477226 0.5477226
#>
#> 4 variables and NA observations.

names(Rho.pka)
#> [1] "sdev" "loadings" "center" "scale" "n.obs" "scores" "call"

Rho.pka$loadings
#> Loadings:
#> Comp.1 Comp.2 Comp.3 Comp.4
#> [1,] -0.500 0.866
#> [2,] -0.500 -0.680 -0.289 -0.452
#> [3,] -0.500 0.731 -0.289 -0.363
#> [4,] -0.500 -0.289 0.815

Rho.pka$sdev^2 # variances of the components
#> Comp.1 Comp.2 Comp.3 Comp.4
#>    3.1    0.3    0.3    0.3

plot(Rho.pka)

# b) Same analysis with r = 0.9.
r <- 0.9
Rho <- matrix(r, nrow = 4, ncol = 4)
diag(Rho) <- 1
det(Rho)
det(Rho[1:3, 1:3])

Rho.pka <- princomp(covmat = Rho, cor = TRUE)
Rho.pka$loadings
#> Loadings:
#> Comp.1 Comp.2 Comp.3 Comp.4
#> [1,] -0.500 0.866
#> [2,] -0.500 0.816 -0.289
#> [3,] -0.500 -0.408 -0.289 -0.707
#> [4,] -0.500 -0.408 -0.289 0.707

Rho.pka$sdev^2 # variances of the components
#> Comp.1 Comp.2 Comp.3 Comp.4
#>    3.7    0.1    0.1    0.1

# c) The 1st principal component is a general component of variation.
#    All variables are correlated to the same degree and in the same
#    direction. The first component captures this shared variation.
# H2.4
###########
# This part of the file was a flattened console transcript. It is restored
# here as a runnable script: commands are code, and the output recorded in
# the original session is kept in "#>" comments.
# In printed loadings, small values are shown as blanks; those blanks were
# lost when the transcript was flattened, so rows with fewer values than
# components are reproduced as recorded and are NOT column-aligned.

# Correlation matrix of the four variables (symmetric, so the fill order
# does not matter).
R <- matrix(
  c(
    1,      0.7501, 0.6329, 0.6363,
    0.7501, 1,      0.6925, 0.7386,
    0.6329, 0.6925, 1,      0.6625,
    0.6363, 0.7386, 0.6625, 1
  ),
  nrow = 4, ncol = 4
)
R
#>        [,1]   [,2]   [,3]   [,4]
#> [1,] 1.0000 0.7501 0.6329 0.6363
#> [2,] 0.7501 1.0000 0.6925 0.7386
#> [3,] 0.6329 0.6925 1.0000 0.6625
#> [4,] 0.6363 0.7386 0.6625 1.0000

R.pka <- princomp(covmat = R, cor = TRUE)
R.pka$loadings
#> Loadings:
#> Comp.1 Comp.2 Comp.3 Comp.4
#> [1,] -0.494 0.713 -0.233 -0.440
#> [2,] -0.522 0.191 0.143 0.819
#> [3,] -0.487 -0.585 -0.645
#> [4,] -0.497 -0.335 0.714 -0.363

R.pka$sdev^2 # variances of the components
#>    Comp.1    Comp.2    Comp.3    Comp.4
#> 3.0584114 0.3823261 0.3417786 0.2174839

# The mean post-partum weight shows a small "jump" after the 1st birth and a
# slight increasing trend after that.
# Nevertheless,
# the 1st principal component explains 76.46% of the total variation:
#   z_1 = 0.494*y_1 + 0.522*y_2 + 0.487*y_3 + 0.497*y_4
# The loadings are almost equal, and the 1st principal component correlates
# about equally strongly with every variable.

# H2.5
# Directory holding the course data files; defined once so it can be changed
# in a single place.
data_dir <- "C:\\Kurssit\\Mmm\\mmm03\\Datat"

firmat <- read.table(file.path(data_dir, "firmat.dat"), header = TRUE)

# (b)
fimat.pka <- princomp(firmat)
names(fimat.pka)
#> [1] "sdev" "loadings" "center" "scale" "n.obs" "scores" "call"

summary(fimat.pka)
#> Importance of components:
#>                           Comp.1     Comp.2      Comp.3
#> Standard deviation     9.1590678 1.36758404 0.366185228
#> Proportion of Variance 0.9766642 0.02177463 0.001561149
#> Cumulative Proportion  0.9766642 0.99843885 1.000000000

fimat.pka$loadings
#> Loadings:
#> Comp.1 Comp.2 Comp.3
#> varat -0.866 -0.485 -0.122
#> ntulot 0.372 -0.925
#> parvo -0.494 0.792 0.359

# z_1 = 0.866*varat + 0.494*parvo
# Explains 97.7% of the total variation.

fimat.pka$sdev^2
#>     Comp.1     Comp.2     Comp.3
#> 83.8885223  1.8702861  0.1340916

fimat.pka$scores # the values of the 1st component must be multiplied by -1
#>               Comp.1     Comp.2      Comp.3
#> G.M.      -9.5129590  1.8714091 -0.38928000
#> Exxon    -21.4042517 -1.2035355  0.34001642
#> Ford       0.7609231 -0.9480711 -0.64981325
#> Mobil     -0.2988297 -2.0451158 -0.24556489
#> Texaco     0.5886351 -0.3082677  0.48639468
#> Std.Oil    5.0209125  0.2906013  0.24917581
#> IBM       -1.2172603  2.8461707 -0.04110956
#> Gulf       5.7041718  0.2694851  0.39987677
#> G.E.       6.8054807 -0.4852114 -0.31953403
#> Chrysler  13.5531775 -0.2874647  0.16983805

# (a)
cor(firmat, fimat.pka$scores)
#>            Comp.1     Comp.2      Comp.3
#> varat  -0.9965123 -0.0832560 -0.00563392
#> ntulot -0.7566011  0.5441335 -0.36259267
#> parvo  -0.9721317  0.2327231  0.02828159

# Plot each original variable against the sign-flipped 1st component.
# Columns are accessed explicitly instead of attach()-ing the data frame, and
# the 2x2 layout is restored afterwards so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
flipped_score1 <- -fimat.pka$scores[, 1]
for (v in c("varat", "ntulot", "parvo")) {
  plot(
    firmat[[v]], flipped_score1,
    xlim = c(0, 40), ylim = c(-20, 30),
    xlab = v, ylab = "-fimat.pka$scores[, 1]"
  )
}
par(old_par)

# c) The 1st principal component explains 97.7% of the total variation.
#    It appears to describe the value of the company well.

# H1.6
# a)
pojat <- read.table(file.path(data_dir, "pojat.dat"), header = TRUE, skip = 7)
pojat2 <- pojat[, 3:4]
pojat2.pka <- princomp(pojat2)

pojat2.pka$sdev^2 # eigenvalues
#>    Comp.1    Comp.2
#> 130.52194   9.47486

# Eigenvectors: column 1 is the first eigenvector, column 2 the second.
pojat2.pka$loadings
#> Loadings:
#>       Comp.1 Comp.2
#> ppit2 -0.849  0.528
#> plev2 -0.528 -0.849

# b)
# z_1 = 0.849*ppit2 + 0.528*plev2   (column 1 multiplied by -1)
# z_2 = 0.528*ppit2 - 0.849*plev2   (coefficients taken from column Comp.2)

# c) Flip the sign of the 1st component only: multiply the scores by
#    diag(-1, 1).
I <- diag(c(-1, 1))
pojat2.pka$scores[1:10, ] %*% I
#>           [,1]       [,2]
#> 1   -6.3490777  1.0451854
#> 2   16.0302174  6.7167945
#> 3    0.8583918  0.8163109
#> 4    3.4061023  2.4003473
#> 5  -14.7270087 -0.6312011
#> 6    8.3870860  1.9646854
#> 7    5.1045759  3.4563716
#> 8    5.8393755  0.3806490
#> 9   16.3293551 -1.3399118
#> 10   3.6128897  0.1738615

# The same result obtained by negating the first column directly.
cbind(-pojat2.pka$scores[1:10, 1], pojat2.pka$scores[1:10, 2])
#>           [,1]       [,2]
#> 1   -6.3490777  1.0451854
#> 2   16.0302174  6.7167945
#> 3    0.8583918  0.8163109
#> 4    3.4061023  2.4003473
#> 5  -14.7270087 -0.6312011
#> 6    8.3870860  1.9646854
#> 7    5.1045759  3.4563716
#> 8    5.8393755  0.3806490
#> 9   16.3293551 -1.3399118
#> 10   3.6128897  0.1738615

# d) Centred original variables (red) overlaid with component scores (dots).
pojat2 <- transform(
  pojat2,
  ppit2k = ppit2 - mean(ppit2),
  plev2k = plev2 - mean(plev2)
)

# First 10 observations, 1st component sign-flipped.
with(pojat2, plot(ppit2k, plev2k,
  xlim = c(-30, 30), ylim = c(-30, 30), col = "red"
))
points(-pojat2.pka$scores[1:10, 1], pojat2.pka$scores[1:10, 2], pch = 20)

# While we are at it: all observations.
with(pojat2, plot(ppit2k, plev2k,
  xlim = c(-30, 30), ylim = c(-30, 30), col = "red"
))
points(-pojat2.pka$scores[, 1], pojat2.pka$scores[, 2], pch = 20)

# The "original" principal components (no sign flip).
with(pojat2, plot(ppit2k, plev2k,
  xlim = c(-30, 30), ylim = c(-30, 30), col = "red"
))
points(pojat2.pka$scores[, 1], pojat2.pka$scores[, 2], pch = 20)

# H1.7
asanat <- read.table(file.path(data_dir, "asanat.txt"), header = TRUE, skip = 4)
asanat.pka <- princomp(asanat)
plot(asanat.pka)
names(asanat.pka)
#> [1] "sdev" "loadings" "center" "scale" "n.obs" "scores" "call"

mean(asanat.pka$sdev^2)
#> [1] 1.22

asanat.pka$sdev^2
#>    Comp.1    Comp.2    Comp.3    Comp.4    Comp.5    Comp.6
#> 2.9826083 1.4427751 1.1975642 0.8122765 0.5191242 0.3656517

# 2) The variances of the first two components exceed 1.22.
# 3) Three components are needed to explain about 77% of the total variance.
# 4) The data show no clear boundary between small and large eigenvalues.

summary(asanat.pka)
#> Importance of components:
#>                           Comp.1    Comp.2    Comp.3    Comp.4    Comp.5
#> Standard deviation     1.7270229 1.2011557 1.0943328 0.9012638 0.7205027
#> Proportion of Variance 0.4074601 0.1971004 0.1636017 0.1109667 0.0709186
#> Cumulative Proportion  0.4074601 0.6045606 0.7681622 0.8791290 0.9500476
#> (only the first five columns were recorded in the original transcript)

# Osakkeet (stocks)
osakkeet <- read.table(
  file.path(data_dir, "Osakkeet.txt"),
  header = TRUE, skip = 8
)
osakkeet.pka <- princomp(osakkeet, cor = TRUE)
osakkeet.pka$sdev^2
#>    Comp.1    Comp.2    Comp.3    Comp.4    Comp.5
#> 2.8564889 0.8091190 0.5400421 0.4513483 0.3430018

# 2) The variance of the first component exceeds 1.
# 3) The first two components explain 73.3%, the first three 84.1%.
# 4) The variance of the first component is clearly the largest.

summary(osakkeet.pka)
#> Importance of components:
#>                           Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
#> Standard deviation     1.6901150 0.8995104 0.7348756 0.67182459 0.58566358
#> Proportion of Variance 0.5712978 0.1618238 0.1080084 0.09026966 0.06860037
#> Cumulative Proportion  0.5712978 0.7331216 0.8411300 0.93139963 1.00000000

plot(osakkeet.pka)

##########