************************************************************************
Compilation of the code from The Little SAS Book (5th edition) for a
two-day introductory SAS course.

Key parts of the code have been left blank and are filled in during
the exercises. Each blank is marked with a "Fill in here" comment.

NOTE! In most examples the folder paths may also need to be changed to
the correct ones.

Code by Delwiche and Slaughter, modified by T.Hurme
************************************************************************;

/*-------------------------------------------------------------------*/
/*       The Little SAS(r) Book: A Primer, Fifth Edition             */
/*          by Lora D. Delwiche and Susan J. Slaughter               */
/*       Copyright(c) 2012 by SAS Institute Inc., Cary, NC, USA      */
/*                   SAS Publications order                          */
/*                   ISBN 978-1-61290-343-9                          */
/*-------------------------------------------------------------------*/
/*                                                                   */
/* This material is provided "as is" by SAS Institute Inc.  There    */
/* are no warranties, expressed or implied, as to merchantability or */
/* fitness for a particular purpose regarding the materials or code  */
/* contained herein. The Institute is not responsible for errors     */
/* in this material as it now exists or will exist, nor does the     */
/* Institute provide technical support for it.                       */
/*                                                                   */
/*-------------------------------------------------------------------*/
/* Date Last Updated: 30SEP2012                                      */
/*-------------------------------------------------------------------*/
/* Questions or problem reports concerning this material may be      */
/* addressed to the author:                                          */
/*                                                                   */
/* SAS Institute Inc.                                                */
/* SAS Press                                                         */
/* Attn: Lora Delwiche and Susan Slaughter                           */
/* SAS Campus Drive                                                  */
/* Cary, NC 27513                                                    */
/*                                                                   */
/*                                                                   */
/* If you prefer, you can send email to:  saspress@sas.com           */
/* Use this for subject field:                                       */
/*     Comments for Lora Delwiche and Susan Slaughter                */
/*                                                                   */
/*-------------------------------------------------------------------*/


/* Chapter 2 */

/* Section 2.4 */
/* First program */
* Read internal data into SAS data set uspresidents;
DATA uspresidents;
   * Fill in here, replacing this comment *;
RUN;

/* Section 2.4 */
/* Second program */
* Read data from external file into SAS data set;
DATA uspresidents;
   * Fill in here, replacing this comment *;
   INPUT President $ Party $ Number;
RUN;

/* Section 2.5 */
/* Program */
* Create a SAS data set named toads;
* Read the data file ToadJump.dat using list input;
DATA toads;
   INFILE 'c:\MyRawData\ToadJump.dat';
   * Fill in here, replacing this comment *;
RUN;
* Print the data to make sure the file was read correctly;
PROC PRINT DATA = toads;
   TITLE 'SAS Data Set Toads';
RUN;

/* Section 2.6 */
/* Program */
* Create a SAS data set named sales;
* Read the data file OnionRing.dat using column input;
DATA sales;
   INFILE 'c:\MyRawData\OnionRing.dat';
   * Fill in here, replacing this comment *;
RUN;
* Print the data to make sure the file was read correctly;
PROC PRINT DATA = sales;
   TITLE 'SAS Data Set Sales';
RUN;

/* Section 2.7 */
/* Program */
* Create a SAS data set named contest;
* Read the file Pumpkin.dat using formatted input;
DATA contest;
   INFILE 'c:\MyRawData\Pumpkin.dat';
   * Fill in here, replacing this comment *;
RUN;
* Print the data set to make sure the file was read correctly;
PROC PRINT DATA = contest;
   TITLE 'Pumpkin Carving Contest';
RUN;

/* Section 2.14 */
/* In all four programs the blank stands for an INFILE option, so the */
/* semicolon that ends the INFILE statement follows the placeholder.  */
/* First Program */
DATA icecream;
   INFILE 'c:\MyRawData\IceCreamSales.dat' /* Fill in here, replacing this comment */;
   INPUT Flavor $ 1-9 Location BoxesSold;
RUN;

/* Second Program */
DATA icecream;
   INFILE 'c:\MyRawData\IceCreamSales2.dat' /* Fill in here, replacing this comment */;
   INPUT Flavor $ 1-9 Location BoxesSold;
RUN;

/* Third Program */
DATA class102;
   INFILE 'c:\MyRawData\AllScores.dat' /* Fill in here, replacing this comment */;
   INPUT Name $ Test1 Test2 Test3 Test4 Test5;
RUN;

/* Fourth Program */
DATA homeaddress;
   INFILE 'c:\MyRawData\Address.dat' /* Fill in here, replacing this comment */;
   INPUT Name $ 1-15 Number 16-19 Street $ 22-37;
RUN;

/* Section 2.18 */
/* The blanks stand for data set names; each DATA statement ends with */
/* its own semicolon after the placeholder.                           */
/* First Program */
DATA /* Fill in here, replacing this comment */;
   Miles = 26.22;
   Kilometers = 1.61 * Miles;
RUN;
PROC PRINT DATA = /* Fill in here, replacing this comment */;
RUN;

/* Second Program */
DATA /* Fill in here, replacing this comment */;
   Miles = 26.22;
   Kilometers = 1.61 * Miles;
RUN;
PROC PRINT DATA = /* Fill in here, replacing this comment */;
RUN;

/* Section 2.19 */
/* First Program */
/* Fill in here, replacing this comment */
DATA /* Fill in here, replacing this comment */;
   INFILE 'c:\MyRawData\Mag.dat';
   INPUT ScientificName $ 1-14 CommonName $ 16-32 MaximumHeight
         AgeBloom Type $ Color $;
RUN;

/* Second Program */
PROC PRINT DATA = /* Fill in here, replacing this comment */;
   TITLE 'Magnolias';
RUN;


/* Chapter 3 */

/* Section 3.1 */
/* Program */
* Modify homegarden data set with assignment statements;
DATA homegarden;
   INFILE 'c:\MyRawData\Garden.dat';
   INPUT Name $ 1-7 Tomato Zucchini Peas Grapes;
   /* Fill in here, replacing this comment */
RUN;
PROC PRINT DATA = homegarden;
   TITLE 'Home Gardening Survey';
RUN;

/* Section 3.2 */
/* Program */
DATA contest;
   INFILE 'c:\MyRawData\Pumpkin.dat';
   INPUT Name $16. Age 3. +1 Type $1. +1 Date MMDDYY10.
         (Scr1 Scr2 Scr3 Scr4 Scr5) (4.1);
   /* Fill in here, replacing this comment */
RUN;
PROC PRINT DATA = contest;
   TITLE 'Pumpkin Carving Contest';
RUN;

/* Section 3.5 */
/* Program */
DATA oldcars;
   INFILE 'c:\MyRawData\Auction.dat';
   INPUT Make $ 1-13 Model $ 15-29 YearMade Seats MillionsPaid;
   /* Fill in here, replacing this comment */
RUN;
PROC PRINT DATA = oldcars;
   TITLE 'Cars Sold at Auction';
RUN;

/* Section 3.6 */
/* Program */
* Group observations by cost;
DATA homeimprovements;
   INFILE 'c:\MyRawData\Home.dat';
   INPUT Owner $ 1-7 Description $ 9-33 Cost;
   /* Fill in here, replacing this comment */
RUN;
PROC PRINT DATA = homeimprovements;
   TITLE 'Home Improvement Cost Groups';
RUN;

/* Section 3.7 */
/* Program */
* Choose only comedies;
DATA comedy;
   INFILE 'c:\MyRawData\Shakespeare.dat';
   INPUT Title $ 1-26 Year Type $;
   /* Fill in here, replacing this comment */
RUN;
PROC PRINT DATA = comedy;
   TITLE 'Shakespearean Comedies';
RUN;

/* Section 3.8 */
/* Program */
DATA librarycards;
   INFILE 'c:\MyRawData\Library.dat' TRUNCOVER;
   /* Fill in here, replacing this comment */
RUN;
PROC PRINT DATA = librarycards;
   /* Fill in here, replacing this comment */
   TITLE 'SAS Dates without and with Formats';
RUN;


/* Chapter 4 */

/* Section 4.2 */
/* First Program */
DATA 'c:\MySASLib\style';
   INFILE 'c:\MyRawData\Artists.dat';
   INPUT Name $ 1-21 Genre $ 23-40 Origin $ 42;
RUN;

/* Second Program */
PROC PRINT DATA = 'c:\MySASLib\style';
   /* Fill in here, replacing this comment */
   TITLE 'Major Impressionist Painters';
   FOOTNOTE 'F = France N = Netherlands U = US';
RUN;

/* Section 4.3 */
/* Program */
DATA marine;
   INFILE 'c:\MyRawData\Lengths.dat';
   INPUT Name $ Family $ Length @@;
RUN;
* Sort the data;
/* Fill in here, replacing this comment */
PROC PRINT DATA = seasort;
   TITLE 'Whales and Sharks';
RUN;

/* Section 4.5 */
/* Program */
DATA sales;
   INFILE 'c:\MyRawData\CandySales.dat';
   INPUT Name $ 1-11 Class @15 DateReturned MMDDYY10.
         CandyType $ Quantity;
   Profit = Quantity * 1.25;
PROC SORT DATA = sales;
   BY Class;
/* Fill in here, replacing this comment */

/* Section 4.6 */
/* Program */
DATA sales;
   INFILE 'c:\MyRawData\CandySales.dat';
   INPUT Name $ 1-11 Class @15 DateReturned MMDDYY10.
         CandyType $ Quantity;
   Profit = Quantity * 1.25;
PROC PRINT DATA = sales;
   VAR Name DateReturned CandyType Profit;
   /* Fill in here, replacing this comment */
   TITLE 'Candy Sale Data Using Formats';
RUN;

/* Section 4.8 */
/* Program */
DATA carsurvey;
   INFILE 'c:\MyRawData\Cars.dat';
   INPUT Age Sex Income Color $;
RUN;
/* Fill in here, replacing this comment */
* Print data using user-defined and standard (DOLLAR8.) formats;
PROC PRINT DATA = carsurvey;
   /* Fill in here, replacing this comment */
   TITLE 'Survey Results Printed with User-Defined Formats';
RUN;

/* Section 4.10 */
/* Program */
DATA sales;
   INFILE 'c:\MyRawData\Flowers.dat';
   INPUT CustID $ @9 SaleDate MMDDYY10.
         Petunia SnapDragon Marigold;
   Month = MONTH(SaleDate);
PROC SORT DATA = sales;
   BY Month;
* Calculate means by Month for flower sales;
/* Fill in here, replacing this comment */

/* Section 4.11 */
/* Program */
DATA sales;
   INFILE 'c:\MyRawData\Flowers.dat';
   INPUT CustID $ @9 SaleDate MMDDYY10.
         Petunia SnapDragon Marigold;
PROC SORT DATA = sales;
   BY CustID;
* Calculate means by CustomerID, output sum and mean to new data set;
PROC MEANS NOPRINT DATA = sales;
   BY CustID;
   VAR Petunia SnapDragon Marigold;
   /* Fill in here, replacing this comment */
PROC PRINT DATA = totals;
   TITLE 'Sum of Flower Data over Customer ID';
   FORMAT MeanP MeanSD MeanM 3.;
RUN;

/* Section 4.12 */
/* Program */
DATA orders;
   INFILE 'c:\MyRawData\Coffee.dat';
   INPUT Coffee $ Window $ @@;
* Print tables for Window and Window by Coffee;
/* Fill in here, replacing this comment */


/* Chapter 5 */

/* Section 5.6 */
/* Program */
* Create an RTF file;
/* Fill in here, replacing this comment */
DATA marine;
   INFILE 'c:\MyRawData\Lengths8.dat';
   INPUT Name $ Family $ Length @@;
RUN;
PROC MEANS DATA = marine MEAN MIN MAX;
   CLASS Family;
   TITLE 'Whales and Sharks';
RUN;
PROC PRINT DATA = marine;
RUN;
* Close the RTF file;
/* Fill in here, replacing this comment */


/* Chapter 6 */

/* Section 6.1 */
/* First Program */
* Create permanent SAS data set trains;
DATA 'c:\MySASLib\trains';
   INFILE 'c:\MyRawData\Train.dat';
   INPUT Time TIME5. Cars People;
RUN;

/* Second Program */
* Read the SAS data set trains with a SET statement;
/* Fill in here, replacing this comment */
   PeoplePerCar = People / Cars;
RUN;
PROC PRINT DATA = averagetrain;
   TITLE 'Average Number of People per Train Car';
   FORMAT Time TIME5.;
RUN;

/* Section 6.2 */
/* Program */
DATA southentrance;
   INFILE 'c:\MyRawData\South.dat';
   INPUT Entrance $ PassNumber PartySize Age;
PROC PRINT DATA = southentrance;
   TITLE 'South Entrance Data';
RUN;
DATA northentrance;
   INFILE 'c:\MyRawData\North.dat';
   INPUT Entrance $ PassNumber PartySize Age Lot;
PROC PRINT DATA = northentrance;
   TITLE 'North Entrance Data';
RUN;
* Create a data set, both, combining northentrance and southentrance;
* Create a variable, AmountPaid, based on value of variable Age;
/* Fill in here, replacing this comment */
   IF Age = . THEN AmountPaid = .;
      ELSE IF Age < 3  THEN AmountPaid = 0;
      ELSE IF Age < 65 THEN AmountPaid = 35;
      ELSE AmountPaid = 27;
RUN;
PROC PRINT DATA = both;
   TITLE 'Both Entrances';
RUN;

/* Section 6.4 */
/* Program */
DATA descriptions;
   INFILE 'c:\MyRawData\chocolate.dat' TRUNCOVER;
   INPUT CodeNum $ 1-4 Name $ 6-14 Description $ 15-60;
RUN;
DATA sales;
   INFILE 'c:\MyRawData\chocsales.dat';
   INPUT CodeNum $ 1-4 PiecesSold 6-7;
PROC SORT DATA = sales;
   BY CodeNum;
RUN;
* Merge data sets by CodeNum;
/* Fill in here, replacing this comment */
PROC PRINT DATA = chocolates;
   TITLE "Today's Chocolate Sales";
RUN;

/* Section 6.14 */
/* Program */
DATA baseball;
   INFILE 'c:\MyRawData\Transpos.dat';
   INPUT Team $ Player Type $ Entry;
PROC SORT DATA = baseball;
   BY Team Player;
PROC PRINT DATA = baseball;
   TITLE 'Baseball Data After Sorting and Before Transposing';
RUN;
* Transpose data so salary and batavg are variables;
/* Fill in here, replacing this comment */
PROC PRINT DATA = flipped;
   TITLE 'Baseball Data After Transposing';
RUN;


/* Chapter 8 */

/* Section 8.5 */
DATA wings;
   INFILE 'c:\MyRawData\Birds.dat';
   INPUT Name $12. Type $ Length Wingspan @@;
RUN;
* Plot Wingspan by Length;
PROC FORMAT;
   VALUE $birdtype 'S' = 'Songbirds'
                   'R' = 'Raptors';
RUN;
/* Fill in here, replacing this comment */
   FORMAT Type $birdtype.;
   TITLE 'Comparison of Wingspan vs. Length';
RUN;

/* Section 8.6 */
DATA electricity;
   INFILE 'c:\MyRawData\Hourly.dat';
   INPUT Time kWh @@;
RUN;
* Plot electricity use (kWh) by time;
/* Fill in here, replacing this comment */
   TITLE 'Hourly Use of Electricity';
RUN;

/* Section 8.7 */
DATA Olympic1500;
   INFILE 'C:\MyRawData\Olympic1500.dat';
   INPUT Year Men @@;
RUN;
/* Fill in here, replacing this comment */
   LABEL Men = 'Time in Seconds';
   TITLE "Olympic Times for Men's 1500 Meter Run";
RUN;


/* Chapter 9 */

/* Section 9.1 */
/* Program */
DATA class;
   INFILE 'c:\MyRawData\Scores.dat';
   INPUT Score @@;
RUN;
/* Fill in here, replacing this comment */

/* Section 9.2 */
/* Program */
DATA class;
   INFILE 'c:\MyRawData\Scores.dat';
   INPUT Score @@;
RUN;
PROC UNIVARIATE DATA = class;
   VAR Score;
   /* Fill in here, replacing this comment */
   TITLE;
RUN;

/* Section 9.8 */
/* Program */
DATA class;
   INFILE 'c:\MyRawData\Exercise.dat';
   INPUT Score Television Exercise @@;
RUN;
/* Fill in here, replacing this comment */
   TITLE 'Correlations for Test Scores';
   TITLE2 'With Hours of Television and Exercise';
RUN;

/* Section 9.9 */
/* Program */
DATA class;
   INFILE 'c:\MyRawData\Exercise.dat';
   INPUT Score Television Exercise @@;
RUN;
PROC CORR DATA = class /* Fill in here, replacing this comment */;
   VAR Television Exercise;
   WITH Score;
   TITLE 'Correlations for Test Scores';
   TITLE2 'With Hours of Television and Exercise';
RUN;