/*--------------------------------------------------------------------
   Run this program to create the data set NCAA, in which the
   predictors are named x1-x42 and avg_grad is named y.

   Variables read from the raw file and their descriptions
   (a blank description means none was supplied):

   OBSNUM
   ID
   NAME      school name
   AVG_GRAD  average 6 yr rate 1996 97 98
   T10       BB tourney wins 90-99
   REGIONAL  number appear in regional tourney games
   BPOWER    basketball power 1990-98
   RECENT    proportion of wins in past 5 years
   CHAMPS    number of sport championships - total
   FBWINS    PERCENT WINS 90-99 football
   BOWLS     bowl appearances past 10 yrs
   ATTEND    avg BB home attendance
   ACCEPT    Acceptance rate
   ACT25     ACT COMPOSITE 25TH
   ACT75     ACT COMPOSITE 75TH
   AST_SAL   Assistant professor salary
   AVGSAL
   BBRANK10  ranked in BB past 10 years
   BBRANK5   ranked in BB past 5 years
   BOARD     room and board
   COST      estimated annual total cost
   FBRANK10  football ranked in past 10 years
   FBRANK5   ranked in FB last 5 years
   FBSUM     football power 1990-98
   FT        % enrolled fulltime
   FT_AID    % undergrads receiving aid
   FT_GRAD   % first-time undergraduates
   FTIME     number enrolled full time
   FULL_SAL  Full Professor Salary
   GIVE      Giving rank
   GRAD96
   GRAD97    Graduation rate
   GRAD98    1999 GRAD - 1991 FRESHMEN
             (NOTE(review): the pairing of the three GRAD variables
              with these two descriptions is unclear - confirm)
   IG_PCT    % receiving inst grants
   L_PCT     % receiving loans
   LOAN      loan amount $
   OLD       % students 25+
   ONCAMPUS  % living on campus
   OUTSTATE  % Out of state
   PHD       % faculty with PHD
   POP       population of city where located
   SF_RATIO  Student to faculty ratio
   SIZE      Total Enrollment
   SPORTS    weighted sports index
   TATEACH   % courses taught by TAs
   TOP10     % students in top 10% HS
   TUITION   in-state tuition
   WHITE     % white
--------------------------------------------------------------------*/

/* Step 1: read the raw text file into WORK.ONE.
   NAME is read from fixed columns 10-28; every other variable is
   read with list input. */
data one;
   infile 'K:\www\var.select\ncaa.data.orig.txt' lrecl=1000 pad;
   input
      obsnum id name $ 10-28
      avg_grad t10 regional bpower recent champs fbwins bowls attend
      accept act25 act75 ast_sal avgsal bbrank10 bbrank5 board cost
      fbrank10 fbrank5 fbsum ft ft_aid ft_grad ftime full_sal give
      grad96 grad97 grad98 ig_pct l_pct loan old oncampus outstate
      phd pop sf_ratio size sports tateach top10 tuition white
   ;
run;

/* Step 2: derive bbindex and rescale three variables.
   bbindex is the index of basketball strength defined in the
   journal article. */
data two;
   set one;

   /* Weighted sum of the four basketball measures. */
   bbindex = .889*bbrank10
           + .947*t10
           + .950*regional
           + .189*recent;

   /* Center and scale the index with fixed constants
      (presumably its sample mean and standard deviation - confirm). */
   bbindex = (bbindex - 6.4360274) / 5.6433947;

   /* Divide by 1000 so these are expressed in thousands. */
   size    = size    / 1000;
   tuition = tuition / 1000;
   board   = board   / 1000;
run;

/* Step 3: build the final data set NCAA.
   The first twenty variables get a label equal to their original
   name, then the response becomes y and the predictors x1-x42.
   LABEL refers to the old names because RENAME only applies to the
   output data set.  obsnum, id, name, grad96, grad97 and grad98
   keep their names. */
data ncaa;
   set two;

   label
      avg_grad = "avg_grad"
      top10    = "top10"
      act25    = "act25"
      oncampus = "oncampus"
      ft_grad  = "ft_grad"
      size     = "size"
      tateach  = "tateach"
      bbindex  = "bbindex"
      tuition  = "tuition"
      board    = "board"
      attend   = "attend"
      full_sal = "full_sal"
      sf_ratio = "sf_ratio"
      white    = "white"
      ast_sal  = "ast_sal"
      pop      = "pop"
      phd      = "phd"
      accept   = "accept"
      l_pct    = "l_pct"
      outstate = "outstate"
   ;

   rename
      avg_grad = y
      top10    = x1
      act25    = x2
      oncampus = x3
      ft_grad  = x4
      size     = x5
      tateach  = x6
      bbindex  = x7
      tuition  = x8
      board    = x9
      attend   = x10
      full_sal = x11
      sf_ratio = x12
      white    = x13
      ast_sal  = x14
      pop      = x15
      phd      = x16
      accept   = x17
      l_pct    = x18
      outstate = x19
      t10      = x20
      regional = x21
      bpower   = x22
      recent   = x23
      champs   = x24
      fbwins   = x25
      bowls    = x26
      act75    = x27
      avgsal   = x28
      bbrank10 = x29
      bbrank5  = x30
      cost     = x31
      fbrank10 = x32
      fbrank5  = x33
      fbsum    = x34
      ft       = x35
      ft_aid   = x36
      ftime    = x37
      give     = x38
      ig_pct   = x39
      loan     = x40
      old      = x41
      sports   = x42
   ;
run;

/* Descriptive statistics for the response and all 42 predictors. */
proc means data=ncaa;
   var y x1-x42;
run;

/* Variable listing for the final data set. */
proc contents data=ncaa;
run;