options ls=75 ps=10000 nodate;

/*
   Reads two race-result listings (AGEGROUPW.HTM for women, AGEGROUPM.HTM
   for men), keeps name, sex, age and finishing time in minutes, stacks
   them into one dataset BOTH, and models time as a function of age and
   sex with PROC GLM.

   This program contains fairly advanced DATA step code. For a less
   complicated version which reads data formatted more conveniently,
   see "rftc08.sas".
*/

/*
   read_results: read one results listing into dataset OUT.
     out  = name of the dataset to create
     file = path of the listing to read
     sex  = literal stored in the character variable SEX

   Only NAME, SEX, AGE and RTIME are kept.
*/
%macro read_results(out=, file=, sex=);
  data &out;
    infile "&file";
    sex = "&sex";

    /* Peek at the first token and hold the line so it can be re-read. */
    input var1 $ @;
    v1 = substr(var1, 1, 1);

    /* Keep only lines whose first character sorts strictly between "-"
       and "A". On ASCII systems that is the digits plus a few
       punctuation characters, so markup and heading lines are dropped.
       NOTE(review): presumably result rows start with a finishing
       place number; confirm against the input files. */
    if (v1 < "A") and (v1 > "-") then
      input name $ 10-30 survivor $ 33 bib age all crace $ gun $ diff $;
    else delete;

    /* "<" falls inside the range above, so tag lines are removed here,
       together with rows whose bib number could not be read. */
    if (bib = .) or (v1 = "<") then delete;

    /* Convert CRACE to minutes. Two colon-separated fields are read as
       mm:ss; three fields are read as h:mm:ss, so a time of one hour or
       more is no longer mistaken for a time of a few minutes. */
    if scan(crace, 3, ':') = ' ' then
      rtime = input(scan(crace, 1, ':'), 8.)
            + input(scan(crace, 2, ':'), 8.) / 60;
    else
      rtime = 60 * input(scan(crace, 1, ':'), 8.)
            + input(scan(crace, 2, ':'), 8.)
            + input(scan(crace, 3, ':'), 8.) / 60;

    keep name sex age rtime;
  run;
%mend read_results;

/* WOMEN is read first and listed first in the SET statement below, so
   SEX gets length 5 and "men" is stored without truncation. */
%read_results(out=women, file=AGEGROUPW.HTM, sex=women)
%read_results(out=men,   file=AGEGROUPM.HTM, sex=men)

/* Drop duplicated records within each listing. */
proc sort data=men nodup;
  by name;
run;

proc sort data=women nodup;
  by name;
run;

/* Stack both listings into one analysis dataset. */
data both;
  set women men;
  label rtime="5k time";
  keep sex age rtime name;
  /* The statements below are disabled. The derived variables are only
     needed by the disabled PROC REG step at the end of the program. */
  *if age>39;
  *age2=age*age;
  *male=sex="men";
  *ageXmale=age*male;
  *age2Xmale=age2*male;
run;

/* Debug listings, disabled. The comment is closed right after them so
   that the analysis steps below actually run.
proc print data=men (obs=10);
  title "men";
run;
proc print data=women (obs=10);
  title "women";
run;
proc print data=both (obs=100) noobs label;
  title "both";
run;
*/

proc sort data=both nodup;
  by rtime;
  *by sex;
run;

/* Separate straight lines for each sex. */
proc glm data=both;
  class sex;
  model rtime=age sex age*sex / solution;
run;

/* Scatter plot with a linear regression line per sex. */
symbol value=dot i=rl;
proc gplot data=both;
  *by sex;
  title "linear fit";
  plot rtime*age=sex;
run;

/* Scatter plot with a quadratic regression curve per sex. */
symbol value=dot i=rq;
proc gplot data=both;
  *by sex;
  title "quadratic fit";
  plot rtime*age=sex;
run;

proc glm data=both;
  title "full (overfit) model";
  class sex;
  model rtime=age age*age sex age*sex age*age*sex;
run;

/* Final model: common quadratic in age plus a shift for sex.
   CLASS levels are ordered alphabetically, so the SEX coefficients are
   (men, women). Each ESTIMATE must carry the quadratic term as well:
   age*age = 40*40 = 1600. An effect left out of ESTIMATE gets a zero
   coefficient, which would drop the curvature from the prediction. */
proc glm data=both;
  title "final model";
  class sex;
  model rtime=age age*age sex / solution;
  output out=residz r=r p=p;
  estimate "40 yr old man "  intercept 1 age 40 age*age 1600 sex 1 0;
  estimate "40 yr old woman" intercept 1 age 40 age*age 1600 sex 0 1;
run;

/* Residual diagnostics for the final model. */
proc gchart data=residz;
  vbar r;
run;

proc univariate data=residz normal plot;
  var r;
run;

/* Disabled: Mallows Cp model selection. Needs the derived variables
   that are commented out in the DATA BOTH step.
proc reg;
  title "CP";
  model rtime=age age2 male ageXmale age2Xmale/selection=cp;
run;
*/