options ls=80 ps=1000 nodate;

/*
   The macro FIBERGEN is given below.  Call it as

       %fibergen(numdatasets, seed)

   Use seed=1 to generate same data as on practice exam problem #4.
   Use other seeds to generate more datasets and observe the behaviour
   of Mallow's C(p) statistic and the estimated SE terms for inference
   at (x1,x2,x3,x4) = (55,675,94,280).

   Note that the true model is

       y = 0.5 - 0.005*x1 + 0.005*x3 + 0.02*E

   where E is standard normal, so the error term 0.02*E is normally
   distributed with standard deviation sigma=0.02.

   Under the true model, the mean at (x1,x2,x3,x4) = (55,675,94,280) is

       mu(55,675,94,280) = 0.5 - 0.005*55 + 0.005*94 = 0.695
*/

/*
   FIBERGEN: simulate regression datasets, run C(p) selection, and collect
   the predicted mean and its standard error at (55,675,94,280) under four
   candidate models.

   Parameters:
     numdatasets - number of independent simulated datasets (20 obs each)
     seed        - seed passed to RANNOR

   Datasets created in WORK:
     one      - the simulated observations, indexed by simnumber
     missingy - one plus, for each simnumber, a row at (55,675,94,280)
                with y missing (used only to obtain predictions)
     ss1      - OUTSSCP= output of the second PROC REG
     out1-out4 - OUTPUT datasets for models 1-4
     results  - one row per simnumber with p1-p4, l1-l4, u1-u4,
                stderr1-stderr4 at the prediction point
*/
%macro FIBERGEN(numdatasets,seed);

   data one;
      do simnumber=1 to &numdatasets;
         do i=1 to 20;
            x1=50+5*rannor(&seed);
            x2=600+75*rannor(&seed)+(x1-50)*10;
            x3=90+4*rannor(&seed);
            x4=300+10*rannor(&seed)+(x3-90)*2;
            /* multicollinearity induced by making x2 and x4 fcns of x1,x3 */
            x1=round(x1,0.1);
            x2=round(x2,1);
            x3=round(x3,1);
            x4=round(x4,1);
            y=0.5-0.005*x1 + 0.005*x3 + 0.02*rannor(&seed);
            y=round(y,0.001);
            output;
         end;
      end;
   run;

   /* One row per simulation at the point of interest.  y is missing, so   */
   /* the row does not enter the fit but still gets predicted values.      */
   data missingy;
      do simnumber=1 to &numdatasets;
         x1=55;
         x2=675;
         x3=94;
         x4=280;
         y=.;
         output;
      end;
   run;

   data missingy;
      set missingy one;
   run;

   /* List every regressor: x4 is used in the models below, so it is       */
   /* printed along with x1-x3 (the listing previously stopped at x3).     */
   proc print data=missingy;
      title "sample dataset";
      where simnumber=1;
      var x1-x4 y;
   run;

   proc sort data=missingy;
      by simnumber;
   run;

   proc reg data=missingy;
      by simnumber;
      title "Cp output with seed=&seed";
      model y=x1-x4/selection=cp;
   run;
   quit;   /* PROC REG is interactive; end it explicitly */

   /* Each OUTPUT statement applies to the MODEL statement preceding it.   */
   proc reg data=missingy outsscp=ss1 noprint;
      by simnumber;
      model y=x1-x4;
      output out=out1 p=p1 lclm=l1 uclm=u1 stdp=stderr1;
      model y=x1 x3 x4;
      output out=out2 p=p2 lclm=l2 uclm=u2 stdp=stderr2;
      model y=x1;
      output out=out3 p=p3 lclm=l3 uclm=u3 stdp=stderr3;
      model y=x1 x3;
      output out=out4 p=p4 lclm=l4 uclm=u4 stdp=stderr4;
   run;
   quit;

   /* One-to-one merge (no BY statement): out1-out4 all come from the same */
   /* PROC REG step on missingy, so rows are matched by position.          */
   data results;
      merge out1 out2 out3 out4;
      if y=.;   /* Only keep the predictions for mu(55,675,94,280) */
   run;

%mend FIBERGEN;

%fibergen(10,12345);

proc print data=results;
   /* title1 text kept on one physical line so the quoted string is not    */
   /* broken by a line end. title2 is intentionally left unset.            */
   title1 "inference for mu(55,675,94,280)=0.695";
   title3 "Model 1 is full model, Model 2 is x1,x3,x4 ";
   title4 "Model 3 is x1, Model 4 is x1,x3 (true)";
   title5 "ALL SIMULATIONS";
   var p1-p4 stderr1-stderr4;   /* only print these variables */
   sum p1-p4 stderr1-stderr4;   /* also, print column sums */
run;

/* Mean and standard deviation, across simulations, of each model's        */
/* prediction and reported standard error.                                 */
proc means data=results mean std;
   var p1-p4 stderr1-stderr4;
run;