options ls=80 ps=1000 nodate;

/*
  Simulation study: model selection and inference under multicollinearity.

  The macro FIBERGEN is given below.  Use seed=1 to generate the same data
  as on practice exam problem #4.  Use other seeds to generate more datasets
  and observe the behaviour of the Mallows C(p) statistic and of the
  estimated SE terms for inference at (x1,x2,x3,x4) = (55,675,94,280).

  True model:
      y = 0.5 - 0.005*x1 + 0.005*x3 + 0.02*E
  where E is standard normal, so the error term 0.02*E has standard
  deviation sigma = 0.02.

  Under the true model, the mean at (x1,x2,x3,x4) = (55,675,94,280) is
      mu(55,675,94,280) = 0.5 - 0.005*55 + 0.005*94 = 0.695
*/

/*
  Start each submission of this file with an empty accumulator.  PROC APPEND
  below adds one row to PREDS per simulated dataset; without this reset,
  resubmitting the program in the same SAS session would append duplicate
  rows and distort the summary statistics.  NOWARN keeps the step quiet when
  PREDS does not exist yet.
*/
proc datasets lib=work nolist nowarn;
  delete preds;
quit;

/*
  FIBERGEN(seed)

  seed : numeric seed passed to RANNOR; also stored in the output row.

  For one seed this macro
    1. simulates 20 observations (dataset ONE),
    2. prints the C(p) all-subsets selection for y on x1-x4,
    3. fits four candidate models and predicts the mean response at
       (55,675,94,280),
    4. appends the four predictions and their standard errors, as a single
       row, to the accumulated dataset PREDS.
*/
%macro FIBERGEN(seed);

  /* The order of the RANNOR calls is kept exactly as in the original
     program so that a given seed reproduces the same dataset. */
  data one;
    do i = 1 to 20;
      x1 = 50  + 5*rannor(&seed);
      x2 = 600 + 75*rannor(&seed) + (x1-50)*10;
      x3 = 90  + 4*rannor(&seed);
      x4 = 300 + 10*rannor(&seed) + (x3-90)*2;
      /* multicollinearity induced by making x2 and x4 fcns of x1,x3 */
      x1 = round(x1, 0.1);
      x2 = round(x2, 1);
      x3 = round(x3, 1);
      x4 = round(x4, 1);
      y  = 0.5 - 0.005*x1 + 0.005*x3 + 0.02*rannor(&seed);
      y  = round(y, 0.001);
      output;
    end;
  run;

  /* comment this PROC PRINT out after viewing first dataset */
  /* proc print data=one; var x1-x3 y; run; */

  /* All-subsets selection ranked by C(p).  DATA= is explicit so the step
     does not depend on which dataset happened to be created last. */
  proc reg data=one;
    title "Cp output with seed=&seed";
    model y = x1-x4 / selection=cp;
  run;
  quit;  /* PROC REG is interactive: end it explicitly */

  /* One extra observation at the point of interest, with a missing
     response.  It is placed ahead of the simulated data; a row with missing
     y is not used in fitting but still receives a predicted value. */
  data missingy;
    x1 = 55; x2 = 675; x3 = 94; x4 = 280; y = .;
  run;

  data missingy;
    set missingy one;
  run;

  /* Each OUTPUT statement belongs to the MODEL statement just before it.
       Model 1: x1 x2 x3 x4 (full)     Model 2: x1 x3 x4
       Model 3: x1                     Model 4: x1 x3 (true)  */
  proc reg data=missingy outsscp=ss1 noprint;
    model y = x1-x4;
    output out=out1 p=p1 lclm=l1 uclm=u1 stdp=stderr1;
    model y = x1 x3 x4;
    output out=out2 p=p2 lclm=l2 uclm=u2 stdp=stderr2;
    model y = x1;
    output out=out3 p=p3 lclm=l3 uclm=u3 stdp=stderr3;
    model y = x1 x3;
    output out=out4 p=p4 lclm=l4 uclm=u4 stdp=stderr4;
  run;
  quit;

  /* One-to-one merge of the four output datasets (same rows, same order). */
  data results;
    merge out1 out2 out3 out4;
    if y = .;      /* Only keep the predictions for mu(55,675,94,280) */
    seed = &seed;  /* record which simulation produced this row */
  run;

  /* Append results of each simulated dataset in each invocation of
     FIBERGEN.  Accumulated dataset called "preds" */
  proc append base=preds data=results;
  run;

%mend FIBERGEN;  /* MEND ends the macro code */

/*
  FIBERGEN_BATCH(first, last)

  Runs FIBERGEN once for every integer seed from first to last inclusive.
  This replaces a list of individual calls in which unwanted seeds were
  "disabled" with asterisk-style statement comments.  That form is unsafe:
  a macro call written inside an asterisk-style comment can still be
  expanded by the macro processor.
*/
%macro fibergen_batch(first, last);
  %local s;
  %do s = &first %to &last;
    %FIBERGEN(&s)
  %end;
%mend fibergen_batch;

/* Seeds 1-10.  Change the upper bound to 20 to also run seeds 11-20. */
%fibergen_batch(1, 10)

proc print data=preds;
  title1 "inference for mu(55,675,94,280)=0.695";
  title3 "Model 1 is full model, Model 2 is x1,x3,x4 ";
  title4 "Model 3 is x1, Model 4 is x1,x3 (true)";
  title5 "ALL SIMULATIONS";
  id seed;                         /* label each row with its seed */
  var p1-p4 stderr1-stderr4;       /* only print these variables */
  sum p1-p4 stderr1-stderr4;       /* also, print column sums */
run;

/* DATA= is explicit: the most recently created dataset at this point need
   not be PREDS, and summarising a one-row dataset would give a missing
   standard deviation. */
proc means data=preds mean std;
  var p1-p4 stderr1-stderr4;
run;