/* fsr_linear is a sas macro */ /* */ /* 0. Based on proc glmselect - if you don't have it, first go to */ /* http://support.sas.com:80/rnd/app/da/glmselect.html */ /* */ /* 1. Standardizes all predictors to have mean 0 and variance 1 and */ /* renames them as x1-xp, numbered as they occur in the data set. */ /* */ /* 2. Fits main effects, interactions, and squared terms according to a */ /* modified forward selection procedure that on average has False */ /* Selection Rate (FSR) = gamma (default=0.05) */ /* */ /* Developed by Hugh Crews, August 2008. */ /* */ /* Typical call (all linear and quadratic terms): */ /* */ /* %fsr_linear(dataset=diabetes,model=age|sex|bmi|bp|s1|s2|s3|s4|s5|s6 @q */ /* gamma=0.05,y=y,method=5,terms=20,include=0,cbound=2); */ /* */ /* method=1 Fast FSR with Strong Hierarchy */ /* method=2 Fast FSR with No Hierarchy */ /* method=3 Fast FSR with Weak Hierarchy */ /* method=4 Fast FSR with No Hierarchy and Iterated Adjustment */ /* method=5 Fast FSR with No Hierarchy and Sequential Adjustment */ /* method=6 Fast FSR with Weak Hierarchy and Sequential Adjustment */ /* method=7 Fast FSR with Main Effects only */ /* */ /* The default is method=1, gamma=0.05. Terms=x restricts the forward */ /* sequence to x terms. The default is the full sequence of terms or */ /* (terms=full). Include=k forces the first k terms into the model. The */ /* default is to include no terms or (include=0). Cbound=b bounds the */ /* adjustment used in Methods 5 and 6 by b*(k_T-p+1)/(p+1). The default */ /* is to enforce no limit on c. */ /* */ /* The model statement is meant to be close to general sas usage */ /* except for @q which tells the program to add squared terms to */ /* the code immediately preceding it. */ /* Examples: */ /*model=age|sex|bmi|bp|s1|s2|s3|s4|s5|s6 @q all 1st & 2nd order terms */ /*model=age--s6 @q same as above */ /*model=age--s6 only linear */ /*model=age|sex|bmi|bp|s1|s2|s3|s4|s5|s6 linear and interactions only */ /*model=age sex bmi--s6 @q plus include=2 */ /* includes age and sex, then selects from full quadratic in the others */ /*model=age sex age--s6 @q plus include=2 */ /* same as above, but now age and sex interactions and age^2 are possible */ /* redundancies like age appearing twice are no problem */ /* The macro getdata is used to get data, relabel predictors, */ /* standardize predictors, delete redundant predictors, etc. */ %macro getdata; ods listing close; data &dataset._original; set &dataset.; call symput("n",trim(left(put(_n_,best12.)))); run; data &dataset._ffsr; set &dataset.; run; data null; length model $32000. x $32000.; model="&model."; %do tab=1 %to 50; model=tranwrd(model," ",""); %end; %do tab=1 %to 100; model=tranwrd(model," "," "); %end; model=tranwrd(model,"@Q","@q"); model=tranwrd(model,"| ","|"); model=tranwrd(model," |","|"); model=tranwrd(model,"- ","-"); model=tranwrd(model," -","-"); model=tranwrd(model," "," "); model=tranwrd(model," "," "); model=tranwrd(model," @2","@2"); model=tranwrd(model," @q","@q"); x=tranwrd(model,"|"," "); x=tranwrd(x,"*"," "); x=tranwrd(x," @2",""); x=tranwrd(x,"@2",""); x=tranwrd(x," @q",""); x=tranwrd(x,"@q",""); %do hij=3 %to 19; x=tranwrd(x," @%eval(&hij.)",""); x=tranwrd(x,"@%eval(&hij.)",""); %end; parts=countc(trim(left(model))," ")+1; call symput("x",trim(left(x))); call symput("parts",trim(left(put(parts,best12.)))); call symput("fullmodel",trim(left(model))); run; data x; set &dataset._ffsr; keep &x.; run; proc contents data=x out=xcount noprint; run; proc sort data=xcount; by VARNUM; run; data null; set xcount; call symput("p",trim(left(put(_n_,best12.)))); run; %do j=1 %to &p.; data _null; set null(firstobs=&j. obs=&j.); call symput("var&j.",trim(left(name))); call symput("label&j.",trim(left(label))); run; %end; data &dataset._ffsr; set &dataset._ffsr; keep &x. &y.; label %do j=1 %to &p.; &&var&j.=&&var&j. %end; ; rename %do j=1 %to &p.; &&var&j.=x&j. %end; ; label &y.=&y.; rename &y.=y; run; data &dataset._ffsr; set &dataset._ffsr; if y = . then delete; %do j=1 %to &p.; if x&j.=. then delete; %end; run; data y; set &dataset._ffsr; keep y; run; data x; set &dataset._ffsr; keep x1-x&p.; run; proc contents data=x out=xcount noprint; run; proc sort data=xcount; by VARNUM; run; data null; set xcount; call symput("p",trim(left(put(_n_,best12.)))); run; %do j=1 %to &p.; data _null; set null(firstobs=&j. obs=&j.); call symput("var&j.",trim(left(name))); call symput("label&j.",trim(left(label))); run; %end; data &dataset._ffsr; set &dataset._ffsr; drop y; call symput('n',trim(left(put(_n_,best12.)))); run; proc corr data=&dataset._ffsr outp=abcd noprint; var x1-x&p.; run; data abcd2; set abcd; where _type_="CORR"; %do j=1 %to &p.; if x&j.>=.999999999999 and _name_ ne "x&j." then novar&j. = "x&j."; if x&j.>=.999999999999 and _name_ ne "x&j." then output; %end; run; data _null; if 0 then set abcd2 nobs=count; call symput('nn',count); stop; run; %let remove=; %let novar=; %let remnum=0; %if &nn. >0 %then %do; data abcd3; set abcd2; length delvar $100.; %do j=1 %to &p.; if x&j.>=.999999999999 and _name_ ne "x&j." then delvar="x&j."; if x&j.>=.999999999999 and _name_ ne "x&j." then label="&&label&j."; call symput("delvar",delvar); %end; %do j=1 %to &nn.; lag&j.=lag&j.(delvar); if _name_=lag&j.(delvar) then delete; %end; run; proc sort data=abcd3 nodupkey; by _name_; run; data abcd4; set abcd3; by _name_; length remove $1000. remlabel $1000.; retain remove '' remlabel ''; remove=trim(left(remove)) || ' ' || trim(left(delvar)); remlabel=trim(left(remlabel)) || ' ' || trim(left(label)); call symput('remove',trim(left(remove))); call symput('remlabel',trim(left(remlabel))); call symput('remnum',_n_); run; %put WARNING: Some explantory variable(s) have correlation of 1 with other explantory variables in your data set. These variables should be removed from the analysis due to redundancy: &remlabel..; %do j=1 %to %eval(&remnum.); data null; set abcd4(firstobs=&j. obs=&j.); call symput("delvar&j.",trim(left(label))); run; %end; %end; %let novarcount=0; data abcdstd; set abcd; where _type_="STD"; length msg $10000. msg2 $10000. novar $100. labelvar $1000.; retain count 0; retain msg '' msg2 ''; %do j=1 %to &p.; if x&j.=0 then novar = "x&j."; if x&j.=0 then labelvar = "&&label&j."; if x&j.=0 then count=count+1; if count=1 then msg=trim(left(novar)); else msg=trim(left((msg))) || ' ' || trim(left(novar)); if count=1 then msg2=trim(left(labelvar)); else msg2=trim(left((msg2)))|| ' ' || trim(left(labelvar)); if x&j.=0 then output; %end; call symput('novar',trim(left(msg))); call symput('novarcount',count); run; data abcdstd; set abcdstd; length msglabel $30000.; retain msglabel ''; msglabel=trim(left(msglabel)) || ' ' || trim(left(labelvar)); msglabel=trim(left(msglabel)); call symput('labelvar',trim(left(msglabel))); run; %if &novarcount. >0 %then %do; %put WARNING: Some explantory variable(s) have no variation. These variables should be removed from the analysis: &labelvar..; /* %do j=1 %to &novarcount.; data null; set abcdstd(firstobs=&j. obs=&j.); call symput("novar&j.",trim(left(labelvar))); run; %end; */ %end; %if %eval(&novarcount.+&nn.)>0 %then %do; data &dataset.; set &dataset._original; run; ods listing; data _null; file print notitles; put "Note: The program stopped executing because redundant terms were specified."; put "Rerun the analysis after removing all terms containing the variables:"; %if %eval(&novarcount.)>0 and %eval(&nn.)>0 %then %do; put "&remlabel. &labelvar.."; %end; %else %if %eval(&novarcount.)>0 %then %do; put "&labelvar.."; %end; %else %do; put "&remlabel.."; %end; run; ods listing close; /*Cleans up data sets */ proc datasets; delete mean mean2 includesummary:(gennum=all) summary2 stats std x y xcount part:(gennum=all) candidates:(gennum=all) newalphas:(gennum=all) abcd:(gennum=all) alphas:(gennum=all) counts amax summary final model1 fsr:(gennum=all) ghat:(gennum=all) null parameters stop q; run; quit; /*Turn log notes back on and listing back on*/ options notes; ods listing; %abort; %end; proc contents data=&dataset._ffsr out=abcdnew noprint; run; data abcdnew; set abcdnew; length predictors $3000. labels $30000.; retain predictors '' labels ''; predictors = trim(left(predictors)) || ' ' || trim(left(name)); labels = trim(left(labels)) || ' ' || trim(left(label)); call symput('p',trim(left(put(_n_,best12.)))); call symput('predictors',trim(left(predictors))); call symput('labels',trim(left(labels))); run; data _null; predictors="&predictors."; retain start end; %do j=1 %to &p.; if &j.=1 then start=1; else start=end+1; end=find(predictors,' ', start); if &j.<&p. then var&j.=trim(left(substr(predictors,start,end-start))); else var&j.=trim(left(substr(predictors,start))); output; call symput("var&j.", var&j.); %end; run; data _null; predictors="&labels."; retain start end; %do j=1 %to &p.; if &j.=1 then start=1; else start=end+1; end=find(predictors,' ', start); if &j.<&p. then var&j.=trim(left(substr(predictors,start,end-start))); else var&j.=trim(left(substr(predictors,start))); output; call symput("label&j.", var&j.); %end; run; data &dataset._ffsr; merge &dataset._ffsr; keep &predictors.; label %do j=1 %to &p.; &&var&j.=&&label&j. %end; ; rename %do j=1 %to &p.; &&var&j.=x&j. %end; ; run; data &dataset._ffsr; merge &dataset._ffsr y; run; data abcdnew; set &dataset._ffsr; drop y; run; proc contents data=abcdnew out=abcdnew2 noprint; run; data abcdnew2; set abcdnew2; call symput('p',trim(left(put(_n_,best12.)))); run; %do j=1 %to &p.; proc sort data=&dataset._ffsr out=abcdsub&j. nodupkey; by x&j.; run; data abcdsub&j.; set abcdsub&j.; call symput("levels&j.",_n_); run; data abcdlevels&j.; length label $1000.; varnum=&j.; levels=&&levels&j.; label="&&label&j."; if levels > 2 then type=0; else type=1; run; %end; data abcdlevels; set %do j=1 %to &p.; abcdlevels&j. %end; ; run; proc sort data=abcdlevels; by varnum; run; %do j=1 %to &p.; data abcdlevelsnew; set abcdlevels(firstobs=&j. obs=&j.); call symput("binary&j.",type); run; %end; /*Begin comments here for data set based sorting proc sort data=abcdlevels; by type varnum; run; data abcdlevels; set abcdlevels; retain cont 0 binary 0; if type=0 then cont=cont+1; else binary=binary+1; varnum2=_n_; call symput('binary',trim(left(put(binary,best12.)))); call symput('cont',trim(left(put(cont,best12.)))); run; %do j=1 %to &p.; data abcdlevels2; set abcdlevels(firstobs=&j. obs=&j.); call symput("varnum&j.",trim(left(put(varnum,best12.)))); call symput("varnumnew&j.",trim(left(put(varnum2,best12.)))); run; %end; data &dataset._ffsr; set &dataset._ffsr; rename %do j =1 %to &p.; x%eval(&&varnum&j.)=x%eval(&&varnumnew&j.) %end; ; run; End comments here for data set based sorting*/ data null; set &dataset._ffsr; drop y; run; proc contents data=null out=xcount noprint; run; proc sort data=xcount; by VARNUM; run; data null; set xcount; call symput('p',trim(left(put(_n_,best12.)))); run; %do j=1 %to &p.; data _null; set null(firstobs=&j. obs=&j.); call symput("var&j.",trim(left(name))); call symput("label&j.",trim(left(label))); run; %end; data null; set xcount; lablength=length(trim(left(label))); run; proc sort data=null; by descending lablength; run; %do j=1 %to &p.; data _null; set null(firstobs=&j. obs=&j.); call symput("trnvar&j.",trim(left(name))); call symput("trnlabel&j.",trim(left(label))); run; %end; data null; length model $32000.; model=trim(left("&fullmodel.")); %do j=1 %to &p.; model=tranwrd(lowcase(trim(left(model))),lowcase(trim(left("&&trnlabel&j."))),lowcase(trim(left("&&trnvar&j.")))); %end; call symput('fullmodel',trim(left(model))); run; data null; set null; checkcomp=0; %do hij=3 %to 19; modelcomp=trim(left(model)); model=tranwrd(model,"@&hij.",''); if trim(left(model)) ne trim(left(modelcomp)) then checkcomp=1; %end; model=tranwrd(model,' @2',''); model=tranwrd(model,'@2',''); model=tranwrd(model,' ',' '); call symput('checkcomp',trim(left(put(checkcomp,best12.)))); run; %if %eval(&checkcomp.)=1 %then %do; %put WARNING: Only second-order terms are allowed. All higher-order terms will be ignored.; %end; data _null; set null; retain start end; %do j=1 %to %eval(&parts.); if &j.=1 then start=1; else start=end+1; end=find(model,' ', start); if &j.<%eval(&parts.) then part&j.=trim(left(substr(model,start,end-start))); else part&j.=trim(left(substr(model,start))); output; call symput("part&j.", trim(left(part&j.))); call symput("partlabel&j.", trim(left(part&j.))); %end; run; %do j=1 %to %eval(&parts.); data part&j.; set _null (firstobs=&j. obs=&j.); count0=countc(part&j.,'@q'); count1=countc(part&j.,'|'); count2=countc(part&j.,'-'); call symput('count0',count0); call symput('count1',count1); call symput('count2',count2); run; %if %eval(&count0.)>0 %then %do; data null; b=trim(left("&&part&j.")); b=tranwrd(b,'@q',''); x=trim(left("&&part&j.")); x=tranwrd(x,'|',' '); x=tranwrd(x,'@q',''); call symput("part&j.",trim(left(b))); call symput("quadpart", trim(left(x))); run; data null; set &dataset._ffsr; keep &quadpart.; run; proc contents data=null out=xcount noprint; run; proc sort data=xcount; by VARNUM; run; data null; set xcount; call symput('quadp',trim(left(put(_n_,best12.)))); run; %do aquad=1 %to &quadp.; data null; set xcount(firstobs=&aquad. obs=&aquad.); quad=trim(left(name)); call symput("quadpart&aquad.",trim(left(quad))); run; %end; data null; length modelpart $30000.; retain modelpart ''; %do aquad=1 %to &quadp.; modelpart=trim(left(modelpart)) || ' ' || trim(left("&&quadpart&aquad.")) || " " || trim(left("&&quadpart&aquad.")) || "*" || trim(left("&&quadpart&aquad.")); %if &aquad. > 1 %then %do; %do bquad=1 %to %eval(&aquad.-1); modelpart=trim(left(modelpart)) || ' ' || trim(left("&&quadpart&bquad.")) || "*" || trim(left("&&quadpart&aquad.")); %end; %end; %end; modelpart=trim(left(modelpart)); call symput("modelpart&j.",trim(left(modelpart))); run; %end; %else %if %eval(&count1.)>0 %then %do; data null; b=trim(left("&&part&j.")); b=tranwrd(b,'@q',''); call symput("part&j.",trim(left(b))); run; data null; set _null (firstobs=&j. obs=&j.); retain start&j. 1 end&j. 1; %do m=1 %to %eval(&count1.+1); if &m.=1 then start&j.=1; else start&j.=end&j.+1; end&j.=find(part&j.,'|', start&j.); if &m.<%eval(&count1.+1) then subpart&m.=trim(left(substr(part&j.,start&j.,end&j.-start&j.))); else subpart&m.=trim(left(substr(part&j.,start&j.))); output; call symput("subpart&m.", subpart&m.); call symput("subpartlabel&m.", subpart&m.); %end; run; data null; set null(firstobs=%eval(&count1.+1) obs=%eval(&count1.+1)); length subpart $30000.; retain subpart ''; %do m=1 %to %eval(&count1.+1); subpart=trim(left(subpart)) || ' ' || trim(left(subpart&m.)); %if &m. > 1 %then %do; %do n=1 %to %eval(&m.-1); subpart=trim(left(subpart)) || ' ' || trim(left(subpart&n.)) || '*' || trim(left(subpart&m.)); %end; %end; %end; call symput("modelpart&j.",trim(left(subpart))); run; %end; %else %if %eval(&count2.)>0 %then %do; data null; b=trim(left("&&part&j.")); b=tranwrd(b,'@q',''); call symput("part&j.",trim(left(b))); run; data null; merge %do m=1 %to &p.; &dataset._ffsr(keep=x&m.) %end; ; keep &&part&j.; run; proc contents data=null out=xcount noprint; run; data null; set xcount; length modelpart $1000.; retain modelpart ''; modelpart=trim(left(modelpart)) || ' ' || trim(left(name)); call symput("modelpart&j.",trim(left(modelpart))); run; %end; %else %do; data null; b=trim(left("&&part&j.")); b=tranwrd(b,'@q',''); call symput("part&j.",trim(left(b))); run; data null; set _null (firstobs=&j. obs=&j.); call symput("modelpart&j.",trim(left(part&j.))); run; %end; %end; data null; length model $30000.; model= %do j=1 %to %eval(&parts.-1); trim(left("&&modelpart&j.")) || ' ' || %end; trim(left("&&modelpart&parts.")); q=countc(trim(left(model)),' ')+1; call symput('q',trim(left(put(q,best12.)))); call symput("fullmodel2",trim(left(model))); run; proc means data=&dataset._ffsr noprint; var x1-x&p.; output out=mean; run; data null; set &dataset._ffsr; call symput('n',trim(left(put(_n_,best12.)))); run; data mean2; merge mean(where =(_stat_="MEAN") rename=( %do j=1 %to &p.; x&j.=m&j. %end; )); do i=1 to &n.; output; end; run; data std; merge mean(where =(_stat_="STD") rename=( %do j=1 %to &p.; x&j.=s&j. %end; )); do i=1 to &n.; output; end; run; data &dataset._ffsr_std; merge &dataset._ffsr mean2 std; %do j=1 %to &p.; z&j.=(x&j. - m&j.)/(s&j.); %end; drop x1-x&p.; run; data &dataset._ffsr; set &dataset._ffsr_std; %do j=1 %to &p.; m&j.=z&j.; %end; rename %do j=1 %to &p.; m&j.=x&j. %end; ; drop i z1-z&p. s1-s&p. _type_ _freq_ _stat_; run; /* These lines of code automatically handle binary variables*/ data null; model="&fullmodel2."; model=trim(left(model)); countparts=countc(trim(left(model))," ")+1; call symput('modelparts',countparts); run; data _null; set null; retain start end; %do j=1 %to %eval(&modelparts.); if &j.=1 then start=1; else start=end+1; end=find(model,' ', start); if &j.<%eval(&modelparts.) then part&j.=trim(left(substr(model,start,end-start))); else part&j.=trim(left(substr(model,start))); output; call symput("modelpart&j.", trim(left(part&j.))); call symput("modelpartlabel&j.", trim(left(part&j.))); %end; run; data null; set _null(firstobs=&modelparts. obs=&modelparts.); %do j=1 %to %eval(&modelparts.); %do jjj=1 %to &p.; %if %eval(&&binary&jjj.)=1 %then %do; if trim(left(part&j.))="&&var&jjj."||"*"||"&&var&jjj." then part&j.=""; %end; %end; %end; run; data _null; set null; %if %eval(&modelparts.) >0 %then %do; model= %do j=1 %to %eval(&modelparts.); trim(left(part&j.))|| " " || %end; ""; %end; %else %do; model=""; %end; call symput("fullmodel2",trim(left(model))); run; /*End of binary variable code*/ ods output Candidates=candidatesq; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hier=none selection=forward(select=sl sle=.99999 stop=2) details=steps(candidates(show=all)); run; data null; set candidatesq; q=_n_; call symput('q',trim(left(put(q,best12.)))); run; data stop; q=&q.; p=&p.; fullmodel=trim(left("&fullmodel2.")); run; %mend getdata; %macro fastfsr1; %getdata; data stop; set stop; call symput("fullmodel2",trim(left(fullmodel))); run; data null; terms=lowcase("&terms."); if trim(left(terms))="full" then terms2=0; else terms2=1; call symput('terms2',trim(left(put(terms2,best12.)))); run; %if &terms2.=0 %then %do; data null; call symput('maxmodelsize',''); run; %end; %else %do; data null; call symput('maxmodelsize',"stop=&terms."); run; %end; %if %eval(&include.)=0 %then %do; ods output selectionsummary=summary Candidates=candidates; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hier=single selection=forward(select=sl sle=.99999 &maxmodelsize.) details=steps(candidates(show=all)); run; %end; %else %do; ods output selectionsummary=summary Candidates=candidates; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hier=single selection=forward(include=%eval(&include.) select=sl sle=.99999 &maxmodelsize.) details=steps(candidates(show=all)); run; %end; proc means data=candidates noprint; by step; var rank; output out=counts n=n; run; data summary2; set summary; where nparmsin>1; length model $30000.; retain model ''; model = trim(left(model)) || ' ' || trim(left(effectentered)); model = trim(left(model)); run; proc sort data=summary2; by step; run; proc sort data=counts; by step; run; data summary2; merge summary2(in=in1) counts; by step; if in1; run; data alphas; set summary2; where step>0; retain maxpval; if probf>lag(probf) and probf>maxpval then maxpval=probf; else maxpval=maxpval; run; data alphas2; set alphas; check1=lag(maxpval); if step=1 then seqflag=1; else if maxpval>check1 then seqflag=1; else seqflag=0; run; proc sort data=alphas2; by descending nparmsin; run; data alphas3; set alphas2; alpha=maxpval; check2=lag(seqflag); if check2=. then choose=1; else if check2=1 then choose=1; else choose=0; run; proc sort data=alphas3; by nparmsin; where choose=1; run; data ghat; set alphas3; ghat=(((n-1)*alpha))/(nparmsin-%eval(&include.)); gamma=input(&gamma.,best12.); if ghat<=gamma then flag2=1; else flag2=0; lagalpha=lag(alpha); if lagalpha=. then lagalpha=0; run; proc sort data=ghat out=ghat2; by descending alpha; run; data ghat2; set ghat2; futn=lag(n); if futn=. then futn=0; futalpha=lag(alpha); if futalpha=. then futalpha=1; run; proc sort data=ghat2; by alpha; run; proc sort data=ghat; by alpha; run; data ghatnew; merge ghat ghat2; by alpha; if n-1=0 then alphahat=1; else if futn=0 then alphahat=alpha; else alphahat=gamma*(nparmsin-%eval(&include.))/(futn); if alphahat>=futalpha then alphahat=alpha; if alphahat<=alpha then alphahat=alpha; if alphahat>1 then alphahat=1; run; proc sort data= ghatnew out=amax; by ghat; run; data amax; set amax; call symput('amax',left(put(nparmsin,best12.))); run; proc sort data=ghatnew out=ghat2; where flag2=1 and nparmsin <=&amax.; by flag2 alpha; run; data fsr; set ghat2; by flag2 alpha; if last.flag2 then output; run; data final; set fsr nobs=count; call symput('alphahat', alphahat); call symput('size',nparmsin); call symput('model',trim(left(model))); run; data _null_; if 0 then set final nobs=count; call symput('count',left(put(count,4.))); stop; run; ods output variables=&dataset._ffsr_labels; proc contents data=&dataset._ffsr; run; ods listing; proc print data=&dataset._ffsr_labels label; var variable label; label variable="Variable name" label="Original name"; title 'Variable names'; run; data null; set summary; where step=0; keep effectentered nparmsin; run; data newalphas; set null; probf=0; maxpval=0; run; data newalphas2; set newalphas alphas; size=nparmsin; %if %eval(&include.)=0 %then %do; step=size-1; %end; %else %do; if size-1<=%eval(&include.) then step=0; else step=size-1-%eval(&include.); %end; run; proc print data=newalphas2 label; var step size effectentered probf maxpval; label step="Selection Step" size="1+S" effectentered="Effect Entered" probf="p-to-enter" maxpval="monotonized p-to-enter"; title 'Forward Sequence'; run; data null; alpha=0; alphahat=0; ghat=0; nparmsin=1+%eval(&include.); run; data gammafast; set null ghatnew; size=nparmsin; run; proc print data=gammafast label; title 'Sequence of Possible Models'; label size="1+S" ghat="gammahat" ; var alpha ghat size; run; %if &count.=0 %then %do; data final; alphahat=0; ghat=0; size=1+%eval(&include.); run; data new; set summary; where nparmsin>1 and step=0; length model $30000.; retain model ''; model = trim(left(model)) || ' ' || trim(left(effectentered)); model = trim(left(model)); run; data null; set new; call symput('model',trim(left(model))); run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; %if %eval(&include.)>0 %then %do; proc glm data=&dataset._ffsr; model y= &model. / solution; output out=preds p=pred; run; %end; %else %do; proc glm data=&dataset._ffsr; model y= / solution; output out=preds p=pred; run; %end; %end; %else %do; data final; set gammafast; where size=&size.; run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; ods output ParameterEstimates=parameters; proc glm data=&dataset._ffsr; model y = &model. / solution; output out=preds p=pred; run; %end; /*Cleans up data sets */ ods listing close; proc datasets; delete mean mean2 includesummary:(gennum=all) summary2 stats std x y xcount part:(gennum=all) abcd:(gennum=all) newalphas:(gennum=all) alphas:(gennum=all) amax summary final fsr:(gennum=all) candidates counts ghat:(gennum=all) null parameters; run; quit; data &dataset.; set &dataset._original; run; %mend fastfsr1; %macro fastfsr2; %getdata; data stop; set stop; call symput("fullmodel2",trim(left(fullmodel))); call symput('q',trim(left(put(q,best12.)))); call symput('p',trim(left(put(p,best12.)))); run; data null; terms=lowcase("&terms."); if trim(left(terms))="full" then terms2=0; else terms2=1; call symput('terms2',trim(left(put(terms2,best12.)))); run; %if &terms2.=0 %then %do; data null; call symput('maxmodelsize',''); run; %end; %else %do; data null; call symput('maxmodelsize',"stop=&terms."); run; %end; %if %eval(&include.)=0 %then %do; ods output selectionsummary=summary; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hier=none selection=forward(select=sl sle=.99999 &maxmodelsize.); run; %end; %else %do; ods output selectionsummary=summary; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hier=none selection=forward(include=%eval(&include.) select=sl sle=.99999 &maxmodelsize.); run; %end; data summary2; set summary; where nparmsin>1; length model $30000.; retain model ''; model = trim(left(model)) || ' ' || trim(left(effectentered)); model = trim(left(model)); run; data alphas; set summary2; where step>0; retain maxpval; if probf>lag(probf) and probf>maxpval then maxpval=probf; else maxpval=maxpval; run; data alphas2; set alphas; check1=lag(maxpval); if step=1 then seqflag=1; else if maxpval>check1 then seqflag=1; else seqflag=0; run; proc sort data=alphas2; by descending nparmsin; run; data alphas3; set alphas2; alpha=maxpval; check2=lag(seqflag); if check2=. then choose=1; else if check2=1 then choose=1; else choose=0; run; proc sort data=alphas3; by nparmsin; where choose=1; run; data ghat; set alphas3; q=%eval(&q.); p=&p.; ghat=(((q-(nparmsin-1))*alpha))/(nparmsin-%eval(&include.)); gamma=input(&gamma.,best12.); if (q-(nparmsin-1))=0 then alphahat=1; else alphahat=gamma*(nparmsin-%eval(&include.))/(q-(nparmsin-1)); if ghat<=gamma then flag2=1; else flag2=0; lagalpha=lag(alpha); if lagalpha=. then lagalpha=0; run; proc sort data=ghat out=ghat2; by descending alpha; run; data ghat2; set ghat2; futalpha=lag(alpha); if futalpha=. then futalpha=1; run; proc sort data=ghat2; by alpha; run; proc sort data=ghat; by alpha; run; data ghatnew; merge ghat ghat2; by alpha; if alphahat>=futalpha then alphahat=alpha; if alphahat<=alpha then alphahat=alpha; if alphahat>1 then alphahat=1; run; proc sort data= ghatnew out=amax; by ghat; run; data amax; set amax; call symput('amax',left(put(nparmsin,best12.))); run; proc sort data=ghatnew out=ghat2; where flag2=1 and nparmsin <=&amax.; by flag2 alpha; run; data fsr; set ghat2; by flag2 alpha; if last.flag2 then output; run; data final; set fsr nobs=count; call symput('alphahat', alphahat); call symput('size',nparmsin); call symput('model',trim(left(model))); run; data _null_; if 0 then set final nobs=count; call symput('count',left(put(count,4.))); stop; run; ods output variables=&dataset._ffsr_labels; proc contents data=&dataset._ffsr; run; ods listing; proc print data=&dataset._ffsr_labels label; var variable label; label variable="Variable name" label="Original name"; title 'Variable names'; run; data null; set summary; where step=0; keep effectentered nparmsin; run; data newalphas; set null; probf=0; maxpval=0; run; data newalphas2; set newalphas alphas; size=nparmsin; %if %eval(&include.)=0 %then %do; step=size-1; %end; %else %do; if size-1<=%eval(&include.) then step=0; else step=size-1-%eval(&include.); %end; run; proc print data=newalphas2 label; var step size effectentered probf maxpval; label step="Selection Step" size="1+S" effectentered="Effect Entered" probf="p-to-enter" maxpval="monotonized p-to-enter"; title 'Forward Sequence'; run; data null; alpha=0; alphahat=0; ghat=0; nparmsin=1+%eval(&include.); run; data gammafast; set null ghatnew; size=nparmsin; run; proc print data=gammafast label; title 'Sequence of Possible Models'; label size="1+S" ghat="gammahat" ; var alpha ghat size; run; %if &count.=0 %then %do; data final; alphahat=0; ghat=0; size=1+%eval(&include.); run; data new; set summary; where nparmsin>1 and step=0; length model $30000.; retain model ''; model = trim(left(model)) || ' ' || trim(left(effectentered)); model = trim(left(model)); run; data null; set new; call symput('model',trim(left(model))); run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; %if %eval(&include.)>0 %then %do; proc glm data=&dataset._ffsr; model y= &model. / solution; output out=preds p=pred; run; %end; %else %do; proc glm data=&dataset._ffsr; model y= / solution; output out=preds p=pred; run; %end; %end; %else %do; data final; set gammafast; where size=&size.; run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="Estimate for alpha"; run; title; ods output ParameterEstimates=parameters; proc glm data=&dataset._ffsr; model y = &model. / solution; output out=preds p=pred; run; %end; /*Cleans up data sets*/ ods listing close; proc datasets; delete mean mean2 includesummary:(gennum=all) summary2 stats std x y xcount part:(gennum=all) abcd:(gennum=all) alphas:(gennum=all) newalphas:(gennum=all) amax summary final fsr:(gennum=all) ghat:(gennum=all) null parameters; run; quit; data &dataset.; set &dataset._original; run; %mend fastfsr2; %macro fastfsr3; %getdata; data null; terms=lowcase("&terms."); if trim(left(terms))="full" then terms2=0; else terms2=1; call symput('terms2',trim(left(put(terms2,best12.)))); run; proc means data=&dataset._ffsr noprint; var y; output out=stats n=n; run; data _null_; set stats; call symput('n',n); run; data stop; set stop; stopselect=min(&n.-1,q+1 %if &terms2.=1 %then %do; ,&terms. %end; ); call symput("fullmodel2",trim(left(fullmodel))); call symput('q',trim(left(put(q,best12.)))); call symput('p',trim(left(put(p,best12.)))); call symput('stopselect',stopselect); call symput('perfect',0); call symput('stophere',stopselect); run; %macro new3; %if &size.>2 %then %do; data candidates; call symput('candidates',trim(left("&linear.")) || ' ' || trim(left("&choice."))); run; %end; %if &size.=2 %then %do; ods output Candidates=summary1_&size. SelectionSummary=step1_&size.; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / hierarchy=single selection=forward( stop=%sysevalf(&size.) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; %else %do; ods output Candidates=summary1_&size. SelectionSummary=step1_&size.; proc glmselect data=&dataset._ffsr; model y= &model. &candidates. / hierarchy=none selection=forward(include=%sysevalf(&size.-2) stop=%sysevalf(&size.) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; /*Adding in check for perfect fit*/ %if %sysfunc(exist(summary1_&size.))=0 %then %do; %put WARNING: Ignore the previous WARNING. A perfect fit was detected so the full forward sequence is unable to be obtained. Fast FSR will continue using the partial sequence.; data null; call symput('perfect',1); run; data null; call symput('stophere',&size.-1); run; %end; %else %do; %if %eval(&size.)=2 and %eval(&include.)=0 %then %do; data null; length linear $10000.; set summary1_&size.; retain linear ''; linear=trim(left(linear)) || ' ' || trim(left(effect)); linear=trim(left(linear)); call symput("linear",trim(left(linear))); run; %end; data summary1_&size.; set summary1_&size.; size=&size.; run; proc transpose data=step1_&size. out=cand; var EffectEntered; run; data cand2; set cand; length model $32767.; %if &size.=2 %then %do; model=trim(left(col2)); %end; %else %if &size.=3 %then %do; model=trim(left(col2)) || ' ' || trim(left(col3)); %end; %else %do; model= %do col=2 %to &size.-1; trim(left(col&col.)) || " " || %end; trim(left(col&size.)); %end; model=compress(model,'.'); call symput('model',trim(left(model))); size=input(trim(left(&size.)),best12.); run; data effectsin; %do it=1 %to %eval(&intcandidates.); set cand2; output; %end; run; data effects2; merge effects effectsin; %if &size.=2 %then %do; if trim(left(variable1))=trim(left(col2)) or trim(left(variable2))=trim(left(col2)) then output; %end; %else %do; if %do kk=2 %to &size.-1; trim(left(variable1))=trim(left(col&kk.)) or trim(left(variable2))=trim(left(col&kk.)) or %end; trim(left(variable1))=trim(left(col&size.)) or trim(left(variable2))=trim(left(col&size.)) then output; %end; run; data effects3; set effects2; length choice $32767.; retain choice; choice=trim(left(trim(left(choice)) || ' ' || trim(left(variable)))); call symput('choice',trim(left(choice))); run; data model1; length effect $100. effectentered $1000.; set model1 cand2; call symput('size',trim(left(put(size+1,best12.)))); run; %end; %mend new3; %macro dynamic3; %let choice=; %let model=; %if %eval(&include.)>0 %then %do; ods output SelectionSummary=includesummary; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / selection=forward(select=sl include=%eval(&include.) stop=%eval(&include.+1)); run; data includesummary; set includesummary; length includein $32767.; retain includein ''; if trim(left(effectentered))="Intercept" then effectentered=''; includein=trim(left(trim(left(includein)) || ' ' || trim(left(effectentered)))); call symput('includein',trim(left(includein))); call symput('model',trim(left(includein))); run; data model1; set includesummary; model=trim(left(includein)); effect=trim(left(effectentered)); if trim(left(effect))='' then effect="Intercept"; size=nparmsin; call symput('size',trim(left(put(size+1,best12.)))); run; ods output Candidates=linear; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / hier=single selection=forward(select=sl sle=.99999999 stop=2) details=steps(candidates(show=all)); run; data null; length linear $10000.; set linear; retain linear ''; linear=trim(left(linear)) || ' ' || trim(left(effect)); linear=trim(left(linear)); call symput("linear",trim(left(linear))); run; proc transpose data=includesummary out=cand; var EffectEntered; run; data cand2; set cand; length model $32767.; %if &size.=2 %then %do; model=trim(left(col2)); %end; %else %if &size.=3 %then %do; model=trim(left(col2)) || ' ' || trim(left(col3)); %end; %else %do; model= %do col=2 %to &size.-1; trim(left(col&col.)) || " " || %end; trim(left(col&size.)); %end; model=compress(model,'.'); call symput('model',trim(left(model))); size=input(trim(left(&size.)),best12.); run; data effectsin; %do it=1 %to %eval(&intcandidates.); set cand2; output; %end; run; data effects2; merge effects effectsin; %if &size.=2 %then %do; if trim(left(variable1))=trim(left(col2)) or trim(left(variable2))=trim(left(col2)) then output; %end; %else %do; if %do kk=2 %to &size.-1; trim(left(variable1))=trim(left(col&kk.)) or trim(left(variable2))=trim(left(col&kk.)) or %end; trim(left(variable1))=trim(left(col&size.)) or trim(left(variable2))=trim(left(col&size.)) then output; %end; run; data effects3; set effects2; length choice $32767.; retain choice; choice=trim(left(trim(left(choice)) || ' ' || trim(left(variable)))); call symput('choice',trim(left(choice))); run; %end; %else %do; data model1; length model $32767. effect $100.; model=''; size=1; effect="Intercept"; effectentered="Intercept"; call symput('size',trim(left(put(size+1,best12.)))); run; %end; %do %until(&size.=&stopselect.+1 or &perfect.=1); %new3; %end; %if &perfect.=0 %then %do; data step1; set step1_%eval(&stopselect.); size=nparmsin; effect=effectentered; run; data summary1; length effect $100.; set %do size=%eval(&include.+2) %to %eval(&stopselect.); summary1_&size. %end; ; run; %end; %else %do; data step1; set step1_%eval(&stophere.); size=nparmsin; effect=effectentered; run; data summary1; length effect $100.; set %do size=%eval(&include.+2) %to %eval(&stophere.); summary1_&size. %end; ; run; %end; %mend dynamic3; %if %eval(&include.)=0 %then %do; ods output Candidates=candidatesnew; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / hierarchy=none selection=forward(stop=2 select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; %else %do; ods output Candidates=candidatesnew; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / hierarchy=none selection=forward(include=%eval(&include.) stop=%eval(&include.+2) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; data candidatesnew; set candidatesnew; variable=trim(left(effect)); if trim(left(compress(effect,'x1234567890')))='*' then comp=1; else comp=0; end=findc(trim(left(variable)),'*'); if comp=1 then output; run; data effects; set candidatesnew; length variable1 $1000. variable2 $1000.; variable1=substr(trim(left(variable)),1,end-1); variable2=substr(trim(left(variable)),end+1); run; data null; set effects; call symput("intcandidates",trim(left(put(_n_,best12.)))); run; ods listing close; %dynamic3; proc means data=summary1 noprint; by size; var rank; output out=counts n=n; run; data summary; set step1; where size>1; retain main 0 int 0; if compress(effect,'x1234567890')='' then main=main+1; if compress(effect,'x1234567890')='*' then int=int+1; nparmsin=size; run; proc sort data=summary; by size; run; proc sort data=counts; by size; run; data summary; merge summary(in=in1) counts; by size; if in1; run; data alphas; set summary; where size>%eval(1+&include.); retain maxpval; if probf>lag(probf) and probf>maxpval then maxpval=probf; else maxpval=maxpval; run; data alphas2; set alphas; check1=lag(maxpval); if size=2 then seqflag=1; else if maxpval>check1 then seqflag=1; else seqflag=0; run; proc sort data=alphas2; by descending nparmsin; run; data alphas3; set alphas2; alpha=maxpval; check2=lag(seqflag); if check2=. then choose=1; else if check2=1 then choose=1; else choose=0; run; proc sort data=alphas3; by nparmsin; where choose=1; run; data ghat; set alphas3; ghat=(((n-1)*alpha))/(nparmsin-%eval(&include.)); gamma=input(&gamma.,best12.); if ghat<=gamma then flag2=1; else flag2=0; lagalpha=lag(alpha); if lagalpha=. then lagalpha=0; run; proc sort data=ghat out=ghat2; by descending alpha; run; data ghat2; set ghat2; futn=lag(n); if futn=. then futn=0; futalpha=lag(alpha); if futalpha=. then futalpha=1; run; proc sort data=ghat2; by alpha; run; proc sort data=ghat; by alpha; run; data ghatnew; merge ghat ghat2; by alpha; if n-1=0 then alphahat=1; else if futn=0 then alphahat=alpha; else alphahat=gamma*(nparmsin-%eval(&include.))/(futn); if alphahat>=futalpha then alphahat=alpha; if alphahat<=alpha then alphahat=alpha; if alphahat>1 then alphahat=1; run; proc sort data= ghatnew out=amax; by ghat; run; data amax; set amax; call symput('amax',left(put(nparmsin,best12.))); run; proc sort data=ghatnew out=ghat2; where flag2=1 and nparmsin <=&amax.; by flag2 alpha; run; data fsr; set ghat2; by flag2 alpha; if last.flag2 then output; run; data final; set fsr nobs=count; call symput('alphahat', alphahat); run; data _null_; if 0 then set final nobs=count; call symput('count',left(put(count,4.))); stop; run; data _null_; set final; call symput('modelsize',left(put(size,4.))); run; ods output variables=&dataset._ffsr_labels; proc contents data=&dataset._ffsr; run; ods listing; proc print data=&dataset._ffsr_labels label; var variable label; label variable="Variable name" label="Original name"; title 'Variable names'; run; data null; effect="Intercept"; size=1; run; data null; set null summary; where size<=%eval(1+&include.); keep effect size; run; data newalphas; set null; probf=0; maxpval=0; run; data newalphas2; set newalphas alphas; %if %eval(&include.)=0 %then %do; step=size-1; %end; %else %do; if size-1<=%eval(&include.) then step=0; else step=size-1-%eval(&include.); %end; run; proc print data=newalphas2 label; var step size effect probf maxpval; label step="Selection Step" size="1+S" effect="Effect Entered" probf="p-to-enter" maxpval="monotonized p-to-enter"; title 'Forward Sequence'; run; data null; alpha=0; alphahat=0; ghat=0; nparmsin=1+%eval(&include.); run; data gammafast; set null ghatnew; size=nparmsin; run; proc print data=gammafast label; title 'Sequence of Possible Models'; label size="1+S" ghat="gammahat" ; var alpha ghat size; run; %if &count.=0 %then %do; data final; alphahat=0; ghat=0; size=1+%eval(&include.); run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; %if %eval(&include.)>0 %then %do; data includesummary2; set includesummary; call symput('model',trim(left(includein))); run; proc glm data=&dataset._ffsr; model y= &model. / solution; output out=preds p=pred; run; %end; %else %do; proc glm data=&dataset._ffsr; model y= / solution; output out=preds p=pred; run; %end; %end; %else %do; data final; set gammafast; where size=&modelsize.; run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; data summaryb; length model $32767.; set model1; where size=&modelsize.; call symput('model',trim(left(model))); run; ods output ParameterEstimates=parameters; proc glm data=&dataset._ffsr; model y = &model. / solution; output out=preds p=pred; run; %end; /*Cleans up data sets */ ods listing close; proc datasets; delete mean mean2 includesummary:(gennum=all) summary2 stats std x y xcount part:(gennum=all) linear abcd:(gennum=all) newalphas:(gennum=all) cand:(gennum=all) alphas:(gennum=all) counts step:(gennum=all) othercounts effects:(gennum=all) amax summary:(gennum=all) final model1 fsr:(gennum=all) ghat:(gennum=all) null parameters stop q; run; quit; data &dataset.; set &dataset._original; run; %mend fastfsr3; %macro fastfsr4; %getdata; data null; terms=lowcase("&terms."); if trim(left(terms))="full" then terms2=0; else terms2=1; call symput('terms2',trim(left(put(terms2,best12.)))); run; proc means data=&dataset._ffsr noprint; var y; output out=stats n=n; run; data _null_; set stats; call symput('n',n); run; data stop; set stop; stopselect=min(&n.-1,q+1 %if &terms2.=1 %then %do; ,&terms. %end; ); call symput("fullmodel2",trim(left(fullmodel))); call symput('q',trim(left(put(q,best12.)))); call symput('p',trim(left(put(p,best12.)))); call symput('stopselect',stopselect); call symput('perfect',0); call symput('stophere',stopselect); run; %if %eval(&include.)>0 %then %do; ods output SelectionSummary=includesummary; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / selection=forward(select=sl include=%eval(&include.) stop=%eval(&include.+1)); run; data includesummary; set includesummary; retain includeint includemain 0; comp=trim(left(compress(effectentered,'x1234567890'))); if comp="Intercept" then includemain=0; else if comp='*' then includeint=includeint+1; else includemain=includemain+1; call symput('includeint',trim(left(put(includeint,best12.)))); call symput('includemain',trim(left(put(includemain,best12.)))); run; data null; q=%eval(&q.- &include.); p=%eval(&p.- &includemain.); call symput('q',trim(left(put(q,best12.)))); call symput('p',trim(left(put(p,best12.)))); run; %end; %macro new4; %if &size.=2 %then %do; ods output Candidates=candidates&size.; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hierarchy=none selection=forward( stop=%sysevalf(&size.) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; %else %do; ods output Candidates=candidates&size.; proc glmselect data=&dataset._ffsr; model y = &model. &fullmodel2. / hierarchy=none selection=forward(include=%sysevalf(&size.-2) stop=%sysevalf(&size.) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; /*Adding in check for perfect fit*/ %if %sysfunc(exist(candidates&size.))=0 %then %do; %put WARNING: Ignore the previous WARNING. A perfect fit was detected so the full forward sequence is unable to be obtained. Fast FSR will continue using the partial sequence.; data null; call symput('perfect',1); run; data null; call symput('stophere',&size.-1); run; %end; %else %do; data candidates&size.; set candidates&size.; retain count1 count2 0; comp=trim(left(compress(effect,'x1234567890'))); if comp='*' then count1=count1+1; else count2=count2+1; size=input(&size.,best12.); run; data null; set candidates&size.; call symput('main',count2); call symput('int', count1); run; data candidates&size.; set candidates&size.; nint=input(&int.,best12.); nmain=input(&main.,best12.); if comp='*' then pval=probf*&c.; else pval=probf; run; proc sort data=candidates&size.; by step pval; run; data model1; length model $32767.; set model1 candidates&size.(firstobs=1 obs=1); if size>1 then model=trim(left(trim(left(lag(model))) || ' ' || trim(left(effect)))); call symput('model',trim(left(model))); call symput('size',trim(left(put(size+1,best12.)))); run; %end; %mend new4; %macro adjusted4; %let model=; %if %eval(&include.)>0 %then %do; data includesummary2; set includesummary; length includein $32767.; retain includein ''; if trim(left(effectentered))="Intercept" then effectentered=''; includein=trim(left(includein)) || ' ' || trim(left(effectentered)); includein=trim(left(includein)); call symput('includein',trim(left(includein))); call symput('model',trim(left(includein))); run; data model1; set includesummary2; model=trim(left(includein)); effect=trim(left(effectentered)); if trim(left(effect))='' then effect="Intercept"; size=nparmsin; call symput('size',trim(left(put(size+1,best12.)))); run; %end; %else %do; data model1; length model $32767.; model=''; size=1; call symput('size',trim(left(put(size+1,best12.)))); run; %end; %do %until(&size.=&stopselect.+1 or &perfect.=1); %new4; %end; %mend adjusted4; %macro adjusted24; %if &size.=2 %then %do; ods output Candidates=candidates&size.; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hierarchy=none selection=forward( stop=2 select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; %else %do; ods output Candidates=candidates&size.; proc glmselect data=&dataset._ffsr; model y = &model. &fullmodel2. / hierarchy=none selection=forward(include=%sysevalf(&size.-2) stop=%sysevalf(&size.) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; /*Adding in check for perfect fit*/ %if %sysfunc(exist(candidates&size.))=0 %then %do; %put WARNING: Ignore the previous WARNING. A perfect fit was detected so the full forward sequence is unable to be obtained. Fast FSR will continue using the partial sequence.; data null; call symput('perfect2',1); run; data null; call symput('stophere2',&size.-1); run; data final; set gammafast; where trim(left(model))=trim(left("&model.")); run; ods listing; proc print data=final label; var alphahat c size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; ods output ParameterEstimates=parameters; proc glm data=&dataset._ffsr; model y = &model. / solution; output out=preds p=pred; run; %end; %else %do; data candidates&size.; set candidates&size.; retain count1 count2 0; comp=trim(left(compress(effect,'x1234567890'))); if comp='*' then count1=count1+1; else count2=count2+1; size=input(&size.,best12.); run; data null; set candidates&size.; call symput('main',count2); call symput('int', count1); run; data candidates&size.; set candidates&size.; nint=input(&int.,best12.); nmain=input(&main.,best12.); if comp='*' then pval=probf*&c.; else pval=probf; run; proc sort data=candidates&size.; by step pval; run; data null; set candidates&size.(firstobs=1 obs=1); if pval< &alpha. then call symput('stop',0); else call symput('stop',1); run; %if &stop.=0 %then %do; data model1; length model $32767.; set model1 candidates&size.(firstobs=1 obs=1); if size>1 then model=trim(left(trim(left(lag(model))) || ' ' || trim(left(effect)))); call symput('model',trim(left(model))); run; ods output ParameterEstimates=parameters; proc glm data=&dataset._ffsr; model y = &model. / solution; output out=preds p=pred; run; %end; %else %do; data final; set gammafast; where trim(left(model))=trim(left("&model.")); run; ods listing; proc print data=final label; var alphahat c size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; ods output ParameterEstimates=parameters; proc glm data=&dataset._ffsr; model y = &model. / solution; output out=preds p=pred; run; %end; %end; %mend adjusted24; %macro step4; %let perfect=0; %adjusted4; data summary; set model1; where size>1; retain main 0 int 0; if compress(effect,'x1234567890')='' then main=main+1; if compress(effect,'x1234567890')='*' then int=int+1; nparmsin=size; run; proc sort data=summary; by size; run; data alphas; set summary; where size>%eval(1+&include.); retain maxpval; if pval>lag(pval) and pval>maxpval then maxpval=pval; else maxpval=maxpval; run; data alphas2; set alphas; check1=lag(maxpval); if size=2 then seqflag=1; else if maxpval>check1 then seqflag=1; else seqflag=0; run; proc sort data=alphas2; by descending nparmsin; run; data alphas3; set alphas2; alpha=maxpval; check2=lag(seqflag); if check2=. then choose=1; else if check2=1 then choose=1; else choose=0; run; proc sort data=alphas3; by nparmsin; where choose=1; run; data ghat; set alphas3; cstar=&c.; p=&p.; q=&q.; ghat_m=((&p.-(main-&includemain.))*alpha)/(size-&include.); ghat_q=((&q.-&p.-(int-&includeint.))*alpha/cstar)/(size-&include.); ghat=ghat_m+ghat_q; gamma=input(&gamma.,best12.); if &p.-(main-&includemain.)=0 and &q.-&p.-(int-&includeint.)=0 then alphahat=alpha; else alphahat=(gamma)*(size-&include.)/((&p.-(main-&includemain.))+((&q.-&p.-(int-&includeint.))/cstar)); c2=(&q.-&p.-(int-&includeint.)+1)/(&p.-(main-&includemain.)+1); ghat_q2=((&q.-&p.-(int-&includeint.))*alpha/c2)/(size-&include.); if ghat<= gamma then flag2=1; else flag2=0; lagalpha=lag(alpha); if lagalpha=. then lagalpha=0; run; proc sort data=ghat out=ghat2; by descending alpha; run; data ghat2; set ghat2; futalpha=lag(alpha); if futalpha=. then futalpha=alpha; run; proc sort data=ghat2; by alpha; run; proc sort data=ghat; by alpha; run; data ghatnew; merge ghat ghat2; by alpha; if alphahat>=futalpha then alphahat=alpha; if alphahat<=alpha then alphahat=alpha; run; proc sort data= ghatnew out=amax; by ghat; run; data amax; set amax; call symput('amax',left(put(size,best12.))); run; proc sort data=ghatnew out=ghat2; where flag2=1 and size <=&amax.; by flag2 alpha; run; data ghat3; set ghat2; by flag2 alpha; flag3=1; if last.flag2 then output; run; proc sort data=ghat3; by flag2 alpha c2; run; proc sort data=ghat3 out=ghat4; where flag3=1; by flag3 c2; run; data fsr; set ghat4; by flag3 c2; if last.flag3 then output; run; data final; set fsr nobs=count; alphastar=&alpha.; val=abs(cstar-c2); if val>.01 then dif=0; else dif=1; zz=&zz.+1; call symput('dif',trim(left(put(dif,best12.)))); call symput('val',val); call symput('zz',trim(left(put(zz,best12.)))); call symput('cstar',trim(left(put(cstar,best12.)))); call symput('alpha', alphahat); call symput('c',c2); c=c2; run; data _null_; if 0 then set final nobs=count; call symput('count',left(put(count,4.))); stop; run; %if &count.=0 %then %do; data null; dif=1; call symput('dif',dif); run; %end; %mend step4; data null; c=(&q.-&p.+1)/(&p.+1); call symput('c',c); call symput('alpha',0); run; data null; dif=0; count=1; zz=1; call symput('zz',trim(left(put(zz,best12.)))); call symput('dif',trim(left(put(dif,best12.)))); call symput('count',count); run; %do %until(&dif.=1 or &zz.=4); %step4; %end; ods output variables=&dataset._ffsr_labels; proc contents data=&dataset._ffsr; run; ods listing; proc print data=&dataset._ffsr_labels label; var variable label; label variable="Variable name" label="Original name"; title 'Variable names'; run; data null; size=1; effect="Intercept"; run; data null; set null summary; where size<=%eval(1+&include.); keep effect size; run; data newalphas; set null; probf=0; pval=0; maxpval=0; run; data newalphas2; set newalphas alphas; %if %eval(&include.)=0 %then %do; step=size-1; %end; %else %do; if size-1<=%eval(&include.) then step=0; else step=size-1-%eval(&include.); %end; run; proc print data=newalphas2 label; var step size effect probf pval maxpval; label step="Selection Step" size="1+S" effect="Effect Entered" probf="p-to-enter" pval="adjusted p-to-enter" maxpval="monotonized adjusted p-to-enter"; title 'Forward Sequence'; run; data null; alpha=0; alphahat=0; ghat=0; size=1+%eval(&include.); c2=(&q.-&p.+1)/(&p.+1); run; data gammafast; set null ghatnew; c=c2; run; proc print data=gammafast label; title 'Sequence of Possible Models'; label size="1+S" ghat="gammahat" ; var alpha ghat size; run; ods listing close; %if &count.=0 %then %do; ods listing; data final; alphahat=0; c=(&q.-&p.+1)/(&p.+1); ghat=0; size=1+%eval(&include.); run; proc print data=final label; var alphahat c size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; %if %eval(&include.)>0 %then %do; data includesummary2; set includesummary2; call symput('model',trim(left(includein))); run; proc glm data=&dataset._ffsr; model y= &model. / solution; output out=preds p=pred; run; %end; %else %do; proc glm data=&dataset._ffsr; model y= / solution; output out=preds p=pred; run; %end; %end; %else %do; %let stop=0; %let model=; %if %eval(&include.)>0 %then %do; data model1; set includesummary2; model=trim(left(includein)); effect=trim(left(effectentered)); if trim(left(effect))='' then effect="Intercept"; size=nparmsin; call symput('size',trim(left(put(size+1,best12.)))); call symput('model',trim(left(model))); run; %end; %else %do; data model1; length model $32767.; model=''; size=1; call symput('size',trim(left(put(size+1,best12.)))); run; %end; data perfect2; call symput('perfect2',0); run; %do %until (&stop = 1 or &perfect2.=1); %adjusted24; data null2; size=&size.+1; call symput('size',trim(left(put(size,best12.)))); run; %end; %end; /*Cleans up data sets*/ ods listing close; proc datasets; delete mean mean2 includesummary:(gennum=all) summary2 stats std x y xcount part:(gennum=all) abcd:(gennum=all) newalphas:(gennum=all) candidates:(gennum=all) alphas:(gennum=all) counts perfect:(gennum=all) amax summary final model1 fsr:(gennum=all) ghat:(gennum=all) null:(gennum=all) parameters stop q; run; quit; data &dataset.; set &dataset._original; run; %mend fastfsr4; %macro fastfsr5; %getdata; data null; terms=lowcase("&terms."); if trim(left(terms))="full" then terms2=0; else terms2=1; call symput('terms2',trim(left(put(terms2,best12.)))); run; proc means data=&dataset._ffsr noprint; var y; output out=stats n=n; run; data _null_; set stats; call symput('n',n); run; data stop; set stop; stopselect=min(&n.-1,q+1 %if &terms2.=1 %then %do; ,&terms. %end; ); call symput("fullmodel2",trim(left(fullmodel))); call symput('q',trim(left(put(q,best12.)))); call symput('p',trim(left(put(p,best12.)))); call symput('stopselect',stopselect); call symput('perfect',0); call symput('stophere',stopselect); run; %if %eval(&include.)>0 %then %do; ods output SelectionSummary=includesummary; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / selection=forward(select=sl include=%eval(&include.) stop=%eval(&include.+1)); run; data includesummary; set includesummary; retain includeint includemain 0; comp=trim(left(compress(effectentered,'x1234567890'))); if comp="Intercept" then includemain=0; else if comp='*' then includeint=includeint+1; else includemain=includemain+1; call symput('includeint',trim(left(put(includeint,best12.)))); call symput('includemain',trim(left(put(includemain,best12.)))); run; data null; q=%eval(&q.- &include.); p=%eval(&p.- &includemain.); call symput('q',trim(left(put(q,best12.)))); call symput('p',trim(left(put(p,best12.)))); run; %end; %macro new5; %if &size.=2 %then %do; ods output Candidates=candidates&size.; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hierarchy=none selection=forward( stop=%sysevalf(&size.) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; %else %do; ods output Candidates=candidates&size.; proc glmselect data=&dataset._ffsr; model y = &model. &fullmodel2. / hierarchy=none selection=forward(include=%sysevalf(&size.-2) stop=%sysevalf(&size.) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; /*Adding in check for perfect fit*/ %if %sysfunc(exist(candidates&size.))=0 %then %do; %put WARNING: Ignore the previous WARNING. A perfect fit was detected so the full forward sequence is unable to be obtained. Fast FSR will continue using the partial sequence.; data null; call symput('perfect',1); run; data null; call symput('stophere',&size.-1); run; %end; %else %do; data candidates&size.; set candidates&size.; retain count1 count2 0; comp=trim(left(compress(effect,'x1234567890'))); if comp='*' then count1=count1+1; else count2=count2+1; size=input(&size.,best12.); run; data null; set candidates&size.; call symput('main',count2); call symput('int', count1); run; data candidates&size.; set candidates&size.; nint=input(&int.,best12.); nmain=input(&main.,best12.); cstar= (nint+1)/(nmain+1); %if %eval(&cbound.)>0 %then %do; if cstar>2*(1+&q.-&p.)/(1+&p.) then cstar= 2*(1+&q.-&p.)/(1+&p.); %end; if comp='*' then pval=probf*cstar; else pval=probf; run; proc sort data=candidates&size.; by step pval; run; data model1; length model $32767.; set model1 candidates&size.(firstobs=1 obs=1); if size>1 then model=trim(left(trim(left(lag(model))) || ' ' || trim(left(effect)))); call symput('model',trim(left(model))); call symput('size',trim(left(put(size+1,best12.)))); run; %end; %mend new5; %macro adjusted5; %let model=; %if %eval(&include.)>0 %then %do; data includesummary2; set includesummary; length includein $32767.; retain includein ''; if trim(left(effectentered))="Intercept" then effectentered=''; includein=trim(left(includein)) || ' ' || trim(left(effectentered)); includein=trim(left(includein)); call symput('includein',trim(left(includein))); call symput('model',trim(left(includein))); run; data model1; set includesummary2; model=trim(left(includein)); effect=trim(left(effectentered)); if trim(left(effect))='' then effect="Intercept"; size=nparmsin; call symput('size',trim(left(put(size+1,best12.)))); run; %end; %else %do; data model1; length model $32767.; model=''; size=1; call symput('size',trim(left(put(size+1,best12.)))); run; %end; %do %until(&size.=&stopselect.+1 or &perfect.=1); %new5; %end; %mend adjusted5; ods listing close; %adjusted5; data summary; set model1; where size>1; retain main 0 int 0; if compress(effect,'x1234567890')='' then main=main+1; if compress(effect,'x1234567890')='*' then int=int+1; nparmsin=size; run; proc sort data=summary; by size; run; data alphas; set summary; where size>%eval(&include.+1); retain maxpval; if pval>lag(pval) and pval>maxpval then maxpval=pval; else maxpval=maxpval; run; data alphas2; set alphas; check1=lag(maxpval); if size=2 then seqflag=1; else if maxpval>check1 then seqflag=1; else seqflag=0; run; proc sort data=alphas2; by descending nparmsin; run; data alphas3; set alphas2; alpha=maxpval; check2=lag(seqflag); if check2=. then choose=1; else if check2=1 then choose=1; else choose=0; run; proc sort data=alphas3; by nparmsin; where choose=1; run; data ghat; set alphas3; cstar= (nint+1)/(nmain+1); %if %eval(&cbound.)>0 %then %do; if cstar>2*(1+&q.-&p.)/(1+&p.) then cstar= 2*(1+&q.-&p.)/(1+&p.); %end; ghat=((&p.-(main-&includemain.))*(alpha)+(&q.-&p.-(int-&includeint.))*(alpha)/cstar)/(size-&include.); gamma=input(&gamma.,best12.); c=(&q.-&p.-(int-&includeint.)+1)/(&p.-(main-&includemain.)+1); %if %eval(&cbound.)>0 %then %do; if c>2*(1+&q.-&p.)/(1+&p.) then c= 2*(1+&q.-&p.)/(1+&p.); %end; if ghat<=gamma then flag2=1; else flag2=0; lagalpha=lag(alpha); if lagalpha=. then lagalpha=0; run; proc sort data=ghat out=ghat2; by descending alpha; run; data ghat2; set ghat2; futalpha=lag(alpha); if futalpha=. then futalpha=alpha; run; proc sort data=ghat2; by alpha; run; proc sort data=ghat; by alpha; run; data ghatnew; merge ghat ghat2; by alpha; if (&p.-(main-&includemain.))+(&q.-&p.-(int-&includeint.))/c=0 then alphahat=alpha; else alphahat=gamma*(size-&include.)/((&p.-(main-&includemain.))+(&q.-&p.-(int-&includeint.))/c); if alphahat>=futalpha then alphahat=alpha; if alphahat<=alpha then alphahat=alpha; run; proc sort data= ghatnew out=amax; by ghat; run; data amax; set amax; call symput('amax',left(put(size,best12.))); run; proc sort data=ghatnew out=ghat2; where flag2=1 and size <=&amax.; by flag2 alpha; run; data fsr; set ghat2; by flag2 alpha; if last.flag2 then output; run; data final; set fsr nobs=count; call symput('modelsize', size); run; data _null_; if 0 then set final nobs=count; call symput('count',left(put(count,4.))); stop; run; ods output variables=&dataset._ffsr_labels; proc contents data=&dataset._ffsr; run; ods listing; proc print data=&dataset._ffsr_labels label; var variable label; label variable="Variable name" label="Original name"; title 'Variable names'; run; data null; size=1; effect="Intercept"; run; data null; set null summary; where size<=%eval(1+&include.); keep effect size; run; data newalphas; set null; probf=0; pval=0; maxpval=0; cstar=(&q.-&p.+1)/(&p.+1); run; data alphas; set alphas; cstar= (nint+1)/(nmain+1); %if %eval(&cbound.)>0 %then %do; if cstar>2*(1+&q.-&p.)/(1+&p.) then cstar= 2*(1+&q.-&p.)/(1+&p.); %end; run; data newalphas2; length effect $100.; set newalphas alphas; %if %eval(&include.)=0 %then %do; step=size-1; %end; %else %do; if size-1<=%eval(&include.) then step=0; else step=size-1-%eval(&include.); %end; if step=0 then cstar=.; run; proc print data=newalphas2 label; var step size effect probf cstar pval maxpval; label step="Selection Step" size="1+S" effect="Effect Entered" probf="p-to-enter" cstar="c" pval="adjusted p-to-enter" maxpval="monotonized adjusted p-to-enter"; title 'Forward Sequence'; run; data null; alpha=0; alphahat=0; ghat=0; size=1+%eval(&include.); cstar=.; run; data gammafast; set null ghatnew; run; proc print data=gammafast label; title 'Sequence of Possible Models'; label size="1+S" cstar="c" ghat="gammahat" ; var alpha cstar ghat size; run; %if &count.=0 %then %do; data final; alphahat=0; cstar=(&q.-&p.+1)/(&p.+1); ghat=0; size=1+%eval(&include.); run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; %if %eval(&include.)>0 %then %do; data includesummary2; set includesummary2; call symput('model',trim(left(includein))); run; proc glm data=&dataset._ffsr; model y= &model. / solution; output out=preds p=pred; run; %end; %else %do; proc glm data=&dataset._ffsr; model y= / solution; output out=preds p=pred; run; %end; %end; %else %do; data final; set gammafast; where size=&modelsize.; run; data _null; set gammafast; retain warn 0; where size<=&modelsize.; if cstar>2*(&q.-&p.+1)/(&p.+1) then warn=warn+1; else warn=warn; call symput('warn',warn); run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; %if %eval(&warn.)>0 and %eval(&cbound.)=0 %then %do; data _null; file print notitles; put "Note: The adjustment exceeded 2 times the initial value making it very difficult for"; put "interactions and squared terms to enter the model. Use cbound=2 for optimal results." ; run; %end; data summary; length model $32767.; retain model; set model1; where size<=&modelsize.; if size=1 then model=''; else model=trim(left(trim(left(model)) || ' ' || trim(left(effect)))); call symput('model',trim(left(model))); run; ods output ParameterEstimates=parameters; proc glm data=&dataset._ffsr; model y = &model. / solution; output out=preds p=pred; run; %end; /*Cleans up data sets */ ods listing close; proc datasets; delete mean mean2 includesummary:(gennum=all) summary2 stats std x y xcount part:(gennum=all) candidates:(gennum=all) newalphas:(gennum=all) abcd:(gennum=all) alphas:(gennum=all) counts amax summary final model1 fsr:(gennum=all) ghat:(gennum=all) null parameters stop q; run; quit; data &dataset.; set &dataset._original; run; %mend fastfsr5; %macro fastfsr6; %getdata; data null; terms=lowcase("&terms."); if trim(left(terms))="full" then terms2=0; else terms2=1; call symput('terms2',trim(left(put(terms2,best12.)))); run; proc means data=&dataset._ffsr noprint; var y; output out=stats n=n; run; data _null_; set stats; call symput('n',n); run; data stop; set stop; stopselect=min(&n.-1,q+1 %if &terms2.=1 %then %do; ,&terms. %end; ); call symput("fullmodel2",trim(left(fullmodel))); call symput('q',trim(left(put(q,best12.)))); call symput('p',trim(left(put(p,best12.)))); call symput('binary',trim(left(put(binary,best12.)))); call symput('cont',trim(left(put(cont,best12.)))); call symput('stopselect',stopselect); call symput('perfect',0); call symput('stophere',stopselect); run; %if %eval(&include.)>0 %then %do; ods output SelectionSummary=includesummary; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / selection=forward(select=sl include=%eval(&include.) stop=%eval(&include.+1)); run; data includesummary; set includesummary; length includein $32767.; retain includeint includemain 0; comp=trim(left(compress(effectentered,'x1234567890'))); if trim(left(effectentered))="Intercept" then effectentered=''; if comp="Intercept" then includemain=0; else if comp='*' then includeint=includeint+1; else includemain=includemain+1; retain includein ''; includein=trim(left(trim(left(includein)) || ' ' || trim(left(effectentered)))); call symput('includein',trim(left(includein))); call symput('model',trim(left(includein))); call symput('includeint',trim(left(put(includeint,best12.)))); call symput('includemain',trim(left(put(includemain,best12.)))); run; data null; q=%eval(&q.- &include.); p=%eval(&p.- &includemain.); call symput('q',trim(left(put(q,best12.)))); call symput('p',trim(left(put(p,best12.)))); run; %end; %macro new6; %if &size.>2 %then %do; data candidates; call symput('candidates',trim(left("&linear.")) || ' ' || trim(left("&choice."))); run; %end; %if &size.=2 %then %do; ods output Candidates=summary1_&size. SelectionSummary=step1_&size.; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / hierarchy=single selection=forward( stop=%sysevalf(&size.) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; %else %do; ods output Candidates=summary1_&size. SelectionSummary=step1_&size.; proc glmselect data=&dataset._ffsr; model y= &model. &candidates. / hierarchy=none selection=forward(include=%sysevalf(&size.-2) stop=%sysevalf(&size.) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; /*Adding in check for perfect fit*/ %if %sysfunc(exist(summary1_&size.))=0 %then %do; %put WARNING: Ignore the previous WARNING. A perfect fit was detected so the full forward sequence is unable to be obtained. Fast FSR will continue using the partial sequence.; data null; call symput('perfect',1); run; data null; call symput('stophere',&size.-1); run; %end; %else %do; %if %eval(&size.)=2 and %eval(&include.)=0 %then %do; data null; length linear $10000.; set summary1_&size.; retain linear ''; linear=trim(left(linear)) || ' ' || trim(left(effect)); linear=trim(left(linear)); call symput("linear",trim(left(linear))); run; %end; data step1_&size.; set step1_&size.; where step <= 1; length Effect $100.; if EffectEntered="Intercept" then Effect=""; else Effect=EffectEntered; run; data summary1_&size.; set summary1_&size.; size=&size.; run; proc sort data= summary1_&size.; by size rank; run; data summary1_&size.; set summary1_&size.; by size rank; retain count1 count2 0; comp=trim(left(compress(effect,'x1234567890'))); if comp='*' then count1=count1+1; else count2=count2+1; size=input(&size.,best12.); run; data null; set summary1_&size.; call symput('main',count2); call symput('int', count1); run; data summary1_&size.; set summary1_&size.; cstar= (&int.+1)/(&main.+1); %if %eval(&cbound.)>0 %then %do; if cstar>2*(1+&q.-&p.)/(1+&p.) then cstar= 2*(1+&q.-&p.)/(1+&p.); %end; if comp='*' then pval=probf*cstar; else pval=probf; run; proc sort data=summary1_&size.; by step pval; run; data summary1_&size._b; set step1_&size. summary1_&size.(firstobs=1 obs=1); size=&size.; run; proc transpose data=summary1_&size._b out=cand; var Effect; run; data cand2; set cand; length model $32767.; %if &size.=2 %then %do; model=trim(left(col2)); %end; %else %if &size.=3 %then %do; model=trim(left(col2)) || ' ' || trim(left(col3)); %end; %else %do; model= %do col=2 %to &size.-1; trim(left(col&col.)) || " " || %end; trim(left(col&size.)); %end; model=compress(model,'.'); call symput('model',trim(left(model))); size=input(trim(left(&size.)),best12.); run; data effectsin; %do it=1 %to %eval(&intcandidates.); set cand2; output; %end; run; data effects2; merge effects effectsin; %if &size.=2 %then %do; if trim(left(variable1))=trim(left(col2)) or trim(left(variable2))=trim(left(col2)) then output; %end; %else %do; if %do kk=2 %to &size.-1; trim(left(variable1))=trim(left(col&kk.)) or trim(left(variable2))=trim(left(col&kk.)) or %end; trim(left(variable1))=trim(left(col&size.)) or trim(left(variable2))=trim(left(col&size.)) then output; %end; run; data effects3; set effects2; length choice $32767.; retain choice; choice=trim(left(trim(left(choice)) || ' ' || trim(left(variable)))); call symput('choice',trim(left(choice))); run; data model1; length effect $100.; set model1 cand2; call symput('size',trim(left(put(size+1,best12.)))); run; %end; %mend new6; %macro dynamic6; %let choice=; %let model=; %if %eval(&include.)>0 %then %do; data model1; set includesummary; model=trim(left(includein)); if trim(left(effectentered))='' then effectentered='Intercept'; effect=trim(left(effectentered)); size=nparmsin; call symput('size',trim(left(put(size+1,best12.)))); call symput('includein',trim(left(includein))); call symput('model',trim(left(includein))); run; ods output Candidates=linear; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / hier=single selection=forward(select=sl sle=.99999999 stop=2) details=steps(candidates(show=all)); run; data null; length linear $10000.; set linear; retain linear ''; linear=trim(left(linear)) || ' ' || trim(left(effect)); linear=trim(left(linear)); call symput("linear",trim(left(linear))); run; proc transpose data=includesummary out=cand; var EffectEntered; run; data cand2; set cand; length model $32767.; %if &size.=2 %then %do; model=trim(left(col2)); %end; %else %if &size.=3 %then %do; model=trim(left(col2)) || ' ' || trim(left(col3)); %end; %else %do; model= %do col=2 %to &size.-1; trim(left(col&col.)) || " " || %end; trim(left(col&size.)); %end; model=compress(model,'.'); call symput('model',trim(left(model))); size=input(trim(left(&size.)),best12.); run; data effectsin; %do it=1 %to %eval(&intcandidates.); set cand2; output; %end; run; data effects2; merge effects effectsin; %if &size.=2 %then %do; if trim(left(variable1))=trim(left(col2)) or trim(left(variable2))=trim(left(col2)) then output; %end; %else %do; if %do kk=2 %to &size.-1; trim(left(variable1))=trim(left(col&kk.)) or trim(left(variable2))=trim(left(col&kk.)) or %end; trim(left(variable1))=trim(left(col&size.)) or trim(left(variable2))=trim(left(col&size.)) then output; %end; run; data effects3; set effects2; length choice $32767.; retain choice; choice=trim(left(trim(left(choice)) || ' ' || trim(left(variable)))); call symput('choice',trim(left(choice))); run; %end; %else %do; data model1; length model $32767. effect $100.; model=''; size=1; effect="Intercept"; call symput('size',trim(left(put(size+1,best12.)))); run; %end; %do %until(&size.=&stopselect.+1 or &perfect.=1); %new6; %end; %if &perfect.=0 %then %do; data step1; set step1_%eval(&stopselect.); size=nparmsin; effect=effectentered; run; data summary1; length effect $100. comp $20.; set %do size=%eval(2+&include.) %to %eval(&stopselect.); summary1_&size. %end; ; run; %end; %else %do; data step1; set step1_%eval(&stophere.); size=nparmsin; effect=effectentered; run; data summary1; length effect $100. comp $20.; set %do size=%eval(2+&include.) %to %eval(&stophere.); summary1_&size. %end; ; run; %end; %mend dynamic6; %if %eval(&include.)=0 %then %do; ods output Candidates=candidatesnew; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / hierarchy=none selection=forward(stop=2 select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; %else %do; ods output Candidates=candidatesnew; proc glmselect data=&dataset._ffsr; model y= &fullmodel2. / hierarchy=none selection=forward(include=%eval(&include.) stop=%eval(&include.+2) select=sl sle=.99999) details=steps(candidates(show=all)); run; %end; data candidatesnew; set candidatesnew; variable=trim(left(effect)); if trim(left(compress(effect,'x1234567890')))='*' then comp=1; else comp=0; end=findc(trim(left(variable)),'*'); if comp=1 then output; run; data effects; set candidatesnew; length variable1 $1000. variable2 $1000.; variable1=substr(trim(left(variable)),1,end-1); variable2=substr(trim(left(variable)),end+1); run; data null; set effects; call symput("intcandidates",trim(left(put(_n_,best12.)))); run; ods listing close; %dynamic6; proc means data=summary1 noprint; by size; var rank; output out=counts n=n; run; data summary; set step1; where size>1; retain main 0 int 0; if compress(effect,'x1234567890')='' then main=main+1; if compress(effect,'x1234567890')='*' then int=int+1; nparmsin=size; run; proc sort data= summary1; by size rank; run; data othercounts; set summary1; where size>%eval(1+&include.); by size rank; if last.size then output; rename count1 = nint count2=nmain; keep size count1 count2; run; proc sort data=summary; by size; run; proc sort data=counts; by size; run; data summary; merge summary(in=in1) counts othercounts; by size; if in1; comp=trim(left(compress(effect,'x1234567890'))); cstar= (nint+1)/(nmain+1); %if %eval(&cbound.)>0 %then %do; if cstar>2*(1+&q.-&p.)/(1+&p.) then cstar= 2*(1+&q.-&p.)/(1+&p.); %end; if comp='*' then pval=probf*cstar; else pval=probf; run; data alphas; set summary; where size>%eval(1+&include.); retain maxpval; if pval>lag(pval) and pval>maxpval then maxpval=pval; else maxpval=maxpval; run; data alphas2; set alphas; check1=lag(maxpval); if size=2 then seqflag=1; else if maxpval>check1 then seqflag=1; else seqflag=0; run; proc sort data=alphas2; by descending nparmsin; run; data alphas3; set alphas2; alpha=maxpval; check2=lag(seqflag); if check2=. then choose=1; else if check2=1 then choose=1; else choose=0; run; proc sort data=alphas3; by nparmsin; where choose=1; run; data ghat; set alphas3; if comp='*' then countint=nint-1; else countint=nint; p=&p.; %if %eval(&cbound.)>0 %then %do; if cstar>2*(1+&q.-&p.)/(1+&p.) then cstar= 2*(1+&q.-&p.)/(1+&p.); %end; if nint ne 0 then ghat=((&p.-(main-&includemain.))*(alpha)+(countint)*(alpha)/cstar)/(size-&include.); else ghat=((&p.-(main-&includemain.))*(alpha))/(size-&include.); gamma=input(&gamma.,best12.); if nint ne 0 then c=(countint+1)/(&p.-(main-&includemain.)+1); else c=1; %if %eval(&cbound.)>0 %then %do; if c>2*(1+&q.-&p.)/(1+&p.) then c= 2*(1+&q.-&p.)/(1+&p.); %end; if ghat<=gamma then flag2=1; else flag2=0; lagalpha=lag(alpha); if lagalpha=. then lagalpha=0; run; proc sort data=ghat out=ghat2; by descending alpha; run; data ghat2; set ghat2; futint=lag(nint); if futint=. then futint=0; futmain=lag(nmain); if futmain=. then futmain=0; futalpha=lag(alpha); if futalpha=. then futalpha=alpha; run; proc sort data=ghat2; by alpha; run; proc sort data=ghat; by alpha; run; data ghatnew; merge ghat ghat2; by alpha; if futint = 0 and futmain = 0 then alphahat=alpha; else if futint ne 0 or futmain ne 0 then alphahat=gamma*(size-&include.)/((&p.-(main-&includemain.))+(futint)/c); else alphahat=alpha; if alphahat>=futalpha then alphahat=alpha; if alphahat<=alpha then alphahat=alpha; if alphahat=. then alphahat=alpha; run; proc sort data= ghatnew out=amax; by ghat; run; data amax; set amax; call symput('amax',left(put(size,best12.))); run; proc sort data=ghatnew out=ghat2; where flag2=1 and size <=&amax.; by flag2 alpha; run; data fsr; set ghat2; by flag2 alpha; if last.flag2 then output; run; data final; set fsr nobs=count; call symput('alphahat', alphahat); run; data _null_; if 0 then set final nobs=count; call symput('count',left(put(count,4.))); stop; run; data _null_; set final; call symput('modelsize',left(put(size,4.))); run; ods output variables=&dataset._ffsr_labels; proc contents data=&dataset._ffsr; run; ods listing; proc print data=&dataset._ffsr_labels label; var variable label; label variable="Variable name" label="Original name"; title 'Variable names'; run; data null; effect="Intercept"; size=1; run; data null; length effect $100.; set null summary; where size<=%eval(1+&include.); keep effect size; run; data newalphas; set null; probf=0; pval=0; maxpval=0; cstar=(&q.-&p.+1)/(&p.+1); run; data alphas; set alphas; cstar= (nint+1)/(nmain+1); %if %eval(&cbound.)>0 %then %do; if cstar>2*(1+&q.-&p.)/(1+&p.) then cstar= 2*(1+&q.-&p.)/(1+&p.); %end; run; data newalphas2; length effect $100.; set newalphas alphas; %if %eval(&include.)=0 %then %do; step=size-1; %end; %else %do; if size-1<=%eval(&include.) then step=0; else step=size-1-%eval(&include.); %end; if step=0 then cstar=.; if size=2 then cstar=.; run; proc print data=newalphas2 label; var step size effect probf cstar pval maxpval; label step="Selection Step" size="1+S" effect="Effect Entered" probf="p-to-enter" cstar="c" pval="adjusted p-to-enter" maxpval="monotonized adjusted p-to-enter"; title 'Forward Sequence'; run; data null; alpha=0; alphahat=0; ghat=0; size=1+%eval(&include.); cstar=.; run; data gammafast; set null ghatnew; if size<=2 then cstar=.; run; proc print data=gammafast label; title 'Sequence of Possible Models'; label size="1+S" cstar="c" ghat="gammahat" ; var alpha cstar ghat size; run; %if &count.=0 %then %do; data final; alphahat=0; c=(&q.-&p.+1)/(&p.+1); ghat=0; size=1+%eval(&include.); run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; %if %eval(&include.)>0 %then %do; data includesummary2; set includesummary; call symput('model',trim(left(includein))); run; proc glm data=&dataset._ffsr; model y= &model. / solution; output out=preds p=pred; run; %end; %else %do; proc glm data=&dataset._ffsr; model y= / solution; output out=preds p=pred; run; %end; %end; %else %do; data final; set gammafast; where size=&modelsize.; run; data _null; set gammafast; retain warn 0; where size<=&modelsize.; if cstar>2*(&q.-&p.+1)/(&p.+1) then warn=warn+1; else warn=warn; call symput('warn',warn); run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; %if %eval(&warn.)>0 and %eval(&cbound.)=0 %then %do; data _null; file print notitles; put "Note: The adjustment exceeded 2 times the initial value making it very difficult for"; put "interactions and squared terms to enter the model. Use cbound=2 for optimal results." ; run; %end; data summaryb; length model $32767. effect $100.; retain model; set step1; where nparmsin<=&modelsize.; size=nparmsin; if size>1 then model=trim(left(trim(left(model)) || ' ' || trim(left(effect)))); call symput('model',trim(left(model))); run; ods output ParameterEstimates=parameters; proc glm data=&dataset._ffsr; model y = &model. / solution; output out=preds p=pred; run; %end; /*Cleans up data sets */ ods listing close; proc datasets; delete mean mean2 includesummary:(gennum=all) summary2 stats std x y xcount part:(gennum=all) linear abcd:(gennum=all) newalphas:(gennum=all) cand:(gennum=all) alphas:(gennum=all) counts step:(gennum=all) othercounts effects:(gennum=all) amax summary:(gennum=all) final model1 fsr:(gennum=all) ghat:(gennum=all) null parameters stop q; run; quit; data &dataset.; set &dataset._original; run; %mend fastfsr6; %macro fastfsr7; %getdata; data null; terms=lowcase("&terms."); if trim(left(terms))="full" then terms2=0; else terms2=1; call symput('terms2',trim(left(put(terms2,best12.)))); run; %if &terms2.=0 %then %do; data null; call symput('maxmodelsize',''); run; %end; %else %do; data null; call symput('maxmodelsize',"stop=&terms."); run; %end; data stop; set stop; call symput('fullmodel2',trim(left(fullmodel))); call symput('p',trim(left(put(p,best12.)))); run; %if %eval(&include.)=0 %then %do; ods output selectionsummary=summary; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hier=none selection=forward(select=sl sle=.99999 &maxmodelsize.); run; %end; %else %do; ods output selectionsummary=summary; proc glmselect data=&dataset._ffsr; model y = &fullmodel2. / hier=none selection=forward(include=%eval(&include.) select=sl sle=.99999 &maxmodelsize.); run; %end; data summary2; set summary; where nparmsin>1; length model $30000.; retain model ''; model = trim(left(model)) || ' ' || trim(left(effectentered)); model = trim(left(model)); run; data alphas; set summary2; where step>0; retain maxpval; if probf>lag(probf) and probf>maxpval then maxpval=probf; else maxpval=maxpval; run; data alphas2; set alphas; check1=lag(maxpval); if step=1 then seqflag=1; else if maxpval>check1 then seqflag=1; else seqflag=0; run; proc sort data=alphas2; by descending nparmsin; run; data alphas3; set alphas2; alpha=maxpval; check2=lag(seqflag); if check2=. then choose=1; else if check2=1 then choose=1; else choose=0; run; proc sort data=alphas3; by nparmsin; where choose=1; run; data ghat; set alphas3; p=&p.; ghat=(((&p.-(nparmsin-1))*alpha))/(nparmsin-%eval(&include.)); gamma=input(&gamma.,best12.); if (&p.-(nparmsin-1))=0 then alphahat=1; else alphahat=gamma*(nparmsin-%eval(&include.))/(&p.-(nparmsin-1)); if ghat<=gamma then flag2=1; else flag2=0; lagalpha=lag(alpha); if lagalpha=. then lagalpha=0; run; proc sort data=ghat out=ghat2; by descending alpha; run; data ghat2; set ghat2; futalpha=lag(alpha); if futalpha=. then futalpha=1; run; proc sort data=ghat2; by alpha; run; proc sort data=ghat; by alpha; run; data ghatnew; merge ghat ghat2; by alpha; if alphahat>=futalpha then alphahat=alpha; if alphahat<=alpha then alphahat=alpha; if alphahat>1 then alphahat=1; run; proc sort data= ghatnew out=amax; by ghat; run; data amax; set amax; call symput('amax',left(put(nparmsin,best12.))); run; proc sort data=ghatnew out=ghat2; where flag2=1 and nparmsin <=&amax.; by flag2 alpha; run; data fsr; set ghat2; by flag2 alpha; if last.flag2 then output; run; data final; set fsr nobs=count; call symput('alphahat', alphahat); call symput('size',nparmsin); call symput('model',trim(left(model))); run; data _null_; if 0 then set final nobs=count; call symput('count',left(put(count,4.))); stop; run; ods output variables=&dataset._ffsr_labels; proc contents data=&dataset._ffsr; run; ods listing; proc print data=&dataset._ffsr_labels label; var variable label; label variable="Variable name" label="Original name"; title 'Variable names'; run; data null; set summary; where step=0; keep effectentered nparmsin; run; data newalphas; set null; probf=0; maxpval=0; run; data newalphas2; set newalphas alphas; size=nparmsin; %if %eval(&include.)=0 %then %do; step=size-1; %end; %else %do; if size-1<=%eval(&include.) then step=0; else step=size-1-%eval(&include.); %end; run; proc print data=newalphas2 label; var step size effectentered probf maxpval; label step="Selection Step" size="1+S" effectentered="Effect Entered" probf="p-to-enter" maxpval="monotonized p-to-enter"; title 'Forward Sequence'; run; data null; alpha=0; alphahat=0; ghat=0; nparmsin=1+%eval(&include.); run; data gammafast; set null ghatnew; size=nparmsin; run; proc print data=gammafast label; title 'Sequence of Possible Models'; label size="1+S" ghat="gammahat" ; var alpha ghat size; run; %if &count.=0 %then %do; data final; alphahat=0; ghat=0; size=1+%eval(&include.); run; data new; set summary; where nparmsin>1 and step=0; length model $30000.; retain model ''; model = trim(left(model)) || ' ' || trim(left(effectentered)); model = trim(left(model)); run; data null; set new; call symput('model',trim(left(model))); run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; %if %eval(&include.)>0 %then %do; proc glm data=&dataset._ffsr; model y= &model. / solution; output out=preds p=pred; run; %end; %else %do; proc glm data=&dataset._ffsr; model y= / solution; output out=preds p=pred; run; %end; %end; %else %do; data final; set gammafast; where size=&size.; run; proc print data=final label; var alphahat size; title 'Fast FSR estimates'; label size="1+S" alphahat="alpha estimate"; run; title; ods output ParameterEstimates=parameters; proc glm data=&dataset._ffsr; model y = &model. / solution; output out=preds p=pred; run; %end; /*Cleans up data sets*/ ods listing close; proc datasets; delete mean mean2 includesummary:(gennum=all) summary2 stats std x y xcount part:(gennum=all) stats alphas:(gennum=all) newalphas:(gennum=all) abcd:(gennum=all) amax summary final fsr:(gennum=all) ghat:(gennum=all) null parameters; run; quit; data &dataset.; set &dataset._original; run; %mend fastfsr7; %macro fsr_linear(dataset,model,y,gamma=0.05,method=1,terms=full,include=0,cbound=none); ods listing; options noquotelenmax nonotes nodate nonumber ps=2000 ls=100; data null; terms=lowcase("&terms."); if trim(left(terms))="full" then terms=0; else terms=1; call symput('termswarn',terms); run; %let includeint=0; %let includemain=0; %let includein=; %let linear=; %let remlabel=; %let labelvar=; data _null; length methodmsg $100.; if &method.=1 then methodmsg="1 (Strong Hierarchy)"; else if &method.=2 then methodmsg="2 (No Hierarchy)"; else if &method.=3 then methodmsg="3 (Weak Hierarchy)"; else if &method.=4 then methodmsg="4 (No Hierarchy with Iterated Adjustment)"; else if &method.=5 then methodmsg="5 (No Hierarchy with Sequential Adjustment)"; else if &method.=6 then methodmsg="6 (Weak Hierarchy with Sequential Adjustment)"; else if &method.=7 then methodmsg="7 (Main effects only)"; call symput('methodmsg',trim(left(methodmsg))); run; data _null; file print notitles; put " Method = &methodmsg. was chosen. The program"; put ; put " 1. Standardizes all predictors to have mean 0 and variance 1 and renames them as"; put " x1-xp (see table below), numbered as they occur in the data set."; put ; put " 2. Fits main effects, interactions, and squared terms according to a modified forward"; put " selection procedure that on average has False Selection Rate (FSR) = &gamma.."; put ; put " Developed by Hugh Crews, August 2008."; put ; put " Macro call parameters:"; put ; put " Dataset = &dataset."; put " Model = &model."; put " Y = &y."; put " Gamma = &gamma."; put " Method = &method."; put " Terms = &terms."; put " Include = &include."; put " Cbound = &cbound."; put ; put " The number of effects forced into the model is %eval(&include.)."; put ; %if %eval(&termswarn.)=1 %then %do; put " Note: The terms=k option was used. Suboptimal results may occur if k is so small that"; put " gammahat never reaches its maximum."; %end; run; options formdlim=' '; data null; cbound=lowcase("&cbound."); if trim(left(cbound))="none" then cbound2=0; else cbound2=input(cbound,best12.); call symput('cbound',cbound2); run; %if &method.=1 %then %do; %fastfsr1; %end; %if &method.=2 %then %do; %fastfsr2; %end; %if &method.=3 %then %do; %fastfsr3; %end; %if &method.=4 %then %do; %fastfsr4; %end; %if &method.=5 %then %do; %fastfsr5; %end; %if &method.=6 %then %do; %fastfsr6; %end; %if &method.=7 %then %do; %fastfsr7; %end; /*Turn log notes back on and listing back on*/ options notes formdlim=''; ods listing; %mend fsr_linear;