/* ---------------------------------------------------------------------\ | 4000 samples of size n=10 for each of 5 rho values are generated. | | PROC GCHART is used to plot the 5 r histograms. Converting each r | | to a Fisher Z we plot another 5 Z histograms. | | | | Confidence intervals for rho are also computed from each sample, and | | coverage properties may be studied by simulation through the behavior | | of the variable "cover". Endpoints are plotted versus sample so that | | the 5% of the intervals that miss may be visualized | | (modified from Dr. Dickey's ST512 webpage) | \ ---------------------------------------------------------------------*/ options nodate formdlim="_"; %let seed=123; * fixed for reproducibly generated random variables; %let n=10; * other sample sizes may be chosen here; data r; do rho = -.8, -.5, 0, .5, .8; B=sqrt(1-rho*rho); psi = .5*log( (1+rho)/(1-rho) ); do sample = 1 to 400; sxy=0; sx=0; sy=0; syy=0; sxx=0; do t =1 to &n; X=normal(&seed); Y = rho*X + B*normal(&seed); sx=sx+x; sy=sy+y; sxx=sxx+x*x; sxy=sxy+x*y; syy=syy+y*y; end; r = (sxy-sx*sy/&n)/ sqrt ( (sxx-sx*sx/&n)*(syy-sy*sy/&n) ); Z = .5*log( (1+r)/(1-r) ); rratio=(1+r)/(1-r); zcrit=probit(.975)/sqrt(&n-3); reject=(abs(z)>zcrit); * an 0-1 variable indicating rejection of h0:rho=0; rlow=(rratio * exp(-2*zcrit)-1)/(rratio*exp(-2*zcrit)+1); rhigh=(rratio * exp(2*zcrit)-1)/(rratio*exp(2*zcrit)+1); cover=(rlow < rho) * (rho < rhigh); keep r rho sample Z rlow rhigh cover reject; output; end; end; proc means; var r Z cover rlow rhigh reject; by rho; proc gchart; title "r has an asymmetric sampling distribution"; vbar r/group=rho; run; proc gchart; title "Fisher's Z has a symmetric sampling distribution"; vbar Z/group=rho; run; proc gplot; title "Most intervals cover rho"; by rho; plot rlow*sample="L" rhigh*sample="U"/overlay vref=(-.8 -.5 0 .5 .8) cvref=blue; run;