/*----------------------------------------------------------------- | Example 1: 2 of 10 customers return a purchased item. We want | | to estimate the probability, in general, of a return for that | | item. Common sense says it should be Pr{return}=0.20. | | Suppose we have n "trials" each with probability p of producing| | an event. The (binomial) probability of X events out of n trials| | is then Pr{X} = [n!/(X!(n-X)!)](p**X)[(1-p)**(n-X)]. We have n=10| | and X=2 so what value of p maked Pr{X} the largest, that is, | | what value of p gives the distribution with the maximum | | probability of delivering the results we observed? That p is the| | "maximum likelihood estimate" of the true p. For example q we | | graph the likelihood, ignoring n!/(X!(n-X)!) which is 5 and does | | not change with p anyway. | -----------------------------------------------------------------*/ axis1 offset = (5,5) label=(angle=90); data like; do p = 0 to 1 by .01; L = p**2*(1-p)**8; output; end; proc gplot; plot L*p/href=0.20 overlay; symbol1 v=none i=join; title "Example 1: Likelihood for n=10, r=2"; run; /*------------------------------------------------------------------ | Example 2: 9 of 15 seeds germinate but they are in soils of | | different temperatures. We want to let the probability of | | germinating be a function of tmeperature as follows: | | L= a + bX (X=temperature, possibly centered) | | p = exp(L)/(1+exp(L)) i.e. 1/(exp(-L)+1) | | The likelihood is now a function of a and b. We now use PROC | | LOGISTIC to maximize the likelihood and we plot the likelihood. | ----------------------------------------------------------------*/ title "Logistic Regression"; * Centering * **************; %let X0=70; **************; Data seeds; Input Germ $ 1-3 n @; Y=(Germ="Yes"); If Germ=" " then Y=.; do i=1 to n; input temp @; tempc = temp - &X0; output; end; label tempc = "temperature - &X0"; cards; Yes 9 64 67 70 71 73 77 78 82 85 No 6 50 58 63 67 72 75 23 46 48 50 52 54 56 58 60 62 64 66 68 70 72 74 76 78 80 82 84 86 88 90 PROC LOGISTIC data=seeds order=data; model germ=temp / itprint ctable pprob=.6923 ; output out=out1 predicted=p xbeta=logit; title2 "Using raw temperatures "; proc sort; by temp; proc gplot; plot Y*temp p*temp / overlay vref=.6 href=69.05; symbol1 v=dot i=none c=red; symbol2 v=none i=join c=blue w=3; run; /*----------------------------------------- | Note: In SAS classification table, they | | omit temperature 70 then refit then | | classify the temperature 70 point so | | their correct and incorrect counts are | | not those using the overall slope and | | intercept. | ------------------------------------------*/ ****** Graphing the likelihood and ln(likelihood)*****; Data next; array Y(15); Array X(15); do i = 1 to 9; Y(i)=1; input xx @; X(i) = XX-&X0; end; do i = 10 to 15; Y(i)=0; input xx @; X(i) = XX-&X0; end; do alpha = -3 to 3 by .05; do beta = -.2 to 0.7 by .01; ** compute the likelihood at this (alpha, beta) combo **; like = 1; do i=1 to 15; L = alpha + beta*X(i); if Y(i) = 1 then like = like*(exp(L)/(1+exp(L))); if Y(i) = 0 then like = like*(1-exp(L)/(1+exp(L))); end; ** end likelihood loop **; loglike = -2*log(comb(15,9)*like); keep alpha beta like loglike; if loglike > cinv(.95,2) then loglike = cinv(.95,2); label loglike = "-2 ln(L)"; output; end; end; ** end alpha beta loops **; cards; 64 67 70 71 73 77 78 82 85 50 58 63 67 72 75 ; axis1 label=(angle=90); proc g3d; plot alpha*beta=like; title2 "Using input (temperature-&X0)"; proc g3d; plot alpha*beta=loglike; title3 "Plane shows 95% confidence ellipsoid"; run; proc gcontour; plot alpha*beta=like; title "Confidence Ellipsoids"; proc logistic data=seeds order=data; model germ = tempc; run;