OPTIONS LS=75 NODATE;

/*
   Spider lengths by sex.
   This example has been analysed several times already; here simple linear
   regression techniques are used to test for a difference in mean length
   by sex.
*/

/*
   Build the analysis dataset SPIDERS with one observation per spider:
     sex        'm' or 'f'
     male       indicator variable: 1 for males, 0 for females
     sp_length  measured length

   The in-stream data alternate male, female, male, female, ...
   The double trailing @@ holds the current data record across iterations of
   the DATA step, so values are consumed strictly in order no matter how many
   of them sit on each data line.  (A single trailing @ is released at the end
   of every iteration and would only work with exactly one male/female pair
   per line.)
*/
DATA spiders;
  DO sex = 'm', 'f';
    male = (sex = 'm');   /* a true comparison evaluates to 1, false to 0 */
    INPUT sp_length @@;
    OUTPUT;
  END;
  DATALINES;
5.2 8.25
4.7 9.8
5.7 6.1
5.65 9
5.75 9.5
4.7 9.95
4.8 10.8
6.2 9.3
5.5 6.3
5.95 8.3
5.75 5.9
5.95 6.6
5.4 8.75
5.65 8.35
5.9 7.05
7.5 7.05
5.2 7.55
6.2 7
5.85 8.7
7 8.3
6.45 8.45
6.35 8.1
5.85 7.8
5.75 8
6.1 7.95
6.55 7.55
6.95 9.1
6.8 8
6.35 7.5
5.8 9.6
;
RUN;

/*
   Consider a t-test.  Compute the pooled variance, sp^2, by hand from the
   per-group sample sizes and standard deviations (PROC TTEST prints them for
   each level of the CLASS variable; PROC MEANS with CLASS sex reports the
   same summaries).
*/

/* A bare "PROC TTEST;" would also run, but it is a good habit to name the
   dataset explicitly with DATA=. */
PROC TTEST DATA=spiders;
  CLASS sex;
  VAR sp_length;
RUN;

/*
   Using the PROC TTEST output, find the estimated difference of population
   mean lengths (females - males).  Find a confidence interval for this
   difference and a p-value for the hypothesis that it is zero.
*/

/*
   Uncomment the PROC REG code below to carry out statistical inference that
   is equivalent to the t-test.  The regression model has one predictor
   variable, "male", which is just a 0-1 indicator for males.
   PROC REG is an interactive procedure: RUN executes the statements but the
   procedure stays active, so QUIT is included to end it.
*/

/*
PROC REG DATA=spiders;
  MODEL sp_length=male;
  PLOT RESIDUAL.*PREDICTED.;
RUN;
QUIT;
*/

/*
   Using the PROC REG output, find the estimated difference of population
   mean lengths (females - males).  Find a confidence interval for this
   difference and a p-value for the hypothesis that it is zero.
   Hint: the coefficient on "male" estimates (male mean - female mean), so
   mind the sign when reporting females - males.
*/

/*
   Note the evidence of inhomogeneity of variance from both the PROC REG
   residual plot and the equality-of-variances F test in the PROC TTEST
   output.
*/

/* Try the log transformation: add ly = log(sp_length).
   This step replaces SPIDERS in place, keeping every existing variable. */
DATA spiders;
  SET spiders;
  ly = LOG(sp_length);
RUN;

/*
PROC TTEST DATA=spiders;
  TITLE "log-transformed spiderlengths";
  CLASS sex;
  VAR ly;
RUN;

PROC REG DATA=spiders;
  TITLE "log-transformed spiderlengths";
  MODEL ly=male;
  PLOT RESIDUAL.*PREDICTED.;
RUN;
QUIT;
*/