WinBUGS Codes for Module 12: MCMC for Genetics Copyright(2002): Sujit K. Ghosh, NC State University. I. Estimating the female birth model{ y ~ dbin(theta, n) theta ~ dbeta(b[1], b[2]) phi <- (1-theta)/theta } Data: list(y=241945,n=493472,b=c(0.5,0.5)) Inits: list(theta=0.5) list(theta=0.1) list(theta=0.9) Output: (#chains=3, thin=1, update took 3s) node mean sd MC error 2.5% median 97.5% start sample theta 0.4903 7.147E-4 6.284E-6 0.4889 0.4903 0.4917 1001 15000 phi 1.04 0.002973 2.614E-5 1.034 1.04 1.046 1001 15000 ------------------------------------------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------------------------------------------ II. Estimating allele frequencies under HWE & HWD model{ y[1:3] ~ dmulti(theta[], n) #Under HWE: theta[1] <- p*p theta[2] <- 2*p*(1-p) theta[3] <- (1-p)*(1-p) p ~ dbeta(b[1], b[2]) #Under HWD: #theta[1] <- p*p + p*(1-p)*f #theta[2] <- 2p*(1-p)*(1-f) #theta[3] <- (1-p)*(1-p) + p*(1-p)*f #p ~ dunif(lower.p, 0.5) #f ~ dunif(lower.f, 1) #lower.p <- max(0,-f/(1-f)) #lower.f <- -p/(1-p) #D <- theta[1] - p*p } Data: Under HWE list(y=c(53,95,38),n=186,b=c(0.5,0.5)) Inits: list(p=0.5) list(p=0.1) list(p=0.9) Under HWD list(y=c(53,95,38),n=186) Inits: list(p=0.25, f=0) list(p=0.1, f=-0.1) list(p=0.5, f=0.1) Output: Under HWE (#chain=3, thin=1, update took 3s) node mean sd MC error 2.5% median 97.5% start sample p 0.5401 0.0257 2.151E-4 0.4908 0.5401 0.5901 1001 15000 theta[1] 0.2923 0.02778 2.335E-4 0.2409 0.2917 0.3482 1001 15000 theta[2] 0.4955 0.004509 4.185E-5 0.4838 0.4968 0.5 1001 15000 theta[3] 0.2122 0.02366 1.972E-4 0.168 0.2115 0.2593 1001 15000 ------------------------------------------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------------------------------------------ III. Estimating allele frequencies: ABO blood groups model{ # n[1]=nA, n[2]=nB, n[3]=nAB, n[4]=nO (phenotype frequencies) # prior on allele frequencies # this is a trick to get uniform dirichlet prior on pa, pb, po (allele frequencies) fa ~ dgamma(1,1.0) # unscaled frequency of allele A fb ~ dgamma(1,1.0) # unscaled frequency of allele B fo ~ dgamma(1,1.0) # unscaled frequency of allele O pa <- fa/(fa + fb + fo) # frequency of allele A pb <- fb/(fa + fb + fo) # frequency of allele B po <- fo/(fa + fb + fo) # frequency of allele O # distribution of phenotypes p[1] <- pa*pa + 2*pa*po # frequency of A phenotype p[2] <- pb*pb + 2*pb*po # frequency of B phenotype p[3] <- 2*pa*pb # frequency of AB phenotype p[4] <- po*po # frequency of O phenotype n[1:4] ~ dmulti(p[],total) } Data: list(n=c(725,258,72,1073),total=2128) Inits: list(fa=1.0, fb=1.0, fo=1.0) list(fa=1.0, fb=2.0, fo=3.0) list(fa=3.0, fb=2.0, fo=1.0) Output: (#chain=3, thin=2, update took 3s) node mean sd MC error 2.5% median 97.5% start sample pa 0.2092 0.006667 5.213E-5 0.1963 0.2092 0.2224 1001 15000 pb 0.08102 0.004298 3.209E-5 0.07281 0.08097 0.08968 1001 15000 po 0.7097 0.007383 5.691E-5 0.6951 0.7099 0.7241 1001 15000 ------------------------------------------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------------------------------------------ IV. Outcrossing data model{ #The Likelihood: # The progeny arrays n11[1:3] ~ dmulti(x11[],n[1]) n12[1:3] ~ dmulti(x12[],n[2]) n22[1:3] ~ dmulti(x22[],n[3]) # A1A1 mothers x11[1] <- (1-s)*p + s x11[2] <- (1-s)*(1-p) x11[3] <- 0 # A1A2 mothers x12[1] <- (1-s)*p/2 + s/4 x12[2] <- 1/2 x12[3] <- (1-s)*(1-p)/2 + s/4 # A2A2 mothers x22[1] <- 0 x22[2] <- (1-s)*p x22[3] <- (1-s)*(1-p) + s #The Priors: p ~ dbeta(b, b) s ~ dbeta(b, b) } Data: list(n11=c(120,5,0), n12=c(11,28,16), n22=c(0,9,104), n=c(125,55,113), b=1) Inits: list(p=.5,s=.5) list(p=.15,s=.15) list(p=.85,s=.85) Output : (#chains=3, burn-in=5000, thin=1, update took 13s) node mean sd MC error 2.5% median 97.5% start sample p 0.6265 0.1135 8.983E-4 0.3913 0.6322 0.8301 5001 15000 s 0.8754 0.0311 2.702E-4 0.8071 0.8778 0.9292 5001 15000 x11[1] 0.9534 0.01871 1.465E-4 0.91 0.9557 0.9828 5001 15000 x11[2] 0.0466 0.01871 1.465E-4 0.01721 0.04427 0.08999 5001 15000 x12[1] 0.2579 0.007473 6.303E-5 0.2434 0.2577 0.2731 5001 15000 x12[3] 0.2421 0.007473 6.303E-5 0.227 0.2423 0.2566 5001 15000 x22[2] 0.07805 0.02409 2.164E-4 0.03766 0.07569 0.1311 5001 15000 x22[3] 0.922 0.02409 2.164E-4 0.869 0.9243 0.9624 5001 15000 deviance 18.89 1.936 0.02157 17.02 18.27 24.22 5001 15000 ------------------------------------------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------------------------------------------ V. DNA sequence data model{ #The Likelihood: # The DNA seuqences for(l in 1:p){ for(i in 1:n){ x[l,i] ~ dcat(prob[l,i,]) for(j in 1:4){ prob[l,i,j] <- equals(k[i],j)*exp(-v[l]) + (1-exp(-v[l]))*pi[j] } } v[l] ~ dunif(0, b) } for(i in 1:n){ k[i] ~ dcat(pi[]) } #The Priors: for(j in 1:4){ g[j] ~ dgamma(a, a) pi[j] <- g[j]/sum(g[]) } } Data: (source: Sharon Browning) list(x=structure( .Data = c(3,4,1,1,1,4,1,4,1,3, 4,4,4,1,1,2,2,1,1,1,1,2,1,4,2,1,3,1,4,4,3,4,3,1,1,4,2,4,3,1, 2,1,1,2,1,3,1,3,3,2,4,4,1,2,3,1,2,2,2,2, 4,4,1,4,4,4,1,2,2, 3,4,1,1,1,4,1,4,1,3, 4,4,4,1,1,2,2,1,1,1,1,2,1,4,2,1,3,1,4,4,3,4,3,1,1,4,2,4,3,1, 4,1,1,2,1,3,1,3,3,2, 4,2,1,2,1,1,2,2,2,2, 4,4,1,4,4,4,1,2,2, 3,4,1,1,1,2,1,4,1,3, 4,4,4,1,1,4,2,1,1,1,1,2,1,4,4,1,3,1,4,4,3,4,3,1,1,4,2,4,1,1, 2,1,1,4,1,3,1,3,3,2, 4,2,3,1,1,1,2,2,4,2, 4,4,3,2,4,4,1,2,2), .Dim=c(3,69) ), n=69, p=3, b=1, a=0.5) Inits: list(v=c(0.1,0.1,0.1), g=c(2.5, 2.5, 2.5, 2.5)) list(v=c(0.05,0.02,0.01), g=c(1.5, 4.0, 3.5, 1.0)) list(v=c(0.2,0.1,0.5), g=c(5.0, 2.0, 1.0, 2.0)) Output with one variable (obtained on 04/30/02): (#chains=3, burn-in=1514, thin=1, update took 524s) node mean sd MC error 2.5% median 97.5% start sample pi[1] 0.3503 0.05175 3.922E-4 0.252 0.3493 0.4546 1514 15000 pi[2] 0.2171 0.04457 3.544E-4 0.1368 0.2146 0.3106 1514 15000 pi[3] 0.1429 0.03747 3.019E-4 0.0781 0.1398 0.2233 1514 15000 pi[4] 0.2897 0.04878 3.872E-4 0.1998 0.2881 0.3904 1514 15000 v[1] 0.06216 0.0385 3.67E-4 0.01018 0.05454 0.1565 1514 15000 v[2] 0.04213 0.03202 3.101E-4 0.002808 0.03468 0.1232 1514 15000 v[3] 0.258 0.08143 7.352E-4 0.1255 0.2482 0.4421 1514 15000 deviance 119.4 4.578 0.04391 113.7 118.5 131.4 1514 15000 ------------------------------------------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------------------------------------------ VI. Estimating allele frequencies: Haplotypes model{ # prior on gamete frequencies # this is a trick to get uniform dirichlet prior on p (gamete frequencies) p1 ~ dgamma(1,1.0) # unscaled frequency of gamete AB p2 ~ dgamma(1,1.0) # unscaled frequency of gamete Ab p3 ~ dgamma(1,1.0) # unscaled frequency of gamete aB p4 ~ dgamma(1,1.0) # unscaled frequency of gamete ab psum <- p1 + p2 + p3 + p4 p[1] <- p1/psum # frequency of gamete AB p[2] <- p2/psum # frequency of gamete Ab p[3] <- p3/psum # frequency of gamete aB p[4] <- p4/psum # frequency of gamete ab # n contains genotype counts, pg contains genotype frequencies # n[i]=number of genotype i; pg[i]=frequency of genotype i pg[1] <- p[1]*p[1] # frequency of AABB genotype pg[2] <- 2*p[1]*p[2] # frequency of AABb genotype pg[3] <- p[2]*p[2] # frequency of AAbb genotype pg[4] <- 2*p[1]*p[3] # frequency of AaBB genotype pg[5] <- 2*p[1]*p[4] + 2*p[2]*p[3] # frequency of AaBb genotype pg[6] <- 2*p[2]*p[4] # frequency of Aabb genotype pg[7] <- p[3]*p[3] # frequency of aaBB genotype pg[8] <- 2*p[3]*p[4] # frequency of aaBb genotype pg[9] <- p[4]*p[4] # frequency of aabb genotype n[1:9] ~ dmulti(pg[],total) } Data: list(n=c(19,5,0,8,8,0,0,0,0),total=40) Inits: list(p1=1.0,p2=1.0,p3=1.0,p4=1.0) list(p1=3.0,p2=2.0,p3=1.0,p4=2.0) list(p1=1.0,p2=2.0,p3=3.0,p4=1.0) Output: (#chains=3, thin=1, update took 3s) node mean sd MC error 2.5% median 97.5% start sample p[1] 0.6981 0.05274 4.541E-4 0.5887 0.7001 0.7967 1001 15000 p[2] 0.08771 0.03496 3.595E-4 0.03241 0.08341 0.1669 1001 15000 p[3] 0.1235 0.03967 3.424E-4 0.0578 0.1193 0.2103 1001 15000 p[4] 0.09067 0.0355 3.057E-4 0.0282 0.08812 0.1673 1001 15000 ------------------------------------------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------------------------------------------