# 1st try at IBD by looping setwd(paste(Sys.getenv('HOME'),'Research/Perturb/artifacts',sep='/')) source('Grappa.R') source('extras.R') #----------------------------------------------------------- # Mix, peak area problem - # one big likelihood term, single 'data' node # one trace (T1) and one marker (D2) for now # set up DAG # The Evett problem alleles<-list( D8=c('10','11','14','x'), D18=c('13','16','17','x'), D21=c('A','B','C','D','x'), FGA=c('A','B','C','x'), Tho1=c('A','B','C','x'), vwa=c('A','B','C','D','x') ) freqs<-list( D8=c(0.094,0.066,0.209,0.631), D18=c(0.125,0.137,0.115,0.623), D21=c(0.031,0.258,0.069,0.090,0.552), FGA=c(0.187,0.165,0.139,0.509), Tho1=c(0.108,0.304,0.000,0.588), vwa=c(0.216,0.270,0.219,0.093,0.202) ) evidence<-list( D8='10-14', D18='13-13', D21='C-D', FGA='A-B', Tho1='A-B', vwa='A-C' ) relweight<-list( D8=c(0.4347,0.0285,0.5368,0), D18=c(0.8871,0.0536,0.0592,0), D21=c(0.0525,0.0676,0.4284,0.4515,0), FGA=c(0.5699,0.3908,0.0393,0), Tho1=c(0.4015,0.5985,0,0), vwa=c(0.4170,0.0884,0.4747,0.0199,0) ) IBD<-function(patt,m,spg,smg,aspg,asmg) { # pattern|relation distribution zibd<-matrix(0,7,5) zibd[1,1]<-1 zibd[c(2,5),2]<-zibd[c(2,4),3]<-zibd[c(1,3),4]<-zibd[c(1,2),5]<-0.5 # qr<-c('none','1pa2','2pa1','hsma','hspa')%in%vs.relation zibd<-zibd[,qr,drop=FALSE] qp<-apply(zibd,1,sum)>0 zibd<-zibd[qp,,drop=FALSE] # pool1<-cs('pool1',m); pool2<-cs('pool2',m); pool3<-cs('pool3',m) #IBD patterns tab(c(patt,'relation'),c(sum(qp),sum(qr)),zibd, c('none','1p2p','1m2m','1p2m','1m2p','1p1m','2p2m')[qp]) # correlated founding genes - suspect and alt suspect founder(spg) founder(pool1) founder(pool2) founder(pool3) which(aspg,patt,lw=c(2,1,2,2,2,2,2),spg,pool1) which(smg,patt,lw=c(3,3,3,3,2,1,3),spg,aspg,pool2) which(asmg,patt,lw=c(4,4,3,1,4,4,2),spg,aspg,smg,pool3) } UAF<-function(M,m,g,Nc='c',Nd='d',Npool='pool',Ntemp='temp') { # M is Dirichlet parameter, m is marker suffix, g is vector # of founder gene names (excluding suffix) # NG correlated founding genes, Polya urn # c's and d's are binary selection variables # pool's and temp's are allele-valued # for N = 2,3,...,NG # cN is choice between a new draw from gene pool (poolN) # and random choice among preceding g's (tempN1) # d's create tempN1 from successive biased binary choices, # with tempNM holding intermediate stages # tempNj is random choice among first j genes out of N NG<-length(g) founder(cs(g[1],m)) for(i in 2:NG) founder(cs(Npool,i,m)) for(i in 2:NG) query(cs(Nc,i,m),c(i-1,M)/(M+i-1)) which(cs(g[2],m),cs(Nc,2,m),cs(g[1],m),cs(Npool,2,m)) for(i in 3:NG) { for(j in 2:(i-1)) query(cs(Nd,i,j,m),c(j-1,1)/j) which(cs(Ntemp,i,2,m),cs(Nd,i,2,m),cs(g[1],m),cs(g[2],m)) if(i>3) for(j in 3:(i-1)) which(cs(Ntemp,i,j,m),cs(Nd,i,j,m),cs(Ntemp,i,j-1,m),cs(g[j],m)) which(cs(g[i],m),cs(Nc,i,m),cs(Ntemp,i,i-1,m),cs(Npool,i,m)) } } run<-function(use,model,pars,ptheta,prior=F) { clean(T) if(substring(model,1,3)=='IBD') { if(length(pars)==2) { alpha<-pars[1]; gamma<-pars[2] tab('relation',5,c(1-alpha-gamma,alpha/2,alpha/2,gamma/2,gamma/2), c('none','1pa2','2pa1','hsma','hspa')) } else { # scalar pars - integer fixing that value of relation tab('relation',5,pars==(1:5), c('none','1pa2','2pa1','hsma','hspa')) } } else if(model=='UAF') { M<-pars } else if(model !='baseline') stop('not a valid model') tab('theta',ntheta,ptheta,(0:(ntheta-1))/(ntheta-1)) query('target',,c('SU','2U')) for(m in use) { vs('alleles',alleles[[m]]) gene.freq<<-freqs[[m]] r<-relweight[[m]] smg<-cs('smg',m); spg<-cs('spg',m); sgt<-cs('sgt',m) u1mg<-cs('u1mg',m); u1pg<-cs('u1pg',m); u1gt<-cs('u1gt',m) u2mg<-cs('u2mg',m); u2pg<-cs('u2pg',m); u2gt<-cs('u2gt',m) patt<-cs('patt',m) if(model=='baseline') { founder(smg); founder(spg) founder(u1mg); founder(u1pg) founder(u2mg); founder(u2pg) } else if(model=='UAF') { UAF(M,m,c('spg','smg','u1pg','u1mg','u2pg','u2mg')) } else if(model=='IBDu1u2') { founder(smg); founder(spg) IBD(patt,m,u1pg,u1mg,u2pg,u2mg) } else if(model=='IBDsu1') { founder(u2mg); founder(u2pg) IBD(patt,m,u1pg,u1mg,spg,smg) } else if(model=='IBDsu2') { founder(u1mg); founder(u1pg) IBD(patt,m,spg,smg,u2pg,u2mg) } genotype(sgt,smg,spg) genotype(u1gt,u1mg,u1pg) genotype(u2gt,u2mg,u2pg) vs.gt<-get(cs('vs.','sgt',m)) p1gt<-cs('p1gt',m) si(p1gt,sgt,u1gt,'target',length(vs.gt)) vs(p1gt,vs.gt) ################## # allele counting nodes z0<-matrix(unlist(strsplit(vs.gt,'-')),nrow=2) for(a in vs.alleles) { z1<-outer(0:2,apply(z0==a,2,sum),'==') z2<-as.numeric(z1); dim(z2)<-dim(z1) tab(c(cs(p1gt,'_',a),p1gt),,z2,0:2) tab(c(cs(u2gt,'_',a),u2gt),,z2,0:2) } #--------- likelihood calculation nall<-length(vs.alleles) likelihood<-array(1,c(2,3,3,ntheta,nall)) for(ith in 1:ntheta) { th<-vs.theta[ith] mu<-0.5*outer(th*(0:2),(1-th)*(0:2),'+') w<-mu*(1/sigma2-1) for(ia in 1:nall) { likelihood[1,,,ith,ia]<-likelihood[1,,,ith,ia]* ((mu>0)*r[ia]^w/gamma(pmax(1e-6,w))+(mu==0)*(r[ia]==0)) } } likelihood[1,,,,]<-likelihood[1,,,,]/max(likelihood[1,,,,]) likelihood[2,,,,]<-max(likelihood[1,,,,])-likelihood[1,,,,] # likelihood is 2 by 3 by 3 for each (theta, allele) combination - # 1st layer is likelihood for that combination of allele counts, # and theta and allele, 2nd layer is a constant minus this, # making sure everything is >=0 # ---------- end of likelihood calculation for(ia in 1:nall) { a<-vs.alleles[ia] tab(c(cs('mix',m,'_',a),cs(p1gt,'_',a),cs(u2gt,'_',a),'theta'),, likelihood[,,,,ia],c('yes','no')) } } ########## # by('b','p1gt1_10','u2gt1_10') ########## ################## if(prior) { compile() equil() } else { for(m in use) { vs('alleles',alleles[[m]]) sgtevid<-evidence[[m]] prop.evid(cs('sgt',m),sgtevid) for(a in vs.alleles) prop.evid(cs('mix',m,'_',a),'yes') } } if(is.null(getOption('silent'))||!getOption('silent')) pnmarg('target') x<-marg(tcq[[acliq('target')]],'target') invisible(x) } # end of run() definition ############################################################ sigma2<-0.01 ntheta<-11 ptheta<-rep(1/ntheta,ntheta) options(silent=FALSE) #=================== run(1,'baseline',0,ptheta) run(1,'baseline',0,ptheta,prior=T) joint(c("p1gt1_10","u2gt1_10")) run(1,'IBDu1u2',c(0.05,0.05),ptheta) run(1,'IBDu1u2',c(0.05,0.05),ptheta,prior=T) joint(c("p1gt1_10","u2gt1_10")) run(1,'IBDsu1',c(0.05,0.05),ptheta) run(1,'IBDsu1',c(0.05,0.05),ptheta,prior=T) joint(c("p1gt1_10","u2gt1_10")) run(1,'IBDsu2',c(0.05,0.05),ptheta) run(1,'IBDsu2',c(0.05,0.05),ptheta,prior=T) joint(c("p1gt1_10","u2gt1_10")) #=================== run(1,'baseline',0,ptheta) run(1,'UAF',10000,ptheta) run(1,'UAF',100,ptheta) run(1,'IBDsu1',c(0.05,0.05),ptheta) run(2,'baseline',0,ptheta) run(2,'UAF',10000,ptheta) run(2,'UAF',100,ptheta) run(2,'IBDsu1',c(0.05,0.05),ptheta) run(1:2,'baseline',0,ptheta) run(1:2,'UAF',10000,ptheta) run(1:2,'UAF',100,ptheta) run(1:2,'IBDsu1',c(0.05,0.05),ptheta) #=================== # all 6 markers # run(1:6,'baseline',0,ptheta) # 171046229 options(silent=TRUE) # baseline, 6 markers a<-array(0,c(6,ntheta,2)) for(itheta in 1:ntheta) for(m in 1:6) a[m,itheta,]<-run(m,'baseline',0,itheta==(1:ntheta)) a[a==0]<-min(a[a>0])*1e-10 b<-apply(log(a),c(2,3),sum) c<-as.vector(ptheta%*%exp(b-max(b))) c[1]/c[2] # 171043922 # baseline, 2 markers a<-array(0,c(2,ntheta,2)) for(itheta in 1:ntheta) for(m in 1:2) a[m,itheta,]<-run(m,'baseline',0,itheta==(1:ntheta)) a[a==0]<-min(a[a>0])*1e-10 b<-apply(log(a),c(2,3),sum) c<-as.vector(ptheta%*%exp(b-max(b))) c[1]/c[2] # 814.4151 # IBD 2 markers alpha<-gamma<-0.05 a<-array(0,c(2,ntheta,5,2)) # no. markers, no theta's, no. relations, 2 for(m in 1:2) for(itheta in 1:ntheta) for(ir in 1:5) a[m,itheta,ir,]<-run(m,'IBDsu1',ir,itheta==(1:ntheta)) a[a==0]<-min(a[a>0])*1e-10 b<-apply(log(a),c(2,3,4),sum) # b is log product over markers d<-(ptheta%o%c(1-alpha-gamma,alpha/2,alpha/2,gamma/2,gamma/2)) c<-apply(array(d,dim(b))*exp(b-max(b)),3,sum) c[1]/c[2] # 415.8375 # all at once: options(silent=FALSE) run(1:2,'IBDsu1',c(0.05,0.05),ptheta) options(silent=TRUE) # 415.0168 # IBD 4 markers alpha<-gamma<-0.05 a<-array(0,c(4,ntheta,5,2)) # no. markers, no theta's, no. relations, 2 for(m in 1:4) for(itheta in 1:ntheta) for(ir in 1:5) a[m,itheta,ir,]<-run(m,'IBDsu1',ir,itheta==(1:ntheta)) a[a==0]<-min(a[a>0])*1e-10 b<-apply(log(a),c(2,3,4),sum) # b is log product over markers d<-(ptheta%o%c(1-alpha-gamma,alpha/2,alpha/2,gamma/2,gamma/2)) c<-apply(array(d,dim(b))*exp(b-max(b)),3,sum) c[1]/c[2] # 62752.97 # IBD 6 markers alpha<-gamma<-0.05 a<-array(0,c(6,ntheta,5,2)) # no. markers, no theta's, no. relations, 2 for(m in 1:6) for(itheta in 1:ntheta) for(ir in 1:5) a[m,itheta,ir,]<-run(m,'IBDsu1',ir,itheta==(1:ntheta)) a[a==0]<-min(a[a>0])*1e-10 b<-apply(log(a),c(2,3,4),sum) # b is log product over markers d<-(ptheta%o%c(1-alpha-gamma,alpha/2,alpha/2,gamma/2,gamma/2)) c<-apply(array(d,dim(b))*exp(b-max(b)),3,sum) c[1]/c[2] # 1568036 # UAF 100 - 2 markers a<-array(0,c(2,ntheta,2)) for(m in 1:2) for(itheta in 1:ntheta) a[m,itheta,]<-run(m,'UAF',100,itheta==(1:ntheta)) a[a==0]<-min(a[a>0])*1e-10 b<-apply(log(a),c(2,3),sum) c<-as.vector(ptheta%*%exp(b-max(b))) c[1]/c[2] # 555.4607 # all at once: run(1:2,'UAF',100,ptheta) # 535.2903 # UAF 100 - 6 markers a<-array(0,c(6,ntheta,2)) for(m in 1:6) for(itheta in 1:ntheta) a[m,itheta,]<-run(m,'UAF',100,itheta==(1:ntheta)) a[a==0]<-min(a[a>0])*1e-10 b<-apply(log(a),c(2,3),sum) c<-as.vector(ptheta%*%exp(b-max(b))) c[1]/c[2] # 84140302 # UAF 10000 - 6 markers a<-array(0,c(6,ntheta,2)) for(m in 1:6) for(itheta in 1:ntheta) a[m,itheta,]<-run(m,'UAF',10000,.000001+.999999*(itheta==(1:ntheta))) b<-apply(log(a),c(2,3),sum) c<-as.vector(ptheta%*%exp(b-max(b))) c[1]/c[2] # 169718884 > log10(171044336) [1] 8.233109 > log10(1568032) [1] 6.195355 > log10(84139668) [1] 7.925001 options(silent=FALSE) for(m in 1:6) { run(m,'baseline',0,ptheta) run(m,'UAF',100,ptheta) run(m,'IBDsu1',c(0.05,0.05),ptheta) } 12.72524 11.43222 11.38884 32 24.29366 26.12245 40.25765 34.55696 34.55485 8.102414 7.535281 7.486822 7.937572 7.267086 7.222674 5.284965 5.274955 5.226372 D8 & 12.73 & 11.43 & 11.39 \\ D18 & 32.00 & 24.29 & 26.12 \\ D21 & 40.26 & 34.56 & 34.55 \\ FGA & 8.11 & 7.54 & 7.49 \\ THO1 & 7.94 & 7.27 & 7.22 \\ VWA & 5.28 & 5.27 & 5.23 \\