interventionalDBN/DESCRIPTION00006660000000066412262776067011670 0ustar00Package: interventionalDBN Type: Package Title: Interventional Inference for Dynamic Bayesian Networks Version: 1.2 Date: 2013-09-10 Author: Simon Spencer Maintainer: Simon Spencer Description: This package allows a dynamic Bayesian network to be inferred from microarray timecourse data with interventions (inhibitors). License: GPL (>=2) LazyLoad: yes Packaged: 2014-01-07 13:08:07 UTC; simon interventionalDBN/NAMESPACE00006660000000004012262776067011365 0ustar00exportPattern("^[[:alpha:]]+") interventionalDBN/R/00007770000000000012262776067010355 5ustar00interventionalDBN/R/countGraphs.R00006660000000021212262776067012770 0ustar00countGraphs <- function(nodes,max.indeg) { grphs<-1 for (i in 1:max.indeg) { grphs<-grphs+choose(nodes,i) } return(grphs) } interventionalDBN/R/formatData.R00006660000002122212262776067012561 0ustar00formatData <- function(d,cellLines=NULL,inhibitors=NULL,stimuli=NULL,times=NULL,nodes=NULL,intercept=TRUE,initialIntercept=TRUE,gradients=FALSE) { # data is a samples x (4 + nodes) matrix or dataframe # column 1 gives the cell line in each sample # column 2 gives the inhibitor used in each sample # column 3 gives the stimuli used in each sample # column 4 gives the time each sample was measured # cellLines - a vector that can be used to specify a subset of the cell lines to analyse (default is to use them all) # inhibitors - a vector that can be used to specify a subset of the inhibitors to analyse (default is to use them all) # stimuli - a vector that can be used to specify a subset of the stimuli to analyse (default is to use them all) # times - a vector that can be used to specify a subset of the sample times to analyse as the response (default is to use them all). # Note: The entries of d[,4] must be real numbers. Missing values are acceptable and are handled as follows: # 1) Missing values in the response are just ignored. # 2) For the predictors, if a single timepoint is missing, the predictors are interpolated from the two immediate neighbours. # 3) If one of the two immediate neighbours is missing then the response is ignored. # 4) UNLESS the predictor in question is for the time zero observation (which is always missing), in which case NA is returned, which is later set to the mean of the predictor during centring. See initialIntercept. # nodes - a vector that can be used to specify the indices of a subset of nodes to include in the analysis. Further nodes can be removed from the response in the interventionalInferenceDBN function. # intercept - include an intercept parameter in all models? # initialIntercept - include an intercept parameter to estimate the level at time zero. Only used if the first sample is included in the response. # gradients - If true, changes in concentration are used as the response, rather than the raw values. if (length(dim(d))!=2 | dim(d)[2]<5) {stop("d must be a matrix with columns 1 to 4 filled with sample information (cell line, inhibitor, stimuli, time)\n")} if (is.null(cellLines)) {cellLines<-unique(d[,1])} if (is.null(inhibitors)) {inhibitors<-unique(d[,2])} if (is.null(stimuli)) {stimuli<-unique(d[,3])} sampleTimes<-sort(unique(as.numeric(d[,4]))) if (is.null(times)) {times<-sampleTimes} if (length(intersect(times,sampleTimes))!=length(times)) {stop("All times must appear in d[,4].\n")} sampleTimepoints<-1:length(sampleTimes) timepoints<-which(sampleTimes %in% times) timeIntervals<-sampleTimes[2:length(sampleTimes)]-sampleTimes[1:(length(sampleTimes)-1)] if (is.null(nodes)) {nodes<-1:(dim(d)[2]-4)} if (initialIntercept & gradients) {stop("initialIntercept and gradients cannot both be TRUE.\n")} if (gradients & 1 %in% timepoints) {timepoints<-setdiff(timepoints,1)} dm<-data.matrix(d[,5:(dim(d)[2])]) y<-matrix(NA,0,length(nodes)) X0<-matrix(NA,0,intercept+initialIntercept) X1<-matrix(NA,0,length(nodes)) Sigma<-matrix(NA,0,0) n<-0 n.cellLines<-rep(0,length(cellLines)) n.inhibitors<-rep(0,length(inhibitors)) n.stimuli<-rep(0,length(stimuli)) n.timepoints<-rep(0,length(timepoints)) n.interpolated<-0 interpolated<-matrix(NA,0,4) sampleInfo<-matrix(NA,0,4) current.condition<-0 cond<-NULL for (cellLine in cellLines) { for (i in inhibitors) { for (j in stimuli) { current.condition<-current.condition+1 for (k in timepoints) { response<-which(d[,1]==cellLine & d[,2]==i & d[,3]==j & d[,4]==sampleTimes[k]) if (length(response)>0) { if (k==1) { predictor<-rep(0,length(nodes)) } else { wh<-which(d[,1]==cellLine & d[,2]==i & d[,3]==j & d[,4]==sampleTimes[k-1]) if (length(wh)==1) { predictor<-dm[wh,nodes] } else if (length(wh)>1) { predictor<-apply(dm[wh,nodes],2,mean) } else if (!gradients & k>2) {# interpolation before<-which(d[,1]==cellLine & d[,2]==i & d[,3]==j & d[,4]==sampleTimes[k-2]) if (length(before)>1 & length(response)>1) { predictor<-(timeIntervals[k+1]*apply(dm[before,nodes],2,mean)+timeIntervals[k]*apply(dm[response,nodes],2,mean))/(timeIntervals[k+1]+timeIntervals[k]) } else if (length(before)==1 & length(response)>1) { predictor<-(timeIntervals[k+1]*dm[before,nodes]+timeIntervals[k]*apply(dm[response,nodes],2,mean))/(timeIntervals[k+1]+timeIntervals[k]) } else if (length(before)>1 & length(response)==1) { predictor<-(timeIntervals[k+1]*apply(dm[before,nodes],2,mean)+timeIntervals[k]*dm[response,nodes])/(timeIntervals[k+1]+timeIntervals[k]) } else if (length(before)==1 & length(response)==1) { predictor<-(timeIntervals[k+1]*dm[before,nodes]+timeIntervals[k]*dm[response,nodes])/(timeIntervals[k+1]+timeIntervals[k]) } else {# Give up! predictor<-NULL } } else {# don't use interpolation and gradients predictor<-NULL } } if (!is.null(predictor) & gradients) { n<-n+1 Sigma<-cbind(rbind(Sigma,rep(0,n-1)),rep(0,n)) if (length(response)==1) { y<-rbind(y,(dm[response,nodes]-predictor)/timeIntervals[k-1]) } else { y<-rbind(y,(apply(dm[response,nodes],2,mean)-predictor)/timeIntervals[k-1]) } Sigma[n,n]<-(1/length(response)+1/length(wh))/(timeIntervals[k-1])^2 if (n>1 && prod(sampleInfo[n-1,]==c(cellLine,i,j,sampleTimes[k-1]))==1) { Sigma[n-1,n]<--1/length(wh)/timeIntervals[k-1]/timeIntervals[k-2] Sigma[n,n-1]<--1/length(wh)/timeIntervals[k-1]/timeIntervals[k-2] } X1<-rbind(X1,predictor) sampleInfo<-rbind(sampleInfo,c(cellLine,i,j,sampleTimes[k])) cond<-c(cond,current.condition) n.cellLines[which(cellLines==cellLine)]<-n.cellLines[which(cellLines==cellLine)]+1 n.inhibitors[which(inhibitors==i)]<-n.inhibitors[which(inhibitors==i)]+1 n.stimuli[which(stimuli==j)]<-n.stimuli[which(stimuli==j)]+1 n.timepoints[which(timepoints==k)]<-n.timepoints[which(timepoints==k)]+1 if (intercept) {X0<-matrix(1,n,1)} } else if (!is.null(predictor)) { for (r in response) { n<-n+1 y<-rbind(y,dm[r,nodes]) X1<-rbind(X1,predictor) Sigma<-diag(rep(1,n)) if (k>1 && length(wh)==0) {interpolated<-rbind(interpolated,c(cellLine,i,j,sampleTimes[k]));n.interpolated<-n.interpolated+1} sampleInfo<-rbind(sampleInfo,c(cellLine,i,j,sampleTimes[k])) cond<-c(cond,current.condition) n.cellLines[which(cellLines==cellLine)]<-n.cellLines[which(cellLines==cellLine)]+1 n.inhibitors[which(inhibitors==i)]<-n.inhibitors[which(inhibitors==i)]+1 n.stimuli[which(stimuli==j)]<-n.stimuli[which(stimuli==j)]+1 n.timepoints[which(timepoints==k)]<-n.timepoints[which(timepoints==k)]+1 if (intercept & initialIntercept & k==1) { X0<-rbind(X0,c(1,1)) } else if (intercept & initialIntercept & k>1) { X0<-rbind(X0,c(1,0)) } else if (intercept | (initialIntercept & k==1)) { X0<-rbind(X0,1) } else if (initialIntercept & k>1) { X0<-rbind(X0,0) } } } } } } } } row.names(X1)<-NULL colnames(interpolated)<-c("Cell line","Inhibitor","Stimuli","Time") colnames(sampleInfo)<-c("Cell line","Inhibitor","Stimuli","Time") cat("n =",n,"\n") cat(current.condition,"conditions:\n") for (cellLine in cellLines) {cat(" Cell line",cellLine,":",n.cellLines[which(cellLines==cellLine)],"samples.\n")} for (i in inhibitors) {cat(" Inhibitor",i,":",n.inhibitors[which(inhibitors==i)],"samples.\n")} for (j in stimuli) {cat(" Stimulus",j,":",n.stimuli[which(stimuli==j)],"samples.\n")} for (k in timepoints) {cat(" Time",sampleTimes[k],":",n.timepoints[which(timepoints==k)],"samples.\n")} if (n.interpolated>0) {cat(n.interpolated,"predictors produced by interpolation.\n")} return(list(y=y,X0=X0,X1=X1,Sigma=Sigma,sampleInfo=sampleInfo,interpolated=interpolated,cond=cond)) } interventionalDBN/R/interventionEffects.R00006660000000727412262776067014536 0ustar00interventionEffects <- function(d,cellLine,baseline,inhibited) { # data is a samples x (4 + nodes) matrix or dataframe # column 1 gives the cell line in each sample # column 2 gives the inhibitor used in each sample # column 3 gives the stimuli used in each sample # column 4 gives the time index for each sample # cellLine is the cell line to examine # baseline is the entry in column 2 for the baseline (uninhibited samples) # inhibited is the entry in column 2 for the samples in which the inhibitor was active if (length(dim(d))!=2 | dim(d)[2]<5) {stop("d must be a matrix with columns 1 to 4 filled with sample information (cell line, inhibitor, stimulus, timepoint)\n")} dm<-data.matrix(d[,5:(dim(d)[2])]) n.nodes<-dim(dm)[2] stimuli<-levels(factor(d[,3])) n.stimuli<-length(stimuli) n.tps<-max(d[,4])+1 n.baseline<-rep(0,n.stimuli) n.inhibited<-rep(0,n.stimuli) n.baseline.used<-rep(0,n.stimuli) n.inhibited.used<-rep(0,n.stimuli) n.differences<-rep(0,n.stimuli) degrees.freedom<-rep(0,n.stimuli) names(degrees.freedom)<-stimuli t.statistics<-matrix(NA,n.stimuli,n.nodes) colnames(t.statistics)<-colnames(dm) rownames(t.statistics)<-stimuli p.values<-t.statistics heatmap.p.values<-t.statistics all.stim.t.statistics<-rep(NA,n.nodes) names(all.stim.t.statistics)<-colnames(dm) all.stim.p.values<-all.stim.t.statistics all.stim.heatmap.p.values<-all.stim.t.statistics all.stim.degrees.freedom<-NA diffs<-array(NA,c(n.stimuli,n.nodes,n.tps)) for (j in 1:n.stimuli) { for (k in 1:n.tps-1) { wh1<-which(d[,1]==cellLine & d[,2]==baseline & d[,3]==stimuli[j] & d[,4]==k) wh2<-which(d[,1]==cellLine & d[,2]==inhibited & d[,3]==stimuli[j] & d[,4]==k) n.baseline[j]<-n.baseline[j]+length(wh1) n.inhibited[j]<-n.inhibited[j]+length(wh2) if (length(wh1)>0 & length(wh2)>0) { n.baseline.used[j]<-n.baseline.used[j]+length(wh1) n.inhibited.used[j]<-n.inhibited.used[j]+length(wh2) a1<-dm[wh1[1],] a2<-dm[wh2[1],] if (length(wh1)>1) {a1<-apply(dm[wh1,],2,mean)} if (length(wh2)>1) {a2<-apply(dm[wh2,],2,mean)} diffs[j,,k+1]<-(a1-a2)/(1/length(wh1)+1/length(wh2))^(1/2) # This ensures that a positive T-statistic corresponds to a reduction in protein expression (ie inhibition). } } wh<-which(!is.na(diffs[j,1,]))# This assumes that if node 1 is observed, so are the others. n.differences[j]<-length(wh) degrees.freedom[j]<-length(wh)-1 t.statistics[j,]<-apply(diffs[j,,wh],1,mean)/apply(diffs[j,,wh],1,sd)*sqrt(n.differences[j]) p.values[j,]<-(1-pt(abs(t.statistics[j,]),degrees.freedom[j]))*2 heatmap.p.values[j,]<-sign(t.statistics[j,])*(1-p.values[j,]) } all.stim.degrees.freedom<-sum(n.differences)-1 for (i in 1:n.nodes) { all.stim.t.statistics[i]<-mean(c(diffs[,i,]),na.rm=TRUE)/sd(c(diffs[,i,]),na.rm=TRUE)*sqrt(sum(n.differences)) } all.stim.p.values<-(1-pt(abs(all.stim.t.statistics),all.stim.degrees.freedom))*2 all.stim.heatmap.p.values<-sign(all.stim.t.statistics)*(1-all.stim.p.values) # for (j in 1:n.stimuli) { cat(" Stimulus",stimuli[j],":", n.baseline.used[j],"/", n.baseline[j]," baseline observations used.\n") cat(" Stimulus",stimuli[j],":",n.inhibited.used[j],"/",n.inhibited[j],"inhibited observations used.\n") } return(list(n.differences=n.differences,t.statistics=t.statistics,degrees.freedom=degrees.freedom,p.values=p.values,heatmap.p.values=heatmap.p.values, all.stim.t.statistics=all.stim.t.statistics,all.stim.degrees.freedom=all.stim.degrees.freedom, all.stim.p.values=all.stim.p.values,all.stim.heatmap.p.values=all.stim.heatmap.p.values)) } interventionalDBN/R/interventionalInference.R00006660000005267012262776067015372 0ustar00interventionalInference <- function(y,X0,X1,Z=NULL,max.indeg,g=NULL,Sigma=NULL,inferParents=NULL,allowSelfEdges=TRUE,perfectOut=FALSE,fixedEffectOut=FALSE,mechanismChangeOut=FALSE,perfectIn=FALSE,fixedEffectIn=FALSE,mechanismChangeIn=FALSE,priorType="uninformed",priorGraph=NULL,priorStrength=3,fittedValues=FALSE) { # n is the number of samples. # P is the number of nodes. # y is an (n x P) matrix filled with response values. # X0 is an (n x a) matrix giving the part of the design matrix that is the same for all models. For no intercept, put X0=NULL. # X1 is an (n x P) matrix filled with the predictors. # Z is an (n x P) binary matrix which is 1 iff node j is inhibited in sample i. # Sigma is an (n x n) symmetric positive definite matrix giving the covariance of the responses (proportional to sigma^2). # inferParents is a vector of nodes for which to infer parents. If left blank, parents are inferred for all nodes. Any values in y for nodes that are not inferred are ignored. # allowSelfEdges if FALSE then self edges are forbidden, causing the diagonal of the pep matrix to be 0. # perfectOut: apply perfect-out interventions? # fixedEffectOut: apply fixed-effect-out interventions? # mechanismChangeOut: apply mechanism-change-out interventions? # perfecIn: apply perfect-in interventions? # fixedEffectIn: apply fixed-effect-in interventions? # mechanismChangeIn: apply mechanism-change-in interventions? # max.indeg: maximum indegree for each node. # g: the 'g' in Zellner's g-prior, by default set to be n. # priorType: type of prior to use. "uninformed", "Hamming" or "Mukherjee" ('only penalise unexpected' prior) # priorGraph: a (P x P) matrix specifying the prior network. # priorStrength: the prior strength parameter, ignored (but don't set it to NA) if priorGraph=NULL. # fittedValues: calculate fitted values? n<-dim(y)[1] P<-dim(y)[2] cat("n =",n,", nodes =",P,"\n") if (!is.null(X0) && dim(X0)[1]!=n) {stop("X0 must have dimension (n x a).\n")} if (dim(X1)[1]!=n | dim(X1)[2]!=P) {stop("X1 must have dimension (n x P).\n")} if (!is.null(Sigma) && dim(Sigma)[1]!=n && dim(Sigma)[2]!=n) {stop("Sigma must have dimension (n x n).\n")} if (perfectOut | fixedEffectOut | mechanismChangeOut | perfectIn | fixedEffectIn | mechanismChangeIn) { if (is.null(Z)) {stop("Z must be specified if an intervention model is used.\n")} } if (is.null(inferParents)) {inferParents<-1:P} if (is.null(max.indeg) || max.indeg>P) {stop("max.indeg must be less than P.\n")} if (is.null(g)) {g<-n} if (priorType!="uninformed" & priorType!="Hamming" & priorType!="Mukherjee") {stop("priorType must be 'uninformed', 'Hamming' or 'Mukherjee'.\n")} if (priorType!="uninformed" & is.null(priorGraph)) {stop("priorGraph must be specified with an informed prior.\n")} if (!is.null(priorGraph) && (dim(priorGraph)[1]!=P | dim(priorGraph)[2]!=P)) {stop("priorGraph must have dimension (P x P).\n")} if (length(priorStrength)>1) {sort(priorStrength)} if ((perfectIn | perfectOut) & (mechanismChangeIn | mechanismChangeOut)) {stop("Perfect and mechanism change interventions cannot be used together.\n")} if (mechanismChangeIn & mechanismChangeOut) {stop("MC-in and MC-out not currently implemented.\n")} inputs<-list(y=y,X0=X0,X1=X1,Z=Z,max.indeg=max.indeg,g=g,Sigma=Sigma,inferParents=inferParents, allowSelfEdges=allowSelfEdges,perfectOut=perfectOut,fixedEffectOut=fixedEffectOut,mechanismChangeOut=mechanismChangeOut, perfectIn=perfectIn,fixedEffectIn=fixedEffectIn,mechanismChangeIn=mechanismChangeIn, priorType=priorType,priorGraph=priorGraph,priorStrength=priorStrength,n=n,P=P) # Part zero: prepare to remove covariance structure from everything. if (!is.null(Sigma)) {R<-t(chol(Sigma))} # Part one: remove component of X0 from y. if (!is.null(X0)) { a<-dim(X0)[2] cat("a =",a,"\n") for (p in inferParents) { if (perfectIn & max(Z[,p])==1) { obs<-which(Z[,p]==0) } else { obs<-1:n } X0p<-matrix(X0[obs,],length(obs),a) if (is.null(Sigma)) { IP0<-diag(rep(1,length(obs)))-X0p%*%solve(crossprod(X0p),t(X0p)) } else { IP0<-solve(R[obs,obs])-solve(R[obs,obs],X0p)%*%solve(crossprod(X0p,solve(Sigma[obs,obs],X0p)),t(solve(Sigma[obs,obs],X0p))) } y[obs,p]<-IP0%*%y[obs,p] } } # Part two: implement perfect-out interventions if (perfectOut) {X1[which(Z==1)]<-NA} # Part three: orthogonalise the predictors for (p in 1:P) { wh<-which(!is.na(X1[,p])) if (length(wh)==n) { X1[,p]<-IP0%*%X1[,p] } else { if (is.null(Sigma)) { X1[wh,p]<-(diag(rep(1,length(wh)))-X0[wh,]%*%solve(crossprod(X0[wh,]),t(X0[wh,])))%*%X1[wh,p] } else { X1[wh,p]<-(solve(R[wh,wh])-solve(R[wh,wh],X0[wh,])%*%solve(crossprod(X0[wh,],solve(Sigma[wh,wh],X0[wh,])),t(solve(Sigma[wh,wh],X0[wh,]))))%*%X1[wh,p] } X1[which(is.na(X1[,p])),p]<-0 } if (sd(X1[,p])==0) {cat("Predictor",p,"is either constant or always inhibited.\n")} } # Part four: orthogonalise fixed-effect-out interventions if (fixedEffectOut) {fe<-IP0%*%Z} # Part five: the prior grphs<-countGraphs(P,max.indeg) prior<-matrix(0,P,grphs) if (!is.null(priorGraph) & priorType!="uninformed") { if (priorType=="Hamming") { cat("Calculating Hamming prior distances...\n") } else if (priorType=="Mukherjee") { cat("Calculating Mukherjee prior distances...\n") } else { stop("Prior type not supported.\n") } parents<-rep(0,P) for (i in 1:countGraphs(P,max.indeg)) { wh<-which(parents==1) for (p in 1:P) { if (priorType=="Hamming") {# SHD prior prior[p,i]<-sum(abs(parents-priorGraph[,p])) } else if (priorType=="Mukherjee") {# OPU prior (Sach prior) prior[p,i]<-length(which(priorGraph[wh,p]==0)) } } parents<-nxt(parents,max.indeg) } } # Part six: initialise ll<-matrix(NA,P,grphs) rownames(ll)<-colnames(X1) parentSets<-ll parents<-rep(0,P) st<-Sys.time() cat("Processing",grphs,"models",date(),"\n") # Part seven: The Main Loop for (m in 1:grphs) { parentSets[,m]<-parents # record parent set if (mechanismChangeOut) { # Mechanism-change-out interventions X<-matrix(NA,n,0) for (p in which(parents==1)) { if (max(Z[,p])==1) { newcols<-matrix(0,n,2) wh1<-which(Z[,p]==1) wh0<-which(Z[,p]==0) if (is.null(Sigma)) { newcols[wh1,1]<-(diag(rep(1,length(wh1)))-X0[wh1,]%*%solve(crossprod(X0[wh1,]),t(X0[wh1,])))%*%X1[wh1,p] newcols[wh0,2]<-(diag(rep(1,length(wh0)))-X0[wh0,]%*%solve(crossprod(X0[wh0,]),t(X0[wh0,])))%*%X1[wh0,p] } else { newcol[wh1,1]<-(solve(R[wh1,wh1])-solve(R[wh1,wh1],X0[wh1,])%*%solve(crossprod(X0[wh1,],solve(Sigma[wh1,wh1],X0[wh1,])),t(solve(Sigma[wh1,wh1],X0[wh1,]))))%*%X1[wh1,p] newcol[wh0,2]<-(solve(R[wh0,wh0])-solve(R[wh0,wh0],X0[wh0,])%*%solve(crossprod(X0[wh0,],solve(Sigma[wh0,wh0],X0[wh0,])),t(solve(Sigma[wh0,wh0],X0[wh0,]))))%*%X1[wh0,p] } X<-cbind(X,newcols) } else { X<-cbind(X,X1[,p]) } } } else { X<-matrix(X1[,which(parents==1)],n,sum(parents)) # put together design matrix for this model } if (fixedEffectOut) {X<-cbind(X,fe[,which(parents==1 & apply(Z,2,max)==1)])} b<-dim(X)[2] # number of betas if (b==0) { # null model H<-diag(rep(1,n)) } else { H<-diag(rep(1,n))-(g/(g+1))*X%*%solve(crossprod(X),t(X)) } inhibitedResponses<-NULL uninhibitedResponses<-inferParents if (!allowSelfEdges) {uninhibitedResponses<-intersect(uninhibitedResponses,which(parents==0))} if (perfectIn | fixedEffectIn | mechanismChangeIn) { inhibitedResponses<-intersect(uninhibitedResponses,which(apply(Z,2,max)==1)) uninhibitedResponses<-setdiff(uninhibitedResponses,inhibitedResponses) } for (p in uninhibitedResponses) { ll[p,m]<--b/2*log(1+g)-(n-a)/2*log(crossprod(y[,p],H%*%y[,p])) } # Part 7a deal with -in interventions (and -out ones as well!) for (p in inhibitedResponses) { if (perfectIn) { obs<-which(Z[,p]==0) } else { obs<-1:n } if (mechanismChangeIn) { b<-length(which(parents==1)) X<-matrix(0,n,2*b) wh1<-which(Z[,p]==1) wh0<-which(Z[,p]==0) if (is.null(Sigma)) { X0p<-matrix(X0[wh1,],length(wh1),a) X[wh1,1:b]<-(diag(rep(1,length(wh1)))-X0p%*%solve(crossprod(X0p),t(X0p)))%*%X1[wh1,which(parents==1)] X0p<-matrix(X0[wh0,],length(wh0),a) X[wh0,(b+1):(2*b)]<-(diag(rep(1,length(wh0)))-X0p%*%solve(crossprod(X0p),t(X0p)))%*%X1[wh0,which(parents==1)] } else { X0p<-matrix(X0[wh1,],length(wh1),a) X[wh1,1:b]<-(solve(R[wh1,wh1])-solve(R[wh1,wh1],X0p)%*%solve(crossprod(X0p,solve(Sigma[wh1,wh1],X0p)),t(solve(Sigma[wh1,wh1],X0p))))%*%X1[wh1,which(parents==1)] X0p<-matrix(X0[wh0,],length(wh0),a) X[wh0,(b+1):(2*b)]<-(solve(R[wh0,wh0])-solve(R[wh0,wh0],X0p)%*%solve(crossprod(X0p,solve(Sigma[wh0,wh0],X0p)),t(solve(Sigma[wh0,wh0],X0p))))%*%X1[wh0,which(parents==1)] } } else if (perfectOut) { X<-matrix(X1[obs,which(parents==1)],length(obs),sum(parents)) for (pa in which(parents==1)) { if (max(Z[obs,pa])==1) { wh0<-obs[which(Z[obs,pa]==0)] X0p<-matrix(X0[wh0,],length(wh0),a) if (is.null(Sigma)) { X[wh0,which(which(parents==1)==pa)]<-(diag(rep(1,length(wh0)))-X0p%*%solve(crossprod(X0p),t(X0p)))%*%X1[wh0,pa] } else { X[wh0,which(which(parents==1)==pa)]<-(solve(R[wh0,wh0])-solve(R[wh0,wh0],X0p)%*%solve(crossprod(X0p,solve(Sigma[wh0,wh0],X0p)),t(solve(Sigma[wh0,wh0],X0p))))%*%X1[wh0,pa] } X[which(Z[obs,pa]==1),which(which(parents==1)==pa)]<-0 } } } else { X<-matrix(X1[obs,which(parents==1)],length(obs),sum(parents)) } if (fixedEffectIn & fixedEffectOut) { X<-cbind(X,Z[obs,union(p,which(parents==1 & apply(Z[obs,],2,max)==1))]) } else if (fixedEffectIn) { X<-cbind(X,Z[obs,p]) } else if (fixedEffectOut) { X<-cbind(X,Z[obs,which(parents==1 & apply(Z[obs,],2,max)==1)]) } X0p<-matrix(X0[obs,],length(obs),a) if (is.null(Sigma)) { X<-(diag(rep(1,length(obs)))-X0p%*%solve(crossprod(X0p),t(X0p)))%*%X } else { X<-(solve(R[obs,obs])-solve(R[obs,obs],X0p)%*%solve(crossprod(X0p,solve(Sigma[obs,obs],X0p)),t(solve(Sigma[obs,obs],X0p))))%*%X } b<-dim(X)[2] # number of betas if (b==0) { # null model H<-diag(rep(1,length(obs))) } else { H<-diag(rep(1,length(obs)))-(g/(g+1))*X%*%solve(crossprod(X),t(X)) } ll[p,m]<--b/2*log(1+g)-(length(obs)-a)/2*log(crossprod(y[obs,p],H%*%y[obs,p])) } parents <- nxt(parents,max.indeg) if (m==100) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/100,"minutes.\n")} if (m==1000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/1000,"minutes.\n")} if (m==10000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/10000,"minutes.\n")} if (m==100000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/100000,"minutes.\n")} } cat("Actual duration",difftime(Sys.time(),st,units="mins"),"minutes.\n") # Part eight: renormalisation & MAP model cat("Renormalising...\n") parentCount<-apply(parentSets,2,sum) if (length(priorStrength)>1 & max(prior)>0) { # Perform Empirical Bayes to estimate the best one. marginal.likelihood<-matrix(0,P,length(priorStrength)) ll.Inf<-ll ll.Inf[which(is.na(ll))]<--Inf st<-Sys.time() for (i in 1:length(priorStrength)) { # 'soft' multiplicity correction - prior is penalised and then renormalised normalisedPrior<-matrix(-log(1+max.indeg),P,grphs)# deal with the null model here. for (j in 1:max.indeg) { wh<-which(parentCount==j) normalisedPrior[,wh]<--log(length(wh))-log(1+max.indeg) } normalisedPrior <- normalisedPrior - priorStrength[i]*prior normalisedPrior <- normalisedPrior - log(apply(exp(normalisedPrior),1,sum)) marginal.likelihood[,i] <- apply(ll+normalisedPrior,1,min,na.rm=TRUE) marginal.likelihood[,i] <- marginal.likelihood[,i]+log(apply(exp(ll.Inf+normalisedPrior-marginal.likelihood[,i]),1,sum)) if (i==20) {cat("Estimated duration of empirical Bayes calculation",difftime(Sys.time(),st,units="mins")*length(priorStrength)/20,"minutes.\n")} } log.ml.prod <- apply(marginal.likelihood,2,sum) if (length(which(log.ml.prod==max(log.ml.prod)))>1) {cat("Multiple solutions for Emperical Bayes - taking weakest prior strength.\n")} ebPriorStrength <- priorStrength[which.max(log.ml.prod)] const1 <- marginal.likelihood[,which.max(log.ml.prod)] # Now that we have chosen the prior strength, repeat the calculations. normalisedPrior<-matrix(-log(1+max.indeg),P,grphs)# deal with the null model here. for (j in 1:max.indeg) { wh<-which(parentCount==j) normalisedPrior[,wh]<--log(length(wh))-log(1+max.indeg) } normalisedPrior <- normalisedPrior-ebPriorStrength*prior normalisedPrior <- normalisedPrior-log(apply(exp(normalisedPrior),1,sum)) lpost <- ll.Inf + normalisedPrior - const1 } else { # No empirical Bayes # 'soft' multiplicity correction - prior is penalised and then renormalised normalisedPrior<-matrix(-log(1+max.indeg),P,grphs)# deal with the null model here. for (j in 1:max.indeg) { wh<-which(parentCount==j) normalisedPrior[,wh]<--log(length(wh))-log(1+max.indeg) } normalisedPrior<-normalisedPrior-priorStrength*prior normalisedPrior<-normalisedPrior-log(apply(exp(normalisedPrior),1,sum)) const1<-apply(ll+normalisedPrior,1,min,na.rm=TRUE) ll[which(is.na(ll))]<--Inf const1<-const1+log(apply(exp(ll+normalisedPrior-const1),1,sum)) lpost<-ll+normalisedPrior-const1 marginal.likelihood <- const1 ebPriorStrength <- NULL } MAP<-matrix(NA,P,P) colnames(MAP)<-colnames(X1) rownames(MAP)<-colnames(X1) pep<-MAP MAPprob<-rep(NA,P) names(MAPprob)<-colnames(X1) MAPmodel<-MAPprob for (p in 1:P) { if (length(which.max(lpost[p,]))>0) { MAPmodel[p]<-which.max(lpost[p,]) MAP[,p]<-parentSets[,MAPmodel[p]] MAPprob[p]<-exp(lpost[p,MAPmodel[p]]) } } # Part nine: model averaging cat("Calculating posterior edge probabilities...\n") for (i in 1:P) { for (j in 1:P) { pep[i,j]<-sum(exp(lpost[j,which(parentSets[i,]==1)])) } } # Part ten: fitted values if (fittedValues) { parents<-rep(0,P) st<-Sys.time() cat("Second pass to calculate fitted values.\n") yhat<-matrix(0,n,P) cat("Processing",grphs,"models",date(),"\n") for (m in 1:grphs) { parentSets[,m]<-parents # record parent set if (mechanismChangeOut) { # Mechanism-change-out interventions X<-matrix(NA,n,0) for (p in which(parents==1)) { if (max(Z[,p])==1) { newcols<-matrix(0,n,2) wh1<-which(Z[,p]==1) wh0<-which(Z[,p]==0) if (is.null(Sigma)) { newcols[wh1,1]<-(diag(rep(1,length(wh1)))-X0[wh1,]%*%solve(crossprod(X0[wh1,]),t(X0[wh1,])))%*%X1[wh1,p] newcols[wh0,2]<-(diag(rep(1,length(wh0)))-X0[wh0,]%*%solve(crossprod(X0[wh0,]),t(X0[wh0,])))%*%X1[wh0,p] } else { newcol[wh1,1]<-(solve(R[wh1,wh1])-solve(R[wh1,wh1],X0[wh1,])%*%solve(crossprod(X0[wh1,],solve(Sigma[wh1,wh1],X0[wh1,])),t(solve(Sigma[wh1,wh1],X0[wh1,]))))%*%X1[wh1,p] newcol[wh0,2]<-(solve(R[wh0,wh0])-solve(R[wh0,wh0],X0[wh0,])%*%solve(crossprod(X0[wh0,],solve(Sigma[wh0,wh0],X0[wh0,])),t(solve(Sigma[wh0,wh0],X0[wh0,]))))%*%X1[wh0,p] } X<-cbind(X,newcols) } else { X<-cbind(X,X1[,p]) } } } else { X<-matrix(X1[,which(parents==1)],n,sum(parents)) # put together design matrix for this model } if (fixedEffectOut) {X<-cbind(X,fe[,which(parents==1 & apply(Z,2,max)==1)])} b<-dim(X)[2] # number of betas if (b==0) { # null model H<-matrix(0,n,n) } else { H<-(g/(g+1))*X%*%solve(crossprod(X),t(X)) } inhibitedResponses<-NULL uninhibitedResponses<-inferParents if (!allowSelfEdges) {uninhibitedResponses<-intersect(uninhibitedResponses,which(parents==0))} if (perfectIn | fixedEffectIn | mechanismChangeIn) { inhibitedResponses<-intersect(uninhibitedResponses,which(apply(Z,2,max)==1)) uninhibitedResponses<-setdiff(uninhibitedResponses,inhibitedResponses) } for (p in uninhibitedResponses) { yhat[,p]<-yhat[,p]+exp(lpost[p,m])*H%*%y[,p] } # Part 10a deal with -in interventions (and -out ones as well!) for (p in inhibitedResponses) { if (perfectIn) { obs<-which(Z[,p]==0) } else { obs<-1:n } if (mechanismChangeIn) { b<-length(which(parents==1)) X<-matrix(0,n,2*b) wh1<-which(Z[,p]==1) wh0<-which(Z[,p]==0) if (is.null(Sigma)) { X0p<-matrix(X0[wh1,],length(wh1),a) X[wh1,1:b]<-(diag(rep(1,length(wh1)))-X0p%*%solve(crossprod(X0p),t(X0p)))%*%X1[wh1,which(parents==1)] X0p<-matrix(X0[wh0,],length(wh0),a) X[wh0,(b+1):(2*b)]<-(diag(rep(1,length(wh0)))-X0p%*%solve(crossprod(X0p),t(X0p)))%*%X1[wh0,which(parents==1)] } else { X0p<-matrix(X0[wh1,],length(wh1),a) X[wh1,1:b]<-(solve(R[wh1,wh1])-solve(R[wh1,wh1],X0p)%*%solve(crossprod(X0p,solve(Sigma[wh1,wh1],X0p)),t(solve(Sigma[wh1,wh1],X0p))))%*%X1[wh1,which(parents==1)] X0p<-matrix(X0[wh0,],length(wh0),a) X[wh0,(b+1):(2*b)]<-(solve(R[wh0,wh0])-solve(R[wh0,wh0],X0p)%*%solve(crossprod(X0p,solve(Sigma[wh0,wh0],X0p)),t(solve(Sigma[wh0,wh0],X0p))))%*%X1[wh0,which(parents==1)] } } else if (perfectOut) { X<-matrix(X1[obs,which(parents==1)],length(obs),sum(parents)) for (pa in which(parents==1)) { if (max(Z[obs,pa])==1) { wh0<-obs[which(Z[obs,pa]==0)] X0p<-matrix(X0[wh0,],length(wh0),a) if (is.null(Sigma)) { X[wh0,which(which(parents==1)==pa)]<-(diag(rep(1,length(wh0)))-X0p%*%solve(crossprod(X0p),t(X0p)))%*%X1[wh0,pa] } else { X[wh0,which(which(parents==1)==pa)]<-(solve(R[wh0,wh0])-solve(R[wh0,wh0],X0p)%*%solve(crossprod(X0p,solve(Sigma[wh0,wh0],X0p)),t(solve(Sigma[wh0,wh0],X0p))))%*%X1[wh0,pa] } X[which(Z[obs,pa]==1),which(which(parents==1)==pa)]<-0 } } } else { X<-matrix(X1[obs,which(parents==1)],length(obs),sum(parents)) } if (fixedEffectIn & fixedEffectOut) { X<-cbind(X,Z[obs,union(p,which(parents==1 & apply(Z[obs,],2,max)==1))]) } else if (fixedEffectIn) { X<-cbind(X,Z[obs,p]) } else if (fixedEffectOut) { X<-cbind(X,Z[obs,which(parents==1 & apply(Z[obs,],2,max)==1)]) } X0p<-matrix(X0[obs,],length(obs),a) if (is.null(Sigma)) { X<-(diag(rep(1,length(obs)))-X0p%*%solve(crossprod(X0p),t(X0p)))%*%X } else { X<-(solve(R[obs,obs])-solve(R[obs,obs],X0p)%*%solve(crossprod(X0p,solve(Sigma[obs,obs],X0p)),t(solve(Sigma[obs,obs],X0p))))%*%X } b<-dim(X)[2] # number of betas if (b==0) { # null model H<-matrix(0,length(obs),length(obs)) } else { H<-(g/(g+1))*X%*%solve(crossprod(X),t(X)) } yhat[obs,p]<-yhat[obs,p]+exp(lpost[p,m])*H%*%y[obs,p] } parents <- nxt(parents,max.indeg) if (m==100) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/100,"minutes.\n")} if (m==1000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/1000,"minutes.\n")} if (m==10000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/10000,"minutes.\n")} if (m==100000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/100000,"minutes.\n")} } cat("Actual duration",difftime(Sys.time(),st,units="mins"),"minutes.\n") # Map fitted values back onto the original scale for (p in inferParents) { if (perfectIn & max(Z[,p])==1) { obs<-which(Z[,p]==0) yhat[which(Z[,p]==1),p]<-NA } else { obs<-1:n } X0p<-matrix(X0[obs,],length(obs),a) if (is.null(Sigma)) { P0<-diag(rep(1,length(obs)))-X0p%*%solve(crossprod(X0p),t(X0p)) yhat[obs,p]<-yhat[obs,p]+P0%*%inputs$y[obs,p] } else { P0<-solve(R[obs,obs],X0p)%*%solve(crossprod(X0p,solve(Sigma[obs,obs],X0p)),t(solve(Sigma[obs,obs],X0p))) yhat[obs,p]<-R[obs,obs]%*%yhat[obs,p]+P0%*%inputs$y[obs,p] } } } else { yhat<-matrix(NA,n,P) } return(list(pep=pep,MAP=MAP,parentSets=parentSets,ll=ll,lpost=lpost,MAPprob=MAPprob,MAPmodel=MAPmodel,marginal.likelihood=marginal.likelihood,ebPriorStrength=ebPriorStrength,yhat=yhat,inputs=inputs)) } interventionalDBN/R/interventionalInferenceAdvanced.R00006660000003723012262776067017013 0ustar00interventionalInferenceAdvanced <- function(y,X0,X1,cond=NULL,inhibition=NULL,inhibitors=NULL,max.indeg,g=NULL,Sigma=NULL,inferParents=NULL,allowSelfEdges=TRUE,perfect=FALSE,fixedEffect=FALSE,mechanismChange=FALSE,priorType="uninformed",priorGraph=NULL,priorStrength=3,fittedValues=FALSE) { # n is the number of samples. # P is the number of nodes. # y is an (n x P) matrix filled with response values. # X0 is an (n x a) matrix giving the part of the design matrix that is the same for all models. For no intercept, put X0=NULL. # X1 is an (n x P) matrix filled with the predictors. # Sigma is an (n x n) symmetric positive definite matrix giving the covariance of the responses (proportional to sigma^2). # cond is an (n x 1) matrix giving the condition of each sample, filled with the numbers 1,...,#conditions. # inhibition is a (#conditions x #inhibitors) binary matrix, where entry (c,i)=1 iff inhibitor i is active in condition c. # inhibitors is a (#inhibitors x P) matrix, where (i,p)=1 iff inhibitor i acts on node p (eg it removes edges OUT from node p). # inferParents is a vector of nodes for which to infer parents. If left blank, parents are inferred for all nodes. Any values in y for nodes that are not inferred are ignored. # allowSelfEdges if FALSE then self edges are forbidden, causing the diagonal of the pep matrix to be 0. # perfect: apply perfect interventions? # fixedEffect: apply fixed effect interventions? # mechanismChange: apply mechanism change interventions? # max.indeg: maximum indegree for each node. # g: the 'g' in Zellner's g-prior, by default set to be n. # priorType: type of prior to use. "uninformed", "Hamming" or "Mukherjee" ('only penalise unexpected' prior) # priorGraph: a (P x P) matrix specifying the prior network. # priorStrength: the prior strength parameter, ignored (but don't set it to NA) if priorGraph=NULL. # fittedValues: calculate fitted values? n<-dim(y)[1] P<-dim(y)[2] cat("n =",n,", nodes =",P,"\n") if (!is.null(X0) && dim(X0)[1]!=n) {stop("X0 must have dimension (n x a).\n")} if (dim(X1)[1]!=n | dim(X1)[2]!=P) {stop("X1 must have dimension (n x P).\n")} if (!is.null(Sigma) && dim(Sigma)[1]!=n && dim(Sigma)[2]!=n) {stop("Sigma must have dimension (n x n).\n")} if (perfect | fixedEffect | mechanismChange) { if (is.null(cond)) {stop("cond must be specified if an intervention model is used.\n")} if (is.null(inhibition)) {stop("inhibition must be specified if an intervention model is used.\n")} if (is.null(inhibitors)) {stop("inhibitors must be specified if an intervention model is used.\n")} cds<-dim(inhibition)[1] # number of conditions ins<-dim(inhibition)[2] # number of inhibitors if (length(cond)!=n) {stop("cond must have length n.\n")} if (max(cond)>cds) {stop("inhibition must have dimension (conditions x inhibitors).\n")} if (dim(inhibitors)[1]!=ins | dim(inhibitors)[2]!=P) {stop("inhibitors must have dimension (inhibitors x P).\n")} } if (is.null(inferParents)) {inferParents<-1:P} if (is.null(max.indeg) || max.indeg>P) {stop("max.indeg must be less than P.\n")} if (is.null(g)) {g<-n} if (priorType!="uninformed" & priorType!="Hamming" & priorType!="Mukherjee") {stop("priorType must be 'uninformed', 'Hamming' or 'Mukherjee'.\n")} if (priorType!="uninformed" & is.null(priorGraph)) {stop("priorGraph must be specified with an informed prior.\n")} if (!is.null(priorGraph) && (dim(priorGraph)[1]!=P | dim(priorGraph)[2]!=P)) {stop("priorGraph must have dimension (P x P).\n")} if (length(priorStrength)>1) {sort(priorStrength)} inputs<-list(y=y,X0=X0,X1=X1,cond=cond,inhibition=inhibition,inhibitors=inhibitors,max.indeg=max.indeg,g=g,Sigma=Sigma,inferParents=inferParents, allowSelfEdges=allowSelfEdges,perfect=perfect,fixedEffect=fixedEffect,mechanismChange=mechanismChange, priorType=priorType,priorGraph=priorGraph,priorStrength=priorStrength,n=n,P=P) # Part zero: prepare to remove covariance structure from everything. if (!is.null(Sigma)) {R<-t(chol(Sigma))} # Part one: remove component of X0 from y. if (!is.null(X0)) { a<-dim(X0)[2] cat("a =",a,"\n") #intercepts<-matrix(NA,a,P) if (is.null(Sigma)) { IP0<-diag(rep(1,n))-X0%*%solve(crossprod(X0),t(X0)) } else { IP0<-solve(R)-solve(R,X0)%*%solve(crossprod(X0,solve(Sigma,X0)),t(solve(Sigma,X0))) } for (p in inferParents) { #intercepts[,p]<-solve(crossprod(X0),t(X0))%*%y[,p] y[,p]<-IP0%*%y[,p] } } # Part two: implement perfect interventions if (perfect) { for (j in 1:cds) { for (i in 1:ins) { if (inhibition[j,i]==1) { for (p in which(inhibitors[i,]==1)) {X1[which(cond==j),p]<-NA} } } } } # Part three: orthogonalise the predictors for (p in 1:P) { wh<-which(!is.na(X1[,p])) if (length(wh)==n) { X1[,p]<-IP0%*%X1[,p] } else { if (is.null(Sigma)) { X1[wh,p]<-(diag(rep(1,length(wh)))-X0[wh,]%*%solve(crossprod(X0[wh,]),t(X0[wh,])))%*%X1[wh,p] } else { X1[wh,p]<-(solve(R[wh,wh])-solve(R[wh,wh],X0[wh,])%*%solve(crossprod(X0[wh,],solve(Sigma[wh,wh],X0[wh,])),t(solve(Sigma[wh,wh],X0[wh,]))))%*%X1[wh,p] } X1[which(is.na(X1[,p])),p]<-0 } if (sd(X1[,p])==0) {cat("Predictor",p,"is either constant or always inhibited.\n")} } # Part four: implement fixed effect interventions if (fixedEffect) { fe<-matrix(0,n,ins) for (i in 1:ins) { for (j in 1:cds) { if (inhibition[j,i]==1) {fe[which(cond==j),i]<-1} } } # Part four (a): orthogonalise fixed effects fe<-IP0%*%fe } # Part five: the prior grphs<-countGraphs(P,max.indeg) prior<-matrix(0,P,grphs) if (!is.null(priorGraph) & priorType!="uninformed") { if (priorType=="Hamming") { cat("Calculating Hamming prior distances...\n") } else if (priorType=="Mukherjee") { cat("Calculating Mukherjee prior distances...\n") } else { stop("Prior type not supported.\n") } parents<-rep(0,P) for (i in 1:countGraphs(P,max.indeg)) { wh<-which(parents==1) for (p in 1:P) { if (priorType=="Hamming") {# SHD prior prior[p,i]<-sum(abs(parents-priorGraph[,p])) } else if (priorType=="Mukherjee") {# OPU prior (Sach prior) prior[p,i]<-length(which(priorGraph[wh,p]==0)) } } parents<-nxt(parents,max.indeg) } } # Part six: initialise ll<-matrix(NA,P,grphs) rownames(ll)<-colnames(X1) parentSets<-ll parents<-rep(0,P) st<-Sys.time() cat("Processing",grphs,"models",date(),"\n") # Part seven: The Main Loop for (m in 1:grphs) { parentSets[,m]<-parents # record parent set if (mechanismChange) { X<-matrix(NA,n,0) for (p in which(parents==1)) { unused.obs<-1:n for (i in c(which(inhibitors[,p]==1),0)) { if (i>0) { used.obs<-which(cond %in% which(inhibition[,i]==1)) } else { used.obs<-unused.obs } if (length(used.obs)>length(intersect(used.obs,unused.obs))) {cat("Error: Mechanism change interventions do not support combinations of inhibitors that target the same protein.\n")} if (length(used.obs)>0) { newcol<-rep(0,n) newcol[used.obs]<-X1[used.obs,p] if (is.null(Sigma)) { newcol[used.obs]<-(diag(rep(1,length(used.obs)))-X0[used.obs,]%*%solve(crossprod(X0[used.obs,]),t(X0[used.obs,])))%*%newcol[used.obs] } else { newcol[used.obs]<-(solve(R[used.obs,used.obs])-solve(R[used.obs,used.obs],X0[used.obs,])%*%solve(crossprod(X0[used.obs,],solve(Sigma[used.obs,used.obs],X0[used.obs,])),t(solve(Sigma[used.obs,used.obs],X0[used.obs,]))))%*%newcol[used.obs] } if (sd(newcol)>0) {X<-cbind(X,newcol)} else {cat("sd zero\n")} unused.obs<-setdiff(unused.obs,used.obs) } } } } else { X<-matrix(X1[,which(parents==1)],n,sum(parents)) # put together design matrix for this model } if (fixedEffect) { for (i in 1:ins) { if (max(parents*inhibitors[i,])==1) {X<-cbind(X,fe[,i])} } } b<-dim(X)[2] # number of betas if (b==0) { # null model H<-diag(rep(1,n)) } else { H<-diag(rep(1,n))-(g/(g+1))*X%*%solve(crossprod(X),t(X)) } if (allowSelfEdges) { for (p in inferParents) { ll[p,m]<--b/2*log(1+g)-(n-a)/2*log(crossprod(y[,p],H%*%y[,p])) } } else { for (p in setdiff(inferParents,which(parents==1))) { ll[p,m]<--b/2*log(1+g)-(n-a)/2*log(crossprod(y[,p],H%*%y[,p])) } } parents <- nxt(parents,max.indeg) if (m==100) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/100,"minutes.\n")} if (m==1000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/1000,"minutes.\n")} if (m==10000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/10000,"minutes.\n")} if (m==100000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/100000,"minutes.\n")} } cat("Actual duration",difftime(Sys.time(),st,units="mins"),"minutes.\n") # Part eight: renormalisation & MAP model cat("Renormalising...\n") parentCount<-apply(parentSets,2,sum) if (length(priorStrength)>1 & max(prior)>0) { # Perform Empirical Bayes to estimate the best one. marginal.likelihood<-matrix(0,P,length(priorStrength)) ll.Inf<-ll ll.Inf[which(is.na(ll))]<--Inf st<-Sys.time() for (i in 1:length(priorStrength)) { # 'soft' multiplicity correction - prior is penalised and then renormalised normalisedPrior<-matrix(-log(1+max.indeg),P,grphs)# deal with the null model here. for (j in 1:max.indeg) { wh<-which(parentCount==j) normalisedPrior[,wh]<--log(length(wh))-log(1+max.indeg) } normalisedPrior <- normalisedPrior - priorStrength[i]*prior normalisedPrior <- normalisedPrior - log(apply(exp(normalisedPrior),1,sum)) marginal.likelihood[,i] <- apply(ll+normalisedPrior,1,min,na.rm=TRUE) marginal.likelihood[,i] <- marginal.likelihood[,i]+log(apply(exp(ll.Inf+normalisedPrior-marginal.likelihood[,i]),1,sum)) if (i==20) {cat("Estimated duration of empirical Bayes calculation",difftime(Sys.time(),st,units="mins")*length(priorStrength)/20,"minutes.\n")} } log.ml.prod <- apply(marginal.likelihood,2,sum) if (length(which(log.ml.prod==max(log.ml.prod)))>1) {cat("Multiple solutions for Emperical Bayes - taking weakest prior strength.\n")} ebPriorStrength <- priorStrength[which.max(log.ml.prod)] const1 <- marginal.likelihood[,which.max(log.ml.prod)] # Now that we have chosen the prior strength, repeat the calculations. normalisedPrior<-matrix(-log(1+max.indeg),P,grphs)# deal with the null model here. for (j in 1:max.indeg) { wh<-which(parentCount==j) normalisedPrior[,wh]<--log(length(wh))-log(1+max.indeg) } normalisedPrior <- normalisedPrior-ebPriorStrength*prior normalisedPrior <- normalisedPrior-log(apply(exp(normalisedPrior),1,sum)) lpost <- ll.Inf + normalisedPrior - const1 } else { # No empirical Bayes # 'soft' multiplicity correction - prior is penalised and then renormalised normalisedPrior<-matrix(-log(1+max.indeg),P,grphs)# deal with the null model here. for (j in 1:max.indeg) { wh<-which(parentCount==j) normalisedPrior[,wh]<--log(length(wh))-log(1+max.indeg) } normalisedPrior<-normalisedPrior-priorStrength*prior normalisedPrior<-normalisedPrior-log(apply(exp(normalisedPrior),1,sum)) const1<-apply(ll+normalisedPrior,1,min,na.rm=TRUE) ll[which(is.na(ll))]<--Inf const1<-const1+log(apply(exp(ll+normalisedPrior-const1),1,sum)) lpost<-ll+normalisedPrior-const1 marginal.likelihood <- const1 ebPriorStrength <- NULL } MAP<-matrix(NA,P,P) colnames(MAP)<-colnames(X1) rownames(MAP)<-colnames(X1) pep<-MAP MAPprob<-rep(NA,P) names(MAPprob)<-colnames(X1) MAPmodel<-MAPprob for (p in 1:P) { if (length(which.max(lpost[p,]))>0) { MAPmodel[p]<-which.max(lpost[p,]) MAP[,p]<-parentSets[,MAPmodel[p]] MAPprob[p]<-exp(lpost[p,MAPmodel[p]]) } } # Part nine: model averaging cat("Calculating posterior edge probabilities...\n") for (i in 1:P) { for (j in 1:P) { pep[i,j]<-sum(exp(lpost[j,which(parentSets[i,]==1)])) } } # Part ten: fitted values if (fittedValues) { parents<-rep(0,P) st<-Sys.time() cat("Second pass to calculate fitted values.\n") yhat<-matrix(0,n,P) cat("Processing",grphs,"models",date(),"\n") for (m in 1:grphs) { if (mechanismChange) { X<-matrix(NA,n,0) for (p in which(parents==1)) { unused.obs<-1:n for (i in c(which(inhibitors[,p]==1),0)) { if (i>0) { used.obs<-which(cond %in% which(inhibition[,i]==1)) } else { used.obs<-unused.obs } if (length(used.obs)>length(intersect(used.obs,unused.obs))) {cat("Error: Mechanism change interventions do not support combinations of inhibitors that target the same protein.\n")} if (length(used.obs)>0) { newcol<-rep(0,n) newcol[used.obs]<-X1[used.obs,p] if (is.null(Sigma)) { newcol[used.obs]<-(diag(rep(1,length(used.obs)))-X0[used.obs,]%*%solve(crossprod(X0[used.obs,]),t(X0[used.obs,])))%*%newcol[used.obs] } else { newcol[used.obs]<-(solve(R[wh,wh])-solve(R[used.obs,used.obs],X0[used.obs,])%*%solve(crossprod(X0[used.obs,],solve(Sigma[used.obs,used.obs],X0[used.obs,])),t(solve(Sigma[used.obs,used.obs],X0[used.obs,]))))%*%newcol[used.obs] } if (sd(newcol)>0) {X<-cbind(X,newcol)} else {cat("sd zero\n")} unused.obs<-setdiff(unused.obs,used.obs) } } } } else { X<-matrix(X1[,which(parents==1)],n,sum(parents)) # put together design matrix for this model } if (fixedEffect) { for (i in 1:ins) { if (max(parents*inhibitors[i,])==1) {X<-cbind(X,fe[,i])} } } b<-dim(X)[2] # number of betas if (b==0) { # null model H<-matrix(0,n,n) } else { H<-(g/(g+1))*X%*%solve(crossprod(X),t(X)) } for (p in inferParents) { yhat[,p]<-yhat[,p]+exp(lpost[p,m])*H%*%y[,p] } parents <- nxt(parents,max.indeg) if (m==100) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/100,"minutes.\n")} if (m==1000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/1000,"minutes.\n")} if (m==10000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/10000,"minutes.\n")} if (m==100000) {cat("Estimated duration",difftime(Sys.time(),st,units="mins")*grphs/100000,"minutes.\n")} } cat("Actual duration",difftime(Sys.time(),st,units="mins"),"minutes.\n") # Map fitted values back onto the original scale if (is.null(Sigma)) { P0<-X0%*%solve(crossprod(X0),t(X0)) for (p in inferParents) { yhat[,p]<-yhat[,p]+P0%*%inputs$y[,p] } } else { P0<-X0%*%solve(crossprod(X0,solve(Sigma,X0)),t(solve(Sigma,X0))) for (p in inferParents) { yhat[,p]<-R%*%yhat[,p]+P0%*%inputs$y[,p] } } } else { yhat<-matrix(NA,n,P) } return(list(pep=pep,MAP=MAP,parentSets=parentSets,ll=ll,lpost=lpost,MAPprob=MAPprob,MAPmodel=MAPmodel,marginal.likelihood=marginal.likelihood,ebPriorStrength=ebPriorStrength,yhat=yhat,inputs=inputs)) } interventionalDBN/R/linesROC.R00006660000000116412262776067012160 0ustar00linesROC <- function(trueMatrix,pep,col="red",lty=1,lwd=1) { n<-sum(!is.na(trueMatrix)) l<-sort(pep,decreasing=TRUE) pos<-which(trueMatrix==1) npos<-length(pos) neg<-which(trueMatrix==0) nneg<-length(neg) A<-NA if (nneg==0 || npos==0) { cat("ROC curve undefined.\n") } else { x0<-0 y0<-0 A<-0 for (i in 1:length(l)) { x1<-sum(1*(pep[neg]>=l[i]))/nneg y1<-sum(1*(pep[pos]>=l[i]))/npos if (x1>x0) {A<-A+(x1-x0)*(y0+y1)/2} segments(x0,y0,x1,y1,col=col,lty=lty) x0<-x1 y0<-y1 } } cat(col,"ROC area =",A,"\n") return(A) } interventionalDBN/R/nxt.R00006660000000045112262776067011311 0ustar00nxt <- function(g,max.indeg) { j<-1 s<-sum(g) cont<-TRUE while (cont) { if (g[j]==0 && slength(g)) {g<-rep(0,length(g));cont<-FALSE} } return(g) } interventionalDBN/R/plotMaxML.R00006660000000066212262776067012361 0ustar00plotMaxML <- function(output,xlab="Prior strength",ylab="Marginal likelihood",col.max="red",lty.max=3,lwd.max=1,...) { log.ml.prod <- apply(output$marginal.likelihood,2,sum) plot(output$inputs$priorStrength,log.ml.prod,t="l",ylim=c(min(log.ml.prod),max(log.ml.prod)),xlab=xlab,ylab=ylab,...) segments(output$ebPriorStrength,min(log.ml.prod),output$ebPriorStrength,max(log.ml.prod),col=col.max,lty=lty.max,lwd=lwd.max) } interventionalDBN/R/warshall.R00006660000000033612262776067012317 0ustar00warshall<-function(M) { d<-sqrt(length(M)) path<-M for (k in 1:d) { for (i in 1:d) { for (j in 1:d) { path[i,j]<-max(path[i,j],min(path[i,k],path[k,j])) } } } return(path) }interventionalDBN/data/00007770000000000012262776067011065 5ustar00interventionalDBN/data/interventionalData.rda00006660000000427012262776067015413 0ustar00Xkl^n(D)E4RE"Љ4u] EqFl^ )M[PE)BmU!(4Z@EQr&4/w ~^ۻ3۱;NHmfw;wwsvʊ*˯w\n{}xSʳEqC TCʺJs=/2}sZ ;RPނ9/*|DX.KtfOI*9&lUzldsN8ra qZm(;h㖒cqץm>序~,3[?Z<G}>?j^GS@M7~ooM>T+R ;_S,U'q%y|9e#m7WlOl.xqu)z~<ݓʚ-㦓pbʑͶh,G Cn= 6ݴ~ʱ7M4p8c?g6x]F&9%MvdeQFL5(לw_v|o"'~1~]^8xDܺngl邆q*ss/yrfaRBq7syčL\RwD3[!tV=:LmWQ# ~ݚ6 m[pvA5en6~oO Mȃj~byʶ.RO!Ysa6w+W>#=&.iȨdž<(n~riͳoBr _*گCY٪c MǨCvߧ7Ob@' %%~4u8t'^E))KQwHob߮mCK;BwFi:| KM ;>wk*EyʼnW숬V 'j䰢ٛr1jxX@ EgbP-yLIٟtxsEe@ȗnzE:l79g1aFqB&^>%?+~=Kol_[dyGˣK돕DY'13>OO˫Zywv _-`GVŘGL7I);"a1!ȇ$,x0֧r1C9 huyUS|^qAX"z,$ ]Ĥ }Js0*|Q碨&)Fw3_NQEW{ޥV3aGdt"]8j:l.'qpn|SWӢ{O߰Cq̷"m z>2GWyLy/R/9?A?~b?eC]G$BQǁnB4.8)V=NН13"|tA17|2+tщ= J7t\7Q{WMRȻ.1}u)9^7"9|4uO@ (xһI@[oPq #~CFLdi7o1hCNLϭ>Y=5%AWUV;1 B~3!d`K@^!|uʢ@:ÀWd1%|eiUfC`c1} %\references{} \seealso{\code{\link{interventionalInferenceAdvanced}}, \code{\link{countGraphs}}, \code{\link{interventionalData}}, \code{\link{linesROC}}, \code{\link{nxt}}, \code{\link{trueMatrix}}, \code{\link{warshall}}.} \examples{ library(interventionalDBN) data(interventionalData)# loads interventionalData. # Load your own data spreadsheet using myData<-read.csv("myDataFile.csv"). # Estimate nodes downstream of intervention. egfriEffects<-interventionEffects(interventionalData,1,"DMSO","EGFRi") aktiEffects <-interventionEffects(interventionalData,1,"DMSO","AKTi") # Format the data for network inference d<-formatData(interventionalData) # EGFRi is active in conditions 2 and 4, AKTi is active in conditions 3 and 4. # Each condition has 8 timepoints. Z<-matrix(0,32,15) Z[9:16,1]<-1 # EGFR (node 1) is inhibited in condition 2 Z[25:32,1]<-1 # EGFR (node 1) is inhibited in condition 4 Z[17:24,8]<-1 # AKT (node 8) is inhibited in condition 3 Z[25:32,8]<-1 # AKT (node 8) is inhibited in condition 4 # Perform network inference myNetwork<-interventionalInference(d$y,d$X0,d$X1,Z,max.indeg=3, perfectOut=TRUE,fixedEffectOut=TRUE) # Make ROC curve, to see how well we have done. data(trueMatrix) plot(0:1,0:1,t="l",col="grey",xlab="False positive rate",ylab="False negative rate", main="ROC curve showing network inference performance.") redArea<-linesROC(trueMatrix,myNetwork$pep) # ROC area is also sent to the console. # More realistically, the true edge matrix is unknown. # We can use descendancy to get (a much coarser) ROC, # which is based only on nodes that are downstream of the inhibitors. pap<-warshall(myNetwork$pep) effectMatrix<-matrix(NA,15,15) effectMatrix[1,]<-1*(egfriEffects$p.values<=0.1) effectMatrix[8,]<-1*( aktiEffects$p.values<=0.1) blueArea<-linesROC(effectMatrix,myNetwork$pep,col="blue") legend("bottomright",c("Edge matrix known","Descendancy ROC"),col=c("red","blue"),lty=1) } \keyword{ package }interventionalDBN/man/interventionalData.Rd00006660000000221612262776067015052 0ustar00\name{interventionalData} \alias{interventionalData} \docType{data} \title{Simulated micro-array timecourse data speadsheet.} \description{A simulated microarray timecourse dataset, generated using the perfect and fixed effect intervention models.} \usage{data(interventionalData)} \format{ A data frame with 32 observations on the following 19 variables. \describe{ \item{\code{Cell.line}}{a factor with levels representing the cell line.} \item{\code{Inhibitor}}{a factor with levels describing the inhibitors used in each sample.} \item{\code{Stimuli}}{a factor with levels describing the stimulus used in each sample.} \item{\code{Timepoint}}{a integer vector (starting from zero) representing the time index of each sample.} \item{\code{EGFR, SRC, STAT5, Mek, MAPK, p90RSK, PDK, AKT, GSK, TSC2, BAD, mTOR, p70S6K, S6, FOXO3}}{The remaining columns give the log-concentrations of each node.} } } \source{Simulated by Simon Spencer.} %\references{} \examples{ data(interventionalData) interventionalData } \seealso{\code{\link{formatData}}, \code{\link{interventionEffects}}, \code{\link{interventionalDBN-package}}.} \keyword{datasets} interventionalDBN/man/interventionalInference.Rd00006660000002050012262776067016073 0ustar00\name{interventionalInference} \alias{interventionalInference} %- Also NEED an '\alias' for EACH other topic documented here. \title{Dynamic Bayesian Network inference with interventions.} \description{This function performs exact Bayesian inference for dynamic Bayesian networks using microarray timecourse data. Several intervention models can be chosen to take into account the effect of inhibitors.} \usage{ interventionalInference(y, X0, X1, Z, max.indeg, g = NULL, Sigma = NULL, inferParents = NULL, allowSelfEdges = TRUE, perfectOut = FALSE, fixedEffectOut = FALSE, mechanismChangeOut = FALSE, perfectIn = FALSE, fixedEffectIn = FALSE, mechanismChangeIn = FALSE, priorType = "uninformed", priorGraph = NULL, priorStrength = 3, fittedValues = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{y}{an \eqn{n} by \eqn{P} matrix filled with the response values, where \eqn{n} is the number of observations and \eqn{P} is the number of nodes.} \item{X0}{an \eqn{n} by \eqn{a} matrix - the part of the design matrix that is the same for all models. \eqn{a} is the number of parameters that are in all of the modesl.} \item{X1}{an \eqn{n} by \eqn{P} matrix - the part of the design matrix to undergo model selection. \code{colnames(X1)} provides the labels for the output.} \item{Z}{an \eqn{n} by \eqn{P} binary matrix. Entry \eqn{i,j} is one if node \eqn{j} is inhibited in sample \eqn{i}.} \item{max.indeg}{The maximum permitted in-degree for each node.} \item{g}{The constant \eqn{g} in Zellner's g-prior. Defaults to \eqn{n}.} \item{Sigma}{an \eqn{n} by \eqn{n} covariance matrix of the responses, divided by \eqn{\sigma^2}. Faster if not specified, in which case the identity matrix is assumed.} \item{inferParents}{a vector of node indices specifying which nodes to infer parents for. If omitted, parents are inferred for all nodes.} \item{allowSelfEdges}{Should self-edges be allowed?} \item{perfectOut}{Apply perfect-out interventions?} \item{fixedEffectOut}{Apply fixed-effect-out interventions?} \item{mechanismChangeOut}{Apply mechanism-change-out interventions? Note: cannot be applied with perfect interventions.} \item{perfectIn}{Apply perfect-in interventions?} \item{fixedEffectIn}{Apply fixed-effect-in interventions?} \item{mechanismChangeIn}{Apply mechanism-change-in interventions? Note: cannot be applied with perfect interventions.} \item{priorType}{One of \code{"uninformed"}, \code{"Mukherjee"} and \code{"Hamming"}. In the structural Hamming distance prior, each difference from the edges in \code{priorGraph} incurs a prior penalty of \code{exp(-priorStrength)}. In the Mukherjee-Speed prior, adding edges from outside \code{priorGraph} earns the same penalty as before, but if a prior edge is omitted a penalty is no longer incurred.} \item{priorGraph}{A \eqn{P} by \eqn{P} binary matrix specifying the prior graph. If \eqn{(i,j)=1} then node \eqn{i} influences node \eqn{j}. If omitted, an uninformed prior is used.} \item{priorStrength}{The prior strength parameter. Ignored (but don't set it to NA) if \code{priorGraph} is \code{NULL}. If specified as a vector then the value from that gives the highest marginal likelihood is chosen (Empirical Bayes).} \item{fittedValues}{Perform a second pass to calculate the fitted values?} } \value{ \item{pep}{A \eqn{P} by \eqn{P} matrix of posterior probabilities, where element \eqn{(i,j)} gives the posterior probability that node \eqn{i} influences node \eqn{j}.} \item{MAP}{A \eqn{P} by \eqn{P} binary matrix giving the maximum a posteriori network.} \item{parentSets}{A \code{countGraphs(P,max.indeg)} by \eqn{P} binary matrix, where element \eqn{(m,p}=1) iff node \eqn{i} is a parent in model \eqn{m}.} \item{ll}{A \code{countGraphs(P,max.indeg)} by \eqn{P} matrix, where element \eqn{(m,p)} gives the log-likelihood for model \eqn{m} for node \eqn{p}.} \item{lpost}{A \code{countGraphs(P,max.indeg)} by \eqn{P} matrix, where element \eqn{(m,p)} gives the log-posterior probability for model \eqn{m} for node \eqn{p}.} \item{MAPprob}{A \eqn{P} vector where element \eqn{p} gives the posterior probability of the maximum a posteriori model for node \eqn{p}.} \item{MAPmodel}{A \eqn{P} vector where element \eqn{p} gives the index of the maximum a posterior model for node \eqn{p} (between 1 and \code{countGraphs(P,max.indeg)}.} \item{marginal.likelihood}{A \eqn{P} by \code{length(priorStrength)} matrix that gives the marginal likelihood for each node.} \item{ebPriorStrength}{Value of \code{priorStrength} with the largest marginal likelihood, if \code{priorStrength} is a vector; \code{NULL} otherwise.} \item{yhat}{The posterior expected fitted values, if \code{fittedValues} is TRUE.} \item{inputs}{A list containing the inputs to \code{interventionalInference}} } \details{ This function performs interventional inference with both -in and -out forms of the interventions. The targets of the interventions are specified in the matrix \code{Z}. This assumes that each node is the target of only one intervention - if this is not the case, you must use the \code{\link{interventionalInferenceAdvanced}} function. Certain combinations of interventions do not work together, in particular mixtures of perfect and mechanism change interventions. Perfect-in and perfect-out can be used together. Mechanism-change-in and mechanism-change-out could potentially be used together, but are not currently implemented.} \references{ Spencer, S.E.F, Hill, S.M. and Mukherjee, S. (2012) Dynamic Bayesian networks for interventional data. CRiSM pre-print 12-24.\cr Mukherjee, S. and Speed, T.P. Network inference using informative priors. Proc. Nat. Acad. Sci. USA, 105, 14313-14318. } \author{Simon Spencer} \seealso{\code{\link{interventionalDBN-package}}, \code{\link{formatData}}} \examples{ library(interventionalDBN) data(interventionalData)# loads interventionalData. # Load your own data spreadsheet using myData<-read.csv("myDataFile.csv"). # Format the data for network inference d<-formatData(interventionalData) # Perform network inference without modelling interventions. myNetwork0<-interventionalInference(d$y,d$X0,d$X1,max.indeg=3,fittedValues=TRUE) # EGFRi is active in conditions 2 and 4, AKTi is active in conditions 3 and 4. # Each condition has 8 timepoints. Z<-matrix(0,32,15) Z[9:16,1]<-1 # EGFR (node 1) is inhibited in condition 2 Z[25:32,1]<-1 # EGFR (node 1) is inhibited in condition 4 Z[17:24,8]<-1 # AKT (node 8) is inhibited in condition 3 Z[25:32,8]<-1 # AKT (node 8) is inhibited in condition 4 # Perform network inference with perfect-out and fixed-effect-out interventions. myNetwork1<-interventionalInference(d$y,d$X0,d$X1,Z,max.indeg=3, perfectOut=TRUE,fixedEffectOut=TRUE) # Perform network inference on with mechanism-change-out interventions. myNetwork2<-interventionalInference(d$y,d$X0,d$X1,Z,max.indeg=3, mechanismChangeOut=TRUE) # Perform network inference with Mukherjee Prior that prefers to omit self-edges. myNetwork3<-interventionalInference(d$y,d$X0,d$X1,Z,max.indeg=3, perfectOut=TRUE,fixedEffectOut=TRUE, priorType="Mukherjee",priorGraph=matrix(1,15,15)-diag(rep(1,15)),priorStrength=2) # Compare with self-edge peps with myNetwork1 diag(myNetwork1$pep)-diag(myNetwork3$pep) # Perform network inference with Hamming Prior that prefers self-edges, # and use Empirical Bayes to choose the priorStrength. myNetwork4<-interventionalInference(d$y,d$X0,d$X1,Z,max.indeg=3, perfectOut=TRUE,fixedEffectOut=TRUE, priorType="Hamming",priorGraph=diag(rep(1,15)),priorStrength=0:10/2) # You should always check to see if the Empirical Bayes appears to be working. plotMaxML(myNetwork4) # Now let's try using using the gradients as the response. # Note that we have to tranfser Sigma this time, as it is no longer the identity. d<-formatData(interventionalData,gradients=TRUE,initialIntercept=FALSE) # There are now only 28 observations Z<-Z[c(2:8,10:16,18:24,26:32),] # Perform network inference on gradients with perfect-in interventions. myNetwork5<-interventionalInference(d$y,d$X0,d$X1,Z,max.indeg=3, Sigma=d$Sigma,perfectIn=TRUE,fittedValues=TRUE) # Perform network inference on gradients with perfect-in and -out plus fixed-effect out. myNetwork6<-interventionalInference(d$y,d$X0,d$X1,Z,max.indeg=3, Sigma=d$Sigma,perfectIn=TRUE,perfectOut=TRUE) } interventionalDBN/man/interventionalInferenceAdvanced.Rd00006660000002165312262776067017533 0ustar00\name{interventionalInferenceAdvanced} \alias{interventionalInferenceAdvanced} %- Also NEED an '\alias' for EACH other topic documented here. \title{Dynamic Bayesian Network inference with interventions.} \description{This function performs exact Bayesian inference for dynamic Bayesian networks using microarray timecourse data. Several intervention models can be chosen to take into account the effect of inhibitors.} \usage{ interventionalInferenceAdvanced(y, X0, X1, cond, inhibition, inhibitors, max.indeg, g = NULL, Sigma = NULL, inferParents = NULL, allowSelfEdges = TRUE, perfect = FALSE, fixedEffect = FALSE, mechanismChange = FALSE, priorType = "uninformed", priorGraph = NULL, priorStrength = 3, fittedValues = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{y}{an \eqn{n} by \eqn{P} matrix filled with the response values, where \eqn{n} is the number of observations and \eqn{P} is the number of nodes.} \item{X0}{an \eqn{n} by \eqn{a} matrix - the part of the design matrix that is the same for all models. \eqn{a} is the number of parameters that are in all of the modesl.} \item{X1}{an \eqn{n} by \eqn{P} matrix - the part of the design matrix to undergo model selection. \code{colnames(X1)} provides the labels for the output.} \item{cond}{an \eqn{n} by \eqn{1} matrix giving the experimental condition number of each sample. Filled with integers from 1 to the number of different conditions.} \item{inhibition}{a \eqn{conditions} by \eqn{inhibitors} binary matrix, where element \eqn{(c,i)} is one iff inhibitor \eqn{i} is active in condition \eqn{c}.} \item{inhibitors}{an \eqn{inhibitors} by \eqn{P} binary matrix, where element \eqn{(i,p)} is one iff inhibitor \eqn{i} affects node \eqn{p}.} \item{max.indeg}{The maximum permitted in-degree for each node.} \item{g}{The constant \eqn{g} in Zellner's g-prior. Defaults to \eqn{n}.} \item{Sigma}{an \eqn{n} by \eqn{n} covariance matrix of the responses, divided by \eqn{\sigma^2}. Faster if not specified, in which case the identity matrix is assumed.} \item{inferParents}{a vector of node indices specifying which nodes to infer parents for. If omitted, parents are inferred for all nodes.} \item{allowSelfEdges}{Should self-edges be allowed?} \item{perfect}{Apply perfect-out interventions?} \item{fixedEffect}{Apply fixed-effect-out interventions?} \item{mechanismChange}{Apply mechanism-change-out interventions? Note: cannot be applied with perfect interventions.} \item{priorType}{One of \code{"uninformed"}, \code{"Mukherjee"} and \code{"Hamming"}. In the structural Hamming distance prior, each difference from the edges in \code{priorGraph} incurs a prior penalty of \code{exp(-priorStrength)}. In the Mukherjee-Speed prior, adding edges from outside \code{priorGraph} earns the same penalty as before, but if a prior edge is omitted a penalty is no longer incurred.} \item{priorGraph}{A \eqn{P} by \eqn{P} binary matrix specifying the prior graph. If \eqn{(i,j)=1} then node \eqn{i} influences node \eqn{j}. If omitted, an uninformed prior is used.} \item{priorStrength}{The prior strength parameter. Ignored (but don't set it to NA) if \code{priorGraph} is \code{NULL}. If specified as a vector then the value from that gives the highest marginal likelihood is chosen (Empirical Bayes).} \item{fittedValues}{Perform a second pass to calculate the fitted values?} } \value{ \item{pep}{A \eqn{P} by \eqn{P} matrix of posterior probabilities, where element \eqn{(i,j)} gives the posterior probability that node \eqn{i} influences node \eqn{j}.} \item{MAP}{A \eqn{P} by \eqn{P} binary matrix giving the maximum a posteriori network.} \item{parentSets}{A \code{countGraphs(P,max.indeg)} by \eqn{P} binary matrix, where element \eqn{(m,p}=1) iff node \eqn{i} is a parent in model \eqn{m}.} \item{ll}{A \code{countGraphs(P,max.indeg)} by \eqn{P} matrix, where element \eqn{(m,p)} gives the log-likelihood for model \eqn{m} for node \eqn{p}.} \item{lpost}{A \code{countGraphs(P,max.indeg)} by \eqn{P} matrix, where element \eqn{(m,p)} gives the log-posterior probability for model \eqn{m} for node \eqn{p}.} \item{MAPprob}{A \eqn{P} vector where element \eqn{p} gives the posterior probability of the maximum a posteriori model for node \eqn{p}.} \item{MAPmodel}{A \eqn{P} vector where element \eqn{p} gives the index of the maximum a posterior model for node \eqn{p} (between 1 and \code{countGraphs(P,max.indeg)}.} \item{marginal.likelihood}{A \eqn{P} by \code{length(priorStrength)} matrix that gives the marginal likelihood for each node.} \item{ebPriorStrength}{Value of \code{priorStrength} with the largest marginal likelihood, if \code{priorStrength} is a vector; \code{NULL} otherwise.} \item{yhat}{The posterior expected fitted values, if \code{fittedValues} is TRUE.} \item{inputs}{A list containing the inputs to \code{interventionalInferenceAdvanced}} } \details{ The function \code{\link{interventionalInference}} provides a simpler, but less general way of coding which inhibitors are active in each condition. Currently this advanced version only supports -out forms of the interventions. By default the fixed effects in the fixedEffect intervention are assumed to be additive in samples with multiple inhibitors. However if you do not wish for this to be the case, then you can simply define a dummy inhibitor for each combination of inhibitors and a new fixed effect parameter will be estimated. See example 7 below.} \references{ Spencer, S.E.F, Hill, S.M. and Mukherjee, S. (2012) Dynamic Bayesian networks for interventional data. CRiSM pre-print 12-24.\cr Mukherjee, S. and Speed, T.P. Network inference using informative priors. Proc. Nat. Acad. Sci. USA, 105, 14313-14318. } \author{Simon Spencer} \seealso{\code{\link{interventionalDBN-package}}, \code{\link{interventionalInference}}, \code{\link{formatData}}} \examples{ library(interventionalDBN) data(interventionalData)# loads interventionalData. # Load your own data spreadsheet using myData<-read.csv("myDataFile.csv"). # Format the data for network inference d<-formatData(interventionalData) # Perform network inference without modelling interventions. myNetwork0<-interventionalInferenceAdvanced(d$y,d$X0,d$X1,max.indeg=3,fittedValues=TRUE) # EGFRi is active in conditions 2 and 4, AKTi is active in conditions 3 and 4. myInhibition<-cbind(c(0,1,0,1),c(0,0,1,1)) myInhibitors<-matrix(0,2,15) myInhibitors[1,1]<-1 # EGFRi targets EGFR (node 1). myInhibitors[2,8]<-1 # AKTi targets AKT (node 8). # Perform network inference with perfect and fixed effect interventions. myNetwork1<-interventionalInferenceAdvanced(d$y,d$X0,d$X1,d$cond,max.indeg=3, inhibition=myInhibition,inhibitors=myInhibitors,perfect=TRUE,fixedEffect=TRUE) # Perform network inference on with mechanism change interventions. myNetwork2<-interventionalInferenceAdvanced(d$y,d$X0,d$X1,d$cond,max.indeg=3, inhibition=myInhibition,inhibitors=myInhibitors,mechanismChange=TRUE) # Perform network inference with Mukherjee Prior that prefers to omit self-edges. myNetwork3<-interventionalInferenceAdvanced(d$y,d$X0,d$X1,d$cond,max.indeg=3, inhibition=myInhibition,inhibitors=myInhibitors,perfect=TRUE,fixedEffect=TRUE, priorType="Mukherjee",priorGraph=matrix(1,15,15)-diag(rep(1,15)),priorStrength=2) # Compare with self-edge peps with myNetwork1 diag(myNetwork1$pep)-diag(myNetwork3$pep) # Perform network inference with Hamming Prior that prefers self-edges, # and use Empirical Bayes to choose the priorStrength. myNetwork4<-interventionalInferenceAdvanced(d$y,d$X0,d$X1,d$cond,max.indeg=3, inhibition=myInhibition,inhibitors=myInhibitors,perfect=TRUE,fixedEffect=TRUE, priorType="Hamming",priorGraph=diag(rep(1,15)),priorStrength=0:10/2) # You should always check to see if the Empirical Bayes appears to be working. plotMaxML(myNetwork4) # Now let's try using using the gradients as the response. # Note that we have to tranfser Sigma this time, as it is no longer the identity. d<-formatData(interventionalData,gradients=TRUE,initialIntercept=FALSE) # Perform network inference on gradients with perfect-out interventions. myNetwork5<-interventionalInferenceAdvanced(d$y,d$X0,d$X1,d$cond,max.indeg=3, Sigma=d$Sigma,inhibition=myInhibition,inhibitors=myInhibitors,perfect=TRUE) # So far we have assumed that the fixed effects are additive in EGFRi+AKTi. # Now let's change this, by coding EGFRi+AKTi as a separate inhibitor. d<-formatData(interventionalData) # EGFRi+AKTi is active in condition 4. myInhibition<-cbind(c(0,1,0,0),c(0,0,1,0),c(0,0,0,1)) myInhibitors<-matrix(0,3,15) myInhibitors[1,1]<-1 # EGFRi targets EGFR (node 1). myInhibitors[2,8]<-1 # AKTi targets AKT (node 8). myInhibitors[3,c(1,8)]<-1 # EGFRi+AKTi targets both. # Perform network inference on gradients with fixed effect interventions. myNetwork7<-interventionalInferenceAdvanced(d$y,d$X0,d$X1,d$cond,max.indeg=3, inhibition=myInhibition,inhibitors=myInhibitors,fixedEffect=TRUE) } interventionalDBN/man/linesROC.Rd00006660000000205712262776067012700 0ustar00\name{linesROC} \alias{linesROC} \title{Add an ROC curve to an existing plot.} \description{A simple function to produce an ROC curve from a known edge matrix and a posterior edge probability matrix.} \usage{linesROC(trueMatrix, pep, col = "red", lty = 1, lwd = 1)} %- maybe also 'usage' for other objects documented here. \arguments{ \item{trueMatrix}{The 'true' edge matrix.} \item{pep}{A matrix of posterior edge probabilities.} \item{col}{A colour (passed to segments).} \item{lty}{A line type (passed to segments).} \item{lwd}{A line width (passed to segments).} } \value{The area of the ROC curve.} \details{The area of the ROC curve is also sent to the console.} \author{Simon Spencer} \seealso{\code{\link{interventionalDBN-package}}} \examples{ trueMatrix<-matrix(rbinom(225,1,0.5),15,15) pep<-matrix(runif(225,0.2,1)*trueMatrix+runif(225,0,0.5)*(1-trueMatrix),15,15) plot(0:1, 0:1, t="l", col="grey", xlab="False positive rate", ylab="False negative rate", main="An ROC curve.") linesROC(trueMatrix,pep) } \keyword{ aplot } interventionalDBN/man/nxt.Rd00006660000000111412262776067012024 0ustar00\name{nxt} \alias{nxt} \title{Produces the next set of parents from an existing set of parents (internal).} \description{A function to find the next parent set in the sequence.} \usage{nxt(g, max.indeg)} \arguments{ \item{g}{A binary vector of length \code{nodes}} \item{max.indeg}{The maximum in-degree of the network} } \value{A different binary vector of length \code{nodes}} \author{Simon Spencer} \seealso{\code{\link{countGraphs}},\code{\link{interventionalDBN-package}}} \examples{ g<-rep(0,7) for (i in 1:countGraphs(7,3)) { cat(g,"\n") g<-nxt(g,3) } } interventionalDBN/man/plotMaxML.Rd00006660000000412712262776067013077 0ustar00\name{plotMaxML} \alias{plotMaxML} \title{Plot the performance of maximum marginal likelihood (Empirical Bayes).} \description{Make a plot of the marginal likelihood against the prior strength parameter, highlighting the value used to produce the network.} \usage{plotMaxML(output,xlab="Prior strength",ylab="Marginal likelihood",col.max="red",lty.max=3,lwd.max=1,...)} %- maybe also 'usage' for other objects documented here. \arguments{ \item{output}{The object returned from the interventionalInference function.} \item{xlab}{A label for the prior strength axis.} \item{ylab}{A label for the marginal likelihood axis.} \item{col.max}{The colour of the line highlighting the maximum.} \item{lty.max}{The line type of the highlight.} \item{lwd.max}{The line width of the highlight.} \item{...}{Other arguments, such as \code{main}, which are passed to \code{plot}.} } \details{It is important to check that the Empirical Bayes calculation is doing something sensible.} \author{Simon Spencer} \seealso{\code{\link{interventionalDBN-package}},\code{\link{interventionalInference}}} \examples{ library(interventionalDBN) data(interventionalData)# loads interventionalData. # Load your own data spreadsheet using myData<-read.csv("myDataFile.csv"). # Format the data for network inference d<-formatData(interventionalData) # EGFRi is active in conditions 2 and 4, AKTi is active in conditions 3 and 4. # Each condition has 8 timepoints. Z<-matrix(0,32,15) Z[9:16,1]<-1 # EGFR (node 1) inhibited in condition 2 Z[25:32,1]<-1 # EGFR inhibited in condition 4 Z[17:24,8]<-1 # AKT (node 8) inhibited in condition 3 Z[25:32,8]<-1 # AKT inhibited in condition 4 # Perform network inference with Hamming Prior that prefers self-edges, # and use Empirical Bayes to choose the priorStrength. myNetwork4<-interventionalInference(d$y,d$X0,d$X1,Z,max.indeg=3, perfectOut=TRUE,fixedEffectOut=TRUE, priorType="Hamming",priorGraph=diag(rep(1,15)),priorStrength=0:10/2) # You should always check to see if the Empirical Bayes appears to be working. plotMaxML(myNetwork4) } \keyword{ aplot } interventionalDBN/man/trueMatrix.Rd00006660000000124012262776067013357 0ustar00\name{trueMatrix} \alias{trueMatrix} \docType{data} \title{The true edge matrix used to generate \code{interventionalData}.} \description{The 15 by 15 binary edge matrix that was used to generate the dataset \code{\link{interventionalData}}.} \usage{data(trueMatrix)} \source{Simon Spencer} \seealso{\code{\link{interventionalData}},\code{\link{interventionalDBN-package}}} %\references{} \examples{ data(trueMatrix) pep<-matrix(runif(225,0.2,1)*trueMatrix+runif(225,0,0.5)*(1-trueMatrix),15,15) plot(0:1, 0:1, t="l", col="grey", xlab="False positive rate", ylab="False negative rate",main="An ROC curve.") linesROC(trueMatrix,pep) } \keyword{datasets} interventionalDBN/man/warshall.Rd00006660000000252612262776067013040 0ustar00\name{warshall} \alias{warshall} %- Also NEED an '\alias' for EACH other topic documented here. \title{Find the largest edge probability threshold that connects a pair of nodes.} \description{This function runs a slight variation on the Warshall algorithm to find the largest posterior edge probability threshold that allows each pair of nodes to remain connected. It is useful for calculating ROC curves based on descendancy information.} \usage{warshall(M)} \arguments{ \item{M}{A square matrix of probabilities.} } \details{The Warshall algorithm is \eqn{O(P)^3}, where \eqn{P} is the number of nodes.} \value{A square matrix, where element \eqn{(i,j)} is the largest edge probability threshold that allows \eqn{i} to remain connected to \eqn{j}.} %%\references{} \author{Simon Spencer} %%\note{} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{interventionEffects}}, \code{\link{interventionalDBN-package}}} \examples{ M1<-rbind(c(0.5,1,0),c(0,0,1),c(0,0,0))# A->B->C warshall(M1)# A is upstream of B and C, B is upstream of C. # Note that A is upstream of itself iff there is a cycle. M2<-matrix(runif(25),5,5) warshall(M2) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. %\keyword{ ~kwd1 } %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line