# Dream-disturbance contingency table: 5 age groups (rows) x 4 severity
# categories (columns).  Used as the example data set for the samplers below.
dreams<-structure(c(7, 10, 23, 28, 32, 4, 15, 9, 9, 5, 3, 11, 11, 12, 4,
                    7, 13, 7, 10, 3),
                  .Dim = c(5, 4),
                  .Dimnames = list(c("5-7", "8-9", "10-11", "12-13", "14-15"),
                                   c("low", "below avg", "above avg", "high")))
# FUNCTION FOR BAYESIAN MODEL FOR THE RC MODEL IN TWO WAY CONTINGENCY TABLES
# ---------------------------------------
# FIRST DRAFT SEPTEMBER 2005
# FINAL VERSION: JANUARY 2006
# ---------------------------------------
# CODE BY IOANNIS NTZOUFRAS,
# DEP. OF STATISTICS, ATHENS UNIVERSITY OF ECONOMICS AND BUSINESS
# ON A JOINT WORK WITH G.ILIOPOULOS AND M.KATERI FROM UNIV. OF PIRAEUS
# ------------------------------------------------------------------------------
#
# NORMAL PRIORS FOR ALL PARAMETERS
#
# THREE CONSTRAINTS ON M & V (PHI=-/+ 1)
# v2=1
# m1 = -SUM(mi; i=2...I)
# v1 = -SUM(vj; j=2...J)
#
#
# Metropolis-within-Gibbs sampler for the Bayesian RC association model.
#   y       : I x J contingency table of counts
#   totiter : number of MCMC iterations
#   ROW     : if TRUE the row scores m are sampled (forced FALSE when I==2)
#   COLUMN  : if TRUE the column scores v are sampled (forced FALSE when J==2)
# Returns a data.frame with one row per iteration holding lambda, the row/column
# main effects lx/ly, phi, the scores m and v, the log-likelihood and the
# log-posterior.  Acceptance rates are printed at the end.
# Requires lambda.rc() (defined below) to be in scope.
mcmc.rc<-function( y, totiter=2000, ROW=TRUE, COLUMN=TRUE ){
  #
  # initial parameters regarding data
  I<-dim(y)[1]              # number of rows
  J<-dim(y)[2]              # number of cols
  N<-sum(y)                 # total number of observations
  row.marg<-apply(y,1,sum)  # row marginal distribution
  col.marg<-apply(y,2,sum)  # col marginal distribution
  # -------------------------
  # CHECKING TABLE DIMENSIONS
  # -------------------------
  if (I<2) stop('TABLE DIMENSION PROBLEM: ASSOCIATION MODEL CANNOT BE FITTED IF THE NUMBER OF ROWS ARE LESS THAN 2')
  else if (I==2) {
    print('NUMBER OF ROWS = 2: ROW SCORES WILL BE FIXED TO -0.7, 0.7')
    ROW<-FALSE
  }
  if (J<2) stop('TABLE DIMENSION PROBLEM: ASSOCIATION MODEL CANNOT BE FITTED IF THE NUMBER OF COLUMNS ARE LESS THAN 2')
  else if (J==2) {
    print('NUMBER OF COLUMNS = 2: COLUMN SCORES WILL BE FIXED TO -0.7, 0.7')
    COLUMN<-FALSE
  }
  #
  # priors: independent normals N(mean, s^2) for every parameter
  prior.mean<-list( lx=c( rep(0,I) ) , ly=c( rep(0,J) ) , phi=0,
                    m=c( rep(0,I) ) , v=c( rep(0,J) ) )
  prior.s   <-list( lx=c( rep(100,I) ), ly=c( rep(100,J) ), phi=100,
                    m=c( rep(100,I) ), v=c( rep(100,J) ) )
  #
  # proposal parameters (random-walk standard deviations, tuned by hand)
  if (!ROW & !COLUMN) {
    prop.s<-list( lx=c( rep(0.35,I) ), ly=c( rep(0.25,J) ), phi=0.35,
                  m=c( rep(0.35,I) ), v=c( rep(0.4,J) ))
  } else{
    prop.s<-list( lx=c( rep(0.35,I) ), ly=c( rep(0.25,J) ), phi=0.65,
                  m=c( rep(0.15,I) ), v=c( rep(0.75,J) ))
  }
  # when both score sets are sampled, v2 is fixed to 1, so v sampling starts at 3
  if ( ROW & COLUMN ) { startv<- 3 } else startv<-2
  #
  # variables definition
  #
  # for total sample
  output<-as.data.frame( matrix(nrow=totiter, ncol=4+2*I+2*J))
  names(output)<-c( 'l', paste('lx',1:I,sep='') , paste('ly',1:J,sep='') ,
                    'phi', paste('m',1:I,sep='') , paste('v',1:J,sep=''),
                    'loglike', 'logpost' )
  # initial current values
  cur<-list( l=0, lx=log(row.marg)- mean(log(row.marg)) )
  cur$ly<-log(col.marg)- mean(log(col.marg))
  cur$phi<- 1.0   # initial values for phi
  cur$m<-1:I*0    # NOTE(review): dead assignment, overwritten by 'test' below
  test<-1:I
  test<-test-mean(test)
  test<-test/sqrt( sum(test^2) )   # centered, unit-length equispaced scores
  cur$m<-test
  if (ROW) {
    cur$m[2]<- 1
    cur$m[1]<- -sum(cur$m[2:I])    # sum-to-zero constraint on m
  }
  cur$v<-1:J*0    # NOTE(review): dead assignment, overwritten by 'test' below
  test<-1:J
  test<-test-mean(test)
  test<-test/sqrt( sum(test^2) )
  cur$v<-test
  if (COLUMN) {
    cur$v[2]<-1
    cur$v[1]<- -sum(cur$v[2:J])    # sum-to-zero constraint on v
  }
  cur$l <- lambda.rc( I,J, cur )   # initial value for constant lambda
  #
  prop<-cur   # initial values for proposed values
  #
  # define probabilities of acceptance
  acc.prob<-list( lx=rep(0,I) )
  acc.prob$ly<-rep(0,J)
  acc.prob$m<-rep(0,I)
  acc.prob$v<-rep(0,J)
  acc.prob$phi<-0
  #
  for (iter in 1:totiter) {
    # --------------------------------
    # STEP1: sampling of lx
    # --------------------------------
    for (i in 2:I ){
      prop<-cur                                       # set proposed values to current
      prop$lx[i]<-rnorm(1, cur$lx[i], prop.s$lx[i] )  # propose new lx[i]
      prop$lx[1]<- -sum(prop$lx[2:I])                 # calculate new lx[1]
      prop$l<-lambda.rc( I,J, prop )                  # calculate new lambda
      #
      # calculate mhratio (log Metropolis-Hastings ratio)
      # *** likelihood difference
      mhratio<- N*(prop$l-cur$l)+(row.marg[i]-row.marg[1])*(prop$lx[i]-cur$lx[i])
      # *** prior difference
      mhratio<-mhratio - 0.5*( (prop$lx[i]-prior.mean$lx[i])/prior.s$lx[i] )^2
      mhratio<-mhratio + 0.5*( (cur$lx[i] -prior.mean$lx[i])/prior.s$lx[i] )^2
      #
      if ( log(runif(1)) <= mhratio ){
        cur<-prop
        acc.prob$lx[i]<-acc.prob$lx[i]+1
      }
    }
    # --------------------------------
    # STEP2: sampling of ly
    # --------------------------------
    for (j in 2:J ){
      prop<-cur                                       # set proposed values to current
      prop$ly[j]<-rnorm(1, cur$ly[j], prop.s$ly[j] )  # propose new ly[j]
      prop$ly[1]<- -sum(prop$ly[2:J])                 # calculate new ly[1]
      prop$l<-lambda.rc( I,J, prop )                  # calculate new lambda
      #
      # calculate mhratio
      # *** likelihood difference
      mhratio<- N*(prop$l-cur$l)+(col.marg[j]-col.marg[1])*(prop$ly[j]-cur$ly[j])
      # prior difference
      mhratio<-mhratio + 0.5*( (cur$ly[j] -prior.mean$ly[j])/prior.s$ly[j] )^2
      mhratio<-mhratio - 0.5*( (prop$ly[j]-prior.mean$ly[j])/prior.s$ly[j] )^2
      #
      if ( log(runif(1)) <= mhratio ){
        cur<-prop
        acc.prob$ly[j]<-acc.prob$ly[j]+1
      }
    }
    # --------------------------------
    # STEP3: sampling of phi
    # phi is only sampled when both score vectors are fixed; otherwise it is
    # held at +1 (identifiability constraint).
    # --------------------------------
    yesphi<-FALSE
    if ( (!ROW)&(!COLUMN) ) yesphi<-TRUE
    if(yesphi){
      prop<-cur                                    # set proposed values to current
      prop$phi<-rnorm(1, cur$phi, prop.s$phi )     # propose new phi
      prop$l<-lambda.rc( I,J, prop )               # calculate new lambda
      #
      # calculate mhratio
      # *** likelihood difference
      # calculate tables with m's and v's
      tabm<-matrix( cur$m, nrow=I, ncol=J )
      tabv<-matrix( cur$v, nrow=I, ncol=J, byrow=TRUE )
      TABSUM<- sum(y*tabm*tabv)   # calculate sum y[i,j]*m[i]*v[j]
      mhratio<- N*(prop$l-cur$l)+TABSUM*(prop$phi-cur$phi)
      # prior difference
      mhratio<-mhratio + 0.5*( (cur$phi -prior.mean$phi)/prior.s$phi )^2
      mhratio<-mhratio - 0.5*( (prop$phi-prior.mean$phi)/prior.s$phi )^2
      #
      if ( log(runif(1)) <= mhratio ){
        cur<-prop
        acc.prob$phi<-acc.prob$phi+1
      }
    } else cur$phi<-1
    # -----------------------------------
    # STEP4: sampling of m (row scores)
    # -----------------------------------
    #
    # sampling of m
    if (ROW) {
      for (i in 2:I ){
        prop<-cur                                     # set proposed values to current
        prop$m[i]<-rnorm(1, cur$m[i], prop.s$m[i] )   # propose new m[i]
        prop$m[1]<- -sum( prop$m[2:I] )               # STZ constraint
        # prop$m[I]<- 1 # constraint m2=1
        prop$l<-lambda.rc( I,J, prop )                # calculate new lambda
        #
        # calculate mhratio
        # *** likelihood difference
        mhratio<- N*(prop$l-cur$l) + cur$phi * (prop$m[i]-cur$m[i]) * sum( cur$v * (y[i,]-y[1,]) )
        # *** prior difference
        mhratio<-mhratio + 0.5*( (cur$m[i] -prior.mean$m[i])/prior.s$m[i] )^2
        mhratio<-mhratio - 0.5*( (prop$m[i]-prior.mean$m[i])/prior.s$m[i] )^2
        #
        if ( log(runif(1)) <= mhratio ){
          cur<-prop
          acc.prob$m[i]<-acc.prob$m[i]+1
        }# end of MHUPDATE IF
      }# END OF m loop
    }# end of ROW if
    # -----------------------------------
    # STEP5: sampling of v (column scores)
    # -----------------------------------
    #
    if (COLUMN){
      for (j in startv:J ){
        prop<-cur                                     # set proposed values to current
        prop$v[j]<-rnorm(1, cur$v[j], prop.s$v[j] )   # propose new v[j]
        if (startv==3) prop$v[2]<- 1                  # constraint v2=1
        prop$v[1]<- -sum( prop$v[2:J] )               # STZ constraint
        #
        # # prop$v<-prop$v/sqrt(sum(prop$v^2))
        prop$l<-lambda.rc( I,J, prop )                # calculate new lambda
        #
        # calculate mhratio
        # *** likelihood difference
        mhratio<- N*(prop$l-cur$l) + cur$phi * (prop$v[j]-cur$v[j]) * sum( cur$m * (y[,j]-y[,1]) )
        # sum(y* matrix( prop$m, nrow=I, ncol=J)* matrix( prop$v-cur$v, nrow=I, ncol=J, byrow=TRUE ))
        # *** prior difference
        mhratio<-mhratio + 0.5*( (cur$v[j] -prior.mean$v[j])/prior.s$v[j] )^2
        mhratio<-mhratio - 0.5*( (prop$v[j]-prior.mean$v[j])/prior.s$v[j] )^2
        #
        if ( log(runif(1)) <= mhratio ){
          cur<-prop
          acc.prob$v[j]<-acc.prob$v[j]+1
        } # end of MHUPDATE IF
      } # END OF v loop
    } # end of COLUMN if
    #
    #
    # calculation of the loglikelihood for each iteration
    # calculate tables with m's and v's
    tabm<-matrix( cur$m, nrow=I, ncol=J )
    tabv<-matrix( cur$v, nrow=I, ncol=J, byrow=TRUE )
    TABSUM<- sum(y*tabm*tabv)   # calculate sum y[i,j]*m[i]*v[j]
    loglike <- lgamma( N+1 ) - sum( lgamma(y+1) ) + N * cur$l + sum( row.marg*cur$lx )+ sum( col.marg*cur$ly )+ cur$phi*TABSUM
    #
    #
    # calculation of logposterior (log-likelihood + normal log-priors on the
    # free parameters only; lx1/ly1/m1/v1 are determined by the constraints)
    logpost <- loglike + sum(dnorm( cur$lx[2:I] , prior.mean$lx[2:I] , prior.s$lx[2:I] , log=TRUE ))
    logpost <- logpost + sum(dnorm( cur$ly[2:J] , prior.mean$ly[2:J] , prior.s$ly[2:J] , log=TRUE ))
    logpost <- logpost + dnorm( cur$phi, prior.mean$phi, prior.s$phi, log=TRUE )
    logpost <- logpost + sum(dnorm( cur$m[2:I] , prior.mean$m[2:I] , prior.s$m[2:I] , log=TRUE ))
    logpost <- logpost + sum(dnorm( cur$v[startv:J] , prior.mean$v[startv:J] , prior.s$v[startv:J] , log=TRUE ))
    #
    output[iter,]<- c( cur$l, cur$lx, cur$ly, cur$phi,cur$m, cur$v, loglike, logpost )
    # print (c( cur$l, cur$lx, cur$ly, cur$phi, cur$m, cur$v ))
    # print ( iter)
    print ( output[iter,])
  }
  # convert acceptance counts to acceptance rates
  for (k in 1:length(acc.prob) ){ acc.prob[[k]]<-acc.prob[[k]]/totiter }
  print ( acc.prob )
  output
}

# Log of the normalizing constant lambda of the RC model:
# lambda = -log( sum_ij exp( lx[i] + ly[j] + phi*m[i]*v[j] ) ),
# computed with the max subtracted before exponentiating for numerical
# stability (log-sum-exp trick).
#   IMAX, JMAX : table dimensions
#   parameters : list with elements lx, ly, phi, m, v (as in mcmc.rc)
lambda.rc<-function( IMAX, JMAX, parameters ){
  tab<-matrix( nrow=IMAX, ncol=JMAX )   # NOTE(review): dead, overwritten below
  #
  tabx<-matrix( parameters$lx, nrow=IMAX, ncol=JMAX )
  taby<-matrix( parameters$ly, nrow=IMAX, ncol=JMAX, byrow=TRUE )
  tabm<-matrix( parameters$m, nrow=IMAX, ncol=JMAX )
  tabv<-matrix( parameters$v, nrow=IMAX, ncol=JMAX, byrow=TRUE )
  #
  tab<-tabx+taby+parameters$phi*tabm*tabv   # linear predictor for every cell
  #
  tab2<-exp(tab-max(tab))
  #
  l<- -log( sum(tab2) )-max(tab)
  l
}
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# FUNCTION FOR BAYESIAN MODEL FOR THE ORDINAL RC MODEL IN TWO WAY
# FUNCTION FOR BAYESIAN MODEL FOR THE ORDINAL RC MODEL IN TWO WAY
# CONTINGENCY TABLES
# ---------------------------------------
# FIRST DRAFT SEPTEMBER 2005
# FINAL VERSION: MARCH 2006
# ---------------------------------------
# CODE BY IOANNIS NTZOUFRAS,
# DEP. OF STATISTICS, ATHENS UNIVERSITY OF ECONOMICS AND BUSINESS
# ON A JOINT WORK WITH G.ILIOPOULOS AND M.KATERI FROM UNIV. OF PIRAEUS
# ------------------------------------------------------------------------------
#
# NORMAL PRIORS FOR ALL LAMBDA's
#
# THREE CONSTRAINTS ON M & V (PHI=-/+ 1)
# v2=1
# m1 = -SUM(mi; i=2...I)
# v1 = -SUM(vj; j=2...J)
#
# GAMMA PRIORS ON BETAs (SCORE DIFFERENCES)
#
# Metropolis-within-Gibbs sampler for the ORDINAL Bayesian RC model: the row
# and column scores are forced to be increasing by parameterizing them through
# positive score differences dm, dv (m = cumsum(dm), v = cumsum(dv)) which get
# gamma priors and log-normal random-walk proposals.  phi is fixed at -1.
#   y       : I x J contingency table of counts
#   totiter : number of MCMC iterations
#   ROW     : sample the row scores (forced FALSE when I==2)
#   COLUMN  : sample the column scores (forced FALSE when J==2)
# Returns a data.frame with one row per iteration (lambda, lx, ly, phi, m, v,
# dm, dv, loglike, logpost); prints acceptance rates at the end.
# Requires lambda.rc() to be in scope.
mcmc.rc.ordinal<-function( y, totiter=2000, ROW=TRUE, COLUMN=TRUE ){
  #
  # initial parameters regarding data
  I<-dim(y)[1]              # number of rows
  J<-dim(y)[2]              # number of cols
  N<-sum(y)                 # total number of observations
  row.marg<-apply(y,1,sum)  # row marginal distribution
  col.marg<-apply(y,2,sum)  # col marginal distribution
  # -------------------------
  # CHECKING TABLE DIMENSIONS
  # -------------------------
  if (I<2) stop('TABLE DIMENSION PROBLEM: ASSOCIATION MODEL CANNOT BE FITTED IF THE NUMBER OF ROWS ARE LESS THAN 2')
  else if (I==2) {
    print('NUMBER OF ROWS = 2: ROW SCORES WILL BE FIXED TO -0.7, 0.7')
    ROW<-FALSE
  }
  if (J<2) stop('TABLE DIMENSION PROBLEM: ASSOCIATION MODEL CANNOT BE FITTED IF THE NUMBER OF COLUMNS ARE LESS THAN 2')
  else if (J==2) {
    print('NUMBER OF COLUMNS = 2: COLUMN SCORES WILL BE FIXED TO -0.7, 0.7')
    COLUMN<-FALSE
  }
  #
  # priors: normals for lx/ly/phi, gammas for the score differences dm/dv
  prior.mean<-list( lx=c( rep(0,I) ) , ly=c( rep(0,J) ) , phi=0,
                    dm=c( rep(1,I) ) , dv=c( rep(1,J) ) )
  prior.s   <-list( lx=c( rep(100,I) ), ly=c( rep(100,J) ), phi=100,
                    dm=c( rep(100,I) ), dv=c( rep(100,J) ) )
  # gamma shape (a) and rate (b) matching the above means and st.deviations
  prior.bm<-prior.mean$dm/prior.s$dm^2
  prior.am<-prior.bm*prior.mean$dm
  prior.bv<-prior.mean$dv/prior.s$dv^2
  prior.av<-prior.bv*prior.mean$dv
  #
  # proposal parameters
  # NOTE(review): the tuned dm/dv proposal sds below are hard-coded for a
  # 5 x 4 table (length 5 and 4) -- confirm before using other dimensions.
  if (!ROW & !COLUMN) {
    prop.s<-list( lx=c( rep(0.35,I) ), ly=c( rep(0.25,J) ), phi=0.35,
                  dm=c( rep(0.35,I) ), dv=c( rep(0.4,J) ))
  } else{
    prop.s<-list( lx=c( rep(0.35,I) ), ly=c( rep(0.25,J) ), phi=0.65,
                  dm=c( NA, 2.40, 1.20, 2.10, 1.10 ), dv=c( NA, NA, 1.3, 1.1))
  }
  # with both score sets sampled, v2 is fixed to 1, so v sampling starts at 3
  if ( ROW & COLUMN ) { startv<- 3 } else startv<-2
  #
  # variables definition
  #
  # for total sample
  output<-as.data.frame( matrix(nrow=totiter, ncol=4+3*I+3*J))
  names(output)<-c( 'l', paste('lx',1:I,sep='') , paste('ly',1:J,sep='') ,
                    'phi', paste('m',1:I,sep='') , paste('v',1:J,sep=''),
                    paste('dm',1:I,sep='') , paste('dv',1:J,sep=''),
                    'loglike', 'logpost' )
  # initial current values
  cur<-list( l=0, lx=log(row.marg)- mean(log(row.marg)) )
  cur$ly<-log(col.marg)- mean(log(col.marg))
  cur$phi<- -1.0   # phi is fixed at -1 in this parameterization
  #
  # initial score differences (dm[1]/dv[1] chosen so the scores sum to zero)
  cur$dm<- c( NA, 0.01, rep(0.1, I-2) )
  cur$dv<- c( NA, rep(0.1, J-1) )
  cur$dm[1]<- -sum( seq(I-1,1,-1)*cur$dm[2:I] )/I
  cur$dv[1]<- -sum( seq(J-1,1,-1)*cur$dv[2:J] )/J
  if (ROW) cur$m<-cumsum(cur$dm)
  else{
    temp<-1:I
    temp<-temp-mean(temp)
    cur$m<-temp/sqrt(sum(temp^2))
    cur$dm<-c( cur$m[1], cur$m[2:I]-cur$m[1:(I-1)] )
  }
  if (COLUMN) cur$v<-cumsum(cur$dv)
  else{
    temp<-1:J
    temp<-temp-mean(temp)
    cur$v<-temp/sqrt(sum(temp^2))
    cur$dv<-c( cur$v[1], cur$v[2:J]-cur$v[1:(J-1)] )
  }
  if (ROW&COLUMN) {
    # joint parameterization: STZ constraint plus m2=v2=1
    cur$dm<- c( NA, NA, rep(0.1, I-2) )
    cur$dv<- c( NA, NA, rep(1, J-2) )
    cur$dm[1]<- - sum( seq( I-2, 1, -1) * cur$dm[3:I] ) -(I-1)
    cur$dv[1]<- - sum( seq( J-2, 1, -1) * cur$dv[3:J] ) -(J-1)
    cur$dm[2]<- 1 - cur$dm[1]
    cur$dv[2]<- 1 - cur$dv[1]
  }
  cur$m<-cumsum(cur$dm)
  cur$v<-cumsum(cur$dv)
  #
  cur$l <- lambda.rc( I,J, cur )   # initial value for constant lambda
  prop<-cur                        # initial values for proposed values
  #
  # define probabilities of acceptance
  acc.prob<-list( lx=rep(0,I) )
  acc.prob$ly<-rep(0,J)
  acc.prob$m<-rep(0,I)
  acc.prob$v<-rep(0,J)
  acc.prob$phi<-0
  #
  for (iter in 1:totiter) {
    # --------------------------------
    # STEP1: sampling of lx
    # --------------------------------
    for (i in 2:I ){
      prop<-cur                                       # set proposed values to current
      prop$lx[i]<-rnorm(1, cur$lx[i], prop.s$lx[i] )  # propose new lx[i]
      prop$lx[1]<- -sum(prop$lx[2:I])                 # calculate new lx[1]
      prop$l<-lambda.rc( I,J, prop )                  # calculate new lambda
      #
      # calculate mhratio
      # *** likelihood difference
      mhratio<- N*(prop$l-cur$l)+(row.marg[i]-row.marg[1])*(prop$lx[i]-cur$lx[i])
      # *** prior difference
      mhratio<-mhratio - 0.5*( (prop$lx[i]-prior.mean$lx[i])/prior.s$lx[i] )^2
      mhratio<-mhratio + 0.5*( (cur$lx[i] -prior.mean$lx[i])/prior.s$lx[i] )^2
      #
      if ( log(runif(1)) <= mhratio ){
        cur<-prop
        acc.prob$lx[i]<-acc.prob$lx[i]+1
      }
    }
    # --------------------------------
    # STEP2: sampling of ly
    # --------------------------------
    for (j in 2:J ){
      prop<-cur                                       # set proposed values to current
      prop$ly[j]<-rnorm(1, cur$ly[j], prop.s$ly[j] )  # propose new ly[j]
      prop$ly[1]<- -sum(prop$ly[2:J])                 # calculate new ly[1]
      prop$l<-lambda.rc( I,J, prop )                  # calculate new lambda
      #
      # calculate mhratio
      # *** likelihood difference
      mhratio<- N*(prop$l-cur$l)+(col.marg[j]-col.marg[1])*(prop$ly[j]-cur$ly[j])
      # prior difference
      mhratio<-mhratio + 0.5*( (cur$ly[j] -prior.mean$ly[j])/prior.s$ly[j] )^2
      mhratio<-mhratio - 0.5*( (prop$ly[j]-prior.mean$ly[j])/prior.s$ly[j] )^2
      #
      if ( log(runif(1)) <= mhratio ){
        cur<-prop
        acc.prob$ly[j]<-acc.prob$ly[j]+1
      }
    }
    # --------------------------------
    # STEP3: phi is not sampled in the ordinal model; it is fixed at -1.
    # (The original code carried a never-executed sampling branch guarded by
    # yesphi<-FALSE; that unreachable code has been removed.)
    # --------------------------------
    cur$phi<- -1
    # -----------------------------------
    # STEP4: sampling of m (row scores) via the positive differences dm
    # -----------------------------------
    if (ROW) {
      for (i in 2:I ){
        prop<-cur                                  # set proposed values to current
        # log-normal random walk on dm[i] (keeps it positive)
        u<-rnorm(1, log(cur$dm[i]), prop.s$dm[i])
        prop$dm[i]<-exp(u)
        # calculate dm1 so the resulting scores satisfy the STZ constraint
        prop$dm[1] <- - sum( seq( I-1, 1, -1) * prop$dm[2:I] )/I
        # calculate proposed m's
        prop$m<- cumsum(prop$dm)
        # calculate proposed l
        prop$l<-lambda.rc( I,J, prop )             # calculate new lambda
        #
        # calculate mhratio
        # *** likelihood difference
        tabm<-matrix( prop$m-cur$m, nrow=I, ncol=J )
        tabv<-matrix( cur$v, nrow=I, ncol=J, byrow=TRUE )
        TABSUM<- sum(y*tabm*tabv)   # calculate sum y[i,j]*(m'[i]-m[i])*v[j]
        mhratio<- N*(prop$l-cur$l)+TABSUM*cur$phi
        # *** prior difference
        # NOTE(review): only the rate part of the gamma prior enters here,
        # i.e. the sampler effectively uses Gamma(1, prior.bm) -- confirm
        # consistency with the dgamma(prior.am, prior.bm) term in logpost.
        mhratio<-mhratio - prior.bm[i]*(prop$dm[i]-cur$dm[i])
        # proposal ratio of the log-normal random walk
        mhratio<-mhratio+log(prop$dm[i]/cur$dm[i])
        if ( log(runif(1)) <= mhratio ){
          cur<-prop
          acc.prob$m[i]<-acc.prob$m[i]+1
        }# end of MHUPDATE IF
      }# END OF m loop
    }# end of ROW if
    # -----------------------------------
    # STEP5: sampling of v (column scores) via the positive differences dv
    # -----------------------------------
    #
    if (COLUMN){
      for (j in startv:J ){
        prop<-cur   # set proposed values to current
        prop$dv[j] <- rlnorm( 1, log(cur$dv[j]), prop.s$dv[j] )
        #
        # parametrization with v[2]=1
        if(startv==2){
          # calculate dv1 if we have column model
          prop$dv[1] <- - sum( seq( J-1, 1, -1) * prop$dv[2:J] )/J
        } else{
          # calculate dv1 dv2 if we have rc model
          prop$dv[1]<- - sum( seq( J-2, 1, -1) * prop$dv[3:J] ) -(J-1)
          prop$dv[2]<- 1-prop$dv[1]
        }
        #
        prop$v<- cumsum(prop$dv)
        prop$l<-lambda.rc( I,J, prop )   # calculate new lambda
        #
        # calculate mhratio
        # *** likelihood difference
        tabm<-matrix( cur$m, nrow=I, ncol=J )
        tabv<-matrix( prop$v-cur$v, nrow=I, ncol=J, byrow=TRUE )
        TABSUM<- sum(y*tabm*tabv)   # calculate sum y[i,j]*m[i]*(v'[j]-v[j])
        mhratio<- N*(prop$l-cur$l)+TABSUM*cur$phi
        # *** prior difference (rate part of the gamma prior, see STEP4 note)
        mhratio<-mhratio - prior.bv[j]*(prop$dv[j]-cur$dv[j])
        # proposal ratio of log-normals
        mhratio<-mhratio+log(prop$dv[j]/cur$dv[j])
        if ( log(runif(1)) <= mhratio ){
          cur<-prop
          acc.prob$v[j]<-acc.prob$v[j]+1
        } # end of MHUPDATE IF
      } # END OF v loop
    } # end of COLUMN if
    #
    #
    # calculation of the loglikelihood for each iteration
    # calculate tables with m's and v's
    tabm<-matrix( cur$m, nrow=I, ncol=J )
    tabv<-matrix( cur$v, nrow=I, ncol=J, byrow=TRUE )
    TABSUM<- sum(y*tabm*tabv)   # calculate sum y[i,j]*m[i]*v[j]
    loglike <- lgamma( N+1 ) - sum( lgamma(y+1) ) + N * cur$l + sum( row.marg*cur$lx )+ sum( col.marg*cur$ly )+ cur$phi*TABSUM
    #
    #
    # calculation of logposterior
    logpost <- loglike + sum(dnorm( cur$lx[2:I] , prior.mean$lx[2:I] , prior.s$lx[2:I] , log=TRUE ))
    logpost <- logpost + sum(dnorm( cur$ly[2:J] , prior.mean$ly[2:J] , prior.s$ly[2:J] , log=TRUE ))
    logpost <- logpost + dnorm( cur$phi, prior.mean$phi, prior.s$phi, log=TRUE )
    logpost <- logpost + sum(dgamma( cur$dm[2:I] , prior.am[2:I] , prior.bm[2:I] , log=TRUE ))
    # FIX: the rate vector was indexed prior.bv[startv:3] (recycled against
    # cur$dv[startv:J]); it must align with the values, i.e. prior.bv[startv:J].
    logpost <- logpost + sum(dgamma( cur$dv[startv:J] , prior.av[startv:J] , prior.bv[startv:J] , log=TRUE ))
    #
    output[iter,]<- c( cur$l, cur$lx, cur$ly, cur$phi,cur$m, cur$v, cur$dm, cur$dv, loglike, logpost )
    print ( output[iter,])
  }
  # convert acceptance counts to acceptance rates
  for (k in 1:length(acc.prob) ){ acc.prob[[k]]<-acc.prob[[k]]/totiter }
  print ( acc.prob )
  output
}
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# FUNCTION FOR BAYESIAN MODEL FOR THE ORDINAL RC MODEL IN TWO WAY CONTINGENCY TABLES
# ---------------------------------------
# FIRST DRAFT SEPTEMBER 2005
# FINAL VERSION: MARCH 2006
# ---------------------------------------
# CODE BY IOANNIS NTZOUFRAS,
# DEP. OF STATISTICS, ATHENS UNIVERSITY OF ECONOMICS AND BUSINESS
# ON A JOINT WORK WITH G.ILIOPOULOS AND M.KATERI FROM UNIV. OF PIRAEUS
# ------------------------------------------------------------------------------
#
# NORMAL PRIORS FOR ALL LAMBDAs
#
# THREE CONSTRAINTS ON M & V (PHI=-/+ 1)
# v2=1
# m1 = -SUM(mi; i=2...I)
# v1 = -SUM(vj; j=2...J)
#
# GAMMA PRIORS ON BETAs (SCORE DIFFERENCES)
#
# Variant of mcmc.rc.ordinal that allows selected score differences to be
# pinned at zero (ties between adjacent scores):
#   zerodm2, zerodm4 : force dm[2] / dm[4] to 0 after each row-score step
#   zerodv3          : force dv[3] to 0 after each column-score step
# A difference held at 0 is skipped by the log-normal sampler (the >0 guards)
# and excluded from the gamma prior terms of the log-posterior.
# Other arguments and the returned data.frame are as in mcmc.rc.ordinal.
# Requires lambda.rc() to be in scope.
mcmc.rc.ordinal.zeros<-function( y, totiter=2000, ROW=TRUE, COLUMN=TRUE, zerodm2=FALSE, zerodm4=FALSE, zerodv3=FALSE ){
  #
  # initial parameters regarding data
  I<-dim(y)[1]              # number of rows
  J<-dim(y)[2]              # number of cols
  N<-sum(y)                 # total number of observations
  row.marg<-apply(y,1,sum)  # row marginal distribution
  col.marg<-apply(y,2,sum)  # col marginal distribution
  # -------------------------
  # CHECKING TABLE DIMENSIONS
  # -------------------------
  if (I<2) stop('TABLE DIMENSION PROBLEM: ASSOCIATION MODEL CANNOT BE FITTED IF THE NUMBER OF ROWS ARE LESS THAN 2')
  else if (I==2) {
    print('NUMBER OF ROWS = 2: ROW SCORES WILL BE FIXED TO -0.7, 0.7')
    ROW<-FALSE
  }
  if (J<2) stop('TABLE DIMENSION PROBLEM: ASSOCIATION MODEL CANNOT BE FITTED IF THE NUMBER OF COLUMNS ARE LESS THAN 2')
  else if (J==2) {
    print('NUMBER OF COLUMNS = 2: COLUMN SCORES WILL BE FIXED TO -0.7, 0.7')
    COLUMN<-FALSE
  }
  #
  # priors: normals for lx/ly/phi, gammas for the score differences dm/dv
  prior.mean<-list( lx=c( rep(0,I) ) , ly=c( rep(0,J) ) , phi=0, dm=c( rep(1,I) ) , dv=c( rep(1,J) ) )
  prior.s <-list( lx=c( rep(100,I) ), ly=c( rep(100,J) ), phi=100, dm=c( rep(100,I) ), dv=c( rep(100,J) ) )
  # gamma shape (a) and rate (b) matching the above means and st.deviations
  prior.bm<-prior.mean$dm/prior.s$dm^2
  prior.am<-prior.bm*prior.mean$dm
  prior.bv<-prior.mean$dv/prior.s$dv^2
  prior.av<-prior.bv*prior.mean$dv
  #
  # proposal parameters
  # NOTE(review): the tuned dm/dv proposal sds below are hard-coded for a
  # 5 x 4 table (length 5 and 4) -- confirm before using other dimensions.
  if (!ROW & !COLUMN) {
    prop.s<-list( lx=c( rep(0.35,I) ), ly=c( rep(0.25,J) ), phi=0.35, dm=c( rep(0.35,I) ), dv=c( rep(0.4,J) ))
  } else{
    prop.s<-list( lx=c( rep(0.35,I) ), ly=c( rep(0.25,J) ), phi=0.65, dm=c( NA, 2.40, 1.20, 2.10, 1.10 ), dv=c( NA, NA, 1.3, 1.1))
  }
  # with both score sets sampled, v2 is fixed to 1, so v sampling starts at 3
  if ( ROW & COLUMN ) { startv<- 3 } else startv<-2
  #
  # variables definition
  #
  # for total sample
  output<-as.data.frame( matrix(nrow=totiter, ncol=4+3*I+3*J))
  names(output)<-c( 'l', paste('lx',1:I,sep='') , paste('ly',1:J,sep='') , 'phi', paste('m',1:I,sep='') , paste('v',1:J,sep=''), paste('dm',1:I,sep='') , paste('dv',1:J,sep=''), 'loglike', 'logpost' )
  # initial current values
  cur<-list( l=0, lx=log(row.marg)- mean(log(row.marg)) )
  cur$ly<-log(col.marg)- mean(log(col.marg))
  cur$phi<- -1.0   # initial values for phi (held at -1 throughout)
  #
  # initial score differences (dm[1]/dv[1] chosen so the scores sum to zero)
  cur$dm<- c( NA, 0.01, rep(0.1, I-2) )
  cur$dv<- c( NA, rep(0.1, J-1) )
  cur$dm[1]<- -sum( seq(I-1,1,-1)*cur$dm[2:I] )/I
  cur$dv[1]<- -sum( seq(J-1,1,-1)*cur$dv[2:J] )/J
  if (ROW) cur$m<-cumsum(cur$dm)
  else{
    temp<-1:I
    temp<-temp-mean(temp)
    cur$m<-temp/sqrt(sum(temp^2))
    cur$dm<-c( cur$m[1], cur$m[2:I]-cur$m[1:(I-1)] )
  }
  if (COLUMN) cur$v<-cumsum(cur$dv)
  else{
    temp<-1:J
    temp<-temp-mean(temp)
    cur$v<-temp/sqrt(sum(temp^2))
    cur$dv<-c( cur$v[1], cur$v[2:J]-cur$v[1:(J-1)] )
  }
  if (ROW&COLUMN) {
    # joint parameterization: STZ constraint plus m2=v2=1
    cur$dm<- c( NA, NA, rep(0.1, I-2) )
    cur$dv<- c( NA, NA, rep(1, J-2) )
    cur$dm[1]<- - sum( seq( I-2, 1, -1) * cur$dm[3:I] ) -(I-1)
    cur$dv[1]<- - sum( seq( J-2, 1, -1) * cur$dv[3:J] ) -(J-1)
    cur$dm[2]<- 1 - cur$dm[1]
    cur$dv[2]<- 1 - cur$dv[1]
  }
  cur$m<-cumsum(cur$dm)
  cur$v<-cumsum(cur$dv)
  #
  cur$l <- lambda.rc( I,J, cur )   # initial value for constant lambda
  prop<-cur                        # initial values for proposed values
  #
  # define probabilities of acceptance
  acc.prob<-list( lx=rep(0,I) )
  acc.prob$ly<-rep(0,J)
  acc.prob$m<-rep(0,I)
  acc.prob$v<-rep(0,J)
  acc.prob$phi<-0
  #
  for (iter in 1:totiter) {
    # --------------------------------
    # STEP1: sampling of lx
    # --------------------------------
    for (i in 2:I ){
      prop<-cur                                       # set proposed values to current
      prop$lx[i]<-rnorm(1, cur$lx[i], prop.s$lx[i] )  # propose new lx[i]
      prop$lx[1]<- -sum(prop$lx[2:I])                 # calculate new lx[1]
      prop$l<-lambda.rc( I,J, prop )                  # calculate new lambda
      #
      # calculate mhratio
      # *** likelihood difference
      mhratio<- N*(prop$l-cur$l)+(row.marg[i]-row.marg[1])*(prop$lx[i]-cur$lx[i])
      # *** prior difference
      mhratio<-mhratio - 0.5*( (prop$lx[i]-prior.mean$lx[i])/prior.s$lx[i] )^2
      mhratio<-mhratio + 0.5*( (cur$lx[i] -prior.mean$lx[i])/prior.s$lx[i] )^2
      #
      if ( log(runif(1)) <= mhratio ){
        cur<-prop
        acc.prob$lx[i]<-acc.prob$lx[i]+1
      }
    }
    # --------------------------------
    # STEP2: sampling of ly
    # --------------------------------
    for (j in 2:J ){
      prop<-cur                                       # set proposed values to current
      prop$ly[j]<-rnorm(1, cur$ly[j], prop.s$ly[j] )  # propose new ly[j]
      prop$ly[1]<- -sum(prop$ly[2:J])                 # calculate new ly[1]
      prop$l<-lambda.rc( I,J, prop )                  # calculate new lambda
      #
      # calculate mhratio
      # *** likelihood difference
      mhratio<- N*(prop$l-cur$l)+(col.marg[j]-col.marg[1])*(prop$ly[j]-cur$ly[j])
      # prior difference
      mhratio<-mhratio + 0.5*( (cur$ly[j] -prior.mean$ly[j])/prior.s$ly[j] )^2
      mhratio<-mhratio - 0.5*( (prop$ly[j]-prior.mean$ly[j])/prior.s$ly[j] )^2
      #
      if ( log(runif(1)) <= mhratio ){
        cur<-prop
        acc.prob$ly[j]<-acc.prob$ly[j]+1
      }
    }
    # --------------------------------
    # STEP3: sampling of phi
    # NOTE(review): yesphi is always FALSE here, so the branch below is never
    # executed and phi stays fixed at -1.
    # --------------------------------
    yesphi<-FALSE
    if(yesphi){
      prop<-cur                                 # set proposed values to current
      prop$phi<-rnorm(1, cur$phi, prop.s$phi )  # propose new phi
      prop$l<-lambda.rc( I,J, prop )            # calculate new lambda
      #
      # calculate mhratio
      # *** likelihood difference
      # calculate tables with m's and v's
      tabm<-matrix( cur$m, nrow=I, ncol=J )
      tabv<-matrix( cur$v, nrow=I, ncol=J, byrow=TRUE )
      TABSUM<- sum(y*tabm*tabv)   # calculate sum y[i,j]*m[i]*v[j]
      mhratio<- N*(prop$l-cur$l)+TABSUM*(prop$phi-cur$phi)
      # prior difference
      mhratio<-mhratio + 0.5*( (cur$phi -prior.mean$phi)/prior.s$phi )^2
      mhratio<-mhratio - 0.5*( (prop$phi-prior.mean$phi)/prior.s$phi )^2
      #
      if ( log(runif(1)) <= mhratio ){
        cur<-prop
        acc.prob$phi<-acc.prob$phi+1
      }
    } else cur$phi<- -1
    # -----------------------------------
    # STEP4: sampling of m (row scores); differences fixed at 0 are skipped
    # -----------------------------------
    #
    # sampling of m
    if (ROW) {
      for (i in 2:I ){
        if (cur$dm[i]>0){
          prop<-cur   # set proposed values to current
          # log-normal random walk on dm[i] (keeps it positive)
          u<-rnorm(1, log(cur$dm[i]), prop.s$dm[i])
          prop$dm[i]<-exp(u)
          # calculate dm1
          prop$dm[1] <- - sum( seq( I-1, 1, -1) * prop$dm[2:I] )/I
          # calculate proposed m's
          prop$m<- cumsum(prop$dm)
          # calculate proposed l
          prop$l<-lambda.rc( I,J, prop )   # calculate new lambda
          #
          # calculate mhratio
          # *** likelihood difference
          tabm<-matrix( prop$m-cur$m, nrow=I, ncol=J )
          tabv<-matrix( cur$v, nrow=I, ncol=J, byrow=TRUE )
          TABSUM<- sum(y*tabm*tabv)   # calculate sum y[i,j]*(m'[i]-m[i])*v[j]
          mhratio<- N*(prop$l-cur$l)+TABSUM*cur$phi
          # *** prior difference
          # we use as prior Gamma(1,prior.bm=1/100) i.e. close to zero mean and large variance
          mhratio<-mhratio - prior.bm[i]*(prop$dm[i]-cur$dm[i])
          # proposal ratio
          mhratio<-mhratio+log(prop$dm[i]/cur$dm[i])
          if ( log(runif(1)) <= mhratio ){
            cur<-prop
            acc.prob$m[i]<-acc.prob$m[i]+1
          }# end of MHUPDATE IF
        } # if dm>0
      }# END OF m loop
      # pin the requested row-score differences at zero (tied scores)
      if (zerodm2)cur$dm[2]<-0
      if (zerodm4)cur$dm[4]<-0
    }# end of ROW if
    # -----------------------------------
    # STEP5: sampling of v (column scores); differences fixed at 0 are skipped
    # -----------------------------------
    #
    if (COLUMN){
      for (j in startv:J ){
        if (cur$dv[j]>0){
          prop<-cur   # set proposed values to current
          prop$dv[j] <- rlnorm( 1, log(cur$dv[j]), prop.s$dv[j] )
          #
          # parametrization with v[2]=1
          if(startv==2){
            # calculate dv1 if we have column model
            prop$dv[1] <- - sum( seq( J-1, 1, -1) * prop$dv[2:J] )/J
          } else{
            # calculate dv1 dv2 if we have rc model
            prop$dv[1]<- - sum( seq( J-2, 1, -1) * prop$dv[3:J] ) -(J-1)
            prop$dv[2]<- 1-prop$dv[1]
          }
          #
          prop$v<- cumsum(prop$dv)
          prop$l<-lambda.rc( I,J, prop )   # calculate new lambda
          #
          # calculate mhratio
          # *** likelihood difference
          tabm<-matrix( cur$m, nrow=I, ncol=J )
          tabv<-matrix( prop$v-cur$v, nrow=I, ncol=J, byrow=TRUE )
          TABSUM<- sum(y*tabm*tabv)   # calculate sum y[i,j]*m[i]*(v'[j]-v[j])
          mhratio<- N*(prop$l-cur$l)+TABSUM*cur$phi
          # *** prior difference
          # we use as prior Gamma(1,prior.bm=1/100) i.e. close to zero mean and large variance
          mhratio<-mhratio - prior.bv[j]*(prop$dv[j]-cur$dv[j])
          # proposal ratio of log-normals
          mhratio<-mhratio+log(prop$dv[j]/cur$dv[j])
          if ( log(runif(1)) <= mhratio ){
            cur<-prop
            acc.prob$v[j]<-acc.prob$v[j]+1
          } # end of MHUPDATE IF
        }#endif of dv[j]
      } # END OF v loop
      # pin the requested column-score difference at zero (tied scores)
      if (zerodv3)cur$dv[3]<-0
    } # end of COLUMN if
    #
    #
    # calculation of the loglikelihood for each iteration
    # calculate tables with m's and v's
    tabm<-matrix( cur$m, nrow=I, ncol=J )
    tabv<-matrix( cur$v, nrow=I, ncol=J, byrow=TRUE )
    TABSUM<- sum(y*tabm*tabv)   # calculate sum y[i,j]*m[i]*v[j]
    loglike <- lgamma( N+1 ) - sum( lgamma(y+1) ) + N * cur$l + sum( row.marg*cur$lx )+ sum( col.marg*cur$ly )+ cur$phi*TABSUM
    #
    #
    # calculation of logposterior; gamma prior terms only for differences
    # that are free (strictly positive), matching the sampler's guards
    logpost <- loglike + sum(dnorm( cur$lx[2:I] , prior.mean$lx[2:I] , prior.s$lx[2:I] , log=TRUE ))
    logpost <- logpost + sum(dnorm( cur$ly[2:J] , prior.mean$ly[2:J] , prior.s$ly[2:J] , log=TRUE ))
    logpost <- logpost + dnorm( cur$phi, prior.mean$phi, prior.s$phi, log=TRUE )
    for (i in 2:I){
      if (cur$dm[i]>0) logpost <- logpost + dgamma( cur$dm[i] , prior.am[i] , prior.bm[i] , log=TRUE )
    }
    for (j in startv:J){
      if (cur$dv[j]>0) logpost <- logpost + dgamma( cur$dv[j] , prior.av[j] , prior.bv[j] , log=TRUE )
    }
    # logpost <- logpost + sum(dgamma( cur$dm[2:I] , prior.am[2:I] , prior.bm[2:I] , log=TRUE ))
    # logpost <- logpost + sum(dgamma( cur$dv[3:J] , prior.av[startv:J] , prior.bv[startv:3] , log=TRUE ))
    #
    output[iter,]<- c( cur$l, cur$lx, cur$ly, cur$phi,cur$m, cur$v, cur$dm, cur$dv, loglike, logpost )
    print ( output[iter,])
  }
  # convert acceptance counts to acceptance rates
  for (k in 1:length(acc.prob) ){ acc.prob[[k]]<-acc.prob[[k]]/totiter }
  print ( acc.prob )
  output
}
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------
# Posterior output analysis for the samplers above.
#   chain          : data.frame returned by mcmc.rc / mcmc.rc.ordinal[.zeros]
#                    (columns l, lx*, ly*, phi, m*, v*, ..., loglike, logpost)
#   burnin         : iterations to discard (capped at 20% of the chain)
#   transformation : also report phi/m/v rescaled to unit-length centered scores
#   MODE           : include the posterior-mode row (max logpost) in summaries
#   PLOTS          : interactively offer trace/ergodic-mean/hist/acf plots
#   data           : the observed table; if supplied, AIC/BIC/DIC are computed
#   model          : one of 'RC','R','C','U','I' (sets the model dimension)
# Returns a list with orig (summaries), transf (rescaled summaries or NULL),
# fitted.means, and - when data is given - AIC, BIC, DIC.
#
# Fixes relative to the original version:
#  * colnames(descr.params) no longer assumes a 'mode' column when MODE=FALSE
#  * trans.descr defined (NULL) when transformation=FALSE, so the return works
#  * score-column indices are computed outside the transformation branch,
#    so fitted means also work with transformation=FALSE
#  * posterior-mode extraction takes the first matching row, so ties in
#    logpost no longer break the 'mode' column
rc.boa<- function( chain, burnin=0, transformation=TRUE, MODE=TRUE, PLOTS=TRUE, data=NULL, model='RC' ){
  #
  p <- length(chain[1, ]) - 2           # number of parameters (minus loglike/logpost)
  TOTITER <- length(chain[, 1])
  if (burnin > 0.2 * TOTITER) burnin <- 0.2 * TOTITER
  ITER <- TOTITER - burnin
  IKEPT <- (burnin + 1):TOTITER         # retained iterations
  # descriptives for the original parametrization
  descr.params <- data.frame(mean = apply(chain[IKEPT, ], 2, mean))
  descr.params$median <- apply(chain[IKEPT, ], 2, median)
  if (MODE) {
    pmode <- max(chain[IKEPT, ]$logpost)
    mode.row <- chain[chain$logpost == pmode, ][1, ]   # first row attaining the max
    print(mode.row)
    print(descr.params)
    descr.params$mode <- unlist(mode.row)
  }
  descr.params$st.dev <- sqrt(apply(chain[IKEPT, ], 2, var))
  #
  rownames(descr.params) <- names(chain)
  if (MODE) {
    colnames(descr.params) <- c('mean', 'median', 'mode', 'st.dev.')
  } else {
    colnames(descr.params) <- c('mean', 'median', 'st.dev.')
  }
  #
  # locate the score columns: m1..m(I) run from index1 to index2-1 and
  # v1..v(J) from index2 to index3 (needed for both the transformation and
  # the fitted cell means below)
  xnames <- rownames(descr.params)
  index0 <- seq_along(xnames)
  index1 <- index0[xnames == 'm1']
  index2 <- index0[xnames == 'v1']
  for (i in (index2 + 1):length(xnames)) {
    if (substr(xnames[i], 1, 1) != 'v') {
      index3 <- i - 1
      break
    }
  }
  #
  # calculate transformations of descriptives: rescale m and v to centered
  # unit-length scores and absorb the scale factors into phi
  rescale.scores <- function(x) {
    phi <- x[xnames == 'phi']
    m <- x[index1:(index2 - 1)]
    v <- x[index2:index3]
    m <- m - mean(m)
    v <- v - mean(v)
    s1 <- sqrt(sum(m^2))
    s2 <- sqrt(sum(v^2))
    c(phi * (s1 * s2), m / s1, v / s2)
  }
  trans.descr <- NULL                   # stays NULL when transformation=FALSE
  if (transformation) {
    trans.descr <- data.frame(mean = rescale.scores(descr.params$mean))
    rownames(trans.descr) <- rownames(descr.params)[(index1 - 1):index3]
    trans.descr$median <- rescale.scores(descr.params$median)
    if (MODE) {
      trans.descr$mode <- rescale.scores(descr.params$mode)
    }
  }
  #
  # -------------------------------------------------
  # Calculate the means for each cell
  # -------------------------------------------------
  I <- index2 - index1
  J <- index3 - index2 + 1
  ITER <- length(chain[, 1])            # fitted values use the FULL chain
  xrow <- sort(rep(1:I, J))             # row index of cell k (row-major order)
  ycol <- rep(1:J, I)                   # column index of cell k
  fitted <- matrix(ncol = I * J, nrow = ITER)
  for (k in seq_len(I * J)) {
    # cell probability exp( l + lx[i] + ly[j] + phi*m[i]*v[j] ) per iteration;
    # column offsets: 1=l, 2..I+1=lx, I+2..I+J+1=ly, I+J+2=phi, then m's, v's
    fitted[, k] <- exp(chain[, 1] + chain[, 1 + xrow[k]] + chain[, 1 + I + ycol[k]] +
                       chain[, 2 + I + J] * chain[, 2 + I + J + xrow[k]] * chain[, 2 + 2 * I + J + ycol[k]])
  }
  # calculation of mean probabilities for each cell
  meanfitted <- apply(fitted, 2, mean)
  meanp <- matrix(meanfitted, nrow = I, ncol = J, byrow = TRUE)
  #
  #
  # calculation of AIC/BIC/DIC if data are provided
  if (!is.null(data)) {
    y <- as.vector(t(data))             # row-major, matching the fitted columns
    deviance <- vector(length = ITER)
    #
    # deviance for each iteration
    for (k in seq_len(ITER)) deviance[k] <- -2 * sum(y * log(fitted[k, ]))
    #
    # deviance evaluated at the posterior means of fitted values
    Dhat <- -2 * sum(y * log(meanfitted))
    #
    # calculation of model dimension
    dm <- I + J - 2
    if (model == 'RC') dm <- dm + I - 1 + J - 2
    else if (model == 'R') dm <- dm + I - 1
    else if (model == 'C') dm <- dm + J - 1
    else if (model == 'U') dm <- dm + 1
    else if (model == 'I') dm <- dm
    #
    # deviance statistics
    Dmean <- mean(deviance)             # mean
    Dq025 <- quantile(deviance, 0.025)  # 2.5% percentile
    Dq975 <- quantile(deviance, 0.975)  # 97.5% percentile
    pm <- Dmean - Dhat                  # number of effective parameters used in DIC
    N <- sum(y)                         # total number of observations
    #
    # calculation of AIC stats
    AIC <- data.frame(hat = Dhat + dm * 2, p2.5 = Dq025 + dm * 2,
                      mean = Dmean + dm * 2, p97.5 = Dq975 + dm * 2)
    # calculation of BIC stats
    BIC <- data.frame(hat = Dhat + dm * log(N), p2.5 = Dq025 + dm * log(N),
                      mean = Dmean + dm * log(N), p97.5 = Dq975 + dm * log(N))
    # calculation of DIC
    DIC <- data.frame(value = Dhat + 2 * pm, Dhat = Dhat, Dmean = Dmean, pm = pm)
  }
  #
  # -------------------------------------------------
  # PLOTS (interactive: asks per parameter via menu())
  # -------------------------------------------------
  if (PLOTS) {
    for (j in seq_len(p)) {
      temp <- menu(c('YES', 'NO'),
                   title = paste(paste("DO YOU WANT TO PRODUCE PLOTS OF", names(chain)[j]), '?'))
      print(temp)
      if (temp == 0) break
      else if (temp == 1) {
        par(mfrow = c(2, 2))
        minx <- min(chain[, j])
        smx <- sign(minx)
        maxx <- max(chain[, j])
        smxx <- sign(maxx)
        ylims <- c(minx * (1 - smx * 0.10), maxx * (1 + smxx * 0.10))
        plot(1:TOTITER, chain[, j], type = 'l', xlab = 'ITERATIONS',
             ylab = paste('Trace of ', names(chain)[j], ''), ylim = ylims)
        plot(1:TOTITER, cumsum(chain[, j]) / 1:TOTITER, type = 'l', xlab = 'ITERATIONS',
             ylab = paste('Ergodic mean of', names(chain)[j], ''), ylim = ylims)
        lines(IKEPT, cumsum(chain[IKEPT, j]) / (IKEPT - burnin), col = 2)
        hist(chain[IKEPT, j], main = names(chain)[j])
        acf(chain[IKEPT, j], main = names(chain)[j])
      }
    }
  }
  if (is.null(data)) list(orig = descr.params, transf = trans.descr, fitted.means = meanp)
  else list(orig = descr.params, transf = trans.descr, fitted.means = meanp,
            AIC = AIC, BIC = BIC, DIC = DIC)
}