##### Part 1: Propensity score matching in R ######################### ################################################### ### Set working directory, to for example your home directory. ################################################### ## change the line below to match your local directory setwd("H:/Myname/Myfolder/") #Store output for later in a log file sink("Output.txt") ################################################### ### Step 1: Read the data in R ################################################### install.packages("rgenoud") install.packages("Matching") library(foreign) library(Matching) library(boot) dta3 <- read.dta(file="notforprofit2013.dta") dta3 <- as.data.frame(dta3) names(dta3) ################################################### ### Step 2: Estimate the propensity score ################################################### pscore <- glm(treated ~ W1male + W1nonwhite + W1highcost + W1lowcost + W1schiz + W1bipolar+ W1age + W1qaly + W1paid + W1phs + W1mhs + W1gf + W1use, data = dta3, binomial(link = "logit")) summary(pscore) pscore_est <- predict(pscore, data=dta3) dta3<- cbind(dta3, pscore_est) rm(pscore_est) ################################################### ### Step 3: Perfrm propensity score matching ################################################### attach(dta3) m_ps1 <- Match(Tr=dta3$treated, X=pscore_est, estimand="ATT") detach(dta3) ################################################### ### Step 4: Assess balance ################################################### set.seed(1122) mb_ps1 <-MatchBalance(treated ~ W1age + W1qaly + W1paid + + W1nonwhite + W1schiz + W1bipolar, match.out = m_ps1, data=dta3, nboots=500) attach(dta3) qqplot(W1qaly[treated==0], W1qaly[treated==1]) detach(dta3) attach(dta3) par(mfrow = c(1,2), las=2, oma=c(6,0,6,0)) qqplot(W1qaly[treated==0], W1qaly[treated==1], main="Before matching", ylab = "Treatment observations", xlab="Control observations", ylim=c(min(W1qaly[treated==1], W1qaly[m_ps1$index.treated]), max(W1qaly[treated==1], W1qaly[m_ps1$index.treated])), xlim=c(min(W1qaly[treated==0], W1qaly[m_ps1$index.control]), max(W1qaly[treated==0], W1qaly[m_ps1$index.control])) ) abline(coef=c(0,1)) qqplot(W1qaly[m_ps1$index.control], W1qaly[m_ps1$index.treated], main="After matching", ylab="Treatment observations", xlab="Control observations", ylim=c(min(W1qaly[treated==1], W1qaly[m_ps1$index.treated]), max(W1qaly[treated==1], W1qaly[m_ps1$index.treated])), xlim=c(min(W1qaly[treated==0], W1qaly[m_ps1$index.control]), max(W1qaly[treated==0], W1qaly[m_ps1$index.control])) ) abline(coef=c(0,1)) detach(dta3) mb_ps1b <- MatchBalance(treated ~ priorqaly2 + priorqaly3, match.out = m_ps1,data = dta3, nboots = 500) ################################################### ### Step 5: Estimate the ATT ################################################### attach(dta3) m_ps_cost <- Match(Y=totalcost, Tr=treated, X=pscore_est, estimand="ATT") summary(m_ps_cost, full=TRUE) m_ps_qaly <- Match(Y=totalqaly, Tr=treated, X=pscore_est, estimand="ATT") summary(m_ps_qaly, full=TRUE) detach(dta3) ################################################ ##### Step 6: Saving the matched data ################################################# match.ps.data <- rbind(dta3[m_ps1$index.treated,], dta3[m_ps1$index.control,]) match.ps.data <- cbind(match.ps.data, weights=c(m_ps1$weights, m_ps1$weights)) match.ps.data <- cbind(match.ps.data, matchid=c(1:length(m_ps1$index.treated),1:length(m_ps1$index.control))) write.table(match.ps.data, file = "match.ps.data.csv", sep = ",", col.names = NA, na="NA", qmethod = "double") fix(match.ps.data) ##### Check the matched pairs ######################## matchedpairs=cbind(m_ps1$index.treated ,m_ps1$index.control,m_ps1$weights) head(matchedpairs) dim(matchedpairs) sum(m_ps1$weights) # weights sum to number of treated length(unique(m_ps1$index.control)) # 70 of 114 controls used as matches dim(match.ps.data) # 2* N_treated observations ####################### PART 2: Genetic Matching ########################### ################################################### ### Step 1: Read the data in R ################################################### library(foreign) library(Matching) dta3 <- read.dta(file="notforprofit2013.dta") ################################################### Fit a linear propensity score as in Practical 3 ################################################### pscore <- glm(treated ~ W1male + W1nonwhite + W1highcost + W1lowcost + W1schiz + W1bipolar+ W1age + W1qaly + W1paid + W1phs + W1mhs + W1gf + W1use, data=dta3, binomial(link = "logit")) pscore_est <- predict(pscore, data=dta3) dta3<- cbind(dta3, pscore_est) rm(pscore_est) ################################################### ### Step 2: Specify X and BalanceMatrix ################################################### attach(dta3) X <-cbind(pscore_est, W1male, W1nonwhite, W1highcost, W1lowcost, W1schiz, W1bipolar, W1use, W1age, W1qaly, W1paid, W1phs, W1mhs, W1gf) BalanceMatrix <- X ################################################################## ### Step 3: Run Genetic matching and assess covariate balance ################################################################## ### Run Genetic Matching ####################################### gen1 <- GenMatch(Tr=treated, X=X, BalanceMatrix=BalanceMatrix, pop.size=500, unif.seed=2233, int.seed=2233 ) ### Perform matching ############################################ m_gm1 <- Match(Tr=treated, X=X, Weight.matrix=gen1,estimand="ATT") detach(dta3) ### Assess covariate balance ################################### set.seed(1122) mb_gm1 <- MatchBalance(treated ~ W1age + W1qaly + W1paid + W1nonwhite + W1schiz + W1bipolar, match.out=m_gm1, data=dta3, nboots=500) ### Draw eQQ plots ############################################### par(mfrow = c(1,2), las=2, oma=c(6,0.5,6,0.5)) attach(dta3) qqplot(W1qaly[treated==0], W1qaly[treated==1], main="Before matching", ylab = "Treatment observations", xlab="Control observations", ylim=c(min(W1qaly[treated==1], W1qaly[m_gm1$index.treated]), max(W1qaly[treated==1], W1qaly[m_gm1$index.treated])), xlim=c(min(W1qaly[treated==0], W1qaly[m_gm1$index.control]), max(W1qaly[treated==0], W1qaly[m_gm1$index.control])) ) abline(coef=c(0,1)) qqplot(W1qaly[m_gm1$index.control],W1qaly[m_gm1$index.treated], main="After matching", ylab="Treatment observations", xlab="Control observations", ylim=c(min(W1qaly[treated==1],W1qaly[m_gm1$index.treated]), max(W1qaly[treated==1], W1qaly[m_gm1$index.treated])), xlim=c(min(W1qaly[treated==0],W1qaly[m_gm1$index.control]), max(W1qaly[treated==0], W1qaly[m_gm1$index.control])) ) abline(coef=c(0,1)) detach(dta3) ## Check balance on non-linear terms ############################## set.seed(1122) mb_gm1b <- MatchBalance(treated ~ W1age + W1qaly + W1paid + W1nonwhite + W1schiz + W1bipolar + priorqaly2 + priorqaly3, match.out = m_gm1, data = dta3, nboots = 500) ################################################### ### Step 4: Estimate the ATT ################################################### attach(dta3) m.gm1.cost <- Match(Y=totalcost, Tr=treated, X=X, Weight.matrix=gen1, estimand="ATT") summary(m.gm1.cost, full=TRUE) m.gm1.qaly <- Match(Y=totalqaly, Tr=treated, X=X, Weight.matrix=gen1, estimand="ATT") summary(m.gm1.qaly, full=TRUE) detach(dta3) ################################################### ### Step 5: Save and store the matched dataset ################################################### match.gn.data <- rbind(dta3[m_gm1$index.treated,], dta3[m_gm1$index.control,]) match.gn.data <- cbind(match.gn.data, weights=c(m_gm1$weights, m_gm1$weights)) match.gn.data <- cbind(match.gn.data, matchid=c(1:length(m_gm1$index.treated),1:length(m_gm1$index.control))) write.table(match.gn.data, file = "match.gn.data.csv", sep = ",", col.names = NA, na="NA", qmethod = "double") #close our log file sink()