## ----echo=F, results='hide', fig.show='hide', message=FALSE, warning=FALSE---- library(knitr) library(DAMisc) opts_chunk$set(tidy=TRUE, tidy.opts = list(only.comment=TRUE), warning=FALSE, message=FALSE) ## ----mix1, echo=T, results="hide"---------------------------------------- ## Load relevant packages library(flexmix) library(foreign) ## Read in the data, dat <- na.omit(read.dta("http://www.quantoid.net/files/reg3/mixture_data_small.dta")) ## rescale gdp/capita to rescale the coefficient dat$gdppc10k <- dat$gdppc/10000 ## Generate the model setup (this doesn't estimate the model, it just ## generates the appropriate setup) model <- FLXMRglmfix(family = "gaussian", nested=list(k=c(1,1), formula = c(~lgates + aclp + bnr + xrcomp + parcomp, ~log_checks + polconiii + xconst + laworder + subfed)), fixed = ~ gdppc10k + logpop + cwar + iwar) ## maximize the model you set up previously out <- stepFlexmix(rep1 ~ 1 |ccode, k=2, model=model, data=dat, nrep=10) ## ----groupprob, echo=T--------------------------------------------------- ## get the probabilities out of the posterior. In the posterior, ## there are two elements - scaled and unscaled. The scaled versions ## sum to 1 in the rows. probs <- out@posterior$scaled ## Calculate the average probability by theory colMeans(probs) ## summarize the model output. summary(out) ## ----senstest, echo=T, cache=TRUE---------------------------------------- ## find unique country codes from the data unccode <- unique(dat$ccode) ## set up the model model <- FLXMRglmfix(family = "gaussian", nested=list(k=c(1,1), formula = c(~lgates + aclp + bnr + xrcomp + parcomp, ~log_checks + polconiii + xconst + laworder + subfed)), fixed = ~ gdppc10k + logpop + cwar + iwar) ## generate a set of points to initialize the clusters. ## initialize probs and ll to results probs <- ll <- NULL ## loop over this 100 times. 100 is an arbitrary number and in "real settings" should ## probably be bigger than 100 if you want to do a serious assessment of sensitivity to ## initial conditions. for(i in 1:100){ ## generate a set of random cluster assignments clust <- round(runif(length(unccode), .5,2.5), 0) ## initialize matrix for clusters cluster <- matrix(0, ncol=2, nrow=nrow(dat)) ## Loop over each of the country codes for(i in 1:length(unccode)){ ## replace the appropriate column of the cluster matrix with 1. cluster[which(dat$ccode == unccode[i]), clust[i]] <- 1 } ## save clusters in 'myclust' myclust <- cluster ## estimate model again, but initialize at a different cluster assignment using the ## object that we just created. out <- flexmix(rep1 ~ 1 |ccode, k=2, model=model, data=dat, cluster=myclust) ## get the theory model probabilities from the model probs <- cbind(probs, do.call("c", as.list(by(out@posterior$scaled[,1], list(dat$ccode), mean)))) ## save the log likelihoods ll <- c(ll, logLik(out)) } ## ----samecat, echo=T, fig.show='hide'------------------------------------ ## recodes probabilities greater than .5 to 2 and less than 1.5 to 1 g <- round(probs+1) ## initialize a matrix d to hold results to be NxN d <- matrix(NA, nrow=nrow(g), ncol=nrow(g)) ## loop over 1:(N-1) for(i in 1:(nrow(d)-1)){ ## loop over relevant columns for each row for(j in (i+1):nrow(d)){ ## replace d[i,j] and d[j,i] with the proportion of times that g[i, ] and g[j,] are the same d[i,j] <- d[j,i] <- mean(g[i,] == g[j,]) } } ## make a histogram of the upper triangle of d hist(d[upper.tri(d)], xlab = "Pr(Same Groups)") ## ----histprob, echo=T, fig.show='hide'----------------------------------- ## Posterior probabilities by group. hist(probs[,1], xlab="Posterior Probabilities of Elections Group", main="") ## ----id, echo=T---------------------------------------------------------- ## source in mixturetools functions source("http://www.quantoid.net/files/reg3/mixturetools.r") ## IdentifyList finds statistically significantly consistent results id.list <- IdentifyList(out@posterior$scaled, dat, dat$ccode, cluster=T, alpha=.05) ## print the two sets of results id.list$caseID.1 id.list$caseID.2 ## generate a dataset that has country codes and the group to which ## each observation belongs. Note that missing is still an option here. group <- data.frame(ccode=unccode, theory=0) group$theory[which(group$ccode %in% id.list$caseID.1)] <- 1 group$theory[which(group$ccode %in% id.list$caseID.2)] <- 2 ## ----refit, echo=T, cache=TRUE------------------------------------------- ## refit the model to get coefficient estimates out.refit <- refit(out) ## summarize model object summary(out.refit) ## ----map, echo=T, eval=F------------------------------------------------- ## ## load relevant packages. Note that you should also download rgeos, which will get used ## ## by these packages. ## library(rgdal) ## library(ggplot2) ## library(plyr) ## ## Read in the WorldCountries shape file (need all auxiliary files: .dbf, .sbn, .sbx, .shp, .shx) and they need to be in R's working directory. ## world <- readOGR(".", layer="WorldCountries") ## ## add the theory groups to the data in the world object ## world@data <- join(world@data, group, type="left") ## ## recode missing theories to 3 (neither) ## world@data$theory <- ifelse(is.na(world@data$theory),3, world@data$theory ) ## ## create a factor out of the result ## world@data$theory <- factor(world@data$theory, levels=1:3, ## labels=c("Elections", "Checks", "Neither")) ## ## make a new variable called id that we will use later ## world@data$id <- world@data$placename ## ## manage data toget it into a format to produce maps ## world.points <- fortify(world, region="id") ## world.df <- join(world.points, world@data, by="id") ## ## ## generate the plot ## pl <- ggplot(world.df) + aes(long,lat,group=group,fill=theory) + geom_polygon() + ## geom_path(color="gray50", size=.1) + coord_equal() + ## scale_fill_brewer("", labels=c("Elections", "Checks", "Neither"), palette = "Set2") ## ## reset a bunch of the theme stuff ## pl + theme(line = element_blank(), axis.title.x = element_blank(), ## axis.title.y = element_blank(), axis.ticks = element_blank(), ## axis.text = element_blank(),legend.position = "bottom", ## panel.background = element_blank()) ## ----concom,echo=T, cache=TRUE------------------------------------------- ## make parliamentary responsibility into complete/incomplete vs not dat$prcomp <- as.numeric(dat$parlresp_c %in% c("Incomplete", "Complete")) ## Use prcomp as a concomitant (theory predicting) variable out.a <- stepFlexmix(rep1 ~ 1 | ccode , k=2, model=model, data=dat, nrep=20, concomitant = FLXPmultinom( ~ prcomp)) ## refit the model to get coefficient estimates out.a.refit <- refit(out.a) ## ----sums, echo=T-------------------------------------------------------- ## look at the model results for each group summary(out.a.refit) ## look at the concomiatant variable model results out.a.refit@concomitant ## ----dftest, echo=T------------------------------------------------------ ## If we had estimated these models separately, what would we find about ## which one fits better? mod1 <- lm(rep1 ~lgates + aclp + bnr + xrcomp + parcomp + gdppc10k + logpop + cwar + iwar, data=dat, y=T) mod2 <- lm(rep1 ~log_checks + polconiii + xconst + laworder + subfed + gdppc10k + logpop + cwar + iwar, data=dat, y=T) library(games) clarke(mod1, mod2)