library(car) library(tibble) library(ggplot2) library(formatR) library(knitr) library(pander) library(xtable) library(dplyr) library(tidyr) mytheme <- function(){ theme(axis.title=element_text(size=15), axis.text=element_text(size=12), text=element_text(size=12), title=element_text(size=15)) } uwogray <- "#807F83" purples <- c("#4F2683", "#8463AD", "#26064E", "#643F94", "#3B166A") pal2 <- c("#4F2683","#807F83") # pal3 <- c("#4F2683", "#811D7C", "302E87") pal3 <- c(rgb(79,38,131, maxColorValue = 255), rgb(129,29,124, maxColorValue = 255), rgb(48,46,135, maxColorValue = 255)) # pal4 <- c("#4F2683", "#C1582C", "#1E845A", "#C1B52C") pal4 <- c(rgb(79,38,131, maxColorValue = 255), rgb(193,88,44, maxColorValue = 255), rgb(30,132,90, maxColorValue = 255), rgb(193,181,44, maxColorValue = 255)) ## ------------------------------------------------------------------------------------- f <- "http://www.quantoid.net/files/reg3/weakliem.txt" library(rio) Weakliem <- import(f) W <- Weakliem[-c(21,22,24, 25,49), ] mod2 <- lm(secpay ~ log(gdp), data=W) ## ---- echo=FALSE---------------------------------------------------------------------- library(car) library(ggplot2) ggplot(mapping=aes(x=fitted.values(mod2), y=rstudent(mod2))) + geom_point(pch=1) + geom_hline(yintercept=0, lty=2) + theme_bw() + mytheme() + labs(x="Fitted Values", y="Studentized Residuals") ## ------------------------------------------------------------------------------------- # make U = residuals^2/SEr U <- (mod2$residuals^2)/ (sum(mod2$residuals^2)/length(mod2$residuals)) # if using all varaibles use this one upmod <- update(mod2, U ~ .) # Alternatively, if using only fitted values # upmod <- lm(U ~ fitted.values(mod2)) # find degrees of freedom df <- upmod$rank-1 # calculate the score score <- sum((fitted(upmod) - mean(U))^2)/2 score # calculate the p-value round(pchisq(score, df, lower.tail=FALSE), 3) ## ------------------------------------------------------------------------------------- ncvTest(mod2) ## ------------------------------------------------------------------------------------- library(gamlss) W2 <- W %>% dplyr::select(secpay, gdp, gini, hetero, union, democrat) %>% na.omit mm <- gamlss(secpay ~ log(gdp),data=W2) vm <- gamlss(secpay ~ log(gdp), ~ log(gdp) ,data=W2) ## ------------------------------------------------------------------------------------- summary(mm) ## ------------------------------------------------------------------------------------- summary(vm) ## ---- echo=FALSE, fig.align="center", out.height="50%", out.width="50%"--------------- knitr::include_graphics("diagnostics_files/figure-html/nrqr_continuous.png") ## ---- results='hide'------------------------------------------------------------------ set.seed(54434) x <- runif(1000,-1,1) mu <- 1 + 2*x sig <- exp(x) y <- mu + rnorm(1000, 0, sig) df1 <- data.frame(x=x, y=y) m1 <- gamlss(y ~ x, ~x, data = df1) mu.hat <- predict(m1, what="mu") sig.hat <- predict(m1, what="sigma") e <- y-mu.hat u <- pNO(df1$y, mu.hat, exp(sig.hat)) r <- qnorm(u) plot.df <- data.frame( res = c(e, r), type = factor(rep(1:2, each=1000), labels=c("Raw", "NRQ")) ) ## ---- echo=FALSE, fig.align="center"-------------------------------------------------- ggplot(plot.df, aes(x=res, fill=type)) + geom_histogram(aes(y=..density..), alpha=.2, position="identity", bins=50) + stat_function(fun=dnorm) + scale_fill_manual(values=rev(pal2)) + theme_bw() + mytheme() + theme(legend.position="top") + labs(x="Residuals", y="Density", fill="Residual Type:") ## ---- echo=FALSE, fig.align="center", out.height="50%", out.width="50%"--------------- knitr::include_graphics("diagnostics_files/figure-html/nrqr_discrete.png") ## ------------------------------------------------------------------------------------- ystar <- 2*x y <- rbinom(1000, 1, plogis(ystar)) df2 <- data.frame(x=x,y=y) m2 <- gamlss(y ~ x, data=df2, family=BI) mu.hat <- predict(m2, what="mu", type="response") e <- y-mu.hat tmp <- NULL u1 <- dBI(0, 1, mu=mu.hat) u1 <- ifelse(y == 0, 0, u1) u2 <- pBI(y, 1, mu=mu.hat) for(i in 1:8){ u <- runif(length(y), u1, u2) u <- ifelse(u > 0.999999, u - 1e-16, u) u <- ifelse(u < 1e-06, u + 1e-16, u) rqres <- qnorm(u) tmp <- rbind(tmp, data.frame(r=rqres, draw=i)) } ## ---- echo=FALSE, fig.height=4, fig.width=8, fig.align="center", out.width="90%"------ ggplot(tmp, aes(x=r)) + geom_histogram(aes(y=..density..), alpha=.5) + stat_function(fun=dnorm) + facet_wrap(~draw, nrow=2) + theme_bw() + mytheme() + labs(x="Residuals", y="Density") ## ---- out.width="80%", fig.align="center"--------------------------------------------- wp(m1, ylim.all=TRUE) ## ---- out.width="80%", fig.align="center"--------------------------------------------- wp(m2, ylim.all=TRUE) ## ----pm1, fig.show='hide'------------------------------------------------------------- plot(m1) ## ----pm2, fig.show='hide'------------------------------------------------------------- plot(m2) ## ----bf1, fig.show='hide'------------------------------------------------------------- m3 <- gamlss(y ~ x, data=df1) plot(m3) ## ---- out.width="50%", echo=FALSE, fig.align="center"--------------------------------- wp(m3) ## ----bf2, fig.show='hide'------------------------------------------------------------- m3a <- gamlss(y ~ x, sigma.formula= ~ x, data=df1) plot(m3a) ## ---- out.width="50%", echo=FALSE, fig.align="center"--------------------------------- wp(m3a) ## ----bf3, fig.show='hide'------------------------------------------------------------- set.seed(54434) a <- sqrt(12) x <- runif(1000, -a,a) p <- plogis(-1.9 + .7*x + x^2) y <- rbinom(1000, 1, p) df2 <- data.frame(x=x,y=y) m4 <- gamlss(y ~ x, data=df2, family=BI) plot(m4) ## ---- out.width="50%", echo=FALSE, fig.align="center"--------------------------------- wp(m4) ## ----bf4, fig.show='hide'------------------------------------------------------------- m4a <- gamlss(y ~ pb(x), data=df2, family=BI) plot(m4a) ## ---- out.width="50%", echo=FALSE, fig.align="center"--------------------------------- wp(m4a) ## ------------------------------------------------------------------------------------- library(splines) library(car) library(rio) dat <- import( "http://www.quantoid.net/files/9591/anes2008_binary.dta") vmod1 <- glm(voted ~ age + educ + female + leftright, data=dat, family=binomial(link="logit")) vmod1a <- glm(voted ~ age + educ + female + bs(leftright, df=5), data=dat, family=binomial(link="logit")) ## ------------------------------------------------------------------------------------- cprw1 <- residuals(vmod1, type="partial")[,"leftright"] g <- ggplot(mapping=aes( x=dat$leftright, y=cprw1)) + geom_point(pch=1) + geom_smooth(method="loess", se=T) + theme_bw() + mytheme() + coord_cartesian(ylim=c(-5,5)) + labs(x="Left-Right", y="Component + Residual") ## ---- echo=FALSE, out.width="100%", fig.align="center"-------------------------------- g ## ------------------------------------------------------------------------------------- vmod1a <- glm(voted ~ age + educ + female + bs(leftright, df=5), data=dat, family=binomial(link="logit")) cprw1a <- residuals(vmod1a, type="partial")[,"bs(leftright, df = 5)"] g <- ggplot(mapping=aes(x=dat$leftright, y=cprw1a)) + geom_point(pch=1) + geom_smooth(method="loess", se=T) + theme_bw() + mytheme() + coord_cartesian(ylim=c(-5,5)) + labs(x="Left-Right", y="Component + Residual") ## ---- echo=FALSE, out.width="100%", fig.align="center"-------------------------------- g ## ------------------------------------------------------------------------------------- vmod2a <- gamlss(voted ~ age + educ + female + leftright, data=dat, family=BI) eg2a <- residuals(vmod2a, type="partial", terms="leftright") g <- ggplot(mapping=aes(y=eg2a, x=dat$leftright)) + geom_point(pch=1) + geom_smooth(method="loess", se=TRUE) + coord_cartesian(ylim=c(-2,2)) + theme_bw() + labs(x="left-right", y="Component + Residual") ## ---- echo=FALSE, out.width="100%", fig.align="center"-------------------------------- g ## ------------------------------------------------------------------------------------- vmod2b <- gamlss(voted ~ age + educ + female + pb(leftright), data=dat, family=BI) eg2b <- residuals(vmod2b, type="partial", terms="leftright") g <- ggplot(mapping=aes(y=eg2b, x=dat$leftright)) + geom_point(pch=1) + geom_smooth(method="loess", se=TRUE) + coord_cartesian(ylim=c(-2,2)) + theme_bw() + labs(x="left-right", y="Component + Residual") ## ---- echo=FALSE, out.width="100%", fig.align="center"-------------------------------- g ## ----fig.align="center", out.width="45%"---------------------------------------------- plot(mm) ## ---- fig.align="center", out.width="80%"--------------------------------------------- wp(mm) ## ----fig.align="center", out.width="45%"---------------------------------------------- plot(vm) ## ---- fig.align="center", out.width="80%"--------------------------------------------- wp(vm) ## ------------------------------------------------------------------------------------- W3 <- Weakliem %>% mutate(orig = 1:nrow(Weakliem), weight=1) %>% dplyr::select(country, orig, secpay, gini, democrat, weight) %>% na.omit mod1 <- mod1o <- gamlss(secpay ~ gini*democrat, data=W3, weights=weight, trace=FALSE) devDiff <- 1 prevDev <- deviance(mod1) maxit <- 30 k <- 1 while(devDiff > 0 & k < maxit){ e <- residuals(mod1, type="simple") S2e <- sum(e^2)/mod1$df.residual se <- e/sqrt(S2e) w <- psi.bisquare(se) W3$weight <- w mod1 <- gamlss(secpay ~ gini*democrat, data=W3, weights = weight, trace=FALSE) devDiff <- abs(deviance(mod1) - prevDev) k <- k+1 } ## ---- echo=FALSE, fig.height=5, fig.width=8, out.width="75%", fig.align="center"------ library(ggeffects) p1 <- ggpredict(mod1o, terms=c("gini [n=25]", "democrat [0, 1]")) p2 <- ggpredict(mod1, terms=c("gini [n=25]", "democrat [0, 1]")) p1$model <- factor(1, levels=1:2, labels=c("Raw", "Weighted")) p2$model <- factor(2, levels=1:2, labels=c("Raw", "Weighted")) p <- bind_rows(p1, p2) ggplot(p, aes(x=x, y=predicted, fill=group, colour=group, ymin=conf.low, ymax=conf.high)) + geom_ribbon(alpha=.2, col="transparent") + scale_fill_manual(values=pal2, labels=c("Non-Democracy", "Democracy")) + scale_colour_manual(values=pal2, labels=c("Non-Democracy", "Democracy")) + geom_line() + facet_wrap(~model) + theme_bw() + mytheme() + theme(legend.position = "top") + labs(x="GINI", y="Secretary Pay Question", fill="Model Type", colour="Model Type") ## ------------------------------------------------------------------------------------- W3 %>% filter(weight < .9) %>% arrange(weight) ## ----rr1, echo=TRUE, eval=FALSE------------------------------------------------------- W3$r0 <- residuals(mod1o) W3$r1 <- residuals(mod1) ggplot(W3, aes(x=r0, y=r1)) + geom_point() + geom_abline(aes(linetype="45-degree Line", slope=1, intercept=0)) + theme_classic() + theme(legend.position="top") + labs(x="Original Model Residuals", y="Robust Model Residuals", linetype="")