#https://stats.oarc.ucla.edu/r/dae/tobit-models/#:~:text=Tobit%20Models%20%7C%20R%20Data%20Analysis%20Examples%20The,known%20as%20censoring%20from%20below%20and%20above%2C%20respectively%29. require(ggplot2) require(GGally) require(VGAM) dat <- read.csv("https://stats.idre.ucla.edu/stat/data/tobit.csv") summary(dat) # function that gives the density of normal distribution # for given mean and sd, scaled to be on a count metric # for the histogram: count = density * sample size * bin width f <- function(x, var, bw = 15) { dnorm(x, mean = mean(var), sd(var)) * length(var) * bw } # setup base plot p <- ggplot(dat, aes(x = apt, fill=prog)) # histogram, coloured by proportion in different programs # with a normal distribution overlayed p + stat_bin(binwidth=15) + stat_function(fun = f, size = 1, args = list(var = dat$apt)) p + stat_bin(binwidth = 1) + stat_function(fun = f, size = 1, args = list(var = dat$apt, bw = 1)) cor(dat[, c("read", "math", "apt")]) # plot matrix ggpairs(dat[, c("read", "math", "apt")]) #Tobit regression #Below we run the tobit model, using the vglm function #of the VGAM package. summary(m <- vglm(apt ~ read + math + prog, tobit(Upper = 800), data = dat)) ctable <- coef(summary(m)) pvals <- 2 * pt(abs(ctable[, "z value"]), df.residual(m), lower.tail = FALSE) cbind(ctable, pvals) m2 <- vglm(apt ~ read + math, tobit(Upper = 800), data = dat) (p <- pchisq(2 * (logLik(m) - logLik(m2)), df = 2, lower.tail = FALSE)) b <- coef(m) se <- sqrt(diag(vcov(m))) cbind(LL = b - qnorm(0.975) * se, UL = b + qnorm(0.975) * se) dat$yhat <- fitted(m)[,1] dat$rr <- resid(m, type = "response") dat$rp <- resid(m, type = "pearson")[,1] par(mfcol = c(2, 3)) with(dat, { plot(yhat, rr, main = "Fitted vs Residuals") qqnorm(rr) plot(yhat, rp, main = "Fitted vs Pearson Residuals") qqnorm(rp) plot(apt, rp, main = "Actual vs Pearson Residuals") plot(apt, yhat, main = "Actual vs Fitted") }) # correlation (r <- with(dat, cor(yhat, apt))) # variance accounted for r^2