########################### # MS-C1620 # Statistical inference # Lecture 8 library(MASS) library(KernSmooth) ####################################### # Kernel smoothing # Motorcycle data x <- mcycle\$times y <- mcycle\$accel plot(x,y) # Linear regression model does not seem very good linmodel <- lm(y ~ x) linmodel abline(linmodel) # Difficult to claim that a parametric model would be good. # Try nonparametric ones. # Nadaraya-Watson = simple weighted average of nearby points kernelmodel5 <- locpoly(x, y, degree=0, bandwidth=5) lines(kernelmodel5, col="red") # Try different bandwidths kernelmodel20 <- locpoly(x, y, degree=0, bandwidth=20) lines(kernelmodel20, col="blue") kernelmodel1 <- locpoly(x, y, degree=0, bandwidth=1) lines(kernelmodel1, col="magenta") kernelmodel1 <- locpoly(x, y, degree=0, bandwidth=0.1) lines(kernelmodel1, col="black") # Local linear regression plot(x,y) loclinmodel5 <- locpoly(x, y, degree=1, bandwidth=5) lines(loclinmodel5, col="red") # seems too wide smoothing window (bandwidth) # try smaller bandwidth loclinmodel2 <- locpoly(x, y, degree=1, bandwidth=2) lines(loclinmodel2, col="magenta") # Demonstrating edge effects, NW versus local linear x <- seq(from=0, to=1.75*pi, length.out=20) y <- sin(x) h <- 0.5 range <- c(0, 2.25*pi) plot(x,y, xlim=range) nwmodel <- locpoly(x, y, degree=0, bandwidth=h, range.x=range) lines(nwmodel, col="blue") llmodel <- locpoly(x, y, degree=1, bandwidth=h, range.x=range) lines(llmodel, col="red") ####################################### # Kernel density estimation # KDE example 1: A very small sample from uniform distribution on [10,20] n <- 5 range <- c(10,20) x <- runif(n, 10, 20) h <- 0.2 densitymodel <- bkde(x, bandwidth=h, range.x=range) plot(x, rep(0,n), xlim=range, ylim=c(0,max(densitymodel\$y))) lines(densitymodel) # try increasing bandwidth to eg. 1 # try increasing n to eg. 20 or 300 # KDE example 2: Sample from an exponential distribution n <- 200 x <- rexp(n, rate=5) h <- 0.05 densitymodel <- bkde(x, bandwidth=h) plot(x, rep(0,n), ylim=c(0,max(densitymodel\$y))) lines(densitymodel) # KDE example 3: Old Faithful eruption lengths (a bimodal distribution) x <- faithful\$eruptions n <- length(x) h <- 0.2 densitymodel <- bkde(x, bandwidth=h) plot(x, rep(0,n), ylim=c(0,max(densitymodel\$y))) lines(densitymodel) # try changing bandwidth