


Study with the several resources on Docsity
Earn points by helping other students or get them with a premium plan
Prepare for your exams
Study with the several resources on Docsity
Earn points to download
Earn points by helping other students or get them with a premium plan
Tree-Based Methods - Exercise R code as a solution manual for ISLR (An Introduction to Statistical Learning; James, Witten, Hastie, Tibshirani)
Typology: Exercises
1 / 4
This page cannot be seen from the preview
Don't miss anything!



# Regression trees for Salary on the ISLR Hitters data ----
library(ISLR)
library(tree)

data(Hitters)

# Tree that uses Years as the only predictor.
tree.mod <- tree(Salary ~ Years, data = Hitters)
plot(tree.mod)
text(tree.mod)

# Unpruned tree that may split on every other column of Hitters.
tree.unpr <- tree(Salary ~ ., data = Hitters)
plot(tree.unpr)
text(tree.unpr, pretty = 0)
# Hold-out evaluation of an unpruned tree ----
set.seed(100)

# Keep only complete rows so squared errors can be computed on every
# held-out observation.
Hitters <- na.omit(Hitters)

# Draw half of the row indices for training; the negated indices select the
# remaining rows as the test set.
train <- sample(seq_len(nrow(Hitters)), nrow(Hitters) / 2)
test <- -train
data.train <- Hitters[train, ]
data.test <- Hitters[test, ]

# Fit on the training rows only, then score the held-out rows.
tree.mod <- tree(Salary ~ ., data = Hitters, subset = train)
yhat <- predict(tree.mod, data.test)

# Test mean squared error of the unpruned tree (printed).
test.error <- mean((yhat - data.test$Salary)^2)
test.error
# Cost-complexity pruning guided by cross-validation ----

# Cross-validated deviance for each candidate tree size.
cv.hitters <- cv.tree(tree.mod)
plot(cv.hitters$size, cv.hitters$dev, type = "b")

# Prune the training tree down to 3 terminal nodes and draw it.
prune.hitters <- prune.tree(tree.mod, best = 3)
plot(prune.hitters)
text(prune.hitters, pretty = 0)

# Test mean squared error of the pruned tree (printed).
yhat <- predict(prune.hitters, data.test)
mean((yhat - data.test$Salary)^2)
# Bagging ----
library(randomForest)
set.seed(100)

# mtry = 19 offers every predictor at each split, which turns the random
# forest into bagging (assumes Hitters has 19 predictors besides Salary).
bag.hitters <- randomForest(
  Salary ~ .,
  data = Hitters,
  subset = train,
  mtry = 19,
  importance = TRUE
)
bag.hitters

# Predicted vs. observed salary on the held-out rows. The column is named
# `Salary`; `$` matching is case-sensitive, so `data.test$salary` is NULL and
# the plot would show predictions against their index instead.
yhat.bag <- predict(bag.hitters, newdata = Hitters[-train, ])
plot(yhat.bag, data.test$Salary)
abline(0, 1)

# Test mean squared error of the bagged model (printed).
mean((yhat.bag - Hitters[-train, ]$Salary)^2)

varImpPlot(bag.hitters)
# Random forest ----
# Same setup as bagging, but only a subset of the predictors is offered at
# each split (mtry = sqrt(19)).
rf.hitters <- randomForest(
  Salary ~ .,
  data = Hitters,
  subset = train,
  mtry = sqrt(19),
  importance = TRUE
)
yhat.rf <- predict(rf.hitters, newdata = Hitters[-train, ])

# Test mean squared error of the random forest (printed).
mean((yhat.rf - Hitters[-train, ]$Salary)^2)

varImpPlot(rf.hitters)
importance(rf.hitters)

######## Boosting #######
# The banner above sits on its own line; when it shared a line with the code,
# everything after it was treated as a comment and never ran.
library(gbm)
set.seed(100)

# `n.trees` is spelled out in full rather than relying on partial argument
# matching of `n.tree`.
boost.hitters <- gbm(
  Salary ~ .,
  data = Hitters[train, ],
  distribution = "gaussian",
  n.trees = 5000,
  interaction.depth = 4
)
summary(boost.hitters)

# Predict with all 5000 trees and report the test mean squared error.
yhat.boost <- predict(boost.hitters, newdata = Hitters[-train, ], n.trees = 5000)
mean((yhat.boost - Hitters[-train, ]$Salary)^2)
##############################################################################
#################        Tree (Checkerboard Data)        ##############
##############################################################################

# Simulate a two-class "checkerboard": each class is a mixture of two
# bivariate normal clusters placed on opposite corners of a 2 x 2 grid, so no
# single axis-parallel split separates the classes.
library(MASS)
set.seed(100)

# Cluster centres: class 0 on the main diagonal, class 1 on the anti-diagonal.
mean.class0.1 <- c(2.5, 2.5)
mean.class0.2 <- c(7.5, 7.5)
mean.class1.1 <- c(2.5, 7.5)
mean.class1.2 <- c(7.5, 2.5)

# Covariance shared by all four clusters (TRUE instead of the reassignable T).
sigma.cov <- matrix(c(1, 0.2, 0.2, 4), nrow = 2, byrow = TRUE)

# 20 points per cluster; each result is a 20 x 2 matrix with columns (x1, x2).
data.class0.1 <- mvrnorm(20, mean.class0.1, sigma.cov)
data.class0.2 <- mvrnorm(20, mean.class0.2, sigma.cov)
data.class1.1 <- mvrnorm(20, mean.class1.1, sigma.cov)
data.class1.2 <- mvrnorm(20, mean.class1.2, sigma.cov)
#data.new <- mvrnorm(4,mean.class0,sigma.cov)
#############################
# Scatter plot of the simulated checkerboard data with partition lines.

# The visible script never defines the axis limits, so derive them from the
# simulated points (column 1 = x1, column 2 = x2).
data.all <- rbind(data.class0.1, data.class0.2, data.class1.1, data.class1.2)
x.min <- min(data.all[, 1])
x.max <- max(data.all[, 1])
y.min <- min(data.all[, 2])
y.max <- max(data.all[, 2])

# Empty canvas with a 0.5 margin around the data.
plot(
  0, 0,
  xlim = c(x.min - 0.5, x.max + 0.5),
  ylim = c(y.min - 0.5, y.max + 0.5),
  type = "n", xlab = "x1", ylab = "x2"
)

# Class 0 is drawn as "o", class 1 as "+".
points(data.class0.1[, 1], data.class0.1[, 2], pch = "o")
points(data.class0.2[, 1], data.class0.2[, 2], pch = "o")
points(data.class1.1[, 1], data.class1.1[, 2], pch = "+")
points(data.class1.2[, 1], data.class1.2[, 2], pch = "+")
#points(data.new[,1],data.new[,2],pch="?")

# Hard-coded partition boundaries.
# NOTE(review): these look like split points copied from a fitted tree;
# confirm they still match the data simulated with the current seed.
abline(v = 8.296)
lines(c(0, 8.296), c(4.402, 4.402))
lines(c(4.643, 4.643), c(-2, 4.402))
lines(c(4.531, 4.531), c(4.402, 14))