Introduction to Statistical Learning ISLR Chapter 9 Solutions Code, Exercises of Statistics

Support Vector Machines - Exercise R code as soutution manual ISLR Introduction to Statistical Learning James, Witten, Hastie, Tibshirani

Typology: Exercises

2020/2021

Uploaded on 05/26/2021

eklavya
eklavya 🇺🇸

4.5

(23)

266 documents

1 / 10

Toggle sidebar

This page cannot be seen from the preview

Don't miss anything!

bg1
#
# Written by:
# --
# John L. Weatherwax 2009-04-21
#
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 387
#
#-----
save_plots = F
if( ! require("ISLR") ){ install.packages("ISLR") }
if( ! require("e1071") ){ install.packages("e1071") }
set.seed(0)
vt100ClearScreen <- function(...) cat("\033[2J")
vt100ClearScreen()
DF = data.frame( x1=c(3,2,4,1,2,4,4), x2=c(4,2,4,4,1,3,1),
y=as.factor(c(rep(1,4),rep(0,3))) )
colors = c( rep('red',4), rep('blue',3) )
# Use the svm code to find the linear boundary (take the cost of a margin
violation to be very large):
svm.fit = svm( y ~ ., data=DF, kernel="linear", cost=1e6, scale=FALSE )
print( summary( svm.fit ) )
# Use the output from svm.fit to compute/extract the linear decision boundary:
#
# For some reason this did not work ... not sure why. If anyone knows please
contact me.
#
# Its based on the decomposition:
#
# f(x) = beta_0 + \sum_{i \in S} \alpha_i < x, x_i >
#
# given in the text and using the results from the svm call to get the support
vectors via "index".
#
if( FALSE ){
beta_0 = svm.fit$coef0
beta_x1 = sum( svm.fit$coefs * DF[ svm.fit$index, ]$x1 )
beta_x2 = sum( svm.fit$coefs * DF[ svm.fit$index, ]$x2 )
# Plot the decision boundary with
#abline( a=-beta_0/beta_x1, b=-beta_x2/beta_x1 )
}
# From a plot of the points the decision boundary is given by the line with:
#
slope = ( 3.5 - 1.5 ) / ( 4 - 2 )
pf3
pf4
pf5
pf8
pf9
pfa

Partial preview of the text

Download Introduction to Statistical Learning ISLR Chapter 9 Solutions Code and more Exercises Statistics in PDF only on Docsity!

Written by:

--

John L. Weatherwax 2009-04-

email: [email protected]

Please send comments and especially bug reports to the

above email address.

EPage 387

#----- save_plots = F if(! require("ISLR") ){ install.packages("ISLR") } if(! require("e1071") ){ install.packages("e1071") } set.seed(0) vt100ClearScreen <- function(...) cat("\033[2J") vt100ClearScreen() DF = data.frame( x1=c(3,2,4,1,2,4,4), x2=c(4,2,4,4,1,3,1), y=as.factor(c(rep(1,4),rep(0,3))) ) colors = c( rep('red',4), rep('blue',3) )

Use the svm code to find the linear boundary (take the cost of a margin

violation to be very large): svm.fit = svm( y ~ ., data=DF, kernel="linear", cost=1e6, scale=FALSE ) print( summary( svm.fit ) )

Use the output from svm.fit to compute/extract the linear decision boundary:

For some reason this did not work ... not sure why. If anyone knows please

contact me.

Its based on the decomposition:

f(x) = beta_0 + \sum_{i \in S} \alpha_i < x, x_i >

given in the text and using the results from the svm call to get the support

vectors via "index".

if( FALSE ){ beta_0 = svm.fit$coef beta_x1 = sum( svm.fit$coefs * DF[ svm.fit$index, ]$x1 ) beta_x2 = sum( svm.fit$coefs * DF[ svm.fit$index, ]$x2 )

Plot the decision boundary with

#abline( a=-beta_0/beta_x1, b=-beta_x2/beta_x1 ) }

From a plot of the points the decision boundary is given by the line with:

slope = ( 3.5 - 1.5 ) / ( 4 - 2 )

intercept = -2 * slope + 1.

Compute the two margin lines:

slope_m_upper = slope intercept_m_upper = -2 * slope_m_upper + 2 slope_m_lower = slope intercept_m_lower = -2 * slope_m_lower + 1 if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_3_plot.eps", onefile=FALSE, horizontal=FALSE) } plot( DF$x1, DF$x2, col=colors, pch=19, cex=2.0, xlab='X_1', ylab='X_2', main='' ) grid() if( save_plots ){ dev.off() } if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_3_plot_with_SVM_lines.eps", onefile=FALSE, horizontal=FALSE) } plot( DF$x1, DF$x2, col=colors, pch=19, cex=2.0, xlab='X_1', ylab='X_2', main='' ) abline(a=intercept,b=slope,col='black') abline(a=intercept_m_upper,b=slope_m_upper,col='gray',lty=2,lwd=2) abline(a=intercept_m_lower,b=slope_m_lower,col='gray',lty=2,lwd=2) grid() if( save_plots ){ dev.off() }

print( sprintf('Linear logistic regression training error rate= %10.6f', 1 - sum( predicted_class == y )/length(y) ) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_linear_LR_predicted_class.e ps", onefile=FALSE, horizontal=FALSE) } plot( x1, x2, col=(predicted_class+1), pch=19, cex=1.05, xlab='x1', ylab='x2', main='logistic regression: y ~ x1 + x2' ) grid() if( save_plots ){ dev.off() }

Part (e-f): Using logistic regression to fit a nonlinear model:

m = glm( y ~ x1 + x2 + I(x1^2) + I(x2^2) + I(x1*x2), data=DF, family="binomial" ) y_hat = predict( m, newdata=data.frame(x1=x1,x2=x2), type="response" ) predicted_class = 1 * ( y_hat > 0.5 ) print( sprintf('Non-linear logistic regression training error rate= %10.6f', 1 - sum( predicted_class == y )/length(y) ) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_nonlinear_LR_predicted_clas s.eps", onefile=FALSE, horizontal=FALSE) } plot( x1, x2, col=(predicted_class+1), pch=19, cex=1.05, xlab='x1', ylab='x2' ) grid() if( save_plots ){ dev.off() }

Part (g): Fit a linear SVM to the data and report the error rate:

dat = data.frame( x1=x1, x2=x2, y=as.factor(y) )

Do CV to select the value of cost using the "tune" function:

tune.out = tune( svm, y ~ ., data=dat, kernel="linear", ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value

The best model is stored in "tune.out$best.model"

print(tune.out$best.model) y_hat = predict( tune.out$best.model, newdata=data.frame(x1=x1,x2=x2) ) y_hat = as.numeric( as.character( y_hat ) ) # convert factor responses into numerical values print( sprintf('Linear SVM training error rate= %10.6f', 1 - sum( y_hat == y )/length(y) ) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_linear_SVM_predicted_class. eps", onefile=FALSE, horizontal=FALSE) } plot( x1, x2, col=(y_hat+1), pch=19, cex=1.05, xlab='x1', ylab='x2' ) grid() if( save_plots ){ dev.off() }

Part (h-i): Fit a linear SVM to the data and report the error rate:

Do CV to select the value of cost using the "tune" function:

tune.out = tune( svm, y ~ ., data=dat, kernel="radial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5,1,2,3,4) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value

The best model is stored in "tune.out$best.model"

print(tune.out$best.model) y_hat = predict( tune.out$best.model, newdata=data.frame(x1=x1,x2=x2) ) y_hat = as.numeric( as.character( y_hat ) ) # convert factor responses into numerical values print( sprintf('Nonlinear SVM training error rate= %10.6f', 1 - sum( y_hat == y )/length(y) ) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_nonlinear_SVM_predicted_cla ss.eps", onefile=FALSE, horizontal=FALSE) } plot( x1, x2, col=(y_hat+1), pch=19, cex=1.05, xlab='x1', ylab='x2' ) grid() if( save_plots ){ dev.off() }

Written by:

--

John L. Weatherwax 2009-04-

email: [email protected]

Please send comments and especially bug reports to the

above email address.

EPage 386

#----- save_plots = F if(! require("MASS") ){ install.packages("MASS") } if(! require("e1071") ){ install.packages("e1071") } set.seed(0) vt100ClearScreen <- function(...) cat("\033[2J") vt100ClearScreen() Auto = read.csv("../../Data/Auto.csv",header=T,na.strings="?") Auto = na.omit(Auto) Auto$name = NULL

Part (a):

if( TRUE ){ AbvMedian = rep( 0, dim(Auto)[1] ) # 0 => less than the median of mpg AbvMedian[ Auto$mpg > median(Auto$mpg) ] = 1 # 1 => greater than the median of mpg }else{ AbvMedian = rep( "LT", dim(Auto)[1] ) AbvMedian[ Auto$mpg > median(Auto$mpg) ] = "GT" } AbvMedian = as.factor(AbvMedian) Auto$AbvMedian = AbvMedian Auto$mpg = NULL

Part (b):

tune.out = tune( svm, AbvMedian ~ ., data=Auto, kernel="linear", ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value

Some plots to explore with:

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_7_linear_SVM_weight_vs_horsep ower.eps", onefile=FALSE, horizontal=FALSE) } plot( tune.out$best.model, Auto, weight ~ horsepower ) if( save_plots ){ dev.off() }

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_7_linear_SVM_cylinders_vs_yea r.eps", onefile=FALSE, horizontal=FALSE) } plot( tune.out$best.model, Auto, cylinders ~ year ) if( save_plots ){ dev.off() } plot( tune.out$best.model, Auto, cylinders ~ horsepower )

Part (c) radial kernel:

tune.out = tune( svm, AbvMedian ~ ., data=Auto, kernel="radial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5,1,2,3,4) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value plot( tune.out$best.model, Auto, weight ~ horsepower ) plot( tune.out$best.model, Auto, cylinders ~ year )

Part (c) polynomial kernel:

tune.out = tune( svm, AbvMedian ~ ., data=Auto, kernel="polynomial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), degree=c(1,2,3,4,5) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value

print( summary(tune.out) ) # <- use this output to select the optimal cost value

Part (e): Predict the performance on training and testing using the best

linear model:

y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] ) print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) ) print( sprintf('Linear SVM training error rate (optimal cost=%.2f)= %10.6f', tune.out$best.model$cost, 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) ) y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] ) print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) ) print( sprintf('Linear SVM testing error rate (optimal cost=%.2f)= %10.6f', tune.out$best.model$cost, 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test) )

Part (f): Use a radial kernel:

tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="radial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5,1,2,3,4) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] ) print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) ) print( sprintf('Radial SVM training error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) ) y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] ) print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) ) print( sprintf('Radial SVM testing error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test) )

Part (g): Use a polynomial kernel:

tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="polynomial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), degree=c(1, 2,

  1. ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] ) print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) ) print( sprintf('Polynomial SVM training error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) ) y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] ) print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) ) print( sprintf('Polynomial SVM testing error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test) )