






Study with the several resources on Docsity
Earn points by helping other students or get them with a premium plan
Prepare for your exams
Study with the several resources on Docsity
Earn points to download
Earn points by helping other students or get them with a premium plan
Support Vector Machines - Exercise R code as soutution manual ISLR Introduction to Statistical Learning James, Witten, Hastie, Tibshirani
Typology: Exercises
1 / 10
This page cannot be seen from the preview
Don't miss anything!







#----- save_plots = F if(! require("ISLR") ){ install.packages("ISLR") } if(! require("e1071") ){ install.packages("e1071") } set.seed(0) vt100ClearScreen <- function(...) cat("\033[2J") vt100ClearScreen() DF = data.frame( x1=c(3,2,4,1,2,4,4), x2=c(4,2,4,4,1,3,1), y=as.factor(c(rep(1,4),rep(0,3))) ) colors = c( rep('red',4), rep('blue',3) )
violation to be very large): svm.fit = svm( y ~ ., data=DF, kernel="linear", cost=1e6, scale=FALSE ) print( summary( svm.fit ) )
contact me.
vectors via "index".
if( FALSE ){ beta_0 = svm.fit$coef beta_x1 = sum( svm.fit$coefs * DF[ svm.fit$index, ]$x1 ) beta_x2 = sum( svm.fit$coefs * DF[ svm.fit$index, ]$x2 )
#abline( a=-beta_0/beta_x1, b=-beta_x2/beta_x1 ) }
slope = ( 3.5 - 1.5 ) / ( 4 - 2 )
intercept = -2 * slope + 1.
slope_m_upper = slope intercept_m_upper = -2 * slope_m_upper + 2 slope_m_lower = slope intercept_m_lower = -2 * slope_m_lower + 1 if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_3_plot.eps", onefile=FALSE, horizontal=FALSE) } plot( DF$x1, DF$x2, col=colors, pch=19, cex=2.0, xlab='X_1', ylab='X_2', main='' ) grid() if( save_plots ){ dev.off() } if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_3_plot_with_SVM_lines.eps", onefile=FALSE, horizontal=FALSE) } plot( DF$x1, DF$x2, col=colors, pch=19, cex=2.0, xlab='X_1', ylab='X_2', main='' ) abline(a=intercept,b=slope,col='black') abline(a=intercept_m_upper,b=slope_m_upper,col='gray',lty=2,lwd=2) abline(a=intercept_m_lower,b=slope_m_lower,col='gray',lty=2,lwd=2) grid() if( save_plots ){ dev.off() }
print( sprintf('Linear logistic regression training error rate= %10.6f', 1 - sum( predicted_class == y )/length(y) ) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_linear_LR_predicted_class.e ps", onefile=FALSE, horizontal=FALSE) } plot( x1, x2, col=(predicted_class+1), pch=19, cex=1.05, xlab='x1', ylab='x2', main='logistic regression: y ~ x1 + x2' ) grid() if( save_plots ){ dev.off() }
m = glm( y ~ x1 + x2 + I(x1^2) + I(x2^2) + I(x1*x2), data=DF, family="binomial" ) y_hat = predict( m, newdata=data.frame(x1=x1,x2=x2), type="response" ) predicted_class = 1 * ( y_hat > 0.5 ) print( sprintf('Non-linear logistic regression training error rate= %10.6f', 1 - sum( predicted_class == y )/length(y) ) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_nonlinear_LR_predicted_clas s.eps", onefile=FALSE, horizontal=FALSE) } plot( x1, x2, col=(predicted_class+1), pch=19, cex=1.05, xlab='x1', ylab='x2' ) grid() if( save_plots ){ dev.off() }
dat = data.frame( x1=x1, x2=x2, y=as.factor(y) )
tune.out = tune( svm, y ~ ., data=dat, kernel="linear", ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value
print(tune.out$best.model) y_hat = predict( tune.out$best.model, newdata=data.frame(x1=x1,x2=x2) ) y_hat = as.numeric( as.character( y_hat ) ) # convert factor responses into numerical values print( sprintf('Linear SVM training error rate= %10.6f', 1 - sum( y_hat == y )/length(y) ) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_linear_SVM_predicted_class. eps", onefile=FALSE, horizontal=FALSE) } plot( x1, x2, col=(y_hat+1), pch=19, cex=1.05, xlab='x1', ylab='x2' ) grid() if( save_plots ){ dev.off() }
tune.out = tune( svm, y ~ ., data=dat, kernel="radial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5,1,2,3,4) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value
print(tune.out$best.model) y_hat = predict( tune.out$best.model, newdata=data.frame(x1=x1,x2=x2) ) y_hat = as.numeric( as.character( y_hat ) ) # convert factor responses into numerical values print( sprintf('Nonlinear SVM training error rate= %10.6f', 1 - sum( y_hat == y )/length(y) ) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_nonlinear_SVM_predicted_cla ss.eps", onefile=FALSE, horizontal=FALSE) } plot( x1, x2, col=(y_hat+1), pch=19, cex=1.05, xlab='x1', ylab='x2' ) grid() if( save_plots ){ dev.off() }
#----- save_plots = F if(! require("MASS") ){ install.packages("MASS") } if(! require("e1071") ){ install.packages("e1071") } set.seed(0) vt100ClearScreen <- function(...) cat("\033[2J") vt100ClearScreen() Auto = read.csv("../../Data/Auto.csv",header=T,na.strings="?") Auto = na.omit(Auto) Auto$name = NULL
if( TRUE ){ AbvMedian = rep( 0, dim(Auto)[1] ) # 0 => less than the median of mpg AbvMedian[ Auto$mpg > median(Auto$mpg) ] = 1 # 1 => greater than the median of mpg }else{ AbvMedian = rep( "LT", dim(Auto)[1] ) AbvMedian[ Auto$mpg > median(Auto$mpg) ] = "GT" } AbvMedian = as.factor(AbvMedian) Auto$AbvMedian = AbvMedian Auto$mpg = NULL
tune.out = tune( svm, AbvMedian ~ ., data=Auto, kernel="linear", ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_7_linear_SVM_weight_vs_horsep ower.eps", onefile=FALSE, horizontal=FALSE) } plot( tune.out$best.model, Auto, weight ~ horsepower ) if( save_plots ){ dev.off() }
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_7_linear_SVM_cylinders_vs_yea r.eps", onefile=FALSE, horizontal=FALSE) } plot( tune.out$best.model, Auto, cylinders ~ year ) if( save_plots ){ dev.off() } plot( tune.out$best.model, Auto, cylinders ~ horsepower )
tune.out = tune( svm, AbvMedian ~ ., data=Auto, kernel="radial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5,1,2,3,4) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value plot( tune.out$best.model, Auto, weight ~ horsepower ) plot( tune.out$best.model, Auto, cylinders ~ year )
tune.out = tune( svm, AbvMedian ~ ., data=Auto, kernel="polynomial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), degree=c(1,2,3,4,5) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value
print( summary(tune.out) ) # <- use this output to select the optimal cost value
linear model:
y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] ) print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) ) print( sprintf('Linear SVM training error rate (optimal cost=%.2f)= %10.6f', tune.out$best.model$cost, 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) ) y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] ) print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) ) print( sprintf('Linear SVM testing error rate (optimal cost=%.2f)= %10.6f', tune.out$best.model$cost, 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test) )
tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="radial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5,1,2,3,4) ) ) print( summary(tune.out) ) # <- use this output to select the optimal cost value y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] ) print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) ) print( sprintf('Radial SVM training error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) ) y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] ) print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) ) print( sprintf('Radial SVM testing error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test) )
tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="polynomial", ranges = list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), degree=c(1, 2,