



Study with the several resources on Docsity
Earn points by helping other students or get them with a premium plan
Prepare for your exams
Study with the several resources on Docsity
Earn points to download
Earn points by helping other students or get them with a premium plan
Unsupervised Learning - Exercise R code as soutution manual ISLR Introduction to Statistical Learning James, Witten, Hastie, Tibshirani
Typology: Exercises
1 / 6
This page cannot be seen from the preview
Don't miss anything!




set.seed(1000) x1 <- runif(100,-2,2) x2 <- x1 + rnorm(100, 0, 1) y1 <- runif(100,-2,2) y2 <- runif(100,-2,2)
library(stats) library(MASS) par(mfrow=c(1,2)) x.result <- princomp(cbind(x1,x2),cor=TRUE) eqscplot(x1,x2) # draw x-y scatterplot in the same scale pc1 <- x.result$loading[,1] pc2 <- x.result$loading[,2] abline(1/pc1[2],-pc1[1]/pc1[2], lty=2,col="red") abline(1/pc2[2],-pc2[1]/pc2[2], lty=1) y.result <- princomp(cbind(y1,y2),cor=TRUE) y.result$loading[,1] y.result$loading[,2] eqscplot(y1,y2) abline(0.5/y.result$loading[,1][1],-y.result$loading[,1][1]/ y.result$loading[,1][2],lty=2) abline(0.5/y.result$loading[,2][1],-y.result$loading[,2][1]/ y.result$loading[,2][2],lty=1)
x3 <- x1* x3.result <- princomp(cbind(x3,x2),cor=FALSE) eqscplot(x3,x2) pc1 <- x3.result$loading[,1] pc2 <- x3.result$loading[,2] abline(1/pc1[2],-pc1[1]/pc1[2]) abline(1/pc2[2],-pc2[1]/pc2[2]) ############################
############################
library(MASS) library(stats) data(iris)
names(iris) #$"Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species" attach(iris) eqscplot(cbind(Sepal.Length,Sepal.Width),pch=c(1,2,3)[Species],col=c("red", "green", "blue")[Species], xlab="Sepal Length", ylab="Sepal Width", main="Sepal: Width vs. Length") legend(6.5, 4.55, legend=c("setosa","versicolor","virginica"),pch=c(1,2,3), col=c("red", "green", "blue"),cex=0.8) eqscplot(cbind(Petal.Length,Petal.Width),pch=c(1,2,3)[Species],col=c("red", "green", "blue")[Species], xlab="Petal Length", ylab="Petal Width", main="Petal: Width vs. Length") legend(1.5, 3, legend=c("setosa","versicolor","virginica"),pch=c(1,2,3), col=c("red", "green", "blue"),cex=0.8)
iris.pca <- princomp(iris[,1:4], cor=FALSE) round(loadings(iris.pca)[,1:4],8) ## keep enough decimal points summary(iris.pca)
biplot(iris.pca) eqscplot(iris.pca$scores[,1], iris.pca$scores[,2], xlab="The First PC", ylab="The Second PC", pch=c(1,2,3)[Species],col=c("red", "green", "blue")[Species]) legend(1.5, 3, legend=c("setosa","versicolor","virginica"),pch=c(1,2,3), col=c("red", "green", "blue"),cex=0.8)
pr.var=iris.pca$sdev^ pve=pr.var/sum(pr.var) par(mfrow=c(1,2)) plot(pve, xlab="Principal Component", ylab="Proportion of Variance Explained", ylim=c(0,1), type="b") plot(cumsum(pve), xlab="Principal Component", ylab="Cumulative Proportion of Variance Explained", ylim=c(0,1), type="b")
##################################### library(stats) data <- source("/Users/xwang/Documents/Teaching at StFX/STAT472/Winter 2013/Data Sets/checktr.txt") checker_train <- data$value
par(mfrow=c(1,2)) x.min <- min(checker_train[,1]) x.max <- max(checker_train[,1]) y.min <- min(checker_train[,2]) y.max <- max(checker_train[,2]) plot(0,0, xlim=c(x.min-0.5,x.max+0.5),ylim=c(y.min- 0.5,y.max+0.5),type="n",xlab="x1",ylab="x2") points(checker_train[checker_train[,3]==0,1:2],pch="o") points(checker_train[checker_train[,3]==1,1:2],pch="+")
x.dist <- dist(checker_train[,1:2], method = "euclidean") x.hclust <- hclust(x.dist, method="single") x <- checker_train[,1:2] y <- checker_train[,3]
l <- paste(dimnames(x)[[1]], " (", round(x[,1],1), ",", round(x[,2],1),")", c("0","1")[as.factor(y)], sep="")
plclust(x.hclust, label=l, hang=-1, main ="", sub ="", frame.plot= TRUE, xlab = "", ylab ="Dissimilarity") ################# Representation ######### par(mfrow=c(2,2)) l <- paste(dimnames(x)[[1]], " (", round(x[,1],1), ",", round(x[,2],1),")", c("0","1")[as.factor(y)], sep="")
x.hclust.s <- hclust(x.dist, method="single")
plclust(x.hclust.s, label=l, hang=-1,sub ="", frame.plot= TRUE, xlab = "", ylab ="Dissimilarity", main="Single Linkage")
x.hclust.c <- hclust(x.dist, method="complete")
plclust(x.hclust.c, label=l, hang=-1, sub ="", frame.plot= TRUE, xlab = "", ylab ="Dissimilarity", main="Complete Linkage")
x.hclust.a <- hclust(x.dist, method="average")
plclust(x.hclust.a, label=l, hang=-1, sub ="", frame.plot= TRUE, xlab = "", ylab ="Dissimilarity", main="Average Linkage") ################# Class labels ############ par(mfrow=c(2,2))
plot(0,0, xlim=c(x.min-0.5,x.max+0.5),ylim=c(y.min- 0.5,y.max+0.5),type="n",xlab="x1",ylab="x2") points(checker_train[checker_train[,3]==0,1:2],pch="o") points(checker_train[checker_train[,3]==1,1:2],pch="+")
clus <- cutree(x.hclust.s,2)
points(checker_train[clus==2,1:2],pch=5,cex=par()$cex*3)
plot(0,0, xlim=c(x.min-0.5,x.max+0.5),ylim=c(y.min- 0.5,y.max+0.5),type="n",xlab="x1",ylab="x2") points(checker_train[checker_train[,3]==0,1:2],pch="o") points(checker_train[checker_train[,3]==1,1:2],pch="+")
clus <- cutree(x.hclust.c,2)
points(checker_train[clus==2,1:2],pch=5,cex=par()$cex*3)