# p. 165
# Load the ISLR package, which supplies the Caravan data set, and take a
# first look at the data.
library(ISLR)
dim(Caravan)
## [1] 5822 86
#View(Caravan)
# NOTE(review): attach() is kept for backward compatibility. Nothing in the
# visible part of this script relies on the attached names -- confirm that
# before removing it.
attach(Caravan)
# Class balance of the response variable.
summary(Caravan$Purchase)
## No Yes
## 5474 348
# Share of customers who purchased Caravan insurance, computed from the data
# rather than retyped from the counts printed above (348 / 5822, about 6%).
mean(Caravan$Purchase == "Yes")
## [1] 0.05977327
# Distribution of the first column on its original scale.
summary(Caravan[, 1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 10.00 30.00 24.25 35.00 41.00
# Standardize the predictors: after scale(), every column of standardized.X
# has a standard deviation of one and a mean of zero. The response column
# (Purchase) is excluded by name instead of by its position (86), so the
# selection does not depend on the column order.
standardized.X <- scale(Caravan[, names(Caravan) != "Purchase"]) # Produces matrix for all columns
summary(standardized.X[, 1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.8101 -1.1095 0.4473 0.0000 0.8365 1.3036
# Before scaling, the first two columns have very different variances ...
var(Caravan[, 1])
## [1] 165.0378
var(Caravan[, 2])
## [1] 0.1647078
# ... after scaling, both have variance one.
var(standardized.X[, 1])
## [1] 1
var(standardized.X[, 2])
## [1] 1
# The column mean is zero up to floating-point rounding.
mean(standardized.X[, 1])
## [1] -7.025576e-17
# Hold out the first 1000 observations as the test set; every remaining row
# goes into the training set. The same split is applied to the standardized
# predictors and to the Purchase response.
test <- seq_len(1000)
test.X <- standardized.X[test, ]
train.X <- standardized.X[-test, ] # Exclude first 1000 rows
test.Y <- Caravan$Purchase[test]
train.Y <- Caravan$Purchase[-test]
# K-nearest-neighbours classification of Purchase on the standardized
# predictors, evaluated on the held-out test set for k = 1, 3 and 5.
library(class) # knn
# knn() breaks ties at random, so fix the seed to make the run repeatable.
set.seed(1)
knn.pred <- knn(train.X, test.X, train.Y, k = 1)
# Overall test error rate of the k = 1 classifier.
mean(test.Y != knn.pred) # 0.118 11.8%
## [1] 0.118
# Error rate of always predicting "No", for comparison.
mean(test.Y != "No") # 0.059 6%
## [1] 0.059
table(knn.pred, test.Y) # Confusion Matrix
## test.Y
## knn.pred No Yes
## No 873 50
## Yes 68 9
# Success rate among customers predicted to purchase. It is computed from
# the predictions themselves instead of retyping the counts printed above,
# so it stays correct if the confusion matrix changes.
mean(test.Y[knn.pred == "Yes"] == "Yes") # Success rate = 11.7%
## [1] 0.1168831
knn.pred <- knn(train.X, test.X, train.Y, k = 3)
table(knn.pred, test.Y) # Confusion Matrix
## test.Y
## knn.pred No Yes
## No 920 54
## Yes 21 5
mean(test.Y[knn.pred == "Yes"] == "Yes") # Success rate = 19.2%
## [1] 0.1923077
knn.pred <- knn(train.X, test.X, train.Y, k = 5)
table(knn.pred, test.Y) # Confusion Matrix
## test.Y
## knn.pred No Yes
## No 930 55
## Yes 11 4
mean(test.Y[knn.pred == "Yes"] == "Yes") # Success rate = 26.7%
## [1] 0.2666667
# Logistic regression of Purchase on all other columns, fitted on the
# training rows only (subset = -test) and evaluated on the held-out test rows.
glm.fits <- glm(Purchase ~ ., data = Caravan, family = binomial, subset = -test)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
glm.probs <- predict(glm.fits, Caravan[test, ], type = "response")
# Classify as "Yes" when the predicted probability exceeds 0.5. The vector is
# sized from glm.probs rather than a hard-coded 1000, so it follows the test
# set if the split changes.
glm.pred <- rep("No", length(glm.probs)) # One prediction per test row
glm.pred[glm.probs > .5] <- "Yes"
table(glm.pred, test.Y)
## test.Y
## glm.pred No Yes
## No 934 59
## Yes 7 0
# Repeat with a lower probability cut-off of 0.25.
glm.pred <- rep("No", length(glm.probs)) # One prediction per test row
glm.pred[glm.probs > .25] <- "Yes"
table(glm.pred, test.Y)
## test.Y
## glm.pred No Yes
## No 919 48
## Yes 22 11
# Success rate among customers predicted to purchase, computed from the
# predictions instead of retyping the counts printed above.
mean(test.Y[glm.pred == "Yes"] == "Yes") # Success rate = 33.3%
## [1] 0.3333333