p54
# Load the college data set from a CSV file in the working directory.
college <- read.csv(file = "College.csv")
# attach(College)
fix function
{r, results='hide'}
# Use the values of the first column as the row names of the data frame.
rownames(college) <- college[[1]]
Remove column 1, because its values have already been assigned to the row names.
# Note: `college[, -1]` on its own only prints the result; it has to be
# assigned back for the first column to actually be removed.
college <- college[, -1L]
# Per-column summary of the remaining variables.
summary(object = college)
## Private Apps Accept Enroll
## Length:777 Min. : 81 Min. : 72 Min. : 35
## Class :character 1st Qu.: 776 1st Qu.: 604 1st Qu.: 242
## Mode :character Median : 1558 Median : 1110 Median : 434
## Mean : 3002 Mean : 2019 Mean : 780
## 3rd Qu.: 3624 3rd Qu.: 2424 3rd Qu.: 902
## Max. :48094 Max. :26330 Max. :6392
## Top10perc Top25perc F.Undergrad P.Undergrad
## Min. : 1.00 Min. : 9.0 Min. : 139 Min. : 1.0
## 1st Qu.:15.00 1st Qu.: 41.0 1st Qu.: 992 1st Qu.: 95.0
## Median :23.00 Median : 54.0 Median : 1707 Median : 353.0
## Mean :27.56 Mean : 55.8 Mean : 3700 Mean : 855.3
## 3rd Qu.:35.00 3rd Qu.: 69.0 3rd Qu.: 4005 3rd Qu.: 967.0
## Max. :96.00 Max. :100.0 Max. :31643 Max. :21836.0
## Outstate Room.Board Books Personal
## Min. : 2340 Min. :1780 Min. : 96.0 Min. : 250
## 1st Qu.: 7320 1st Qu.:3597 1st Qu.: 470.0 1st Qu.: 850
## Median : 9990 Median :4200 Median : 500.0 Median :1200
## Mean :10441 Mean :4358 Mean : 549.4 Mean :1341
## 3rd Qu.:12925 3rd Qu.:5050 3rd Qu.: 600.0 3rd Qu.:1700
## Max. :21700 Max. :8124 Max. :2340.0 Max. :6800
## PhD Terminal S.F.Ratio perc.alumni
## Min. : 8.00 Min. : 24.0 Min. : 2.50 Min. : 0.00
## 1st Qu.: 62.00 1st Qu.: 71.0 1st Qu.:11.50 1st Qu.:13.00
## Median : 75.00 Median : 82.0 Median :13.60 Median :21.00
## Mean : 72.66 Mean : 79.7 Mean :14.09 Mean :22.74
## 3rd Qu.: 85.00 3rd Qu.: 92.0 3rd Qu.:16.50 3rd Qu.:31.00
## Max. :103.00 Max. :100.0 Max. :39.80 Max. :64.00
## Expend Grad.Rate
## Min. : 3186 Min. : 10.00
## 1st Qu.: 6751 1st Qu.: 53.00
## Median : 8377 Median : 65.00
## Mean : 9660 Mean : 65.46
## 3rd Qu.:10830 3rd Qu.: 78.00
## Max. :56233 Max. :118.00
# The summary of `college` reports Private as a character column. pairs()
# rejects character columns and plot() can only coerce them to NA, which is
# why these plots previously fell back to ISLR::College and why the original
# plot() call warned "NAs introduced by coercion". Converting the column to a
# factor once lets the locally loaded data be used directly, so the ISLR
# package no longer has to be attached and detached here.
college$Private <- as.factor(college$Private)

# Scatterplot matrix of the first ten columns of the data.
pairs(college[, 1:10])

# Side-by-side boxplots of out-of-state tuition by Private status. The factor
# goes on the x axis so that plot() draws boxplots. No axis limits are forced:
# the summary shows Outstate running from 2340 to 21700, so the earlier
# 0-100 limits hid all of the data. Columns are referenced through `college`
# rather than via attach(), so nothing is added to the search path.
plot(college$Private, college$Outstate,
     xlab = "Private", ylab = "Outstate")
#rm(college)
# Flag each college as "Yes" when more than 50 percent of its new students
# come from the top 10 percent of their class (Top10perc), otherwise "No".
elite_flag <- college$Top10perc > 50
Elite <- as.factor(replace(rep("No", nrow(college)), elite_flag, "Yes"))
# Number of colleges at each level.
summary(Elite)
## No Yes
## 699 78
# Out-of-state tuition against the Elite factor, in both argument orders.
# NOTE(review): with the factor passed as y, plot() presumably draws points at
# the factor's integer codes; the factor-first call is the one that produces
# side-by-side boxplots.
plot(college$Outstate, Elite)
plot(Elite, college$Outstate)

# Four histograms on a 2 x 2 grid. The columns are looked up with with()
# instead of a second attach(college), which only re-masked objects that were
# already on the search path; with() keeps the default titles and axis labels
# (e.g. "Histogram of PhD").
old_par <- par(mfrow = c(2, 2)) # 4 plots per picture
with(college, {
  hist(PhD, breaks = 10, col = 3)
  hist(F.Undergrad, breaks = 10)
  hist(P.Undergrad, col = 2, breaks = 15)
  hist(Outstate, col = 2, breaks = 15)
})
par(old_par) # restore the previous layout so later plots are not affected

# Scatterplot matrix of three of the variables shown above.
pairs(~ PhD + F.Undergrad + P.Undergrad, college)
Which predictors are quantitative, and which are qualitative?
range of each quantitative predictor
# attach(Auto)
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
# Columns of Auto that are summarised, in the order the results are printed.
auto_cols <- c("mpg", "cylinders", "displacement", "weight",
               "horsepower", "acceleration", "year")

# Range (minimum and maximum) of each column. print() reproduces what
# evaluating each call at top level would display.
for (col_name in auto_cols) {
  print(range(Auto[[col_name]]))
}
## [1] 9.0 46.6
## [1] 3 8
## [1] 68 455
## [1] 1613 5140
## [1] 46 230
## [1] 8.0 24.8
## [1] 70 82
# range(order)

# Standard deviation of the same columns, in the same order.
for (col_name in auto_cols) {
  print(sd(Auto[[col_name]]))
}
## [1] 7.805007
## [1] 1.705783
## [1] 104.644
## [1] 849.4026
## [1] 38.49116
## [1] 2.758864
## [1] 3.683737
#View(Auto)
# Work on a copy so the packaged Auto data stays untouched.
auto <- Auto
# Snapshot of row 10 taken before the rows are removed.
# NOTE(review): not used anywhere in the visible part of the file.
tenth <- auto[10, ]
# Drop observations 10 through 85.
auto <- auto[-(10:85), ]

# Standard deviation of each summarised column in the reduced data. The
# columns are indexed through `auto` explicitly instead of attach(auto), so no
# second copy of mpg, cylinders, ... is placed on the search path where it
# could silently diverge from the data frame.
subset_cols <- c("mpg", "cylinders", "displacement", "weight",
                 "horsepower", "acceleration", "year")
for (col_name in subset_cols) {
  print(sd(auto[[col_name]]))
}
## [1] 7.867283
## [1] 1.654179
## [1] 99.67837
## [1] 811.3002
## [1] 35.70885
## [1] 2.693721
## [1] 3.106217
library(MASS)
# Show the first five rows of the Boston data.
Boston[seq_len(5), , drop = FALSE]
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
Number of rows and columns
How many rows are in this data set? How many columns? What do the rows and columns represent?
# Number of rows followed by number of columns.
c(nrow(Boston), ncol(Boston))
## [1] 506 14
Make some pairwise scatterplots of the predictors (columns) in this data set. Describe your findings.
# Scatterplot matrix of every column against every other column.
graphics::pairs(Boston)
Are any of the predictors associated with per capita crime rate? If so, explain the relationship.
# Crime rate against the age of the housing stock.
pairs(~ crim + age, Boston)
#help(Boston)
# Crime rate against age, zn and rad. rad (the radial-highway column) is
# included because the commentary that follows draws a conclusion about
# radial highways, which the original plot of crim, age and zn did not show.
pairs(~ crim + age + zn + rad, Boston)
Age of the housing: most likely, the older the neighborhood, the lower its income and the more experience it has with crime.
Radial highways: either criminals have a quick getaway, or the suburb is so far away from a highway that it has not been gentrified.
Do any of the suburbs of Boston appear to have particularly high crime rates? Tax rates? Pupil-teacher ratios? Comment on the range of each predictor.
# Size of the subset of suburbs at or above each threshold; dim() reports the
# number of rows first and the number of columns second.
dim(subset(Boston, crim >= 20))
## [1] 18 14
dim(subset(Boston, tax >= 666))
## [1] 137 14
dim(subset(Boston, ptratio >= 20))
## [1] 201 14
How many of the suburbs in this data set bound the Charles river?
# Count the rows whose chas value equals 1.
sum(Boston$chas == 1)
## [1] 35
What is the median pupil-teacher ratio among the towns in this data set?
# Median of the ptratio column.
with(Boston, median(ptratio))
## [1] 19.05
Which suburb of Boston has the lowest median value of owner-occupied homes? What are the values of the other predictors for that suburb, and how do those values compare to the overall ranges for those predictors? Comment on your findings.
# Rows whose medv equals the smallest medv in the data (ties are all shown).
subset(Boston, medv == min(medv))
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 399 38.3518 0 18.1 0 0.693 5.453 100 1.4896 24 666 20.2 396.90 30.59
## 406 67.9208 0 18.1 0 0.693 5.683 100 1.4254 24 666 20.2 384.97 22.98
## medv
## 399 5
## 406 5
In this data set, how many of the suburbs average more than seven rooms per dwelling? More than eight rooms per dwelling? Comment on the suburbs that average more than eight rooms per dwelling.
# Number of suburbs whose rm value is above seven.
sum(Boston$rm > 7)
## [1] 64
# Number of suburbs whose rm value is above eight.
over_eight <- Boston$rm > 8
sum(over_eight)
## [1] 13
# Scatterplot matrix restricted to the suburbs with rm above eight.
pairs(Boston[over_eight, ])