Polynomial Regression
Data called “Position_Salaries”
| Position          | Level | Salary  |
|-------------------|-------|---------|
| Business Analyst  | 1     | 45000   |
| Junior Consultant | 2     | 50000   |
| Senior Consultant | 3     | 60000   |
| Manager           | 4     | 80000   |
| Country Manager   | 5     | 110000  |
| Region Manager    | 6     | 150000  |
| Partner           | 7     | 200000  |
| Senior Partner    | 8     | 300000  |
| C-level           | 9     | 500000  |
| CEO               | 10    | 1000000 |
Step 1
# Importing the dataset
# Reads 'Position_Salaries.csv' from the current working directory.
dataset <- read.csv('Position_Salaries.csv')
# Keep only columns 2 and 3; the later steps refer to them as Level and Salary.
dataset <- dataset[2:3]
Step 2
# Fitting Linear Regression to the dataset
# `Salary ~ .` regresses Salary on every other column currently in `dataset`.
lin_reg <- lm(formula = Salary ~ .,
              data = dataset)
# Print the coefficient table and fit statistics.
summary(lin_reg)
Call:
lm(formula = Salary ~ ., data = dataset)

Residuals:
    Min      1Q  Median      3Q     Max
-170818 -129720  -40379   65856  386545

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  -195333     124790  -1.565  0.15615
Level          80879      20112   4.021  0.00383 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 182700 on 8 degrees of freedom
Multiple R-squared:  0.669,  Adjusted R-squared:  0.6277
F-statistic: 16.17 on 1 and 8 DF,  p-value: 0.003833
# Fitting Polynomial Regression to the dataset
Step 3
# Add the 2nd, 3rd and 4th powers of Level as extra predictor columns.
dataset$Level2 <- dataset$Level^2
dataset$Level3 <- dataset$Level^3
dataset$Level4 <- dataset$Level^4
# `Salary ~ .` now picks up Level together with the power columns added above,
# so this fits a degree-4 polynomial in Level.
poly_reg <- lm(formula = Salary ~ .,
               data = dataset)
# Print the coefficient table and fit statistics.
summary(poly_reg)
Call:
lm(formula = Salary ~ ., data = dataset)

Residuals:
     1      2      3      4      5      6      7      8      9     10
 -8357  18240   1358 -14633 -11725   6725  15997  10006 -28695  11084

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept)  184166.7    67768.0   2.718  0.04189 *
Level       -211002.3    76382.2  -2.762  0.03972 *
Level2        94765.4    26454.2   3.582  0.01584 *
Level3       -15463.3     3535.0  -4.374  0.00719 **
Level4          890.2      159.8   5.570  0.00257 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 20510 on 5 degrees of freedom
Multiple R-squared:  0.9974,  Adjusted R-squared:  0.9953
F-statistic: 478.1 on 4 and 5 DF,  p-value: 1.213e-06
# Visualising the Linear Regression results
# install.packages('ggplot2')
Step 4
library(ggplot2)
# Each `+` must end its line: a line consisting of `ggplot()` alone is a
# complete expression, so layers on the following lines would not be added.
ggplot() +
  # Observed data points.
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  # Fitted values of the linear model at the observed levels.
  geom_line(aes(x = dataset$Level,
                y = predict(lin_reg, newdata = dataset)),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Linear Regression)') +
  xlab('Level') +
  ylab('Salary')
# Visualising the Polynomial Regression results
# install.packages('ggplot2')
Step 5
library(ggplot2)
# Each `+` must end its line: a line consisting of `ggplot()` alone is a
# complete expression, so layers on the following lines would not be added.
ggplot() +
  # Observed data points.
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  # Fitted values of the polynomial model at the observed levels.
  geom_line(aes(x = dataset$Level,
                y = predict(poly_reg, newdata = dataset)),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Polynomial Regression)') +
  xlab('Level') +
  ylab('Salary')
# Visualising the Regression Model results
# (for higher resolution and smoother curve)
# install.packages('ggplot2')
library(ggplot2)
# Grid of Level values from the observed minimum to maximum in steps of 0.1,
# so the fitted curve is evaluated between the observed levels as well.
x_grid <- seq(min(dataset$Level), max(dataset$Level), 0.1)
# Each `+` must end its line: a line consisting of `ggplot()` alone is a
# complete expression, so layers on the following lines would not be added.
ggplot() +
  # Observed data points.
  geom_point(aes(x = dataset$Level, y = dataset$Salary),
             colour = 'red') +
  # poly_reg was fitted on Level, Level2, Level3 and Level4, so the new data
  # frame has to supply every one of those columns for the grid values.
  geom_line(aes(x = x_grid,
                y = predict(poly_reg,
                            newdata = data.frame(Level = x_grid,
                                                 Level2 = x_grid^2,
                                                 Level3 = x_grid^3,
                                                 Level4 = x_grid^4))),
            colour = 'blue') +
  ggtitle('Truth or Bluff (Polynomial Regression)') +
  xlab('Level') +
  ylab('Salary')
# Predicting a new result with Linear Regression
# Evaluate the linear model at a single new point, Level = 6.5.
predict(lin_reg, newdata = data.frame(Level = 6.5))
solution
1
330378.8
# Predicting a new result with Polynomial Regression
# Evaluate the polynomial model at Level = 6.5; each power column the model
# was fitted on is supplied explicitly.
predict(
  poly_reg,
  newdata = data.frame(
    Level = 6.5,
    Level2 = 6.5^2,
    Level3 = 6.5^3,
    Level4 = 6.5^4
  )
)
solution
1
158862.5
Comments
Post a Comment