Polynomial Regression


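Data called “Position_Salaries” (read below from Position_Salaries.csv)

| Position          | Level | Salary  |
|-------------------|-------|---------|
| Business Analyst  | 1     | 45000   |
| Junior Consultant | 2     | 50000   |
| Senior Consultant | 3     | 60000   |
| Manager           | 4     | 80000   |
| Country Manager   | 5     | 110000  |
| Region Manager    | 6     | 150000  |
| Partner           | 7     | 200000  |
| Senior Partner    | 8     | 300000  |
| C-level           | 9     | 500000  |
| CEO               | 10    | 1000000 |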


Step 1
# Importing the dataset

# Read the CSV and keep only its 2nd and 3rd columns in one step.
# NOTE(review): assumes the file's column order is Position, Level, Salary
# (as in the table above), so this keeps Level and Salary -- confirm.
dataset <- read.csv('Position_Salaries.csv')[2:3]

Step 2
# Fitting Linear Regression to the dataset

# Least-squares fit of Salary on every other column of `dataset` (the `.` on
# the right-hand side of the formula), then print the model summary.
lin_reg <- lm(Salary ~ ., data = dataset)
summary(lin_reg)

Call:

lm(formula = Salary ~ ., data = dataset)

Residuals:
    Min      1Q  Median      3Q     Max
-170818 -129720  -40379   65856  386545

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)  -195333     124790  -1.565  0.15615  
Level          80879      20112   4.021  0.00383 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 182700 on 8 degrees of freedom
Multiple R-squared:  0.669,    Adjusted R-squared:  0.6277
F-statistic: 16.17 on 1 and 8 DF,  p-value: 0.003833


# Fitting Polynomial Regression to the dataset
Step 3

# Append Level^2, Level^3 and Level^4 as columns Level2, Level3 and Level4.
dataset[paste0('Level', 2:4)] <- lapply(
  2:4,
  function(power) dataset$Level^power
)
# `Salary ~ .` now regresses on Level plus the three power columns just added,
# i.e. a degree-4 polynomial in Level.
poly_reg <- lm(Salary ~ ., data = dataset)
summary(poly_reg)

Call:
lm(formula = Salary ~ ., data = dataset)

Residuals:
     1      2      3      4      5      6      7      8      9     10
 -8357  18240   1358 -14633 -11725   6725  15997  10006 -28695  11084

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept)  184166.7    67768.0   2.718  0.04189 *
Level       -211002.3    76382.2  -2.762  0.03972 *
Level2        94765.4    26454.2   3.582  0.01584 *
Level3       -15463.3     3535.0  -4.374  0.00719 **
Level4          890.2      159.8   5.570  0.00257 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 20510 on 5 degrees of freedom
Multiple R-squared:  0.9974,   Adjusted R-squared:  0.9953
F-statistic: 478.1 on 4 and 5 DF,  p-value: 1.213e-06

# Visualising the Linear Regression results
# install.packages('ggplot2')

Step 4

library(ggplot2)

# Observed salaries (red points) with the straight-line fit from `lin_reg`
# (blue line) drawn over them.
# The data frame is passed to ggplot() and columns are mapped by bare name:
# ggplot2 discourages `dataset$col` inside aes() because it bypasses the
# layer's data.
ggplot(data = dataset, mapping = aes(x = Level)) +
  geom_point(aes(y = Salary), colour = 'red') +
  geom_line(aes(y = predict(lin_reg, newdata = dataset)), colour = 'blue') +
  labs(
    title = 'Truth or Bluff (Linear Regression)',
    x = 'Level',
    y = 'Salary'
  )


# Visualising the Polynomial Regression results
# install.packages('ggplot2')

Step 5
library(ggplot2)

# Observed salaries (red points) with the polynomial fit from `poly_reg`
# (blue line), evaluated at the observed levels only.
# The data frame is passed to ggplot() and columns are mapped by bare name:
# ggplot2 discourages `dataset$col` inside aes() because it bypasses the
# layer's data.
ggplot(data = dataset, mapping = aes(x = Level)) +
  geom_point(aes(y = Salary), colour = 'red') +
  geom_line(aes(y = predict(poly_reg, newdata = dataset)), colour = 'blue') +
  labs(
    title = 'Truth or Bluff (Polynomial Regression)',
    x = 'Level',
    y = 'Salary'
  )





# Visualising the Regression Model results (for higher resolution and smoother curve)
# install.packages('ggplot2')

library(ggplot2)

# Evaluate the polynomial fit on a 0.1-step grid spanning the observed Level
# range, so the fitted curve is drawn smoothly rather than as straight
# segments between the observed points.
x_grid <- seq(min(dataset$Level), max(dataset$Level), 0.1)

# predict() needs the same predictor columns the model was fitted with, so the
# grid carries the power terms under the names Level2, Level3 and Level4.
curve_data <- data.frame(
  Level = x_grid,
  Level2 = x_grid^2,
  Level3 = x_grid^3,
  Level4 = x_grid^4
)
curve_data$Salary <- predict(poly_reg, newdata = curve_data)

# Each layer gets its own data frame and shares one bare-name mapping, instead
# of passing `dataset$...` vectors and an inline data.frame through aes().
ggplot(mapping = aes(x = Level, y = Salary)) +
  geom_point(data = dataset, colour = 'red') +
  geom_line(data = curve_data, colour = 'blue') +
  labs(
    title = 'Truth or Bluff (Polynomial Regression)',
    x = 'Level',
    y = 'Salary'
  )



# Predicting a new result with Linear Regression

# Salary estimated by the linear model for a level of 6.5.
predict(
  lin_reg,
  newdata = data.frame(Level = 6.5)
)

Solution (R prints the prediction under the row name `1` of the new data):
       1
330378.8

# Predicting a new result with Polynomial Regression

# Salary estimated by the polynomial model for a level of 6.5.
# 6.5^(1:4) gives 6.5, 6.5^2, 6.5^3 and 6.5^4, which become the single-row
# predictor columns Level, Level2, Level3 and Level4.
predict(
  poly_reg,
  newdata = setNames(
    as.data.frame(t(6.5^(1:4))),
    c('Level', 'Level2', 'Level3', 'Level4')
  )
)
Solution (R prints the prediction under the row name `1` of the new data):
       1
158862.5


Comments

Popular posts from this blog

Decision Tree Classification

Random Forest Classification