0%

Random Forest

random_forest_regression.utf8

Importing the dataset

# Load the position/salary table and keep only its 2nd and 3rd columns,
# which the later chunks reference as `Level` and `Salary`.
dataset <- read.csv("Position_Salaries.csv")[2:3]

Splitting the dataset into the Training set and Test set (skipped: the code below is commented out, so the full dataset is used)

# install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

Feature Scaling (skipped: the code below is commented out)

# training_set = scale(training_set)
# test_set = scale(test_set)

Decision Tree Model

Fitting Decision Tree Regression to the dataset

# install.packages('rpart')
library(rpart)
# Fit a regression tree predicting Salary from every other column of
# `dataset`. minsplit = 1 lowers the minimum node size at which rpart
# will attempt a split, so splits are attempted even on very small nodes.
regressor <- rpart(
  Salary ~ .,
  data = dataset,
  control = rpart.control(minsplit = 1)
)

Predicting a new result (Level = 6.5)

# Salary predicted by the fitted model for one new observation at Level 6.5.
y_pred <- predict(regressor, newdata = data.frame(Level = 6.5))

Visualising the Decision Tree Regression results

# install.packages('ggplot2')
library(ggplot2)
# Scatter the observed salaries (red) and overlay the fitted model's
# predictions at the observed levels (blue).
# Columns are mapped by bare name through `data =` instead of extracting
# them with `dataset$...` inside aes(), so ggplot2 resolves them from the
# plot data rather than from whatever `dataset` is in the calling
# environment at render time.
ggplot(data = dataset, mapping = aes(x = Level, y = Salary)) +
  geom_point(colour = "red") +
  geom_line(aes(y = predict(regressor, newdata = dataset)), colour = "blue") +
  labs(title = "Decision Tree Regression", x = "Level", y = "Salary")

Visualising the Decision Tree Regression results (higher resolution)

# install.packages('ggplot2')
library(ggplot2)
# Evaluate the fitted model on a dense grid of levels (step 0.01) spanning
# the observed range, so the prediction curve is drawn at a higher
# resolution than the observed levels alone would give.
x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.01)

# Hold the grid and its predictions in one data frame so both layers can
# share a single bare-name mapping instead of `dataset$...` and free
# vectors inside aes().
grid_fit <- data.frame(Level = x_grid)
grid_fit$Salary <- predict(regressor, newdata = grid_fit["Level"])

ggplot(mapping = aes(x = Level, y = Salary)) +
  geom_point(data = dataset, colour = "red") +   # observed data
  geom_line(data = grid_fit, colour = "blue") +  # model predictions
  labs(
    title = "Truth or Bluff (Decision Tree Regression)",
    x = "Level",
    y = "Salary"
  )

Random Forest Model

Fitting Random Forest Regression to the dataset

# install.packages('randomForest')
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
# Fix the RNG seed so the fitted forest is reproducible, then grow a
# 500-tree forest using the first column of `dataset` as the predictor
# and the Salary column as the response.
set.seed(1234)
regressor <- randomForest(
  x = dataset[1],
  y = dataset[["Salary"]],
  ntree = 500
)

Predicting a new result (Level = 6.5)

# Salary predicted by the fitted model for one new observation at
# Level 6.5, echoed to the console.
y_pred <- predict(regressor, newdata = data.frame(Level = 6.5))
print(y_pred)
##        1 
## 160457.7

Visualising the Random Forest Regression results (higher resolution)

# install.packages('ggplot2')
library(ggplot2)
# Evaluate the fitted model on a dense grid of levels (step 0.01) spanning
# the observed range, so the prediction curve is drawn at a higher
# resolution than the observed levels alone would give.
x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.01)

# Hold the grid and its predictions in one data frame so both layers can
# share a single bare-name mapping instead of `dataset$...` and free
# vectors inside aes().
grid_fit <- data.frame(Level = x_grid)
grid_fit$Salary <- predict(regressor, newdata = grid_fit["Level"])

ggplot(mapping = aes(x = Level, y = Salary)) +
  geom_point(data = dataset, colour = "red") +   # observed data
  geom_line(data = grid_fit, colour = "blue") +  # model predictions
  labs(
    title = "Truth or Bluff (Random Forest Regression)",
    x = "Level",
    y = "Salary"
  )