Importing the dataset
dataset = read.csv('Position_Salaries.csv')
dataset = dataset[2:3]
Splitting the dataset into the Training set and Test set
# install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$Salary, SplitRatio = 2/3)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)
Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)
Decision Tree Model
Fitting Decision Tree Regression to the Training set
# install.packages('rpart')
library(rpart)
regressor = rpart(formula = Salary ~ .,
data = dataset,
control = rpart.control(minsplit = 1))
Predicting the Test set results
y_pred = predict(regressor, data.frame(Level = 6.5))
Visualising the Decision Tree Regression results
# install.packages('ggplot2')
library(ggplot2)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = dataset$Level, y = predict(regressor, newdata = dataset)),
colour = 'blue') +
ggtitle('Decision Tree Regression') +
xlab('Level') +
ylab('Salary')
Visualising the Decision Tree Regression results (higher resolution)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Decision Tree Regression)') +
xlab('Level') +
ylab('Salary')
Random Forest Model
Fitting Random Forest Regression to the Training set
# install.packages('randomForest')
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
set.seed(1234)
regressor = randomForest(x = dataset[1], y = dataset$Salary, ntree = 500)
Predicting the Test set results
y_pred = predict(regressor, data.frame(Level = 6.5))
print(y_pred)
## 1
## 160457.7
Visualising the Decision Tree Regression results (higher resolution)
# install.packages('ggplot2')
library(ggplot2)
x_grid = seq(min(dataset$Level), max(dataset$Level), 0.01)
ggplot() +
geom_point(aes(x = dataset$Level, y = dataset$Salary),
colour = 'red') +
geom_line(aes(x = x_grid, y = predict(regressor, newdata = data.frame(Level = x_grid))),
colour = 'blue') +
ggtitle('Truth or Bluff (Random Forest Regression)') +
xlab('Level') +
ylab('Salary')