Discussion 3

Machine Learning

Friendly Reminder

FIRST PROJECT DEADLINE: 10.27 11:30pm!!!

Tree based packages

  • rpart
  • party
  • randomForest
  • ranger

We will use the famous iris dataset as an example. First, take a look at the data. We want to predict Species from each observation’s covariates.

# Preview the first six observations of the built-in iris data set.
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

Second, split the data into two parts: one for training and the other for testing.

# Randomly assign two thirds of the rows to the training set and keep the
# remaining third for testing.
# Fix the RNG seed so the split (and every accuracy computed from it) can be
# reproduced from run to run; any fixed value works.
set.seed(1)
# sample() silently truncates a non-integer size, so round it explicitly
# instead of relying on floating-point arithmetic landing on a whole number.
n.train <- round(2 / 3 * nrow(iris))
train.idx <- sample(nrow(iris), n.train)
iris.train <- iris[train.idx, ]
# Negative indexing drops the training rows, leaving the held-out rows.
iris.test <- iris[-train.idx, ]

rpart

# rpart: fit a recursive-partitioning classification tree on the training
# rows, keeping the elapsed wall-clock time of the fit.
if (!requireNamespace("rpart")) {
  install.packages("rpart")
}
library(rpart)
time_rpart <- system.time(
  fit_rpart <- rpart(Species ~ ., data = iris.train)
)["elapsed"]
# Predicted class labels for the held-out rows.
predict_rpart <- predict(fit_rpart, newdata = iris.test, type = "class")
# Fraction of held-out rows whose predicted label matches the true Species.
ratio_rpart <- sum(iris.test$Species == predict_rpart) / length(predict_rpart)

party

# party: fit a ctree() model on the training rows, keeping the elapsed
# wall-clock time of the fit.
if (!requireNamespace("party")) {
  install.packages("party")
}
library(party)
time_ctree <- system.time(
  fit_ctree <- ctree(Species ~ ., data = iris.train)
)["elapsed"]
# Predicted responses for the held-out rows.
predict_ctree <- predict(fit_ctree, newdata = iris.test, type = "response")
# Fraction of held-out rows whose predicted label matches the true Species.
ratio_ctree <- sum(iris.test$Species == predict_ctree) / length(predict_ctree)

randomForest

# randomForest: fit a random forest on the training rows, keeping the elapsed
# wall-clock time of the fit.
if (!requireNamespace("randomForest")) {
  install.packages("randomForest")
}
library(randomForest)

time_rf <- system.time(
  fit_rf <- randomForest(Species ~ ., data = iris.train)
)["elapsed"]
# Predicted responses for the held-out rows.
predict_rf <- predict(fit_rf, newdata = iris.test, type = "response")
# Fraction of held-out rows whose predicted label matches the true Species.
ratio_rf <- sum(iris.test$Species == predict_rf) / length(predict_rf)

ranger

# ranger: fit a ranger() model on the training rows, keeping the elapsed
# wall-clock time of the fit.
if (!requireNamespace("ranger")) {
  install.packages("ranger")
}
library(ranger)
time_ranger <- system.time(
  fit_ranger <- ranger(Species ~ ., data = iris.train)
)["elapsed"]
# Note: this predict() call takes the new data via `data`, and the predicted
# labels are read from the `predictions` element of the returned object.
predict_ranger <- predict(fit_ranger, data = iris.test, type = "response")
# Fraction of held-out rows whose predicted label matches the true Species.
ratio_ranger <- sum(iris.test$Species == predict_ranger$predictions) /
  length(predict_ranger$predictions)

Neural Network

  • nnet
# nnet: fit a neural network with the settings below on the training rows,
# keeping the elapsed wall-clock time of the fit.
if (!requireNamespace("nnet")) {
  install.packages("nnet")
}
library(nnet)
time_nnet <- system.time(
  fit_nnet <- nnet(
    Species ~ .,
    data = iris.train,
    size = 2,
    maxit = 200,
    decay = 5e-4
  )
)["elapsed"]
# Predicted class labels for the held-out rows.
predict_nnet <- predict(fit_nnet, newdata = iris.test, type = "class")
# Fraction of held-out rows whose predicted label matches the true Species.
ratio_nnet <- sum(iris.test$Species == predict_nnet) / length(predict_nnet)

Result

# Collect the accuracy and fit time of every method into one table, with one
# row per method, and render it with knitr.
method_name <- c("rpart", "ctree", "randomForest", "ranger", "nnet")
result_ratio <- c(
  ratio_rpart, ratio_ctree, ratio_rf, ratio_ranger, ratio_nnet
)
result_time <- c(
  time_rpart, time_ctree, time_rf, time_ranger, time_nnet
)
# The values in both vectors follow the same order as method_name.
result <- data.frame(
  Ratio = result_ratio,
  Time = result_time,
  row.names = method_name
)
knitr::kable(result)
Ratio Time
rpart 0.94 0.012
ctree 0.94 0.126
randomForest 0.94 0.028
ranger 0.94 0.030
nnet 0.94 0.012

Optional Tensorflow with Keras

As the deep learning community grows, there are many open-source deep learning frameworks that we can use to train our deep learning models.

If this is the first time you have heard of deep learning, check the YouTube video

The comparison of deep learning frameworks from wiki

We use TensorFlow with Keras for our toy example; the full tutorial can be found here

comments powered by Disqus