Posts

Showing posts from March, 2018

K-Means Clustering

Image
# Importing the dataset
dataset = read.csv('Mall_Customers.csv')
dataset = dataset[4:5]

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$DependentVariable, SplitRatio = 0.8)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling
# training_set = scale(training_set)
# test_set = scale(test_set)

# Using the elbow method to find the optimal number of clusters
set.seed(6)
wcss = vector()
for (i in 1:10) wcss[i] = sum(kmeans(dataset, i)$withinss)
plot(1:10,
     wcss,
     type = 'b',
     main = paste('The Elbow Method'),
     xlab = 'Number of clusters',
     ylab = 'WCSS')

# Fitting K-Means to the dataset
set.seed(29)
kmeans = kmeans(x = dataset, centers = 5)
y_kmeans = kmeans$cluster

# Visualising the clusters
library(cluster)
clusplo

Random Forest Classification

Image
# Importing the dataset
dataset = read.csv('Social_Network_Ads.csv')
dataset = dataset[3:5]

# Encoding the target feature as factor
dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1))

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature Scaling
training_set[-3] = scale(training_set[-3])
test_set[-3] = scale(test_set[-3])

# Fitting Random Forest Classification to the Training set
# install.packages('randomForest')
library(randomForest)
set.seed(123)
classifier = randomForest(x = training_set[-3],
                          y = training_set$Purchased,
                          ntree = 500)

# Predicting the Test set results
y_pred = predict(classifier, newdata = test