INDEX
S.No. Particulars Page No. Signature
1. To implement basic Python libraries of machine 1-5
learning like NumPy, pandas, SciPy, scikit-learn,
matplotlib, etc.
2. To implement Principal Component Analysis. 6-7
3. To implement FIND-S algorithm. 8-9
4. To analyse the tested data using K-Means 10 -11
Clustering algorithm.
5. To implement k-nearest neighbor algorithm. 12-13
6. To implement linear regression. 14-15
7. To implement logistic regression. 16-18
8. To implement Naïve Bayes Algorithm. 19-23
9. To implement Decision Tree Algorithm. 24-28
10. To implement Support Vector Machine Algorithm. 29-32
PROGRAM :- 1
AIM: To implement basic Python libraries of machine learning
like NumPy, pandas, SciPy, scikit-learn, matplotlib, etc.
1.
import numpy as nup

# Two rank-2 arrays (matrices).
K = nup.array([[2, 4], [6, 8]])
R = nup.array([[1, 3], [5, 7]])

# Two rank-1 arrays (vectors).
P = nup.array([10, 12])
S = nup.array([9, 11])

# Inner product of the two vectors.
print("Inner product of vectors: ", nup.inner(P, S), "\n")

# Matrix-vector product.
print("Matrix and Vector product: ", nup.dot(K, P), "\n")

# Matrix-matrix product.
print("Matrix and matrix product: ", nup.dot(K, R))
OUTPUT:-
2.
import matplotlib.pyplot as plt
import numpy as np  # the listing used numpy calls without importing it
from scipy import interpolate

# Sample points of an exponential curve.
x = np.arange(5, 20)
y = np.exp(x / 3.0)

# Build a (linear, by default) interpolating function through the samples.
f = interpolate.interp1d(x, y)

# Evaluate the interpolant on a sub-range that lies inside [x.min(), x.max()].
x1 = np.arange(6, 12)
y1 = f(x1)  # use interpolation function returned by `interp1d`

plt.plot(x, y, 'o', x1, y1, '--')
plt.show()
OUTPUT:-
3.
from sklearn import datasets as ds
from sklearn import metrics as mt
from sklearn.tree import DecisionTreeClassifier as dtc

# load the iris datasets
dataset_1 = ds.load_iris()

# fit a CART model to the data
model_1 = dtc()
model_1.fit(dataset_1.data, dataset_1.target)
print(model_1)  # was `print(model)`, an undefined name

# make predictions (on the training data itself)
expected_1 = dataset_1.target
predicted_1 = model_1.predict(dataset_1.data)

# summarize the fit of the model
print(mt.classification_report(expected_1, predicted_1))
print(mt.confusion_matrix(expected_1, predicted_1))
OUTPUT:-
4.
import pandas as pad

# Column-oriented data: each key becomes a DataFrame column.
data_1 = {
    "Countries": ["Bhutan", "Cape Verde", "Chad", "Estonia", "Guinea",
                  "Kenya", "Libya", "Mexico"],
    "capital": ["Thimphu", "Praia", "N'Djamena", "Tallinn", "Conakry",
                "Nairobi", "Tripoli", "Mexico City"],
    "Currency": ["Ngultrum", "Cape Verdean escudo", "CFA Franc",
                 "Estonia Kroon; Euro", "Guinean franc", "Kenya shilling",
                 "Libyan dinar", "Mexican peso"],
    "population": [20.4, 143.5, 12.52, 135.7, 52.98, 76.21, 34.28, 54.32],
}

data_1_table = pad.DataFrame(data_1)
print(data_1_table)
OUTPUT:-
5. Matplotlib
import matplotlib.pyplot as plot
import numpy as nup

# Prepare the data
K = nup.linspace(2, 4, 8)
R = nup.linspace(5, 7, 9)
Q = nup.linspace(0, 1, 3)

# Plot the data (each series against itself, giving three diagonal segments)
plot.plot(K, K, label='K')
plot.plot(R, R, label='R')
plot.plot(Q, Q, label='Q')

# Add a legend
plot.legend()

# Show the plot
plot.show()
OUTPUT:-
PROGRAM :- 2
AIM: To implement Principal Component Analysis.
import numpy as nmp
import matplotlib.pyplot as mpltl
import pandas as pnd
from matplotlib.colors import ListedColormap as LCM
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression as LR
from sklearn.metrics import confusion_matrix as CM
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler as SS

# NOTE(review): the CSV file name was lost in extraction; 'Wine.csv' is
# assumed from the 13-feature + 1-target column layout below -- confirm.
DS = pnd.read_csv('Wine.csv')

# Now, we will distribute the dataset into two components "X" and "Y"
X = DS.iloc[:, 0:13].values
Y = DS.iloc[:, 13].values

X_train, X_test, Y_train, Y_test = tts(X, Y, test_size=0.2, random_state=0)

# Standardise features; the scaler is fitted on the training split only.
SC = SS()
X_train = SC.fit_transform(X_train)
X_test = SC.transform(X_test)

# Two components are required: the decision-region plot below indexes
# columns 0 and 1 of the projected data (n_components = 1 raised IndexError).
PCa = PCA(n_components=2)
X_train = PCa.fit_transform(X_train)
X_test = PCa.transform(X_test)
explained_variance = PCa.explained_variance_ratio_

classifier_1 = LR(random_state=0)
classifier_1.fit(X_train, Y_train)
Y_pred = classifier_1.predict(X_test)

c_m = CM(Y_test, Y_pred)

# Decision regions over a dense grid spanning the training points.
X_set, Y_set = X_train, Y_train
X_1, X_2 = nmp.meshgrid(
    nmp.arange(start=X_set[:, 0].min() - 1,
               stop=X_set[:, 0].max() + 1, step=0.01),
    nmp.arange(start=X_set[:, 1].min() - 1,
               stop=X_set[:, 1].max() + 1, step=0.01))
mpltl.contourf(
    X_1, X_2,
    classifier_1.predict(
        nmp.array([X_1.ravel(), X_2.ravel()]).T).reshape(X_1.shape),
    alpha=0.75,
    cmap=LCM(('yellow', 'grey', 'green')))
mpltl.xlim(X_1.min(), X_1.max())
mpltl.ylim(X_2.min(), X_2.max())

# One scatter series per class label.
for s, t in enumerate(nmp.unique(Y_set)):
    mpltl.scatter(X_set[Y_set == t, 0], X_set[Y_set == t, 1],
                  c=LCM(('red', 'green', 'blue'))(s), label=t)

mpltl.title('Logistic Regression for Training set: ')
mpltl.xlabel('PC_1')  # for X_label
mpltl.ylabel('PC_2')  # for Y_label
mpltl.legend()  # for showing legend
# show scatter plot
mpltl.show()
OUTPUT:-
PROGRAM :- 3
AIM: To implement FIND-S algorithm.
import pandas as pd
import numpy as np
#to read the data in the csv file
# NOTE(review): the file name was lost in extraction; "data.csv" is an
# assumption -- confirm against the original listing.
data = pd.read_csv("data.csv")
print(data, "\n")
#making an array of all the attributes (every column except the last)
d = np.array(data)[:, :-1]
print("\n The attributes are: ", d)
#segregating the target (last column) that has positive and negative examples
target = np.array(data)[:, -1]
print("\n The target is: ", target)
#training function to implement find-s algorithm
def train(c, t):
    """Return the FIND-S maximally specific hypothesis for the examples.

    Args:
        c: sequence of attribute rows; each row must support ``.copy()``
            and integer indexing.
        t: sequence of labels aligned with ``c``; ``"Yes"`` marks a
            positive example, every other value is ignored.

    Returns:
        A copy of the first positive row in which every attribute that
        differs in some other positive row is replaced by ``'?'``.

    Raises:
        ValueError: if ``t`` contains no ``"Yes"`` label.
    """
    specific_hypothesis = None
    # Seed the hypothesis with the first positive example.
    for i, val in enumerate(t):
        if val == "Yes":
            specific_hypothesis = c[i].copy()
            break
    if specific_hypothesis is None:
        raise ValueError("no positive ('Yes') example in target")

    # Generalise attribute by attribute over every positive example.
    for i, val in enumerate(c):
        if t[i] == "Yes":
            for x in range(len(specific_hypothesis)):
                if val[x] != specific_hypothesis[x]:
                    specific_hypothesis[x] = '?'
    return specific_hypothesis
#obtaining the final hypothesis
print("\n The final hypothesis is:", train(d, target))
OUTPUT:-
PROGRAM :- 4
AIM: To analyse the tested data using K-Means Clustering
algorithm.
# importing required tools
import numpy as np
import cv2
from matplotlib import pyplot as plt
# creating two test data sets and stacking them into one (50, 2) array
X = np.random.randint(10, 35, (25, 2))
Y = np.random.randint(55, 70, (25, 2))
Z = np.vstack((X, Y))
Z = Z.reshape((50, 2))
# convert to np.float32
Z = np.float32(Z)
plt.xlabel('Test Data')
plt.ylabel('Z samples')
plt.hist(Z, bins=256, range=[0, 256])
plt.show()

# regenerate the two groups for clustering
X = np.random.randint(10, 45, (25, 2))
Y = np.random.randint(55, 70, (25, 2))
Z = np.vstack((X, Y))
# convert to np.float32
Z = np.float32(Z)

# define criteria and apply kmeans():
# stop after 10 iterations or when the requested accuracy (1.0) is reached
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret, label, center = cv2.kmeans(Z, 2, None, criteria, 10,
                                cv2.KMEANS_RANDOM_CENTERS)

# Now separate the data by assigned cluster label
A = Z[label.ravel() == 0]
B = Z[label.ravel() == 1]

# Plot the data: cluster 0, cluster 1 (red), and the centres (yellow squares)
plt.scatter(A[:, 0], A[:, 1])
plt.scatter(B[:, 0], B[:, 1], c='r')
plt.scatter(center[:, 0], center[:, 1], s=80, c='y', marker='s')
plt.xlabel('Test Data'), plt.ylabel('Z samples')
plt.show()
OUTPUT:-
PROGRAM :- 5
AIM: To implement k-nearest neighbor algorithm.
# Import necessary modules
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt

irisData = load_iris()
X = irisData.data
y = irisData.target

# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Candidate values of k: 1 through 8
neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# Loop over K values
for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    # Compute training and test data accuracy
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)

# Generate plot
plt.plot(neighbors, test_accuracy, label='Testing dataset Accuracy')
plt.plot(neighbors, train_accuracy, label='Training dataset Accuracy')
plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()
OUTPUT:-
PROGRAM :- 6
AIM: To implement linear regression.
import numpy as nmp
import matplotlib.pyplot as mtplt
def estimate_coeff(p, q):
    """Estimate simple linear-regression coefficients for ``q ~ b_0 + b_1 * p``.

    Args:
        p: 1-D sequence or array of predictor observations.
        q: 1-D sequence or array of responses, same length as ``p``.

    Returns:
        Tuple ``(b_0, b_1)``: intercept and slope of the least-squares line.
    """
    # Accept plain Python sequences as well as arrays.
    p = nmp.asarray(p, dtype=float)
    q = nmp.asarray(q, dtype=float)

    # Total number of observations.
    n1 = nmp.size(p)

    # Means of the predictor and response vectors.
    m_p = nmp.mean(p)
    m_q = nmp.mean(q)

    # Cross-deviation of p and q, and deviation of p about its mean.
    SS_pq = nmp.sum(q * p) - n1 * m_q * m_p
    SS_pp = nmp.sum(p * p) - n1 * m_p * m_p

    # Regression coefficients.
    b_1 = SS_pq / SS_pp
    b_0 = m_q - b_1 * m_p
    return (b_0, b_1)
def plot_regression_line(p, q, b):
    """Scatter-plot the observations and draw the fitted regression line.

    Args:
        p: array of predictor observations.
        q: array of responses, same length as ``p``.
        b: pair ``(b_0, b_1)`` of intercept and slope.
    """
    # Actual observations as a scatter plot.
    mtplt.scatter(p, q, color="m", marker="o", s=30)

    # Predicted response vector.
    q_pred = b[0] + b[1] * p

    # Regression line.
    mtplt.plot(p, q_pred, color="g")

    # Axis labels.
    mtplt.xlabel('p')
    mtplt.ylabel('q')

    # Display the figure.
    mtplt.show()
def main():
    """Fit a regression line to the sample observations, print and plot it."""
    # Observation points.
    p = nmp.array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
    q = nmp.array([11, 13, 12, 15, 17, 18, 18, 19, 20, 22])

    # Estimate the coefficients.
    b = estimate_coeff(p, q)
    print("Estimated coefficients are :\nb_0 = {} \nb_1 = {}".format(b[0], b[1]))

    # Plot the regression line.
    plot_regression_line(p, q, b)


if __name__ == "__main__":
    main()
OUTPUT:-
PROGRAM :- 7
AIM: To implement logistic regression.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

dataset = pd.read_csv("User_Data.csv")

# Columns 2 and 3 are the predictors, column 4 is the target.
x = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values

# Splitting the dataset to train and test.
# The split is bound to xtrain/xtest/ytrain/ytest because those are the
# names every later step of this program uses.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.25, random_state=0)

# Standardise the features; the scaler is fitted on the training split only.
sc_x = StandardScaler()
xtrain = sc_x.fit_transform(xtrain)
xtest = sc_x.transform(xtest)
print(xtrain[0:10, :])
OUTPUT:-
[[ 0.58164944 -0.88670699]
[-0.60673761 1.46173768]
[-0.01254409 -0.5677824 ]
[-0.60673761 1.89663484]
[ 1.37390747 -1.40858358]
[ 1.47293972 0.99784738]
[ 0.08648817 -0.79972756]
[-0.01254409 -0.24885782]
[-0.21060859 -0.5677824 ]
[-0.21060859 -0.19087153]]
# Train the model
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(xtrain, ytrain)

# prediction
y_pred = classifier.predict(xtest)

# Test the performance of our model
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(ytest, y_pred)
print("Confusion Matrix : \n", cm)

# Accuracy
from sklearn.metrics import accuracy_score
print("Accuracy : ", accuracy_score(ytest, y_pred))
OUTPUT 2 :-
Confusion Matrix :
[[65 3]
[8 24]]
Out of 100 :
True Positive + True Negative = 65 + 24
False Positive + False Negative = 3 + 8 Performance measure – Accuracy
Accuracy: 0.89
# Visualizing the performance of our model
from matplotlib.colors import ListedColormap

X_set, y_set = xtest, ytest

# Dense grid spanning the test points, padded by 1 on each side.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1, step=0.01))

# Colour each grid cell by the class the classifier predicts for it.
plt.contourf(
    X1, X2,
    classifier.predict(
        np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
    alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# One scatter series per class label.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)

plt.title('Classifier (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
OUTPUT:-
PROGRAM :- 8
AIM: To implement Naïve Bayes Algorithm.
# Importing library
import math
import random
import csv
# the categorical class names are changed to numberic data
# eg: yes and no encoded to 1 and 0
def encode_class(mydata):
    """Replace each row's class label (last element) with an integer code.

    Codes are assigned in order of first appearance: the first distinct
    label becomes 0, the next 1, and so on. Rows are modified in place.

    Each row is encoded exactly once from its original label. The earlier
    pass-per-class loop could re-encode a row whose freshly written code
    equalled a later label (for example integer labels ``[1, 0]``).

    Args:
        mydata: list of mutable rows whose last element is the class label.

    Returns:
        The same ``mydata`` list, with labels replaced by integer codes.
    """
    classes = []
    for row in mydata:
        if row[-1] not in classes:
            classes.append(row[-1])
    for row in mydata:
        row[-1] = classes.index(row[-1])
    return mydata
# Splitting the data
def splitting(mydata, ratio):
    """Randomly split ``mydata`` into a training list and a test list.

    Args:
        mydata: sequence of data rows; it is not modified.
        ratio: fraction of rows to place in the training list.

    Returns:
        Tuple ``(train, test)``; ``train`` holds ``int(len(mydata) * ratio)``
        rows chosen at random, ``test`` holds the remaining rows.
    """
    train_num = int(len(mydata) * ratio)
    train = []
    # Initially the test set holds every row (shallow copy of the input).
    test = list(mydata)
    while len(train) < train_num:
        # Random index into the rows still in the test set.
        index = random.randrange(len(test))
        # Move that row from the test set to the training set.
        train.append(test.pop(index))
    return train, test
# Group the data rows under each class yes or
# no in dictionary eg: dict[yes] and dict[no]
def groupUnderClass(mydata):
    """Group data rows by their class label (the last element of each row).

    Args:
        mydata: sequence of rows whose last element is a hashable label.

    Returns:
        Dict mapping each label to the list of rows carrying it, in input
        order.
    """
    groups = {}
    for row in mydata:
        groups.setdefault(row[-1], []).append(row)
    return groups
# Calculating Mean
def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Raises:
        ZeroDivisionError: if ``numbers`` is empty.
    """
    return sum(numbers) / float(len(numbers))
# Calculating Standard Deviation
def std_dev(numbers):
    """Return the sample standard deviation of ``numbers``.

    Uses the ``n - 1`` denominator, so at least two values are required.

    Raises:
        ZeroDivisionError: if ``numbers`` has fewer than two elements.
    """
    avg = mean(numbers)
    variance = sum((x - avg) ** 2 for x in numbers) / float(len(numbers) - 1)
    return math.sqrt(variance)
def MeanAndStdDev(mydata):
    """Return ``(mean, std_dev)`` for every attribute column of ``mydata``.

    Args:
        mydata: sequence of equal-length numeric rows whose last element is
            the class label.

    Returns:
        List of ``(mean, std_dev)`` tuples, one per column, excluding the
        last (class) column.
    """
    # zip(*mydata) transposes rows into columns, e.g. for
    # [[a, b, c], [m, n, o], [x, y, z]] the first column is (a, m, x) with
    # mean (a + m + x) / 3, the second is (b, n, y), and so on.
    info = [(mean(attribute), std_dev(attribute)) for attribute in zip(*mydata)]
    # Drop the summary of the last column: it is the class label.
    del info[-1]
    return info
# find Mean and Standard Deviation under each class
def MeanAndStdDevForClass(mydata):
    """Return per-class attribute summaries.

    Args:
        mydata: sequence of numeric rows whose last element is the class
            label.

    Returns:
        Dict mapping each class label to the list of ``(mean, std_dev)``
        tuples that ``MeanAndStdDev`` computes from that class's rows.
    """
    info = {}
    grouped = groupUnderClass(mydata)
    for classValue, instances in grouped.items():
        info[classValue] = MeanAndStdDev(instances)
    return info
# Calculate Gaussian Probability Density Function
def calculateGaussianProbability(x, mean, stdev):
    """Return the Gaussian probability density of ``x``.

    Args:
        x: value at which to evaluate the density.
        mean: mean of the distribution.
        stdev: standard deviation of the distribution; must be non-zero.

    Raises:
        ZeroDivisionError: if ``stdev`` is zero.
    """
    expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo
# Calculate Class Probabilities
def calculateClassProbabilities(info, test):
    """Return the unnormalised likelihood of ``test`` under each class.

    Args:
        info: dict mapping class label to a list of ``(mean, std_dev)``
            tuples, one per attribute.
        test: sequence of attribute values, indexed the same way as the
            summaries.

    Returns:
        Dict mapping each class label to the product of the per-attribute
        Gaussian densities of ``test``.
    """
    probabilities = {}
    for classValue, classSummaries in info.items():
        probabilities[classValue] = 1
        for i, (attr_mean, attr_std) in enumerate(classSummaries):
            x = test[i]
            probabilities[classValue] *= calculateGaussianProbability(
                x, attr_mean, attr_std)
    return probabilities
# Make prediction - highest probability is the prediction
def predict(info, test):
    """Return the class label with the highest probability for ``test``.

    Args:
        info: per-class attribute summaries, as accepted by
            ``calculateClassProbabilities``.
        test: a single row of attribute values.

    Returns:
        The best-scoring class label, or ``None`` when ``info`` is empty.
        On a tie the label encountered first wins.
    """
    probabilities = calculateClassProbabilities(info, test)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel
# returns predictions for a set of examples
def getPredictions(info, test):
    """Return the predicted class label for every row in ``test``.

    Args:
        info: per-class attribute summaries passed through to ``predict``.
        test: sequence of rows to classify.

    Returns:
        List of predicted labels, in the same order as ``test``.
    """
    return [predict(info, row) for row in test]
# Accuracy score
def accuracy_rate(test, predictions):
    """Return the percentage of rows whose label matches its prediction.

    Args:
        test: sequence of rows whose last element is the true class label.
        predictions: predicted labels aligned with ``test``.

    Returns:
        Accuracy as a float in the range 0.0 to 100.0.

    Raises:
        ZeroDivisionError: if ``test`` is empty.
    """
    correct = 0
    for i, row in enumerate(test):
        if row[-1] == predictions[i]:
            correct += 1
    return (correct / float(len(test))) * 100.0
# driver code
# add the data path in your system
# NOTE(review): the file name at the end of this path was lost in
# extraction; 'filedata.csv' is assumed -- point this at your own dataset.
filename = r'E:\user\MACHINE LEARNING\machine learning algos\Naive bayes\filedata.csv'

# load the file and store it in mydata list; the context manager closes
# the file handle once the rows have been read
with open(filename, "rt") as csv_file:
    mydata = list(csv.reader(csv_file))

# encode the class column, then convert every value to float
mydata = encode_class(mydata)
for i in range(len(mydata)):
    mydata[i] = [float(x) for x in mydata[i]]

# split ratio = 0.7
# 70% of data is training data and 30% is test data used for testing
ratio = 0.7
train_data, test_data = splitting(mydata, ratio)
print('Total number of examples are: ', len(mydata))
print('Out of these, training examples are: ', len(train_data))
print("Test examples are: ", len(test_data))

# prepare model
info = MeanAndStdDevForClass(train_data)

# test model
predictions = getPredictions(info, test_data)
accuracy = accuracy_rate(test_data, predictions)
print("Accuracy of your model is: ", accuracy)
OUTPUT:-
Total number of examples are: 200
Out of these, training examples are: 140
Test examples are: 60
Accuracy of your model is: 71.237678
PROGRAM :- 9
AIM: To implement Decision Tree Algorithm.
# Run this program on your local python
# interpreter, provided you have installed
# the required libraries.
# Importing the required packages
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
# Function importing Dataset
def importdata():
    """Download the balance-scale dataset, print a summary and return it.

    Returns:
        DataFrame read without a header row, so columns are numbered from 0.
    """
    # NOTE(review): the URL was garbled in extraction; the UCI repository
    # address below is assumed -- confirm it is still reachable.
    balance_data = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-'
        'databases/balance-scale/balance-scale.data',
        sep=',', header=None)

    # Printing the dataset shape
    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)

    # Printing the dataset observations
    print("Dataset: ", balance_data.head())
    return balance_data
# Function to split the dataset
def splitdataset(balance_data):
    """Separate features from target and split into train and test sets.

    Args:
        balance_data: DataFrame whose column 0 is the target and columns
            1-4 are the features.

    Returns:
        Tuple ``(X, Y, X_train, X_test, y_train, y_test)``; 30% of the rows
        go to the test split, with a fixed ``random_state`` of 100.
    """
    # Separating the target variable
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]

    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=100)
    return X, Y, X_train, X_test, y_train, y_test
# Function to perform training with giniIndex.
def train_using_gini(X_train, X_test, y_train):
    """Fit and return a decision tree that splits on the Gini index.

    Args:
        X_train: training feature matrix.
        X_test: unused; kept so the signature matches the entropy variant.
        y_train: training labels.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(
        criterion="gini", random_state=100,
        max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini
# Function to perform training with entropy.
def tarin_using_entropy(X_train, X_test, y_train):
    """Fit and return a decision tree that splits on entropy.

    The misspelt name is kept because ``main`` calls it by this name.

    Args:
        X_train: training feature matrix.
        X_test: unused; kept so the signature matches the Gini variant.
        y_train: training labels.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy", random_state=100,
        max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy
# Function to make predictions
def prediction(X_test, clf_object):
    """Predict labels for ``X_test``, print them and return them.

    Args:
        X_test: feature rows to classify.
        clf_object: any fitted object exposing ``predict(X)``.

    Returns:
        Whatever ``clf_object.predict(X_test)`` returns.
    """
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred
# Function to calculate accuracy
def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, accuracy (percent) and class report.

    Args:
        y_test: true labels.
        y_pred: predicted labels aligned with ``y_test``.
    """
    print("Confusion Matrix: ",
          confusion_matrix(y_test, y_pred))
    print("Accuracy : ", accuracy_score(y_test, y_pred) * 100)
    print("Report : ", classification_report(y_test, y_pred))
# Driver code
def main():
    """Train Gini and entropy decision trees and report their accuracy."""
    # Building Phase
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, X_test, y_train)
    clf_entropy = tarin_using_entropy(X_train, X_test, y_train)

    # Operational Phase
    print("Results Using Gini Index:")

    # Prediction using gini
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)

    print("Results Using Entropy:")

    # Prediction using entropy
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)


# Calling main function
if __name__ == "__main__":
    main()
OUTPUT:-
Dataset Length: 625
Dataset Shape: (625, 5)
Dataset: 0 1 2 3 4
0 B 1 1 1 1
1 R 1 1 1 2
2 R 1 1 1 3
3 R 1 1 1 4
4 R 1 1 1 5
Results Using Gini Index:
Predicted values:
['R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'L' 'R'
'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
'L' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix: [[ 0 6 7]
[ 0 67 18]
[ 0 19 71]]
Accuracy : 73.4042553191
Report :
precision recall f1-score support
B 0.00 0.00 0.00 13
L 0.73 0.79 0.76 85
R 0.74 0.79 0.76 90
avg/total 0.68 0.73 0.71 188
Results Using Entropy:
Predicted values:
['R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'L' 'L'
'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'L'
'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'L' 'R'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix: [[ 0 6 7]
[ 0 63 22]
[ 0 20 70]]
Accuracy : 70.7446808511
Report :
precision recall f1-score support
B 0.00 0.00 0.00 13
L 0.71 0.74 0.72 85
R 0.71 0.78 0.74 90
avg / total 0.66 0.71 0.68 188
PROGRAM :- 10
AIM: To implement Support Vector Machine Algorithm.
# Importing the dataset
import pandas as pd
data = pd.read_csv("apples_and_oranges.csv")

# Splitting the dataset into training and test samples
from sklearn.model_selection import train_test_split
training_set, test_set = train_test_split(data, test_size=0.2,
                                          random_state=1)

# Classifying the predictors and target
X_train = training_set.iloc[:, 0:2].values
Y_train = training_set.iloc[:, 2].values
X_test = test_set.iloc[:, 0:2].values
Y_test = test_set.iloc[:, 2].values

# Initializing Support Vector Machine and fitting the training data
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state=1)
classifier.fit(X_train, Y_train)

# Predicting the classes for test set
Y_pred = classifier.predict(X_test)

# Attaching the predictions to test set for comparing.
# Work on an explicit copy so the new column is not written to a slice of
# `data` (avoids pandas' SettingWithCopyWarning).
test_set = test_set.copy()
test_set["Predictions"] = Y_pred

#Comparing the actual classes and predictions
#Calculating the accuracy of the predictions
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, Y_pred)
# Correct predictions lie on the diagonal of the confusion matrix.
accuracy = float(cm.diagonal().sum()) / len(Y_test)
print("\nAccuracy Of SVM For The Given Dataset : ", accuracy)
OUTPUT:-
Accuracy Of SVM For The Given Dataset : 0.875
# Visualizing the classifier
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Y_train = le.fit_transform(Y_train)

# After encoding, fit the encoded data to the SVM
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state=1)
classifier.fit(X_train, Y_train)

# Let's Visualize!
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

plt.figure(figsize=(7, 7))
X_set, y_set = X_train, Y_train

# Dense grid spanning the training points, padded by 1 on each side.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1, step=0.01))

# Colour each grid cell by the class the classifier predicts for it.
plt.contourf(
    X1, X2,
    classifier.predict(
        np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
    alpha=0.75, cmap=ListedColormap(('black', 'white')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# One scatter series per class label.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'orange'))(i), label=j)

plt.title('Apples Vs Oranges')
plt.xlabel('Weight In Grams')
plt.ylabel('Size in cm')
plt.legend()
plt.show()
OUTPUT:-
# Visualizing the predictions
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

plt.figure(figsize=(7, 7))
X_set, y_set = X_test, Y_test

# Dense grid spanning the test points, padded by 1 on each side.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1, step=0.01))

# Colour each grid cell by the class the classifier predicts for it.
plt.contourf(
    X1, X2,
    classifier.predict(
        np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
    alpha=0.75, cmap=ListedColormap(('black', 'white')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# One scatter series per class label found in the test targets.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'orange'))(i), label=j)

plt.title('Apples Vs Oranges Predictions')
plt.xlabel('Weight In Grams')
plt.ylabel('Size in cm')
plt.legend()
plt.show()
OUTPUT:-
A Practical File
On
Machine Learning
Submitted in partial fulfillment of the requirement
for the
Award of Bachelor of Technology Degree
In
Computer Science Engineering
2019-2023
Submitted By:
Ravi Parkash (4811)
Semester: 8th
Under the Guidance of
Mrs. Jyoti Ahlawat
DEPARTMENT OF COMPUTER SCIENCE ENGINEERING
MATU RAM INSTITUTE OF ENGINEERING
& MANAGEMENT Rohtak Haryana