Lab Assignment-Artificial Intelligence (MCAC391)
Problem Statement:
1. Predict the Iris class based on sepal and petal measurements and visualise the data using different plots.
# Load Dataset
import pandas as pd
df = pd.read_csv("[Link]")
print(df.head())
print(df.shape)
print(df.describe())
# Check Missing Values & Duplicates
print(df.isnull().sum())
print("Duplicate rows:", df.duplicated().sum())
df = df.drop_duplicates()
# Visualizations
import seaborn as sns
import matplotlib.pyplot as plt
# Countplot
plt.figure(figsize=(5,3))
sns.countplot(x="Species", data=df)
plt.show()
# Pairplot
sns.pairplot(df, hue="Species")
plt.show()
# Histograms
df[['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']].hist(figsize=(6,4))
plt.show()
# Correlation Heatmap
plt.figure(figsize=(5,3))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap="coolwarm")  # numeric_only avoids errors from the Species column
plt.show()
# Building Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
le = LabelEncoder()
df['Species'] = le.fit_transform(df['Species'])
X = df[['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']]
y = df['Species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)
# Prediction
y_pred = model.predict(X_test)
# Results
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
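To round off the visualisation requirement, the confusion matrix computed above can also be shown as a plot; a minimal sketch, reusing the seaborn/matplotlib imports and the fitted label encoder from this program:
# Hedged sketch: confusion matrix as an annotated heatmap
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(4,3))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=le.classes_, yticklabels=le.classes_)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Iris Confusion Matrix")
plt.show()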
2. Annual Rainfall prediction using Machine Learning and visualise the prediction plots.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load dataset
ds = pd.read_csv("[Link]")
ds = ds.drop(['month','day'], axis=1)
# Check missing values
print(ds.isnull().sum())
# Split data
x = ds.iloc[:, :7].values
y = ds.iloc[:, 7].values
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)
# Model building
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100, random_state=0)
regressor.fit(x_train, y_train)
# Prediction
ypred = regressor.predict(x_test)
print(ypred)
# Evaluate
from sklearn.metrics import r2_score
print("R2 Score:", r2_score(y_test, ypred))
# Visualisation of prediction (Actual vs Predicted)
plt.figure(figsize=(6,4))
plt.scatter(y_test, ypred, color='blue')
plt.xlabel("Actual Rainfall")
plt.ylabel("Predicted Rainfall")
plt.title("Actual vs Predicted Annual Rainfall")
plt.show()
# Simple Line Plot to show prediction trend
plt.plot(y_test[:50], label="Actual")
plt.plot(ypred[:50], label="Predicted")
plt.xlabel("Samples")
plt.ylabel("Rainfall")
plt.title("Annual Rainfall Prediction Trend")
plt.legend()
plt.show()
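A feature-importance bar chart is another useful plot for a random forest regressor; a minimal sketch, assuming the first seven columns of ds are exactly the feature columns used above:
# Hedged sketch: visualise which input columns the random forest relied on most
feature_names = ds.columns[:7]              # assumption: same columns as x
importances = regressor.feature_importances_
plt.figure(figsize=(6,4))
plt.barh(feature_names, importances)
plt.xlabel("Importance")
plt.title("Random Forest Feature Importances")
plt.show()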
3. Store Sales. Time Series Forecast & Visualization using Machine Learning.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
dataset = pd.read_csv("[Link]")
print(dataset)
sns.lineplot(data=dataset)
plt.show()
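The snippet above only loads and plots the data; a minimal forecasting sketch follows, assuming the store-sales file has a 'date' column and a 'sales' column (adjust the names to the actual dataset). It builds lag features and fits a linear regression, one simple machine-learning approach to a time-series forecast:
# Hedged sketch: lag-feature regression forecast ('date' and 'sales' column names are assumptions)
from sklearn.linear_model import LinearRegression
ts = dataset.copy()
ts['date'] = pd.to_datetime(ts['date'])
ts = ts.sort_values('date').reset_index(drop=True)
for lag in (1, 2, 3):
    ts[f'sales_lag{lag}'] = ts['sales'].shift(lag)   # previous periods' sales as features
ts = ts.dropna()
X = ts[['sales_lag1', 'sales_lag2', 'sales_lag3']]
y = ts['sales']
split = int(len(ts) * 0.8)                           # keep time order: train on the past, test on the future
reg = LinearRegression().fit(X[:split], y[:split])
forecast = reg.predict(X[split:])
plt.plot(ts['date'][split:], y[split:], label="Actual")
plt.plot(ts['date'][split:], forecast, label="Forecast")
plt.legend()
plt.title("Store Sales Forecast (lag-feature Linear Regression)")
plt.show()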
4. Rainfall prediction using Regression
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
df = pd.read_csv("[Link]")
df = df.drop(columns=["DewPointHighF","DewPointLowF","Unnamed: 0"])
input_ds = df.drop(columns=["PrecipitationSumInches"])
output_ds = df["PrecipitationSumInches"]
input_train, input_test, output_train, output_test = train_test_split(input_ds, output_ds, test_size=0.2, random_state=42)
sc = StandardScaler()
input_train = sc.fit_transform(input_train)
input_test = sc.transform(input_test)
model = LinearRegression()
model.fit(input_train, output_train)
y_pred = model.predict(input_test)
print("MAE:", round(mean_absolute_error(output_test, y_pred),2))
print("RMSE:", round([Link](mean_squared_error(output_test, y_pred)),2))
plt.figure(figsize=(6,6))
plt.scatter(output_test, y_pred)
x = np.linspace(0, 4, 5)
plt.plot(x, x, 'r-')
plt.xlabel("Actual Rainfall")
plt.ylabel("Predicted Rainfall")
plt.title("Rainfall Prediction Plot")
plt.show()
5. Rainfall prediction in Australia
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
df = pd.read_csv("[Link]")
df = df.dropna()  # assumption: the raw file contains missing values; drop them so the regression can fit
df_proc = pd.get_dummies(df, columns=["RainToday","WindGustDir","WindDir9am","WindDir3pm"])
df_proc.replace(['No','Yes'],[0,1],inplace=True)
df_proc.drop(columns=['Date'], inplace=True)
df_proc = df_proc.astype(float)
X = df_proc.drop(columns="Rainfall")
y = df_proc["Rainfall"]
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print("MAE:", round(mean_absolute_error(y_test, y_pred),2))
print("RMSE:", round([Link](mean_squared_error(y_test, y_pred)),2))
plt.figure(figsize=(6,6))
plt.scatter(y_test, y_pred)
x = np.linspace(0, max(y_test.max(), y_pred.max()), 5)
plt.plot(x, x, 'r-')
plt.xlabel("Actual Rainfall")
plt.ylabel("Predicted Rainfall")
plt.title("Rainfall Prediction Plot")
plt.show()
6. Autism detection Experiment
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv("[Link]")
df = df.rename(columns={'Class/ASD Traits ': 'ASD', 'A1':'Q1', 'A2':'Q2', 'A3':'Q3', 'A4':'Q4', 'A5':'Q5',
                        'A6':'Q6', 'A7':'Q7', 'A8':'Q8', 'A9':'Q9', 'A10':'Q10', 'Age_Mons':'Age in Months',
                        'Sex':'Gender', 'Qchat-10-Score':'Score out of 10', 'Ethnicity':'Region'})
x = df.drop(['Case_No','ASD'], axis=1)
y = df['ASD']
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=36)
obj_cols = x_train.select_dtypes(include='object').columns
float_cols = x_train.select_dtypes(include='int64').columns
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(y_train)
y_train_processed = le.transform(y_train)
y_test_processed = le.transform(y_test)
from sklearn.preprocessing import OrdinalEncoder
oe = OrdinalEncoder(categories=[x_train[i].unique() for i in obj_cols])
oe.fit(x_train[obj_cols])
x_train_cat_encoded = oe.transform(x_train[obj_cols])
x_test_cat_encoded = oe.transform(x_test[obj_cols])
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(x_train[float_cols])
x_train_float_encoded = scaler.transform(x_train[float_cols])
x_test_float_encoded = scaler.transform(x_test[float_cols])
x_train_processed = np.hstack((x_train_cat_encoded, x_train_float_encoded))
x_test_processed = np.hstack((x_test_cat_encoded, x_test_float_encoded))
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
lr = LogisticRegression()
lr.fit(x_train_processed, y_train_processed)
y_pred = lr.predict(x_test_processed)
acc = accuracy_score(y_test_processed, y_pred)
cm = confusion_matrix(y_test_processed, y_pred)
print("Accuracy:", acc)
feature_names = list(obj_cols) + list(float_cols)  # order matches the hstack above
feature_dict = dict(zip(feature_names, list(lr.coef_[0])))
feature_df = pd.DataFrame(feature_dict, index=[0])
feature_df.T.plot(kind='barh', title="Feature Importance", legend=False)
plt.show()
cm_df = pd.DataFrame(cm, columns=['Predicted No','Predicted Yes'], index=['Actual No','Actual Yes'])
sns.heatmap(cm_df, annot=True, cmap='Spectral')
plt.show()
7. Cancer Cell Detection
import sklearn
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
label_names = data['target_names']
labels = data['target']
feature_names = data['feature_names']
features = data['data']
print(label_names)
print(labels)
print(feature_names)
print(features)
from sklearn.model_selection import train_test_split
train, test, train_labels, test_labels = train_test_split(features, labels, test_size=0.33, random_state=42)
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
model = gnb.fit(train, train_labels)
predictions = gnb.predict(test)
print(predictions)
from sklearn.metrics import accuracy_score
print(accuracy_score(test_labels, predictions))
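Beyond overall accuracy, a confusion matrix and per-class report show how the benign and malignant classes are separated; a short hedged addition using only the objects defined above:
# Hedged sketch: per-class breakdown of the Naive Bayes predictions
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(test_labels, predictions))
print(classification_report(test_labels, predictions, target_names=label_names))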
8. Breast Cancer Detection
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
print(cancer)
print(cancer['target'])
print(cancer['feature_names'])
print(cancer['data'].shape)
df_cancer = pd.DataFrame(
    np.c_[cancer['data'], cancer['target']],
    columns = np.append(cancer['feature_names'], ['target'])
)
print(df_cancer.head())
print(df_cancer.tail())
sns.pairplot(df_cancer, vars=[
'mean radius','mean texture','mean perimeter','mean area',
'mean smoothness','mean compactness','mean concavity',
'mean concave points','mean symmetry','mean fractal dimension',
'radius error','texture error','perimeter error','area error',
'smoothness error','compactness error','concavity error',
'concave points error','symmetry error','fractal dimension error',
'worst radius','worst texture','worst perimeter','worst area',
'worst smoothness','worst compactness','worst concavity',
'worst concave points','worst symmetry','worst fractal dimension'
])
plt.show()
9. Diabetes Prediction using ML model
import numpy as np
import pandas as pd
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt
import warnings
[Link]("ignore")
df = pd.read_csv("[Link]")
df[['Glucose','BloodPressure','SkinThickness','Insulin','BMI']] = df[['Glucose','BloodPressure','SkinThickness','Insulin','BMI']].replace(0, np.nan)
def median_target(var):
    temp = df[df[var].notnull()]
    return temp.groupby("Outcome")[[var]].median().reset_index()
cols = df.columns.drop("Outcome")
for i in cols:
    df.loc[(df['Outcome']==0) & (df[i].isnull()), i] = median_target(i)[i][0]
    df.loc[(df['Outcome']==1) & (df[i].isnull()), i] = median_target(i)[i][1]
lof = LocalOutlierFactor(n_neighbors=10)
scores = lof.fit_predict(df)
df = df[scores == 1]
NewBMI = [Link](["Underweight","Normal","Overweight","Obesity 1","Obesity 2","Obesity 3"])
df["NewBMI"] = NewBMI
[Link][df["BMI"] < 18.5, "NewBMI"] = NewBMI[0]
[Link][(df["BMI"] > 18.5)&(df["BMI"] <= 24.9), "NewBMI"] = NewBMI[1]
[Link][(df["BMI"] > 24.9)&(df["BMI"] <= 29.9), "NewBMI"] = NewBMI[2]
[Link][(df["BMI"] > 29.9)&(df["BMI"] <= 34.9), "NewBMI"] = NewBMI[3]
[Link][(df["BMI"] > 34.9)&(df["BMI"] <= 39.9), "NewBMI"] = NewBMI[4]
[Link][df["BMI"] > 39.9, "NewBMI"] = NewBMI[5]
def set_insulin(row):
    return "Normal" if 16 <= row["Insulin"] <= 166 else "Abnormal"
df["NewInsulinScore"] = df.apply(set_insulin, axis=1)
NewGlucose = [Link](["Low","Normal","Overweight","Secret","High"])
df["NewGlucose"] = NewGlucose
[Link][df["Glucose"] <= 70, "NewGlucose"] = NewGlucose[0]
[Link][(df["Glucose"] > 70)&(df["Glucose"] <= 99), "NewGlucose"] = NewGlucose[1]
[Link][(df["Glucose"] > 99)&(df["Glucose"] <= 126), "NewGlucose"] = NewGlucose[2]
[Link][df["Glucose"] > 126, "NewGlucose"] = NewGlucose[3]
df = pd.get_dummies(df, columns=["NewBMI","NewInsulinScore","NewGlucose"], drop_first=True)
y = df["Outcome"]
X = [Link](["Outcome"], axis=1)
numeric = X.select_dtypes(include=[[Link]])
scaler = RobustScaler().fit(numeric)
X[[Link]] = [Link](numeric)
models = []
models.append(('LR', LogisticRegression()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('RF', RandomForestClassifier()))
models.append(('SVM', SVC(gamma='auto')))
models.append(('GBM', GradientBoostingClassifier()))
models.append(("LightGBM", LGBMClassifier()))
results = []
names = []
for name, model in models:
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    cv_results = cross_val_score(model, X, y, cv=kfold, scoring="accuracy")  # use the shuffled KFold defined above
    results.append(cv_results)
    names.append(name)
plt.boxplot(results)
plt.xticks(range(1, len(names) + 1), names)
plt.title("Algorithm Comparison")
plt.show()
10. Logistic Regression classification model to predict probable rainfall in any area; design and error analysis.
import pandas as pd
import numpy as np
import [Link] as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, roc_auc_score
import category_encoders as ce
df=pd.read_csv("[Link]")
df.drop(['RISK_MM'],axis=1,inplace=True)
df['Date']=pd.to_datetime(df['Date'])
df['Year']=df['Date'].dt.year
df['Month']=df['Date'].dt.month
df['Day']=df['Date'].dt.day
df.drop('Date',axis=1,inplace=True)
categorical=[col for col in df.columns if df[col].dtype=='O']
numerical=[col for col in df.columns if df[col].dtype!='O']
for col in numerical:
    df[col].fillna(df[col].median(),inplace=True)
for col in categorical:
    df[col].fillna(df[col].mode()[0],inplace=True)
def max_value(df3,variable,top):
    return np.where(df3[variable]>top,top,df3[variable])
df['Rainfall']=max_value(df,'Rainfall',3.2)
df['Evaporation']=max_value(df,'Evaporation',21.8)
df['WindSpeed9am']=max_value(df,'WindSpeed9am',55)
df['WindSpeed3pm']=max_value(df,'WindSpeed3pm',57)
X=df.drop(['RainTomorrow'],axis=1)
y=df['RainTomorrow']
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)
encoder=ce.BinaryEncoder(cols=['RainToday'])
X_train=encoder.fit_transform(X_train)
X_test=encoder.transform(X_test)
X_train=pd.concat([X_train[numerical],X_train[['RainToday_0','RainToday_1']],pd.get_dummies(X_train.Location),
                   pd.get_dummies(X_train.WindGustDir),pd.get_dummies(X_train.WindDir9am),pd.get_dummies(X_train.WindDir3pm)],axis=1)
X_test=pd.concat([X_test[numerical],X_test[['RainToday_0','RainToday_1']],pd.get_dummies(X_test.Location),
                  pd.get_dummies(X_test.WindGustDir),pd.get_dummies(X_test.WindDir9am),pd.get_dummies(X_test.WindDir3pm)],axis=1)
cols=X_train.columns
scaler=MinMaxScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)
X_train=pd.DataFrame(X_train,columns=[cols])
X_test=pd.DataFrame(X_test,columns=[cols])
logreg=LogisticRegression(solver='liblinear',random_state=0)
logreg.fit(X_train,y_train)
y_pred_test=logreg.predict(X_test)
cm=confusion_matrix(y_test,y_pred_test)
sns.heatmap(pd.DataFrame(cm),annot=True,fmt='d')
plt.show()
print(classification_report(y_test,y_pred_test))
y_pred1=logreg.predict_proba(X_test)[:,1]
fpr,tpr,thresholds=roc_curve(y_test,y_pred1,pos_label='Yes')
plt.plot(fpr,tpr)
plt.plot([0,1],[0,1],'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.show()
print(roc_auc_score(y_test,y_pred1))
11. Sales project Linear Regression.
Same as question 3
12. Amazon sales prediction using Linear Regression analysis and a machine learning model.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
customers=pd.read_csv("Ecommerce [Link]")
[Link](data=customers,x=customers['Time on Website'],y=customers['Yearly Amount Spent'])
[Link](data=customers,x=customers['Time on App'],y=customers['Yearly Amount Spent'])
[Link](data=customers,x=customers['Time on App'],y=customers['Yearly Amount Spent'],kind='hex')
[Link](customers)
[Link](x='Yearly Amount Spent',y='Length of Membership',data=customers)
y=customers['Yearly Amount Spent']
X=customers[['Avg. Session Length','Time on App','Time on Website','Length of Membership']]
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=101)
from sklearn.linear_model import LinearRegression
lm=LinearRegression()
lm.fit(X_train,y_train)
predictions=lm.predict(X_test)
plt.scatter(y_test,predictions)
plt.xlabel('Y Test')
plt.ylabel('Predicted Y')
plt.show()
from sklearn import metrics
print("MAE:", metrics.mean_absolute_error(y_test,predictions))
print("MSE:", metrics.mean_squared_error(y_test,predictions))
print("RMSE:", np.sqrt(metrics.mean_squared_error(y_test,predictions)))
sns.histplot((y_test-predictions),bins=50)
plt.show()
coefficients=pd.DataFrame(lm.coef_,X.columns)
coefficients.columns=['Coefficient']
print(coefficients)
13. Multiple Linear Regression on the advertising dataset (TV, radio, newspaper vs. sales).
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
df=pd.read_csv("[Link]")
[Link](df,x_vars=["TV","radio","newspaper"],y_vars="sales",height=5,aspect=0.7)
from sklearn.linear_model import LinearRegression
X=df[["TV","radio","newspaper"]]
y=df["sales"]
lm=LinearRegression()
lm.fit(X,y)
print("Intercept:", lm.intercept_)
print("Coefficients:", lm.coef_)
sns.heatmap(df.corr(),annot=True)
plt.show()
from [Link] import r2_score
lm2=LinearRegression()
[Link](df[["TV","radio"]],y)
r2_score(y,[Link](df[["TV","radio"]]))
from sklearn.model_selection import train_test_split
from [Link] import mean_squared_error
X=[Link]("sales",axis=1)
y=df["sales"]
x_train,x_test,y_train,y_test=train_test_split(X,y,random_state=1)
lm3=LinearRegression().fit(x_train,y_train)
print("RMSE:", np.sqrt(mean_squared_error(y_test,lm3.predict(x_test))))
print("R2:", r2_score(y_test,lm3.predict(x_test)))
14. Multiple Linear Regression for Predicting "Petrol Consumption"
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
df=pd.read_csv("petrol_consumption.csv")
X=df[['Petrol_tax','Average_income','Population_Driver_licence(%)']]
y=df['Petrol_Consumption']
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=200)
sc=StandardScaler()
x_train=sc.fit_transform(x_train)
x_test=sc.transform(x_test)
model=LinearRegression()
model.fit(x_train,y_train)
pred=model.predict(x_test)
print("R2 Score:", r2_score(y_test,pred))
15. Handwritten digit recognition using Logistic Regression
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
digits=load_digits()
x_train,x_test,y_train,y_test=train_test_split(digits.data,digits.target,test_size=0.23,random_state=2)
model=LogisticRegression(solver='lbfgs',max_iter=10000)
model.fit(x_train,y_train)
pred=model.predict(x_test)
score=model.score(x_test,y_test)
print("Accuracy:", score)
cm=metrics.confusion_matrix(y_test,pred)
sns.heatmap(cm,annot=True)
plt.show()
16. Draw a ROC-AUC curve using Logistic Regression and classify the survived passengers from the Titanic dataset.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
titanic = pd.read_csv('titanic_train.csv')
titanic['Age'] = titanic[['Age','Pclass']].apply(
    lambda x: titanic[titanic['Pclass']==x[1]]['Age'].mean() if pd.isnull(x[0]) else x[0], axis=1)
titanic = titanic.drop(['Cabin'], axis=1)
titanic = titanic.dropna()
sex = pd.get_dummies(titanic['Sex'], drop_first=True)
emb = pd.get_dummies(titanic['Embarked'], drop_first=True)
titanic = pd.concat([titanic, sex, emb], axis=1)
titanic = titanic.drop(['Name','PassengerId','Ticket','Sex','Embarked'], axis=1)
y = titanic['Survived']
X = titanic.drop('Survived', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
model = LogisticRegression(max_iter=2000)
model.fit(X_train, y_train)
y_prob = model.predict_proba(X_test)[:,1]
fpr, tpr, th = roc_curve(y_test, y_prob)
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()
print(auc(fpr, tpr))
17. Using binary and multiclass Logistic Regression classification, predict the probability of heart attack in youngsters of India.
Binary Logistic Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
df=pd.read_csv("heart_attack_youngsters_india.csv")
df['Gender']=df['Gender'].replace({'Male':0,'Female':1})
df['Heart Attack Likelihood']=df['Heart Attack Likelihood'].replace({'No':0,'Yes':1})
X=df[['Age','Gender']]
y=df['Heart Attack Likelihood']
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2)
model=LogisticRegression(max_iter=5000)
model.fit(x_train,y_train)
pred=model.predict(x_test)
print(classification_report(y_test,pred))
Multiclass Logistic Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
df=pd.read_csv("heart_attack_youngsters_india.csv")
df['Smoking Status']=df['Smoking Status'].replace({'Occasionally':1,'Regularly':2,'Never':3})
X=df[['Age']]
y=df['Smoking Status']
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2)
model=LogisticRegression(max_iter=5000)
model.fit(x_train,y_train)
pred=model.predict(x_test)
print(classification_report(y_test,pred))
18. Using a binary Logistic Regression classifier, calculate the probability of detecting diabetes; then calculate the probability of the different penguin species using multiclass Logistic Regression.
Binary Logistic Regression – Probability of Detecting Diabetes
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
df = pd.read_csv("[Link]")
X = df[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']]
y = df['Diabetic']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.3)
model = LogisticRegression(max_iter=5000)
model.fit(x_train,y_train)
prob_diabetes = model.predict_proba(x_test)
print(prob_diabetes)
Multiclass Logistic Regression – Probability of Penguin Species
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
penguins = pd.read_csv("[Link]")
penguins = penguins.dropna()
X = penguins[['CulmenLength','CulmenDepth','FlipperLength','BodyMass']]
y = penguins['Species']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.3)
multi_model = LogisticRegression(max_iter=5000,multi_class='auto')
multi_model.fit(x_train,y_train)
prob_penguins = multi_model.predict_proba(x_test)
print(prob_penguins)
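predict_proba returns one column per species, ordered as in multi_model.classes_; a small hedged addition to label the probability columns and make the output readable:
# Hedged sketch: pair each probability column with its species label
prob_df = pd.DataFrame(prob_penguins, columns=multi_model.classes_)
print(prob_df.head())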
19. Classify Iris Dataset using SVM with different types of Kernels.
from sklearn import datasets
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target
def plot_svm(model, title):
    model.fit(X, y)
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    h = 0.02
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.xlabel('Sepal Length')
    plt.ylabel('Sepal Width')
    plt.title(title)
    plt.show()
plot_svm(SVC(kernel='linear'), 'SVM - Linear Kernel')
plot_svm(SVC(kernel='poly', degree=3), 'SVM - Polynomial Kernel')
plot_svm(SVC(kernel='rbf'), 'SVM - RBF Kernel')
plot_svm(SVC(kernel='sigmoid'), 'SVM - Sigmoid Kernel')
20. Build a single-layer perceptron using TensorFlow (neural network experiment).
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
plt.imshow(x_train[0], cmap='gray')
plt.show()
x_train = x_train / 255
x_test = x_test / 255
x_train_flat = x_train.reshape(len(x_train), 784)
x_test_flat = x_test.reshape(len(x_test), 784)
model = keras.Sequential([
    keras.layers.Dense(10, input_shape=(784,), activation='sigmoid')
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train_flat, y_train, epochs=5)
model.evaluate(x_test_flat, y_test)
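To see the perceptron in action on a single test image, a short hedged addition using the objects already defined above:
# Hedged sketch: show one test digit with its predicted and actual labels
pred = model.predict(x_test_flat[:1])          # class probabilities for the first test image
plt.imshow(x_test[0], cmap='gray')
plt.title(f"Predicted: {np.argmax(pred)}  Actual: {y_test[0]}")
plt.show()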
21. Show different activation functions used in neural networks.
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(-5, 5, 100)
plt.plot(x, 1 / (1 + np.exp(-x)), label='Sigmoid')
plt.plot(x, np.tanh(x), label='tanh')
plt.plot(x, np.maximum(0, x), label='ReLU')
plt.plot(x, x, label='Identity')
plt.plot(x, np.exp(x) / np.sum(np.exp(x)), label='Softmax')  # softmax normalises over the whole input vector
plt.xlabel('Input')
plt.ylabel('Activation')
plt.title('Activation Functions')
plt.legend()
plt.grid(True)
plt.show()
22. Implement Logistic Regression using a model built as an ANN (Artificial Neural Network).
import numpy as np
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
(X_train_full, y_train_full), (X_test_full, y_test_full) = cifar10.load_data()
y_train_full = y_train_full.flatten()
y_test_full = y_test_full.flatten()
CAT = 3
y_train = [Link](y_train_full == CAT, 1, 0)
y_test = [Link](y_test_full == CAT, 1, 0)
X_train = X_train_full.astype("float32") / 255
X_test = X_test_full.astype("float32") / 255
model = Sequential([
    Flatten(input_shape=(32,32,3)),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=64)
model.evaluate(X_test, y_test)
23. Implement SVM, Decision tree and Random forest classifier on Iris dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
svm_model = SVC(kernel='linear')
dt_model = DecisionTreeClassifier()
rf_model = RandomForestClassifier()
svm_model.fit(X_train, y_train)
dt_model.fit(X_train, y_train)
rf_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
dt_pred = dt_model.predict(X_test)
rf_pred = rf_model.predict(X_test)
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
print(confusion_matrix(y_test, svm_pred))
print("Decision Tree Accuracy:", accuracy_score(y_test, dt_pred))
print(confusion_matrix(y_test, dt_pred))
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))
print(confusion_matrix(y_test, rf_pred))
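A quick visual comparison of the three classifiers can also be added; a minimal hedged sketch (matplotlib is not imported above, so it is imported here):
# Hedged sketch: bar chart of the three test accuracies computed above
import matplotlib.pyplot as plt
accuracies = {
    'SVM': accuracy_score(y_test, svm_pred),
    'Decision Tree': accuracy_score(y_test, dt_pred),
    'Random Forest': accuracy_score(y_test, rf_pred),
}
plt.bar(accuracies.keys(), accuracies.values())
plt.ylim(0, 1)
plt.ylabel("Accuracy")
plt.title("Classifier Comparison on Iris")
plt.show()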
24. Use the machine learning classifiers SVM, Random Forest, Decision Tree and Naive Bayes on the same training and testing datasets in Python, and calculate the accuracy of each separately.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
df = pd.read_csv("Training_new.csv").drop(["Unnamed: 133"], axis=1)
X = [Link]("prognosis", axis=1)
y = LabelEncoder().fit_transform(df["prognosis"])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
models = [
("SVM", SVC(kernel='linear')),
("Decision Tree", DecisionTreeClassifier()),
("Random Forest", RandomForestClassifier()),
("Naive Bayes", GaussianNB())
]
for name, model in models:
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    print(name, "Accuracy:", accuracy_score(y_test, pred))
25. Build a user-operated symptom input box to identify diseases using different classifiers (experiment).
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
df = pd.read_csv("[Link]")
df_test = pd.read_csv("[Link]")
prognosis_map = {
    'Fungal infection':0,'Allergy':1,'GERD':2,'Chronic cholestasis':3,'Drug Reaction':4,
    'Peptic ulcer diseae':5,'AIDS':6,'Diabetes ':7,'Gastroenteritis':8,'Bronchial Asthma':9,
    'Hypertension ':10,'Migraine':11,'Cervical spondylosis':12,'Paralysis (brain hemorrhage)':13,
    'Jaundice':14,'Malaria':15,'Chicken pox':16,'Dengue':17,'Typhoid':18,'hepatitis A':19,
    'Hepatitis B':20,'Hepatitis C':21,'Hepatitis D':22,'Hepatitis E':23,'Alcoholic hepatitis':24,
    'Tuberculosis':25,'Common Cold':26,'Pneumonia':27,'Dimorphic hemmorhoids(piles)':28,
    'Heart attack':29,'Varicose veins':30,'Hypothyroidism':31,'Hyperthyroidism':32,
    'Hypoglycemia':33,'Osteoarthristis':34,'Arthritis':35,'(vertigo) Paroymsal Positional Vertigo':36,
    'Acne':37,'Urinary tract infection':38,'Psoriasis':39,'Impetigo':40}
df.replace({'prognosis': prognosis_map}, inplace=True)
df_test.replace({'prognosis': prognosis_map}, inplace=True)
X = df.drop('prognosis', axis=1)
y = df['prognosis']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_val)
print("Decision Tree Accuracy:", accuracy_score(y_val, dt_pred))
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_val)
print("Random Forest Accuracy:", accuracy_score(y_val, rf_pred))
nb = GaussianNB()
nb.fit(X_train, y_train)
nb_pred = nb.predict(X_val)
print("Naive Bayes Accuracy:", accuracy_score(y_val, nb_pred))
symptoms = ['fever','headache','nausea','fatigue']
l1 = list(X.columns)
input_sym = [1 if s in symptoms else 0 for s in l1]
print("DT Prediction:", [Link]([input_sym])[0])
print("RF Prediction:", [Link]([input_sym])[0])
print("NB Prediction:", [Link]([input_sym])[0])
26. Build the logic gates using a single-layer perceptron (ANN experiment).
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
model = Sequential([Dense(1, input_dim=2, activation='sigmoid')])
model.compile(optimizer='adam', loss='binary_crossentropy')
# AND Gate
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([0,0,0,1])
model.fit(X, y, epochs=500, verbose=0)
print("AND:", model.predict(X).round().flatten())
# OR Gate
y = np.array([0,1,1,1])
model.fit(X, y, epochs=500, verbose=0)
print("OR:", model.predict(X).round().flatten())
# NAND Gate
y = np.array([1,1,1,0])
model.fit(X, y, epochs=500, verbose=0)
print("NAND:", model.predict(X).round().flatten())
27. Build a Neural Network model from scratch using PyTorch Experiment
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
df = pd.read_csv("[Link]")
X = df[["X1","X2"]].values
y = df["color"].values
le = LabelEncoder()
y = le.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 100)
        self.fc2 = nn.Linear(100, 3)
    def forward(self, x):
        x = F.relu(self.fc1(x))
        return self.fc2(x)
model = MLP()
loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.01)
for epoch in range(200):
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    opt.zero_grad()
    loss.backward()
    opt.step()
y_prob = torch.softmax(model(X_test), dim=1)
y_pred = y_prob.argmax(dim=1)
acc = (y_pred == y_test).float().mean().item()
print("Test Accuracy:", acc)
28. Build a model to perform employee churn prediction using a Multi-Layer Perceptron neural network on the HR_comma_sep.csv dataset.
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
data = pd.read_csv("HR_comma_sep.csv")
le = LabelEncoder()
data["salary"] = le.fit_transform(data["salary"])
data["Department"] = le.fit_transform(data["Department"])
X = data[['satisfaction_level','last_evaluation','number_project',
'average_montly_hours','time_spend_company','Work_accident',
'promotion_last_5years','Department','salary']]
y = data['left']
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state=42)
clf = MLPClassifier(hidden_layer_sizes=(6,5),
learning_rate_init=0.01,
random_state=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))