# 1. Exploratory data analysis of Hyderabad property prices.
import pandas as pd

# Sample dataset: 10 properties across Hyderabad localities.
data = {
    'Location': ['Jubilee Hills', 'Banjara Hills', 'Madhapur', 'Gachibowli', 'Kondapur',
                 'Kukatpally', 'Ameerpet', 'Secunderabad', 'Begumpet', 'Hitech City'],
    'Size (sqft)': [2000, 2500, 1800, 2200, 1600, 1700, 1900, 2400, 2100, 2300],
    'Bedrooms': [3, 4, 3, 4, 2, 3, 3, 4, 3, 4],
    'Bathrooms': [2, 3, 2, 3, 2, 2, 2, 3, 2, 3],
    'Year Built': [2010, 2015, 2012, 2018, 2008, 2011, 2013, 2017, 2016, 2019],
    'Price (INR)': [12000000, 15000000, 10000000, 14000000, 9000000, 9500000,
                    11000000, 13000000, 12500000, 15500000],
    'Distance to City Center (km)': [8.5, 7.0, 10.0, 12.0, 14.0, 15.5, 9.0, 16.0, 11.0,
                                     13.0],
}
df = pd.DataFrame(data)

# Check for missing values (expected: none, the data is hand-written).
print(df.isnull().sum())

# Print the number of rows and columns.
print(df.shape)
import matplotlib.pyplot as plt
import seaborn as sns

# Box plot: price distribution per location (one property per location
# here, so each "box" collapses to a single point).
plt.figure(figsize=(10, 6))
sns.boxplot(x='Location', y='Price (INR)', data=df)
plt.xticks(rotation=45)
plt.title('Box Plot of Property Prices by Location')
plt.show()

# Heat map of pairwise correlations over the numeric columns only
# (Location is a string column and must be excluded from corr()).
plt.figure(figsize=(10, 6))
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True,
            cmap='coolwarm')
plt.title('Correlation Heat Map')
plt.show()

# Scatter plot: size vs price, coloured by location.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Size (sqft)', y='Price (INR)', hue='Location', data=df,
                palette='viridis')
plt.title('Scatter Plot of Size vs Price')
plt.show()

# Bubble chart: marker area scales with the number of bedrooms.
plt.figure(figsize=(10, 6))
plt.scatter(df['Size (sqft)'], df['Price (INR)'], s=df['Bedrooms'] * 100, alpha=0.5)
plt.xlabel('Size (sqft)')
plt.ylabel('Price (INR)')
plt.title('Bubble Chart of Size vs Price')
plt.show()

# Area chart: prices ordered by construction year.
plt.figure(figsize=(10, 6))
df.sort_values('Year Built').plot(kind='area', x='Year Built', y='Price (INR)',
                                  alpha=0.5)
plt.title('Area Chart of Property Prices Over Years')
plt.show()
# 2. Simple linear regression (scikit-learn): sugarpercent -> pricepercent.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset; replace the path with your own CSV file.
# NOTE(review): the original path was garbled — the column names
# (sugarpercent/pricepercent) match the candy-data dataset; confirm.
df = pd.read_csv(r"C:\Users\harik\Downloads\candy-data.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values, if any.

# b. Splitting training and test data
X = df[['sugarpercent']]  # Feature column (2-D, as scikit-learn expects).
y = df['pricepercent']    # Target column.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# c. Evaluate the model (intercept and slope)
model = LinearRegression()
model.fit(X_train, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Training set')
plt.xlabel('sugarpercent')
plt.ylabel('pricepercent')
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
# The fitted line is drawn from the training points in both figures.
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Testing set')
plt.xlabel('sugarpercent')
plt.ylabel('pricepercent')
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')
# 3. Simple linear regression trained with batch gradient descent:
#    pricepercent -> winpercent.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset; replace the path with your own CSV file.
# NOTE(review): original path garbled — column names match candy-data; confirm.
df = pd.read_csv(r"C:\Users\harik\Downloads\candy-data.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values, if any.

# b. Splitting training and test data. Plain NumPy arrays (.values) are
#    used because the hand-rolled gradient-descent model below works on
#    raw arrays, not DataFrames.
X = df[['pricepercent']].values  # Feature column.
y = df['winpercent'].values      # Target column.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# c. Evaluate the model (intercept and slope) using Gradient Descent
class LinearRegressionGD:
    """Simple linear regression fitted with batch gradient descent.

    Follows the scikit-learn attribute convention: after ``fit`` the
    learned parameters are available as ``intercept_`` and ``coef_``.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000):
        # BUG FIX: the original defined `_init_` (single underscores),
        # which Python never invokes as a constructor, so the arguments
        # were silently ignored.
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.intercept_ = None
        self.coef_ = None

    def fit(self, X, y):
        """Learn the parameters by gradient descent on the MSE cost.

        X is an (m, n) feature matrix, y an (m,) target vector.
        """
        X = np.insert(X, 0, 1, axis=1)  # Prepend a bias (intercept) column.
        self.theta = np.zeros(X.shape[1])
        m = len(y)
        for _ in range(self.n_iterations):
            # Gradient of (1/2m) * ||X.theta - y||^2 with respect to theta.
            gradients = 1 / m * X.T.dot(X.dot(self.theta) - y)
            self.theta -= self.learning_rate * gradients
        self.intercept_ = self.theta[0]
        self.coef_ = self.theta[1:]
        return self  # sklearn-style chaining; callers may ignore it.

    def predict(self, X):
        """Return predictions for a feature matrix X (no bias column)."""
        X = np.insert(X, 0, 1, axis=1)  # Add bias (intercept) term.
        return X.dot(self.theta)
# Fit the gradient-descent model and report the learned parameters.
model = LinearRegressionGD(learning_rate=0.01, n_iterations=1000)
model.fit(X_train, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Training set')
plt.xlabel('pricepercent')
plt.ylabel('winpercent')
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
# The fitted line is drawn from the training points in both figures.
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Testing set')
plt.xlabel('pricepercent')
plt.ylabel('winpercent')
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')
# 4. Simple linear regression (scikit-learn): sugarpercent -> winpercent.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset; replace the path with your own CSV file.
# NOTE(review): original path garbled — column names match candy-data; confirm.
df = pd.read_csv(r"C:\Users\harik\Downloads\candy-data.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values, if any.

# b. Splitting training and test data
X = df[['sugarpercent']]  # Feature column (2-D, as scikit-learn expects).
y = df['winpercent']      # Target column.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# c. Evaluate the model (intercept and slope)
model = LinearRegression()
model.fit(X_train, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Training set')
plt.xlabel('sugarpercent')
plt.ylabel('winpercent')
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
# The fitted line is drawn from the training points in both figures.
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Testing set')
plt.xlabel('sugarpercent')
plt.ylabel('winpercent')
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')
# 5. Polynomial (degree-2) regression: post_score -> Age_in_years.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset; replace the path with your own CSV file.
df = pd.read_csv(r"C:\Users\harik\Downloads\MBA_ADMISSIONS.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values, if any.

# b. Splitting training and test data
X = df[['post_score']]   # Feature column (2-D, as scikit-learn expects).
y = df['Age_in_years']   # Target column.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# c. Evaluate the model: linear regression on quadratic feature expansion.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)  # transform only — fit on train data.
model = LinearRegression()
model.fit(X_train_poly, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set.
# Sort by the raw feature so the curve plots left-to-right. The original
# sorted X and the polynomial features independently (column-wise), which
# pairs mismatched rows; argsort keeps each x with its own prediction.
order = np.argsort(X_train.values[:, 0])
x_sorted = X_train.values[order]
curve = model.predict(X_train_poly[order])

plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(x_sorted, curve, color='red', label='Quadratic Regression Line')
plt.title('Training set')
plt.xlabel('post_score')
plt.ylabel('Age_in_years')
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
plt.plot(x_sorted, curve, color='red', label='Quadratic Regression Line')
plt.title('Testing set')
plt.xlabel('post_score')
plt.ylabel('Age_in_years')
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test_poly)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')
# 6. Logistic regression (classification) on study hours vs scores.
import pandas as pd

# Study hours and the corresponding exam scores for 20 students.
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3, 12.2,
                    2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110,
               15, 40, 50, 55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

# Find missing values
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

from sklearn.model_selection import train_test_split

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

import matplotlib.pyplot as plt

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], y_train, color='blue', label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], y_test, color='green', label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

from sklearn.linear_model import LogisticRegression

# NOTE(review): LogisticRegression is a classifier; every distinct score
# here becomes its own class label, so test predictions can only be scores
# seen during training. max_iter raised so the solver converges cleanly.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict the test set results
y_pred = model.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)
# 7. Support-vector regression (linear kernel) on study hours vs scores.
import pandas as pd

# Study hours and the corresponding exam scores for 20 students.
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3, 12.2,
                    2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110,
               15, 40, 50, 55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

# Find missing values
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

from sklearn.model_selection import train_test_split

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Create and train the model
model = SVR(kernel='linear')
model.fit(X_train, y_train)

# Predict the test set results (computed once and reused below;
# the original recomputed the identical prediction a second time).
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

import matplotlib.pyplot as plt

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], y_train, color='blue', label='Training data')
plt.plot(X_train['Study Hours'], model.predict(X_train), color='red',
         label='Model prediction')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], y_test, color='green', label='Testing data')
plt.plot(X_test['Study Hours'], y_pred, color='red', label='Model prediction')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

print("Predicted values:", y_pred)

# Compare actual vs predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)
# 8. Decision-tree regression plus entropy / information-gain calculations.
import pandas as pd
import numpy as np

# Create a sample dataset of study hours vs exam scores.
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3, 12.2,
                    2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110,
               15, 40, 50, 55, 65, 72, 88, 95],
}
# Save the dataset to a CSV file, then read it back (round-trip exercise).
df = pd.DataFrame(data)
df.to_csv('study_scores.csv', index=False)
df = pd.read_csv('study_scores.csv')
print(df.head())

from sklearn.model_selection import train_test_split

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Create and train the model
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict the test set results
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

from scipy.stats import entropy

# Entropy (bits) of the target's empirical distribution; every score is
# unique here, so this equals log2(number of rows).
target_entropy = entropy(df['Scores'].value_counts(normalize=True), base=2)
print("Entropy of the target variable:", target_entropy)
def information_gain(df, split_attribute_name, target_name="Scores"):
    """Return the information gain of splitting `df` on `split_attribute_name`.

    Gain = H(target) - sum_v p(v) * H(target | split == v), with all
    entropies in bits (base 2).
    """
    # Entropy of the target over the whole dataset.
    total_entropy = entropy(df[target_name].value_counts(normalize=True),
                            base=2)
    # Distinct values of the split attribute and how often each occurs.
    vals, counts = np.unique(df[split_attribute_name], return_counts=True)
    total = np.sum(counts)
    # Weighted average entropy of the target within each split subset.
    weighted_entropy = sum(
        (counts[i] / total)
        * entropy(df.where(df[split_attribute_name] == vals[i])
                  .dropna()[target_name].value_counts(normalize=True),
                  base=2)
        for i in range(len(vals))
    )
    # (The original shadowed the function name with a local variable.)
    return total_entropy - weighted_entropy
# Information gain for 'Study Hours' (every value is unique in this
# dataset, so the gain equals the full target entropy).
info_gain = information_gain(df, 'Study Hours')
print("Information Gain for Study Hours:", info_gain)

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Plot the fitted decision tree.
plt.figure(figsize=(12, 8))
plot_tree(model, feature_names=['Study Hours'], filled=True)
plt.title('Decision Tree')
plt.show()
# 9. Random-forest regression on study hours vs scores.
import pandas as pd

# Study hours and the corresponding exam scores for 20 students.
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3, 12.2,
                    2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110,
               15, 40, 50, 55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

# Find missing values
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

from sklearn.model_selection import train_test_split

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

import matplotlib.pyplot as plt

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], y_train, color='blue', label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], y_test, color='green', label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

from sklearn.ensemble import RandomForestRegressor

# Create and train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict the test set results
y_pred = model.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)
# 10. K-means clustering of the (study hours, scores) points.
import pandas as pd

# Study hours and the corresponding exam scores for 20 students.
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3, 12.2,
                    2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110,
               15, 40, 50, 55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

from sklearn.model_selection import train_test_split

# Unsupervised task: split rows only — both columns are features.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

import matplotlib.pyplot as plt

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], X_train['Scores'], color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], X_test['Scores'], color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

from sklearn.cluster import KMeans

# Create and train the model
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_train)

# Assign each test row to its nearest learned centroid.
y_pred = kmeans.predict(X_test)
print("Predicted values:", y_pred)

# 'Actual' here is just the original row index — there are no true labels.
comparison = pd.DataFrame({'Actual': X_test.index, 'Predicted': y_pred})
print(comparison)
# 11. DBSCAN clustering of the (study hours, scores) points.
import pandas as pd

# Study hours and the corresponding exam scores for 20 students.
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3, 12.2,
                    2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110,
               15, 40, 50, 55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

from sklearn.model_selection import train_test_split

# Unsupervised task: split rows only — both columns are features.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

import matplotlib.pyplot as plt

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], X_train['Scores'], color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], X_test['Scores'], color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

from sklearn.cluster import DBSCAN

# Create and train the model
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(X_train)

# NOTE(review): DBSCAN has no predict(); fit_predict re-clusters the test
# set from scratch, so these labels are unrelated to the fit above
# (-1 marks noise points).
y_pred = dbscan.fit_predict(X_test)
print("Predicted values:", y_pred)

# 'Actual' here is just the original row index — there are no true labels.
comparison = pd.DataFrame({'Actual': X_test.index, 'Predicted': y_pred})
print(comparison)
# 12. Gaussian-mixture-model clustering of the (study hours, scores) points.
import pandas as pd

# Study hours and the corresponding exam scores for 20 students.
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3, 12.2,
                    2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110,
               15, 40, 50, 55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

from sklearn.model_selection import train_test_split

# Unsupervised task: split rows only — both columns are features.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

import matplotlib.pyplot as plt

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], X_train['Scores'], color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], X_test['Scores'], color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

from sklearn.mixture import GaussianMixture

# Create and train the model
gmm = GaussianMixture(n_components=2, random_state=42)
gmm.fit(X_train)

# Assign each test row to its most likely mixture component.
y_pred = gmm.predict(X_test)
print("Predicted values:", y_pred)

# 'Actual' here is just the original row index — there are no true labels.
comparison = pd.DataFrame({'Actual': X_test.index, 'Predicted': y_pred})
print(comparison)