0% found this document useful (0 votes)
33 views17 pages

Data Analysis and Visualization in Python

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
33 views17 pages

Data Analysis and Visualization in Python

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

1.

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Exercise 1: build a small property dataset, inspect it, and draw five
# charts (box plot, correlation heat map, scatter plot, bubble chart,
# area chart).

# Sample dataset: ten property listings, one row per location.
data = {
    'Location': [
        'Jubilee Hills', 'Banjara Hills', 'Madhapur', 'Gachibowli', 'Kondapur',
        'Kukatpally', 'Ameerpet', 'Secunderabad', 'Begumpet', 'Hitech City',
    ],
    'Size (sqft)': [2000, 2500, 1800, 2200, 1600, 1700, 1900, 2400, 2100, 2300],
    'Bedrooms': [3, 4, 3, 4, 2, 3, 3, 4, 3, 4],
    'Bathrooms': [2, 3, 2, 3, 2, 2, 2, 3, 2, 3],
    'Year Built': [2010, 2015, 2012, 2018, 2008, 2011, 2013, 2017, 2016, 2019],
    'Price (INR)': [
        12000000, 15000000, 10000000, 14000000, 9000000, 9500000,
        11000000, 13000000, 12500000, 15500000,
    ],
    'Distance to City Center (km)': [
        8.5, 7.0, 10.0, 12.0, 14.0, 15.5, 9.0, 16.0, 11.0, 13.0,
    ],
}
df = pd.DataFrame(data)

# Check for missing values (count of nulls per column)
print(df.isnull().sum())

# Print the number of rows and columns
print(df.shape)

# Plot box plot
plt.figure(figsize=(10, 6))
sns.boxplot(x='Location', y='Price (INR)', data=df)
plt.xticks(rotation=45)
plt.title('Box Plot of Property Prices by Location')
plt.show()

# Plot heat map (only numeric columns can be correlated, so 'Location'
# is excluded via select_dtypes)
plt.figure(figsize=(10, 6))
sns.heatmap(
    df.select_dtypes(include='number').corr(),
    annot=True,
    cmap='coolwarm',
)
plt.title('Correlation Heat Map')
plt.show()

# Plot scatter plot
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x='Size (sqft)',
    y='Price (INR)',
    hue='Location',
    data=df,
    palette='viridis',
)
plt.title('Scatter Plot of Size vs Price')
plt.show()

# Plot bubble chart (marker area scales with the number of bedrooms)
plt.figure(figsize=(10, 6))
plt.scatter(
    df['Size (sqft)'],
    df['Price (INR)'],
    s=df['Bedrooms'] * 100,
    alpha=0.5,
)
plt.xlabel('Size (sqft)')
plt.ylabel('Price (INR)')
plt.title('Bubble Chart of Size vs Price')
plt.show()

# Plot area chart.
# DataFrame.plot opens its own figure unless it is given an Axes, so the
# Axes is created first and passed in; otherwise the sized figure would be
# left empty and the chart drawn on a second, default-sized one.
fig, ax = plt.subplots(figsize=(10, 6))
df.sort_values('Year Built').plot(
    kind='area',
    x='Year Built',
    y='Price (INR)',
    alpha=0.5,
    ax=ax,
)
ax.set_title('Area Chart of Property Prices Over Years')
plt.show()

2.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load your dataset from a CSV file.
# Replace the path below with the path to your CSV file.
# NOTE(review): the file name was lost when this document was extracted
# (it appeared as "[Link]"); 'candy-data.csv' is a guess based on the
# sugarpercent / pricepercent column names - confirm before running.
df = pd.read_csv(r"C:\Users\harik\Downloads\candy-data.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values if any

# b. Splitting training and test data
# Replace 'sugarpercent' with the name of your feature column; the double
# brackets keep X as a one-column DataFrame.
X = df[['sugarpercent']]
# Replace 'pricepercent' with the name of your target column.
y = df['pricepercent']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# c. Evaluate the model (intercept and slope)
model = LinearRegression()
model.fit(X_train, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
# The fitted line is evaluated on the training features for both figures.
train_line = model.predict(X_train)

plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_train, train_line, color='red', label='Linear Regression Line')
plt.title('Training set')
plt.xlabel('sugarpercent')  # Replace with your feature column name
plt.ylabel('pricepercent')  # Replace with your target column name
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
plt.plot(X_train, train_line, color='red', label='Linear Regression Line')
plt.title('Testing set')
plt.xlabel('sugarpercent')  # Replace with your feature column name
plt.ylabel('pricepercent')  # Replace with your target column name
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

3.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load your dataset from a CSV file.
# Replace the path below with the path to your CSV file.
# NOTE(review): the file name was lost when this document was extracted
# (it appeared as "[Link]"); 'candy-data.csv' is a guess based on the
# pricepercent / winpercent column names - confirm before running.
df = pd.read_csv(r"C:\Users\harik\Downloads\candy-data.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values if any

# b. Splitting training and test data
# .values converts to NumPy arrays, which the hand-written gradient-descent
# model below operates on.
# Replace 'pricepercent' with the name of your feature column.
X = df[['pricepercent']].values
# Replace 'winpercent' with the name of your target column.
y = df['winpercent'].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# c. Evaluate the model (intercept and slope) using Gradient Descent
class LinearRegressionGD:
    """Linear regression fitted with batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to the gradient on every iteration.
    n_iterations : int, default 1000
        Number of full-batch gradient steps performed by ``fit``.

    Attributes set by ``fit``
    -------------------------
    theta : ndarray of shape (n_features + 1,)
        Full parameter vector; element 0 is the bias term.
    intercept_ : float
        ``theta[0]``.
    coef_ : ndarray of shape (n_features,)
        ``theta[1:]``.
    """

    # The constructor must be spelled with double underscores; with the
    # single-underscore ``_init_`` Python never calls it and
    # ``LinearRegressionGD(learning_rate=...)`` raises TypeError.
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.intercept_ = None
        self.coef_ = None

    def fit(self, X, y):
        """Estimate the parameters from ``X`` (n_samples, n_features) and ``y``.

        Starts from an all-zero parameter vector and repeatedly subtracts
        ``learning_rate * (1/m) * X^T (X theta - y)``.
        """
        # Coerce to float arrays so list or integer input works too.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        X = np.insert(X, 0, 1, axis=1)  # Add bias (intercept term)
        self.theta = np.zeros(X.shape[1])
        m = len(y)
        for _ in range(self.n_iterations):
            gradients = 1 / m * X.T.dot(X.dot(self.theta) - y)
            self.theta -= self.learning_rate * gradients
        self.intercept_ = self.theta[0]
        self.coef_ = self.theta[1:]

    def predict(self, X):
        """Return predictions for ``X`` (n_samples, n_features); call ``fit`` first."""
        X = np.asarray(X, dtype=float)
        X = np.insert(X, 0, 1, axis=1)  # Add bias (intercept term)
        return X.dot(self.theta)
model = LinearRegressionGD(learning_rate=0.01, n_iterations=1000)
model.fit(X_train, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
# The fitted line is evaluated on the training features for both figures.
train_line = model.predict(X_train)

plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_train, train_line, color='red', label='Linear Regression Line')
plt.title('Training set')
plt.xlabel('pricepercent')  # Replace with your feature column name
plt.ylabel('winpercent')  # Replace with your target column name
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
plt.plot(X_train, train_line, color='red', label='Linear Regression Line')
plt.title('Testing set')
plt.xlabel('pricepercent')  # Replace with your feature column name
plt.ylabel('winpercent')  # Replace with your target column name
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

4.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load your dataset from a CSV file.
# Replace the path below with the path to your CSV file.
# NOTE(review): the file name was lost when this document was extracted
# (it appeared as "[Link]"); 'candy-data.csv' is a guess based on the
# sugarpercent / winpercent column names - confirm before running.
df = pd.read_csv(r"C:\Users\harik\Downloads\candy-data.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values if any

# b. Splitting training and test data
# Replace 'sugarpercent' with the name of your feature column; the double
# brackets keep X as a one-column DataFrame.
X = df[['sugarpercent']]
# Replace 'winpercent' with the name of your target column.
y = df['winpercent']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# c. Evaluate the model (intercept and slope)
model = LinearRegression()
model.fit(X_train, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
# The fitted line is evaluated on the training features for both figures.
train_line = model.predict(X_train)

plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_train, train_line, color='red', label='Linear Regression Line')
plt.title('Training set')
plt.xlabel('sugarpercent')  # Replace with your feature column name
plt.ylabel('winpercent')  # Replace with your target column name
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
plt.plot(X_train, train_line, color='red', label='Linear Regression Line')
plt.title('Testing set')
plt.xlabel('sugarpercent')  # Replace with your feature column name
plt.ylabel('winpercent')  # Replace with your target column name
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

5.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load your dataset from a CSV file.
# Replace the path below with the path to your CSV file.
df = pd.read_csv(r"C:\Users\harik\Downloads\MBA_ADMISSIONS.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values if any

# b. Splitting training and test data
# Replace 'post_score' with the name of your feature column; the double
# brackets keep X as a one-column DataFrame.
X = df[['post_score']]
# Replace 'Age_in_years' with the name of your target column.
y = df['Age_in_years']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# c. Evaluate the model
# Expand the single feature to [1, x, x^2] and fit an ordinary linear model
# on the expanded columns. The expansion is fitted on the training split
# only and then re-applied to the test split.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
model = LinearRegression()
model.fit(X_train_poly, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
# Draw the curve over the training feature in ascending order. The feature
# is sorted first and then transformed; sorting the already-expanded matrix
# column by column could pair an x with a different row's x^2.
X_curve = X_train.sort_values('post_score')
y_curve = model.predict(poly.transform(X_curve))

plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_curve['post_score'], y_curve, color='red',
         label='Quadratic Regression Line')
plt.title('Training set')
plt.xlabel('post_score')  # Replace with your feature column name
plt.ylabel('Age_in_years')  # Replace with your target column name
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
plt.plot(X_curve['post_score'], y_curve, color='red',
         label='Quadratic Regression Line')
plt.title('Testing set')
plt.xlabel('post_score')  # Replace with your feature column name
plt.ylabel('Age_in_years')  # Replace with your target column name
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test_poly)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

6.

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5,
                    11.3, 12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100,
               105, 110, 15, 40, 50, 55, 65, 72, 88, 95],
}

# Create DataFrame
df = pd.DataFrame(data)

# Find missing values
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], y_train, color='blue', label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], y_test, color='green', label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model
# NOTE(review): LogisticRegression is a classifier, so every distinct score
# in y_train is treated as its own class label and predictions can only be
# scores seen during training. Confirm this is what the exercise intends
# (versus a pass/fail label or a regression model).
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the test set results
y_pred = model.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

7.

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5,
                    11.3, 12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100,
               105, 110, 15, 40, 50, 55, 65, 72, 88, 95],
}

# Create DataFrame
df = pd.DataFrame(data)

# Find missing values
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create and train the model
model = SVR(kernel='linear')
model.fit(X_train, y_train)

# Predict the test set results (computed once and reused for the metric,
# the test-set plot and the comparison table below)
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], y_train, color='blue', label='Training data')
plt.plot(X_train['Study Hours'], model.predict(X_train), color='red',
         label='Model prediction')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], y_test, color='green', label='Testing data')
plt.plot(X_test['Study Hours'], y_pred, color='red', label='Model prediction')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

print("Predicted values:", y_pred)

# Compare actual vs predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

8.

import pandas as pd
import numpy as np
from scipy.stats import entropy
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Create a sample dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5,
                    11.3, 12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100,
               105, 110, 15, 40, 50, 55, 65, 72, 88, 95],
}

# Save the dataset to a CSV file (written to the current working directory)
df = pd.DataFrame(data)
df.to_csv('study_scores.csv', index=False)

# Read the saved CSV file
df = pd.read_csv('study_scores.csv')
print(df.head())

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create and train the model
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict the test set results
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the entropy of the target variable (in bits, from the relative
# frequency of each distinct score)
target_entropy = entropy(df['Scores'].value_counts(normalize=True), base=2)
print("Entropy of the target variable:", target_entropy)
def information_gain(df, split_attribute_name, target_name="Scores"):
    """Return the information gain (in bits) of splitting on one column.

    The gain is the entropy of ``df[target_name]`` minus the weighted average
    entropy of the target within each group of rows that share a value of
    ``df[split_attribute_name]``. Group weights are ``group size / len(df)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both columns.
    split_attribute_name : str
        Column whose distinct values define the groups.
    target_name : str, default "Scores"
        Column whose entropy is measured.

    Returns
    -------
    float
        ``total_entropy - weighted_entropy``.
    """
    # Entropy of the target over the whole dataset.
    total_entropy = entropy(df[target_name].value_counts(normalize=True), base=2)

    # Only the target column of each group is selected, so missing values in
    # unrelated columns cannot remove rows from a group.
    n_rows = len(df)
    weighted_entropy = 0.0
    for _, group_target in df.groupby(split_attribute_name)[target_name]:
        group_entropy = entropy(group_target.value_counts(normalize=True), base=2)
        weighted_entropy += (len(group_target) / n_rows) * group_entropy

    # The result is returned directly rather than stored in a local that
    # shadows the function's own name.
    return total_entropy - weighted_entropy
# Calculate the information gain for 'Study Hours'
info_gain = information_gain(df, 'Study Hours')
print("Information Gain for Study Hours:", info_gain)

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Plot the decision tree
plt.figure(figsize=(12, 8))
plot_tree(model, feature_names=['Study Hours'], filled=True)
plt.title('Decision Tree')
plt.show()

9.

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5,
                    11.3, 12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100,
               105, 110, 15, 40, 50, 55, 65, 72, 88, 95],
}

# Create DataFrame
df = pd.DataFrame(data)

# Find missing values
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], y_train, color='blue', label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], y_test, color='green', label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict the test set results
y_pred = model.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)
10.

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5,
                    11.3, 12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100,
               105, 110, 15, 40, 50, 55, 65, 72, 88, 95],
}

# Create DataFrame
df = pd.DataFrame(data)

# Split the data. Clustering is unsupervised, so the whole frame (both
# columns) is split and there is no separate target vector.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], X_train['Scores'], color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], X_test['Scores'], color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_train)

# Predict the test set results (cluster index assigned to each test row)
y_pred = kmeans.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values
# NOTE(review): the 'Actual' column holds the original row index of each
# test sample, not a true label - there are no ground-truth clusters here.
comparison = pd.DataFrame({'Actual': X_test.index, 'Predicted': y_pred})
print(comparison)

11.

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5,
                    11.3, 12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100,
               105, 110, 15, 40, 50, 55, 65, 72, 88, 95],
}

# Create DataFrame
df = pd.DataFrame(data)

# Split the data. Clustering is unsupervised, so the whole frame (both
# columns) is split and there is no separate target vector.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], X_train['Scores'], color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], X_test['Scores'], color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(X_train)

# Predict the test set results
# NOTE(review): fit_predict re-fits the model on X_test, so the fit on
# X_train above does not influence these labels. The test split also has
# only 4 rows (20% of 20), fewer than min_samples=5 - check whether eps and
# min_samples suit this unscaled data.
y_pred = dbscan.fit_predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values
# NOTE(review): the 'Actual' column holds the original row index of each
# test sample, not a true label - there are no ground-truth clusters here.
comparison = pd.DataFrame({'Actual': X_test.index, 'Predicted': y_pred})
print(comparison)

12.

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5,
                    11.3, 12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100,
               105, 110, 15, 40, 50, 55, 65, 72, 88, 95],
}

# Create DataFrame
df = pd.DataFrame(data)

# Split the data. Clustering is unsupervised, so the whole frame (both
# columns) is split and there is no separate target vector.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], X_train['Scores'], color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], X_test['Scores'], color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model
gmm = GaussianMixture(n_components=2, random_state=42)
gmm.fit(X_train)

# Predict the test set results (mixture component assigned to each test row)
y_pred = gmm.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values
# NOTE(review): the 'Actual' column holds the original row index of each
# test sample, not a true label - there are no ground-truth clusters here.
comparison = pd.DataFrame({'Actual': X_test.index, 'Predicted': y_pred})
print(comparison)

You might also like