Data Analysis and Visualization in Python

Uploaded by

lochanmeherkhaspa0419

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

33 views17 pages

Data Analysis and Visualization in Python

Uploaded by

lochanmeherkhaspa0419

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

1.

# Exploratory analysis of a small property-price dataset: missing-value
# check, shape, and five plots (box, heat map, scatter, bubble, area).
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Sample dataset: one row per location.
data = {
    'Location': ['Jubilee Hills', 'Banjara Hills', 'Madhapur', 'Gachibowli',
                 'Kondapur', 'Kukatpally', 'Ameerpet', 'Secunderabad',
                 'Begumpet', 'Hitech City'],
    'Size (sqft)': [2000, 2500, 1800, 2200, 1600, 1700, 1900, 2400, 2100, 2300],
    'Bedrooms': [3, 4, 3, 4, 2, 3, 3, 4, 3, 4],
    'Bathrooms': [2, 3, 2, 3, 2, 2, 2, 3, 2, 3],
    'Year Built': [2010, 2015, 2012, 2018, 2008, 2011, 2013, 2017, 2016, 2019],
    'Price (INR)': [12000000, 15000000, 10000000, 14000000, 9000000, 9500000,
                    11000000, 13000000, 12500000, 15500000],
    'Distance to City Center (km)': [8.5, 7.0, 10.0, 12.0, 14.0, 15.5, 9.0,
                                     16.0, 11.0, 13.0],
}
df = pd.DataFrame(data)

# Check for missing values
print(df.isnull().sum())
# Print the number of rows and columns
print(df.shape)

# Plot box plot
plt.figure(figsize=(10, 6))
sns.boxplot(x='Location', y='Price (INR)', data=df)
plt.xticks(rotation=45)
plt.title('Box Plot of Property Prices by Location')
plt.show()

# Plot heat map (numeric columns only; 'Location' is text)
plt.figure(figsize=(10, 6))
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True,
            cmap='coolwarm')
plt.title('Correlation Heat Map')
plt.show()

# Plot scatter plot
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Size (sqft)', y='Price (INR)', hue='Location', data=df,
                palette='viridis')
plt.title('Scatter Plot of Size vs Price')
plt.show()

# Plot bubble chart (marker area scales with the number of bedrooms)
plt.figure(figsize=(10, 6))
plt.scatter(df['Size (sqft)'], df['Price (INR)'], s=df['Bedrooms'] * 100,
            alpha=0.5)
plt.xlabel('Size (sqft)')
plt.ylabel('Price (INR)')
plt.title('Bubble Chart of Size vs Price')
plt.show()

# Plot area chart.
# DataFrame.plot opens its own figure unless it is handed an Axes, which
# would leave a blank 10x6 figure behind; draw on an explicit Axes instead.
fig, ax = plt.subplots(figsize=(10, 6))
df.sort_values('Year Built').plot(kind='area', x='Year Built',
                                  y='Price (INR)', alpha=0.5, ax=ax)
ax.set_title('Area Chart of Property Prices Over Years')
plt.show()

# 2. Simple linear regression: predict 'pricepercent' from 'sugarpercent'.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load your dataset from a CSV file.
# NOTE(review): the file name was lost when this document was extracted.
# 'candy-data.csv' is a guess based on the column names used below --
# replace it with the path to your CSV file.
df = pd.read_csv(r"C:\Users\harik\Downloads\candy-data.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values if any

# b. Splitting training and test data
X = df[['sugarpercent']]  # feature column (2-D, as scikit-learn expects)
y = df['pricepercent']  # target column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# c. Evaluate the model (intercept and slope)
model = LinearRegression()
model.fit(X_train, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Training set')
plt.xlabel('sugarpercent')
plt.ylabel('pricepercent')
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
# The fitted line is drawn over the training range, as for the training plot.
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Testing set')
plt.xlabel('sugarpercent')
plt.ylabel('pricepercent')
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# 3. Linear regression by gradient descent: predict 'winpercent' from
#    'pricepercent'.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load your dataset from a CSV file.
# NOTE(review): the file name was lost when this document was extracted.
# 'candy-data.csv' is a guess based on the column names used below --
# replace it with the path to your CSV file.
df = pd.read_csv(r"C:\Users\harik\Downloads\candy-data.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values if any

# b. Splitting training and test data
# .values yields plain NumPy arrays, which the hand-written model below
# manipulates with np.insert / dot.
X = df[['pricepercent']].values  # feature column
y = df['winpercent'].values  # target column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# c. Evaluate the model (intercept and slope) using Gradient Descent
class LinearRegressionGD:
    """Linear regression fitted with batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the gradient on every iteration.
    n_iterations : int
        Number of full-batch gradient steps performed by ``fit``.

    After ``fit``, ``theta`` holds all parameters, ``intercept_`` is
    ``theta[0]`` and ``coef_`` is ``theta[1:]``.
    """

    # The constructor must be spelled ``__init__`` (double underscores);
    # with ``_init_`` Python never calls it and keyword construction fails.
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.theta = None
        self.intercept_ = None
        self.coef_ = None

    @staticmethod
    def _add_bias(X):
        """Return X as a 2-D float array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            # Treat a flat vector as a single feature column.
            X = X.reshape(-1, 1)
        return np.insert(X, 0, 1, axis=1)  # Add bias (intercept term)

    def fit(self, X, y):
        """Estimate the parameters from features X and targets y.

        Raises
        ------
        ValueError
            If ``y`` is empty (the gradient is averaged over ``len(y)``).
        """
        X_b = self._add_bias(X)
        y = np.asarray(y, dtype=float)
        m = len(y)
        if m == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        # Start from all-zero parameters: one per feature plus the bias.
        self.theta = np.zeros(X_b.shape[1])
        for _ in range(self.n_iterations):
            # Mean over samples of (prediction - target) * feature.
            gradients = 1 / m * X_b.T.dot(X_b.dot(self.theta) - y)
            self.theta -= self.learning_rate * gradients
        self.intercept_ = self.theta[0]
        self.coef_ = self.theta[1:]

    def predict(self, X):
        """Return predictions for X using the fitted parameters.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.theta is None:
            raise RuntimeError("Call fit() before predict().")
        return self._add_bias(X).dot(self.theta)
# Train the gradient-descent model and report its parameters.
model = LinearRegressionGD(learning_rate=0.01, n_iterations=1000)
model.fit(X_train, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Training set')
plt.xlabel('pricepercent')
plt.ylabel('winpercent')
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
# The fitted line is drawn over the training range, as for the training plot.
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Testing set')
plt.xlabel('pricepercent')
plt.ylabel('winpercent')
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# 4. Simple linear regression: predict 'winpercent' from 'sugarpercent'.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load your dataset from a CSV file.
# NOTE(review): the file name was lost when this document was extracted.
# 'candy-data.csv' is a guess based on the column names used below --
# replace it with the path to your CSV file.
df = pd.read_csv(r"C:\Users\harik\Downloads\candy-data.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values if any

# b. Splitting training and test data
X = df[['sugarpercent']]  # feature column (2-D, as scikit-learn expects)
y = df['winpercent']  # target column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# c. Evaluate the model (intercept and slope)
model = LinearRegression()
model.fit(X_train, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# d. Visualize the training set and testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Training set')
plt.xlabel('sugarpercent')
plt.ylabel('winpercent')
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
# The fitted line is drawn over the training range, as for the training plot.
plt.plot(X_train, model.predict(X_train), color='red',
         label='Linear Regression Line')
plt.title('Testing set')
plt.xlabel('sugarpercent')
plt.ylabel('winpercent')
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# 5. Quadratic (degree-2 polynomial) regression: predict 'Age_in_years'
#    from 'post_score'.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Load your dataset from a CSV file (replace with the path to your file)
df = pd.read_csv(r"C:\Users\harik\Downloads\MBA_ADMISSIONS.csv")

# a. Finding missing data
print(df.isnull().sum())
df = df.dropna()  # Drop rows with missing values if any

# b. Splitting training and test data
X = df[['post_score']]  # feature column (2-D, as scikit-learn expects)
y = df['Age_in_years']  # target column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# c. Evaluate the model
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
model = LinearRegression()
model.fit(X_train_poly, y_train)
intercept = model.intercept_
coefficients = model.coef_
print(f'Intercept: {intercept}')
print(f'Coefficients: {coefficients}')

# Points for drawing the fitted curve: sort the raw feature first, THEN
# expand it. Sorting each column of the expanded matrix independently
# only keeps x and x**2 on the same row when every x is non-negative.
X_curve = X_train.sort_values(by='post_score')
y_curve = model.predict(poly.transform(X_curve))

# d. Visualize the training set and testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color='blue', label='Training data')
plt.plot(X_curve['post_score'], y_curve, color='red',
         label='Quadratic Regression Line')
plt.title('Training set')
plt.xlabel('post_score')
plt.ylabel('Age_in_years')
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='green', label='Testing data')
# The fitted curve is drawn over the training range, as for the training plot.
plt.plot(X_curve['post_score'], y_curve, color='red',
         label='Quadratic Regression Line')
plt.title('Testing set')
plt.xlabel('post_score')
plt.ylabel('Age_in_years')
plt.legend()
plt.show()

# e. Predict the test set result
y_pred = model.predict(X_test_poly)

# f. Compare actual output value with predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# Calculate and print the evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# 6. Logistic regression on the study-hours / scores data.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3,
                    12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110, 15, 40, 50,
               55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

# Find missing values
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], y_train, color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], y_test, color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model.
# NOTE(review): LogisticRegression is a classifier, so every distinct score
# is treated as its own class and predictions can only be scores seen during
# training. If a pass/fail model is intended, derive a binary target first --
# confirm the intent before changing this.
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the test set results
y_pred = model.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# 7. Support vector regression (linear kernel) on the study-hours / scores
#    data.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3,
                    12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110, 15, 40, 50,
               55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

# Find missing values
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Create and train the model
model = SVR(kernel='linear')
model.fit(X_train, y_train)

# Predict the test set results (computed once and reused below)
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], y_train, color='blue',
            label='Training data')
plt.plot(X_train['Study Hours'], model.predict(X_train), color='red',
         label='Model prediction')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], y_test, color='green',
            label='Testing data')
plt.plot(X_test['Study Hours'], y_pred, color='red', label='Model prediction')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

print("Predicted values:", y_pred)

# Compare actual vs predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

# 8. Decision-tree regression on the study-hours / scores data, with
#    entropy and information-gain calculations.
import numpy as np
import pandas as pd
from scipy.stats import entropy
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Create a sample dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3,
                    12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110, 15, 40, 50,
               55, 65, 72, 88, 95],
}

# Save the dataset to a CSV file
df = pd.DataFrame(data)
df.to_csv('study_scores.csv', index=False)

# Read the saved CSV file
df = pd.read_csv('study_scores.csv')
print(df.head())

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Create and train the model
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict the test set results
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the entropy (in bits) of the target variable's value distribution
target_entropy = entropy(df['Scores'].value_counts(normalize=True), base=2)
print("Entropy of the target variable:", target_entropy)
def information_gain(df, split_attribute_name, target_name="Scores"):
    """Return the information gain (in bits) of splitting on one column.

    The gain is the entropy of ``df[target_name]`` minus the size-weighted
    average entropy of the target within each distinct value of
    ``df[split_attribute_name]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both columns.
    split_attribute_name : str
        Column whose distinct values define the partitions.
    target_name : str
        Column whose value distribution is measured.

    Returns
    -------
    float
        ``total_entropy - weighted_entropy``.
    """
    # Imported locally so the function does not depend on module-level names.
    from scipy.stats import entropy

    target = df[target_name]

    # Calculate the entropy of the total dataset
    total_entropy = entropy(target.value_counts(normalize=True), base=2)

    # Partition the target by the split attribute. Selecting the target
    # column directly avoids discarding rows merely because some unrelated
    # column holds NaN (which where(...).dropna() on the whole frame does).
    # Rows whose split value is missing fall in no group.
    partitions = target.groupby(df[split_attribute_name])
    n_rows = partitions.size().sum()

    # Calculate the weighted entropy
    weighted_entropy = sum(
        (len(part) / n_rows)
        * entropy(part.value_counts(normalize=True), base=2)
        for _, part in partitions
    )

    # Calculate the information gain
    return total_entropy - weighted_entropy
# Calculate the information gain for 'Study Hours'
info_gain = information_gain(df, 'Study Hours')
print("Information Gain for Study Hours:", info_gain)

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Plot the decision tree
plt.figure(figsize=(12, 8))
plot_tree(model, feature_names=['Study Hours'], filled=True)
plt.title('Decision Tree')
plt.show()

# 9. Random-forest regression on the study-hours / scores data.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3,
                    12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110, 15, 40, 50,
               55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

# Find missing values
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

# Define features and target variable
X = df[['Study Hours']]
y = df['Scores']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], y_train, color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], y_test, color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict the test set results
y_pred = model.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)
# 10. K-Means clustering on the study-hours / scores data.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3,
                    12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110, 15, 40, 50,
               55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

# Split the data. Clustering is unsupervised, so both columns stay together
# and serve as features.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], X_train['Scores'], color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], X_test['Scores'], color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_train)

# Predict the test set results (cluster index assigned to each test row)
y_pred = kmeans.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values.
# NOTE: there are no ground-truth labels here; the 'Actual' column holds the
# original row index of each test sample, shown next to its cluster.
comparison = pd.DataFrame({'Actual': X_test.index, 'Predicted': y_pred})
print(comparison)

# 11. DBSCAN clustering on the study-hours / scores data.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3,
                    12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110, 15, 40, 50,
               55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

# Split the data. Clustering is unsupervised, so both columns stay together
# and serve as features.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], X_train['Scores'], color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], X_test['Scores'], color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(X_train)

# Predict the test set results.
# NOTE(review): fit_predict() re-runs the clustering from scratch on X_test,
# so the fit on X_train above does not influence these labels. The test split
# also has fewer rows than min_samples=5 -- confirm that this is the intended
# experiment.
y_pred = dbscan.fit_predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values.
# NOTE: there are no ground-truth labels here; the 'Actual' column holds the
# original row index of each test sample, shown next to its cluster label.
comparison = pd.DataFrame({'Actual': X_test.index, 'Predicted': y_pred})
print(comparison)

# 12. Gaussian-mixture clustering on the study-hours / scores data.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split

# Create the dataset
data = {
    'Study Hours': [1.2, 2.3, 3.1, 4.0, 5.8, 6.1, 7.4, 8.6, 9.7, 10.5, 11.3,
                    12.2, 2.5, 3.9, 4.8, 5.2, 6.4, 7.3, 8.9, 9.4],
    'Scores': [10, 22, 30, 45, 48, 60, 74, 85, 92, 100, 105, 110, 15, 40, 50,
               55, 65, 72, 88, 95],
}
# Create DataFrame
df = pd.DataFrame(data)

# Split the data. Clustering is unsupervised, so both columns stay together
# and serve as features.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

# Visualize training set
plt.figure(figsize=(10, 6))
plt.scatter(X_train['Study Hours'], X_train['Scores'], color='blue',
            label='Training data')
plt.title('Training Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Visualize testing set
plt.figure(figsize=(10, 6))
plt.scatter(X_test['Study Hours'], X_test['Scores'], color='green',
            label='Testing data')
plt.title('Testing Set')
plt.xlabel('Study Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()

# Create and train the model
gmm = GaussianMixture(n_components=2, random_state=42)
gmm.fit(X_train)

# Predict the test set results (mixture component assigned to each test row)
y_pred = gmm.predict(X_test)
print("Predicted values:", y_pred)

# Compare actual vs predicted values.
# NOTE: there are no ground-truth labels here; the 'Actual' column holds the
# original row index of each test sample, shown next to its component.
comparison = pd.DataFrame({'Actual': X_test.index, 'Predicted': y_pred})
print(comparison)

Car Price Prediction with Linear Regression
No ratings yet
Car Price Prediction with Linear Regression
7 pages
Linear and Multiple Regression Analysis
No ratings yet
Linear and Multiple Regression Analysis
10 pages
Python ML Implementation Guide
No ratings yet
Python ML Implementation Guide
19 pages
Linear Regression and Classification Models
No ratings yet
Linear Regression and Classification Models
22 pages
MSE Evaluation in Econometrics with Pandas
No ratings yet
MSE Evaluation in Econometrics with Pandas
8 pages
Linear Regression on Housing Dataset
No ratings yet
Linear Regression on Housing Dataset
38 pages
Machine Learning Mini Project Report
No ratings yet
Machine Learning Mini Project Report
35 pages
Home Price Prediction with Regression
No ratings yet
Home Price Prediction with Regression
19 pages
Linear Regression with CSV Data
No ratings yet
Linear Regression with CSV Data
10 pages
Data Analysis and Modeling for Sales
No ratings yet
Data Analysis and Modeling for Sales
24 pages
Python Data Analysis and ML Techniques
No ratings yet
Python Data Analysis and ML Techniques
17 pages
Data Analysis with Regression Techniques
No ratings yet
Data Analysis with Regression Techniques
5 pages
Decision Tree and Machine Learning Models
No ratings yet
Decision Tree and Machine Learning Models
40 pages
Deepak Data Analysis 1
No ratings yet
Deepak Data Analysis 1
31 pages
Data Analysis and Visualization Techniques
No ratings yet
Data Analysis and Visualization Techniques
13 pages
Data Analysis and ML Techniques in Python
No ratings yet
Data Analysis and ML Techniques in Python
30 pages
Mechatronics AI & ML Lab Manual 2024
No ratings yet
Mechatronics AI & ML Lab Manual 2024
22 pages
Data Visualization and SVM Experiments
No ratings yet
Data Visualization and SVM Experiments
19 pages
Data Preprocessing and Modeling Techniques
No ratings yet
Data Preprocessing and Modeling Techniques
25 pages
Machine Learning Practical Guide
No ratings yet
Machine Learning Practical Guide
29 pages
Machine Learning Experiments Overview
No ratings yet
Machine Learning Experiments Overview
11 pages
Linear Regression and Classification Models
No ratings yet
Linear Regression and Classification Models
13 pages
Machine Learning Course Lab Guide
No ratings yet
Machine Learning Course Lab Guide
9 pages
Sales and User Data Analysis Guide
No ratings yet
Sales and User Data Analysis Guide
30 pages
Data Visualization and Analysis Techniques
No ratings yet
Data Visualization and Analysis Techniques
9 pages
Data Preprocessing Techniques in Python
No ratings yet
Data Preprocessing Techniques in Python
44 pages
Linear Regression Implementation Guide
100% (1)
Linear Regression Implementation Guide
45 pages
Data Exploration and Preprocessing in Python
No ratings yet
Data Exploration and Preprocessing in Python
20 pages
Linear Regression on Boston Dataset
No ratings yet
Linear Regression on Boston Dataset
2 pages
List Operations and Regression Techniques
No ratings yet
List Operations and Regression Techniques
3 pages
Python Data Analysis with Libraries
No ratings yet
Python Data Analysis with Libraries
27 pages
Experiments in Machine Learning Models
No ratings yet
Experiments in Machine Learning Models
21 pages
Machine Learning Lab Assignments
100% (2)
Machine Learning Lab Assignments
23 pages
Machine Learning Algorithms in Python
No ratings yet
Machine Learning Algorithms in Python
8 pages
ML PDF
No ratings yet
ML PDF
30 pages
Data Analysis and Visualization Techniques
No ratings yet
Data Analysis and Visualization Techniques
16 pages
Salary Prediction Using Linear Regression
No ratings yet
Salary Prediction Using Linear Regression
35 pages
Linear Regression and Classification Models
No ratings yet
Linear Regression and Classification Models
20 pages
Data Analysis and Regression Models
No ratings yet
Data Analysis and Regression Models
9 pages
Data Analysis and Regression Models
No ratings yet
Data Analysis and Regression Models
14 pages
MSE and Model Evaluation with Sklearn
No ratings yet
MSE and Model Evaluation with Sklearn
7 pages
Machine Learning Algorithms in Python
No ratings yet
Machine Learning Algorithms in Python
34 pages
CP4252 Machine Learning Lab Manual
No ratings yet
CP4252 Machine Learning Lab Manual
26 pages
Data Analysis with Python Visualizations
No ratings yet
Data Analysis with Python Visualizations
14 pages
Linear Regression and Classification Examples
No ratings yet
Linear Regression and Classification Examples
18 pages
Linear & Logistic Regression Programs
No ratings yet
Linear & Logistic Regression Programs
17 pages
Linear Regression Techniques in Python
No ratings yet
Linear Regression Techniques in Python
3 pages
Machine Learning Model Implementations
No ratings yet
Machine Learning Model Implementations
24 pages
Feature Selection for House Price Prediction
No ratings yet
Feature Selection for House Price Prediction
17 pages
LDA and Linear Regression Implementation
No ratings yet
LDA and Linear Regression Implementation
21 pages
Wine Quality Prediction Models Analysis
No ratings yet
Wine Quality Prediction Models Analysis
4 pages
Regression Analysis Cheat Sheet
No ratings yet
Regression Analysis Cheat Sheet
9 pages
Linear Regression with Boston Dataset
No ratings yet
Linear Regression with Boston Dataset
14 pages
Multiple Linear Regression Implementation
No ratings yet
Multiple Linear Regression Implementation
9 pages
Simple Linear Regression with Python
No ratings yet
Simple Linear Regression with Python
30 pages
Data Processing for Profit Prediction
No ratings yet
Data Processing for Profit Prediction
18 pages
Supervised Learning with Regression Techniques
No ratings yet
Supervised Learning with Regression Techniques
14 pages
Simple Linear Regression in Python
No ratings yet
Simple Linear Regression in Python
14 pages
NumPy and Pandas Data Analysis Techniques
No ratings yet
NumPy and Pandas Data Analysis Techniques
14 pages
Bimbo Bread Production Flowchart
No ratings yet
Bimbo Bread Production Flowchart
2 pages
JEE Advanced 2024 Model A Solutions
No ratings yet
JEE Advanced 2024 Model A Solutions
11 pages
Helical Antenna Overview and Design
0% (1)
Helical Antenna Overview and Design
17 pages
Primary 6 English Learning Scheme 2025
No ratings yet
Primary 6 English Learning Scheme 2025
52 pages
Measures of Central Tendency in Math 7
100% (1)
Measures of Central Tendency in Math 7
8 pages
Understanding 1-Way ANCOVA Results
No ratings yet
Understanding 1-Way ANCOVA Results
5 pages
Grade-IX - Annual Exam Syllabus-2025-26
No ratings yet
Grade-IX - Annual Exam Syllabus-2025-26
3 pages
Equilibrium Problems and Solutions
No ratings yet
Equilibrium Problems and Solutions
33 pages
Amplitude Modulated Signals Methods
No ratings yet
Amplitude Modulated Signals Methods
16 pages
CS 114 Winter 2025: Assignment 01
No ratings yet
CS 114 Winter 2025: Assignment 01
3 pages
Econometrics Exercises and Solutions
No ratings yet
Econometrics Exercises and Solutions
8 pages
Sliding Window Techniques in Strings
No ratings yet
Sliding Window Techniques in Strings
2 pages
TORA Solutions for Optimization Problems
No ratings yet
TORA Solutions for Optimization Problems
15 pages
Decentralized Control for Islanded Microgrids
No ratings yet
Decentralized Control for Islanded Microgrids
8 pages
Gliese 146's Visual Magnitude Insights
No ratings yet
Gliese 146's Visual Magnitude Insights
34 pages
History of Geometry: Ancient Insights
No ratings yet
History of Geometry: Ancient Insights
12 pages
Mathematics: Quarter 2 - Module 11: Dividing Decimal With Up To 2 Decimal Places
100% (1)
Mathematics: Quarter 2 - Module 11: Dividing Decimal With Up To 2 Decimal Places
9 pages
Chisel 2.2 Hardware Design Tutorial
No ratings yet
Chisel 2.2 Hardware Design Tutorial
16 pages
Multi-Class Classification and Regression
No ratings yet
Multi-Class Classification and Regression
22 pages
Double Integrals in Polar Coordinates
No ratings yet
Double Integrals in Polar Coordinates
13 pages
IELTS Academic Writing Task 1 Guide
No ratings yet
IELTS Academic Writing Task 1 Guide
6 pages
RBI Grade B DEPR 2025 Exam Guide
No ratings yet
RBI Grade B DEPR 2025 Exam Guide
22 pages
Radix-2 Decimation-in-Frequency FFT
No ratings yet
Radix-2 Decimation-in-Frequency FFT
5 pages
Tirunelveli 10th Maths Quarterly Exam 2024
No ratings yet
Tirunelveli 10th Maths Quarterly Exam 2024
2 pages
Math3 q3 Mod11
No ratings yet
Math3 q3 Mod11
19 pages
Sensory-Tonic Field Theory Explained
No ratings yet
Sensory-Tonic Field Theory Explained
23 pages
Aerial Building Detection Thesis by Piyal Roy
No ratings yet
Aerial Building Detection Thesis by Piyal Roy
46 pages
ICRAMCS 2024 Conference Details
No ratings yet
ICRAMCS 2024 Conference Details
1 page
Class 11 Straight Line Formulas
No ratings yet
Class 11 Straight Line Formulas
4 pages
IB HL Calculus Differentiation Questions
No ratings yet
IB HL Calculus Differentiation Questions
10 pages

Data Analysis and Visualization in Python

Uploaded by

Data Analysis and Visualization in Python

Uploaded by

1.

# e. Predict the test set result

# Predict the test set results

11. import pandas as pd

12. import pandas as pd

You might also like