practical_1
February 6, 2024
[ ]: # name = "schirmerchad/bostonhoustingmlnd"
# dataset = name.split("/")[1] + ".zip"
# # Mount your Google Drive.
# from google.colab import drive
# drive.mount("/content/drive")
# kaggle_creds_path = "kaggle_token/kaggle.json"
# ! pip install kaggle --quiet
# ! mkdir ~/.kaggle
# ! cp "/content/drive/MyDrive/kaggle_token/kaggle.json" ~/.kaggle/
# ! chmod 600 ~/.kaggle/kaggle.json
# ! kaggle datasets download -d {name}
# ! mkdir kaggle_data
# ! unzip {dataset} -d kaggle_data
# # Unmount your Google Drive.
# drive.flush_and_unmount()
Mounted at /content/drive
mkdir: cannot create directory ‘/root/.kaggle’: File exists
Downloading bostonhoustingmlnd.zip to /content
0% 0.00/4.35k [00:00<?, ?B/s]
100% 4.35k/4.35k [00:00<00:00, 10.8MB/s]
mkdir: cannot create directory ‘kaggle_data’: File exists
Archive: bostonhoustingmlnd.zip
inflating: kaggle_data/housing.csv
[ ]: import torch
from torch import nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
[ ]: df = pd.read_csv("kaggle_data/housing.csv")
[ ]: device = "cuda" if torch.cuda.is_available() else "cpu"
[ ]: from sklearn.model_selection import train_test_split
[ ]: df
[ ]: RM LSTAT PTRATIO MEDV
0 6.575 4.98 15.3 504000.0
1 6.421 9.14 17.8 453600.0
2 7.185 4.03 17.8 728700.0
3 6.998 2.94 18.7 701400.0
4 7.147 5.33 18.7 760200.0
.. … … … …
484 6.593 9.67 21.0 470400.0
485 6.120 9.08 21.0 432600.0
486 6.976 5.64 21.0 501900.0
487 6.794 6.48 21.0 462000.0
488 6.030 7.88 21.0 249900.0
[489 rows x 4 columns]
[ ]: df.isnull().sum()
[ ]: RM 0
LSTAT 0
PTRATIO 0
MEDV 0
dtype: int64
[ ]: from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
[ ]: X = df.drop('MEDV', axis=1)
y = df['MEDV']
X = np.array(X)
y = np.array(y)
scaler = StandardScaler()
X = scaler.fit_transform(X)
y = scaler.fit_transform(y.reshape(-1, 1))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")
X_train shape: (391, 3)
y_train shape: (391, 1)
X_test shape: (98, 3)
y_test shape: (98, 1)
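Note that the single `scaler` object is fit twice, first on `X` and then on `y`, so after this cell it only holds the statistics of `MEDV`. A hedged sketch of the more explicit alternative, keeping one scaler per variable so `inverse_transform` stays available for both; the names `x_scaler`/`y_scaler` are illustrative, not from the original:
[ ]: # sketch: separate scalers for features and target
x_scaler, y_scaler = StandardScaler(), StandardScaler()
X_alt = x_scaler.fit_transform(df.drop('MEDV', axis=1).to_numpy())
y_alt = y_scaler.fit_transform(df['MEDV'].to_numpy().reshape(-1, 1))
print(X_alt.shape, y_alt.shape)  # (489, 3) (489, 1)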
[ ]: def plot(train_data=X_train, train_labels=y_train, test_data=X_test,
         test_labels=y_test, predictions=None, weight=None, bias=None):
    plt.figure(figsize=(10, 7))
    plt.scatter(train_data, train_labels, c="b", s=4, label="Training data")
    plt.scatter(test_data, test_labels, c="r", s=4, label="Testing Data")
    if predictions is not None:
        plt.scatter(test_data, predictions, c="g", label="Predictions")
        x_values = np.linspace(-4, 4, 100)
        y_values = weight * x_values + bias
        plt.plot(x_values, y_values, color='red', label=f'y = {weight}x + {bias}')
    plt.legend(prop={"size": 14})
[ ]: plot(train_data=X_train.T[0], test_data=X_test.T[0])
[ ]: class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_layer = nn.Linear(in_features=3, out_features=1)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear_layer(x)
[ ]: torch.manual_seed(42)
[ ]: <torch._C.Generator at 0x7ea8f1b79630>
[ ]: model = Model()
[ ]: loss = nn.L1Loss() # MAE
[ ]: opt = torch.optim.SGD(params=model.parameters(), lr=0.01)  # SGD with lr=0.01 (optimizer choice assumed)
[ ]: X_train, X_test, y_train, y_test = (torch.tensor(X_train, dtype=torch.float),
    torch.tensor(X_test, dtype=torch.float),
    torch.tensor(y_train, dtype=torch.float),
    torch.tensor(y_test, dtype=torch.float))
<ipython-input-113-e7e4131fbf59>:1: UserWarning: To copy construct from a
tensor, it is recommended to use sourceTensor.clone().detach() or
sourceTensor.clone().detach().requires_grad_(True), rather than
torch.tensor(sourceTensor).
  X_train, X_test, y_train, y_test = (torch.tensor(X_train, dtype=torch.float), …
[ ]: epochs = 200
for epoch in tqdm(range(epochs)):
    model.train()
    # forward pass
    y_pred = model(X_train)
    lossval = loss(y_pred, y_train)
    opt.zero_grad()
    # backprop
    lossval.backward()
    # optimizer step
    opt.step()
    # TEST
    model.eval()
    with torch.inference_mode():
        test_pred = model(X_test)
        test_loss = loss(test_pred, y_test)
    # print
    if epoch % 10 == 0:
        print(f"epoch {epoch}, loss {lossval}, test_loss {test_loss}")
0%| | 0/200 [00:00<?, ?it/s]
epoch 0, loss 0.8999658823013306, test_loss 0.9410500526428223
epoch 10, loss 0.8269047737121582, test_loss 0.8732254505157471
epoch 20, loss 0.7580215930938721, test_loss 0.8083762526512146
epoch 30, loss 0.6956650018692017, test_loss 0.7480545043945312
epoch 40, loss 0.6434133052825928, test_loss 0.6937804818153381
epoch 50, loss 0.6047266125679016, test_loss 0.6513829231262207
epoch 60, loss 0.5723316073417664, test_loss 0.6149174571037292
epoch 70, loss 0.543216347694397, test_loss 0.58405601978302
epoch 80, loss 0.5205875635147095, test_loss 0.5580319166183472
epoch 90, loss 0.5055828094482422, test_loss 0.5371074676513672
epoch 100, loss 0.4923928380012512, test_loss 0.5179988145828247
epoch 110, loss 0.48150429129600525, test_loss 0.5013106465339661
epoch 120, loss 0.472374826669693, test_loss 0.4893476665019989
epoch 130, loss 0.46420061588287354, test_loss 0.4796214699745178
epoch 140, loss 0.45730704069137573, test_loss 0.4711703956127167
epoch 150, loss 0.450693279504776, test_loss 0.46309569478034973
epoch 160, loss 0.4443809688091278, test_loss 0.4567875862121582
epoch 170, loss 0.4393230974674225, test_loss 0.45154622197151184
epoch 180, loss 0.4351387619972229, test_loss 0.4470271170139313
epoch 190, loss 0.43130671977996826, test_loss 0.4429181218147278
[ ]: y_preds = []
model.eval()
with torch.inference_mode():
    test_pred = model(X_test)
    y_preds.append(test_pred.numpy())
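Because the targets were standardized, the L1 losses above are in standard-deviation units. A small follow-up sketch (not part of the original run) that converts the test MAE back to dollars, relying on the fact that `scaler` was last fit on `y`:
[ ]: mae_scaled = np.abs(y_preds[0] - y_test.numpy()).mean()
mae_dollars = mae_scaled * scaler.scale_[0]  # multiply by the std of MEDV to restore units
print(mae_scaled, mae_dollars)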
[ ]: model.state_dict()
[ ]: OrderedDict([('linear_layer.weight', tensor([[ 0.6438, -0.1606, -0.2250]])),
('linear_layer.bias', tensor([0.0866]))])
[ ]: #y = weight * X + bias
[ ]: weight = model.state_dict()["linear_layer.weight"][0][0]
[ ]: bias = model.state_dict()["linear_layer.bias"][0]
[ ]: plot(train_data=X_train.T[0], test_data=X_test.T[0], predictions=y_preds,
     weight=weight.item(), bias=bias.item())
practical_2
February 6, 2024
[ ]: name = "lakshmi25npathi/imdb-dataset-of-50k-movie-reviews"
dataset = [Link]("/")[1] + ".zip"
# Mount your Google Drive.
from [Link] import drive
[Link]("/content/drive")
kaggle_creds_path = "kaggle_token/[Link]"
! pip install kaggle --quiet
! mkdir ~/.kaggle
! cp "/content/drive/MyDrive/kaggle_token/[Link]" ~/.kaggle/
! chmod 600 ~/.kaggle/[Link]
! kaggle datasets download -d {name}
! mkdir kaggle_data
! unzip {dataset} -d kaggle_data
# Unmount your Google Drive
drive.flush_and_unmount()
Mounted at /content/drive
Downloading imdb-dataset-of-50k-movie-reviews.zip to /content
66% 17.0M/25.7M [00:00<00:00, 69.4MB/s]
100% 25.7M/25.7M [00:00<00:00, 85.5MB/s]
Archive: imdb-dataset-of-50k-movie-reviews.zip
inflating: kaggle_data/IMDB Dataset.csv
[ ]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
stopwords = set(stopwords.words('english'))
[nltk_data] Downloading package stopwords to /root/nltk_data…
[nltk_data] Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data…
[nltk_data] Downloading package punkt to /root/nltk_data…
[nltk_data] Unzipping tokenizers/punkt.zip.
[ ]: import os
from tqdm.auto import tqdm
from collections import Counter
[ ]: df = pd.read_csv("kaggle_data/IMDB [Link]")
[ ]: def transform_label(label):
    return 1 if label == 'positive' else 0
x = []
for i in df["sentiment"]:
    x.append(transform_label(i))
df["label"] = x
df.head()
[ ]: review sentiment label
0 One of the other reviewers has mentioned that … positive 1
1 A wonderful little production. <br /><br />The… positive 1
2 I thought this was a wonderful way to spend ti… positive 1
3 Basically there's a family where a little boy … negative 0
4 Petter Mattei's "Love in the Time of Money" is… positive 1
[ ]: df['token_length'] = df['review'].apply(lambda x: len(x.split()))
[ ]: data_pos = df[df['label'] == 1]
data_pos['token_length'].describe()
[ ]: count 25000.000000
mean 232.849320
std 177.497046
min 10.000000
25% 125.000000
50% 172.000000
75% 284.000000
max 2470.000000
Name: token_length, dtype: float64
[ ]: def process_text(text):
    text = word_tokenize(text)
    text = [i for i in text if i not in stopwords]
    lemmatizer = WordNetLemmatizer()
    text = [lemmatizer.lemmatize(t) for t in text]
    text = [i for i in text if i not in stopwords]
    return ' '.join(text)
[ ]: #df['clean'] = df['review'].apply(process_text)
df['processed'] = df['review'].apply(process_text)
df.head()
[ ]: review sentiment label \
0 One of the other reviewers has mentioned that … positive 1
1 A wonderful little production. <br /><br />The… positive 1
2 I thought this was a wonderful way to spend ti… positive 1
3 Basically there's a family where a little boy … negative 0
4 Petter Mattei's "Love in the Time of Money" is… positive 1
token_length processed
0 307 One reviewer mentioned watching 1 Oz episode '…
1 162 A wonderful little production . < br / > < br …
2 166 I thought wonderful way spend time hot summer …
3 138 Basically 's family little boy ( Jake ) think …
4 230 Petter Mattei 's `` Love Time Money '' visuall…
[ ]: reviews = df.processed.values
words = ' '.join(reviews)
words = words.split()
words[:10]
[ ]: ['One',
'reviewer',
'mentioned',
'watching',
'1',
'Oz',
'episode',
"'ll",
'hooked',
'.']
[ ]: counter = Counter(words)
vocab = sorted(counter, key=counter.get, reverse=True)
int2word = dict(enumerate(vocab, 1))
int2word[0] = '<PAD>'
word2int = {word: id for id, word in int2word.items()}
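A quick round-trip sketch (not in the original notebook) to confirm that the two vocabulary maps invert each other; `reviews`, `word2int`, and `int2word` are the objects built above:
[ ]: sample = reviews[0].split()[:5]
ids = [word2int[w] for w in sample]
print(ids)  # token ids; 1-indexed, since 0 is reserved for <PAD>
print([int2word[i] for i in ids])  # should print the original five tokens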
[ ]: reviews_enc = [[word2int[word] for word in review.split()] for review in tqdm(reviews)]
# print the first 5 token ids of the first 5 reviews
for i in range(5):
    print(reviews_enc[i][:5])
0%| | 0/50000 [00:00<?, ?it/s]
[212, 1097, 970, 97, 409]
[66, 341, 74, 268, 2]
[7, 116, 341, 44, 1036]
[2376, 8, 160, 74, 269]
[87832, 11029, 8, 16, 1122]
[ ]: def pad_features(reviews, pad_id, seq_length=128):
    # features = np.zeros((len(reviews), seq_length), dtype=int)
    features = np.full((len(reviews), seq_length), pad_id, dtype=int)
    for i, row in enumerate(reviews):
        # if seq_length < len(row) then the review is trimmed
        features[i, :len(row)] = np.array(row)[:seq_length]
    return features
seq_length = 256
features = pad_features(reviews_enc, pad_id=word2int['<PAD>'], seq_length=seq_length)
assert len(features) == len(reviews_enc)
assert len(features[0]) == seq_length
features[:10, :10]
[ ]: array([[  212,  1097,   970,    97,   409,  3810,   204,   161,  3069,     2],
       [   66,   341,    74,   268,     2,     5,     6,     3,     4,     5],
       [    7,   116,   341,    44,  1036,    23,   923,  1794,  2653,     1],
       [ 2376,     8,   160,    74,   269,    13,  3233,    12,    50,     8],
       [87832, 11029,     8,    16,  1122,  2011,  7681,    14,  2255,  1321],
       [ 2788,  3730,   386,     9,     1,    28, 47677,     1,  3106,  7808],
       [    7,   220,    26,    19,    30, 13224,  1925, 65527,   151,  8291],
       [   22,    43,   448,     1,  1367,   107,  4044,   186,   891,     8],
       [54585,  1000,   376,    10,     7,   217,   853,    97,    10,     2],
       [   78,    19,   141,  2602,  9168,  2087,    19,     9,     2,    78]])
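Since `pad_features` right-pads with `word2int['<PAD>'] == 0` and truncates at `seq_length`, every row comes out exactly 256 ids long regardless of the raw review length. A small sanity-check sketch (not in the original run):
[ ]: lengths = [len(r) for r in reviews_enc]
print(min(lengths), max(lengths))  # raw encoded lengths vary widely
print(features.shape)  # but the padded matrix is (50000, 256)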
[ ]: labels = df.label.to_numpy()
labels
[ ]: array([1, 1, 1, …, 0, 0, 0])
[ ]: train_size = .7
val_size = .5
split_id = int(len(features) * train_size)
train_x, remain_x = features[:split_id], features[split_id:]
train_y, remain_y = labels[:split_id], labels[split_id:]
split_val_id = int(len(remain_x) * val_size)
val_x, test_x = remain_x[:split_val_id], remain_x[split_val_id:]
val_y, test_y = remain_y[:split_val_id], remain_y[split_val_id:]
print('Feature Shapes:')
print('===============')
print('Train set: {}'.format(train_x.shape))
print('Validation set: {}'.format(val_x.shape))
print('Test set: {}'.format(test_x.shape))
Feature Shapes:
===============
Train set: (35000, 256)
Validation set: (7500, 256)
Test set: (7500, 256)
[ ]: print(len(train_y[train_y == 0]), len(train_y[train_y == 1]))
print(len(val_y[val_y == 0]), len(val_y[val_y == 1]))
print(len(test_y[test_y == 0]), len(test_y[test_y == 1]))
17510 17490
3753 3747
3737 3763
[ ]: import torch
from torch.utils.data import TensorDataset, DataLoader
from torch import nn
from torch.optim import Adam
[ ]: batch_size = 128
trainset = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
validset = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
testset = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
trainloader = DataLoader(trainset, shuffle=True, batch_size=batch_size)
valloader = DataLoader(validset, shuffle=True, batch_size=batch_size)
testloader = DataLoader(testset, shuffle=True, batch_size=batch_size)
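A one-batch sketch (not in the original run) to confirm what the loaders emit before training; the expected shapes assume `batch_size=128` and `seq_length=256` from above:
[ ]: xb, yb = next(iter(trainloader))
print(xb.shape, yb.shape)  # expected: torch.Size([128, 256]) torch.Size([128])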
[ ]: class SentimentModel(nn.Module):
    def __init__(self, vocab_size, output_size, hidden_size=128, embedding_size=400, n_layers=2, dropout=0.2):
        super(SentimentModel, self).__init__()
        # embedding layer maps input token ids to vector representations
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        # LSTM layer provided by the PyTorch library
        self.lstm = nn.LSTM(embedding_size, hidden_size, n_layers, dropout=dropout, batch_first=True)
        # dropout layer
        self.dropout = nn.Dropout(0.3)
        # linear layer for the output
        self.fc = nn.Linear(hidden_size, output_size)
        # sigmoid layer because this is binary classification
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # convert features to long
        x = x.long()
        # map input to vectors
        x = self.embedding(x)
        # pass forward to the LSTM
        o, _ = self.lstm(x)
        # take the last sequence output
        o = o[:, -1, :]
        # apply dropout and the fully connected layer
        o = self.dropout(o)
        o = self.fc(o)
        # sigmoid
        o = self.sigmoid(o)
        return o
[ ]: device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
cuda
[ ]: vocab_size = len(word2int)
output_size = 1
embedding_size = 256
hidden_size = 512
n_layers = 2
dropout=0.25
model = SentimentModel(vocab_size, output_size, hidden_size, embedding_size, n_layers, dropout)
print(model)
SentimentModel(
(embedding): Embedding(186157, 256)
(lstm): LSTM(256, 512, num_layers=2, batch_first=True, dropout=0.25)
(dropout): Dropout(p=0.3, inplace=False)
(fc): Linear(in_features=512, out_features=1, bias=True)
(sigmoid): Sigmoid()
)
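Before moving the model to the GPU, a dry-run sketch with a fake batch of token ids can verify that `forward` returns one probability per review; `dummy` is a hypothetical input, everything else reuses the objects defined above:
[ ]: dummy = torch.randint(0, vocab_size, (4, seq_length))
with torch.inference_mode():
    print(model(dummy).shape)  # expected: torch.Size([4, 1]), values in (0, 1)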
[ ]: lr = 0.001
criterion = nn.BCELoss() # BCELoss because this is a binary classification problem
optim = Adam(model.parameters(), lr=lr)
grad_clip = 5
epochs = 8
print_every = 1
history = {
'train_loss': [],
'train_acc': [],
'val_loss': [],
'val_acc': [],
'epochs': epochs
}
es_limit = 5
[ ]: model = model.to(device)
epochloop = tqdm(range(epochs), position=0, desc='Training', leave=True)
# early stop trigger
es_trigger = 0
val_loss_min = np.inf
for e in epochloop:
    model.train()
    train_loss = 0
    train_acc = 0
    for id, (feature, target) in enumerate(trainloader):
        # add epoch meta info
        epochloop.set_postfix_str(f'Training batch {id}/{len(trainloader)}')
        feature, target = feature.to(device), target.to(device)
        optim.zero_grad()
        out = model(feature)
        predicted = torch.tensor([1 if i == True else 0 for i in out > 0.5], device=device)
        equals = predicted == target
        acc = torch.mean(equals.type(torch.FloatTensor))
        train_acc += acc.item()
        loss = criterion(out.squeeze(), target.float())
        train_loss += loss.item()
        loss.backward()
        # clip grad
        # nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optim.step()
        del feature, target, predicted
    history['train_loss'].append(train_loss / len(trainloader))
    history['train_acc'].append(train_acc / len(trainloader))
    model.eval()
    val_loss = 0
    val_acc = 0
    with torch.inference_mode():
        for id, (feature, target) in enumerate(valloader):
            epochloop.set_postfix_str(f'Validation batch {id}/{len(valloader)}')
            feature, target = feature.to(device), target.to(device)
            out = model(feature)
            predicted = torch.tensor([1 if i == True else 0 for i in out > 0.5], device=device)
            equals = predicted == target
            acc = torch.mean(equals.type(torch.FloatTensor))
            val_acc += acc.item()
            loss = criterion(out.squeeze(), target.float())
            val_loss += loss.item()
            del feature, target, predicted
    history['val_loss'].append(val_loss / len(valloader))
    history['val_acc'].append(val_acc / len(valloader))
    # reset model mode
    model.train()
    # add epoch meta info
    epochloop.set_postfix_str(f'Val Loss: {val_loss / len(valloader):.3f} | Val Acc: {val_acc / len(valloader):.3f}')
    # print epoch
    if (e+1) % print_every == 0:
        epochloop.write(f'Epoch {e+1}/{epochs} | Train Loss: {train_loss / len(trainloader):.3f} Train Acc: {train_acc / len(trainloader):.3f} | Val Loss: {val_loss / len(valloader):.3f} Val Acc: {val_acc / len(valloader):.3f}')
        epochloop.update()
    # save the model if validation loss decreased
    if val_loss / len(valloader) <= val_loss_min:
        torch.save(model.state_dict(), './sentiment_lstm.pt')
        val_loss_min = val_loss / len(valloader)
        es_trigger = 0
    else:
        epochloop.write(f'[WARNING] Validation loss did not improve ({val_loss_min:.3f} --> {val_loss / len(valloader):.3f})')
        es_trigger += 1
    # force early stop
    if es_trigger >= es_limit:
        epochloop.write(f'Early stopped at Epoch-{e+1}')
        # update epochs history
        history['epochs'] = e+1
        break
Training: 0%| | 0/8 [00:00<?, ?it/s]
Epoch 1/8 | Train Loss: 0.694 Train Acc: 0.504 | Val Loss: 0.692 Val Acc: 0.510
Epoch 2/8 | Train Loss: 0.690 Train Acc: 0.516 | Val Loss: 0.691 Val Acc: 0.500
Epoch 3/8 | Train Loss: 0.693 Train Acc: 0.511 | Val Loss: 0.694 Val Acc: 0.498
[WARNING] Validation loss did not improve (0.691 --> 0.694)
Epoch 4/8 | Train Loss: 0.694 Train Acc: 0.505 | Val Loss: 0.693 Val Acc: 0.508
[WARNING] Validation loss did not improve (0.691 --> 0.693)
Epoch 5/8 | Train Loss: 0.692 Train Acc: 0.508 | Val Loss: 0.695 Val Acc: 0.509
[WARNING] Validation loss did not improve (0.691 --> 0.695)
Epoch 6/8 | Train Loss: 0.682 Train Acc: 0.536 | Val Loss: 0.671 Val Acc: 0.668
Epoch 7/8 | Train Loss: 0.631 Train Acc: 0.640 | Val Loss: 0.612 Val Acc: 0.730
Epoch 8/8 | Train Loss: 0.447 Train Acc: 0.806 | Val Loss: 0.417 Val Acc: 0.821
[ ]: model.eval()
# metrics
test_loss = 0
test_acc = 0
all_target = []
all_predicted = []
testloop = tqdm(testloader, leave=True, desc='Inference')
with torch.no_grad():
    for feature, target in testloop:
        feature, target = feature.to(device), target.to(device)
        out = model(feature)
        predicted = torch.tensor([1 if i == True else 0 for i in out > 0.5], device=device)
        equals = predicted == target
        acc = torch.mean(equals.type(torch.FloatTensor))
        test_acc += acc.item()
        loss = criterion(out.squeeze(), target.float())
        test_loss += loss.item()
        all_target.extend(target.cpu().numpy())
        all_predicted.extend(predicted.cpu().numpy())
print(f'Accuracy: {test_acc/len(testloader):.4f}, Loss: {test_loss/len(testloader):.4f}')
Inference: 0%| | 0/59 [00:00<?, ?it/s]
Accuracy: 0.8260, Loss: 0.3995
[ ]: from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
[ ]: print(classification_report(all_predicted, all_target))
precision recall f1-score support
0 0.89 0.79 0.84 4191
1 0.77 0.87 0.82 3309
accuracy 0.83 7500
macro avg 0.83 0.83 0.83 7500
weighted avg 0.83 0.83 0.83 7500
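One caveat: scikit-learn's signature is classification_report(y_true, y_pred), so with the arguments swapped as above, the precision and recall columns trade places and support counts predictions rather than true labels. The conventional call would be:
[ ]: print(classification_report(all_target, all_predicted))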
[ ]: cm = confusion_matrix(all_predicted, all_target)
plt.figure(figsize=(5,5))
sns.heatmap(cm, annot=True, fmt='g')
plt.title('Confusion Matrix')
plt.show()
practical_3_fashionMNIST
February 6, 2024
[ ]: import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
[ ]: torch.__version__
[ ]: '2.1.0+cu121'
[ ]: torchvision.__version__
[ ]: '0.16.0+cu121'
[ ]: train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
    target_transform=None
)
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
    target_transform=None
)
[ ]: train_data,test_data
[ ]: (Dataset FashionMNIST
Number of datapoints: 60000
Root location: data
Split: Train
StandardTransform
Transform: ToTensor(),
Dataset FashionMNIST
Number of datapoints: 10000
Root location: data
Split: Test
StandardTransform
Transform: ToTensor())
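Each sample is a (1, 28, 28) float tensor scaled to [0, 1] by ToTensor(), paired with an integer class label; a quick inspection sketch (not in the original notebook):
[ ]: img, label = train_data[0]
print(img.shape, img.dtype, img.min().item(), img.max().item(), label)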
[ ]: class_names = train_data.classes
[ ]: fig = plt.figure(figsize=(10,10))
rows, cols = 4, 4
for i in range(1, rows*cols+1):
    random_idx = torch.randint(0, len(train_data), size=[1]).item()
    img, label = train_data[random_idx]
    fig.add_subplot(rows, cols, i)
    plt.imshow(img.squeeze(), cmap="gray")
    plt.title(class_names[label])
[ ]: from torch.utils.data import DataLoader
[ ]: BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)
train_dataloader,test_dataloader
[ ]: (<torch.utils.data.dataloader.DataLoader at 0x79cace64b460>,
<torch.utils.data.dataloader.DataLoader at 0x79cace64b7c0>)
[ ]: train_features_batch, train_labels_batch = next(iter(train_dataloader))
[ ]: # torch.manual_seed(42)
random_idx = torch.randint(0, len(train_features_batch), size=[1]).item()
img, label = train_features_batch[random_idx], train_labels_batch[random_idx]
plt.imshow(img.squeeze(), cmap="gray")
plt.title(class_names[label])
plt.axis(False)
[ ]: (-0.5, 27.5, 27.5, -0.5)
[ ]: flatten_model = nn.Flatten()
x = train_features_batch[0]
output = flatten_model(x)
[ ]: output.squeeze().shape
[ ]: torch.Size([784])
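nn.Flatten keeps the batch dimension (dim 0) and collapses the rest, so the (1, 28, 28) image becomes (1, 784) with 784 = 28 * 28; a sketch making the shapes explicit:
[ ]: print(x.shape, "->", output.shape)  # torch.Size([1, 28, 28]) -> torch.Size([1, 784])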
[ ]: import requests
from pathlib import Path
if Path("helper_functions.py").is_file():
print("Helper Functions exists, no download required")
else:
print()
req = [Link]("[Link]
↪pytorch-deep-learning/main/helper_functions.py")
with open("helper_functions.py","wb") as f:
[Link]([Link])
import helper_functions
[ ]: from helper_functions import accuracy_fn
[ ]: from timeit import default_timer as timer
def print_train_time(start: float, end: float, device: torch.device = None):
    total = end - start
    print(f"Train time on {device}: {total:.3f} secs")
[ ]: from tqdm.auto import tqdm
[ ]: import torch
device = 'cuda' if torch.cuda.is_available() else "cpu"
[ ]: device
[ ]: 'cuda'
[ ]: def train_step(
    model: torch.nn.Module,
    data_loader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module,
    optimizer: torch.optim.Optimizer,
    accuracy_fn,
    device: torch.device):
    train_loss, train_acc = 0, 0
    model.train()
    for batch, (X, y) in enumerate(data_loader):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss
        train_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss:{train_loss:.5f} | train acc:{train_acc:.2f}%")

def test_step(
    model: torch.nn.Module,
    data_loader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module,
    accuracy_fn,
    device: torch.device
):
    ## test loop
    test_loss, test_acc = 0, 0
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            X, y = X.to(device), y.to(device)
            test_pred = model(X)
            test_loss += loss_fn(test_pred, y)
            test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))
        test_loss /= len(data_loader)
        test_acc /= len(data_loader)
    print(f"test loss: {test_loss:.5f} test_acc: {test_acc:.2f}")
torch.manual_seed(42)
def eval_model(model: torch.nn.Module, data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module, accuracy_fn, device):
    """Returns a dict containing the results of model predicting on data_loader."""
    loss, acc = 0, 0
    model.eval()
    with torch.inference_mode():
        for X, y in tqdm(data_loader):
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss += loss_fn(y_pred, y)
            acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))
        # average the loss and acc over all batches
        loss /= len(data_loader)
        acc /= len(data_loader)
    return {"model": model.__class__.__name__, "model_loss": loss.item(), "model_acc": acc}
[ ]: class FashionMNISTModelV2CNN(nn.Module):
    """
    TinyVGG architecture
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units*49, out_features=output_shape)
        )
    def forward(self, x):
        x = self.conv_block_1(x)
        # print(x.shape)
        x = self.conv_block_2(x)
        # print(x.shape)
        x = self.classifier(x)
        return x
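The hidden_units*49 in the classifier comes from the two MaxPool2d(kernel_size=2) layers: 28x28 -> 14x14 -> 7x7, and 7 * 7 = 49. A shape-check sketch with a throwaway instance (`demo` is hypothetical, CPU only):
[ ]: demo = FashionMNISTModelV2CNN(input_shape=1, hidden_units=10, output_shape=10)
with torch.inference_mode():
    print(demo(torch.randn(1, 1, 28, 28)).shape)  # expected: torch.Size([1, 10])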
[ ]: from helper_functions import accuracy_fn
[ ]: # train and test
[ ]: torch.manual_seed(42)
model2 = FashionMNISTModelV2CNN(
input_shape=1,
hidden_units=10,
output_shape=len(class_names)
).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model2.parameters(), lr=0.1)  # SGD with lr=0.1 (optimizer choice assumed)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
from timeit import default_timer
train_time_start = timer()
epochs = 3
for epoch in tqdm(range(epochs)):
    print(f"\nEpoch: {epoch} \n----------------")
    train_step(
        model=model2,
        data_loader=train_dataloader,
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn,
        device=device
    )
    test_step(
        model=model2,
        data_loader=test_dataloader,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
        device=device
    )
train_end_time = timer()
total_time_train = print_train_time(
    start=train_time_start,
    end=train_end_time,
    device=device
)
0%| | 0/3 [00:00<?, ?it/s]
Epoch: 0
----------------
Train loss:0.59443 | train acc:78.32%
test loss: 0.39969 test_acc: 86.04
Epoch: 1
----------------
Train loss:0.35867 | train acc:87.27%
test loss: 0.35115 test_acc: 87.08
Epoch: 2
----------------
Train loss:0.31848 | train acc:88.45%
test loss: 0.31587 test_acc: 88.79
Train time on cuda: 37.646 secs
[ ]: model2_results = eval_model(
model=model2,
data_loader=test_dataloader,
accuracy_fn=accuracy_fn,
loss_fn=loss_fn,
device=device
)
0%| | 0/313 [00:00<?, ?it/s]
[ ]: model2_results
[ ]: {'model': 'FashionMNISTModelV2CNN',
'model_loss': 0.3158661723136902,
'model_acc': 88.7879392971246}
[ ]: def make_predictions(
    model,
    data,
    device,
):
    pred_probs = []
    model.eval()
    with torch.inference_mode():
        for sample in data:
            sample = torch.unsqueeze(sample, dim=0).to(device)
            pred_logit = model(sample)
            pred_prob = torch.softmax(pred_logit.squeeze(), dim=0)
            pred_probs.append(pred_prob.cpu())
    return torch.stack(pred_probs)
[ ]: test_data
[ ]: Dataset FashionMNIST
Number of datapoints: 10000
Root location: data
Split: Test
StandardTransform
Transform: ToTensor()
[ ]: import random
test_samples = []
test_labels = []
for sample, label in random.sample(list(test_data), k=9):
test_samples.append(sample)
test_labels.append(label)
[ ]: pred_probs = make_predictions(
model=model2,
data=test_samples,
device=device
)
[ ]: pred_classes = pred_probs.argmax(dim=1)
[ ]: plt.figure(figsize=(9,9))
rows = 3
cols = 3
for i, sample in enumerate(test_samples):
    plt.subplot(rows, cols, i+1)
    plt.imshow(sample.squeeze(), cmap="gray")
    pred_label = class_names[pred_classes[i]]
    truth_label = class_names[test_labels[i]]
    title_text = f"Pred: {pred_label} | Truth: {truth_label}"
    if pred_label == truth_label:
        plt.title(title_text, fontsize=10, c="g")
    else:
        plt.title(title_text, fontsize=10, c="r")
    plt.axis(False)
[ ]: try:
    import torchmetrics, mlxtend
    print(mlxtend.__version__)
    assert int(mlxtend.__version__.split(".")[1]) >= 19, "mlxtend should be greater than 0.19.0"
except:
    !pip install torchmetrics mlxtend -U
    import torchmetrics, mlxtend
[ ]: mlxtend.__version__
[ ]: '0.23.1'
[ ]: from tqdm.auto import tqdm
[ ]: y_preds = []
model2.eval()
with torch.inference_mode():
    for X, y in tqdm(test_dataloader, desc="Make pred"):
        X, y = X.to(device), y.to(device)
        y_logits = model2(X)
        y_pred = torch.softmax(y_logits.squeeze(), dim=0).argmax(dim=1)
        y_preds.append(y_pred.cpu())
y_pred_tensor = torch.cat(y_preds)
Make pred: 0%| | 0/313 [00:00<?, ?it/s]
[ ]: y_pred_tensor
[ ]: tensor([9, 2, 1, …, 8, 1, 6])
[ ]: from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix
confmat = ConfusionMatrix(task="multiclass", num_classes=len(class_names))
conf_mat_tensor = confmat(preds=y_pred_tensor, target=test_data.targets)
fig, ax = plot_confusion_matrix(conf_mat=conf_mat_tensor.numpy(),
                                class_names=class_names, figsize=(10,7))