4c. Randomness
Randomness
Since torch is our only source of randomness, we need to set the seed only once, e.g. with `torch.manual_seed(10)`.
Setting the seed controls:
- the shuffling of the datasets
- sampling of augmentations
- initialization of weights if applicable
- sampling in layers like `nn.Dropout`
Case 1: Shuffling of datasets
import torch
from torch.utils.data import Dataset
# Basic map-style dataset whose item at position `idx` is `idx` itself.
class Ids(Dataset[int]):
    """Dataset of the integers ``0 .. total - 1``; item ``idx`` is ``idx``.

    Because every sample equals its own index, the order in which a loader
    visits the samples can be read directly from the batches it produces.
    """

    def __init__(self, total: int = 10) -> None:
        """Store ``total``, the number of samples reported by ``len()``."""
        self.total = total

    def __len__(self) -> int:
        """Return the number of samples."""
        return self.total

    def __getitem__(self, idx: int) -> int:
        """Return ``idx`` unchanged.

        Raises:
            IndexError: if ``idx`` is outside ``[0, total)``.
        """
        # Without this check plain iteration (`for x in dataset`,
        # `list(dataset)`) never stops: Python's __getitem__-based iteration
        # protocol only ends when IndexError is raised.
        if not 0 <= idx < self.total:
            raise IndexError(
                f"index {idx} is out of range for a dataset of size {self.total}"
            )
        return idx
# The dataset yields its own indices, so each printed batch shows the order in
# which the loader visited the samples.
dataset = Ids()

# Baseline: without shuffling the samples come out in index order.
print("\n\n--- Not shuffled ---\n")
batches = list(torch.utils.data.DataLoader(dataset, shuffle=False, batch_size=3))
print(batches)
# [tensor([0, 1, 2]), tensor([3, 4, 5]), tensor([6, 7, 8]), tensor([9])]

# Shuffling with no seed set: every loader draws a different permutation.
print("\n\n--- Shuffled, no seed set ---\n")
for i in range(3):
    batches = list(torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=3))
    print(i, batches)
# 0 [tensor([3, 8, 7]), tensor([6, 4, 1]), tensor([0, 9, 2]), tensor([5])]
# 1 [tensor([4, 5, 8]), tensor([0, 7, 1]), tensor([3, 6, 2]), tensor([9])]
# 2 [tensor([6, 4, 5]), tensor([0, 7, 9]), tensor([3, 8, 1]), tensor([2])]

# Seed set once before the loop: the three loaders still differ from each
# other, because they consume one continuing random stream.
print("\n\n--- Shuffled, seed set at start of section ---\n")
torch.manual_seed(10)
for i in range(3):
    batches = list(torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=3))
    print(i, batches)
# 0 [tensor([6, 2, 8]), tensor([1, 5, 3]), tensor([7, 4, 0]), tensor([9])]
# 1 [tensor([5, 0, 8]), tensor([6, 2, 9]), tensor([7, 4, 3]), tensor([1])]
# 2 [tensor([3, 5, 0]), tensor([4, 7, 6]), tensor([9, 8, 2]), tensor([1])]

# Seed reset before each loader is built: every loader restarts from the same
# RNG state and therefore produces the same permutation.
print("\n\n--- Shuffled, seed set before DataLoader init ---\n")
for i in range(3):
    torch.manual_seed(10)
    batches = list(torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=3))
    print(i, batches)
# 0 [tensor([6, 2, 8]), tensor([1, 5, 3]), tensor([7, 4, 0]), tensor([9])]
# 1 [tensor([6, 2, 8]), tensor([1, 5, 3]), tensor([7, 4, 0]), tensor([9])]
# 2 [tensor([6, 2, 8]), tensor([1, 5, 3]), tensor([7, 4, 0]), tensor([9])]
Case 2: Sampling of augmentations
import torch
from torchvision import transforms
# Random horizontal flip applied with probability 0.5 on each call.
aug = transforms.RandomHorizontalFlip(0.5)
# 1x2 "image": the left pixel is 0 and the right pixel is 1, so a horizontal
# flip moves the 1 into position [0, 0].
img = torch.tensor([[0, 1]])

# Each entry is 1 when position [0, 0] still holds 0, i.e. the image was NOT
# flipped on that call, and 0 when it was flipped.
print("\n\n--- No seed set ---\n")
for i in range(3):
    unflipped = [int(aug(img)[0, 0] == 0) for _ in range(10)]
    print(i, unflipped)
# 0 [1, 0, 1, 0, 1, 1, 0, 1, 0, 0]
# 1 [1, 0, 1, 1, 1, 0, 0, 1, 1, 0]
# 2 [0, 0, 0, 1, 0, 1, 0, 0, 0, 1]

# `aug` was constructed before any seed was set, yet re-seeding right before
# the calls makes the ten flip decisions repeat exactly (see output below).
print("\n\n--- Seed set (NOTE: `aug` is already initialized) ---\n")
for i in range(3):
    torch.manual_seed(10)
    unflipped = [int(aug(img)[0, 0] == 0) for _ in range(10)]
    print(i, unflipped)
# 0 [0, 0, 0, 1, 0, 0, 1, 1, 1, 0]
# 1 [0, 0, 0, 1, 0, 0, 1, 1, 1, 0]
# 2 [0, 0, 0, 1, 0, 0, 1, 1, 1, 0]
Case 3: Initialization of model weights
import torch
import torch.nn as nn
# A 1x1 linear layer without bias has exactly one randomly initialized weight,
# which makes the effect of seeding easy to read off.
print("\n\n--- No seed set ---\n")
for i in range(3):
    model = nn.Linear(1, 1, bias=False)
    print(i, model.state_dict()["weight"].numpy()[0, 0])
# 0 0.916343
# 1 0.41053927
# 2 -0.24007404

# Re-seeding immediately before constructing the layer gives the same initial
# weight every time.
print("\n\n--- Seed set ---\n")
for i in range(3):
    torch.manual_seed(10)
    model = nn.Linear(1, 1, bias=False)
    print(i, model.state_dict()["weight"].numpy()[0, 0])
# 0 -0.083830714
# 1 -0.083830714
# 2 -0.083830714
Case 4: Sampling in dropout layer
import torch
import torch.nn as nn
class Drop(nn.Module):
    """Module consisting of a single ``nn.Dropout`` layer.

    Used to observe dropout's random sampling in isolation.
    """

    def __init__(self, p: float = 0.5) -> None:
        """Create the dropout layer with drop probability ``p``."""
        super().__init__()
        self.drop = nn.Dropout(p)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` passed through the dropout layer."""
        return self.drop(x)
model = Drop()
# Put the module in training mode so that dropout is active.
model.train()
x = torch.tensor([1.0, 2.0, 3.0])

# Why do surviving values show up as x * 2? This is not a bug: in training
# mode nn.Dropout zeroes each element with probability p and scales the
# remaining ones by 1 / (1 - p). With p = 0.5 that factor is 2, which keeps the
# expected value of the output equal to the input.
print("\n\n--- No seed set ---\n")
for i in range(3):
    print(i, model(x))
# 0 tensor([2., 4., 6.])
# 1 tensor([0., 0., 0.])
# 2 tensor([0., 4., 6.])

# Re-seeding right before each forward pass reproduces the same dropout mask.
print("\n\n--- Seed set ---\n")
for i in range(3):
    torch.manual_seed(10)
    print(i, model(x))
# 0 tensor([0., 4., 0.])
# 1 tensor([0., 4., 0.])
# 2 tensor([0., 4., 0.])