Skip to content

4c. Randomness

Randomness

Since torch is the only library we draw random numbers from, we need to set the seed only once:

torch.manual_seed(args.seed)

This controls

  • the shuffling of the datasets
  • sampling of augmentations
  • initialization of weights if applicable
  • sampling in layers like nn.Dropout

Case 1: Shuffling of datasets

import torch
from torch.utils.data import Dataset

# Basic dataset that just returns the idx called
class Ids(Dataset[int]):
    """Toy dataset whose item at position ``idx`` is ``idx`` itself.

    Args:
        total: Number of items reported by ``len()``.
    """

    def __init__(self, total: int = 10) -> None:
        self.total = total

    def __len__(self) -> int:
        """Return the number of items in the dataset."""
        return self.total

    def __getitem__(self, idx: int) -> int:
        """Return ``idx`` unchanged.

        Raises:
            IndexError: If ``idx`` is outside ``0 <= idx < total``.
        """
        # Without this check, plain iteration (`for x in Ids()`, `list(Ids())`)
        # never stops: with no __iter__ available, Python falls back to calling
        # __getitem__ with 0, 1, 2, ... and only stops on IndexError.
        if not 0 <= idx < self.total:
            raise IndexError(
                f"index {idx} is out of range for a dataset of {self.total} items"
            )
        return idx

dataset = Ids()

# Baseline: without shuffling the indices come out in their natural order.
print("\n\n--- Not shuffled ---\n")
loader = torch.utils.data.DataLoader(dataset, shuffle=False, batch_size=3)
batches = list(loader)
print(batches)
# [tensor([0, 1, 2]), tensor([3, 4, 5]), tensor([6, 7, 8]), tensor([9])]

# No seed: every pass (and every run of the script) gives a different order.
print("\n\n--- Shuffled, no seed set ---\n")
for epoch in range(3):
    loader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=3)
    batches = list(loader)
    print(epoch, batches)
# 0 [tensor([3, 8, 7]), tensor([6, 4, 1]), tensor([0, 9, 2]), tensor([5])]
# 1 [tensor([4, 5, 8]), tensor([0, 7, 1]), tensor([3, 6, 2]), tensor([9])]
# 2 [tensor([6, 4, 5]), tensor([0, 7, 9]), tensor([3, 8, 1]), tensor([2])]

# Seed set once, before the loop: the passes still differ from each other
# because each pass continues from the RNG state left by the previous one.
print("\n\n--- Shuffled, seed set at start of section ---\n")
torch.manual_seed(10)
for epoch in range(3):
    loader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=3)
    batches = list(loader)
    print(epoch, batches)
# 0 [tensor([6, 2, 8]), tensor([1, 5, 3]), tensor([7, 4, 0]), tensor([9])]
# 1 [tensor([5, 0, 8]), tensor([6, 2, 9]), tensor([7, 4, 3]), tensor([1])]
# 2 [tensor([3, 5, 0]), tensor([4, 7, 6]), tensor([9, 8, 2]), tensor([1])]

# Seed reset before every pass: each pass starts from the same RNG state,
# so the shuffled order is identical every time.
print("\n\n--- Shuffled, seed set before DataLoader init ---\n")
for epoch in range(3):
    torch.manual_seed(10)
    loader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=3)
    batches = list(loader)
    print(epoch, batches)
# 0 [tensor([6, 2, 8]), tensor([1, 5, 3]), tensor([7, 4, 0]), tensor([9])]
# 1 [tensor([6, 2, 8]), tensor([1, 5, 3]), tensor([7, 4, 0]), tensor([9])]
# 2 [tensor([6, 2, 8]), tensor([1, 5, 3]), tensor([7, 4, 0]), tensor([9])]

Case 2: Sampling of augmentations

import torch
from torchvision import transforms

aug = transforms.RandomHorizontalFlip(0.5)

# Two-pixel "image": a horizontal flip swaps the two pixels, so the first
# pixel reads 1 exactly when the flip was applied.
img = torch.tensor([[0, 1]])

# `flipped` holds 1 for every call in which the image was actually flipped.
# (Comparing against 0 instead would mark the *unflipped* calls.)
print("\n\n--- No seed set ---\n")
for i in range(3):
    flipped = [int(aug(img)[0, 0] == 1) for _ in range(10)]
    print(i, flipped)
# 0 [0, 1, 0, 1, 0, 0, 1, 0, 1, 1]
# 1 [0, 1, 0, 0, 0, 1, 1, 0, 0, 1]
# 2 [1, 1, 1, 0, 1, 0, 1, 1, 1, 0]

# Re-seeding before each round reproduces the same flip decisions, even
# though the transform object was created before any seed was set.
print("\n\n--- Seed set (NOTE: `aug` is already initialized) ---\n")
for i in range(3):
    torch.manual_seed(10)
    flipped = [int(aug(img)[0, 0] == 1) for _ in range(10)]
    print(i, flipped)
# 0 [1, 1, 1, 0, 1, 1, 0, 0, 0, 1]
# 1 [1, 1, 1, 0, 1, 1, 0, 0, 0, 1]
# 2 [1, 1, 1, 0, 1, 1, 0, 0, 0, 1]

Case 3: Initialization of model weights

import torch
import torch.nn as nn

# No seed: every freshly constructed layer gets a different initial weight.
print("\n\n--- No seed set ---\n")
for trial in range(3):
    model = nn.Linear(1, 1, bias=False)
    weight = model.state_dict()["weight"]
    print(trial, weight.numpy()[0, 0])
# 0 0.916343
# 1 0.41053927
# 2 -0.24007404

# Seed reset right before construction: the initial weight is reproduced.
print("\n\n--- Seed set ---\n")
for trial in range(3):
    torch.manual_seed(10)
    model = nn.Linear(1, 1, bias=False)
    weight = model.state_dict()["weight"]
    print(trial, weight.numpy()[0, 0])
# 0 -0.083830714
# 1 -0.083830714
# 2 -0.083830714

Case 4: Sampling in dropout layer

import torch
import torch.nn as nn


class Drop(nn.Module):
    """Minimal module that applies a single ``nn.Dropout`` layer to its input.

    Args:
        p: Probability passed to ``nn.Dropout``.
    """

    def __init__(self, p: float = 0.5) -> None:
        super().__init__()
        self.drop = nn.Dropout(p=p)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` after passing it through the dropout layer."""
        dropped = self.drop(x)
        return dropped


model = Drop()
model.train()
x = torch.tensor([1.0, 2.0, 3.0])

# Why the surviving values are doubled: in training mode nn.Dropout zeroes
# each element with probability p and rescales the elements it keeps by
# 1 / (1 - p), so the expected value of the output matches the input.
# With p = 0.5 that factor is 2, hence 1., 2., 3. show up as 2., 4., 6.

# No seed: the dropout mask differs from call to call.
print("\n\n--- No seed set ---\n")
for run in range(3):
    out = model(x)
    print(run, out)
# 0 tensor([2., 4., 6.])
# 1 tensor([0., 0., 0.])
# 2 tensor([0., 4., 6.])

# Seed reset before every call: the same mask is sampled each time.
print("\n\n--- Seed set ---\n")
for run in range(3):
    torch.manual_seed(10)
    out = model(x)
    print(run, out)
# 0 tensor([0., 4., 0.])
# 1 tensor([0., 4., 0.])
# 2 tensor([0., 4., 0.])