# Basic training configuration
import os
from functools import partial
import albumentations as A
import cv2
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lrs
from albumentations.pytorch import ToTensorV2 as ToTensor
from dataflow import get_train_val_loaders, ignore_mask_boundaries
from torchvision.models.segmentation import deeplabv3_resnet101
# ##############################
# Global configs
# ##############################
seed = 21
device = "cuda"
debug = False
# Use torch native automatic mixed precision (AMP)
with_amp = True
num_classes = 21
batch_size = 18 # total batch size
val_batch_size = batch_size * 2
num_workers = 12 # total num workers per node
val_interval = 3
# gradient accumulation:
accumulation_steps = 4
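# With batch_size = 18 and accumulation_steps = 4, each optimizer update sees an
# effective batch of 18 * 4 = 72 samples.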
val_img_size = 513
train_img_size = 480
# ##############################
# Setup Dataflow
# ##############################
assert "DATASET_PATH" in os.environ
data_path = os.environ["DATASET_PATH"]
assert "SBD_DATASET_PATH" in os.environ
sbd_data_path = os.environ["SBD_DATASET_PATH"]
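# DATASET_PATH is expected to point at the Pascal VOC 2012 root and SBD_DATASET_PATH
# at the Semantic Boundaries Dataset used to augment training (assumption inferred
# from the 21-class setup; see dataflow.get_train_val_loaders for the exact layout).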
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
train_transforms = A.Compose(
    [
        A.RandomScale(scale_limit=(0.0, 1.5), interpolation=cv2.INTER_LINEAR, p=1.0),
        A.PadIfNeeded(val_img_size, val_img_size, border_mode=cv2.BORDER_CONSTANT),
        A.RandomCrop(train_img_size, train_img_size),
        A.HorizontalFlip(),
        A.Blur(blur_limit=3),
        A.Normalize(mean=mean, std=std),
        ignore_mask_boundaries,
        ToTensor(),
    ]
)
val_transforms = A.Compose(
    [
        A.PadIfNeeded(val_img_size, val_img_size, border_mode=cv2.BORDER_CONSTANT),
        A.Normalize(mean=mean, std=std),
        ignore_mask_boundaries,
        ToTensor(),
    ]
)
train_loader, val_loader, train_eval_loader = get_train_val_loaders(
    root_path=data_path,
    train_transforms=train_transforms,
    val_transforms=val_transforms,
    batch_size=batch_size,
    num_workers=num_workers,
    val_batch_size=val_batch_size,
    sbd_path=sbd_data_path,
    limit_train_num_samples=100 if debug else None,
    limit_val_num_samples=100 if debug else None,
)
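# Illustrative sanity check (not part of the original config; assumes the three
# loaders are standard PyTorch DataLoaders returned by dataflow):
# if debug:
#     print(len(train_loader), len(val_loader), len(train_eval_loader))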
# ##############################
# Setup model
# ##############################
model = deeplabv3_resnet101(num_classes=num_classes)

def model_output_transform(output):
    return output["out"]

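# torchvision's DeepLabV3 forward pass returns a dict of tensors; the "out" entry
# holds the main segmentation logits of shape (N, num_classes, H, W), interpolated
# back to the input resolution. Illustrative usage (an assumption, requires
# `import torch` and an eval-mode model):
# model.eval()
# with torch.no_grad():
#     logits = model_output_transform(model(torch.rand(1, 3, val_img_size, val_img_size)))
#     assert logits.shape == (1, num_classes, val_img_size, val_img_size)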
# ##############################
# Setup solver
# ##############################
save_every_iters = len(train_loader)
num_epochs = 100
criterion = nn.CrossEntropyLoss()
lr = 0.007
weight_decay = 5e-4
momentum = 0.9
nesterov = False
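# The optimizer lr is set to 1.0 so that the LambdaLR lambdas defined below return
# the absolute learning rate for each parameter group (LambdaLR multiplies the
# base lr by the lambda's value).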
optimizer = optim.SGD(
    [{"params": model.backbone.parameters()}, {"params": model.classifier.parameters()}],
    lr=1.0,
    momentum=momentum,
    weight_decay=weight_decay,
    nesterov=nesterov,
)
le = len(train_loader)

def lambda_lr_scheduler(iteration, lr0, n, a):
    return lr0 * pow((1.0 - 1.0 * iteration / n), a)

lr_scheduler = lrs.LambdaLR(
    optimizer,
    lr_lambda=[
        partial(lambda_lr_scheduler, lr0=lr, n=num_epochs * le, a=0.9),
        partial(lambda_lr_scheduler, lr0=lr * 10.0, n=num_epochs * le, a=0.9),
    ],
)
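# This is the usual "poly" decay over num_epochs * len(train_loader) iterations,
# assuming the training loop steps the scheduler once per iteration: the backbone
# group decays from lr = 0.007 to 0 and the classifier group from 10 * lr = 0.07 to 0.
# Halfway through training, for example, the backbone lr is about
# 0.007 * 0.5 ** 0.9 ≈ 0.0038.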