import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from accelerate import Accelerator
def bce_false(x):
    # BCE-with-logits loss against an all-zeros ("fake") target
    bce = nn.BCEWithLogitsLoss(reduction='none')
    target = torch.zeros(x.size()).cuda()
    return bce(x, target)

def bce_true(x):
    # BCE-with-logits loss against an all-ones ("real") target
    bce = nn.BCEWithLogitsLoss(reduction='none')
    target = torch.ones(x.size()).cuda()
    return bce(x, target)
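As a minor aside, both helpers hard-code .cuda(), which matches this repro but bypasses Accelerate's automatic device placement. A device-agnostic sketch (a hypothetical bce_true_like, not part of the repro) builds the target with ones_like so it inherits the device and dtype of x:

def bce_true_like(x):
    # target inherits x's device/dtype, so this also works on CPU
    # or under Accelerate's automatic device placement
    bce = nn.BCEWithLogitsLoss(reduction='none')
    return bce(x, torch.ones_like(x))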
accelerator = Accelerator()
class Discriminator(nn.Module):
    def __init__(self, in_dim=1, image_size=128, conv_dim=64, c_dim=512, repeat_num=6):
        super(Discriminator, self).__init__()
        layers = []
        layers.append(
            nn.Sequential(
                nn.Conv2d(in_dim, conv_dim, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(conv_dim, affine=True, track_running_stats=True),
                nn.LeakyReLU(inplace=True))
        )
        curr_dim = conv_dim
        # each block halves the spatial size and doubles the channel count
        for i in range(1, repeat_num):
            layer = nn.Sequential(
                nn.Conv2d(curr_dim, curr_dim*2, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(curr_dim*2, affine=True, track_running_stats=True),
                nn.LeakyReLU(inplace=True))
            layers.append(layer)
            curr_dim = curr_dim * 2
        self.down = nn.ModuleList(layers)
        kernel_size = int(image_size / np.power(2, repeat_num))  # note: computed but unused
        self.conv1 = nn.Conv2d(curr_dim, 1, kernel_size=3, stride=1, padding=1, bias=False)

    def forward(self, x):
        # fold the time dimension into the batch dimension before the 2D convs
        (b, t, c, h, w) = x.size()
        x = x.view(b * t, -1, h, w)
        for layer in self.down:
            x = layer(x)
        out_src = self.conv1(x)
        return out_src
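For reference, a quick shape check of the discriminator (a hypothetical smoke test, not part of the repro): with image_size=96, the six stride-2 conv blocks reduce the spatial size 96 -> 48 -> 24 -> 12 -> 6 -> 3 -> 1 while the channels grow to 2048, and conv1 maps that to a single logit per frame.

d = Discriminator(image_size=96)      # CPU is fine for a shape check
dummy = torch.zeros(2, 4, 1, 96, 96)  # (batch, time, channels, H, W)
print(d(dummy).shape)                 # torch.Size([8, 1, 1, 1])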
D = Discriminator(image_size=96).cuda()
lr = 1e-4
optim_d = optim.Adam(D.parameters(), lr=lr, weight_decay=1e-4)
# prepare() wraps D in DistributedDataParallel when launched on multiple GPUs
optim_d, D = accelerator.prepare(optim_d, D)
torch.autograd.set_detect_anomaly(True)

video = torch.zeros(12, 29, 1, 96, 96).cuda()  # (batch, time, channels, H, W)
# two forward passes through D before a single backward
loss_d = bce_true(D(video.clone())).reshape(-1).mean()
loss_d = loss_d + bce_false(D(video.clone())).reshape(-1).mean()
optim_d.zero_grad()
accelerator.backward(loss_d)
optim_d.step()
Running this under Accelerate raises:

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [2048]] is at version 4; expected version 3 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
This code runs correctly in single-GPU PyTorch (without Accelerate), so could the error be caused by Accelerate?
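Possibly relevant: the [2048] tensor in the message matches the parameters/buffers of the last BatchNorm2d block (the channel count doubles five times: 64 * 2**5 = 2048), and accelerator.prepare() wraps the model in DistributedDataParallel, which by default re-broadcasts module buffers such as BatchNorm running statistics in place on every forward pass. Since the script runs two forward passes through D before a single backward, that in-place buffer update is one plausible trigger. A minimal sketch of a commonly suggested workaround, assuming the DDP buffer broadcast really is the culprit, disables it through Accelerate's kwargs handlers:

from accelerate import Accelerator, DistributedDataParallelKwargs

# hypothetical workaround: stop DDP from re-broadcasting BatchNorm buffers
# in place on every forward pass (untested against this exact repro)
ddp_kwargs = DistributedDataParallelKwargs(broadcast_buffers=False)
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])

Other options sometimes suggested for this class of error are converting the BatchNorm layers with torch.nn.SyncBatchNorm.convert_sync_batchnorm(D) before prepare(), or dropping inplace=True from the LeakyReLU activations; whether any of these fixes this exact repro would need testing.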