Image Classification Case Study 2
Dog Breed Identification (ImageNet Dogs) on Kaggle
In this section, we tackle the dog breed identification challenge from the Kaggle competition at https://www.kaggle.com/c/dog-breed-identification . The task is to identify 120 different breeds of dogs. The dataset used in this competition is in fact a subset of the famous ImageNet dataset.
# With the settings used below, training the model on the full training set takes roughly 40-50 minutes
# Please budget your GPU time accordingly and switch to a GPU runtime only when training
# This notebook is also available on Kaggle:
# https://www.kaggle.com/boyuai/boyu-d2l-dog-breed-identification-imagenet-dogs
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import os
import shutil
import time
import pandas as pd
import random
# Fix the random seeds for reproducibility
random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
Organizing the Dataset
The dataset can be downloaded from the competition page. Its directory structure is:
| Dog Breed Identification
| train
| | 000bec180eb18c7604dcecc8fe0dba07.jpg
| | 00a338a92e4e7bf543340dc849230e75.jpg
| | ...
| test
| | 00a3edd22dc7859c487a64777fc8d093.jpg
| | 00a6892e5c7f92c1f465e213fd904582.jpg
| | ...
| labels.csv
| sample_submission.csv
The train and test directories contain the training and test images respectively: 10,222 training images and 10,357 test images, all in JPEG format, each named by a unique id. labels.csv holds the labels of the training images; it has 10,222 rows of two columns each, the first being the image id and the second the dog's breed. There are 120 breeds in total.
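As a quick sanity check (a sketch assuming the dataset has been downloaded to the directory used below; the column names id and breed follow the competition's file format), we can peek at labels.csv with pandas:
labels_peek = pd.read_csv('/home/kesci/input/Kaggle_Dog6357/dog-breed-identification/labels.csv')
print(labels_peek.shape)               # expected: (10222, 2)
print(labels_peek['breed'].nunique())  # expected: 120 distinct breeds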
We want to reorganize the data to make it easier to read later. The main goals are:
Split a validation set out of the training set for tuning hyperparameters. After the split, the data falls into 4 parts: the split-off training set, the split-off validation set, the full training set, and the full test set.
Create 4 folders for these parts: train, valid, train_valid, test. Inside each, create one folder per class that holds the images belonging to that class. The labels of the first three parts are known, so each has 120 subfolders, while the test labels are unknown, so the test part gets a single subfolder named unknown that holds all test images.
We want the reorganized dataset's directory structure to be:
| train_valid_test
| train
| | affenpinscher
| | | 00ca18751837cd6a22813f8e221f7819.jpg
| | | ...
| | afghan_hound
| | | 0a4f1e17d720cdff35814651402b7cf4.jpg
| | | ...
| | ...
| valid
| | affenpinscher
| | | 56af8255b46eb1fa5722f37729525405.jpg
| | | ...
| | afghan_hound
| | | 0df400016a7e7ab4abff824bf2743f02.jpg
| | | ...
| | ...
| train_valid
| | affenpinscher
| | | 00ca18751837cd6a22813f8e221f7819.jpg
| | | ...
| | afghan_hound
| | | 0a4f1e17d720cdff35814651402b7cf4.jpg
| | | ...
| | ...
| test
| | unknown
| | | 00a3edd22dc7859c487a64777fc8d093.jpg
| | | ...
data_dir = '/home/kesci/input/Kaggle_Dog6357/dog-breed-identification'  # dataset directory
label_file, train_dir, test_dir = 'labels.csv', 'train', 'test'  # files and folders inside data_dir
new_data_dir = './train_valid_test'  # directory for the reorganized data
valid_ratio = 0.1  # fraction of the training set used for validation
def mkdir_if_not_exist(path):
    # Create the directory joined from the parts in path if it does not exist yet
    if not os.path.exists(os.path.join(*path)):
        os.makedirs(os.path.join(*path))
def reorg_dog_data(data_dir, label_file, train_dir, test_dir, new_data_dir, valid_ratio):
    # Read the training labels
    labels = pd.read_csv(os.path.join(data_dir, label_file))
    id2label = {Id: label for Id, label in labels.values}  # maps image id -> breed label

    # Randomly shuffle the training files
    train_files = os.listdir(os.path.join(data_dir, train_dir))
    random.shuffle(train_files)

    # Original training set
    valid_ds_size = int(len(train_files) * valid_ratio)  # size of the validation set
    for i, file in enumerate(train_files):
        img_id = file.split('.')[0]  # file is a string of the form id.jpg
        img_label = id2label[img_id]
        if i < valid_ds_size:
            mkdir_if_not_exist([new_data_dir, 'valid', img_label])
            shutil.copy(os.path.join(data_dir, train_dir, file),
                        os.path.join(new_data_dir, 'valid', img_label))
        else:
            mkdir_if_not_exist([new_data_dir, 'train', img_label])
            shutil.copy(os.path.join(data_dir, train_dir, file),
                        os.path.join(new_data_dir, 'train', img_label))
        mkdir_if_not_exist([new_data_dir, 'train_valid', img_label])
        shutil.copy(os.path.join(data_dir, train_dir, file),
                    os.path.join(new_data_dir, 'train_valid', img_label))

    # Test set
    mkdir_if_not_exist([new_data_dir, 'test', 'unknown'])
    for test_file in os.listdir(os.path.join(data_dir, test_dir)):
        shutil.copy(os.path.join(data_dir, test_dir, test_file),
                    os.path.join(new_data_dir, 'test', 'unknown'))
reorg_dog_data(data_dir, label_file, train_dir, test_dir, new_data_dir, valid_ratio)
Image Augmentation
transform_train = transforms.Compose([
    # Randomly crop a region with area 0.08 to 1 times that of the original image
    # and aspect ratio between 3/4 and 4/3, then resize it to 224 x 224
    transforms.RandomResizedCrop(224, scale=(0.08, 1.0),
                                 ratio=(3.0/4.0, 4.0/3.0)),
    # Randomly flip horizontally with probability 0.5
    transforms.RandomHorizontalFlip(),
    # Randomly jitter brightness, contrast and saturation
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    transforms.ToTensor(),
    # Normalize each channel; (0.485, 0.456, 0.406) and (0.229, 0.224, 0.225) are the
    # per-channel means and standard deviations computed on ImageNet
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
# Image augmentation on the test set uses only deterministic operations
transform_test = transforms.Compose([
    transforms.Resize(256),
    # Crop out the central 224 x 224 square region
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
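As a quick check of the pipeline (a sketch using a synthetic image rather than a dataset file), applying transform_test to any PIL image should yield a 3 x 224 x 224 float tensor:
from PIL import Image
import numpy as np
demo_img = Image.fromarray(np.uint8(np.random.randint(0, 256, (300, 400, 3))))  # a fake 300x400 RGB image
print(transform_test(demo_img).shape)  # expected: torch.Size([3, 224, 224])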
Loading the Data
# new_data_dir contains the four directories train, valid, train_valid and test
# In each of them, every subdirectory corresponds to one class and holds all images of that class
train_ds = torchvision.datasets.ImageFolder(root=os.path.join(new_data_dir, 'train'),
transform=transform_train)
valid_ds = torchvision.datasets.ImageFolder(root=os.path.join(new_data_dir, 'valid'),
transform=transform_test)
train_valid_ds = torchvision.datasets.ImageFolder(root=os.path.join(new_data_dir, 'train_valid'),
transform=transform_train)
test_ds = torchvision.datasets.ImageFolder(root=os.path.join(new_data_dir, 'test'),
transform=transform_test)
batch_size = 128
train_iter = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_iter = torch.utils.data.DataLoader(valid_ds, batch_size=batch_size, shuffle=True)
train_valid_iter = torch.utils.data.DataLoader(train_valid_ds, batch_size=batch_size, shuffle=True)
test_iter = torch.utils.data.DataLoader(test_ds, batch_size=batch_size, shuffle=False)  # keep the test order fixed for submission
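To confirm that the loaders produce what the network expects, we can fetch a single batch (a quick check, not required for training):
X_demo, y_demo = next(iter(train_iter))
print(X_demo.shape, y_demo.shape)  # expected: torch.Size([128, 3, 224, 224]) torch.Size([128])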
Defining the Model
Since this competition's data is a subset of ImageNet, we use fine-tuning: a model pretrained on the full ImageNet dataset extracts image features, which then serve as the input of a small custom output network.
Here we use a pretrained ResNet-34 model and directly reuse the input of its output layer, i.e., the extracted features. We then redefine the output layer. Only the parameters of this new output layer are trained, while the feature-extraction part keeps the pretrained parameters.
def get_net(device):
    finetune_net = models.resnet34(pretrained=False)  # ResNet-34; the pretrained weights are loaded from a local file below
    finetune_net.load_state_dict(torch.load('/home/kesci/input/resnet347742/resnet34-333f7ec4.pth'))
    for param in finetune_net.parameters():  # freeze the pretrained parameters
        param.requires_grad = False
    # The original finetune_net.fc is a fully connected layer with 512 inputs and 1000 outputs
    # Replace it; the parameters of the new finetune_net.fc will record gradients
    finetune_net.fc = nn.Sequential(
        nn.Linear(in_features=512, out_features=256),
        nn.ReLU(),
        nn.Linear(in_features=256, out_features=120)  # 120 is the number of output classes
    )
    return finetune_net
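Since only the new output layer is meant to be trained, we can verify that the frozen backbone contributes no trainable parameters (a quick check, assuming the pretrained weight file above is available):
net_check = get_net(None)  # the device argument is not used inside get_net
n_trainable = sum(p.numel() for p in net_check.parameters() if p.requires_grad)
print(n_trainable)  # expected: 512*256 + 256 + 256*120 + 120 = 162168, all in net_check.fc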
Defining the Training Functions
def evaluate_loss_acc(data_iter, net, device):
    # Compute the average loss and accuracy on data_iter
    loss = nn.CrossEntropyLoss()
    is_training = net.training  # bool: whether net is currently in training mode
    net.eval()
    l_sum, acc_sum, n = 0, 0, 0
    with torch.no_grad():
        for X, y in data_iter:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l_sum += l.item() * y.shape[0]
            acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
    net.train(is_training)  # restore the train/eval state of net
    return l_sum / n, acc_sum / n
def train(net, train_iter, valid_iter, num_epochs, lr, wd, device, lr_period,
lr_decay):
loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.fc.parameters(), lr=lr, momentum=0.9, weight_decay=wd)
net = net.to(device)
for epoch in range(num_epochs):
train_l_sum, n, start = 0.0, 0, time.time()
        if epoch > 0 and epoch % lr_period == 0:  # decay the learning rate every lr_period epochs
lr = lr * lr_decay
for param_group in optimizer.param_groups:
param_group['lr'] = lr
for X, y in train_iter:
X, y = X.to(device), y.to(device)
optimizer.zero_grad()
y_hat = net(X)
l = loss(y_hat, y)
l.backward()
optimizer.step()
train_l_sum += l.item() * y.shape[0]
n += y.shape[0]
time_s = "time %.2f sec" % (time.time() - start)
if valid_iter is not None:
valid_loss, valid_acc = evaluate_loss_acc(valid_iter, net, device)
epoch_s = ("epoch %d, train loss %f, valid loss %f, valid acc %f, "
% (epoch + 1, train_l_sum / n, valid_loss, valid_acc))
else:
epoch_s = ("epoch %d, train loss %f, "
% (epoch + 1, train_l_sum / n))
print(epoch_s + time_s + ', lr ' + str(lr))
Hyperparameter Tuning
num_epochs, lr_period, lr_decay = 20, 10, 0.1
lr, wd = 0.03, 1e-4
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = get_net(device)
train(net, train_iter, valid_iter, num_epochs, lr, wd, device, lr_period, lr_decay)
Training the Model on the Full Dataset
# With the settings above, training on the full dataset takes roughly 40-50 minutes
net = get_net(device)
train(net, train_valid_iter, None, num_epochs, lr, wd, device, lr_period, lr_decay)
Classifying the Test Set and Submitting the Results
We use the trained model to predict on the test data. The competition requires, for every image in the test set, the predicted probability of each of the 120 classes.
net.eval()  # switch to evaluation mode so batch norm uses its running statistics
preds = []
with torch.no_grad():  # gradients are not needed for inference
    for X, _ in test_iter:
        X = X.to(device)
        output = net(X)
        output = torch.softmax(output, dim=1)
        preds += output.tolist()
ids = sorted(os.listdir(os.path.join(new_data_dir, 'test/unknown')))
with open('submission.csv', 'w') as f:
f.write('id,' + ','.join(train_valid_ds.classes) + '\n')
for i, output in zip(ids, preds):
f.write(i.split('.')[0] + ',' + ','.join(
[str(num) for num in output]) + '\n')
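The resulting file should contain one row per test image and 121 columns (id plus one probability per breed), with each row's probabilities summing to 1. A quick check with pandas (a sketch, run after the file is written):
submission = pd.read_csv('submission.csv')
print(submission.shape)  # expected: (10357, 121)
print(submission.iloc[:, 1:].sum(axis=1).round(4).head())  # each row should sum to ~1.0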
GAN
This part introduces the principles and implementation of generative adversarial networks.
Generative Adversarial Networks
Throughout most of this book, we have talked about how to make predictions. In some form or another, we used deep neural networks to learn mappings from data points to labels. This kind of learning is called discriminative learning, as in, we'd like to be able to discriminate between photos of cats and photos of dogs. Classifiers and regressors are both examples of discriminative learning. And neural networks trained by backpropagation have upended everything we thought we knew about discriminative learning on large complicated datasets. Classification accuracies on high-res images have gone from useless to human-level (with some caveats) in just 5-6 years. We will spare you another spiel about all the other discriminative tasks where deep neural networks do astoundingly well.
But there is more to machine learning than just solving discriminative tasks. For example, given a large dataset, without any labels, we might want to learn a model that concisely captures the characteristics of this data. Given such a model, we could sample synthetic data points that resemble the distribution of the training data. For example, given a large corpus of photographs of faces, we might want to be able to generate a new photorealistic image that looks like it might plausibly have come from the same dataset. This kind of learning is called generative modeling.
Until recently, we had no method that could synthesize novel photorealistic images. But the success of deep neural networks for discriminative learning opened up new possibilities. One big trend over the last three years has been the application of discriminative deep nets to overcome challenges in problems that we do not generally think of as supervised learning problems. The recurrent neural network language models are one example of using a discriminative network (trained to predict the next character) that once trained can act as a generative model.
The GAN architecture has two pieces. First off, we need a device (say, a deep network, but it really could be anything, such as a game rendering engine) that might potentially be able to generate data that looks just like the real thing. If we are dealing with images, this needs to generate images. If we are dealing with speech, it needs to generate audio sequences, and so on. We call this the generator network. The second component is the discriminator network. It attempts to distinguish fake and real data from each other. Both networks are in competition with each other: the generator network attempts to fool the discriminator network, at which point the discriminator network adapts to the new fake data. This information, in turn, is used to improve the generator network, and so on.
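Formally, the two networks play a minimax game (this is the standard GAN objective of Goodfellow et al., 2014, stated here for reference):
$$\min_G \max_D \; \mathbb{E}_{x \sim p_{\mathrm{data}}}\left[\log D(x)\right] + \mathbb{E}_{z \sim p_z}\left[\log\left(1 - D(G(z))\right)\right]$$
where $D(x)$ is the probability the discriminator assigns to $x$ being real, and $G(z)$ maps noise $z$ to a synthetic sample.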
%matplotlib inline
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch import nn
import numpy as np
from torch.autograd import Variable
import torch
Generate some "real" data
Since this is going to be the world's lamest example, we simply generate data drawn from a Gaussian.
X=np.random.normal(size=(1000,2))
A=np.array([[1,2],[-0.1,0.5]])
b=np.array([1,2])
data=X.dot(A)+b
plt.figure(figsize=(3.5,2.5))
plt.scatter(X[:100,0],X[:100,1],color='red')
plt.show()
plt.figure(figsize=(3.5,2.5))
plt.scatter(data[:100,0],data[:100,1],color='blue')
plt.show()
print("The covariance matrix is\n%s" % np.dot(A.T, A))
The covariance matrix is
[[1.01 1.95]
[1.95 4.25]]
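This matches the analytic result: if $X \sim \mathcal{N}(0, I)$, then the affine map used above gives
$$XA + b \sim \mathcal{N}\left(b,\; A^\top A\right),$$
so the generator only has to recover a two-by-two affine transformation.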
batch_size=8
data_iter=DataLoader(data,batch_size=batch_size)
Generator
Our generator network will be the simplest network possible - a single layer linear model. This is because we will be driving that linear network with a Gaussian data generator. Hence, it literally only needs to learn the parameters to fake things perfectly.
class net_G(nn.Module):
def __init__(self):
super(net_G,self).__init__()
self.model=nn.Sequential(
nn.Linear(2,2),
)
self._initialize_weights()
def forward(self,x):
x=self.model(x)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m,nn.Linear):
m.weight.data.normal_(0,0.02)
m.bias.data.zero_()
Discriminator
For the discriminator we will be a bit more discriminating: we will use an MLP with 3 layers to make things a bit more interesting.
class net_D(nn.Module):
def __init__(self):
super(net_D,self).__init__()
self.model=nn.Sequential(
nn.Linear(2,5),
nn.Tanh(),
nn.Linear(5,3),
nn.Tanh(),
nn.Linear(3,1),
nn.Sigmoid()
)
self._initialize_weights()
def forward(self,x):
x=self.model(x)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m,nn.Linear):
m.weight.data.normal_(0,0.02)
m.bias.data.zero_()
Training
First we define a function to update the discriminator.
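With BCELoss this amounts to minimizing (written out here for reference; each expectation is estimated by a batch mean)
$$\mathcal{L}_D = -\tfrac{1}{2}\left(\mathbb{E}_{x}\left[\log D(x)\right] + \mathbb{E}_{z}\left[\log\left(1 - D(G(z))\right)\right]\right),$$
which is exactly the quantity (loss(real_Y, ones) + loss(fake_Y, zeros)) / 2 computed below.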
# Saved in the d2l package for later use
def update_D(X,Z,net_D,net_G,loss,trainer_D):
batch_size=X.shape[0]
Tensor=torch.FloatTensor
ones=Variable(Tensor(np.ones(batch_size))).view(batch_size,1)
zeros = Variable(Tensor(np.zeros(batch_size))).view(batch_size,1)
real_Y=net_D(X.float())
fake_X=net_G(Z)
fake_Y=net_D(fake_X)
loss_D=(loss(real_Y,ones)+loss(fake_Y,zeros))/2
loss_D.backward()
trainer_D.step()
return float(loss_D.sum())
The generator is updated similarly. Here we reuse the cross-entropy loss but change the label of the fake data from 0 to 1.
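Flipping the labels gives the commonly used non-saturating generator objective
$$\mathcal{L}_G = -\mathbb{E}_{z}\left[\log D(G(z))\right],$$
which provides stronger gradients early in training than directly minimizing $\log(1 - D(G(z)))$.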
# Saved in the d2l package for later use
def update_G(Z,net_D,net_G,loss,trainer_G):
batch_size=Z.shape[0]
Tensor=torch.FloatTensor
ones=Variable(Tensor(np.ones((batch_size,)))).view(batch_size,1)
fake_X=net_G(Z)
fake_Y=net_D(fake_X)
loss_G=loss(fake_Y,ones)
loss_G.backward()
trainer_G.step()
return float(loss_G.sum())
Both the discriminator and the generator perform binary logistic regression with the cross-entropy loss. We use Adam to smooth the training process. In each iteration, we first update the discriminator and then the generator. We visualize both losses and generated examples.
def train(net_D,net_G,data_iter,num_epochs,lr_D,lr_G,latent_dim,data):
loss=nn.BCELoss()
Tensor=torch.FloatTensor
trainer_D=torch.optim.Adam(net_D.parameters(),lr=lr_D)
trainer_G=torch.optim.Adam(net_G.parameters(),lr=lr_G)
plt.figure(figsize=(7,4))
d_loss_point=[]
g_loss_point=[]
d_loss=0
g_loss=0
for epoch in range(1,num_epochs+1):
d_loss_sum=0
g_loss_sum=0
batch=0
for X in data_iter:
batch+=1
X=Variable(X)
batch_size=X.shape[0]
Z=Variable(Tensor(np.random.normal(0,1,(batch_size,latent_dim))))
trainer_D.zero_grad()
d_loss = update_D(X, Z, net_D, net_G, loss, trainer_D)
d_loss_sum+=d_loss
trainer_G.zero_grad()
g_loss = update_G(Z, net_D, net_G, loss, trainer_G)
g_loss_sum+=g_loss
d_loss_point.append(d_loss_sum/batch)
g_loss_point.append(g_loss_sum/batch)
plt.ylabel('Loss', fontdict={'size': 14})
plt.xlabel('epoch', fontdict={'size': 14})
plt.xticks(range(0,num_epochs+1,3))
plt.plot(range(1,num_epochs+1),d_loss_point,color='orange',label='discriminator')
plt.plot(range(1,num_epochs+1),g_loss_point,color='blue',label='generator')
plt.legend()
plt.show()
print(d_loss,g_loss)
Z =Variable(Tensor( np.random.normal(0, 1, size=(100, latent_dim))))
fake_X=net_G(Z).detach().numpy()
plt.figure(figsize=(3.5,2.5))
plt.scatter(data[:,0],data[:,1],color='blue',label='real')
plt.scatter(fake_X[:,0],fake_X[:,1],color='orange',label='generated')
plt.legend()
plt.show()
Now we specify the hyper-parameters to fit the Gaussian distribution.
if __name__ == '__main__':
lr_D,lr_G,latent_dim,num_epochs=0.05,0.005,2,20
generator=net_G()
discriminator=net_D()
train(discriminator,generator,data_iter,num_epochs,lr_D,lr_G,latent_dim,data)
0.6932446360588074 0.6927103996276855
DCGAN
This part introduces the principles and implementation of deep convolutional generative adversarial networks.
Deep Convolutional Generative Adversarial Networks
In the previous section, we introduced the basic ideas behind how GANs work. We showed that they can draw samples from some simple, easy-to-sample distribution, like a uniform or normal distribution, and transform them into samples that appear to match the distribution of some dataset. And while our example of matching a 2D Gaussian distribution got the point across, it is not especially exciting.
In this section, we will demonstrate how you can use GANs to generate photorealistic images. We will base our models on the deep convolutional GANs (DCGAN) introduced in Radford et al., 2015. We will borrow the convolutional architecture that has proven so successful for discriminative computer vision problems and show how, via GANs, it can be leveraged to generate photorealistic images.
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch import nn
import numpy as np
from torch.autograd import Variable
import torch
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
import zipfile
cuda = True if torch.cuda.is_available() else False
print(cuda)
The Pokemon Dataset
The dataset we will use is a collection of Pokemon sprites obtained from pokemondb. First download, extract and load this dataset.
We resize each image to 64 x 64. The ToTensor transformation will project the pixel values into [0,1], while our generator will use the tanh function to obtain outputs in [-1,1]. Therefore we normalize the data with mean 0.5 and standard deviation 0.5 to match the value range.
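Concretely, each pixel value $x \in [0, 1]$ produced by ToTensor is mapped by the normalization to
$$x' = \frac{x - 0.5}{0.5} \in [-1, 1],$$
matching the output range of the generator's tanh.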
data_dir='/home/kesci/input/pokemon8600/'
batch_size=256
transform=transforms.Compose([
transforms.Resize((64,64)),
transforms.ToTensor(),
transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])
pokemon=ImageFolder(data_dir+'pokemon',transform)
data_iter=DataLoader(pokemon,batch_size=batch_size,shuffle=True)
Let's visualize the first 20 images.
fig=plt.figure(figsize=(4,4))
imgs=data_iter.dataset.imgs
for i in range(20):
img = plt.imread(imgs[i*150][0])
plt.subplot(4,5,i+1)
plt.imshow(img)
plt.axis('off')
plt.show()
class G_block(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=4, strides=2, padding=1):
        super(G_block, self).__init__()
        self.conv2d_trans = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size,
                                               stride=strides, padding=padding, bias=False)
        # 0.8 was originally passed positionally, landing on eps; momentum is the intended keyword
        self.batch_norm = nn.BatchNorm2d(out_channels, momentum=0.8)
        self.activation = nn.ReLU()
    def forward(self, x):
        return self.activation(self.batch_norm(self.conv2d_trans(x)))
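The spatial output size of the transposed convolution follows the standard formula (stated here to explain the shape checks below):
$$n_{\mathrm{out}} = (n_{\mathrm{in}} - 1) \times s - 2p + k$$
With $k=4$, $s=2$, $p=1$ this gives $n_{\mathrm{out}} = 2 n_{\mathrm{in}}$, so each G_block doubles the spatial resolution, while $s=1$, $p=0$ maps a $1 \times 1$ input to $4 \times 4$.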
Tensor=torch.cuda.FloatTensor
x=Variable(Tensor(np.zeros((2,3,16,16))))
g_blk=G_block(3,20)
g_blk.cuda()
print(g_blk(x).shape)
out
torch.Size([2, 20, 32, 32])
x=Variable(Tensor(np.zeros((2,3,1,1))))
g_blk=G_block(3,20,strides=1,padding=0)
g_blk.cuda()
print(g_blk(x).shape)
out
torch.Size([2, 20, 4, 4])
class net_G(nn.Module):
def __init__(self,in_channels):
super(net_G,self).__init__()
n_G=64
self.model=nn.Sequential(
G_block(in_channels,n_G*8,strides=1,padding=0),
G_block(n_G*8,n_G*4),
G_block(n_G*4,n_G*2),
G_block(n_G*2,n_G),
nn.ConvTranspose2d(
n_G,3,kernel_size=4,stride=2,padding=1,bias=False
),
nn.Tanh()
)
def forward(self,x):
x=self.model(x)
return x
def weights_init_normal(m):
classname = m.__class__.__name__
if classname.find("Conv") != -1:
torch.nn.init.normal_(m.weight.data, mean=0, std=0.02)
elif classname.find("BatchNorm2d") != -1:
torch.nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
torch.nn.init.constant_(m.bias.data, 0.0)
Generate a 100-dimensional latent variable to verify the generator's output shape.
x=Variable(Tensor(np.zeros((1,100,1,1))))
generator=net_G(100)
generator.cuda()
generator.apply(weights_init_normal)
print(generator(x).shape)
out
torch.Size([1, 3, 64, 64])
The discriminator uses a leaky ReLU activation; we plot it below for several values of the negative slope alpha (alpha = 0 recovers the ordinary ReLU).
alphas = [0, 0.2, 0.4, .6]
x = np.arange(-2, 1, 0.1)
Y = [nn.LeakyReLU(alpha)(Tensor(x)).cpu().numpy() for alpha in alphas]
plt.figure(figsize=(4,4))
for y in Y:
    plt.plot(x, y)
plt.show()
The basic block of the discriminator is a convolution layer followed by a batch normalization layer and a leaky ReLU activation. The hyper-parameters of the convolution layer are similar to the transpose convolution layer in the generator block.
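For the convolution this is the standard shape formula (stated here to explain the shape check below):
$$n_{\mathrm{out}} = \left\lfloor \frac{n_{\mathrm{in}} + 2p - k}{s} \right\rfloor + 1$$
With $k=4$, $s=2$, $p=1$ this gives $n_{\mathrm{out}} = n_{\mathrm{in}} / 2$ for even inputs, so each D_block halves the spatial resolution; stacking four blocks takes a $64 \times 64$ image down to $4 \times 4$.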
class D_block(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=4, strides=2,
                 padding=1, alpha=0.2):
        super(D_block, self).__init__()
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding, bias=False)
        self.batch_norm = nn.BatchNorm2d(out_channels, momentum=0.8)  # as in G_block, momentum is the intended keyword
        self.activation = nn.LeakyReLU(alpha)
    def forward(self, X):
        return self.activation(self.batch_norm(self.conv2d(X)))
x = Variable(Tensor(np.zeros((2, 3, 16, 16))))
d_blk = D_block(3,20)
d_blk.cuda()
print(d_blk(x).shape)
out
torch.Size([2, 20, 8, 8])
The discriminator is a mirror of the generator.
class net_D(nn.Module):
def __init__(self,in_channels):
super(net_D,self).__init__()
n_D=64
self.model=nn.Sequential(
D_block(in_channels,n_D),
D_block(n_D,n_D*2),
D_block(n_D*2,n_D*4),
D_block(n_D*4,n_D*8)
)
self.conv=nn.Conv2d(n_D*8,1,kernel_size=4,bias=False)
self.activation=nn.Sigmoid()
# self._initialize_weights()
def forward(self,x):
x=self.model(x)
x=self.conv(x)
x=self.activation(x)
return x
It uses a convolution layer with a single output channel as the last layer to obtain a single prediction value.
x = Variable(Tensor(np.zeros((1, 3, 64, 64))))
discriminator=net_D(3)
discriminator.cuda()
discriminator.apply(weights_init_normal)
print(discriminator(x).shape)
out
torch.Size([1, 1, 1, 1])
def update_D(X,Z,net_D,net_G,loss,trainer_D):
batch_size=X.shape[0]
Tensor=torch.cuda.FloatTensor
ones=Variable(Tensor(np.ones(batch_size,)),requires_grad=False).view(batch_size,1)
zeros = Variable(Tensor(np.zeros(batch_size,)),requires_grad=False).view(batch_size,1)
real_Y=net_D(X).view(batch_size,-1)
fake_X=net_G(Z)
fake_Y=net_D(fake_X).view(batch_size,-1)
loss_D=(loss(real_Y,ones)+loss(fake_Y,zeros))/2
loss_D.backward()
trainer_D.step()
return float(loss_D.sum())
def update_G(Z,net_D,net_G,loss,trainer_G):
batch_size=Z.shape[0]
Tensor=torch.cuda.FloatTensor
ones=Variable(Tensor(np.ones((batch_size,))),requires_grad=False).view(batch_size,1)
fake_X=net_G(Z)
fake_Y=net_D(fake_X).view(batch_size,-1)
loss_G=loss(fake_Y,ones)
loss_G.backward()
trainer_G.step()
return float(loss_G.sum())
def train(net_D,net_G,data_iter,num_epochs,lr,latent_dim):
loss=nn.BCELoss()
Tensor=torch.cuda.FloatTensor
trainer_D=torch.optim.Adam(net_D.parameters(),lr=lr,betas=(0.5,0.999))
trainer_G=torch.optim.Adam(net_G.parameters(),lr=lr,betas=(0.5,0.999))
plt.figure(figsize=(7,4))
d_loss_point=[]
g_loss_point=[]
d_loss=0
g_loss=0
for epoch in range(1,num_epochs+1):
d_loss_sum=0
g_loss_sum=0
batch=0
for X in data_iter:
            X = X[0]  # the loader yields (images, labels); keep only the image tensor
batch+=1
X=Variable(X.type(Tensor))
batch_size=X.shape[0]
Z=Variable(Tensor(np.random.normal(0,1,(batch_size,latent_dim,1,1))))
trainer_D.zero_grad()
d_loss = update_D(X, Z, net_D, net_G, loss, trainer_D)
d_loss_sum+=d_loss
trainer_G.zero_grad()
g_loss = update_G(Z, net_D, net_G, loss, trainer_G)
g_loss_sum+=g_loss
d_loss_point.append(d_loss_sum/batch)
g_loss_point.append(g_loss_sum/batch)
        print(
            "[Epoch %d/%d] [D loss: %f] [G loss: %f]"
            % (epoch, num_epochs, d_loss_sum / batch, g_loss_sum / batch)  # average over batches, not batch_size
        )
plt.ylabel('Loss', fontdict={ 'size': 14})
plt.xlabel('epoch', fontdict={ 'size': 14})
plt.xticks(range(0,num_epochs+1,3))
plt.plot(range(1,num_epochs+1),d_loss_point,color='orange',label='discriminator')
plt.plot(range(1,num_epochs+1),g_loss_point,color='blue',label='generator')
plt.legend()
plt.show()
print(d_loss,g_loss)
Z = Variable(Tensor(np.random.normal(0, 1, size=(21, latent_dim, 1, 1))),requires_grad=False)
    fake_x = net_G(Z)  # use the generator passed into train rather than the global name
fake_x=fake_x.cpu().detach().numpy()
plt.figure(figsize=(14,6))
for i in range(21):
im=np.transpose(fake_x[i])
plt.subplot(3,7,i+1)
plt.imshow(im)
plt.show()
Now let's train the model.
if __name__ == '__main__':
lr,latent_dim,num_epochs=0.005,100,50
train(discriminator,generator,data_iter,num_epochs,lr,latent_dim)