【Pytorch】MLPを実装しmnistを使って予測を行う方法【mnist】

【Pytorch】MLPを実装しmnistを使って予測を行う方法【mnist】

【Pytorch】MLPを実装しmnistを使って予測を行う方法【mnist】

PyTorchを使ってMLP（多層パーセプトロン）を実装し、MNISTのデータで学習と予測を行ってみる。

全結合層を定義する（Dense）

まずは必要になるライブラリをインポートしておく

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
from torchvision import datasets, transforms

次に、MLPの構成要素となる全結合層（Dense）を実装する。

詳細は以下を参照。

以下のように、活性化関数（relu、softmax）と全結合層（Dense）を実装する。

def relu(x):
    """Return the element-wise rectified linear unit of ``x``.

    Elements strictly greater than zero are kept; every other element is
    replaced by zero. The input tensor itself is not modified.
    """
    is_positive = x > 0
    zeros = torch.zeros_like(x)
    return torch.where(is_positive, x, zeros)


def softmax(x):
    """Return the softmax of ``x`` taken along dimension 1.

    Args:
        x: Tensor with at least two dimensions; dimension 1 is normalised.

    Returns:
        A new tensor of the same shape whose entries along dimension 1 are
        non-negative and sum to 1.

    The input is left untouched. The previous implementation used ``x -= ...``,
    which overwrote the caller's tensor and raised for leaf tensors that
    require grad.
    """
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # exp() from overflowing for large inputs.
    shifted = x - x.max(dim=1, keepdim=True).values
    x_exp = torch.exp(shifted)
    # Broadcasting the (N, 1) sums replaces the manual torch.cat replication.
    return x_exp / x_exp.sum(dim=1, keepdim=True)

class Dense(nn.Module):
    """Fully connected layer computing ``function(x @ W + b)``.

    Args:
        in_dim: Number of input features (rows of ``W``).
        out_dim: Number of output features (columns of ``W``, length of ``b``).
        function: Callable applied to the affine output; identity by default.
        rng: Optional NumPy random generator exposing
            ``uniform(low=..., high=..., size=...)`` (for example
            ``np.random.default_rng(seed)`` or ``np.random.RandomState(seed)``).
            Pass a seeded one for reproducible weights. When omitted, a fresh
            ``np.random.default_rng()`` is used.
    """

    def __init__(self, in_dim, out_dim, function=lambda x: x, rng=None):
        super().__init__()
        # The original code read a global ``rng`` that is never defined, so
        # constructing a layer raised NameError. Fall back to a local generator.
        if rng is None:
            rng = np.random.default_rng()

        # He (uniform) initialisation: W ~ U(-sqrt(6 / in_dim), sqrt(6 / in_dim)).
        limit = np.sqrt(6 / in_dim)
        initial_w = rng.uniform(low=-limit, high=limit, size=(in_dim, out_dim))
        self.W = nn.Parameter(torch.tensor(initial_w.astype('float32')))
        # Bias starts at zero.
        self.b = nn.Parameter(torch.tensor(np.zeros([out_dim]).astype('float32')))
        self.function = function

    def forward(self, x):
        """Apply the affine map and then ``function``.

        ``x`` is matrix-multiplied with ``W``, so its last dimension must
        equal ``in_dim``.
        """
        return self.function(torch.matmul(x, self.W) + self.b)

MLPクラスを定義する

以下でMLPクラスを実装する。

optimizerはSGDとしているがAdamなどに変更することも可能。

class MLP(nn.Module):
    """Two-layer perceptron: Dense -> relu -> Dense -> softmax.

    Args:
        in_dim: Size of the input passed to the first Dense layer.
        hid_dim: Size of the hidden representation between the two layers.
        out_dim: Size of the output of the second Dense layer.
    """

    def __init__(self, in_dim, hid_dim, out_dim):
        super().__init__()
        self.linear1 = Dense(in_dim, hid_dim)
        self.linear2 = Dense(hid_dim, out_dim)

    def forward(self, x):
        """Return the softmax output of the network for the batch ``x``."""
        hidden = relu(self.linear1(x))
        scores = self.linear2(hidden)
        return softmax(scores)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Layer sizes handed to MLP(in_dim, hid_dim, out_dim).
in_dim, hid_dim, out_dim = 784, 200, 10

# Optimisation settings: learning rate, mini-batch size, number of epochs.
lr, batch_size, n_epochs = 0.001, 32, 10

# Build the model on the selected device and attach a plain SGD optimizer
# to all of its parameters.
mlp = MLP(in_dim, hid_dim, out_dim).to(device)
optimizer = optim.SGD(mlp.parameters(), lr=lr)

DataLoaderを定義する

以下でMNISTの学習用データ（train=True）とテスト用データ（train=False）を読み込み、後者を検証データとして使う。

# Preprocessing applied to every sample: convert the image to a tensor and
# reshape it into a vector of length in_dim.
def _flatten(image):
    return image.view(in_dim)


transform = transforms.Compose([transforms.ToTensor(), transforms.Lambda(_flatten)])

# Fetch MNIST through torchvision (downloaded on first use) and wrap each
# split in a DataLoader, which handles mini-batching and applies `transform`.
mnist_root = './data/mnist'

# Training split: reshuffled when iterated.
dataset_train = datasets.MNIST(mnist_root, train=True, download=True, transform=transform)
dataloader_train = torch.utils.data.DataLoader(
    dataset_train, batch_size=batch_size, shuffle=True
)

# train=False split, used for validation: kept in a fixed order.
dataset_valid = datasets.MNIST(mnist_root, train=False, download=True, transform=transform)
dataloader_valid = torch.utils.data.DataLoader(
    dataset_valid, batch_size=batch_size, shuffle=False
)

学習を実行する

そして定義したMLPモデルで学習を実際に行う。

# Lookup table whose k-th row is the one-hot vector for class k. Built once
# from out_dim instead of re-creating a hard-coded torch.eye(10) per batch.
one_hot = torch.eye(out_dim)

# Probabilities are clamped to this lower bound before log() so that a value
# that underflowed to 0 cannot turn the loss into inf/NaN.
log_eps = 1e-10

for epoch in range(n_epochs):
    losses_train = []
    losses_valid = []
    train_num = 0
    train_true_num = 0
    valid_num = 0
    valid_true_num = 0

    # train() only puts the module into training mode; gradient tracking is
    # governed by autograd, not by this flag.
    mlp.train()
    for x, t in dataloader_train:
        # Move the inputs and the one-hot encoded labels to the compute device.
        x = x.to(device)
        t_hot = one_hot[t].to(device)

        # Forward pass. Calling the module (rather than .forward directly)
        # goes through nn.Module.__call__, so registered hooks also run.
        y = mlp(x)

        # Cross-entropy between the one-hot labels and the predicted
        # probabilities, averaged over the mini-batch.
        loss = -(t_hot * torch.log(torch.clamp(y, min=log_eps))).sum(dim=1).mean()

        # Backward pass: clear stale gradients, then back-propagate.
        optimizer.zero_grad()
        loss.backward()

        # Parameter update.
        optimizer.step()

        # Predicted class = index of the largest probability in each row.
        pred = y.argmax(1)

        losses_train.append(loss.item())
        train_num += t.size(0)
        train_true_num += (pred.to("cpu") == t).sum().item()

    # eval() only puts the module into evaluation mode; torch.no_grad() is
    # what actually stops autograd from recording the validation pass.
    mlp.eval()
    with torch.no_grad():
        for x, t in dataloader_valid:
            x = x.to(device)
            t_hot = one_hot[t].to(device)

            y = mlp(x)
            loss = -(t_hot * torch.log(torch.clamp(y, min=log_eps))).sum(dim=1).mean()

            pred = y.argmax(1)

            losses_valid.append(loss.item())
            valid_num += t.size(0)
            valid_true_num += (pred.to("cpu") == t).sum().item()

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch,
        np.mean(losses_train),
        train_true_num/train_num,
        np.mean(losses_valid),
        valid_true_num/valid_num
    ))

# 出力
# EPOCH: 0, Train [Loss: 1.614, Accuracy: 0.580], Valid [Loss: 1.086, Accuracy: 0.779]
# EPOCH: 1, Train [Loss: 0.883, Accuracy: 0.812], Valid [Loss: 0.706, Accuracy: 0.841]
# EPOCH: 2, Train [Loss: 0.647, Accuracy: 0.849], Valid [Loss: 0.561, Accuracy: 0.865]
# EPOCH: 3, Train [Loss: 0.542, Accuracy: 0.867], Valid [Loss: 0.487, Accuracy: 0.879]
# EPOCH: 4, Train [Loss: 0.483, Accuracy: 0.877], Valid [Loss: 0.442, Accuracy: 0.887]
# EPOCH: 5, Train [Loss: 0.445, Accuracy: 0.884], Valid [Loss: 0.411, Accuracy: 0.894]
# EPOCH: 6, Train [Loss: 0.418, Accuracy: 0.889], Valid [Loss: 0.388, Accuracy: 0.899]
# EPOCH: 7, Train [Loss: 0.398, Accuracy: 0.892], Valid [Loss: 0.371, Accuracy: 0.902]
# EPOCH: 8, Train [Loss: 0.381, Accuracy: 0.896], Valid [Loss: 0.357, Accuracy: 0.904]
# EPOCH: 9, Train [Loss: 0.368, Accuracy: 0.899], Valid [Loss: 0.346, Accuracy: 0.907]

以上がMLPを実装しmnistを使って予測を行う方法。