"""
VanillaVAE
----------
Implements the Variational Autoencoder [1].
Trains on Kullback-Leibler loss and mean squared error reconstruction loss.
Losses:
- Encoder: Kullback-Leibler
- Decoder: L2 (Mean Squared Error)
Default optimizer:
- torch.optim.Adam
Custom parameter:
    - lambda_KL: Weight of the Kullback-Leibler divergence computed in the latent space (the encoder loss).
References
----------
.. [1] https://arxiv.org/pdf/1906.02691.pdf
"""
import torch
import numpy as np
import torch.nn as nn
from torch.nn import MSELoss
from vegans.utils.layers import LayerReshape
from vegans.utils.networks import Encoder, Decoder, Autoencoder
from vegans.models.unconditional.AbstractGenerativeModel import AbstractGenerativeModel
class VanillaVAE(AbstractGenerativeModel):
"""
Parameters
----------
encoder: nn.Module
Encoder architecture. Produces predictions in the latent space.
decoder: nn.Module
Decoder architecture. Produces output in the real space.
    x_dim : list, tuple
        Number of output dimensions of the decoder and input dimensions of the encoder.
        In the case of images this will be [nr_channels, nr_height_pixels, nr_width_pixels].
    z_dim : int, list, tuple
        Number of latent dimensions used as input to the decoder. Might have dimensions of an image.
    optim : dict or torch.optim
        Optimizer used for each network. Could be either an optimizer from torch.optim or a dictionary with network
        name keys and torch.optim as value, i.e. {"Autoencoder": torch.optim.Adam}.
    optim_kwargs : dict
        Optimizer keyword arguments used for each network. Must be a dictionary with network
        name keys and dictionary with keyword arguments as value, i.e. {"Autoencoder": {"lr": 0.0001}}.
    lambda_KL: float
        Weight of the Kullback-Leibler divergence computed in the latent space (the encoder loss).
fixed_noise_size : int
Number of images shown when logging. The fixed noise is used to produce the images in the folder/images
subdirectory, the tensorboard images tab and the samples in get_training_results().
device : string
Device used while training the model. Either "cpu" or "cuda".
ngpu : int
Number of gpus used during training if device == "cuda".
folder : string
Creates a folder in the current working directory with this name. All relevant files like summary, images, models and
tensorboard output are written there. Existing folders are never overwritten or deleted. If a folder with the same name
        already exists, a time stamp is appended to make it unique.
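
    Examples
    --------
    A minimal construction sketch, purely illustrative: the toy encoder / decoder modules,
    tensor shapes and hyperparameters below are placeholder assumptions rather than a
    recommended setup, and only show how the pieces defined in this module fit together.

    >>> import torch
    >>> import torch.nn as nn
    >>> import numpy as np
    >>> from vegans.utils.layers import LayerReshape
    >>> x_dim, z_dim = (1, 32, 32), (2,)
    >>> encoder = nn.Sequential(
    ...     nn.Flatten(), nn.Linear(int(np.prod(x_dim)), 64), nn.ReLU()
    ... )
    >>> decoder = nn.Sequential(
    ...     nn.Flatten(), nn.Linear(int(np.prod(z_dim)), int(np.prod(x_dim))),
    ...     nn.Sigmoid(), LayerReshape(shape=x_dim)
    ... )
    >>> vae = VanillaVAE(encoder, decoder, x_dim=x_dim, z_dim=z_dim, lambda_KL=10)
    >>> X_batch = torch.randn(8, *x_dim)   # a batch of inputs
    >>> Z_batch = torch.randn(8, *z_dim)   # standard normal noise for the reparameterization
    >>> losses = vae.calculate_losses(X_batch, Z_batch)
    >>> # losses holds the "Autoencoder", "Kullback-Leibler" and "Reconstruction" terms.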
"""
#########################################################################
# Actions before training
#########################################################################
def __init__(
self,
encoder,
decoder,
x_dim,
z_dim,
optim=None,
optim_kwargs=None,
lambda_KL=10,
fixed_noise_size=32,
device=None,
ngpu=0,
folder="./veganModels/VanillaVAE",
secure=True):
self.decoder = Decoder(decoder, input_size=z_dim, device=device, ngpu=ngpu, secure=secure)
self.encoder = Encoder(encoder, input_size=x_dim, device=device, ngpu=ngpu, secure=secure)
self.autoencoder = Autoencoder(self.encoder, self.decoder)
self.neural_nets = {
"Autoencoder": self.autoencoder
}
super().__init__(
x_dim=x_dim, z_dim=z_dim, optim=optim, optim_kwargs=optim_kwargs, feature_layer=None,
fixed_noise_size=fixed_noise_size, device=device, folder=folder, ngpu=ngpu, secure=secure
)
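        # The encoder backbone stops short of the latent space; two small heads map its
        # flattened output to the mean and the log-variance of q(z|x), each reshaped to z_dim.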
self.mu = nn.Sequential(
nn.Flatten(),
nn.Linear(np.prod(self.encoder.output_size), np.prod(z_dim)),
LayerReshape(shape=z_dim)
).to(self.device)
self.log_variance = nn.Sequential(
nn.Flatten(),
nn.Linear(np.prod(self.encoder.output_size), np.prod(z_dim)),
LayerReshape(shape=z_dim)
).to(self.device)
self.lambda_KL = lambda_KL
self.hyperparameters["lambda_KL"] = lambda_KL
if self.secure:
# if self.encoder.output_size == self.z_dim:
# raise ValueError(
# "Encoder output size is equal to z_dim, but for VAE algorithms the encoder last layers for mu and sigma " +
# "are constructed by the algorithm itself.\nSpecify up to the second last layer for this particular encoder."
# )
assert (self.decoder.output_size == self.x_dim), (
"Decoder output shape must be equal to x_dim. {} vs. {}.".format(self.decoder.output_size, self.x_dim)
)
def _define_loss(self):
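        # Only the reconstruction term has an explicit loss module (MSE); the Kullback-Leibler
        # term is computed analytically in _calculate_autoencoder_loss below.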
loss_functions = {"Autoencoder": MSELoss()}
return loss_functions
#########################################################################
# Actions during training
#########################################################################
    def encode(self, x):
return self.encoder(x)
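    # The VAE is optimized as a single "Autoencoder" network, so the `who` argument below is
    # accepted for interface compatibility but the full loss dictionary is always returned.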
    def calculate_losses(self, X_batch, Z_batch, who=None):
losses = self._calculate_autoencoder_loss(X_batch=X_batch, Z_batch=Z_batch)
return losses
    def _calculate_autoencoder_loss(self, X_batch, Z_batch, fake_images=None):
        encoded_output = self.encode(X_batch)
        mu = self.mu(encoded_output)
        log_variance = self.log_variance(encoded_output)
        if fake_images is None:
            # Reparameterization trick: z = mu + sigma*eps with eps ~ N(0, I) supplied as
            # Z_batch and sigma = exp(0.5*log_variance), so gradients flow through mu and
            # log_variance while the sampling stays stochastic.
            Z_batch_encoded = mu + torch.exp(0.5*log_variance)*Z_batch
            fake_images = self.generate(Z_batch_encoded)
        # Closed-form KL(N(mu, sigma^2) || N(0, I)) = 0.5*sum(sigma^2 + mu^2 - log(sigma^2) - 1).
        kl_loss = 0.5*(log_variance.exp() + mu**2 - log_variance - 1).sum()
        reconstruction_loss = self.loss_functions["Autoencoder"](
            fake_images, X_batch
        )
        total_loss = reconstruction_loss + self.lambda_KL*kl_loss
return {
"Autoencoder": total_loss,
"Kullback-Leibler": self.lambda_KL*kl_loss,
"Reconstruction": reconstruction_loss,
}
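    # Taken together, the reconstruction error plus lambda_KL times the analytic Kullback-Leibler
    # term form a weighted variant of the negative evidence lower bound minimized by the VAE.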