[BUG] NaN when training with selection initializer

How to reproduce: Run the `glvq_spiral.py` file under `examples/`. The error seems to occur when a large number of prototypes is used in combination with the `StratifiedSelectionInitializer`. Using only one prototype per class, or using another initializer such as the `StratifiedMeanInitializer`, seems to make the problem go away.
2021-04-29 19:09:10 +02:00
parent 8bad54fc2d
commit fef73e2fbf
2 changed files with 82 additions and 31 deletions
--- a/examples/glvq_spiral.py
+++ b/examples/glvq_spiral.py
@@ -0,0 +1,56 @@
 """GLVQ example using the spiral dataset."""
 import pytorch_lightning as pl
 import torch
 from prototorch.components import initializers as cinit
 from prototorch.datasets.abstract import NumpyDataset
 from prototorch.datasets.spiral import make_spiral
 from prototorch.models.callbacks.visualization import VisGLVQ2D
 from prototorch.models.glvq import GLVQ
 from torch.utils.data import DataLoader
 class StopOnNaN(pl.Callback):
    """Callback that aborts training once a watched tensor contains NaN.

    Args:
        param: Tensor inspected with ``torch.isnan`` at the end of each epoch.
            It is stored by reference; no copy is made.
    """

    def __init__(self, param):
        super().__init__()
        self.param = param

    def on_epoch_end(self, trainer, pl_module, logs=None):
        """Raise if the watched tensor holds at least one NaN entry.

        Args:
            trainer: Unused; accepted to match the hook signature.
            pl_module: Unused; accepted to match the hook signature.
            logs: Unused. Defaults to ``None`` instead of a shared mutable
                ``{}`` so that no state can leak between calls.

        Raises:
            ValueError: If any element of ``self.param`` is NaN.
        """
        if torch.isnan(self.param).any():
            raise ValueError("NaN encountered. Stopping.")
 if __name__ == "__main__":
    # Spiral data, wrapped in a dataset and served in batches of 256.
    x_train, y_train = make_spiral(n_samples=600, noise=0.6)
    train_loader = DataLoader(
        NumpyDataset(x_train, y_train),
        num_workers=0,
        batch_size=256,
    )

    # Prototype initializer built from the training data.
    # (Alternative left disabled in the original script: cinit.SSI with the
    # same two tensor arguments.)
    initializer = cinit.SMI(torch.Tensor(x_train), torch.Tensor(y_train))

    # Hyperparameters handed to the model constructor.
    hparams = {
        "nclasses": 2,
        "prototypes_per_class": 20,
        "prototype_initializer": initializer,
        "lr": 0.01,
    }
    glvq = GLVQ(hparams)

    # Callbacks: the visualization callback, then the NaN check watching
    # `proto_layer.components`.
    # (Disabled variant of the visualization callback:
    # VisGLVQ2D(x_train, y_train, show_last_only=True, block=True).)
    callbacks = [
        VisGLVQ2D(x_train, y_train),
        StopOnNaN(glvq.proto_layer.components),
    ]

    # Train for at most 200 epochs.
    trainer = pl.Trainer(max_epochs=200, callbacks=callbacks)
    trainer.fit(glvq, train_loader)
--- a/prototorch/models/callbacks/visualization.py
+++ b/prototorch/models/callbacks/visualization.py
@@ -261,20 +261,29 @@ class VisPointProtos(VisWeights):
        self._show_and_save(epoch)
-class VisGLVQ2D(pl.Callback):
+class Vis2DAbstract(pl.Callback):
    def __init__(self,
                 x_train,
                 y_train,
                 title="Prototype Visualization",
-                 cmap="viridis"):
+                 cmap="viridis",
                 show_last_only=False,
                 block=False):
        super().__init__()
        self.x_train = x_train
        self.y_train = y_train
        self.title = title
        self.fig = plt.figure(self.title)
        self.cmap = cmap
        self.show_last_only = show_last_only
        self.block = block
 class VisGLVQ2D(Vis2DAbstract):
    def on_epoch_end(self, trainer, pl_module):
        if self.show_last_only:
            if trainer.current_epoch != trainer.max_epochs - 1:
                return
        protos = pl_module.prototypes
        plabels = pl_module.prototype_labels
        x_train, y_train = self.x_train, self.y_train
@@ -306,22 +315,13 @@ class VisGLVQ2D(pl.Callback):
        ax.contourf(xx, yy, y_pred, cmap=self.cmap, alpha=0.35)
        ax.set_xlim(left=x_min + 0, right=x_max - 0)
        ax.set_ylim(bottom=y_min + 0, top=y_max - 0)
-        plt.pause(0.1)
+        if not self.block:
            plt.pause(0.01)
        else:
            plt.show(block=True)
-class VisSiameseGLVQ2D(pl.Callback):
+class VisSiameseGLVQ2D(Vis2DAbstract):
    def __init__(self,
                 x_train,
                 y_train,
                 title="Prototype Visualization",
                 cmap="viridis"):
        super().__init__()
        self.x_train = x_train
        self.y_train = y_train
        self.title = title
        self.fig = plt.figure(self.title)
        self.cmap = cmap
    def on_epoch_end(self, trainer, pl_module):
        protos = pl_module.prototypes
        plabels = pl_module.prototype_labels
@@ -361,22 +361,14 @@ class VisSiameseGLVQ2D(pl.Callback):
            global_step=trainer.current_epoch,
            close=False,
        )
-        plt.pause(0.1)
+
        if not self.block:
            plt.pause(0.01)
        else:
            plt.show(block=True)
-class VisNG2D(pl.Callback):
+class VisNG2D(Vis2DAbstract):
    def __init__(self,
                 x_train,
                 y_train,
                 title="Neural Gas Visualization",
                 cmap="viridis"):
        super().__init__()
        self.x_train = x_train
        self.y_train = y_train
        self.title = title
        self.fig = plt.figure(self.title)
        self.cmap = cmap
    def on_epoch_end(self, trainer, pl_module):
        protos = pl_module.prototypes
        cmat = pl_module.topology_layer.cmat.cpu().numpy()
@@ -410,4 +402,7 @@ class VisNG2D(pl.Callback):
                        "k-",
                    )
        if not self.block:
            plt.pause(0.01)
        else:
            plt.show(block=True)