build: bump version 1.0.0a1 → 1.0.0a2

fix: Fix __init__.py
build: bump version 0.5.2 → 1.0.0a1
2022-06-03 11:52:50 +02:00 · 2022-06-03 11:40:45 +02:00 · 2022-06-03 11:07:10 +02:00 · 2022-06-03 11:06:44 +02:00 · 2022-06-03 10:39:11 +02:00 · 2022-06-02 19:55:03 +02:00
47 changed files with 1713 additions and 437 deletions
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,9 +1,11 @@
 [bumpversion]
-current_version = 0.5.0
+current_version = 1.0.0a2
 commit = True
 tag = True
-parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)
-serialize = {major}.{minor}.{patch}
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)((?P<release>[a-zA-Z0-9_.-]+))?
+serialize = 
+	{major}.{minor}.{patch}-{release}
+	{major}.{minor}.{patch}
 message = build: bump version {current_version} → {new_version}

 [bumpversion:file:setup.py]
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,9 +3,10 @@

 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.1.0
+  rev: v4.2.0
  hooks:
  - id: trailing-whitespace
+    exclude: (^\.bumpversion\.cfg$|cli_messages\.py)
  - id: end-of-file-fixer
  - id: check-yaml
  - id: check-added-large-files
@@ -23,7 +24,7 @@ repos:
  - id: isort

 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.931
+  rev: v0.950
  hooks:
  - id: mypy
    files: prototorch
@@ -42,7 +43,7 @@ repos:
  - id: python-check-blanket-noqa

 - repo: https://github.com/asottile/pyupgrade
-  rev: v2.31.0
+  rev: v2.32.1
  hooks:
  - id: pyupgrade

--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -23,7 +23,7 @@ author = "Jensun Ravichandran"

 # The full version, including alpha/beta/rc tags
 #
-release = "0.5.0"
+release = "1.0.0-a2"

 # -- General configuration ---------------------------------------------------

--- a/examples/cbc_iris.py
+++ b/examples/cbc_iris.py
@@ -1,12 +1,22 @@
 """CBC example using the Iris dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
-import torch
+from prototorch.models import CBC, VisCBC2D
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
+    # Reproducibility
+    seed_everything(seed=4)
+
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -15,11 +25,8 @@ if __name__ == "__main__":
    # Dataset
    train_ds = pt.datasets.Iris(dims=[0, 2])

-    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=42)
-
    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=32)
+    train_loader = DataLoader(train_ds, batch_size=32)

    # Hyperparameters
    hparams = dict(
@@ -30,23 +37,30 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.CBC(
+    model = CBC(
        hparams,
-        components_initializer=pt.initializers.SSCI(train_ds, noise=0.01),
-        reasonings_iniitializer=pt.initializers.
+        components_initializer=pt.initializers.SSCI(train_ds, noise=0.1),
+        reasonings_initializer=pt.initializers.
        PurePositiveReasoningsInitializer(),
    )

    # Callbacks
-    vis = pt.models.VisCBC2D(data=train_ds,
-                             title="CBC Iris Example",
-                             resolution=100,
-                             axis_off=True)
+    vis = VisCBC2D(
+        data=train_ds,
+        title="CBC Iris Example",
+        resolution=100,
+        axis_off=True,
+    )

    # Setup trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
-        callbacks=[vis],
+        callbacks=[
+            vis,
+        ],
+        detect_anomaly=True,
+        log_every_n_steps=1,
+        max_epochs=1000,
    )

    # Training loop
--- a/examples/dynamic_pruning.py
+++ b/examples/dynamic_pruning.py
@@ -1,12 +1,29 @@
 """Dynamically prune 'loser' prototypes in GLVQ-type models."""

 import argparse
+import logging
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import (
+    CELVQ,
+    PruneLoserPrototypes,
+    VisGLVQ2D,
+)
+from pytorch_lightning.callbacks import EarlyStopping
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
+    # Reproducibility
+    seed_everything(seed=4)
+
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -16,15 +33,17 @@ if __name__ == "__main__":
    num_classes = 4
    num_features = 2
    num_clusters = 1
-    train_ds = pt.datasets.Random(num_samples=500,
-                                  num_classes=num_classes,
-                                  num_features=num_features,
-                                  num_clusters=num_clusters,
-                                  separation=3.0,
-                                  seed=42)
+    train_ds = pt.datasets.Random(
+        num_samples=500,
+        num_classes=num_classes,
+        num_features=num_features,
+        num_clusters=num_clusters,
+        separation=3.0,
+        seed=42,
+    )

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=256)
+    train_loader = DataLoader(train_ds, batch_size=256)

    # Hyperparameters
    prototypes_per_class = num_clusters * 5
@@ -34,7 +53,7 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.CELVQ(
+    model = CELVQ(
        hparams,
        prototypes_initializer=pt.initializers.FVCI(2, 3.0),
    )
@@ -43,18 +62,18 @@ if __name__ == "__main__":
    model.example_input_array = torch.zeros(4, 2)

    # Summary
-    print(model)
+    logging.info(model)

    # Callbacks
-    vis = pt.models.VisGLVQ2D(train_ds)
-    pruning = pt.models.PruneLoserPrototypes(
+    vis = VisGLVQ2D(train_ds)
+    pruning = PruneLoserPrototypes(
        threshold=0.01,  # prune prototype if it wins less than 1%
        idle_epochs=20,  # pruning too early may cause problems
        prune_quota_per_epoch=2,  # prune at most 2 prototypes per epoch
        frequency=1,  # prune every epoch
        verbose=True,
    )
-    es = pl.callbacks.EarlyStopping(
+    es = EarlyStopping(
        monitor="train_loss",
        min_delta=0.001,
        patience=20,
@@ -71,10 +90,9 @@ if __name__ == "__main__":
            pruning,
            es,
        ],
-        progress_bar_refresh_rate=0,
-        terminate_on_nan=True,
-        weights_summary="full",
-        accelerator="ddp",
+        detect_anomaly=True,
+        log_every_n_steps=1,
+        max_epochs=1000,
    )

    # Training loop
--- a/examples/glvq_iris.py
+++ b/examples/glvq_iris.py
@@ -1,13 +1,24 @@
 """GLVQ example using the Iris dataset."""

 import argparse
+import logging
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import GLVQ, VisGLVQ2D
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
 from torch.optim.lr_scheduler import ExponentialLR
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=UserWarning)
+warnings.filterwarnings("ignore", category=PossibleUserWarning)

 if __name__ == "__main__":
+    # Reproducibility
+    seed_everything(seed=4)
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -17,7 +28,7 @@ if __name__ == "__main__":
    train_ds = pt.datasets.Iris(dims=[0, 2])

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=64)
+    train_loader = DataLoader(train_ds, batch_size=64, num_workers=4)

    # Hyperparameters
    hparams = dict(
@@ -29,7 +40,7 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.GLVQ(
+    model = GLVQ(
        hparams,
        optimizer=torch.optim.Adam,
        prototypes_initializer=pt.initializers.SMCI(train_ds),
@@ -41,14 +52,17 @@ if __name__ == "__main__":
    model.example_input_array = torch.zeros(4, 2)

    # Callbacks
-    vis = pt.models.VisGLVQ2D(data=train_ds)
+    vis = VisGLVQ2D(data=train_ds)

    # Setup trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
-        callbacks=[vis],
-        weights_summary="full",
-        accelerator="ddp",
+        callbacks=[
+            vis,
+        ],
+        max_epochs=100,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
@@ -58,8 +72,8 @@ if __name__ == "__main__":
    trainer.save_checkpoint("./glvq_iris.ckpt")

    # Load saved model
-    new_model = pt.models.GLVQ.load_from_checkpoint(
+    new_model = GLVQ.load_from_checkpoint(
        checkpoint_path="./glvq_iris.ckpt",
        strict=False,
    )
-    print(new_model)
+    logging.info(new_model)
--- a/examples/gmlvq_iris.py
+++ b/examples/gmlvq_iris.py
@@ -1,13 +1,25 @@
 """GMLVQ example using the Iris dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import GMLVQ, VisGMLVQ2D
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
 from torch.optim.lr_scheduler import ExponentialLR
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
+
+    # Reproducibility
+    seed_everything(seed=4)
+
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -17,7 +29,7 @@ if __name__ == "__main__":
    train_ds = pt.datasets.Iris()

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=64)
+    train_loader = DataLoader(train_ds, batch_size=64)

    # Hyperparameters
    hparams = dict(
@@ -32,7 +44,7 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.GMLVQ(
+    model = GMLVQ(
        hparams,
        optimizer=torch.optim.Adam,
        prototypes_initializer=pt.initializers.SMCI(train_ds),
@@ -44,14 +56,17 @@ if __name__ == "__main__":
    model.example_input_array = torch.zeros(4, 4)

    # Callbacks
-    vis = pt.models.VisGMLVQ2D(data=train_ds)
+    vis = VisGMLVQ2D(data=train_ds)

    # Setup trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
-        callbacks=[vis],
-        weights_summary="full",
-        accelerator="ddp",
+        callbacks=[
+            vis,
+        ],
+        max_epochs=100,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/gmlvq_mnist.py
+++ b/examples/gmlvq_mnist.py
@@ -1,14 +1,29 @@
 """GMLVQ example using the MNIST dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import (
+    ImageGMLVQ,
+    PruneLoserPrototypes,
+    VisImgComp,
+)
+from pytorch_lightning.callbacks import EarlyStopping
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
 from torchvision import transforms
 from torchvision.datasets import MNIST

+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)
+
 if __name__ == "__main__":
+    # Reproducibility
+    seed_everything(seed=4)
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -33,12 +48,8 @@ if __name__ == "__main__":
    )

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=256)
-    test_loader = torch.utils.data.DataLoader(test_ds,
-                                              num_workers=0,
-                                              batch_size=256)
+    train_loader = DataLoader(train_ds, num_workers=4, batch_size=256)
+    test_loader = DataLoader(test_ds, num_workers=4, batch_size=256)

    # Hyperparameters
    num_classes = 10
@@ -52,14 +63,14 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.ImageGMLVQ(
+    model = ImageGMLVQ(
        hparams,
        optimizer=torch.optim.Adam,
        prototypes_initializer=pt.initializers.SMCI(train_ds),
    )

    # Callbacks
-    vis = pt.models.VisImgComp(
+    vis = VisImgComp(
        data=train_ds,
        num_columns=10,
        show=False,
@@ -69,14 +80,14 @@ if __name__ == "__main__":
        embedding_data=200,
        flatten_data=False,
    )
-    pruning = pt.models.PruneLoserPrototypes(
+    pruning = PruneLoserPrototypes(
        threshold=0.01,
        idle_epochs=1,
        prune_quota_per_epoch=10,
        frequency=1,
        verbose=True,
    )
-    es = pl.callbacks.EarlyStopping(
+    es = EarlyStopping(
        monitor="train_loss",
        min_delta=0.001,
        patience=15,
@@ -90,11 +101,11 @@ if __name__ == "__main__":
        callbacks=[
            vis,
            pruning,
-            # es,
+            es,
        ],
-        terminate_on_nan=True,
-        weights_summary=None,
-        # accelerator="ddp",
+        max_epochs=1000,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/gmlvq_spiral.py
+++ b/examples/gmlvq_spiral.py
@@ -1,12 +1,28 @@
 """GMLVQ example using the spiral dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import (
+    GMLVQ,
+    PruneLoserPrototypes,
+    VisGLVQ2D,
+)
+from pytorch_lightning.callbacks import EarlyStopping
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
+    # Reproducibility
+    seed_everything(seed=4)
+
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -16,7 +32,7 @@ if __name__ == "__main__":
    train_ds = pt.datasets.Spiral(num_samples=500, noise=0.5)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=256)
+    train_loader = DataLoader(train_ds, batch_size=256)

    # Hyperparameters
    num_classes = 2
@@ -32,19 +48,19 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.GMLVQ(
+    model = GMLVQ(
        hparams,
        optimizer=torch.optim.Adam,
        prototypes_initializer=pt.initializers.SSCI(train_ds, noise=1e-2),
    )

    # Callbacks
-    vis = pt.models.VisGLVQ2D(
+    vis = VisGLVQ2D(
        train_ds,
        show_last_only=False,
        block=False,
    )
-    pruning = pt.models.PruneLoserPrototypes(
+    pruning = PruneLoserPrototypes(
        threshold=0.01,
        idle_epochs=10,
        prune_quota_per_epoch=5,
@@ -53,7 +69,7 @@ if __name__ == "__main__":
        prototypes_initializer=pt.initializers.SSCI(train_ds, noise=1e-1),
        verbose=True,
    )
-    es = pl.callbacks.EarlyStopping(
+    es = EarlyStopping(
        monitor="train_loss",
        min_delta=1.0,
        patience=5,
@@ -69,7 +85,9 @@ if __name__ == "__main__":
            es,
            pruning,
        ],
-        terminate_on_nan=True,
+        max_epochs=1000,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/gng_iris.py
+++ b/examples/gng_iris.py
@@ -1,10 +1,19 @@
 """Growing Neural Gas example using the Iris dataset."""

 import argparse
+import logging
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import GrowingNeuralGas, VisNG2D
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
    # Command-line arguments
@@ -13,11 +22,11 @@ if __name__ == "__main__":
    args = parser.parse_args()

    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=42)
+    seed_everything(seed=42)

    # Prepare the data
    train_ds = pt.datasets.Iris(dims=[0, 2])
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=64)
+    train_loader = DataLoader(train_ds, batch_size=64)

    # Hyperparameters
    hparams = dict(
@@ -27,7 +36,7 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.GrowingNeuralGas(
+    model = GrowingNeuralGas(
        hparams,
        prototypes_initializer=pt.initializers.ZCI(2),
    )
@@ -36,17 +45,20 @@ if __name__ == "__main__":
    model.example_input_array = torch.zeros(4, 2)

    # Model summary
-    print(model)
+    logging.info(model)

    # Callbacks
-    vis = pt.models.VisNG2D(data=train_loader)
+    vis = VisNG2D(data=train_loader)

    # Setup trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
+        callbacks=[
+            vis,
+        ],
        max_epochs=100,
-        callbacks=[vis],
-        weights_summary="full",
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/gtlvq_mnist.py
+++ b/examples/gtlvq_mnist.py
@@ -1,14 +1,30 @@
 """GTLVQ example using the MNIST dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import (
+    ImageGTLVQ,
+    PruneLoserPrototypes,
+    VisImgComp,
+)
+from pytorch_lightning.callbacks import EarlyStopping
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
 from torchvision import transforms
 from torchvision.datasets import MNIST

+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)
+
 if __name__ == "__main__":
+    # Reproducibility
+    seed_everything(seed=4)
+
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -33,12 +49,8 @@ if __name__ == "__main__":
    )

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=256)
-    test_loader = torch.utils.data.DataLoader(test_ds,
-                                              num_workers=0,
-                                              batch_size=256)
+    train_loader = DataLoader(train_ds, num_workers=0, batch_size=256)
+    test_loader = DataLoader(test_ds, num_workers=0, batch_size=256)

    # Hyperparameters
    num_classes = 10
@@ -52,7 +64,7 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.ImageGTLVQ(
+    model = ImageGTLVQ(
        hparams,
        optimizer=torch.optim.Adam,
        prototypes_initializer=pt.initializers.SMCI(train_ds),
@@ -61,7 +73,7 @@ if __name__ == "__main__":
            next(iter(train_loader))[0].reshape(256, 28 * 28)))

    # Callbacks
-    vis = pt.models.VisImgComp(
+    vis = VisImgComp(
        data=train_ds,
        num_columns=10,
        show=False,
@@ -71,14 +83,14 @@ if __name__ == "__main__":
        embedding_data=200,
        flatten_data=False,
    )
-    pruning = pt.models.PruneLoserPrototypes(
+    pruning = PruneLoserPrototypes(
        threshold=0.01,
        idle_epochs=1,
        prune_quota_per_epoch=10,
        frequency=1,
        verbose=True,
    )
-    es = pl.callbacks.EarlyStopping(
+    es = EarlyStopping(
        monitor="train_loss",
        min_delta=0.001,
        patience=15,
@@ -93,11 +105,11 @@ if __name__ == "__main__":
        callbacks=[
            vis,
            pruning,
-            # es,
+            es,
        ],
-        terminate_on_nan=True,
-        weights_summary=None,
-        accelerator="ddp",
+        max_epochs=1000,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/gtlvq_moons.py
+++ b/examples/gtlvq_moons.py
@@ -1,10 +1,20 @@
 """Localized-GTLVQ example using the Moons dataset."""

 import argparse
+import logging
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import GTLVQ, VisGLVQ2D
+from pytorch_lightning.callbacks import EarlyStopping
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
    # Command-line arguments
@@ -13,33 +23,35 @@ if __name__ == "__main__":
    args = parser.parse_args()

    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=2)
+    seed_everything(seed=2)

    # Dataset
    train_ds = pt.datasets.Moons(num_samples=300, noise=0.2, seed=42)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               batch_size=256,
-                                               shuffle=True)
+    train_loader = DataLoader(
+        train_ds,
+        batch_size=256,
+        shuffle=True,
+    )

    # Hyperparameters
    # Latent_dim should be lower than input dim.
    hparams = dict(distribution=[1, 3], input_dim=2, latent_dim=1)

    # Initialize the model
-    model = pt.models.GTLVQ(
-        hparams, prototypes_initializer=pt.initializers.SMCI(train_ds))
+    model = GTLVQ(hparams,
+                  prototypes_initializer=pt.initializers.SMCI(train_ds))

    # Compute intermediate input and output sizes
    model.example_input_array = torch.zeros(4, 2)

    # Summary
-    print(model)
+    logging.info(model)

    # Callbacks
-    vis = pt.models.VisGLVQ2D(data=train_ds)
-    es = pl.callbacks.EarlyStopping(
+    vis = VisGLVQ2D(data=train_ds)
+    es = EarlyStopping(
        monitor="train_acc",
        min_delta=0.001,
        patience=20,
@@ -55,8 +67,9 @@ if __name__ == "__main__":
            vis,
            es,
        ],
-        weights_summary="full",
-        accelerator="ddp",
+        max_epochs=1000,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/knn_iris.py
+++ b/examples/knn_iris.py
@@ -1,12 +1,19 @@
 """k-NN example using the Iris dataset from scikit-learn."""

 import argparse
+import logging
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import KNN, VisGLVQ2D
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)

 if __name__ == "__main__":
    # Command-line arguments
@@ -16,34 +23,36 @@ if __name__ == "__main__":

    # Dataset
    X, y = load_iris(return_X_y=True)
-    X = X[:, [0, 2]]
+    X = X[:, 0:3:2]

-    X_train, X_test, y_train, y_test = train_test_split(X,
-                                                        y,
-                                                        test_size=0.5,
-                                                        random_state=42)
+    X_train, X_test, y_train, y_test = train_test_split(
+        X,
+        y,
+        test_size=0.5,
+        random_state=42,
+    )

    train_ds = pt.datasets.NumpyDataset(X_train, y_train)
    test_ds = pt.datasets.NumpyDataset(X_test, y_test)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=16)
-    test_loader = torch.utils.data.DataLoader(test_ds, batch_size=16)
+    train_loader = DataLoader(train_ds, batch_size=16)
+    test_loader = DataLoader(test_ds, batch_size=16)

    # Hyperparameters
    hparams = dict(k=5)

    # Initialize the model
-    model = pt.models.KNN(hparams, data=train_ds)
+    model = KNN(hparams, data=train_ds)

    # Compute intermediate input and output sizes
    model.example_input_array = torch.zeros(4, 2)

    # Summary
-    print(model)
+    logging.info(model)

    # Callbacks
-    vis = pt.models.VisGLVQ2D(
+    vis = VisGLVQ2D(
        data=(X_train, y_train),
        resolution=200,
        block=True,
@@ -53,8 +62,11 @@ if __name__ == "__main__":
    trainer = pl.Trainer.from_argparse_args(
        args,
        max_epochs=1,
-        callbacks=[vis],
-        weights_summary="full",
+        callbacks=[
+            vis,
+        ],
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
@@ -63,7 +75,7 @@ if __name__ == "__main__":

    # Recall
    y_pred = model.predict(torch.tensor(X_train))
-    print(y_pred)
+    logging.info(y_pred)

    # Test
    trainer.test(model, dataloaders=test_loader)
--- a/examples/ksom_colors.py
+++ b/examples/ksom_colors.py
@@ -1,12 +1,21 @@
 """Kohonen Self Organizing Map."""

 import argparse
+import logging
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
 from matplotlib import pyplot as plt
+from prototorch.models import KohonenSOM
 from prototorch.utils.colors import hex_to_rgb
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader, TensorDataset
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)


 class Vis2DColorSOM(pl.Callback):
@@ -18,7 +27,7 @@ class Vis2DColorSOM(pl.Callback):
        self.data = data
        self.pause_time = pause_time

-    def on_epoch_end(self, trainer, pl_module):
+    def on_train_epoch_end(self, trainer, pl_module: KohonenSOM):
        ax = self.fig.gca()
        ax.cla()
        ax.set_title(self.title)
@@ -31,12 +40,14 @@ class Vis2DColorSOM(pl.Callback):
        d = pl_module.compute_distances(self.data)
        wp = pl_module.predict_from_distances(d)
        for i, iloc in enumerate(wp):
-            plt.text(iloc[1],
-                     iloc[0],
-                     cnames[i],
-                     ha="center",
-                     va="center",
-                     bbox=dict(facecolor="white", alpha=0.5, lw=0))
+            plt.text(
+                iloc[1],
+                iloc[0],
+                color_names[i],
+                ha="center",
+                va="center",
+                bbox=dict(facecolor="white", alpha=0.5, lw=0),
+            )

        if trainer.current_epoch != trainer.max_epochs - 1:
            plt.pause(self.pause_time)
@@ -51,7 +62,7 @@ if __name__ == "__main__":
    args = parser.parse_args()

    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=42)
+    seed_everything(seed=42)

    # Prepare the data
    hex_colors = [
@@ -59,15 +70,15 @@ if __name__ == "__main__":
        "#00ff00", "#ff0000", "#00ffff", "#ff00ff", "#ffff00", "#ffffff",
        "#545454", "#7f7f7f", "#a8a8a8", "#808000", "#800080", "#ffa500"
    ]
-    cnames = [
+    color_names = [
        "black", "blue", "darkblue", "skyblue", "greyblue", "lilac", "green",
        "red", "cyan", "magenta", "yellow", "white", "darkgrey", "mediumgrey",
        "lightgrey", "olive", "purple", "orange"
    ]
    colors = list(hex_to_rgb(hex_colors))
    data = torch.Tensor(colors) / 255.0
-    train_ds = torch.utils.data.TensorDataset(data)
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=8)
+    train_ds = TensorDataset(data)
+    train_loader = DataLoader(train_ds, batch_size=8)

    # Hyperparameters
    hparams = dict(
@@ -78,7 +89,7 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.KohonenSOM(
+    model = KohonenSOM(
        hparams,
        prototypes_initializer=pt.initializers.RNCI(3),
    )
@@ -87,7 +98,7 @@ if __name__ == "__main__":
    model.example_input_array = torch.zeros(4, 3)

    # Model summary
-    print(model)
+    logging.info(model)

    # Callbacks
    vis = Vis2DColorSOM(data=data)
@@ -96,8 +107,11 @@ if __name__ == "__main__":
    trainer = pl.Trainer.from_argparse_args(
        args,
        max_epochs=500,
-        callbacks=[vis],
-        weights_summary="full",
+        callbacks=[
+            vis,
+        ],
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/lgmlvq_moons.py
+++ b/examples/lgmlvq_moons.py
@@ -1,10 +1,20 @@
 """Localized-GMLVQ example using the Moons dataset."""

 import argparse
+import logging
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import LGMLVQ, VisGLVQ2D
+from pytorch_lightning.callbacks import EarlyStopping
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
    # Command-line arguments
@@ -13,15 +23,13 @@ if __name__ == "__main__":
    args = parser.parse_args()

    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=2)
+    seed_everything(seed=2)

    # Dataset
    train_ds = pt.datasets.Moons(num_samples=300, noise=0.2, seed=42)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               batch_size=256,
-                                               shuffle=True)
+    train_loader = DataLoader(train_ds, batch_size=256, shuffle=True)

    # Hyperparameters
    hparams = dict(
@@ -31,7 +39,7 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.LGMLVQ(
+    model = LGMLVQ(
        hparams,
        prototypes_initializer=pt.initializers.SMCI(train_ds),
    )
@@ -40,11 +48,11 @@ if __name__ == "__main__":
    model.example_input_array = torch.zeros(4, 2)

    # Summary
-    print(model)
+    logging.info(model)

    # Callbacks
-    vis = pt.models.VisGLVQ2D(data=train_ds)
-    es = pl.callbacks.EarlyStopping(
+    vis = VisGLVQ2D(data=train_ds)
+    es = EarlyStopping(
        monitor="train_acc",
        min_delta=0.001,
        patience=20,
@@ -60,8 +68,9 @@ if __name__ == "__main__":
            vis,
            es,
        ],
-        weights_summary="full",
-        accelerator="ddp",
+        log_every_n_steps=1,
+        max_epochs=1000,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/lvqmln_iris.py
+++ b/examples/lvqmln_iris.py
@@ -1,10 +1,22 @@
 """LVQMLN example using all four dimensions of the Iris dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import (
+    LVQMLN,
+    PruneLoserPrototypes,
+    VisSiameseGLVQ2D,
+)
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)


 class Backbone(torch.nn.Module):
@@ -34,10 +46,10 @@ if __name__ == "__main__":
    train_ds = pt.datasets.Iris()

    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=42)
+    seed_everything(seed=42)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=150)
+    train_loader = DataLoader(train_ds, batch_size=150)

    # Hyperparameters
    hparams = dict(
@@ -50,7 +62,7 @@ if __name__ == "__main__":
    backbone = Backbone()

    # Initialize the model
-    model = pt.models.LVQMLN(
+    model = LVQMLN(
        hparams,
        prototypes_initializer=pt.initializers.SSCI(
            train_ds,
@@ -59,18 +71,15 @@ if __name__ == "__main__":
        backbone=backbone,
    )

-    # Model summary
-    print(model)
-
    # Callbacks
-    vis = pt.models.VisSiameseGLVQ2D(
+    vis = VisSiameseGLVQ2D(
        data=train_ds,
        map_protos=False,
        border=0.1,
        resolution=500,
        axis_off=True,
    )
-    pruning = pt.models.PruneLoserPrototypes(
+    pruning = PruneLoserPrototypes(
        threshold=0.01,
        idle_epochs=20,
        prune_quota_per_epoch=2,
@@ -85,6 +94,9 @@ if __name__ == "__main__":
            vis,
            pruning,
        ],
+        log_every_n_steps=1,
+        max_epochs=1000,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/median_lvq_iris.py
+++ b/examples/median_lvq_iris.py
@@ -1,12 +1,23 @@
 """Median-LVQ example using the Iris dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import MedianLVQ, VisGLVQ2D
+from pytorch_lightning.callbacks import EarlyStopping
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
+    # Reproducibility
+    seed_everything(seed=4)
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -16,13 +27,13 @@ if __name__ == "__main__":
    train_ds = pt.datasets.Iris(dims=[0, 2])

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(
+    train_loader = DataLoader(
        train_ds,
        batch_size=len(train_ds),  # MedianLVQ cannot handle mini-batches
    )

    # Initialize the model
-    model = pt.models.MedianLVQ(
+    model = MedianLVQ(
        hparams=dict(distribution=(3, 2), lr=0.01),
        prototypes_initializer=pt.initializers.SSCI(train_ds),
    )
@@ -31,8 +42,8 @@ if __name__ == "__main__":
    model.example_input_array = torch.zeros(4, 2)

    # Callbacks
-    vis = pt.models.VisGLVQ2D(data=train_ds)
-    es = pl.callbacks.EarlyStopping(
+    vis = VisGLVQ2D(data=train_ds)
+    es = EarlyStopping(
        monitor="train_acc",
        min_delta=0.01,
        patience=5,
@@ -44,8 +55,13 @@ if __name__ == "__main__":
    # Setup trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
-        callbacks=[vis, es],
-        weights_summary="full",
+        callbacks=[
+            vis,
+            es,
+        ],
+        max_epochs=1000,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/ng_iris.py
+++ b/examples/ng_iris.py
@@ -1,15 +1,26 @@
 """Neural Gas example using the Iris dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import NeuralGas, VisNG2D
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
 from sklearn.datasets import load_iris
 from sklearn.preprocessing import StandardScaler
 from torch.optim.lr_scheduler import ExponentialLR
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
+    # Reproducibility
+    seed_everything(seed=4)
+
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -17,7 +28,7 @@ if __name__ == "__main__":

    # Prepare and pre-process the dataset
    x_train, y_train = load_iris(return_X_y=True)
-    x_train = x_train[:, [0, 2]]
+    x_train = x_train[:, 0:3:2]
    scaler = StandardScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
@@ -25,7 +36,7 @@ if __name__ == "__main__":
    train_ds = pt.datasets.NumpyDataset(x_train, y_train)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=150)
+    train_loader = DataLoader(train_ds, batch_size=150)

    # Hyperparameters
    hparams = dict(
@@ -35,7 +46,7 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.NeuralGas(
+    model = NeuralGas(
        hparams,
        prototypes_initializer=pt.core.ZCI(2),
        lr_scheduler=ExponentialLR,
@@ -45,17 +56,18 @@ if __name__ == "__main__":
    # Compute intermediate input and output sizes
    model.example_input_array = torch.zeros(4, 2)

-    # Model summary
-    print(model)
-
    # Callbacks
-    vis = pt.models.VisNG2D(data=train_ds)
+    vis = VisNG2D(data=train_ds)

    # Setup trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
-        callbacks=[vis],
-        weights_summary="full",
+        callbacks=[
+            vis,
+        ],
+        max_epochs=1000,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/rslvq_iris.py
+++ b/examples/rslvq_iris.py
@@ -1,10 +1,18 @@
 """RSLVQ example using the Iris dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import RSLVQ, VisGLVQ2D
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 if __name__ == "__main__":
    # Command-line arguments
@@ -13,13 +21,13 @@ if __name__ == "__main__":
    args = parser.parse_args()

    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=42)
+    seed_everything(seed=42)

    # Dataset
    train_ds = pt.datasets.Iris(dims=[0, 2])

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=64)
+    train_loader = DataLoader(train_ds, batch_size=64)

    # Hyperparameters
    hparams = dict(
@@ -33,7 +41,7 @@ if __name__ == "__main__":
    )

    # Initialize the model
-    model = pt.models.RSLVQ(
+    model = RSLVQ(
        hparams,
        optimizer=torch.optim.Adam,
        prototypes_initializer=pt.initializers.SSCI(train_ds, noise=0.2),
@@ -42,19 +50,18 @@ if __name__ == "__main__":
    # Compute intermediate input and output sizes
    model.example_input_array = torch.zeros(4, 2)

-    # Summary
-    print(model)
-
    # Callbacks
-    vis = pt.models.VisGLVQ2D(data=train_ds)
+    vis = VisGLVQ2D(data=train_ds)

    # Setup trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
-        callbacks=[vis],
-        terminate_on_nan=True,
-        weights_summary="full",
-        accelerator="ddp",
+        callbacks=[
+            vis,
+        ],
+        detect_anomaly=True,
+        max_epochs=100,
+        log_every_n_steps=1,
    )

    # Training loop
--- a/examples/siamese_glvq_iris.py
+++ b/examples/siamese_glvq_iris.py
@@ -1,10 +1,18 @@
 """Siamese GLVQ example using all four dimensions of the Iris dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import SiameseGLVQ, VisSiameseGLVQ2D
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)


 class Backbone(torch.nn.Module):
@@ -34,10 +42,10 @@ if __name__ == "__main__":
    train_ds = pt.datasets.Iris()

    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=2)
+    seed_everything(seed=2)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=150)
+    train_loader = DataLoader(train_ds, batch_size=150)

    # Hyperparameters
    hparams = dict(
@@ -50,23 +58,25 @@ if __name__ == "__main__":
    backbone = Backbone()

    # Initialize the model
-    model = pt.models.SiameseGLVQ(
+    model = SiameseGLVQ(
        hparams,
        prototypes_initializer=pt.initializers.SMCI(train_ds),
        backbone=backbone,
        both_path_gradients=False,
    )

-    # Model summary
-    print(model)
-
    # Callbacks
-    vis = pt.models.VisSiameseGLVQ2D(data=train_ds, border=0.1)
+    vis = VisSiameseGLVQ2D(data=train_ds, border=0.1)

    # Setup trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
-        callbacks=[vis],
+        callbacks=[
+            vis,
+        ],
+        max_epochs=1000,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/siamese_gtlvq_iris.py
+++ b/examples/siamese_gtlvq_iris.py
@@ -1,10 +1,18 @@
 """Siamese GTLVQ example using all four dimensions of the Iris dataset."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import SiameseGTLVQ, VisSiameseGLVQ2D
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)


 class Backbone(torch.nn.Module):
@@ -34,39 +42,43 @@ if __name__ == "__main__":
    train_ds = pt.datasets.Iris()

    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=2)
+    seed_everything(seed=2)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=150)
+    train_loader = DataLoader(train_ds, batch_size=150)

    # Hyperparameters
-    hparams = dict(distribution=[1, 2, 3],
-                   proto_lr=0.01,
-                   bb_lr=0.01,
-                   input_dim=2,
-                   latent_dim=1)
+    hparams = dict(
+        distribution=[1, 2, 3],
+        proto_lr=0.01,
+        bb_lr=0.01,
+        input_dim=2,
+        latent_dim=1,
+    )

    # Initialize the backbone
    backbone = Backbone(latent_size=hparams["input_dim"])

    # Initialize the model
-    model = pt.models.SiameseGTLVQ(
+    model = SiameseGTLVQ(
        hparams,
        prototypes_initializer=pt.initializers.SMCI(train_ds),
        backbone=backbone,
        both_path_gradients=False,
    )

-    # Model summary
-    print(model)
-
    # Callbacks
-    vis = pt.models.VisSiameseGLVQ2D(data=train_ds, border=0.1)
+    vis = VisSiameseGLVQ2D(data=train_ds, border=0.1)

    # Setup trainer
    trainer = pl.Trainer.from_argparse_args(
        args,
-        callbacks=[vis],
+        callbacks=[
+            vis,
+        ],
+        max_epochs=1000,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/warm_starting.py
+++ b/examples/warm_starting.py
@@ -1,13 +1,30 @@
 """Warm-starting GLVQ with prototypes from Growing Neural Gas."""

 import argparse
+import warnings

 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from prototorch.models import (
+    GLVQ,
+    KNN,
+    GrowingNeuralGas,
+    PruneLoserPrototypes,
+    VisGLVQ2D,
+)
+from pytorch_lightning.callbacks import EarlyStopping
+from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.warnings import PossibleUserWarning
 from torch.optim.lr_scheduler import ExponentialLR
+from torch.utils.data import DataLoader
+
+warnings.filterwarnings("ignore", category=PossibleUserWarning)

 if __name__ == "__main__":
+
+    # Reproducibility
+    seed_everything(seed=4)
    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
@@ -15,10 +32,10 @@ if __name__ == "__main__":

    # Prepare the data
    train_ds = pt.datasets.Iris(dims=[0, 2])
-    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=64)
+    train_loader = DataLoader(train_ds, batch_size=64, num_workers=0)

    # Initialize the gng
-    gng = pt.models.GrowingNeuralGas(
+    gng = GrowingNeuralGas(
        hparams=dict(num_prototypes=5, insert_freq=2, lr=0.1),
        prototypes_initializer=pt.initializers.ZCI(2),
        lr_scheduler=ExponentialLR,
@@ -26,7 +43,7 @@ if __name__ == "__main__":
    )

    # Callbacks
-    es = pl.callbacks.EarlyStopping(
+    es = EarlyStopping(
        monitor="loss",
        min_delta=0.001,
        patience=20,
@@ -37,9 +54,12 @@ if __name__ == "__main__":

    # Setup trainer for GNG
    trainer = pl.Trainer(
-        max_epochs=100,
-        callbacks=[es],
-        weights_summary=None,
+        max_epochs=1000,
+        callbacks=[
+            es,
+        ],
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
@@ -52,12 +72,12 @@ if __name__ == "__main__":
    )

    # Warm-start prototypes
-    knn = pt.models.KNN(dict(k=1), data=train_ds)
+    knn = KNN(dict(k=1), data=train_ds)
    prototypes = gng.prototypes
    plabels = knn.predict(prototypes)

    # Initialize the model
-    model = pt.models.GLVQ(
+    model = GLVQ(
        hparams,
        optimizer=torch.optim.Adam,
        prototypes_initializer=pt.initializers.LCI(prototypes),
@@ -70,15 +90,15 @@ if __name__ == "__main__":
    model.example_input_array = torch.zeros(4, 2)

    # Callbacks
-    vis = pt.models.VisGLVQ2D(data=train_ds)
-    pruning = pt.models.PruneLoserPrototypes(
+    vis = VisGLVQ2D(data=train_ds)
+    pruning = PruneLoserPrototypes(
        threshold=0.02,
        idle_epochs=2,
        prune_quota_per_epoch=5,
        frequency=1,
        verbose=True,
    )
-    es = pl.callbacks.EarlyStopping(
+    es = EarlyStopping(
        monitor="train_loss",
        min_delta=0.001,
        patience=10,
@@ -95,8 +115,9 @@ if __name__ == "__main__":
            pruning,
            es,
        ],
-        weights_summary="full",
-        accelerator="ddp",
+        max_epochs=1000,
+        log_every_n_steps=1,
+        detect_anomaly=True,
    )

    # Training loop
--- a/examples/y_architecture_example.py
+++ b/examples/y_architecture_example.py
@@ -0,0 +1,88 @@
+import prototorch as pt
+import pytorch_lightning as pl
+import torchmetrics
+from prototorch.core import SMCI
+from prototorch.y.callbacks import (
+    LogTorchmetricCallback,
+    PlotLambdaMatrixToTensorboard,
+    VisGMLVQ2D,
+)
+from prototorch.y.library.gmlvq import GMLVQ
+from pytorch_lightning.callbacks import EarlyStopping
+from torch.utils.data import DataLoader
+
+# ##############################################################################
+
+if __name__ == "__main__":
+
+    # ------------------------------------------------------------
+    # DATA
+    # ------------------------------------------------------------
+
+    # Dataset
+    train_ds = pt.datasets.Iris()
+
+    # Dataloader
+    train_loader = DataLoader(
+        train_ds,
+        batch_size=32,
+        num_workers=0,
+        shuffle=True,
+    )
+
+    # ------------------------------------------------------------
+    # HYPERPARAMETERS
+    # ------------------------------------------------------------
+
+    # Select Initializer
+    components_initializer = SMCI(train_ds)
+
+    # Define Hyperparameters
+    hyperparameters = GMLVQ.HyperParameters(
+        lr=dict(components_layer=0.1, _omega=0),
+        input_dim=4,
+        distribution=dict(
+            num_classes=3,
+            per_class=1,
+        ),
+        component_initializer=components_initializer,
+    )
+
+    # Create Model
+    model = GMLVQ(hyperparameters)
+
+    print(model)
+
+    # ------------------------------------------------------------
+    # TRAINING
+    # ------------------------------------------------------------
+
+    # Controlling Callbacks
+    stopping_criterion = LogTorchmetricCallback(
+        'recall',
+        torchmetrics.Recall,
+        num_classes=3,
+    )
+
+    es = EarlyStopping(
+        monitor=stopping_criterion.name,
+        mode="max",
+        patience=10,
+    )
+
+    # Visualization Callback
+    vis = VisGMLVQ2D(data=train_ds)
+
+    # Define trainer
+    trainer = pl.Trainer(
+        callbacks=[
+            vis,
+            stopping_criterion,
+            es,
+            PlotLambdaMatrixToTensorboard(),
+        ],
+        max_epochs=1000,
+    )
+
+    # Train
+    trainer.fit(model, train_loader)
--- a/prototorch/models/__init__.py
+++ b/prototorch/models/__init__.py
@@ -36,4 +36,4 @@ from .unsupervised import (
 )
 from .vis import *

-__version__ = "0.5.0"
+__version__ = "1.0.0-a2"
--- a/prototorch/models/abstract.py
+++ b/prototorch/models/abstract.py
@@ -1,19 +1,37 @@
 """Abstract classes to be inherited by prototorch models."""

+import logging
+
 import pytorch_lightning as pl
 import torch
+import torch.nn.functional as F
 import torchmetrics
-
-from ..core.competitions import WTAC
-from ..core.components import Components, LabeledComponents
-from ..core.distances import euclidean_distance
-from ..core.initializers import LabelsInitializer, ZerosCompInitializer
-from ..core.pooling import stratified_min_pooling
-from ..nn.wrappers import LambdaLayer
+from prototorch.core.competitions import WTAC
+from prototorch.core.components import (
+    AbstractComponents,
+    Components,
+    LabeledComponents,
+)
+from prototorch.core.distances import euclidean_distance
+from prototorch.core.initializers import (
+    LabelsInitializer,
+    ZerosCompInitializer,
+)
+from prototorch.core.pooling import stratified_min_pooling
+from prototorch.nn.wrappers import LambdaLayer


 class ProtoTorchBolt(pl.LightningModule):
-    """All ProtoTorch models are ProtoTorch Bolts."""
+    """All ProtoTorch models are ProtoTorch Bolts.
+
+    hparams:
+        - lr: learning rate
+
+    kwargs:
+        - optimizer: optimizer class
+        - lr_scheduler: learning rate scheduler class
+        - lr_scheduler_kwargs: learning rate scheduler kwargs
+    """

    def __init__(self, hparams, **kwargs):
        super().__init__()
@@ -30,7 +48,7 @@ class ProtoTorchBolt(pl.LightningModule):
        self.lr_scheduler_kwargs = kwargs.get("lr_scheduler_kwargs", dict())

    def configure_optimizers(self):
-        optimizer = self.optimizer(self.parameters(), lr=self.hparams.lr)
+        optimizer = self.optimizer(self.parameters(), lr=self.hparams["lr"])
        if self.lr_scheduler is not None:
            scheduler = self.lr_scheduler(optimizer,
                                          **self.lr_scheduler_kwargs)
@@ -43,7 +61,10 @@ class ProtoTorchBolt(pl.LightningModule):
            return optimizer

    def reconfigure_optimizers(self):
-        self.trainer.strategy.setup_optimizers(self.trainer)
+        if self.trainer:
+            self.trainer.strategy.setup_optimizers(self.trainer)
+        else:
+            logging.warning("No trainer to reconfigure optimizers!")

    def __repr__(self):
        surep = super().__repr__()
@@ -53,6 +74,12 @@ class ProtoTorchBolt(pl.LightningModule):


 class PrototypeModel(ProtoTorchBolt):
+    """Abstract Prototype Model
+
+    kwargs:
+        - distance_fn: distance function
+    """
+    proto_layer: AbstractComponents

    def __init__(self, hparams, **kwargs):
        super().__init__(hparams, **kwargs)
@@ -75,16 +102,17 @@ class PrototypeModel(ProtoTorchBolt):

    def add_prototypes(self, *args, **kwargs):
        self.proto_layer.add_components(*args, **kwargs)
-        self.hparams.distribution = self.proto_layer.distribution
+        self.hparams["distribution"] = self.proto_layer.distribution
        self.reconfigure_optimizers()

    def remove_prototypes(self, indices):
        self.proto_layer.remove_components(indices)
-        self.hparams.distribution = self.proto_layer.distribution
+        self.hparams["distribution"] = self.proto_layer.distribution
        self.reconfigure_optimizers()


 class UnsupervisedPrototypeModel(PrototypeModel):
+    proto_layer: Components

    def __init__(self, hparams, **kwargs):
        super().__init__(hparams, **kwargs)
@@ -93,7 +121,7 @@ class UnsupervisedPrototypeModel(PrototypeModel):
        prototypes_initializer = kwargs.get("prototypes_initializer", None)
        if prototypes_initializer is not None:
            self.proto_layer = Components(
-                self.hparams.num_prototypes,
+                self.hparams["num_prototypes"],
                initializer=prototypes_initializer,
            )

@@ -108,6 +136,7 @@ class UnsupervisedPrototypeModel(PrototypeModel):


 class SupervisedPrototypeModel(PrototypeModel):
+    proto_layer: LabeledComponents

    def __init__(self, hparams, skip_proto_layer=False, **kwargs):
        super().__init__(hparams, **kwargs)
@@ -127,13 +156,13 @@ class SupervisedPrototypeModel(PrototypeModel):
                    labels_initializer=labels_initializer,
                )
                proto_shape = self.proto_layer.components.shape[1:]
-                self.hparams.initialized_proto_shape = proto_shape
+                self.hparams["initialized_proto_shape"] = proto_shape
            else:
                # when restoring a checkpointed model
                self.proto_layer = LabeledComponents(
                    distribution=distribution,
                    components_initializer=ZerosCompInitializer(
-                        self.hparams.initialized_proto_shape),
+                        self.hparams["initialized_proto_shape"]),
                )
        self.competition_layer = WTAC()

@@ -154,7 +183,7 @@ class SupervisedPrototypeModel(PrototypeModel):
        distances = self.compute_distances(x)
        _, plabels = self.proto_layer()
        winning = stratified_min_pooling(distances, plabels)
-        y_pred = torch.nn.functional.softmin(winning, dim=1)
+        y_pred = F.softmin(winning, dim=1)
        return y_pred

    def predict_from_distances(self, distances):
@@ -188,33 +217,3 @@ class SupervisedPrototypeModel(PrototypeModel):
        accuracy = torchmetrics.functional.accuracy(preds.int(), targets.int())

        self.log("test_acc", accuracy)
-
-
-class ProtoTorchMixin(object):
-    """All mixins are ProtoTorchMixins."""
-
-
-class NonGradientMixin(ProtoTorchMixin):
-    """Mixin for custom non-gradient optimization."""
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.automatic_optimization = False
-
-    def training_step(self, train_batch, batch_idx, optimizer_idx=None):
-        raise NotImplementedError
-
-
-class ImagePrototypesMixin(ProtoTorchMixin):
-    """Mixin for models with image prototypes."""
-
-    def on_train_batch_end(self, outputs, batch, batch_idx, dataloader_idx):
-        """Constrain the components to the range [0, 1] by clamping after updates."""
-        self.proto_layer.components.data.clamp_(0.0, 1.0)
-
-    def get_prototype_grid(self, num_columns=2, return_channels_last=True):
-        from torchvision.utils import make_grid
-        grid = make_grid(self.components, nrow=num_columns)
-        if return_channels_last:
-            grid = grid.permute((1, 2, 0))
-        return grid.cpu()
--- a/prototorch/models/callbacks.py
+++ b/prototorch/models/callbacks.py
@@ -1,25 +1,30 @@
 """Lightning Callbacks."""

 import logging
+from typing import TYPE_CHECKING

 import pytorch_lightning as pl
 import torch
+from prototorch.core.initializers import LiteralCompInitializer

-from ..core.components import Components
-from ..core.initializers import LiteralCompInitializer
 from .extras import ConnectionTopology

+if TYPE_CHECKING:
+    from prototorch.models import GLVQ, GrowingNeuralGas
+

 class PruneLoserPrototypes(pl.Callback):

-    def __init__(self,
-                 threshold=0.01,
-                 idle_epochs=10,
-                 prune_quota_per_epoch=-1,
-                 frequency=1,
-                 replace=False,
-                 prototypes_initializer=None,
-                 verbose=False):
+    def __init__(
+        self,
+        threshold=0.01,
+        idle_epochs=10,
+        prune_quota_per_epoch=-1,
+        frequency=1,
+        replace=False,
+        prototypes_initializer=None,
+        verbose=False,
+    ):
        self.threshold = threshold  # minimum win ratio
        self.idle_epochs = idle_epochs  # epochs to wait before pruning
        self.prune_quota_per_epoch = prune_quota_per_epoch
@@ -28,42 +33,44 @@ class PruneLoserPrototypes(pl.Callback):
        self.verbose = verbose
        self.prototypes_initializer = prototypes_initializer

-    def on_epoch_end(self, trainer, pl_module):
+    def on_train_epoch_end(self, trainer, pl_module: "GLVQ"):
        if (trainer.current_epoch + 1) < self.idle_epochs:
            return None
        if (trainer.current_epoch + 1) % self.frequency:
            return None

        ratios = pl_module.prototype_win_ratios.mean(dim=0)
-        to_prune = torch.arange(len(ratios))[ratios < self.threshold]
-        to_prune = to_prune.tolist()
+        to_prune_tensor = torch.arange(len(ratios))[ratios < self.threshold]
+        to_prune = to_prune_tensor.tolist()
        prune_labels = pl_module.prototype_labels[to_prune]
        if self.prune_quota_per_epoch > 0:
            to_prune = to_prune[:self.prune_quota_per_epoch]
            prune_labels = prune_labels[:self.prune_quota_per_epoch]

        if len(to_prune) > 0:
-            if self.verbose:
-                print(f"\nPrototype win ratios: {ratios}")
-                print(f"Pruning prototypes at: {to_prune}")
-                print(f"Corresponding labels are: {prune_labels.tolist()}")
+            logging.debug(f"\nPrototype win ratios: {ratios}")
+            logging.debug(f"Pruning prototypes at: {to_prune}")
+            logging.debug(f"Corresponding labels are: {prune_labels.tolist()}")
+
            cur_num_protos = pl_module.num_prototypes
            pl_module.remove_prototypes(indices=to_prune)
+
            if self.replace:
                labels, counts = torch.unique(prune_labels,
                                              sorted=True,
                                              return_counts=True)
                distribution = dict(zip(labels.tolist(), counts.tolist()))
-                if self.verbose:
-                    print(f"Re-adding pruned prototypes...")
-                    print(f"distribution={distribution}")
+
+                logging.info(f"Re-adding pruned prototypes...")
+                logging.debug(f"distribution={distribution}")
+
                pl_module.add_prototypes(
                    distribution=distribution,
                    components_initializer=self.prototypes_initializer)
            new_num_protos = pl_module.num_prototypes
-            if self.verbose:
-                print(f"`num_prototypes` changed from {cur_num_protos} "
-                      f"to {new_num_protos}.")
+
+            logging.info(f"`num_prototypes` changed from {cur_num_protos} "
+                         f"to {new_num_protos}.")
        return True


@@ -74,11 +81,11 @@ class PrototypeConvergence(pl.Callback):
        self.idle_epochs = idle_epochs  # epochs to wait
        self.verbose = verbose

-    def on_epoch_end(self, trainer, pl_module):
+    def on_train_epoch_end(self, trainer, pl_module):
        if (trainer.current_epoch + 1) < self.idle_epochs:
            return None
-        if self.verbose:
-            print("Stopping...")
+
+        logging.info("Stopping...")
        # TODO
        return True

@@ -96,12 +103,16 @@ class GNGCallback(pl.Callback):
        self.reduction = reduction
        self.freq = freq

-    def on_epoch_end(self, trainer: pl.Trainer, pl_module):
+    def on_train_epoch_end(
+        self,
+        trainer: pl.Trainer,
+        pl_module: "GrowingNeuralGas",
+    ):
        if (trainer.current_epoch + 1) % self.freq == 0:
            # Get information
            errors = pl_module.errors
            topology: ConnectionTopology = pl_module.topology_layer
-            components: Components = pl_module.proto_layer.components
+            components = pl_module.proto_layer.components

            # Insertion point
            worst = torch.argmax(errors)
@@ -121,8 +132,9 @@ class GNGCallback(pl.Callback):

            # Add component
            pl_module.proto_layer.add_components(
-                None,
-                initializer=LiteralCompInitializer(new_component.unsqueeze(0)))
+                1,
+                initializer=LiteralCompInitializer(new_component.unsqueeze(0)),
+            )

            # Adjust Topology
            topology.add_prototype()
--- a/prototorch/models/cbc.py
+++ b/prototorch/models/cbc.py
@@ -1,18 +1,20 @@
 import torch
+import torch.nn.functional as F
 import torchmetrics
+from prototorch.core.competitions import CBCC
+from prototorch.core.components import ReasoningComponents
+from prototorch.core.initializers import RandomReasoningsInitializer
+from prototorch.core.losses import MarginLoss
+from prototorch.core.similarities import euclidean_similarity
+from prototorch.nn.wrappers import LambdaLayer

-from ..core.competitions import CBCC
-from ..core.components import ReasoningComponents
-from ..core.initializers import RandomReasoningsInitializer
-from ..core.losses import MarginLoss
-from ..core.similarities import euclidean_similarity
-from ..nn.wrappers import LambdaLayer
-from .abstract import ImagePrototypesMixin
 from .glvq import SiameseGLVQ
+from .mixins import ImagePrototypesMixin


 class CBC(SiameseGLVQ):
    """Classification-By-Components."""
+    proto_layer: ReasoningComponents

    def __init__(self, hparams, **kwargs):
        super().__init__(hparams, skip_proto_layer=True, **kwargs)
@@ -22,7 +24,7 @@ class CBC(SiameseGLVQ):
        reasonings_initializer = kwargs.get("reasonings_initializer",
                                            RandomReasoningsInitializer())
        self.components_layer = ReasoningComponents(
-            self.hparams.distribution,
+            self.hparams["distribution"],
            components_initializer=components_initializer,
            reasonings_initializer=reasonings_initializer,
        )
@@ -32,7 +34,7 @@ class CBC(SiameseGLVQ):
        # Namespace hook
        self.proto_layer = self.components_layer

-        self.loss = MarginLoss(self.hparams.margin)
+        self.loss = MarginLoss(self.hparams["margin"])

    def forward(self, x):
        components, reasonings = self.components_layer()
@@ -48,7 +50,7 @@ class CBC(SiameseGLVQ):
        x, y = batch
        y_pred = self(x)
        num_classes = self.num_classes
-        y_true = torch.nn.functional.one_hot(y.long(), num_classes=num_classes)
+        y_true = F.one_hot(y.long(), num_classes=num_classes)
        loss = self.loss(y_pred, y_true).mean()
        return y_pred, loss

--- a/prototorch/models/extras.py
+++ b/prototorch/models/extras.py
@@ -5,8 +5,7 @@ Modules not yet available in prototorch go here temporarily.
 """

 import torch
-
-from ..core.similarities import gaussian
+from prototorch.core.similarities import gaussian


 def rank_scaled_gaussian(distances, lambd):
--- a/prototorch/models/glvq.py
+++ b/prototorch/models/glvq.py
@@ -1,24 +1,25 @@
 """Models based on the GLVQ framework."""

 import torch
-from torch.nn.parameter import Parameter
-
-from ..core.competitions import wtac
-from ..core.distances import (
+from prototorch.core.competitions import wtac
+from prototorch.core.distances import (
    lomega_distance,
    omega_distance,
    squared_euclidean_distance,
 )
-from ..core.initializers import EyeLinearTransformInitializer
-from ..core.losses import (
+from prototorch.core.initializers import EyeLinearTransformInitializer
+from prototorch.core.losses import (
    GLVQLoss,
    lvq1_loss,
    lvq21_loss,
 )
-from ..core.transforms import LinearTransform
-from ..nn.wrappers import LambdaLayer, LossLayer
-from .abstract import ImagePrototypesMixin, SupervisedPrototypeModel
+from prototorch.core.transforms import LinearTransform
+from prototorch.nn.wrappers import LambdaLayer, LossLayer
+from torch.nn.parameter import Parameter
+
+from .abstract import SupervisedPrototypeModel
 from .extras import ltangent_distance, orthogonalization
+from .mixins import ImagePrototypesMixin


 class GLVQ(SupervisedPrototypeModel):
@@ -34,9 +35,9 @@ class GLVQ(SupervisedPrototypeModel):

        # Loss
        self.loss = GLVQLoss(
-            margin=self.hparams.margin,
-            transfer_fn=self.hparams.transfer_fn,
-            beta=self.hparams.transfer_beta,
+            margin=self.hparams["margin"],
+            transfer_fn=self.hparams["transfer_fn"],
+            beta=self.hparams["transfer_beta"],
        )

    # def on_save_checkpoint(self, checkpoint):
@@ -46,19 +47,24 @@ class GLVQ(SupervisedPrototypeModel):
    def initialize_prototype_win_ratios(self):
        self.register_buffer(
            "prototype_win_ratios",
-            torch.zeros(self.num_prototypes, device=self.device))
+            torch.zeros(self.num_prototypes, device=self.device),
+        )

-    def on_epoch_start(self):
+    def on_train_epoch_start(self):
        self.initialize_prototype_win_ratios()

    def log_prototype_win_ratios(self, distances):
        batch_size = len(distances)
-        prototype_wc = torch.zeros(self.num_prototypes,
-                                   dtype=torch.long,
-                                   device=self.device)
-        wi, wc = torch.unique(distances.min(dim=-1).indices,
-                              sorted=True,
-                              return_counts=True)
+        prototype_wc = torch.zeros(
+            self.num_prototypes,
+            dtype=torch.long,
+            device=self.device,
+        )
+        wi, wc = torch.unique(
+            distances.min(dim=-1).indices,
+            sorted=True,
+            return_counts=True,
+        )
        prototype_wc[wi] = wc
        prototype_wr = prototype_wc / batch_size
        self.prototype_win_ratios = torch.vstack([
@@ -81,14 +87,12 @@ class GLVQ(SupervisedPrototypeModel):
        return train_loss

    def validation_step(self, batch, batch_idx):
-        # `model.eval()` and `torch.no_grad()` handled by pl
        out, val_loss = self.shared_step(batch, batch_idx)
        self.log("val_loss", val_loss)
        self.log_acc(out, batch[-1], tag="val_acc")
        return val_loss

    def test_step(self, batch, batch_idx):
-        # `model.eval()` and `torch.no_grad()` handled by pl
        out, test_loss = self.shared_step(batch, batch_idx)
        self.log_acc(out, batch[-1], tag="test_acc")
        return test_loss
@@ -99,10 +103,6 @@ class GLVQ(SupervisedPrototypeModel):
            test_loss += batch_loss.item()
        self.log("test_loss", test_loss)

-    # TODO
-    # def predict_step(self, batch, batch_idx, dataloader_idx=None):
-    #     pass
-

 class SiameseGLVQ(GLVQ):
    """GLVQ in a Siamese setting.
@@ -113,23 +113,27 @@ class SiameseGLVQ(GLVQ):

    """

-    def __init__(self,
-                 hparams,
-                 backbone=torch.nn.Identity(),
-                 both_path_gradients=False,
-                 **kwargs):
+    def __init__(
+            self,
+            hparams,
+            backbone=torch.nn.Identity(),
+            both_path_gradients=False,
+            **kwargs,
+    ):
        distance_fn = kwargs.pop("distance_fn", squared_euclidean_distance)
        super().__init__(hparams, distance_fn=distance_fn, **kwargs)
        self.backbone = backbone
        self.both_path_gradients = both_path_gradients

    def configure_optimizers(self):
-        proto_opt = self.optimizer(self.proto_layer.parameters(),
-                                   lr=self.hparams.proto_lr)
+        proto_opt = self.optimizer(
+            self.proto_layer.parameters(),
+            lr=self.hparams["proto_lr"],
+        )
        # Only add a backbone optimizer if backbone has trainable parameters
        bb_params = list(self.backbone.parameters())
        if (bb_params):
-            bb_opt = self.optimizer(bb_params, lr=self.hparams.bb_lr)
+            bb_opt = self.optimizer(bb_params, lr=self.hparams["bb_lr"])
            optimizers = [proto_opt, bb_opt]
        else:
            optimizers = [proto_opt]
@@ -199,12 +203,13 @@ class GRLVQ(SiameseGLVQ):
    TODO Make a RelevanceLayer. `bb_lr` is ignored otherwise.

    """
+    _relevances: torch.Tensor

    def __init__(self, hparams, **kwargs):
        super().__init__(hparams, **kwargs)

        # Additional parameters
-        relevances = torch.ones(self.hparams.input_dim, device=self.device)
+        relevances = torch.ones(self.hparams["input_dim"], device=self.device)
        self.register_parameter("_relevances", Parameter(relevances))

        # Override the backbone
@@ -233,8 +238,8 @@ class SiameseGMLVQ(SiameseGLVQ):
        omega_initializer = kwargs.get("omega_initializer",
                                       EyeLinearTransformInitializer())
        self.backbone = LinearTransform(
-            self.hparams.input_dim,
-            self.hparams.latent_dim,
+            self.hparams["input_dim"],
+            self.hparams["latent_dim"],
            initializer=omega_initializer,
        )

@@ -244,7 +249,7 @@ class SiameseGMLVQ(SiameseGLVQ):

    @property
    def lambda_matrix(self):
-        omega = self.backbone.weight  # (input_dim, latent_dim)
+        omega = self.backbone.weights  # (input_dim, latent_dim)
        lam = omega @ omega.T
        return lam.detach().cpu()

@@ -257,18 +262,27 @@ class GMLVQ(GLVQ):

    """

+    # Parameters
+    _omega: torch.Tensor
+
    def __init__(self, hparams, **kwargs):
        distance_fn = kwargs.pop("distance_fn", omega_distance)
        super().__init__(hparams, distance_fn=distance_fn, **kwargs)

        # Additional parameters
-        omega_initializer = kwargs.get("omega_initializer",
-                                       EyeLinearTransformInitializer())
-        omega = omega_initializer.generate(self.hparams.input_dim,
-                                           self.hparams.latent_dim)
+        omega_initializer = kwargs.get(
+            "omega_initializer",
+            EyeLinearTransformInitializer(),
+        )
+        omega = omega_initializer.generate(
+            self.hparams["input_dim"],
+            self.hparams["latent_dim"],
+        )
        self.register_parameter("_omega", Parameter(omega))
-        self.backbone = LambdaLayer(lambda x: x @ self._omega,
-                                    name="omega matrix")
+        self.backbone = LambdaLayer(
+            lambda x: x @ self._omega,
+            name="omega matrix",
+        )

    @property
    def omega_matrix(self):
@@ -299,8 +313,8 @@ class LGMLVQ(GMLVQ):
        # Re-register `_omega` to override the one from the super class.
        omega = torch.randn(
            self.num_prototypes,
-            self.hparams.input_dim,
-            self.hparams.latent_dim,
+            self.hparams["input_dim"],
+            self.hparams["latent_dim"],
            device=self.device,
        )
        self.register_parameter("_omega", Parameter(omega))
@@ -316,23 +330,27 @@ class GTLVQ(LGMLVQ):
        omega_initializer = kwargs.get("omega_initializer")

        if omega_initializer is not None:
-            subspace = omega_initializer.generate(self.hparams.input_dim,
-                                                  self.hparams.latent_dim)
-            omega = torch.repeat_interleave(subspace.unsqueeze(0),
-                                            self.num_prototypes,
-                                            dim=0)
+            subspace = omega_initializer.generate(
+                self.hparams["input_dim"],
+                self.hparams["latent_dim"],
+            )
+            omega = torch.repeat_interleave(
+                subspace.unsqueeze(0),
+                self.num_prototypes,
+                dim=0,
+            )
        else:
            omega = torch.rand(
                self.num_prototypes,
-                self.hparams.input_dim,
-                self.hparams.latent_dim,
+                self.hparams["input_dim"],
+                self.hparams["latent_dim"],
                device=self.device,
            )

        # Re-register `_omega` to override the one from the super class.
        self.register_parameter("_omega", Parameter(omega))

-    def on_train_batch_end(self, outputs, batch, batch_idx, dataloader_idx):
+    def on_train_batch_end(self, outputs, batch, batch_idx):
        with torch.no_grad():
            self._omega.copy_(orthogonalization(self._omega))

@@ -389,7 +407,7 @@ class ImageGTLVQ(ImagePrototypesMixin, GTLVQ):

    """

-    def on_train_batch_end(self, outputs, batch, batch_idx, dataloader_idx):
+    def on_train_batch_end(self, outputs, batch, batch_idx):
        """Constrain the components to the range [0, 1] by clamping after updates."""
        self.proto_layer.components.data.clamp_(0.0, 1.0)
        with torch.no_grad():
--- a/prototorch/models/knn.py
+++ b/prototorch/models/knn.py
@@ -2,13 +2,14 @@

 import warnings

-from ..core.competitions import KNNC
-from ..core.components import LabeledComponents
-from ..core.initializers import (
+from prototorch.core.competitions import KNNC
+from prototorch.core.components import LabeledComponents
+from prototorch.core.initializers import (
    LiteralCompInitializer,
    LiteralLabelsInitializer,
 )
-from ..utils.utils import parse_data_arg
+from prototorch.utils.utils import parse_data_arg
+
 from .abstract import SupervisedPrototypeModel


@@ -36,10 +37,7 @@ class KNN(SupervisedPrototypeModel):
    def training_step(self, train_batch, batch_idx, optimizer_idx=None):
        return 1  # skip training step

-    def on_train_batch_start(self,
-                             train_batch,
-                             batch_idx,
-                             dataloader_idx=None):
+    def on_train_batch_start(self, train_batch, batch_idx):
        warnings.warn("k-NN has no training, skipping!")
        return -1

--- a/prototorch/models/lvq.py
+++ b/prototorch/models/lvq.py
@@ -1,17 +1,21 @@
 """LVQ models that are optimized using non-gradient methods."""

-from ..core.losses import _get_dp_dm
-from ..nn.activations import get_activation
-from ..nn.wrappers import LambdaLayer
-from .abstract import NonGradientMixin
+import logging
+from collections import OrderedDict
+
+from prototorch.core.losses import _get_dp_dm
+from prototorch.nn.activations import get_activation
+from prototorch.nn.wrappers import LambdaLayer
+
 from .glvq import GLVQ
+from .mixins import NonGradientMixin


 class LVQ1(NonGradientMixin, GLVQ):
    """Learning Vector Quantization 1."""

    def training_step(self, train_batch, batch_idx, optimizer_idx=None):
-        protos, plables = self.proto_layer()
+        protos, plabels = self.proto_layer()
        x, y = train_batch
        dis = self.compute_distances(x)
        # TODO Vectorized implementation
@@ -25,12 +29,14 @@ class LVQ1(NonGradientMixin, GLVQ):
            else:
                shift = protos[w] - xi
            updated_protos = protos + 0.0
-            updated_protos[w] = protos[w] + (self.hparams.lr * shift)
-            self.proto_layer.load_state_dict({"_components": updated_protos},
-                                             strict=False)
+            updated_protos[w] = protos[w] + (self.hparams["lr"] * shift)
+            self.proto_layer.load_state_dict(
+                OrderedDict(_components=updated_protos),
+                strict=False,
+            )

-        print(f"dis={dis}")
-        print(f"y={y}")
+        logging.debug(f"dis={dis}")
+        logging.debug(f"y={y}")
        # Logging
        self.log_acc(dis, y, tag="train_acc")

@@ -55,10 +61,12 @@ class LVQ21(NonGradientMixin, GLVQ):
            shiftp = xi - protos[wp]
            shiftn = protos[wn] - xi
            updated_protos = protos + 0.0
-            updated_protos[wp] = protos[wp] + (self.hparams.lr * shiftp)
-            updated_protos[wn] = protos[wn] + (self.hparams.lr * shiftn)
-            self.proto_layer.load_state_dict({"_components": updated_protos},
-                                             strict=False)
+            updated_protos[wp] = protos[wp] + (self.hparams["lr"] * shiftp)
+            updated_protos[wn] = protos[wn] + (self.hparams["lr"] * shiftn)
+            self.proto_layer.load_state_dict(
+                OrderedDict(_components=updated_protos),
+                strict=False,
+            )

        # Logging
        self.log_acc(dis, y, tag="train_acc")
@@ -73,19 +81,21 @@ class MedianLVQ(NonGradientMixin, GLVQ):

    """

-    def __init__(self, hparams, verbose=True, **kwargs):
-        self.verbose = verbose
+    def __init__(self, hparams, **kwargs):
        super().__init__(hparams, **kwargs)

        self.transfer_layer = LambdaLayer(
-            get_activation(self.hparams.transfer_fn))
+            get_activation(self.hparams["transfer_fn"]))

    def _f(self, x, y, protos, plabels):
        d = self.distance_layer(x, protos)
-        dp, dm = _get_dp_dm(d, y, plabels)
+        dp, dm = _get_dp_dm(d, y, plabels, with_indices=False)
        mu = (dp - dm) / (dp + dm)
-        invmu = -1.0 * mu
-        f = self.transfer_layer(invmu, beta=self.hparams.transfer_beta) + 1.0
+        negative_mu = -1.0 * mu
+        f = self.transfer_layer(
+            negative_mu,
+            beta=self.hparams["transfer_beta"],
+        ) + 1.0
        return f

    def expectation(self, x, y, protos, plabels):
@@ -115,10 +125,11 @@ class MedianLVQ(NonGradientMixin, GLVQ):
                _protos[i] = xk
                _lower_bound = self.lower_bound(x, y, _protos, plabels, gamma)
                if _lower_bound > lower_bound:
-                    if self.verbose:
-                        print(f"Updating prototype {i} to data {k}...")
-                    self.proto_layer.load_state_dict({"_components": _protos},
-                                                     strict=False)
+                    logging.debug(f"Updating prototype {i} to data {k}...")
+                    self.proto_layer.load_state_dict(
+                        OrderedDict(_components=_protos),
+                        strict=False,
+                    )
                    break

        # Logging
--- a/prototorch/models/mixins.py
+++ b/prototorch/models/mixins.py
@@ -0,0 +1,35 @@
+import pytorch_lightning as pl
+import torch
+from prototorch.core.components import Components
+
+
+class ProtoTorchMixin(pl.LightningModule):
+    """All mixins are ProtoTorchMixins."""
+
+
+class NonGradientMixin(ProtoTorchMixin):
+    """Mixin for custom non-gradient optimization."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.automatic_optimization = False
+
+    def training_step(self, train_batch, batch_idx, optimizer_idx=None):
+        raise NotImplementedError
+
+
+class ImagePrototypesMixin(ProtoTorchMixin):
+    """Mixin for models with image prototypes."""
+    proto_layer: Components
+    components: torch.Tensor
+
+    def on_train_batch_end(self, outputs, batch, batch_idx):
+        """Constrain the components to the range [0, 1] by clamping after updates."""
+        self.proto_layer.components.data.clamp_(0.0, 1.0)
+
+    def get_prototype_grid(self, num_columns=2, return_channels_last=True):
+        from torchvision.utils import make_grid
+        grid = make_grid(self.components, nrow=num_columns)
+        if return_channels_last:
+            grid = grid.permute((1, 2, 0))
+        return grid.cpu()
--- a/prototorch/models/probabilistic.py
+++ b/prototorch/models/probabilistic.py
@@ -1,10 +1,13 @@
 """Probabilistic GLVQ methods"""

 import torch
+from prototorch.core.losses import nllr_loss, rslvq_loss
+from prototorch.core.pooling import (
+    stratified_min_pooling,
+    stratified_sum_pooling,
+)
+from prototorch.nn.wrappers import LossLayer

-from ..core.losses import nllr_loss, rslvq_loss
-from ..core.pooling import stratified_min_pooling, stratified_sum_pooling
-from ..nn.wrappers import LambdaLayer, LossLayer
 from .extras import GaussianPrior, RankScaledGaussianPrior
 from .glvq import GLVQ, SiameseGMLVQ

@@ -34,17 +37,24 @@ class ProbabilisticLVQ(GLVQ):
    def __init__(self, hparams, rejection_confidence=0.0, **kwargs):
        super().__init__(hparams, **kwargs)

-        self.conditional_distribution = None
        self.rejection_confidence = rejection_confidence
+        self._conditional_distribution = None

    def forward(self, x):
        distances = self.compute_distances(x)
+
        conditional = self.conditional_distribution(distances)
        prior = (1. / self.num_prototypes) * torch.ones(self.num_prototypes,
                                                        device=self.device)
        posterior = conditional * prior
+
        plabels = self.proto_layer._labels
-        y_pred = stratified_sum_pooling(posterior, plabels)
+        if isinstance(plabels, torch.LongTensor) or isinstance(
+                plabels, torch.cuda.LongTensor):  # type: ignore
+            y_pred = stratified_sum_pooling(posterior, plabels)  # type: ignore
+        else:
+            raise ValueError("Labels must be LongTensor.")
+
        return y_pred

    def predict(self, x):
@@ -61,6 +71,12 @@ class ProbabilisticLVQ(GLVQ):
        loss = batch_loss.sum()
        return loss

+    def conditional_distribution(self, distances):
+        """Conditional distribution of distances."""
+        if self._conditional_distribution is None:
+            raise ValueError("Conditional distribution is not set.")
+        return self._conditional_distribution(distances)
+

 class SLVQ(ProbabilisticLVQ):
    """Soft Learning Vector Quantization."""
@@ -72,7 +88,7 @@ class SLVQ(ProbabilisticLVQ):
        self.hparams.setdefault("variance", 1.0)
        variance = self.hparams.get("variance")

-        self.conditional_distribution = GaussianPrior(variance)
+        self._conditional_distribution = GaussianPrior(variance)
        self.loss = LossLayer(nllr_loss)


@@ -86,7 +102,7 @@ class RSLVQ(ProbabilisticLVQ):
        self.hparams.setdefault("variance", 1.0)
        variance = self.hparams.get("variance")

-        self.conditional_distribution = GaussianPrior(variance)
+        self._conditional_distribution = GaussianPrior(variance)
        self.loss = LossLayer(rslvq_loss)


--- a/prototorch/models/unsupervised.py
+++ b/prototorch/models/unsupervised.py
@@ -2,14 +2,14 @@

 import numpy as np
 import torch
+from prototorch.core.competitions import wtac
+from prototorch.core.distances import squared_euclidean_distance
+from prototorch.core.losses import NeuralGasEnergy

-from ..core.competitions import wtac
-from ..core.distances import squared_euclidean_distance
-from ..core.losses import NeuralGasEnergy
-from ..nn.wrappers import LambdaLayer
-from .abstract import NonGradientMixin, UnsupervisedPrototypeModel
+from .abstract import UnsupervisedPrototypeModel
 from .callbacks import GNGCallback
 from .extras import ConnectionTopology
+from .mixins import NonGradientMixin


 class KohonenSOM(NonGradientMixin, UnsupervisedPrototypeModel):
@@ -18,6 +18,7 @@ class KohonenSOM(NonGradientMixin, UnsupervisedPrototypeModel):
    TODO Allow non-2D grids

    """
+    _grid: torch.Tensor

    def __init__(self, hparams, **kwargs):
        h, w = hparams.get("shape")
@@ -93,10 +94,10 @@ class NeuralGas(UnsupervisedPrototypeModel):
        self.hparams.setdefault("age_limit", 10)
        self.hparams.setdefault("lm", 1)

-        self.energy_layer = NeuralGasEnergy(lm=self.hparams.lm)
+        self.energy_layer = NeuralGasEnergy(lm=self.hparams["lm"])
        self.topology_layer = ConnectionTopology(
-            agelimit=self.hparams.age_limit,
-            num_prototypes=self.hparams.num_prototypes,
+            agelimit=self.hparams["age_limit"],
+            num_prototypes=self.hparams["num_prototypes"],
        )

    def training_step(self, train_batch, batch_idx):
@@ -109,12 +110,9 @@ class NeuralGas(UnsupervisedPrototypeModel):
        self.log("loss", loss)
        return loss

-    # def training_epoch_end(self, training_step_outputs):
-    #     print(f"{self.trainer.lr_schedulers}")
-    #     print(f"{self.trainer.lr_schedulers[0]['scheduler'].optimizer}")
-

 class GrowingNeuralGas(NeuralGas):
+    errors: torch.Tensor

    def __init__(self, hparams, **kwargs):
        super().__init__(hparams, **kwargs)
@@ -124,7 +122,10 @@ class GrowingNeuralGas(NeuralGas):
        self.hparams.setdefault("insert_reduction", 0.1)
        self.hparams.setdefault("insert_freq", 10)

-        errors = torch.zeros(self.hparams.num_prototypes, device=self.device)
+        errors = torch.zeros(
+            self.hparams["num_prototypes"],
+            device=self.device,
+        )
        self.register_buffer("errors", errors)

    def training_step(self, train_batch, _batch_idx):
@@ -139,7 +140,7 @@ class GrowingNeuralGas(NeuralGas):
        dp = d * mask

        self.errors += torch.sum(dp * dp)
-        self.errors *= self.hparams.step_reduction
+        self.errors *= self.hparams["step_reduction"]

        self.topology_layer(d)
        self.log("loss", loss)
@@ -148,7 +149,7 @@ class GrowingNeuralGas(NeuralGas):
    def configure_callbacks(self):
        return [
            GNGCallback(
-                reduction=self.hparams.insert_reduction,
-                freq=self.hparams.insert_freq,
+                reduction=self.hparams["insert_reduction"],
+                freq=self.hparams["insert_freq"],
            )
        ]
--- a/prototorch/models/vis.py
+++ b/prototorch/models/vis.py
@@ -1,15 +1,19 @@
 """Visualization Callbacks."""

+import os
+import warnings
+from typing import Sized
+
 import numpy as np
 import pytorch_lightning as pl
 import torch
 import torchvision
 from matplotlib import pyplot as plt
+from prototorch.utils.colors import get_colors, get_legend_handles
+from prototorch.utils.utils import mesh2d
+from pytorch_lightning.loggers import TensorBoardLogger
 from torch.utils.data import DataLoader, Dataset

-from ..utils.colors import get_colors, get_legend_handles
-from ..utils.utils import mesh2d
-

 class Vis2DAbstract(pl.Callback):

@@ -29,13 +33,22 @@ class Vis2DAbstract(pl.Callback):
                 tensorboard=False,
                 show_last_only=False,
                 pause_time=0.1,
+                 save=False,
+                 save_dir="./img",
+                 fig_size=(5, 4),
+                 dpi=500,
                 block=False):
        super().__init__()

        if data:
            if isinstance(data, Dataset):
-                x, y = next(iter(DataLoader(data, batch_size=len(data))))
-            elif isinstance(data, torch.utils.data.DataLoader):
+                if isinstance(data, Sized):
+                    x, y = next(iter(DataLoader(data, batch_size=len(data))))
+                else:
+                    # TODO: Add support for non-sized datasets
+                    raise NotImplementedError(
+                        "Data must be a dataset with a __len__ method.")
+            elif isinstance(data, DataLoader):
                x = torch.tensor([])
                y = torch.tensor([])
                for x_b, y_b in data:
@@ -67,8 +80,16 @@ class Vis2DAbstract(pl.Callback):
        self.tensorboard = tensorboard
        self.show_last_only = show_last_only
        self.pause_time = pause_time
+        self.save = save
+        self.save_dir = save_dir
+        self.fig_size = fig_size
+        self.dpi = dpi
        self.block = block

+        if save:
+            if not os.path.exists(save_dir):
+                os.makedirs(save_dir)
+
    def precheck(self, trainer):
        if self.show_last_only:
            if trainer.current_epoch != trainer.max_epochs - 1:
@@ -117,13 +138,18 @@ class Vis2DAbstract(pl.Callback):
    def log_and_display(self, trainer, pl_module):
        if self.tensorboard:
            self.add_to_tensorboard(trainer, pl_module)
+        if self.save:
+            plt.tight_layout()
+            self.fig.set_size_inches(*self.fig_size, forward=False)
+            plt.savefig(f"{self.save_dir}/{trainer.current_epoch}.png",
+                        dpi=self.dpi)
        if self.show:
            if not self.block:
                plt.pause(self.pause_time)
            else:
                plt.show(block=self.block)

-    def on_epoch_end(self, trainer, pl_module):
+    def on_train_epoch_end(self, trainer, pl_module):
        if not self.precheck(trainer):
            return True
        self.visualize(pl_module)
@@ -132,6 +158,9 @@ class Vis2DAbstract(pl.Callback):
    def on_train_end(self, trainer, pl_module):
        plt.close()

+    def visualize(self, pl_module):
+        raise NotImplementedError
+

 class VisGLVQ2D(Vis2DAbstract):

@@ -292,30 +321,45 @@ class VisImgComp(Vis2DAbstract):
        self.add_embedding = add_embedding
        self.embedding_data = embedding_data

-    def on_train_start(self, trainer, pl_module):
-        tb = pl_module.logger.experiment
-        if self.add_embedding:
-            ind = np.random.choice(len(self.x_train),
-                                   size=self.embedding_data,
-                                   replace=False)
-            data = self.x_train[ind]
-            tb.add_embedding(data.view(len(ind), -1),
-                             label_img=data,
-                             global_step=None,
-                             tag="Data Embedding",
-                             metadata=self.y_train[ind],
-                             metadata_header=None)
+    def on_train_start(self, _, pl_module):
+        if isinstance(pl_module.logger, TensorBoardLogger):
+            tb = pl_module.logger.experiment

-        if self.random_data:
-            ind = np.random.choice(len(self.x_train),
-                                   size=self.random_data,
-                                   replace=False)
-            data = self.x_train[ind]
-            grid = torchvision.utils.make_grid(data, nrow=self.num_columns)
-            tb.add_image(tag="Data",
-                         img_tensor=grid,
-                         global_step=None,
-                         dataformats=self.dataformats)
+            # Add embedding
+            if self.add_embedding:
+                if self.x_train is not None and self.y_train is not None:
+                    ind = np.random.choice(len(self.x_train),
+                                           size=self.embedding_data,
+                                           replace=False)
+                    data = self.x_train[ind]
+                    tb.add_embedding(data.view(len(ind), -1),
+                                     label_img=data,
+                                     global_step=None,
+                                     tag="Data Embedding",
+                                     metadata=self.y_train[ind],
+                                     metadata_header=None)
+                else:
+                    raise ValueError("No data for add embedding flag")
+
+            # Random Data
+            if self.random_data:
+                if self.x_train is not None:
+                    ind = np.random.choice(len(self.x_train),
+                                           size=self.random_data,
+                                           replace=False)
+                    data = self.x_train[ind]
+                    grid = torchvision.utils.make_grid(data,
+                                                       nrow=self.num_columns)
+                    tb.add_image(tag="Data",
+                                 img_tensor=grid,
+                                 global_step=None,
+                                 dataformats=self.dataformats)
+                else:
+                    raise ValueError("No data for random data flag")
+
+        else:
+            warnings.warn(
+                f"TensorBoardLogger is required, got {type(pl_module.logger)}")

    def add_to_tensorboard(self, trainer, pl_module):
        tb = pl_module.logger.experiment
--- a/prototorch/y/init.py
+++ b/prototorch/y/init.py
@@ -0,0 +1,23 @@
+from .architectures.base import BaseYArchitecture
+from .architectures.comparison import (
+    OmegaComparisonMixin,
+    SimpleComparisonMixin,
+)
+from .architectures.competition import WTACompetitionMixin
+from .architectures.components import SupervisedArchitecture
+from .architectures.loss import GLVQLossMixin
+from .architectures.optimization import (
+    MultipleLearningRateMixin,
+    SingleLearningRateMixin,
+)
+
+__all__ = [
+    'BaseYArchitecture',
+    "OmegaComparisonMixin",
+    "SimpleComparisonMixin",
+    "SingleLearningRateMixin",
+    "MultipleLearningRateMixin",
+    "SupervisedArchitecture",
+    "WTACompetitionMixin",
+    "GLVQLossMixin",
+]
--- a/prototorch/y/architectures/base.py
+++ b/prototorch/y/architectures/base.py
@@ -0,0 +1,212 @@
+"""
+Proto Y Architecture
+
+Network architecture for Component based Learning.
+"""
+from dataclasses import dataclass
+from typing import (
+    Dict,
+    Set,
+    Type,
+)
+
+import pytorch_lightning as pl
+import torch
+from torchmetrics import Metric
+from torchmetrics.classification.accuracy import Accuracy
+
+
+class BaseYArchitecture(pl.LightningModule):
+    """
+    Base class of the Proto-Y architecture.
+
+    The constructor calls the ``init_*`` hooks in a fixed order. Subclasses
+    (mixins) override these hooks and the matching step methods to build a
+    model out of components, latent, comparison, loss and inference steps.
+    """
+
+    @dataclass
+    class HyperParameters:
+        """
+        Empty base class for the hyperparameter dataclasses of the mixins.
+        """
+
+    # Metric registry, keyed by the metric class. The dictionaries are created
+    # per instance in ``__init__``; class-level dict literals would be shared
+    # by every model in the process.
+    registered_metrics: Dict[Type[Metric], Metric]
+    registered_metric_names: Dict[Type[Metric], Set[str]]
+
+    components_layer: torch.nn.Module
+
+    def __init__(self, hparams) -> None:
+        """
+        Run all initialization hooks with the given hyperparameters.
+
+        hparams: Hyperparameter object that is passed to every ``init_*`` hook.
+        """
+        super().__init__()
+
+        # Must exist before ``init_model_metrics`` registers the defaults.
+        self.registered_metrics = {}
+        self.registered_metric_names = {}
+
+        # Common Steps
+        self.init_components(hparams)
+        self.init_latent(hparams)
+        self.init_comparison(hparams)
+        self.init_competition(hparams)
+
+        # Train Steps
+        self.init_loss(hparams)
+
+        # Inference Steps
+        self.init_inference(hparams)
+
+        # Initialize Model Metrics
+        self.init_model_metrics()
+
+    # internal API, called by models and callbacks
+    def register_torchmetric(
+        self,
+        name: str,
+        metric: Type[Metric],
+        **metric_kwargs,
+    ):
+        """
+        Register a metric class under a log name.
+
+        One instance is created per metric class. Registering the same class
+        again only adds another log name; the ``metric_kwargs`` of later calls
+        are ignored because the existing instance is reused.
+        """
+        if metric not in self.registered_metrics:
+            self.registered_metrics[metric] = metric(**metric_kwargs)
+            self.registered_metric_names[metric] = {name}
+        else:
+            self.registered_metric_names[metric].add(name)
+
+    # external API
+    def get_competition(self, batch, components):
+        """
+        Apply the latent step and then the comparison step.
+
+        Despite the name, the value returned is the comparison tensor.
+        """
+        latent_batch, latent_components = self.latent(batch, components)
+        # TODO: => Latent Hook
+        comparison_tensor = self.comparison(latent_batch, latent_components)
+        # TODO: => Comparison Hook
+        return comparison_tensor
+
+    def forward(self, batch):
+        """
+        Run the inference path for a batch.
+
+        A bare tensor is wrapped as ``(batch, None)`` so that the steps can
+        always unpack a pair.
+        """
+        if isinstance(batch, torch.Tensor):
+            batch = (batch, None)
+        # TODO: manage different datatypes?
+        components = self.components_layer()
+        # TODO: => Component Hook
+        comparison_tensor = self.get_competition(batch, components)
+        # TODO: => Competition Hook
+        return self.inference(comparison_tensor, components)
+
+    def predict(self, batch):
+        """
+        Alias for forward
+        """
+        return self.forward(batch)
+
+    def forward_comparison(self, batch):
+        """
+        Return the comparison tensor for a batch without running inference.
+        """
+        if isinstance(batch, torch.Tensor):
+            batch = (batch, None)
+        # TODO: manage different datatypes?
+        components = self.components_layer()
+        # TODO: => Component Hook
+        return self.get_competition(batch, components)
+
+    def loss_forward(self, batch):
+        """
+        Run the training path for a batch and return the result of ``loss``.
+
+        Unlike ``forward``, the batch is passed on unchanged (a bare tensor is
+        not wrapped).
+        """
+        # TODO: manage different datatypes?
+        components = self.components_layer()
+        # TODO: => Component Hook
+        comparison_tensor = self.get_competition(batch, components)
+        # TODO: => Competition Hook
+        return self.loss(comparison_tensor, batch, components)
+
+    # Empty Initialization
+    # TODO: Type hints
+    # TODO: Docs
+    def init_components(self, hparams: HyperParameters) -> None:
+        ...
+
+    def init_latent(self, hparams: HyperParameters) -> None:
+        ...
+
+    def init_comparison(self, hparams: HyperParameters) -> None:
+        ...
+
+    def init_competition(self, hparams: HyperParameters) -> None:
+        ...
+
+    def init_loss(self, hparams: HyperParameters) -> None:
+        ...
+
+    def init_inference(self, hparams: HyperParameters) -> None:
+        ...
+
+    def init_model_metrics(self) -> None:
+        """
+        Register the metrics every model logs by default.
+        """
+        self.register_torchmetric('accuracy', Accuracy)
+
+    # Empty Steps
+    # TODO: Type hints
+    def components(self):
+        """
+        This step has no input.
+
+        It returns the components.
+        """
+        raise NotImplementedError(
+            "The components step has no reasonable default.")
+
+    def latent(self, batch, components):
+        """
+        The latent step receives the data batch and the components.
+        It can transform both by an arbitrary function.
+
+        It returns the transformed batch and components, each of the same length as the original input.
+        The default implementation returns both unchanged.
+        """
+        return batch, components
+
+    def comparison(self, batch, components):
+        """
+        Takes a batch of size N and the component set of size M.
+
+        It returns an NxMxD tensor containing D (usually 1) pairwise comparison measures.
+        """
+        raise NotImplementedError(
+            "The comparison step has no reasonable default.")
+
+    def competition(self, comparison_measures, components):
+        """
+        Takes the tensor of comparison measures.
+
+        Assigns a competition vector to each class.
+        """
+        raise NotImplementedError(
+            "The competition step has no reasonable default.")
+
+    def loss(self, comparison_measures, batch, components):
+        """
+        Takes the tensor of comparison measures, the batch and the components.
+
+        Calculates a single loss value
+        """
+        raise NotImplementedError("The loss step has no reasonable default.")
+
+    def inference(self, comparison_measures, components):
+        """
+        Takes the tensor of comparison measures and the components.
+
+        Returns the inferred vector.
+        """
+        raise NotImplementedError(
+            "The inference step has no reasonable default.")
+
+    def update_metrics_step(self, batch):
+        """
+        Update every registered metric with the predictions for one batch.
+        """
+        x, y = batch
+
+        # Prediction Metrics
+        preds = self(x)
+        for metric in self.registered_metrics:
+            instance = self.registered_metrics[metric].to(self.device)
+            # torchmetrics expects the arguments in (preds, target) order.
+            instance(preds, y)
+
+    def update_metrics_epoch(self):
+        """
+        Compute, log and reset every registered metric.
+
+        The computed value is logged once for each name registered for the
+        metric class.
+        """
+        for metric in self.registered_metrics:
+            instance = self.registered_metrics[metric].to(self.device)
+            value = instance.compute()
+
+            for name in self.registered_metric_names[metric]:
+                self.log(name, value)
+
+            instance.reset()
+
+    # Lightning Hooks
+    def training_step(self, batch, batch_idx, optimizer_idx=None):
+        # NOTE(review): the metrics are updated on every call; if this hook is
+        # invoked once per optimizer, each batch is counted several times.
+        # Confirm that this is intended.
+        self.update_metrics_step(batch)
+
+        return self.loss_forward(batch)
+
+    def training_epoch_end(self, outs) -> None:
+        self.update_metrics_epoch()
+
+    def validation_step(self, batch, batch_idx):
+        return self.loss_forward(batch)
+
+    def test_step(self, batch, batch_idx):
+        return self.loss_forward(batch)
--- a/prototorch/y/architectures/comparison.py
+++ b/prototorch/y/architectures/comparison.py
@@ -0,0 +1,112 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Callable, Dict
+
+import torch
+from prototorch.core.distances import euclidean_distance
+from prototorch.core.initializers import (
+    AbstractLinearTransformInitializer,
+    EyeLinearTransformInitializer,
+)
+from prototorch.nn.wrappers import LambdaLayer
+from prototorch.y.architectures.base import BaseYArchitecture
+from torch import Tensor
+from torch.nn.parameter import Parameter
+
+
+class SimpleComparisonMixin(BaseYArchitecture):
+    """
+    Simple Comparison
+
+    Comparison step that feeds only the batch tensor and the component tensor
+    into a configurable dissimilarity function.
+    """
+
+    # HyperParameters
+    # ----------------------------------------------------------------------------------------------------
+    @dataclass
+    class HyperParameters(BaseYArchitecture.HyperParameters):
+        """
+        comparison_fn: The comparison / dissimilarity function to use. Default: euclidean_distance.
+        comparison_args: Keyword arguments for the comparison function. Default: {}.
+        comparison_parameters: Not read by this mixin. Default: {}.
+        """
+        comparison_fn: Callable = euclidean_distance
+        comparison_args: dict = field(default_factory=dict)
+
+        comparison_parameters: dict = field(default_factory=dict)
+
+    # Steps
+    # ----------------------------------------------------------------------------------------------------
+    def init_comparison(self, hparams: HyperParameters):
+        """
+        Wrap the comparison function in a layer and start with no extra
+        call-time keyword arguments.
+        """
+        # Extra keyword arguments passed on every call; subclasses may fill it.
+        self.comparison_kwargs: dict[str, Tensor] = {}
+
+        layer_options = hparams.comparison_args
+        self.comparison_layer = LambdaLayer(fn=hparams.comparison_fn,
+                                            **layer_options)
+
+    def comparison(self, batch, components):
+        """
+        Compare the batch tensor with the component tensor.
+
+        Only the first element of each pair is used; the component tensor gets
+        an additional axis at position 1 before the comparison layer is called.
+        """
+        component_positions, _ = components
+        inputs, _ = batch
+
+        return self.comparison_layer(
+            inputs,
+            component_positions.unsqueeze(1),
+            **self.comparison_kwargs,
+        )
+
+
+class OmegaComparisonMixin(SimpleComparisonMixin):
+    """
+    Omega Comparison
+
+    Extends the simple comparison with a trainable matrix ``_omega`` that is
+    passed to the comparison function as the keyword argument ``omega``.
+    """
+
+    _omega: torch.Tensor
+
+    # HyperParameters
+    # ----------------------------------------------------------------------------------------------------
+    @dataclass
+    class HyperParameters(SimpleComparisonMixin.HyperParameters):
+        """
+        input_dim: Necessary Field: The dimensionality of the input.
+        latent_dim: The dimensionality of the latent space. Default: 2.
+        omega_initializer: The initializer to use for the omega matrix. Default: EyeLinearTransformInitializer.
+        """
+        input_dim: int | None = None
+        latent_dim: int = 2
+        omega_initializer: type[
+            AbstractLinearTransformInitializer] = EyeLinearTransformInitializer
+
+    # Steps
+    # ----------------------------------------------------------------------------------------------------
+    def init_comparison(self, hparams: HyperParameters) -> None:
+        """
+        Set up the comparison layer and register the omega parameter.
+
+        Raises a ValueError when ``input_dim`` is not given.
+        """
+        super().init_comparison(hparams)
+
+        if hparams.input_dim is None:
+            raise ValueError("input_dim must be specified.")
+
+        # Initialize the omega matrix
+        initializer = hparams.omega_initializer()
+        initial_omega = initializer.generate(hparams.input_dim,
+                                             hparams.latent_dim)
+        self.register_parameter("_omega", Parameter(initial_omega))
+        self.comparison_kwargs = {"omega": self._omega}
+
+    # Properties
+    # ----------------------------------------------------------------------------------------------------
+    @property
+    def omega_matrix(self):
+        """
+        Detached CPU copy of the omega matrix.
+        """
+        detached = self._omega.detach()
+        return detached.cpu()
+
+    @property
+    def lambda_matrix(self):
+        """
+        Detached CPU copy of ``omega @ omega.T``.
+        """
+        detached = self._omega.detach()
+        return (detached @ detached.T).detach().cpu()
--- a/prototorch/y/architectures/competition.py
+++ b/prototorch/y/architectures/competition.py
@@ -0,0 +1,29 @@
+from dataclasses import dataclass
+
+from prototorch.core.competitions import WTAC
+from prototorch.y.architectures.base import BaseYArchitecture
+
+
+class WTACompetitionMixin(BaseYArchitecture):
+    """
+    Winner Take All Competition
+
+    Implements the inference step with a winner-take-all competition layer.
+    """
+
+    # HyperParameters
+    # ----------------------------------------------------------------------------------------------------
+    @dataclass
+    class HyperParameters(BaseYArchitecture.HyperParameters):
+        """
+        No hyperparameters.
+        """
+
+    # Steps
+    # ----------------------------------------------------------------------------------------------------
+    def init_inference(self, hparams: HyperParameters):
+        """
+        Create the competition layer; ``hparams`` is not read.
+        """
+        self.competition_layer = WTAC()
+
+    def inference(self, comparison_measures, components):
+        """
+        Apply the competition layer to the comparison measures and the second
+        element of ``components`` (the component labels).
+        """
+        return self.competition_layer(comparison_measures, components[1])
--- a/prototorch/y/architectures/components.py
+++ b/prototorch/y/architectures/components.py
@@ -0,0 +1,53 @@
+from dataclasses import dataclass
+
+from prototorch.core.components import LabeledComponents
+from prototorch.core.initializers import (
+    AbstractComponentsInitializer,
+    LabelsInitializer,
+)
+from prototorch.y import BaseYArchitecture
+
+
+class SupervisedArchitecture(BaseYArchitecture):
+    """
+    Supervised Architecture
+
+    An architecture that uses labeled Components as component Layer.
+    """
+    components_layer: LabeledComponents
+
+    # HyperParameters
+    # ----------------------------------------------------------------------------------------------------
+    @dataclass
+    class HyperParameters(BaseYArchitecture.HyperParameters):
+        """
+        distribution: A valid prototype distribution. No default possible.
+        component_initializer: An implementation of AbstractComponentsInitializer. No default possible.
+        """
+        # Derives from the common base like the hyperparameter classes of the
+        # other mixins; the base declares no fields, so the field order of
+        # combined dataclasses is unaffected.
+        distribution: "dict[str, int]"
+        component_initializer: AbstractComponentsInitializer
+
+    # Steps
+    # ----------------------------------------------------------------------------------------------------
+    def init_components(self, hparams: HyperParameters):
+        """
+        Create the labeled components layer from the distribution and the
+        component initializer; labels use a LabelsInitializer.
+        """
+        self.components_layer = LabeledComponents(
+            distribution=hparams.distribution,
+            components_initializer=hparams.component_initializer,
+            labels_initializer=LabelsInitializer(),
+        )
+
+    # Properties
+    # ----------------------------------------------------------------------------------------------------
+    @property
+    def prototypes(self):
+        """
+        Returns the position of the prototypes.
+        """
+        return self.components_layer.components.detach().cpu()
+
+    @property
+    def prototype_labels(self):
+        """
+        Returns the labels of the prototypes.
+        """
+        return self.components_layer.labels.detach().cpu()
--- a/prototorch/y/architectures/loss.py
+++ b/prototorch/y/architectures/loss.py
@@ -0,0 +1,42 @@
+from dataclasses import dataclass, field
+
+from prototorch.core.losses import GLVQLoss
+from prototorch.y.architectures.base import BaseYArchitecture
+
+
+class GLVQLossMixin(BaseYArchitecture):
+    """
+    GLVQ Loss
+
+    Implements the loss step with the Generalized Learning Vector
+    Quantization (GLVQ) loss.
+    """
+
+    # HyperParameters
+    # ----------------------------------------------------------------------------------------------------
+    @dataclass
+    class HyperParameters(BaseYArchitecture.HyperParameters):
+        """
+        margin: The margin of the GLVQ loss. Default: 0.0.
+        transfer_fn: Transfer function to use. Default: sigmoid_beta.
+        transfer_args: Keyword arguments for the transfer function. Default: {beta: 10.0}.
+        """
+        margin: float = 0.0
+
+        transfer_fn: str = "sigmoid_beta"
+        transfer_args: dict = field(default_factory=lambda: {"beta": 10.0})
+
+    # Steps
+    # ----------------------------------------------------------------------------------------------------
+    def init_loss(self, hparams: HyperParameters):
+        """
+        Build the loss layer from margin, transfer function and its arguments.
+        """
+        transfer_options = hparams.transfer_args
+        self.loss_layer = GLVQLoss(margin=hparams.margin,
+                                   transfer_fn=hparams.transfer_fn,
+                                   **transfer_options)
+
+    def loss(self, comparison_measures, batch, components):
+        """
+        Compute the loss from the comparison measures, the second element of
+        the batch (targets) and the second element of the components (labels).
+
+        The value is logged as 'loss' and returned.
+        """
+        value = self.loss_layer(comparison_measures, batch[1], components[1])
+        self.log('loss', value)
+        return value
--- a/prototorch/y/architectures/optimization.py
+++ b/prototorch/y/architectures/optimization.py
@@ -0,0 +1,86 @@
+from dataclasses import dataclass, field
+from typing import Type
+
+import torch
+from prototorch.y import BaseYArchitecture
+from torch.nn.parameter import Parameter
+
+
+class SingleLearningRateMixin(BaseYArchitecture):
+    """
+    Single Learning Rate
+
+    One optimizer with one learning rate updates all parameters of the model.
+    """
+
+    # HyperParameters
+    # ----------------------------------------------------------------------------------------------------
+    @dataclass
+    class HyperParameters(BaseYArchitecture.HyperParameters):
+        """
+        lr: The learning rate. Default: 0.1.
+        optimizer: The optimizer to use. Default: torch.optim.Adam.
+        """
+        lr: float = 0.1
+        optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam
+
+    # Steps
+    # ----------------------------------------------------------------------------------------------------
+    def __init__(self, hparams: HyperParameters) -> None:
+        """
+        Initialize the architecture, then remember optimizer class and rate.
+        """
+        super().__init__(hparams)
+        self.optimizer = hparams.optimizer
+        self.lr = hparams.lr
+
+    # Hooks
+    # ----------------------------------------------------------------------------------------------------
+    def configure_optimizers(self):
+        """
+        Return one optimizer over all parameters of the model.
+        """
+        optimizer_cls = self.optimizer
+        return optimizer_cls(self.parameters(), lr=self.lr)  # type: ignore
+
+
+class MultipleLearningRateMixin(BaseYArchitecture):
+    """
+    Multiple Learning Rates
+
+    Define Different Learning Rates for different parameters.
+    """
+
+    # HyperParameters
+    # ----------------------------------------------------------------------------------------------------
+    @dataclass
+    class HyperParameters(BaseYArchitecture.HyperParameters):
+        """
+        lr: Mapping from attribute name of the model to its learning rate. Default: {}.
+        optimizer: The optimizer to use. Default: torch.optim.Adam.
+        """
+        lr: dict = field(default_factory=dict)
+        optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam
+
+    # Steps
+    # ----------------------------------------------------------------------------------------------------
+    def __init__(self, hparams: HyperParameters) -> None:
+        """
+        Initialize the architecture, then remember the rates and the optimizer class.
+        """
+        super().__init__(hparams)
+        self.lr = hparams.lr
+        self.optimizer = hparams.optimizer
+
+    # Hooks
+    # ----------------------------------------------------------------------------------------------------
+    def configure_optimizers(self):
+        """
+        Create one optimizer per entry of the learning-rate mapping.
+
+        Each name must refer to an attribute of the model that is either a
+        single Parameter or an object offering ``parameters()``.
+
+        Raises a ValueError when a name is not an attribute of the model or
+        when the attribute cannot be optimized. Such entries used to be dropped
+        without notice, leaving that part of the model untrained.
+        """
+        optimizers = []
+        for name, lr in self.lr.items():
+            if not hasattr(self, name):
+                raise ValueError(f"{name} is not a parameter of {self}")
+
+            model_part = getattr(self, name)
+            if isinstance(model_part, Parameter):
+                params = [model_part]
+            elif hasattr(model_part, "parameters"):
+                params = model_part.parameters()
+            else:
+                raise ValueError(
+                    f"{name} is neither a parameter nor a module of {self}")
+
+            optimizers.append(self.optimizer(params, lr=lr))  # type: ignore
+        return optimizers
--- a/prototorch/y/callbacks.py
+++ b/prototorch/y/callbacks.py
@@ -0,0 +1,149 @@
+import warnings
+from typing import Optional, Type
+
+import numpy as np
+import pytorch_lightning as pl
+import torch
+import torchmetrics
+from matplotlib import pyplot as plt
+from prototorch.models.vis import Vis2DAbstract
+from prototorch.utils.utils import mesh2d
+from prototorch.y.architectures.base import BaseYArchitecture
+from prototorch.y.library.gmlvq import GMLVQ
+from pytorch_lightning.loggers import TensorBoardLogger
+
+# Colormap names that PlotLambdaMatrixToTensorboard accepts without warning;
+# any other colormap name given as a string triggers a recommendation warning.
+DIVERGING_COLOR_MAPS = [
+    'PiYG', 'PRGn', 'BrBG', 'PuOr', 'RdGy', 'RdBu', 'RdYlBu', 'RdYlGn',
+    'Spectral', 'coolwarm', 'bwr', 'seismic'
+]
+
+
+class LogTorchmetricCallback(pl.Callback):
+    """
+    Callback that registers a torchmetrics metric on the model during setup.
+    """
+
+    def __init__(
+        self,
+        name,
+        metric: Type[torchmetrics.Metric],
+        on="prediction",
+        **metric_kwargs,
+    ) -> None:
+        """
+        name: Name under which the metric is registered.
+        metric: The metric class to register.
+        on: Metric hook; only "prediction" is accepted by ``setup``.
+        metric_kwargs: Forwarded to the model's ``register_torchmetric``.
+        """
+        self.on = on
+        self.name = name
+        self.metric = metric
+        self.metric_kwargs = metric_kwargs
+
+    def setup(
+        self,
+        trainer: pl.Trainer,
+        pl_module: BaseYArchitecture,
+        stage: Optional[str] = None,
+    ) -> None:
+        """
+        Register the metric on the module.
+
+        Raises a ValueError for any hook other than "prediction".
+        """
+        if self.on != "prediction":
+            raise ValueError(f"{self.on} is no valid metric hook")
+
+        pl_module.register_torchmetric(
+            self.name,
+            self.metric,
+            **self.metric_kwargs,
+        )
+
+
+class VisGLVQ2D(Vis2DAbstract):
+    """
+    Plots prototypes, optional training data and the predictions of the model
+    on a 2D mesh as filled contours.
+    """
+
+    def visualize(self, pl_module):
+        """
+        Draw prototypes, data (when available) and the predicted mesh.
+        """
+        protos = pl_module.prototypes
+        plabels = pl_module.prototype_labels
+        x_train, y_train = self.x_train, self.y_train
+        ax = self.setup_ax()
+        self.plot_protos(ax, protos, plabels)
+
+        # The mesh spans data and prototypes, or the prototypes alone when no
+        # training data is stored.
+        if x_train is None:
+            mesh_points = protos
+        else:
+            self.plot_data(ax, x_train, y_train)
+            mesh_points = np.vstack([x_train, protos])
+        mesh_input, xx, yy = mesh2d(mesh_points, self.border, self.resolution)
+
+        # Give the mesh the same tensor type as the model components.
+        reference = pl_module.components_layer.components
+        mesh_tensor = torch.from_numpy(mesh_input).type_as(reference)
+        y_pred = pl_module.predict(mesh_tensor).cpu().reshape(xx.shape)
+        ax.contourf(xx, yy, y_pred, cmap=self.cmap, alpha=0.35)
+
+
+class VisGMLVQ2D(Vis2DAbstract):
+    """
+    Plots training data and, optionally, prototypes after projecting them onto
+    two directions obtained from ``omega @ omega.T`` of the model.
+    """
+
+    def __init__(self, *args, ev_proj=True, **kwargs):
+        """
+        ev_proj: Stored on the instance; not read by ``visualize``.
+        """
+        super().__init__(*args, **kwargs)
+        self.ev_proj = ev_proj
+
+    def visualize(self, pl_module):
+        """
+        Project data and prototypes with a rank-2 PCA basis and plot them.
+        """
+        protos = pl_module.prototypes
+        plabels = pl_module.prototype_labels
+        x_train, y_train = self.x_train, self.y_train
+        device = pl_module.device
+
+        omega = pl_module._omega.detach()
+        basis, _, _ = torch.pca_lowrank(omega @ omega.T, q=2)
+
+        def project(points):
+            # Move the points to the model device, project, return on the CPU.
+            with torch.no_grad():
+                projected = torch.Tensor(points).to(device) @ basis
+                return projected.cpu().detach()
+
+        x_train = project(x_train)
+        if self.show_protos:
+            protos = project(protos)
+
+        ax = self.setup_ax()
+        self.plot_data(ax, x_train, y_train)
+        if self.show_protos:
+            self.plot_protos(ax, protos, plabels)
+
+
+class PlotLambdaMatrixToTensorboard(pl.Callback):
+    """
+    Callback that plots the lambda matrix of the model and adds the figure to
+    TensorBoard at the start of training and after every training epoch.
+    """
+
+    def __init__(self, cmap='seismic') -> None:
+        """
+        cmap: Colormap passed to ``imshow``. A warning is issued when a
+            colormap name is given that is not in DIVERGING_COLOR_MAPS.
+        """
+        super().__init__()
+        self.cmap = cmap
+
+        # Only names can be checked against the list of diverging maps.
+        if isinstance(self.cmap, str) and self.cmap not in DIVERGING_COLOR_MAPS:
+            warnings.warn(
+                f"{self.cmap} is not a diverging color map. We recommend to use one of the following: {DIVERGING_COLOR_MAPS}"
+            )
+
+    def on_train_start(self, trainer, pl_module: GMLVQ):
+        self.plot_lambda(trainer, pl_module)
+
+    def on_train_epoch_end(self, trainer, pl_module: GMLVQ):
+        self.plot_lambda(trainer, pl_module)
+
+    def plot_lambda(self, trainer, pl_module: GMLVQ):
+        """
+        Plot the normalized lambda matrix and log it with the trainer's logger.
+
+        A warning is issued when the logger is not a TensorBoardLogger. The
+        figure is always closed afterwards; this method runs every epoch and
+        unclosed figures would otherwise accumulate in pyplot.
+        """
+        self.fig, self.ax = plt.subplots(1, 1)
+
+        try:
+            # plot lambda matrix
+            l_matrix = pl_module.lambda_matrix
+
+            # normalize lambda matrix
+            l_matrix = l_matrix / torch.max(torch.abs(l_matrix))
+
+            # plot lambda matrix
+            self.ax.imshow(l_matrix.detach().numpy(),
+                           self.cmap,
+                           vmin=-1,
+                           vmax=1)
+
+            self.fig.colorbar(self.ax.images[-1])
+
+            # add title
+            self.ax.set_title('Lambda Matrix')
+
+            # add to tensorboard
+            if isinstance(trainer.logger, TensorBoardLogger):
+                trainer.logger.experiment.add_figure(
+                    "lambda_matrix",
+                    self.fig,
+                    trainer.global_step,
+                )
+            else:
+                warnings.warn(
+                    f"{self.__class__.__name__} is not compatible with {trainer.logger.__class__.__name__} as logger. Use TensorBoardLogger instead."
+                )
+        finally:
+            # Closing an already closed figure is harmless.
+            plt.close(self.fig)
--- a/prototorch/y/library/init.py
+++ b/prototorch/y/library/init.py
@@ -0,0 +1,5 @@
+from .glvq import GLVQ
+from .gmlvq import GMLVQ
+
+# Models provided by the library package.
+__all__ = [
+    "GLVQ",
+    "GMLVQ",
+]
--- a/prototorch/y/library/glvq.py
+++ b/prototorch/y/library/glvq.py
@@ -0,0 +1,35 @@
+from dataclasses import dataclass
+
+from prototorch.y import (
+    SimpleComparisonMixin,
+    SingleLearningRateMixin,
+    SupervisedArchitecture,
+    WTACompetitionMixin,
+)
+from prototorch.y.architectures.loss import GLVQLossMixin
+
+
+class GLVQ(
+        SupervisedArchitecture,
+        SimpleComparisonMixin,
+        GLVQLossMixin,
+        WTACompetitionMixin,
+        SingleLearningRateMixin,
+):
+    """
+    Generalized Learning Vector Quantization (GLVQ)
+
+    A GLVQ architecture that uses the winner-take-all strategy and the GLVQ loss.
+
+    The model is assembled purely from the listed mixins and adds no code of
+    its own.
+    """
+
+    @dataclass
+    class HyperParameters(
+            SimpleComparisonMixin.HyperParameters,
+            SingleLearningRateMixin.HyperParameters,
+            GLVQLossMixin.HyperParameters,
+            WTACompetitionMixin.HyperParameters,
+            SupervisedArchitecture.HyperParameters,
+    ):
+        """
+        Combines the hyperparameters of all mixins; adds no fields of its own.
+        """
--- a/prototorch/y/library/gmlvq.py
+++ b/prototorch/y/library/gmlvq.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Callable
+
+import torch
+from prototorch.core.distances import omega_distance
+from prototorch.y import (
+    GLVQLossMixin,
+    MultipleLearningRateMixin,
+    OmegaComparisonMixin,
+    SupervisedArchitecture,
+    WTACompetitionMixin,
+)
+
+
+class GMLVQ(
+        SupervisedArchitecture,
+        OmegaComparisonMixin,
+        GLVQLossMixin,
+        WTACompetitionMixin,
+        MultipleLearningRateMixin,
+):
+    """
+    Generalized Matrix Learning Vector Quantization (GMLVQ)
+
+    Combines labeled components, the omega comparison, the GLVQ loss, the
+    winner-take-all inference and per-attribute learning rates.
+    """
+    # HyperParameters
+    # ----------------------------------------------------------------------------------------------------
+    @dataclass
+    class HyperParameters(
+            MultipleLearningRateMixin.HyperParameters,
+            OmegaComparisonMixin.HyperParameters,
+            GLVQLossMixin.HyperParameters,
+            WTACompetitionMixin.HyperParameters,
+            SupervisedArchitecture.HyperParameters,
+    ):
+        """
+        comparison_fn: The comparison / dissimilarity function to use. Override Default: omega_distance.
+        comparison_args: Keyword arguments for the comparison function. Override Default: {}.
+        optimizer: The optimizer to use. Default: torch.optim.Adam.
+        lr: Learning rate per model attribute. Override Default: {components_layer: 0.1, _omega: 0.5}.
+        """
+        comparison_fn: Callable = omega_distance
+        comparison_args: dict = field(default_factory=dict)
+        optimizer: type[torch.optim.Optimizer] = torch.optim.Adam
+
+        lr: dict = field(default_factory=lambda: {
+            "components_layer": 0.1,
+            "_omega": 0.5,
+        })
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,7 @@ INSTALL_REQUIRES = [
    "prototorch>=0.7.3",
    "pytorch_lightning>=1.6.0",
    "torchmetrics",
+    "protobuf<3.20.0",
 ]
 CLI = [
    "jsonargparse",
@@ -54,7 +55,7 @@ ALL = CLI + DEV + DOCS + EXAMPLES + TESTS

 setup(
    name=safe_name("prototorch_" + PLUGIN_NAME),
-    version="0.5.0",
+    version="1.0.0-a2",
    description="Pre-packaged prototype-based "
    "machine learning models using ProtoTorch and PyTorch-Lightning.",
    long_description=long_description,
@@ -80,6 +81,7 @@ setup(
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
+        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.8",
Author	SHA1	Message	Date
Alexander Engelsberger	696719600b	build: bump version 1.0.0a1 → 1.0.0a2	2022-06-03 11:52:50 +02:00
Alexander Engelsberger	48e7c029fa	fix: Fix __init__.py	2022-06-03 11:40:45 +02:00
Alexander Engelsberger	5de3a480c7	build: bump version 0.5.2 → 1.0.0a1	2022-06-03 11:07:10 +02:00
Alexander Engelsberger	626f51ce80	ci: Add possible prerelease to bumpversion	2022-06-03 11:06:44 +02:00
Alexander Engelsberger	6d7d93c8e8	chore: rename y_arch to y	2022-06-03 10:39:11 +02:00
Jensun Ravichandran	93b1d0bd46	feat(vis): add flag to save visualization frames	2022-06-02 19:55:03 +02:00
Alexander Engelsberger	b7992c01db	fix: apply hotfix	2022-06-01 14:26:37 +02:00
Alexander Engelsberger	fcd944d3ff	build: bump version 0.5.1 → 0.5.2	2022-06-01 14:25:44 +02:00
Alexander Engelsberger	054720dd7b	fix(hotfix): Protobuf error workaround	2022-06-01 14:14:57 +02:00
Alexander Engelsberger	23d1a71b31	feat: distribute GMLVQ into mixins	2022-05-31 17:56:03 +02:00
Alexander Engelsberger	e922aae432	feat: add GMLVQ with new architecture	2022-05-19 16:13:08 +02:00
Alexander Engelsberger	3e50d0d817	chore(protoy): mixin restructuring	2022-05-18 15:43:09 +02:00
Alexander Engelsberger	dc4f31d700	chore: rename clc-lc to proto-Y-architecture	2022-05-18 14:11:46 +02:00
Alexander Engelsberger	02954044d7	chore: improve clc-lc test	2022-05-17 17:26:03 +02:00
Alexander Engelsberger	8f08ba66ea	feat: copy old clc-lc implementation	2022-05-17 16:25:43 +02:00
Alexander Engelsberger	e0b92e9ac2	chore: move mixins to seperate file	2022-05-17 16:19:47 +02:00
Alexander Engelsberger	d16a0de202	build: bump version 0.5.0 → 0.5.1	2022-05-17 12:04:08 +02:00
Alexander Engelsberger	76fea3f881	chore: update all examples to pytorch 1.6	2022-05-17 12:03:43 +02:00
Alexander Engelsberger	c00513ae0d	chore: minor updates and version updates	2022-05-17 12:00:52 +02:00
Alexander Engelsberger	bccef8bef0	chore: replace relative imports	2022-05-16 11:12:53 +02:00
Alexander Engelsberger	29ee326b85	ci: Update PreCommit hooks	2022-05-16 11:11:48 +02:00
Jensun Ravichandran	055568dc86	fix: glvq_iris example works again	2022-05-09 17:33:52 +02:00
Alexander Engelsberger	3a7328e290	chore: small changes	2022-04-27 10:37:12 +02:00