build: bump version 0.1.8 → 0.2.0

test: remove examples/liramlvq_tecator.py temporarily
chore(setup): require prototorch>=0.6.0
2021-06-21 16:47:17 +02:00 · 2021-06-21 16:13:41 +02:00 · 2021-06-21 15:51:07 +02:00 · 2021-06-21 15:06:37 +02:00 · 2021-06-21 14:59:54 +02:00 · 2021-06-21 14:42:28 +02:00
56 changed files with 3611 additions and 1044 deletions
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,11 +1,13 @@
 [bumpversion]
-current_version = 0.1.6
+current_version = 0.2.0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)
-serialize = 
-	{major}.{minor}.{patch}
+serialize = {major}.{minor}.{patch}
+message = build: bump version {current_version} → {new_version}

 [bumpversion:file:setup.py]

 [bumpversion:file:./prototorch/models/__init__.py]
+
+[bumpversion:file:./docs/source/conf.py]
--- a/.gitignore
+++ b/.gitignore
@@ -128,8 +128,19 @@ dmypy.json
 # Pyre type checker
 .pyre/

-# Datasets
-datasets/
+.vscode/

-# PyTorch-Lightning
-lightning_logs/
+# Vim
+*~
+*.swp
+*.swo
+
+#  Pytorch Models or Weights
+#  If necessary make exceptions for single pretrained models
+*.pt
+
+# Artifacts created by ProtoTorch Models
+datasets/
+lightning_logs/
+examples/_*.py
+examples/_*.ipynb
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,53 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.0.1
+  hooks:
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: check-yaml
+  - id: check-added-large-files
+  - id: check-ast
+  - id: check-case-conflict
+
+- repo: https://github.com/myint/autoflake
+  rev: v1.4
+  hooks:
+  - id: autoflake
+
+- repo: https://github.com/PyCQA/isort
+  rev: 5.8.0
+  hooks:
+  - id: isort
+
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v0.902
+  hooks:
+  - id: mypy
+    files: prototorch
+    additional_dependencies: [types-pkg_resources]
+
+- repo: https://github.com/pre-commit/mirrors-yapf
+  rev: v0.31.0
+  hooks:
+  - id: yapf
+
+- repo: https://github.com/pre-commit/pygrep-hooks
+  rev: v1.9.0
+  hooks:
+  - id: python-use-type-annotations
+  - id: python-no-log-warn
+  - id: python-check-blanket-noqa
+
+- repo: https://github.com/asottile/pyupgrade
+  rev: v2.19.4
+  hooks:
+  - id: pyupgrade
+
+- repo: https://github.com/si-cim/gitlint
+  rev: v0.15.2-unofficial
+  hooks:
+  - id: gitlint
+    args: [--contrib=CT1, --ignore=B6, --msg-filename]
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -0,0 +1,27 @@
+# .readthedocs.yml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+  configuration: docs/source/conf.py
+  fail_on_warning: true
+
+# Build documentation with MkDocs
+# mkdocs:
+#   configuration: mkdocs.yml
+
+# Optionally build your docs in additional formats such as PDF and ePub
+formats: all
+
+# Optionally set the version of Python and requirements required to build your docs
+python:
+  version: 3.9
+  install:
+    - method: pip
+      path: .
+      extra_requirements:
+        - all
--- a/.remarkrc
+++ b/.remarkrc
@@ -0,0 +1,7 @@
+{
+  "plugins": [
+    "remark-preset-lint-recommended",
+    ["remark-lint-list-item-indent", false],
+    ["no-emphasis-as-header", true]
+  ]
+}
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,14 +1,18 @@
 dist: bionic
 sudo: false
 language: python
-python: 3.8
+python: 3.9
 cache:
  directories:
+  - "$HOME/.cache/pip"
  - "./tests/artifacts"
+  - "$HOME/datasets"
 install:
+- pip install git+https://github.com/si-cim/prototorch@dev --progress-bar off
 - pip install .[all] --progress-bar off
 script:
 - coverage run -m pytest
+- ./tests/test_examples.sh examples/
 after_success:
 - bash <(curl -s https://codecov.io/bash)
 deploy:
--- a/README.md
+++ b/README.md
@@ -1,30 +1,59 @@
 # ProtoTorch Models

-[![Build Status](https://travis-ci.org/si-cim/prototorch_models.svg?branch=main)](https://travis-ci.org/si-cim/prototorch_models)
+[![Build Status](https://api.travis-ci.com/si-cim/prototorch_models.svg?branch=main)](https://travis-ci.com/github/si-cim/prototorch_models)
+[![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/si-cim/prototorch_models?color=yellow&label=version)](https://github.com/si-cim/prototorch_models/releases)
 [![PyPI](https://img.shields.io/pypi/v/prototorch_models)](https://pypi.org/project/prototorch_models/)
+[![GitHub license](https://img.shields.io/github/license/si-cim/prototorch_models)](https://github.com/si-cim/prototorch_models/blob/master/LICENSE)

 Pre-packaged prototype-based machine learning models using ProtoTorch and
 PyTorch-Lightning.

 ## Installation

-To install this plugin, first install
-[ProtoTorch](https://github.com/si-cim/prototorch) with:
+To install this plugin, simply run the following command:

 ```sh
-git clone https://github.com/si-cim/prototorch.git && cd prototorch
-pip install -e .
+pip install prototorch_models
 ```

-and then install the plugin itself with:
+**Installing the models plugin should automatically install a suitable version
+of** [ProtoTorch](https://github.com/si-cim/prototorch). The plugin should then
+be available for use in your Python environment as `prototorch.models`.

-```sh
-git clone https://github.com/si-cim/prototorch_models.git && cd prototorch_models
-pip install -e .
-```
+## Available models

-The plugin should then be available for use in your Python environment as
-`prototorch.models`.
+### LVQ Family
+
+- Learning Vector Quantization 1 (LVQ1)
+- Generalized Learning Vector Quantization (GLVQ)
+- Generalized Relevance Learning Vector Quantization (GRLVQ)
+- Generalized Matrix Learning Vector Quantization (GMLVQ)
+- Limited-Rank Matrix Learning Vector Quantization (LiRaMLVQ)
+- Localized and Generalized Matrix Learning Vector Quantization (LGMLVQ)
+- Learning Vector Quantization Multi-Layer Network (LVQMLN)
+- Siamese GLVQ
+- Cross-Entropy Learning Vector Quantization (CELVQ)
+- Soft Learning Vector Quantization (SLVQ)
+- Robust Soft Learning Vector Quantization (RSLVQ)
+- Probabilistic Learning Vector Quantization (PLVQ)
+
+### Other
+
+- k-Nearest Neighbors (KNN)
+- Neural Gas (NG)
+- Growing Neural Gas (GNG)
+
+## Work in Progress
+
+- Classification-By-Components Network (CBC)
+- Learning Vector Quantization 2.1 (LVQ2.1)
+- Self-Organizing-Map (SOM)
+
+## Planned models
+
+- Median-LVQ
+- Generalized Tangent Learning Vector Quantization (GTLVQ)
+- Self-Incremental Learning Vector Quantization (SILVQ)

 ## Development setup

@@ -53,31 +82,26 @@ pip install -e .[all]  # \[all\] if you are using zsh or MacOS
 ```

 To assist in the development process, you may also find it useful to install
-`yapf`, `isort` and `autoflake`. You can install them easily with `pip`.
+`yapf`, `isort` and `autoflake`. You can install them easily with `pip`. **Also,
+please avoid installing Tensorflow in this environment. It is known to cause
+problems with PyTorch-Lightning.**

-## Available models
+## Contribution

- Generalized Learning Vector Quantization (GLVQ)
- Generalized Relevance Learning Vector Quantization (GRLVQ)
- Generalized Matrix Learning Vector Quantization (GMLVQ)
- Limited-Rank Matrix Learning Vector Quantization (LiRaMLVQ)
- Siamese GLVQ
- Neural Gas (NG)
+This repository contains definitions for [git hooks](https://githooks.com).
+[Pre-commit](https://pre-commit.com) is automatically installed as a development
+dependency with prototorch, or you can install it manually with `pip install
+pre-commit`.

-## Work in Progress
+Please install the hooks by running:
+```bash
+pre-commit install
+pre-commit install --hook-type commit-msg
+```
+before creating the first commit.

- Classification-By-Components Network (CBC)
- Learning Vector Quantization Multi-Layer Network (LVQMLN)
-
-## Planned models
-
- Local-Matrix GMLVQ
- Generalized Tangent Learning Vector Quantization (GTLVQ)
- Robust Soft Learning Vector Quantization (RSLVQ)
- Probabilistic Learning Vector Quantization (PLVQ)
- Self-Incremental Learning Vector Quantization (SILVQ)
- K-Nearest Neighbors (KNN)
- Learning Vector Quantization 1 (LVQ1)
+The commit will fail if the commit message does not follow the specification
+provided [here](https://www.conventionalcommits.org/en/v1.0.0/#specification).

 ## FAQ

--- a/docs/Makefile
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= python3 -m sphinx
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+  set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+  echo.
+  echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+  echo.installed, then set the SPHINXBUILD environment variable to point
+  echo.to the full path of the 'sphinx-build' executable. Alternatively you
+  echo.may add the Sphinx directory to PATH.
+  echo.
+  echo.If you don't have Sphinx installed, grab it from
+  echo.http://sphinx-doc.org/
+  exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
--- a/docs/source/_static/img/horizontal-lockup.png
+++ b/docs/source/_static/img/horizontal-lockup.png
--- a/docs/source/_static/img/logo.png
+++ b/docs/source/_static/img/logo.png
--- a/docs/source/_static/img/model_tree.png
+++ b/docs/source/_static/img/model_tree.png
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -0,0 +1,209 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath("../../"))
+
+# -- Project information -----------------------------------------------------
+
+project = "ProtoTorch Models"
+copyright = "2021, Jensun Ravichandran"
+author = "Jensun Ravichandran"
+
+# The full version, including alpha/beta/rc tags
+#
+release = "0.2.0"
+
+# -- General configuration ---------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+needs_sphinx = "1.6"
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named "sphinx.ext.*") or your custom
+# ones.
+extensions = [
+    "recommonmark",
+    "nbsphinx",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.doctest",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.todo",
+    "sphinx.ext.coverage",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.viewcode",
+    "sphinx_rtd_theme",
+    "sphinxcontrib.katex",
+    "sphinxcontrib.bibtex",
+]
+
+# https://nbsphinx.readthedocs.io/en/0.8.5/custom-css.html#For-All-Pages
+nbsphinx_prolog = """
+.. raw:: html
+
+    <style>
+        .nbinput .prompt,
+        .nboutput .prompt {
+            display: none;
+        }
+    </style>
+"""
+
+# katex_prerender = True
+katex_prerender = False
+
+napoleon_use_ivar = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+source_suffix = [".rst", ".md"]
+
+# The master toctree document.
+master_doc = "index"
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# The name of the Pygments (syntax highlighting) style to use. Choose from:
+# ["default", "emacs", "friendly", "colorful", "autumn", "murphy", "manni",
+#  "monokai", "perldoc", "pastie", "borland", "trac", "native", "fruity", "bw",
+#  "vim", "vs", "tango", "rrt", "xcode", "igor", "paraiso-light", "paraiso-dark",
+#  "lovelace", "algol", "algol_nu", "arduino", "rainbow_dash", "abap",
+#  "solarized-dark", "solarized-light", "sas", "stata", "stata-light",
+#  "stata-dark", "inkpot"]
+pygments_style = "monokai"
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = True
+
+# Disable docstring inheritance
+autodoc_inherit_docstrings = False
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+# https://sphinx-themes.org/
+html_theme = "sphinx_rtd_theme"
+
+html_logo = "_static/img/logo.png"
+
+html_theme_options = {
+    "logo_only": True,
+    "display_version": True,
+    "prev_next_buttons_location": "bottom",
+    "style_external_links": False,
+    "style_nav_header_background": "#ffffff",
+    # Toc options
+    "collapse_navigation": True,
+    "sticky_navigation": True,
+    "navigation_depth": 4,
+    "includehidden": True,
+    "titles_only": False,
+}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+html_css_files = [
+    "https://cdn.jsdelivr.net/npm/katex@0.11.1/dist/katex.min.css",
+]
+
+# -- Options for HTMLHelp output ------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = "protoflowdoc"
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+    # The paper size ("letterpaper" or "a4paper").
+    #
+    # "papersize": "letterpaper",
+    # The font size ("10pt", "11pt" or "12pt").
+    #
+    # "pointsize": "10pt",
+    # Additional stuff for the LaTeX preamble.
+    #
+    # "preamble": "",
+    # Latex figure (float) alignment
+    #
+    # "figure_align": "htbp",
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (
+        master_doc,
+        "prototorch.tex",
+        "ProtoTorch Documentation",
+        "Jensun Ravichandran",
+        "manual",
+    ),
+]
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [(master_doc, "ProtoTorch Models",
+              "ProtoTorch Models Plugin Documentation", [author], 1)]
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (
+        master_doc,
+        "prototorch models",
+        "ProtoTorch Models Plugin Documentation",
+        author,
+        "prototorch models",
+        "Prototype-based machine learning Models in ProtoTorch.",
+        "Miscellaneous",
+    ),
+]
+
+# Example configuration for intersphinx: refer to the Python standard library.
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3/", None),
+    "numpy": ("https://numpy.org/doc/stable/", None),
+    "torch": ('https://pytorch.org/docs/stable/', None),
+    "pytorch_lightning":
+    ("https://pytorch-lightning.readthedocs.io/en/stable/", None),
+}
+
+# -- Options for Epub output ----------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-epub-output
+
+epub_cover = ()
+version = release
+
+# -- Options for Bibliography -------------------------------------------
+bibtex_bibfiles = ['refs.bib']
+bibtex_reference_style = 'author_year'
--- a/docs/source/custom.rst
+++ b/docs/source/custom.rst
@@ -0,0 +1,7 @@
+.. Customize the Models
+
+Abstract Models
+========================================
+.. automodule:: prototorch.models.abstract
+   :members:
+   :undoc-members:
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -0,0 +1,40 @@
+.. ProtoTorch Models documentation master file
+
+ProtoTorch Models Plugins
+========================================
+.. toctree::
+   :hidden:
+   :maxdepth: 3
+
+   self
+   tutorial.ipynb
+
+.. toctree::
+   :hidden:
+   :maxdepth: 3
+   :caption: Library
+
+   library
+
+.. toctree::
+   :hidden:
+   :maxdepth: 3
+   :caption: Customize
+
+   custom
+
+About
+-----------------------------------------
+`ProtoTorch Models <https://github.com/si-cim/prototorch_models>`_ is a plugin
+for `ProtoTorch <https://github.com/si-cim/prototorch>`_. It implements common
+prototype-based machine learning algorithms using `PyTorch-Lightning
+<https://www.pytorchlightning.ai/>`_.
+
+Library
+-----------------------------------------
+ProtoTorch Models delivers many application-ready models.
+These models have been published previously and have been adapted to the ProtoTorch library.
+
+Customizable
+-----------------------------------------
+ProtoTorch Models also contains the building blocks to build your own models with PyTorch-Lightning and ProtoTorch.
--- a/docs/source/library.rst
+++ b/docs/source/library.rst
@@ -0,0 +1,117 @@
+.. Available Models
+
+Models
+========================================
+
+.. image:: _static/img/model_tree.png
+   :width: 600
+
+Unsupervised Methods
+-----------------------------------------
+.. autoclass:: prototorch.models.knn.KNN
+   :members:
+
+.. autoclass:: prototorch.models.unsupervised.NeuralGas
+   :members:
+
+.. autoclass:: prototorch.models.unsupervised.GrowingNeuralGas
+   :members:
+
+Classical Learning Vector Quantization
+-----------------------------------------
+Original LVQ models introduced by :cite:t:`kohonen1989`.
+These heuristic algorithms do not use gradient descent.
+
+.. autoclass:: prototorch.models.lvq.LVQ1
+   :members:
+.. autoclass:: prototorch.models.lvq.LVQ21
+   :members:
+
+It is also possible to use the GLVQ structure as shown by :cite:t:`sato1996` in chapter 4.
+This allows the use of gradient descent methods.
+
+.. autoclass:: prototorch.models.glvq.GLVQ1
+   :members:
+.. autoclass:: prototorch.models.glvq.GLVQ21
+   :members:
+
+Generalized Learning Vector Quantization
+-----------------------------------------
+
+:cite:t:`sato1996` presented an LVQ variant with a cost function called GLVQ.
+This allows the use of gradient descent methods.
+
+.. autoclass:: prototorch.models.glvq.GLVQ
+   :members:
+
+The cost function of GLVQ can be extended by a learnable dissimilarity.
+These learnable dissimilarities assign relevances to each data dimension during the learning phase.
+Examples are GRLVQ :cite:p:`hammer2002` and GMLVQ :cite:p:`schneider2009`.
+
+.. autoclass:: prototorch.models.glvq.GRLVQ
+   :members:
+
+.. autoclass:: prototorch.models.glvq.GMLVQ
+   :members:
+
+The dissimilarity from GMLVQ can be interpreted as a projection into another data space.
+Applying this projection only to the data results in LVQMLN.
+
+.. autoclass:: prototorch.models.glvq.LVQMLN
+   :members:
+
+The projection idea from GMLVQ can be extended to an arbitrary transformation with learnable parameters.
+
+.. autoclass:: prototorch.models.glvq.SiameseGLVQ
+   :members:
+
+Probabilistic Models
+--------------------------------------------
+
+Probabilistic variants assume that the prototypes generate a probability distribution over the classes.
+For a test sample they return a distribution instead of a class assignment.
+
+The following two algorithms were presented by :cite:t:`seo2003`.
+Every prototype is the center of a Gaussian distribution of its class, generating a mixture model.
+
+.. autoclass:: prototorch.models.probabilistic.SLVQ
+   :members:
+
+.. autoclass:: prototorch.models.probabilistic.RSLVQ
+   :members:
+
+:cite:t:`villmann2018` proposed two changes to RSLVQ: first, incorporate the winning rank into the prior probability calculation;
+second, use a divergence as the loss function.
+
+.. autoclass:: prototorch.models.probabilistic.PLVQ
+   :members:
+
+Classification by Component
+--------------------------------------------
+
+Classification-by-Components (CBC) was introduced by :cite:t:`saralajew2019`.
+In a CBC architecture there is no class assigned to the prototypes.
+Instead, the dissimilarities are used in a reasoning process that favours or rejects a class by a learnable degree.
+The output of a CBC network is a probability distribution over all classes.
+
+.. autoclass:: prototorch.models.cbc.CBC
+   :members:
+
+.. autoclass:: prototorch.models.cbc.ImageCBC
+   :members:
+
+Visualization
+========================================
+
+Visualization is very specific to its application.
+ProtoTorch Models delivers visualizations for two-dimensional data and image data.
+
+The visualizations can be shown in a separate window and in TensorBoard.
+
+.. automodule:: prototorch.models.vis
+   :members:
+   :undoc-members:
+
+Bibliography
+========================================
+.. bibliography::
--- a/docs/source/refs.bib
+++ b/docs/source/refs.bib
@@ -0,0 +1,72 @@
+@article{sato1996,
+    title={Generalized learning vector quantization},
+    author={Sato, Atsushi and Yamada, Keiji},
+    journal={Advances in neural information processing systems},
+    pages={423--429},
+    year={1996},
+    publisher={MORGAN KAUFMANN PUBLISHERS},
+    url={http://papers.nips.cc/paper/1113-generalized-learning-vector-quantization.pdf},
+}
+
+@book{kohonen1989,
+    doi = {10.1007/978-3-642-88163-3},
+    year = {1989},
+    publisher = {Springer Berlin Heidelberg},
+    author = {Teuvo Kohonen},
+    title = {Self-Organization and Associative Memory}
+}
+
+@inproceedings{saralajew2019,
+    author = {Saralajew, Sascha and Holdijk, Lars and Rees, Maike and Asan, Ebubekir and Villmann, Thomas},
+    booktitle = {Advances in Neural Information Processing Systems},
+    title = {Classification-by-Components: Probabilistic Modeling of Reasoning over a Set of Components},
+    url = {https://proceedings.neurips.cc/paper/2019/file/dca5672ff3444c7e997aa9a2c4eb2094-Paper.pdf},
+    volume = {32},
+    year = {2019}
+}
+
+@article{seo2003,
+    author = {Seo, Sambu and Obermayer, Klaus},
+    title = "{Soft Learning Vector Quantization}",
+    journal = {Neural Computation},
+    volume = {15},
+    number = {7},
+    pages = {1589-1604},
+    year = {2003},
+    month = {07},
+    doi = {10.1162/089976603321891819},
+}
+
+@article{hammer2002,
+    title = {Generalized relevance learning vector quantization},
+    journal = {Neural Networks},
+    volume = {15},
+    number = {8},
+    pages = {1059-1068},
+    year = {2002},
+    doi = {10.1016/S0893-6080(02)00079-5},
+    author = {Barbara Hammer and Thomas Villmann},
+}
+
+@article{schneider2009,
+    author = {Schneider, Petra and Biehl, Michael and Hammer, Barbara},
+    title = "{Adaptive Relevance Matrices in Learning Vector Quantization}",
+    journal = {Neural Computation},
+    volume = {21},
+    number = {12},
+    pages = {3532-3561},
+    year = {2009},
+    month = {12},
+    doi = {10.1162/neco.2009.11-08-908},
+}
+
+@InProceedings{villmann2018,
+    author="Villmann, Andrea
+    and Kaden, Marika
+    and Saralajew, Sascha
+    and Villmann, Thomas",
+    title="Probabilistic Learning Vector Quantization with Cross-Entropy for Probabilistic Class Assignments in Classification Learning",
+    booktitle="Artificial Intelligence and Soft Computing",
+    year="2018",
+    publisher="Springer International Publishing",
+}
--- a/docs/source/tutorial.ipynb
+++ b/docs/source/tutorial.ipynb
--- a/examples/cbc_iris.py
+++ b/examples/cbc_iris.py
@@ -1,46 +1,52 @@
 """CBC example using the Iris dataset."""

+import argparse
+
 import prototorch as pt
 import pytorch_lightning as pl
 import torch

 if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
    # Dataset
-    from sklearn.datasets import load_iris
-    x_train, y_train = load_iris(return_X_y=True)
-    x_train = x_train[:, [0, 2]]
-    train_ds = pt.datasets.NumpyDataset(x_train, y_train)
+    train_ds = pt.datasets.Iris(dims=[0, 2])

    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=2)
+    pl.utilities.seed.seed_everything(seed=42)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=150)
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=32)

    # Hyperparameters
    hparams = dict(
-        input_dim=x_train.shape[1],
-        nclasses=3,
-        num_components=5,
-        component_initializer=pt.components.SSI(train_ds, noise=0.01),
-        lr=0.01,
+        distribution=[1, 0, 3],
+        margin=0.1,
+        proto_lr=0.01,
+        bb_lr=0.01,
    )

    # Initialize the model
-    model = pt.models.CBC(hparams)
+    model = pt.models.CBC(
+        hparams,
+        components_initializer=pt.initializers.SSCI(train_ds, noise=0.01),
+        reasonings_initializer=pt.initializers.
+        PurePositiveReasoningsInitializer(),
+    )

    # Callbacks
-    dvis = pt.models.VisCBC2D(data=(x_train, y_train),
-                              title="CBC Iris Example")
+    vis = pt.models.VisCBC2D(data=train_ds,
+                             title="CBC Iris Example",
+                             resolution=100,
+                             axis_off=True)

    # Setup trainer
-    trainer = pl.Trainer(
-        max_epochs=200,
-        callbacks=[
-            dvis,
-        ],
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[vis],
    )

    # Training loop
--- a/examples/cli/README.md
+++ b/examples/cli/README.md
@@ -0,0 +1,8 @@
+# Examples using Lightning CLI
+
+Examples in this folder use the experimental [Lightning CLI](https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_cli.html).
+
+To run the example, execute:
+```
+python gmlvq.py --config gmlvq.yaml
+```
--- a/examples/cli/gmlvq.py
+++ b/examples/cli/gmlvq.py
@@ -0,0 +1,20 @@
+"""GMLVQ example using the MNIST dataset."""
+
+import torch
+from pytorch_lightning.utilities.cli import LightningCLI
+
+import prototorch as pt
+from prototorch.models import ImageGMLVQ
+from prototorch.models.abstract import PrototypeModel
+from prototorch.models.data import MNISTDataModule
+
+
+class ExperimentClass(ImageGMLVQ):  # ImageGMLVQ with a fixed optimizer and prototype initializer
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams,
+                         optimizer=torch.optim.Adam,
+                         prototype_initializer=pt.components.zeros(28 * 28),
+                         **kwargs)
+
+
+cli = LightningCLI(ImageGMLVQ, MNISTDataModule)  # NOTE(review): ExperimentClass is never passed here; confirm intent
--- a/examples/cli/gmlvq.yaml
+++ b/examples/cli/gmlvq.yaml
@@ -0,0 +1,11 @@
+model:
+  hparams:
+    input_dim: 784
+    latent_dim: 784
+    distribution:
+      num_classes: 10
+      prototypes_per_class: 2
+    proto_lr: 0.01
+    bb_lr: 0.01
+data:
+  batch_size: 32
--- a/examples/dynamic_pruning.py
+++ b/examples/dynamic_pruning.py
@@ -0,0 +1,81 @@
+"""Dynamically prune 'loser' prototypes in GLVQ-type models."""
+
+import argparse
+
+import prototorch as pt
+import pytorch_lightning as pl
+import torch
+
+if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    # Dataset
+    num_classes = 4
+    num_features = 2
+    num_clusters = 1
+    train_ds = pt.datasets.Random(num_samples=500,
+                                  num_classes=num_classes,
+                                  num_features=num_features,
+                                  num_clusters=num_clusters,
+                                  separation=3.0,
+                                  seed=42)
+
+    # Dataloaders
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=256)
+
+    # Hyperparameters
+    prototypes_per_class = num_clusters * 5
+    hparams = dict(
+        distribution=(num_classes, prototypes_per_class),
+        lr=0.2,
+    )
+
+    # Initialize the model
+    model = pt.models.CELVQ(
+        hparams,
+        prototypes_initializer=pt.initializers.FVCI(2, 3.0),
+    )
+
+    # Compute intermediate input and output sizes
+    model.example_input_array = torch.zeros(4, 2)
+
+    # Summary
+    print(model)
+
+    # Callbacks
+    vis = pt.models.VisGLVQ2D(train_ds)
+    pruning = pt.models.PruneLoserPrototypes(
+        threshold=0.01,  # prune prototype if it wins less than 1%
+        idle_epochs=20,  # pruning too early may cause problems
+        prune_quota_per_epoch=2,  # prune at most 2 prototypes per epoch
+        frequency=1,  # prune every epoch
+        verbose=True,
+    )
+    es = pl.callbacks.EarlyStopping(
+        monitor="train_loss",
+        min_delta=0.001,
+        patience=20,
+        mode="min",
+        verbose=True,
+        check_on_train_epoch_end=True,
+    )
+
+    # Setup trainer
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[
+            vis,
+            pruning,
+            es,
+        ],
+        progress_bar_refresh_rate=0,
+        terminate_on_nan=True,
+        weights_summary="full",
+        accelerator="ddp",
+    )
+
+    # Training loop
+    trainer.fit(model, train_loader)
--- a/examples/glvq_iris.py
+++ b/examples/glvq_iris.py
@@ -1,39 +1,54 @@
 """GLVQ example using the Iris dataset."""

+import argparse
+
 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from torch.optim.lr_scheduler import ExponentialLR

 if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
    # Dataset
-    from sklearn.datasets import load_iris
-    x_train, y_train = load_iris(return_X_y=True)
-    x_train = x_train[:, [0, 2]]
-    train_ds = pt.datasets.NumpyDataset(x_train, y_train)
+    train_ds = pt.datasets.Iris(dims=[0, 2])

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=150)
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=64)

    # Hyperparameters
    hparams = dict(
-        nclasses=3,
-        prototypes_per_class=2,
-        prototype_initializer=pt.components.SMI(train_ds),
+        distribution={
+            "num_classes": 3,
+            "per_class": 4
+        },
        lr=0.01,
    )

    # Initialize the model
-    model = pt.models.GLVQ(hparams)
+    model = pt.models.GLVQ(
+        hparams,
+        optimizer=torch.optim.Adam,
+        prototypes_initializer=pt.initializers.SMCI(train_ds),
+        lr_scheduler=ExponentialLR,
+        lr_scheduler_kwargs=dict(gamma=0.99, verbose=False),
+    )
+
+    # Compute intermediate input and output sizes
+    model.example_input_array = torch.zeros(4, 2)

    # Callbacks
-    vis = pt.models.VisGLVQ2D(data=(x_train, y_train))
+    vis = pt.models.VisGLVQ2D(data=train_ds)

    # Setup trainer
-    trainer = pl.Trainer(
-        max_epochs=50,
+    trainer = pl.Trainer.from_argparse_args(
+        args,
        callbacks=[vis],
+        weights_summary="full",
+        accelerator="ddp",
    )

    # Training loop
--- a/examples/glvq_spiral.py
+++ b/examples/glvq_spiral.py
@@ -1,50 +1,75 @@
 """GLVQ example using the spiral dataset."""

+import argparse
+
 import prototorch as pt
 import pytorch_lightning as pl
 import torch

-
-class StopOnNaN(pl.Callback):
-    def __init__(self, param):
-        super().__init__()
-        self.param = param
-
-    def on_epoch_end(self, trainer, pl_module, logs={}):
-        if torch.isnan(self.param).any():
-            raise ValueError("NaN encountered. Stopping.")
-
-
 if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
    # Dataset
-    train_ds = pt.datasets.Spiral(n_samples=600, noise=0.6)
+    train_ds = pt.datasets.Spiral(num_samples=500, noise=0.5)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=256)
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=256)

    # Hyperparameters
+    num_classes = 2
+    prototypes_per_class = 10
    hparams = dict(
-        nclasses=2,
-        prototypes_per_class=20,
-        prototype_initializer=pt.components.SSI(train_ds, noise=1e-7),
-        transfer_function="sigmoid_beta",
+        distribution=(num_classes, prototypes_per_class),
+        transfer_function="swish_beta",
        transfer_beta=10.0,
-        lr=0.01,
+        proto_lr=0.1,
+        bb_lr=0.1,
+        input_dim=2,
+        latent_dim=2,
    )

    # Initialize the model
-    model = pt.models.GLVQ(hparams)
+    model = pt.models.GMLVQ(
+        hparams,
+        optimizer=torch.optim.Adam,
+        prototypes_initializer=pt.initializers.SSCI(train_ds, noise=1e-2),
+    )

    # Callbacks
-    vis = pt.models.VisGLVQ2D(train_ds, show_last_only=True, block=True)
-    snan = StopOnNaN(model.proto_layer.components)
+    vis = pt.models.VisGLVQ2D(
+        train_ds,
+        show_last_only=False,
+        block=False,
+    )
+    pruning = pt.models.PruneLoserPrototypes(
+        threshold=0.01,
+        idle_epochs=10,
+        prune_quota_per_epoch=5,
+        frequency=5,
+        replace=True,
+        prototypes_initializer=pt.initializers.SSCI(train_ds, noise=1e-1),
+        verbose=True,
+    )
+    es = pl.callbacks.EarlyStopping(
+        monitor="train_loss",
+        min_delta=1.0,
+        patience=5,
+        mode="min",
+        check_on_train_epoch_end=True,
+    )

    # Setup trainer
-    trainer = pl.Trainer(
-        max_epochs=200,
-        callbacks=[vis, snan],
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[
+            vis,
+            # es, # FIXME
+            pruning,
+        ],
+        terminate_on_nan=True,
    )

    # Training loop
--- a/examples/gmlvq_iris.py
+++ b/examples/gmlvq_iris.py
@@ -1,37 +0,0 @@
-"""GMLVQ example using all four dimensions of the Iris dataset."""
-
-import prototorch as pt
-import pytorch_lightning as pl
-import torch
-
-if __name__ == "__main__":
-    # Dataset
-    from sklearn.datasets import load_iris
-    x_train, y_train = load_iris(return_X_y=True)
-    train_ds = pt.datasets.NumpyDataset(x_train, y_train)
-
-    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=150)
-    # Hyperparameters
-    hparams = dict(
-        nclasses=3,
-        prototypes_per_class=1,
-        input_dim=x_train.shape[1],
-        latent_dim=x_train.shape[1],
-        prototype_initializer=pt.components.SMI(train_ds),
-        lr=0.01,
-    )
-
-    # Initialize the model
-    model = pt.models.GMLVQ(hparams)
-
-    # Setup trainer
-    trainer = pl.Trainer(max_epochs=100)
-
-    # Training loop
-    trainer.fit(model, train_loader)
-
-    # Display the Lambda matrix
-    model.show_lambda()
--- a/examples/gmlvq_mnist.py
+++ b/examples/gmlvq_mnist.py
@@ -0,0 +1,101 @@
+"""GMLVQ example using the MNIST dataset."""
+
+import argparse
+
+import prototorch as pt
+import pytorch_lightning as pl
+import torch
+from torchvision import transforms
+from torchvision.datasets import MNIST
+
+if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    # Dataset
+    train_ds = MNIST(
+        "~/datasets",
+        train=True,
+        download=True,
+        transform=transforms.Compose([
+            transforms.ToTensor(),
+        ]),
+    )
+    test_ds = MNIST(
+        "~/datasets",
+        train=False,
+        download=True,
+        transform=transforms.Compose([
+            transforms.ToTensor(),
+        ]),
+    )
+
+    # Dataloaders
+    train_loader = torch.utils.data.DataLoader(train_ds,
+                                               num_workers=0,
+                                               batch_size=256)
+    test_loader = torch.utils.data.DataLoader(test_ds,
+                                              num_workers=0,
+                                              batch_size=256)
+
+    # Hyperparameters
+    num_classes = 10
+    prototypes_per_class = 10
+    hparams = dict(
+        input_dim=28 * 28,
+        latent_dim=28 * 28,
+        distribution=(num_classes, prototypes_per_class),
+        proto_lr=0.01,
+        bb_lr=0.01,
+    )
+
+    # Initialize the model
+    model = pt.models.ImageGMLVQ(
+        hparams,
+        optimizer=torch.optim.Adam,
+        prototypes_initializer=pt.initializers.SMCI(train_ds),
+    )
+
+    # Callbacks
+    vis = pt.models.VisImgComp(
+        data=train_ds,
+        num_columns=10,
+        show=False,
+        tensorboard=True,
+        random_data=100,
+        add_embedding=True,
+        embedding_data=200,
+        flatten_data=False,
+    )
+    pruning = pt.models.PruneLoserPrototypes(
+        threshold=0.01,
+        idle_epochs=1,
+        prune_quota_per_epoch=10,
+        frequency=1,
+        verbose=True,
+    )
+    es = pl.callbacks.EarlyStopping(
+        monitor="train_loss",
+        min_delta=0.001,
+        patience=15,
+        mode="min",
+        check_on_train_epoch_end=True,
+    )
+
+    # Setup trainer
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[
+            vis,
+            pruning,
+            # es,
+        ],
+        terminate_on_nan=True,
+        weights_summary=None,
+        # accelerator="ddp",
+    )
+
+    # Training loop
+    trainer.fit(model, train_loader)
--- a/examples/gng_iris.py
+++ b/examples/gng_iris.py
@@ -0,0 +1,53 @@
+"""Growing Neural Gas example using the Iris dataset."""
+
+import argparse
+
+import prototorch as pt
+import pytorch_lightning as pl
+import torch
+
+if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    # Reproducibility
+    pl.utilities.seed.seed_everything(seed=42)
+
+    # Prepare the data
+    train_ds = pt.datasets.Iris(dims=[0, 2])
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=64)
+
+    # Hyperparameters
+    hparams = dict(
+        num_prototypes=5,
+        input_dim=2,
+        lr=0.1,
+    )
+
+    # Initialize the model
+    model = pt.models.GrowingNeuralGas(
+        hparams,
+        prototypes_initializer=pt.initializers.ZCI(2),
+    )
+
+    # Compute intermediate input and output sizes
+    model.example_input_array = torch.zeros(4, 2)
+
+    # Model summary
+    print(model)
+
+    # Callbacks
+    vis = pt.models.VisNG2D(data=train_loader)
+
+    # Setup trainer
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        max_epochs=100,
+        callbacks=[vis],
+        weights_summary="full",
+    )
+
+    # Training loop
+    trainer.fit(model, train_loader)
--- a/examples/knn_iris.py
+++ b/examples/knn_iris.py
@@ -0,0 +1,58 @@
+"""k-NN example using the Iris dataset from scikit-learn."""
+
+import argparse
+
+import pytorch_lightning as pl
+import torch
+from sklearn.datasets import load_iris
+
+import prototorch as pt
+
+if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    # Dataset
+    x_train, y_train = load_iris(return_X_y=True)
+    x_train = x_train[:, [0, 2]]
+    train_ds = pt.datasets.NumpyDataset(x_train, y_train)
+
+    # Dataloaders
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=150)
+
+    # Hyperparameters
+    hparams = dict(k=5)
+
+    # Initialize the model
+    model = pt.models.KNN(hparams, data=train_ds)
+
+    # Compute intermediate input and output sizes
+    model.example_input_array = torch.zeros(4, 2)
+
+    # Summary
+    print(model)
+
+    # Callbacks
+    vis = pt.models.VisGLVQ2D(
+        data=(x_train, y_train),
+        resolution=200,
+        block=True,
+    )
+
+    # Setup trainer
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        max_epochs=1,
+        callbacks=[vis],
+        weights_summary="full",
+    )
+
+    # Training loop
+    # This is only for visualization. k-NN has no training phase.
+    trainer.fit(model, train_loader)
+
+    # Recall
+    y_pred = model.predict(torch.tensor(x_train))
+    print(y_pred)
--- a/examples/ksom_colors.py
+++ b/examples/ksom_colors.py
@@ -0,0 +1,103 @@
+"""Kohonen Self Organizing Map."""
+
+import argparse
+
+import prototorch as pt
+import pytorch_lightning as pl
+import torch
+from matplotlib import pyplot as plt
+from prototorch.utils.colors import hex_to_rgb
+
+
+class Vis2DColorSOM(pl.Callback):
+    def __init__(self, data, title="ColorSOMe", pause_time=0.1):
+        super().__init__()
+        self.title = title
+        self.fig = plt.figure(self.title)
+        self.data = data
+        self.pause_time = pause_time
+
+    def on_epoch_end(self, trainer, pl_module):
+        ax = self.fig.gca()
+        ax.cla()
+        ax.set_title(self.title)
+        h, w = pl_module._grid.shape[:2]
+        protos = pl_module.prototypes.view(h, w, 3)
+        ax.imshow(protos)
+        ax.axis("off")
+
+        # Overlay color names
+        d = pl_module.compute_distances(self.data)
+        wp = pl_module.predict_from_distances(d)
+        for i, iloc in enumerate(wp):
+            plt.text(iloc[1],
+                     iloc[0],
+                     cnames[i],
+                     ha="center",
+                     va="center",
+                     bbox=dict(facecolor="white", alpha=0.5, lw=0))
+
+        if trainer.current_epoch != trainer.max_epochs - 1:
+            plt.pause(self.pause_time)
+        else:
+            plt.show(block=True)
+
+
+if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    # Reproducibility
+    pl.utilities.seed.seed_everything(seed=42)
+
+    # Prepare the data
+    hex_colors = [
+        "#000000", "#0000ff", "#00007f", "#1f86ff", "#5466aa", "#997fff",
+        "#00ff00", "#ff0000", "#00ffff", "#ff00ff", "#ffff00", "#ffffff",
+        "#545454", "#7f7f7f", "#a8a8a8", "#808000", "#800080", "#ffa500"
+    ]
+    cnames = [
+        "black", "blue", "darkblue", "skyblue", "greyblue", "lilac", "green",
+        "red", "cyan", "magenta", "yellow", "white", "darkgrey", "mediumgrey",
+        "lightgrey", "olive", "purple", "orange"
+    ]
+    colors = list(hex_to_rgb(hex_colors))
+    data = torch.Tensor(colors) / 255.0
+    train_ds = torch.utils.data.TensorDataset(data)
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=8)
+
+    # Hyperparameters
+    hparams = dict(
+        shape=(18, 32),
+        alpha=1.0,
+        sigma=16,
+        lr=0.1,
+    )
+
+    # Initialize the model
+    model = pt.models.KohonenSOM(
+        hparams,
+        prototypes_initializer=pt.initializers.RNCI(3),
+    )
+
+    # Compute intermediate input and output sizes
+    model.example_input_array = torch.zeros(4, 3)
+
+    # Model summary
+    print(model)
+
+    # Callbacks
+    vis = Vis2DColorSOM(data=data)
+
+    # Setup trainer
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        max_epochs=500,
+        callbacks=[vis],
+        weights_summary="full",
+    )
+
+    # Training loop
+    trainer.fit(model, train_loader)
--- a/examples/lgmlvq_moons.py
+++ b/examples/lgmlvq_moons.py
@@ -0,0 +1,68 @@
+"""Localized-GMLVQ example using the Moons dataset."""
+
+import argparse
+
+import prototorch as pt
+import pytorch_lightning as pl
+import torch
+
+if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    # Reproducibility
+    pl.utilities.seed.seed_everything(seed=2)
+
+    # Dataset
+    train_ds = pt.datasets.Moons(num_samples=300, noise=0.2, seed=42)
+
+    # Dataloaders
+    train_loader = torch.utils.data.DataLoader(train_ds,
+                                               batch_size=256,
+                                               shuffle=True)
+
+    # Hyperparameters
+    hparams = dict(
+        distribution=[1, 3],
+        input_dim=2,
+        latent_dim=2,
+    )
+
+    # Initialize the model
+    model = pt.models.LGMLVQ(
+        hparams,
+        prototypes_initializer=pt.initializers.SMCI(train_ds),
+    )
+
+    # Compute intermediate input and output sizes
+    model.example_input_array = torch.zeros(4, 2)
+
+    # Summary
+    print(model)
+
+    # Callbacks
+    vis = pt.models.VisGLVQ2D(data=train_ds)
+    es = pl.callbacks.EarlyStopping(
+        monitor="train_acc",
+        min_delta=0.001,
+        patience=20,
+        mode="max",
+        verbose=False,
+        check_on_train_epoch_end=True,
+    )
+
+    # Setup trainer
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[
+            vis,
+            es,
+        ],
+        weights_summary="full",
+        accelerator="ddp",
+    )
+
+    # Training loop
+    trainer.fit(model, train_loader)
--- a/examples/liramlvq_tecator.py
+++ b/examples/liramlvq_tecator.py
@@ -1,48 +0,0 @@
-"""Limited Rank Matrix LVQ example using the Tecator dataset."""
-
-import prototorch as pt
-import pytorch_lightning as pl
-import torch
-
-if __name__ == "__main__":
-    # Dataset
-    train_ds = pt.datasets.Tecator(root="~/datasets/", train=True)
-
-    # Reproducibility
-    pl.utilities.seed.seed_everything(seed=42)
-
-    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=32)
-
-    # Hyperparameters
-    hparams = dict(
-        nclasses=2,
-        prototypes_per_class=2,
-        input_dim=100,
-        latent_dim=2,
-        prototype_initializer=pt.components.SMI(train_ds),
-        lr=0.001,
-    )
-
-    # Initialize the model
-    model = pt.models.GMLVQ(hparams)
-
-    # Callbacks
-    vis = pt.models.VisSiameseGLVQ2D(train_ds, border=0.1)
-
-    # Setup trainer
-    trainer = pl.Trainer(max_epochs=200, callbacks=[vis])
-
-    # Training loop
-    trainer.fit(model, train_loader)
-
-    # Save the model
-    torch.save(model, "liramlvq_tecator.pt")
-
-    # Load a saved model
-    saved_model = torch.load("liramlvq_tecator.pt")
-
-    # Display the Lambda matrix
-    saved_model.show_lambda()
--- a/examples/lvq_iris.py
+++ b/examples/lvq_iris.py
@@ -1,42 +0,0 @@
-"""Classical LVQ using GLVQ example on the Iris dataset."""
-
-import prototorch as pt
-import pytorch_lightning as pl
-import torch
-
-if __name__ == "__main__":
-    # Dataset
-    from sklearn.datasets import load_iris
-    x_train, y_train = load_iris(return_X_y=True)
-    x_train = x_train[:, [0, 2]]
-    train_ds = pt.datasets.NumpyDataset(x_train, y_train)
-
-    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=150)
-
-    # Hyperparameters
-    hparams = dict(
-        nclasses=3,
-        prototypes_per_class=2,
-        prototype_initializer=pt.components.SMI(train_ds),
-        #prototype_initializer=pt.components.Random(2),
-        lr=0.005,
-    )
-
-    # Initialize the model
-    model = pt.models.LVQ1(hparams)
-    #model = pt.models.LVQ21(hparams)
-
-    # Callbacks
-    vis = pt.models.VisGLVQ2D(data=(x_train, y_train))
-
-    # Setup trainer
-    trainer = pl.Trainer(
-        max_epochs=200,
-        callbacks=[vis],
-    )
-
-    # Training loop
-    trainer.fit(model, train_loader)
--- a/examples/lvqmln_iris.py
+++ b/examples/lvqmln_iris.py
@@ -0,0 +1,90 @@
+"""LVQMLN example using all four dimensions of the Iris dataset."""
+
+import argparse
+
+import prototorch as pt
+import pytorch_lightning as pl
+import torch
+
+
+class Backbone(torch.nn.Module):
+    def __init__(self, input_size=4, hidden_size=10, latent_size=2):
+        super().__init__()
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+        self.latent_size = latent_size
+        self.dense1 = torch.nn.Linear(self.input_size, self.hidden_size)
+        self.dense2 = torch.nn.Linear(self.hidden_size, self.latent_size)
+        self.activation = torch.nn.Sigmoid()
+
+    def forward(self, x):
+        x = self.activation(self.dense1(x))
+        out = self.activation(self.dense2(x))
+        return out
+
+
+if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    # Dataset
+    train_ds = pt.datasets.Iris()
+
+    # Reproducibility
+    pl.utilities.seed.seed_everything(seed=42)
+
+    # Dataloaders
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=150)
+
+    # Hyperparameters
+    hparams = dict(
+        distribution=[3, 4, 5],
+        proto_lr=0.001,
+        bb_lr=0.001,
+    )
+
+    # Initialize the backbone
+    backbone = Backbone()
+
+    # Initialize the model
+    model = pt.models.LVQMLN(
+        hparams,
+        prototypes_initializer=pt.initializers.SSCI(
+            train_ds,
+            transform=backbone,
+        ),
+        backbone=backbone,
+    )
+
+    # Model summary
+    print(model)
+
+    # Callbacks
+    vis = pt.models.VisSiameseGLVQ2D(
+        data=train_ds,
+        map_protos=False,
+        border=0.1,
+        resolution=500,
+        axis_off=True,
+    )
+    pruning = pt.models.PruneLoserPrototypes(
+        threshold=0.01,
+        idle_epochs=20,
+        prune_quota_per_epoch=2,
+        frequency=10,
+        verbose=True,
+    )
+
+    # Setup trainer
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[
+            vis,
+            pruning,
+        ],
+    )
+
+    # Training loop
+    trainer.fit(model, train_loader)
--- a/examples/ng_iris.py
+++ b/examples/ng_iris.py
@@ -1,13 +1,21 @@
 """Neural Gas example using the Iris dataset."""

+import argparse
+
 import prototorch as pt
 import pytorch_lightning as pl
 import torch
+from sklearn.datasets import load_iris
+from sklearn.preprocessing import StandardScaler
+from torch.optim.lr_scheduler import ExponentialLR

 if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
    # Prepare and pre-process the dataset
-    from sklearn.datasets import load_iris
-    from sklearn.preprocessing import StandardScaler
    x_train, y_train = load_iris(return_X_y=True)
    x_train = x_train[:, [0, 2]]
    scaler = StandardScaler()
@@ -17,15 +25,25 @@ if __name__ == "__main__":
    train_ds = pt.datasets.NumpyDataset(x_train, y_train)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=150)
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=150)

    # Hyperparameters
-    hparams = dict(num_prototypes=30, lr=0.03)
+    hparams = dict(
+        num_prototypes=30,
+        input_dim=2,
+        lr=0.03,
+    )

    # Initialize the model
-    model = pt.models.NeuralGas(hparams)
+    model = pt.models.NeuralGas(
+        hparams,
+        prototypes_initializer=pt.core.ZCI(2),
+        lr_scheduler=ExponentialLR,
+        lr_scheduler_kwargs=dict(gamma=0.99, verbose=False),
+    )
+
+    # Compute intermediate input and output sizes
+    model.example_input_array = torch.zeros(4, 2)

    # Model summary
    print(model)
@@ -34,7 +52,11 @@ if __name__ == "__main__":
    vis = pt.models.VisNG2D(data=train_ds)

    # Setup trainer
-    trainer = pl.Trainer(max_epochs=200, callbacks=[vis])
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[vis],
+        weights_summary="full",
+    )

    # Training loop
    trainer.fit(model, train_loader)
--- a/examples/rslvq_iris.py
+++ b/examples/rslvq_iris.py
@@ -0,0 +1,61 @@
+"""RSLVQ example using the Iris dataset."""
+
+import argparse
+
+import prototorch as pt
+import pytorch_lightning as pl
+import torch
+
+if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    # Reproducibility
+    pl.utilities.seed.seed_everything(seed=42)
+
+    # Dataset
+    train_ds = pt.datasets.Iris(dims=[0, 2])
+
+    # Dataloaders
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=64)
+
+    # Hyperparameters
+    hparams = dict(
+        distribution=[2, 2, 3],
+        proto_lr=0.05,
+        lambd=0.1,
+        variance=1.0,
+        input_dim=2,
+        latent_dim=2,
+        bb_lr=0.01,
+    )
+
+    # Initialize the model
+    model = pt.models.RSLVQ(
+        hparams,
+        optimizer=torch.optim.Adam,
+        prototypes_initializer=pt.initializers.SSCI(train_ds, noise=0.2),
+    )
+
+    # Compute intermediate input and output sizes
+    model.example_input_array = torch.zeros(4, 2)
+
+    # Summary
+    print(model)
+
+    # Callbacks
+    vis = pt.models.VisGLVQ2D(data=train_ds)
+
+    # Setup trainer
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[vis],
+        terminate_on_nan=True,
+        weights_summary="full",
+        accelerator="ddp",
+    )
+
+    # Training loop
+    trainer.fit(model, train_loader)
--- a/examples/siamese_glvq_iris.py
+++ b/examples/siamese_glvq_iris.py
@@ -1,12 +1,13 @@
 """Siamese GLVQ example using all four dimensions of the Iris dataset."""

+import argparse
+
 import prototorch as pt
 import pytorch_lightning as pl
 import torch


 class Backbone(torch.nn.Module):
-    """Two fully connected layers with ReLU activation."""
    def __init__(self, input_size=4, hidden_size=10, latent_size=2):
        super().__init__()
        self.input_size = input_size
@@ -14,51 +15,58 @@ class Backbone(torch.nn.Module):
        self.latent_size = latent_size
        self.dense1 = torch.nn.Linear(self.input_size, self.hidden_size)
        self.dense2 = torch.nn.Linear(self.hidden_size, self.latent_size)
-        self.relu = torch.nn.ReLU()
+        self.activation = torch.nn.Sigmoid()

    def forward(self, x):
-        x = self.relu(self.dense1(x))
-        out = self.relu(self.dense2(x))
+        x = self.activation(self.dense1(x))
+        out = self.activation(self.dense2(x))
        return out


 if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
    # Dataset
-    from sklearn.datasets import load_iris
-    x_train, y_train = load_iris(return_X_y=True)
-    train_ds = pt.datasets.NumpyDataset(x_train, y_train)
+    train_ds = pt.datasets.Iris()

    # Reproducibility
    pl.utilities.seed.seed_everything(seed=2)

    # Dataloaders
-    train_loader = torch.utils.data.DataLoader(train_ds,
-                                               num_workers=0,
-                                               batch_size=150)
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=150)

    # Hyperparameters
    hparams = dict(
-        nclasses=3,
-        prototypes_per_class=2,
-        prototype_initializer=pt.components.SMI((x_train, y_train)),
-        proto_lr=0.001,
-        bb_lr=0.001,
+        distribution=[1, 2, 3],
+        proto_lr=0.01,
+        bb_lr=0.01,
    )

+    # Initialize the backbone
+    backbone = Backbone()
+
    # Initialize the model
    model = pt.models.SiameseGLVQ(
        hparams,
-        backbone_module=Backbone,
+        prototypes_initializer=pt.initializers.SMCI(train_ds),
+        backbone=backbone,
+        both_path_gradients=False,
    )

    # Model summary
    print(model)

    # Callbacks
-    vis = pt.models.VisSiameseGLVQ2D(data=(x_train, y_train), border=0.1)
+    vis = pt.models.VisSiameseGLVQ2D(data=train_ds, border=0.1)

    # Setup trainer
-    trainer = pl.Trainer(max_epochs=100, callbacks=[vis])
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[vis],
+    )

    # Training loop
    trainer.fit(model, train_loader)
--- a/examples/warm_starting.py
+++ b/examples/warm_starting.py
@@ -0,0 +1,84 @@
+"""Warm-starting GLVQ with prototypes from Growing Neural Gas."""
+
+import argparse
+
+import prototorch as pt
+import pytorch_lightning as pl
+import torch
+from torch.optim.lr_scheduler import ExponentialLR
+
+if __name__ == "__main__":
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+    parser = pl.Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    # Prepare the data
+    train_ds = pt.datasets.Iris(dims=[0, 2])
+    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=64)
+
+    # Initialize the gng
+    gng = pt.models.GrowingNeuralGas(
+        hparams=dict(num_prototypes=5, insert_freq=2, lr=0.1),
+        prototypes_initializer=pt.initializers.ZCI(2),
+        lr_scheduler=ExponentialLR,
+        lr_scheduler_kwargs=dict(gamma=0.99, verbose=False),
+    )
+
+    # Callbacks
+    es = pl.callbacks.EarlyStopping(
+        monitor="loss",
+        min_delta=0.001,
+        patience=20,
+        mode="min",
+        verbose=False,
+        check_on_train_epoch_end=True,
+    )
+
+    # Setup trainer for GNG
+    trainer = pl.Trainer(
+        max_epochs=200,
+        callbacks=[es],
+        weights_summary=None,
+    )
+
+    # Training loop
+    trainer.fit(gng, train_loader)
+
+    # Hyperparameters
+    hparams = dict(
+        distribution=[],
+        lr=0.01,
+    )
+
+    # Warm-start prototypes
+    knn = pt.models.KNN(dict(k=1), data=train_ds)
+    prototypes = gng.prototypes
+    plabels = knn.predict(prototypes)
+
+    # Initialize the model
+    model = pt.models.GLVQ(
+        hparams,
+        optimizer=torch.optim.Adam,
+        prototypes_initializer=pt.initializers.LCI(prototypes),
+        labels_initializer=pt.initializers.LLI(plabels),
+        lr_scheduler=ExponentialLR,
+        lr_scheduler_kwargs=dict(gamma=0.99, verbose=False),
+    )
+
+    # Compute intermediate input and output sizes
+    model.example_input_array = torch.zeros(4, 2)
+
+    # Callbacks
+    vis = pt.models.VisGLVQ2D(data=train_ds)
+
+    # Setup trainer
+    trainer = pl.Trainer.from_argparse_args(
+        args,
+        callbacks=[vis],
+        weights_summary="full",
+        accelerator="ddp",
+    )
+
+    # Training loop
+    trainer.fit(model, train_loader)
--- a/prototorch/models/__init__.py
+++ b/prototorch/models/__init__.py
@@ -1,8 +1,26 @@
+"""`models` plugin for the `prototorch` package."""
+
 from importlib.metadata import PackageNotFoundError, version

-from .cbc import CBC
-from .glvq import GLVQ, GMLVQ, GRLVQ, LVQMLN, ImageGLVQ, SiameseGLVQ, LVQ1, LVQ21
-from .neural_gas import NeuralGas
+from .callbacks import PrototypeConvergence, PruneLoserPrototypes
+from .cbc import CBC, ImageCBC
+from .glvq import (
+    GLVQ,
+    GLVQ1,
+    GLVQ21,
+    GMLVQ,
+    GRLVQ,
+    LGMLVQ,
+    LVQMLN,
+    ImageGLVQ,
+    ImageGMLVQ,
+    SiameseGLVQ,
+    SiameseGMLVQ,
+)
+from .knn import KNN
+from .lvq import LVQ1, LVQ21, MedianLVQ
+from .probabilistic import CELVQ, PLVQ, RSLVQ, SLVQ
+from .unsupervised import GrowingNeuralGas, HeskesSOM, KohonenSOM, NeuralGas
 from .vis import *

-__version__ = "0.1.6"
+__version__ = "0.2.0"
--- a/prototorch/models/abstract.py
+++ b/prototorch/models/abstract.py
@@ -1,23 +1,192 @@
+"""Abstract classes to be inherited by prototorch models."""
+
+from typing import Final, final
+
 import pytorch_lightning as pl
 import torch
-from torch.optim.lr_scheduler import ExponentialLR
+import torchmetrics
+
+from ..core.competitions import WTAC
+from ..core.components import Components, LabeledComponents
+from ..core.distances import euclidean_distance
+from ..core.initializers import LabelsInitializer
+from ..core.pooling import stratified_min_pooling
+from ..nn.wrappers import LambdaLayer


-class AbstractLightningModel(pl.LightningModule):
-    def configure_optimizers(self):
-        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
-        scheduler = ExponentialLR(optimizer,
-                                  gamma=0.99,
-                                  last_epoch=-1,
-                                  verbose=False)
-        sch = {
-            "scheduler": scheduler,
-            "interval": "step",
-        }  # called after each training step
-        return [optimizer], [sch]
+class ProtoTorchMixin:
+    """Common base class of the ProtoTorch mixins defined in this module."""


-class AbstractPrototypeModel(AbstractLightningModel):
+class ProtoTorchBolt(pl.LightningModule):
+    """Base LightningModule that every ProtoTorch model derives from."""
+    def __repr__(self):
+        # Tab-indent every line of the inherited repr, then wrap the result.
+        base_lines = super().__repr__().splitlines()
+        body = "".join(f"\t{row}\n" for row in base_lines)
+        return f"ProtoTorch Bolt(\n{body})"
+
+
+class PrototypeModel(ProtoTorchBolt):
+    def __init__(self, hparams, **kwargs):
+        super().__init__()
+
+        # Store `hparams` on the module (read back below via `self.hparams`)
+        self.save_hyperparameters(hparams)
+
+        # Fall back to a learning rate of 0.01 when `hparams` has no "lr"
+        self.hparams.setdefault("lr", 0.01)
+
+        # Optional config from kwargs: optimizer class, scheduler class + kwargs
+        self.optimizer = kwargs.get("optimizer", torch.optim.Adam)
+        self.lr_scheduler = kwargs.get("lr_scheduler", None)
+        self.lr_scheduler_kwargs = kwargs.get("lr_scheduler_kwargs", dict())
+
+        distance_fn = kwargs.get("distance_fn", euclidean_distance)
+        self.distance_layer = LambdaLayer(distance_fn)  # callable as a layer
+
+    @property
+    def num_prototypes(self):
+        return len(self.proto_layer.components)  # proto_layer: set by subclass
+
    @property
    def prototypes(self):
        return self.proto_layer.components.detach().cpu()
+
+    @property
+    def components(self):
+        """Only an alias for the prototypes."""
+        return self.prototypes
+
+    def configure_optimizers(self):
+        optimizer = self.optimizer(self.parameters(), lr=self.hparams.lr)
+        if self.lr_scheduler is not None:
+            scheduler = self.lr_scheduler(optimizer,
+                                          **self.lr_scheduler_kwargs)
+            sch = {
+                "scheduler": scheduler,
+                "interval": "step",
+            }  # called after each training step
+            return [optimizer], [sch]
+        else:
+            return optimizer  # no scheduler configured: bare optimizer
+
+    @final
+    def reconfigure_optimizers(self):
+        self.trainer.accelerator_backend.setup_optimizers(self.trainer)
+
+    def add_prototypes(self, *args, **kwargs):
+        self.proto_layer.add_components(*args, **kwargs)
+        self.reconfigure_optimizers()  # presumably to track the new parameters
+
+    def remove_prototypes(self, indices):
+        self.proto_layer.remove_components(indices)
+        self.reconfigure_optimizers()  # same re-setup as in add_prototypes
+
+
+class UnsupervisedPrototypeModel(PrototypeModel):
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+
+        # Layers (proto_layer is only created when an initializer is passed)
+        prototypes_initializer = kwargs.get("prototypes_initializer", None)
+        if prototypes_initializer is not None:
+            self.proto_layer = Components(
+                self.hparams.num_prototypes,
+                initializer=prototypes_initializer,
+            )
+
+    def compute_distances(self, x):
+        protos = self.proto_layer()  # unlabeled components
+        distances = self.distance_layer(x, protos)
+        return distances
+
+    def forward(self, x):
+        distances = self.compute_distances(x)
+        return distances  # the forward pass is just the distance computation
+
+
+class SupervisedPrototypeModel(PrototypeModel):
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+
+        # Layers (proto_layer is only built when an initializer is passed)
+        prototypes_initializer = kwargs.get("prototypes_initializer", None)
+        labels_initializer = kwargs.get("labels_initializer",
+                                        LabelsInitializer())
+        if prototypes_initializer is not None:
+            self.proto_layer = LabeledComponents(
+                distribution=self.hparams.distribution,
+                components_initializer=prototypes_initializer,
+                labels_initializer=labels_initializer,
+            )
+        self.competition_layer = WTAC()
+
+    @property
+    def prototype_labels(self):
+        return self.proto_layer.labels.detach().cpu()
+
+    @property
+    def num_classes(self):
+        return self.proto_layer.num_classes
+
+    def compute_distances(self, x):
+        """Return the distances from `x` to all prototypes."""
+        protos, _ = self.proto_layer()  # the labels are not needed here
+        return self.distance_layer(x, protos)
+
+    def forward(self, x):
+        """Return the softmin of the stratified-min-pooled distances."""
+        distances = self.compute_distances(x)
+        winning = stratified_min_pooling(distances, self.proto_layer.labels)
+        # dim=1 is what the deprecated implicit-dim call chose for 2-D input.
+        return torch.nn.functional.softmin(winning, dim=1)
+
+    def predict_from_distances(self, distances):
+        """Run the competition layer on `distances` without gradients."""
+        with torch.no_grad():
+            plabels = self.proto_layer.labels
+            return self.competition_layer(distances, plabels)
+
+    def predict(self, x):
+        """Predict from raw inputs `x`; no gradients are recorded."""
+        with torch.no_grad():
+            distances = self.compute_distances(x)
+        return self.predict_from_distances(distances)
+
+    def log_acc(self, distances, targets, tag):
+        """Log the accuracy of the predictions from `distances` under `tag`."""
+        preds = self.predict_from_distances(distances)
+        # `.int()` because FloatTensors are assumed to be class probabilities
+        accuracy = torchmetrics.functional.accuracy(preds.int(), targets.int())
+        self.log(tag,
+                 accuracy,
+                 on_step=False,
+                 on_epoch=True,
+                 prog_bar=True,
+                 logger=True)
+
+
+class NonGradientMixin(ProtoTorchMixin):
+    """Mixin for custom non-gradient optimization."""
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.automatic_optimization: Final = False  # Lightning flag, kept off
+
+    def training_step(self, train_batch, batch_idx, optimizer_idx=None):
+        raise NotImplementedError  # the update rule must come from a subclass
+
+
+class ImagePrototypesMixin(ProtoTorchMixin):
+    """Mixin for models with image prototypes."""
+    @final
+    def on_train_batch_end(self, outputs, batch, batch_idx, dataloader_idx):
+        """Constrain the components to the range [0, 1] by clamping after updates."""
+        self.proto_layer.components.data.clamp_(0.0, 1.0)  # in-place on .data
+
+    def get_prototype_grid(self, num_columns=2, return_channels_last=True):
+        from torchvision.utils import make_grid  # imported only when called
+        grid = make_grid(self.components, nrow=num_columns)
+        if return_channels_last:
+            grid = grid.permute((1, 2, 0))  # move axis 0 to the last position
+        return grid.cpu()
--- a/prototorch/models/callbacks.py
+++ b/prototorch/models/callbacks.py
@@ -0,0 +1,137 @@
+"""Lightning Callbacks."""
+
+import logging
+
+import pytorch_lightning as pl
+import torch
+
+from ..core.components import Components
+from ..core.initializers import LiteralCompInitializer
+from .extras import ConnectionTopology
+
+
+class PruneLoserPrototypes(pl.Callback):
+    """Prune prototypes whose mean win ratio falls below `threshold`."""
+    def __init__(self,
+                 threshold=0.01,
+                 idle_epochs=10,
+                 prune_quota_per_epoch=-1,
+                 frequency=1,
+                 replace=False,
+                 prototypes_initializer=None,
+                 verbose=False):
+        self.threshold = threshold  # minimum win ratio
+        self.idle_epochs = idle_epochs  # epochs to wait before pruning
+        self.prune_quota_per_epoch = prune_quota_per_epoch
+        self.frequency = frequency
+        self.replace = replace
+        self.verbose = verbose
+        self.prototypes_initializer = prototypes_initializer
+
+    def on_epoch_end(self, trainer, pl_module):
+        epoch = trainer.current_epoch + 1
+        if epoch < self.idle_epochs or epoch % self.frequency:
+            return None
+
+        ratios = pl_module.prototype_win_ratios.mean(dim=0)
+        # `torch.where` builds the indices on the same device as `ratios`.
+        to_prune = torch.where(ratios < self.threshold)[0].tolist()
+        prune_labels = pl_module.prototype_labels[to_prune]
+        if self.prune_quota_per_epoch > 0:
+            to_prune = to_prune[:self.prune_quota_per_epoch]
+            prune_labels = prune_labels[:self.prune_quota_per_epoch]
+
+        if len(to_prune) > 0:
+            if self.verbose:
+                print(f"\nPrototype win ratios: {ratios}")
+                print(f"Pruning prototypes at: {to_prune}")
+                print(f"Corresponding labels are: {prune_labels.tolist()}")
+            cur_num_protos = pl_module.num_prototypes
+            pl_module.remove_prototypes(indices=to_prune)
+            if self.replace:
+                labels, counts = torch.unique(prune_labels,
+                                              sorted=True,
+                                              return_counts=True)
+                distribution = dict(zip(labels.tolist(), counts.tolist()))
+                if self.verbose:
+                    print("Re-adding pruned prototypes...")
+                    print(f"{distribution=}")
+                pl_module.add_prototypes(
+                    distribution=distribution,
+                    components_initializer=self.prototypes_initializer)
+            new_num_protos = pl_module.num_prototypes
+            if self.verbose:
+                print(f"`num_prototypes` changed from {cur_num_protos} "
+                      f"to {new_num_protos}.")
+        return True
+
+
+class PrototypeConvergence(pl.Callback):
+    def __init__(self, min_delta=0.01, idle_epochs=10, verbose=False):
+        self.min_delta = min_delta  # stored, but not read by on_epoch_end yet
+        self.idle_epochs = idle_epochs  # epochs to wait
+        self.verbose = verbose
+
+    def on_epoch_end(self, trainer, pl_module):
+        if (trainer.current_epoch + 1) < self.idle_epochs:
+            return None
+        if self.verbose:
+            print("Stopping...")
+        # TODO: no convergence check or actual stop is implemented here yet
+        return True
+
+
+class GNGCallback(pl.Callback):
+    """GNG Callback.
+
+    Applies growing algorithm based on accumulated error and topology.
+
+    Based on "A Growing Neural Gas Network Learns Topologies" by Bernd Fritzke.
+
+    """
+    def __init__(self, reduction=0.1, freq=10):
+        self.reduction = reduction  # scales the errors of the split pair
+        self.freq = freq  # one insertion attempt every `freq` epochs
+
+    def on_epoch_end(self, trainer: pl.Trainer, pl_module):
+        if (trainer.current_epoch + 1) % self.freq == 0:
+            # Get the error vector, the topology and the current components
+            errors = pl_module.errors
+            topology: ConnectionTopology = pl_module.topology_layer
+            components: Components = pl_module.proto_layer.components
+
+            # Insertion point: the component with the largest error
+            worst = torch.argmax(errors)
+
+            neighbors = topology.get_neighbors(worst)[0]  # connected indices
+
+            if len(neighbors) == 0:
+                logging.log(level=20, msg="No neighbor-pairs found!")  # INFO
+                return
+
+            neighbors_errors = errors[neighbors]
+            worst_neighbor = neighbors[torch.argmax(neighbors_errors)]
+
+            # New prototype: midpoint between `worst` and its worst neighbor
+            new_component = 0.5 * (components[worst] +
+                                   components[worst_neighbor])
+
+            # Add component (wrapped as a one-row literal initializer)
+            pl_module.proto_layer.add_components(
+                None,
+                initializer=LiteralCompInitializer(new_component.unsqueeze(0)))
+
+            # Adjust topology: link the new node (index -1), unlink the pair
+            topology.add_prototype()
+            topology.add_connection(worst, -1)
+            topology.add_connection(worst_neighbor, -1)
+            topology.remove_connection(worst, worst_neighbor)
+
+            # New errors: the new node gets `worst`'s old error, then the
+            # pair is scaled by `reduction` (`errors` is still the old tensor)
+            worst_error = errors[worst].unsqueeze(0)
+            pl_module.errors = torch.cat([pl_module.errors, worst_error])
+            pl_module.errors[worst] = errors[worst] * self.reduction
+            pl_module.errors[
+                worst_neighbor] = errors[worst_neighbor] * self.reduction
+            # Re-run optimizer setup (presumably to pick up the new component)
+            trainer.accelerator_backend.setup_optimizers(trainer)
--- a/prototorch/models/cbc.py
+++ b/prototorch/models/cbc.py
@@ -1,165 +1,77 @@
-import pytorch_lightning as pl
 import torch
 import torchmetrics
-from prototorch.components.components import Components
-from prototorch.functions.distances import euclidean_distance
-from prototorch.functions.similarities import cosine_similarity
+
+from ..core.competitions import CBCC
+from ..core.components import ReasoningComponents
+from ..core.initializers import RandomReasoningsInitializer
+from ..core.losses import MarginLoss
+from ..core.similarities import euclidean_similarity
+from ..nn.wrappers import LambdaLayer
+from .abstract import ImagePrototypesMixin
+from .glvq import SiameseGLVQ


-def rescaled_cosine_similarity(x, y):
-    """Cosine Similarity rescaled to [0, 1]."""
-    similarities = cosine_similarity(x, y)
-    return (similarities + 1.0) / 2.0
-
-
-def shift_activation(x):
-    return (x + 1.0) / 2.0
-
-
-def euclidean_similarity(x, y):
-    d = euclidean_distance(x, y)
-    return torch.exp(-d * 3)
-
-
-class CosineSimilarity(torch.nn.Module):
-    def __init__(self, activation=shift_activation):
-        super().__init__()
-        self.activation = activation
-
-    def forward(self, x, y):
-        epsilon = torch.finfo(x.dtype).eps
-        normed_x = (x / x.pow(2).sum(dim=tuple(range(
-            1, x.ndim)), keepdim=True).clamp(min=epsilon).sqrt()).flatten(
-                start_dim=1)
-        normed_y = (y / y.pow(2).sum(dim=tuple(range(
-            1, y.ndim)), keepdim=True).clamp(min=epsilon).sqrt()).flatten(
-                start_dim=1)
-        # normed_x = (x / torch.linalg.norm(x, dim=1))
-        diss = torch.inner(normed_x, normed_y)
-        return self.activation(diss)
-
-
-class MarginLoss(torch.nn.modules.loss._Loss):
-    def __init__(self,
-                 margin=0.3,
-                 size_average=None,
-                 reduce=None,
-                 reduction="mean"):
-        super().__init__(size_average, reduce, reduction)
-        self.margin = margin
-
-    def forward(self, input_, target):
-        dp = torch.sum(target * input_, dim=-1)
-        dm = torch.max(input_ - target, dim=-1).values
-        return torch.nn.functional.relu(dm - dp + self.margin)
-
-
-class ReasoningLayer(torch.nn.Module):
-    def __init__(self, n_components, n_classes, n_replicas=1):
-        super().__init__()
-        self.n_replicas = n_replicas
-        self.n_classes = n_classes
-        probabilities_init = torch.zeros(2, 1, n_components, self.n_classes)
-        probabilities_init.uniform_(0.4, 0.6)
-        self.reasoning_probabilities = torch.nn.Parameter(probabilities_init)
-
-    @property
-    def reasonings(self):
-        pk = self.reasoning_probabilities[0]
-        nk = (1 - pk) * self.reasoning_probabilities[1]
-        ik = 1 - pk - nk
-        img = torch.cat([pk, nk, ik], dim=0).permute(1, 0, 2)
-        return img.unsqueeze(1)
-
-    def forward(self, detections):
-        pk = self.reasoning_probabilities[0].clamp(0, 1)
-        nk = (1 - pk) * self.reasoning_probabilities[1].clamp(0, 1)
-        epsilon = torch.finfo(pk.dtype).eps
-        numerator = (detections @ (pk - nk)) + nk.sum(1)
-        probs = numerator / (pk + nk).sum(1)
-        probs = probs.squeeze(0)
-        return probs
-
-
-class CBC(pl.LightningModule):
+class CBC(SiameseGLVQ):
    """Classification-By-Components."""
-    def __init__(self,
-                 hparams,
-                 margin=0.1,
-                 backbone_class=torch.nn.Identity,
-                 similarity=euclidean_similarity,
-                 **kwargs):
-        super().__init__()
-        self.save_hyperparameters(hparams)
-        self.margin = margin
-        self.component_layer = Components(self.hparams.num_components,
-                                          self.hparams.component_initializer)
-        # self.similarity = CosineSimilarity()
-        self.similarity = similarity
-        self.backbone = backbone_class()
-        self.backbone_dependent = backbone_class().requires_grad_(False)
-        n_components = self.components.shape[0]
-        self.reasoning_layer = ReasoningLayer(n_components=n_components,
-                                              n_classes=self.hparams.nclasses)
-        self.train_acc = torchmetrics.Accuracy()
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)

-    @property
-    def components(self):
-        return self.component_layer.components.detach().cpu()
+        similarity_fn = kwargs.get("similarity_fn", euclidean_similarity)
+        components_initializer = kwargs.get("components_initializer", None)
+        reasonings_initializer = kwargs.get("reasonings_initializer",
+                                            RandomReasoningsInitializer())
+        self.components_layer = ReasoningComponents(
+            self.hparams.distribution,
+            components_initializer=components_initializer,
+            reasonings_initializer=reasonings_initializer,
+        )
+        self.similarity_layer = LambdaLayer(similarity_fn)
+        self.competition_layer = CBCC()

-    @property
-    def reasonings(self):
-        return self.reasoning_layer.reasonings.cpu()
+        # Namespace hook
+        self.proto_layer = self.components_layer

-    def configure_optimizers(self):
-        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
-        return optimizer
-
-    def sync_backbones(self):
-        master_state = self.backbone.state_dict()
-        self.backbone_dependent.load_state_dict(master_state, strict=True)
+        self.loss = MarginLoss(self.hparams.margin)

    def forward(self, x):
-        self.sync_backbones()
-        protos = self.component_layer()
-
+        components, reasonings = self.components_layer()
        latent_x = self.backbone(x)
-        latent_protos = self.backbone_dependent(protos)
-
-        detections = self.similarity(latent_x, latent_protos)
-        probs = self.reasoning_layer(detections)
+        self.backbone.requires_grad_(self.both_path_gradients)
+        latent_components = self.backbone(components)
+        self.backbone.requires_grad_(True)
+        detections = self.similarity_layer(latent_x, latent_components)
+        probs = self.competition_layer(detections, reasonings)
        return probs

-    def training_step(self, train_batch, batch_idx):
-        x, y = train_batch
-        x = x.view(x.size(0), -1)
+    def shared_step(self, batch, batch_idx, optimizer_idx=None):
+        x, y = batch
        y_pred = self(x)
-        nclasses = self.reasoning_layer.n_classes
-        y_true = torch.nn.functional.one_hot(y.long(), num_classes=nclasses)
-        loss = MarginLoss(self.margin)(y_pred, y_true).mean(dim=0)
-        self.log("train_loss", loss)
-        self.train_acc(y_pred, y_true)
-        self.log(
-            "acc",
-            self.train_acc,
-            on_step=False,
-            on_epoch=True,
-            prog_bar=True,
-            logger=True,
-        )
-        return loss
+        num_classes = self.num_classes
+        y_true = torch.nn.functional.one_hot(y.long(), num_classes=num_classes)
+        loss = self.loss(y_pred, y_true).mean(dim=0)
+        return y_pred, loss
+
+    def training_step(self, batch, batch_idx, optimizer_idx=None):
+        y_pred, train_loss = self.shared_step(batch, batch_idx, optimizer_idx)
+        preds = torch.argmax(y_pred, dim=1)  # index of the largest score
+        accuracy = torchmetrics.functional.accuracy(preds.int(),
+                                                    batch[1].int())  # targets
+        self.log("train_acc",
+                 accuracy,
+                 on_step=False,
+                 on_epoch=True,
+                 prog_bar=True,
+                 logger=True)
+        return train_loss  # only the loss is returned; accuracy is just logged

    def predict(self, x):
        with torch.no_grad():
            y_pred = self(x)
            y_pred = torch.argmax(y_pred, dim=1)
-        return y_pred.numpy()
+        return y_pred


-class ImageCBC(CBC):
+class ImageCBC(ImagePrototypesMixin, CBC):
    """CBC model that constrains the components to the range [0, 1] by
    clamping after updates.
    """
-    def on_train_batch_end(self, outputs, batch, batch_idx, dataloader_idx):
-        # super().on_train_batch_end(outputs, batch, batch_idx, dataloader_idx)
-        self.component_layer.prototypes.data.clamp_(0.0, 1.0)
--- a/prototorch/models/data.py
+++ b/prototorch/models/data.py
@@ -0,0 +1,124 @@
+"""Prototorch Data Modules
+
+This allows storing the used dataset inside a Lightning Module.
+Mainly used for PytorchLightningCLI configurations.
+"""
+from typing import Any, Optional, Type
+
+import pytorch_lightning as pl
+from torch.utils.data import DataLoader, Dataset, random_split
+from torchvision import transforms
+from torchvision.datasets import MNIST
+
+import prototorch as pt
+
+
+# MNIST
+class MNISTDataModule(pl.LightningDataModule):
+    """LightningDataModule serving MNIST from `~/datasets`."""
+    def __init__(self, batch_size=32):
+        super().__init__()
+        self.batch_size = batch_size
+
+    # Download mnist dataset as side-effect, only called on the first cpu
+    def prepare_data(self):
+        MNIST("~/datasets", train=True, download=True)
+        MNIST("~/datasets", train=False, download=True)
+
+    # called for every GPU/machine (assigning state is OK)
+    def setup(self, stage=None):
+        transform = transforms.Compose([transforms.ToTensor()])
+        # Train/validation split, built for the "fit" stage or no stage
+        if stage in (None, "fit"):
+            mnist_train = MNIST("~/datasets", train=True, transform=transform)
+            self.mnist_train, self.mnist_val = random_split(
+                mnist_train,
+                [55000, 5000],
+            )
+        # Membership test: `stage == (None, "test")` is never true for a
+        # string or None, which left `self.mnist_test` unset.
+        if stage in (None, "test"):
+            self.mnist_test = MNIST(
+                "~/datasets",
+                train=False,
+                transform=transform,
+            )
+
+    # Dataloaders
+    def train_dataloader(self):
+        mnist_train = DataLoader(self.mnist_train, batch_size=self.batch_size)
+        return mnist_train
+
+    def val_dataloader(self):
+        mnist_val = DataLoader(self.mnist_val, batch_size=self.batch_size)
+        return mnist_val
+
+    def test_dataloader(self):
+        mnist_test = DataLoader(self.mnist_test, batch_size=self.batch_size)
+        return mnist_test
+
+
+# def train_on_mnist(batch_size=256) -> type:
+#     class DataClass(pl.LightningModule):
+#         datamodule = MNISTDataModule(batch_size=batch_size)
+
+#         def __init__(self, *args, **kwargs):
+#             prototype_initializer = kwargs.pop(
+#                 "prototype_initializer", pt.components.Zeros((28, 28, 1)))
+#             super().__init__(*args,
+#                              prototype_initializer=prototype_initializer,
+#                              **kwargs)
+
+#     dc: Type[DataClass] = DataClass
+#     return dc
+
+
+# ABSTRACT
+class GeneralDataModule(pl.LightningDataModule):
+    def __init__(self, dataset: Dataset, batch_size: int = 32) -> None:
+        super().__init__()
+        self.train_dataset = dataset  # used as-is by train_dataloader
+        self.batch_size = batch_size
+
+    def train_dataloader(self) -> DataLoader:  # the only loader defined here
+        return DataLoader(self.train_dataset, batch_size=self.batch_size)
+
+
+# def train_on_dataset(dataset: Dataset, batch_size: int = 256):
+#     class DataClass(pl.LightningModule):
+#         datamodule = GeneralDataModule(dataset, batch_size)
+#         datashape = dataset[0][0].shape
+#         example_input_array = torch.zeros_like(dataset[0][0]).unsqueeze(0)
+
+#         def __init__(self, *args: Any, **kwargs: Any) -> None:
+#             prototype_initializer = kwargs.pop(
+#                 "prototype_initializer",
+#                 pt.components.Zeros(self.datashape),
+#             )
+#             super().__init__(*args,
+#                              prototype_initializer=prototype_initializer,
+#                              **kwargs)
+
+#     return DataClass
+
+# if __name__ == "__main__":
+#     from prototorch.models import GLVQ
+
+#     demo_dataset = pt.datasets.Iris()
+
+#     TrainingClass: Type = train_on_dataset(demo_dataset)
+
+#     class DemoGLVQ(TrainingClass, GLVQ):
+#         """Model Definition."""
+
+#     # Hyperparameters
+#     hparams = dict(
+#         distribution={
+#             "num_classes": 3,
+#             "prototypes_per_class": 4
+#         },
+#         lr=0.01,
+#     )
+
+#     initialized = DemoGLVQ(hparams)
+#     print(initialized)
--- a/prototorch/models/extras.py
+++ b/prototorch/models/extras.py
@@ -0,0 +1,90 @@
+"""prototorch.models.extras
+
+Modules not yet available in prototorch go here temporarily.
+
+"""
+
+import torch
+
+from ..core.similarities import gaussian
+
+
+def rank_scaled_gaussian(distances, lambd):
+    # argsort of an argsort gives each entry's rank within its row (dim=1)
+    ranks = torch.argsort(torch.argsort(distances, dim=1), dim=1)
+    return torch.exp(-torch.exp(-ranks / lambd) * distances)
+
+
+class GaussianPrior(torch.nn.Module):
+    def __init__(self, variance):
+        super().__init__()
+        self.variance = variance  # passed on to `gaussian` in `forward`
+
+    def forward(self, distances):
+        return gaussian(distances, self.variance)  # from ..core.similarities
+
+
+class RankScaledGaussianPrior(torch.nn.Module):
+    def __init__(self, lambd):
+        super().__init__()
+        self.lambd = lambd  # divisor of the ranks in `rank_scaled_gaussian`
+
+    def forward(self, distances):
+        return rank_scaled_gaussian(distances, self.lambd)  # module wrapper
+
+
+class ConnectionTopology(torch.nn.Module):
+    def __init__(self, agelimit, num_prototypes):
+        super().__init__()
+        self.agelimit = agelimit  # connections older than this are cut
+        self.num_prototypes = num_prototypes
+
+        self.cmat = torch.zeros((self.num_prototypes, self.num_prototypes))
+        self.age = torch.zeros_like(self.cmat)  # plain tensors, not buffers
+
+    def forward(self, d):
+        order = torch.argsort(d, dim=1)  # per row: indices by ascending `d`
+
+        for element in order:
+            i0, i1 = element[0], element[1]  # the two smallest of this row
+
+            self.cmat[i0][i1] = 1  # connect the pair symmetrically
+            self.cmat[i1][i0] = 1
+
+            self.age[i0][i1] = 0  # and reset the age of that connection
+            self.age[i1][i0] = 0
+
+            self.age[i0][self.cmat[i0] == 1] += 1  # age all edges of i0 / i1
+            self.age[i1][self.cmat[i1] == 1] += 1
+
+            self.cmat[i0][self.age[i0] > self.agelimit] = 0  # cut old edges
+            self.cmat[i1][self.age[i1] > self.agelimit] = 0
+
+    def get_neighbors(self, position):
+        return torch.where(self.cmat[position])  # tuple of index tensors
+
+    def add_prototype(self):
+        new_cmat = torch.zeros([dim + 1 for dim in self.cmat.shape])
+        new_cmat[:-1, :-1] = self.cmat  # old entries kept, new row/col zero
+        self.cmat = new_cmat
+        # NOTE(review): `self.num_prototypes` is not updated here - confirm
+        new_age = torch.zeros([dim + 1 for dim in self.age.shape])
+        new_age[:-1, :-1] = self.age
+        self.age = new_age
+
+    def add_connection(self, a, b):
+        self.cmat[a][b] = 1
+        self.cmat[b][a] = 1
+
+        self.age[a][b] = 0  # a new connection starts with age zero
+        self.age[b][a] = 0
+
+    def remove_connection(self, a, b):
+        self.cmat[a][b] = 0
+        self.cmat[b][a] = 0
+
+        self.age[a][b] = 0  # age is cleared together with the connection
+        self.age[b][a] = 0
+
+    def extra_repr(self):
+        return f"(agelimit): ({self.agelimit})"
--- a/prototorch/models/glvq.py
+++ b/prototorch/models/glvq.py
@@ -1,130 +1,94 @@
+"""Models based on the GLVQ framework."""
+
 import torch
-import torchmetrics
-from prototorch.components import LabeledComponents
-from prototorch.functions.activations import get_activation
-from prototorch.functions.competitions import wtac
-from prototorch.functions.distances import (euclidean_distance, omega_distance,
-                                            squared_euclidean_distance)
-from prototorch.functions.losses import glvq_loss, lvq1_loss, lvq21_loss
+from torch.nn.parameter import Parameter

-from .abstract import AbstractPrototypeModel
-
-from torch.optim.lr_scheduler import ExponentialLR
+from ..core.competitions import wtac
+from ..core.distances import lomega_distance, omega_distance, squared_euclidean_distance
+from ..core.initializers import EyeTransformInitializer
+from ..core.losses import glvq_loss, lvq1_loss, lvq21_loss
+from ..nn.activations import get_activation
+from ..nn.wrappers import LambdaLayer, LossLayer
+from .abstract import ImagePrototypesMixin, SupervisedPrototypeModel


-class GLVQ(AbstractPrototypeModel):
+class GLVQ(SupervisedPrototypeModel):
    """Generalized Learning Vector Quantization."""
    def __init__(self, hparams, **kwargs):
-        super().__init__()
+        super().__init__(hparams, **kwargs)

-        self.save_hyperparameters(hparams)
-
-        # Default Values
-        self.hparams.setdefault("distance", euclidean_distance)
-        self.hparams.setdefault("optimizer", torch.optim.Adam)
-        self.hparams.setdefault("transfer_function", "identity")
+        # Default hparams
+        self.hparams.setdefault("transfer_fn", "identity")
        self.hparams.setdefault("transfer_beta", 10.0)

-        self.proto_layer = LabeledComponents(
-            labels=(self.hparams.nclasses, self.hparams.prototypes_per_class),
-            initializer=self.hparams.prototype_initializer)
+        # Layers
+        transfer_fn = get_activation(self.hparams.transfer_fn)
+        self.transfer_layer = LambdaLayer(transfer_fn)

-        self.transfer_function = get_activation(self.hparams.transfer_function)
-        self.train_acc = torchmetrics.Accuracy()
+        # Loss
+        self.loss = LossLayer(glvq_loss)

-        self.loss = glvq_loss
+    def initialize_prototype_win_ratios(self):
+        self.register_buffer(  # one zero entry per current prototype
+            "prototype_win_ratios",
+            torch.zeros(self.num_prototypes, device=self.device))

-    @property
-    def prototype_labels(self):
-        return self.proto_layer.component_labels.detach().cpu()
+    def on_epoch_start(self):
+        self.initialize_prototype_win_ratios()

-    def forward(self, x):
-        protos, _ = self.proto_layer()
-        dis = self.hparams.distance(x, protos)
-        return dis
+    def log_prototype_win_ratios(self, distances):
+        batch_size = len(distances)
+        prototype_wc = torch.zeros(self.num_prototypes,
+                                   dtype=torch.long,
+                                   device=self.device)
+        wi, wc = torch.unique(distances.min(dim=-1).indices,  # argmin per row
+                              sorted=True,
+                              return_counts=True)
+        prototype_wc[wi] = wc  # win counts; prototypes without wins stay 0
+        prototype_wr = prototype_wc / batch_size  # share of the batch won
+        self.prototype_win_ratios = torch.vstack([
+            self.prototype_win_ratios,
+            prototype_wr,
+        ])  # one more row is stacked on every call

-    def training_step(self, train_batch, batch_idx, optimizer_idx=None):
-        x, y = train_batch
-        x = x.view(x.size(0), -1)  # flatten
-        dis = self(x)
-        plabels = self.proto_layer.component_labels
-        mu = self.loss(dis, y, prototype_labels=plabels)
-        batch_loss = self.transfer_function(mu,
-                                            beta=self.hparams.transfer_beta)
+    def shared_step(self, batch, batch_idx, optimizer_idx=None):
+        x, y = batch
+        out = self.compute_distances(x)
+        plabels = self.proto_layer.labels
+        mu = self.loss(out, y, prototype_labels=plabels)
+        batch_loss = self.transfer_layer(mu, beta=self.hparams.transfer_beta)
        loss = batch_loss.sum(dim=0)
+        return out, loss

-        # Compute training accuracy
-        with torch.no_grad():
-            preds = wtac(dis, plabels)
+    def training_step(self, batch, batch_idx, optimizer_idx=None):
+        out, train_loss = self.shared_step(batch, batch_idx, optimizer_idx)
+        self.log_prototype_win_ratios(out)  # `out` holds the distances
+        self.log("train_loss", train_loss)
+        self.log_acc(out, batch[-1], tag="train_acc")  # batch[-1]: targets
+        return train_loss

-        self.train_acc(preds.int(), y.int())
-        # `.int()` because FloatTensors are assumed to be class probabilities
+    def validation_step(self, batch, batch_idx):
+        # `model.eval()` and `torch.no_grad()` handled by pl
+        out, val_loss = self.shared_step(batch, batch_idx)
+        self.log("val_loss", val_loss)
+        self.log_acc(out, batch[-1], tag="val_acc")
+        return val_loss

-        # Logging
-        self.log("train_loss", loss)
-        self.log("acc",
-                 self.train_acc,
-                 on_step=False,
-                 on_epoch=True,
-                 prog_bar=True,
-                 logger=True)
+    def test_step(self, batch, batch_idx):
+        # `model.eval()` and `torch.no_grad()` handled by pl
+        out, test_loss = self.shared_step(batch, batch_idx)
+        self.log_acc(out, batch[-1], tag="test_acc")
+        return test_loss

-        return loss
+    def test_epoch_end(self, outputs):
+        """Log the sum of all per-batch test losses as "test_loss"."""
+        # Start from 0.0 so the logged total is always a Python float.
+        total = sum((loss.item() for loss in outputs), 0.0)
+        self.log("test_loss", total)

-    def predict(self, x):
-        # model.eval()  # ?!
-        with torch.no_grad():
-            d = self(x)
-            plabels = self.proto_layer.component_labels
-            y_pred = wtac(d, plabels)
-        return y_pred.numpy()
-
-
-class LVQ1(GLVQ):
-    def __init__(self, hparams, **kwargs):
-        super().__init__(hparams, **kwargs)
-        self.loss = lvq1_loss
-
-    def configure_optimizers(self):
-        optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr)
-        scheduler = ExponentialLR(optimizer,
-                                  gamma=0.99,
-                                  last_epoch=-1,
-                                  verbose=False)
-        sch = {
-            "scheduler": scheduler,
-            "interval": "step",
-        }  # called after each training step
-        return [optimizer], [sch]
-
-
-class LVQ21(GLVQ):
-    def __init__(self, hparams, **kwargs):
-        super().__init__(hparams, **kwargs)
-        self.loss = lvq21_loss
-
-    def configure_optimizers(self):
-        optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr)
-        scheduler = ExponentialLR(optimizer,
-                                  gamma=0.99,
-                                  last_epoch=-1,
-                                  verbose=False)
-        sch = {
-            "scheduler": scheduler,
-            "interval": "step",
-        }  # called after each training step
-        return [optimizer], [sch]
-
-
-class ImageGLVQ(GLVQ):
-    """GLVQ for training on image data.
-
-    GLVQ model that constrains the prototypes to the range [0, 1] by clamping
-    after updates.
-
-    """
-    def on_train_batch_end(self, outputs, batch, batch_idx, dataloader_idx):
-        self.proto_layer.components.data.clamp_(0.0, 1.0)
+    # TODO
+    # def predict_step(self, batch, batch_idx, dataloader_idx=None):
+    #     pass


 class SiameseGLVQ(GLVQ):
@@ -137,147 +101,61 @@ class SiameseGLVQ(GLVQ):
    """
    def __init__(self,
                 hparams,
-                 backbone_module=torch.nn.Identity,
-                 backbone_params={},
-                 sync=True,
+                 backbone=torch.nn.Identity(),
+                 both_path_gradients=False,
                 **kwargs):
-        super().__init__(hparams, **kwargs)
-        self.backbone = backbone_module(**backbone_params)
-        self.backbone_dependent = backbone_module(
-            **backbone_params).requires_grad_(False)
-        self.sync = sync
-
-    def sync_backbones(self):
-        master_state = self.backbone.state_dict()
-        self.backbone_dependent.load_state_dict(master_state, strict=True)
+        # Fall back to the squared Euclidean distance unless the caller gave
+        # an explicit `distance_fn`; it is popped so that it is not forwarded
+        # a second time through `**kwargs`.
+        distance_fn = kwargs.pop("distance_fn", squared_euclidean_distance)
+        super().__init__(hparams, distance_fn=distance_fn, **kwargs)
+        # One backbone instance is applied to both inputs and prototypes
+        # (see `compute_distances`).
+        self.backbone = backbone
+        # Passed to `backbone.requires_grad_` while the prototypes are embedded.
+        self.both_path_gradients = both_path_gradients

    def configure_optimizers(self):
+        """Build one optimizer for the prototypes and, if needed, the backbone.
+
+        Returns the list of optimizers, or a `(optimizers, schedulers)` pair
+        when `self.lr_scheduler` is set.
+
+        """
-        optim = self.hparams.optimizer
-        proto_opt = optim(self.proto_layer.parameters(),
-                          lr=self.hparams.proto_lr)
-        if list(self.backbone.parameters()):
-            # only add an optimizer is the backbone has trainable parameters
-            # otherwise, the next line fails
-            bb_opt = optim(self.backbone.parameters(), lr=self.hparams.bb_lr)
-            return proto_opt, bb_opt
+        # Prototypes use `proto_lr`, the backbone uses `bb_lr`.
+        proto_opt = self.optimizer(self.proto_layer.parameters(),
+                                   lr=self.hparams.proto_lr)
+        # Only add a backbone optimizer if backbone has trainable parameters
+        if (bb_params := list(self.backbone.parameters())):
+            bb_opt = self.optimizer(bb_params, lr=self.hparams.bb_lr)
+            optimizers = [proto_opt, bb_opt]
        else:
-            return proto_opt
+            optimizers = [proto_opt]
+        # Each optimizer gets its own scheduler built from the same
+        # scheduler class and keyword arguments.
+        if self.lr_scheduler is not None:
+            schedulers = []
+            for optimizer in optimizers:
+                scheduler = self.lr_scheduler(optimizer,
+                                              **self.lr_scheduler_kwargs)
+                schedulers.append(scheduler)
+            return optimizers, schedulers
+        else:
+            return optimizers

-    def forward(self, x):
-        if self.sync:
-            self.sync_backbones()
+    def compute_distances(self, x):
+        """Embed `x` and the prototypes with the backbone and compare them."""
        protos, _ = self.proto_layer()
+        # Flatten everything after the first dimension.
+        x, protos = [arr.view(arr.size(0), -1) for arr in (x, protos)]
        latent_x = self.backbone(x)
-        latent_protos = self.backbone_dependent(protos)
-        dis = euclidean_distance(latent_x, latent_protos)
-        return dis
+        # Switch `requires_grad` on the backbone parameters for the prototype
+        # pass only, according to `both_path_gradients`.
+        self.backbone.requires_grad_(self.both_path_gradients)
+        latent_protos = self.backbone(protos)
+        # NOTE(review): this re-enables gradients unconditionally, so a
+        # backbone that was frozen by the caller ends up trainable - confirm
+        # this is intended.
+        self.backbone.requires_grad_(True)
+        distances = self.distance_layer(latent_x, latent_protos)
+        return distances

-    def predict_latent(self, x):
+    def predict_latent(self, x, map_protos=True):
        """Predict `x` assuming it is already embedded in the latent space.

        Only the prototypes are embedded in the latent space using the
        backbone.
+
+        If `map_protos` is False, the prototypes are compared to `x` as they
+        are, without passing them through the backbone.

        """
-        # model.eval()  # ?!
+        # The module is left in eval mode; nothing here switches it back.
+        self.eval()
        with torch.no_grad():
            protos, plabels = self.proto_layer()
-            latent_protos = self.backbone_dependent(protos)
-            d = euclidean_distance(x, latent_protos)
+            if map_protos:
+                protos = self.backbone(protos)
+            d = self.distance_layer(x, protos)
            y_pred = wtac(d, plabels)
-        return y_pred.numpy()
+        # The result of `wtac` is returned as is (no `.numpy()` conversion).
+        return y_pred


-class GRLVQ(GLVQ):
-    """Generalized Relevance Learning Vector Quantization."""
-    def __init__(self, hparams, **kwargs):
-        super().__init__(hparams, **kwargs)
-        self.relevances = torch.nn.parameter.Parameter(
-            torch.ones(self.hparams.input_dim))
-
-    def forward(self, x):
-        protos, _ = self.proto_layer()
-        dis = omega_distance(x, protos, torch.diag(self.relevances))
-        return dis
-
-    def backbone(self, x):
-        return x @ torch.diag(self.relevances)
-
-    @property
-    def relevance_profile(self):
-        return self.relevances.detach().cpu()
-
-    def predict_latent(self, x):
-        """Predict `x` assuming it is already embedded in the latent space.
-
-        Only the prototypes are embedded in the latent space using the
-        backbone.
-
-        """
-        # model.eval()  # ?!
-        with torch.no_grad():
-            protos, plabels = self.proto_layer()
-            latent_protos = protos @ torch.diag(self.relevances)
-            d = squared_euclidean_distance(x, latent_protos)
-            y_pred = wtac(d, plabels)
-        return y_pred.numpy()
-
-
-class GMLVQ(GLVQ):
-    """Generalized Matrix Learning Vector Quantization."""
-    def __init__(self, hparams, **kwargs):
-        super().__init__(hparams, **kwargs)
-        self.omega_layer = torch.nn.Linear(self.hparams.input_dim,
-                                           self.hparams.latent_dim,
-                                           bias=False)
-
-        # Namespace hook for the visualization callbacks to work
-        self.backbone = self.omega_layer
-
-    @property
-    def omega_matrix(self):
-        return self.omega_layer.weight.detach().cpu()
-
-    @property
-    def lambda_matrix(self):
-        omega = self.omega_layer.weight  # (latent_dim, input_dim)
-        lam = omega.T @ omega
-        return lam.detach().cpu()
-
-    def show_lambda(self):
-        import matplotlib.pyplot as plt
-        title = "Lambda matrix"
-        plt.figure(title)
-        plt.title(title)
-        plt.imshow(self.lambda_matrix, cmap="gray")
-        plt.axis("off")
-        plt.colorbar()
-        plt.show(block=True)
-
-    def forward(self, x):
-        protos, _ = self.proto_layer()
-        latent_x = self.omega_layer(x)
-        latent_protos = self.omega_layer(protos)
-        dis = squared_euclidean_distance(latent_x, latent_protos)
-        return dis
-
-    def predict_latent(self, x):
-        """Predict `x` assuming it is already embedded in the latent space.
-
-        Only the prototypes are embedded in the latent space using the
-        backbone.
-
-        """
-        # model.eval()  # ?!
-        with torch.no_grad():
-            protos, plabels = self.proto_layer()
-            latent_protos = self.omega_layer(protos)
-            d = squared_euclidean_distance(x, latent_protos)
-            y_pred = wtac(d, plabels)
-        return y_pred.numpy()
-
-
-class LVQMLN(GLVQ):
+class LVQMLN(SiameseGLVQ):
    """Learning Vector Quantization Multi-Layer Network.

    GLVQ model that applies an arbitrary transformation on the inputs, BUT NOT
@@ -286,27 +164,143 @@ class LVQMLN(GLVQ):
    rather in the embedding space.

    """
-    def __init__(self,
-                 hparams,
-                 backbone_module=torch.nn.Identity,
-                 backbone_params={},
-                 **kwargs):
-        super().__init__(hparams, **kwargs)
-        self.backbone = backbone_module(**backbone_params)
-        with torch.no_grad():
-            protos = self.backbone(self.proto_layer()[0])
-        self.proto_layer.load_state_dict({"_components": protos}, strict=False)
-
-    def forward(self, x):
+    def compute_distances(self, x):
+        """Compare backbone-embedded inputs with the raw prototypes.
+
+        Unlike the parent class, the prototypes are NOT passed through the
+        backbone here; only `x` is.
+
+        """
        latent_protos, _ = self.proto_layer()
        latent_x = self.backbone(x)
-        dis = euclidean_distance(latent_x, latent_protos)
-        return dis
+        distances = self.distance_layer(latent_x, latent_protos)
+        return distances

-    def predict_latent(self, x):
-        """Predict `x` assuming it is already embedded in the latent space."""
-        with torch.no_grad():
-            latent_protos, plabels = self.proto_layer()
-            d = euclidean_distance(x, latent_protos)
-            y_pred = wtac(d, plabels)
-        return y_pred.numpy()
+
+class GRLVQ(SiameseGLVQ):
+    """Generalized Relevance Learning Vector Quantization.
+
+    Implemented as a Siamese network with a linear transformation backbone.
+
+    TODO Make a RelevanceLayer. `bb_lr` is ignored otherwise.
+
+    NOTE(review): `_relevances` is registered on the model itself, while
+    `SiameseGLVQ.configure_optimizers` only hands the parameters of
+    `proto_layer` and `backbone` to optimizers. Confirm that the relevances
+    actually end up in an optimizer.
+
+    """
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+
+        # Additional parameters
+        # One relevance factor per input dimension, initialised to 1.
+        relevances = torch.ones(self.hparams.input_dim, device=self.device)
+        self.register_parameter("_relevances", Parameter(relevances))
+
+        # Override the backbone
+        # The backbone multiplies its input by diag(relevances); the lambda
+        # reads `self._relevances` at call time, so it sees current values.
+        self.backbone = LambdaLayer(lambda x: x @ torch.diag(self._relevances),
+                                    name="relevance scaling")
+
+    @property
+    def relevance_profile(self):
+        """Detached CPU copy of the relevance vector."""
+        return self._relevances.detach().cpu()
+
+    def extra_repr(self):
+        """Report the shape of the relevance vector in the module repr."""
+        return f"(relevances): (shape: {tuple(self._relevances.shape)})"
+
+
+class SiameseGMLVQ(SiameseGLVQ):
+    """Generalized Matrix Learning Vector Quantization.
+
+    Siamese formulation: the backbone is a bias-free linear layer mapping
+    `input_dim` features to `latent_dim` features.
+
+    """
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+
+        # Replace the backbone stored by the parent with the linear map.
+        omega_layer = torch.nn.Linear(in_features=self.hparams.input_dim,
+                                      out_features=self.hparams.latent_dim,
+                                      bias=False)
+        self.backbone = omega_layer
+
+    @property
+    def omega_matrix(self):
+        """Detached CPU copy of the backbone weight."""
+        weight = self.backbone.weight
+        return weight.detach().cpu()
+
+    @property
+    def lambda_matrix(self):
+        """Detached CPU copy of `omega.T @ omega`."""
+        weight = self.backbone.weight  # (latent_dim, input_dim)
+        return (weight.T @ weight).detach().cpu()
+
+
+class GMLVQ(GLVQ):
+    """Generalized Matrix Learning Vector Quantization.
+
+    Implemented as a regular GLVQ network that simply uses a different distance
+    function. This makes it easier to implement a localized variant.
+
+    """
+    def __init__(self, hparams, **kwargs):
+        # `distance_fn` is popped so it can be passed explicitly below
+        # without appearing twice in the call.
+        distance_fn = kwargs.pop("distance_fn", omega_distance)
+        super().__init__(hparams, distance_fn=distance_fn, **kwargs)
+
+        # Additional parameters
+        # `omega_initializer` is only read, not popped, so it has also been
+        # forwarded to the parent constructor above.
+        omega_initializer = kwargs.get("omega_initializer",
+                                       EyeTransformInitializer())
+        omega = omega_initializer.generate(self.hparams.input_dim,
+                                           self.hparams.latent_dim)
+        self.register_parameter("_omega", Parameter(omega))
+        # The lambda looks up `self._omega` when it is called, so it follows
+        # a later re-registration of `_omega` (e.g. by a subclass).
+        self.backbone = LambdaLayer(lambda x: x @ self._omega,
+                                    name="omega matrix")
+
+    @property
+    def omega_matrix(self):
+        """Detached CPU copy of the omega parameter."""
+        return self._omega.detach().cpu()
+
+    def compute_distances(self, x):
+        """Evaluate the distance layer on `x`, the prototypes and omega."""
+        protos, _ = self.proto_layer()
+        distances = self.distance_layer(x, protos, self._omega)
+        return distances
+
+    def extra_repr(self):
+        """Report the shape of omega in the module repr."""
+        return f"(omega): (shape: {tuple(self._omega.shape)})"
+
+
+class LGMLVQ(GMLVQ):
+    """Localized and Generalized Matrix Learning Vector Quantization.
+
+    Uses `lomega_distance` by default and keeps one randomly initialised
+    omega matrix per prototype.
+
+    """
+    def __init__(self, hparams, **kwargs):
+        local_distance = kwargs.pop("distance_fn", lomega_distance)
+        super().__init__(hparams, distance_fn=local_distance, **kwargs)
+
+        # Re-register `_omega` to override the one from the super class:
+        # shape is (num_prototypes, input_dim, latent_dim).
+        local_omegas = torch.randn(
+            self.num_prototypes,
+            self.hparams.input_dim,
+            self.hparams.latent_dim,
+            device=self.device,
+        )
+        self.register_parameter("_omega", Parameter(local_omegas))
+
+
+class GLVQ1(GLVQ):
+    """Generalized Learning Vector Quantization 1.
+
+    GLVQ with the loss swapped for `lvq1_loss` and SGD as optimizer class.
+
+    """
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+        # Override what the parent constructor configured.
+        lvq1_layer = LossLayer(lvq1_loss)
+        self.loss = lvq1_layer
+        self.optimizer = torch.optim.SGD
+
+
+class GLVQ21(GLVQ):
+    """Generalized Learning Vector Quantization 2.1.
+
+    GLVQ with the loss swapped for `lvq21_loss` and SGD as optimizer class.
+
+    """
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+        # Override what the parent constructor configured.
+        lvq21_layer = LossLayer(lvq21_loss)
+        self.loss = lvq21_layer
+        self.optimizer = torch.optim.SGD
+
+
+class ImageGLVQ(ImagePrototypesMixin, GLVQ):
+    """GLVQ for training on image data.
+
+    GLVQ model that constrains the prototypes to the range [0, 1] by clamping
+    after updates.
+
+    The class body is empty: it only combines `ImagePrototypesMixin` with
+    `GLVQ`, so the clamping is presumably supplied by the mixin (defined
+    elsewhere).
+
+    """
+
+
+class ImageGMLVQ(ImagePrototypesMixin, GMLVQ):
+    """GMLVQ for training on image data.
+
+    GMLVQ model that constrains the prototypes to the range [0, 1] by clamping
+    after updates.
+
+    The class body is empty: it only combines `ImagePrototypesMixin` with
+    `GMLVQ`, so the clamping is presumably supplied by the mixin (defined
+    elsewhere).
+
+    """
--- a/prototorch/models/knn.py
+++ b/prototorch/models/knn.py
@@ -0,0 +1,43 @@
+"""ProtoTorch KNN model."""
+
+import warnings
+
+from ..core.competitions import KNNC
+from ..core.components import LabeledComponents
+from ..core.initializers import LiteralCompInitializer, LiteralLabelsInitializer
+from ..utils.utils import parse_data_arg
+from .abstract import SupervisedPrototypeModel
+
+
+class KNN(SupervisedPrototypeModel):
+    """K-Nearest-Neighbors classification algorithm.
+
+    The training data itself is stored as the labeled components, so there
+    is nothing to optimize: the training hooks below only skip training.
+
+    """
+    def __init__(self, hparams, **kwargs):
+        """Store the data passed as `data=...` as components.
+
+        Raises:
+            ValueError: if no `data` keyword argument is given.
+
+        """
+        super().__init__(hparams, **kwargs)
+
+        # Default hparams
+        self.hparams.setdefault("k", 1)
+
+        # `data` is read (not popped), so it was also forwarded to the
+        # parent constructor above.
+        data = kwargs.get("data", None)
+        if data is None:
+            raise ValueError("KNN requires data, but was not provided!")
+        data, targets = parse_data_arg(data)
+
+        # Layers
+        # Components and labels are taken literally from the data.
+        self.proto_layer = LabeledComponents(
+            distribution=[],
+            components_initializer=LiteralCompInitializer(data),
+            labels_initializer=LiteralLabelsInitializer(targets))
+        self.competition_layer = KNNC(k=self.hparams.k)
+
+    def training_step(self, train_batch, batch_idx, optimizer_idx=None):
+        """Return a constant instead of a loss; no training is performed."""
+        return 1  # skip training step
+
+    def on_train_batch_start(self,
+                             train_batch,
+                             batch_idx,
+                             dataloader_idx=None):
+        """Warn and return -1.
+
+        In PyTorch Lightning, returning -1 from this hook skips training for
+        the rest of the current epoch.
+
+        """
+        warnings.warn("k-NN has no training, skipping!")
+        return -1
+
+    def configure_optimizers(self):
+        """Return None: no optimizer is configured."""
+        return None
--- a/prototorch/models/lvq.py
+++ b/prototorch/models/lvq.py
@@ -0,0 +1,69 @@
+"""LVQ models that are optimized using non-gradient methods."""
+
+from ..core.losses import _get_dp_dm
+from .abstract import NonGradientMixin
+from .glvq import GLVQ
+
+
+class LVQ1(NonGradientMixin, GLVQ):
+    """Learning Vector Quantization 1."""
+    def training_step(self, train_batch, batch_idx, optimizer_idx=None):
+        """Apply the LVQ1 update rule to the batch, one sample at a time.
+
+        For every sample the closest prototype is moved towards the sample
+        if the prediction matches the target and away from it otherwise,
+        scaled by `hparams.lr`. The prototypes are written back through
+        `load_state_dict` after each sample. Returns None.
+
+        """
+        protos = self.proto_layer.components
+        plabels = self.proto_layer.labels
+
+        x, y = train_batch
+        # Batch distances taken before any update; only used for logging.
+        dis = self.compute_distances(x)
+        # TODO Vectorized implementation
+
+        for xi, yi in zip(x, y):
+            d = self.compute_distances(xi.view(1, -1))
+            preds = self.competition_layer(d, plabels)
+            w = d.argmin(1)  # index of the closest prototype
+            if yi == preds:
+                shift = xi - protos[w]  # attract
+            else:
+                shift = protos[w] - xi  # repel
+            # `+ 0.0` yields a new tensor, so `protos` itself is not edited
+            # by the indexed assignment below.
+            updated_protos = protos + 0.0
+            updated_protos[w] = protos[w] + (self.hparams.lr * shift)
+            self.proto_layer.load_state_dict({"_components": updated_protos},
+                                             strict=False)
+
+        # Logging
+        self.log_acc(dis, y, tag="train_acc")
+
+        return None
+
+
+class LVQ21(NonGradientMixin, GLVQ):
+    """Learning Vector Quantization 2.1."""
+    def training_step(self, train_batch, batch_idx, optimizer_idx=None):
+        """Run the non-gradient LVQ2.1 update sample by sample."""
+        inputs, targets = train_batch
+        prototypes = self.proto_layer.components
+        proto_labels = self.proto_layer.labels
+
+        # Distances for the whole batch, taken before any prototype moves;
+        # they only feed the accuracy log below.
+        batch_distances = self.compute_distances(inputs)
+        # TODO Vectorized implementation
+
+        for sample, label in zip(inputs, targets):
+            sample = sample.view(1, -1)
+            label = label.view(1, )
+            sample_distances = self.compute_distances(sample)
+            (_, wp), (_, wn) = _get_dp_dm(sample_distances,
+                                          label,
+                                          proto_labels,
+                                          with_indices=True)
+            # Pull the `wp` prototype towards the sample, push `wn` away.
+            attract = sample - prototypes[wp]
+            repel = prototypes[wn] - sample
+            moved = prototypes + 0.0  # new tensor to assign into
+            moved[wp] = prototypes[wp] + (self.hparams.lr * attract)
+            moved[wn] = prototypes[wn] + (self.hparams.lr * repel)
+            self.proto_layer.load_state_dict({"_components": moved},
+                                             strict=False)
+
+        # Logging
+        self.log_acc(batch_distances, targets, tag="train_acc")
+
+        return None
+
+
+class MedianLVQ(NonGradientMixin, GLVQ):
+    """Median LVQ
+
+    Placeholder: the class body is empty, so all behaviour currently comes
+    from `NonGradientMixin` and `GLVQ`.
+
+    """
--- a/prototorch/models/neural_gas.py
+++ b/prototorch/models/neural_gas.py
@@ -1,69 +0,0 @@
-import torch
-from prototorch.components import Components
-from prototorch.components import initializers as cinit
-from prototorch.functions.distances import euclidean_distance
-from prototorch.modules.losses import NeuralGasEnergy
-
-from .abstract import AbstractPrototypeModel
-
-
-class EuclideanDistance(torch.nn.Module):
-    def forward(self, x, y):
-        return euclidean_distance(x, y)
-
-
-class ConnectionTopology(torch.nn.Module):
-    def __init__(self, agelimit, num_prototypes):
-        super().__init__()
-        self.agelimit = agelimit
-        self.num_prototypes = num_prototypes
-
-        self.cmat = torch.zeros((self.num_prototypes, self.num_prototypes))
-        self.age = torch.zeros_like(self.cmat)
-
-    def forward(self, d):
-        order = torch.argsort(d, dim=1)
-
-        for element in order:
-            i0, i1 = element[0], element[1]
-            self.cmat[i0][i1] = 1
-            self.age[i0][i1] = 0
-            self.age[i0][self.cmat[i0] == 1] += 1
-            self.cmat[i0][self.age[i0] > self.agelimit] = 0
-
-    def extra_repr(self):
-        return f"agelimit: {self.agelimit}"
-
-
-class NeuralGas(AbstractPrototypeModel):
-    def __init__(self, hparams, **kwargs):
-        super().__init__()
-
-        self.save_hyperparameters(hparams)
-
-        # Default Values
-        self.hparams.setdefault("input_dim", 2)
-        self.hparams.setdefault("agelimit", 10)
-        self.hparams.setdefault("lm", 1)
-        self.hparams.setdefault("prototype_initializer",
-                                cinit.ZerosInitializer(self.hparams.input_dim))
-
-        self.proto_layer = Components(
-            self.hparams.num_prototypes,
-            initializer=self.hparams.prototype_initializer)
-
-        self.distance_layer = EuclideanDistance()
-        self.energy_layer = NeuralGasEnergy(lm=self.hparams.lm)
-        self.topology_layer = ConnectionTopology(
-            agelimit=self.hparams.agelimit,
-            num_prototypes=self.hparams.num_prototypes,
-        )
-
-    def training_step(self, train_batch, batch_idx):
-        x = train_batch[0]
-        protos = self.proto_layer()
-        d = self.distance_layer(x, protos)
-        cost, order = self.energy_layer(d)
-
-        self.topology_layer(d)
-        return cost
--- a/prototorch/models/probabilistic.py
+++ b/prototorch/models/probabilistic.py
@@ -0,0 +1,96 @@
+"""Probabilistic GLVQ methods"""
+
+import torch
+
+from ..core.losses import nllr_loss, rslvq_loss
+from ..core.pooling import stratified_min_pooling, stratified_sum_pooling
+from ..nn.wrappers import LambdaLayer, LossLayer
+from .extras import GaussianPrior, RankScaledGaussianPrior
+from .glvq import GLVQ, SiameseGMLVQ
+
+
+class CELVQ(GLVQ):
+    """Cross-Entropy Learning Vector Quantization."""
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+
+        # Replace the parent's loss with a cross-entropy criterion.
+        self.loss = torch.nn.CrossEntropyLoss()
+
+    def shared_step(self, batch, batch_idx, optimizer_idx=None):
+        """Return the prototype distances and the cross-entropy loss.
+
+        The per-class minimum distances are negated and fed to the loss as
+        scores.
+
+        """
+        inputs, targets = batch
+        distances = self.compute_distances(inputs)  # [None, num_protos]
+        # [None, num_classes]
+        class_distances = stratified_min_pooling(distances,
+                                                 self.proto_layer.labels)
+        scores = -1.0 * class_distances
+        batch_loss = self.loss(scores, targets.long())
+        return distances, batch_loss.sum(dim=0)
+
+
+class ProbabilisticLVQ(GLVQ):
+    """Base for LVQ variants whose output is a class-wise pooled posterior.
+
+    `conditional_distribution` starts out as None and has to be set (as the
+    subclasses in this module do) before `forward` can be called.
+
+    """
+    def __init__(self, hparams, rejection_confidence=0.0, **kwargs):
+        super().__init__(hparams, **kwargs)
+
+        self.rejection_confidence = rejection_confidence
+        self.conditional_distribution = None
+
+    def forward(self, x):
+        """Weight the conditionals with a uniform prior and pool per class."""
+        n_protos = self.num_prototypes
+        likelihood = self.conditional_distribution(self.compute_distances(x))
+        uniform_prior = (1. / n_protos) * torch.ones(n_protos,
+                                                     device=self.device)
+        return stratified_sum_pooling(likelihood * uniform_prior,
+                                      self.proto_layer._labels)
+
+    def predict(self, x):
+        """Return the arg-max class, or -1 below `rejection_confidence`."""
+        class_scores = self.forward(x)
+        confidence, prediction = class_scores.max(dim=1)
+        rejected = confidence < self.rejection_confidence
+        prediction[rejected] = -1
+        return prediction
+
+    def training_step(self, batch, batch_idx, optimizer_idx=None):
+        """Evaluate `self.loss` on the forward output and sum over dim 0."""
+        inputs, targets = batch
+        class_scores = self.forward(inputs)
+        batch_loss = self.loss(class_scores, targets, self.proto_layer.labels)
+        return batch_loss.sum(dim=0)
+
+
+class SLVQ(ProbabilisticLVQ):
+    """Soft Learning Vector Quantization.
+
+    Uses `nllr_loss` and a `GaussianPrior` built from `hparams.variance`.
+
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        variance = self.hparams.variance
+        self.loss = LossLayer(nllr_loss)
+        self.conditional_distribution = GaussianPrior(variance)
+
+
+class RSLVQ(ProbabilisticLVQ):
+    """Robust Soft Learning Vector Quantization.
+
+    Uses `rslvq_loss` and a `GaussianPrior` built from `hparams.variance`.
+
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        variance = self.hparams.variance
+        self.loss = LossLayer(rslvq_loss)
+        self.conditional_distribution = GaussianPrior(variance)
+
+
+class PLVQ(ProbabilisticLVQ, SiameseGMLVQ):
+    """Probabilistic Learning Vector Quantization.
+
+    TODO: Use Backbone LVQ instead
+
+    NOTE(review): `training_step` is inherited from `ProbabilisticLVQ`, which
+    calls `self.loss(out, y, plabels)` with three arguments, whereas
+    `torch.nn.KLDivLoss.forward` takes `(input, target)`. This is presumably
+    what the FIXME below is about - confirm before relying on training.
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Conditional distribution is parameterised by `hparams.lambd`.
+        self.conditional_distribution = RankScaledGaussianPrior(
+            self.hparams.lambd)
+        self.loss = torch.nn.KLDivLoss()
+
+    # FIXME
+    # def training_step(self, batch, batch_idx, optimizer_idx=None):
+    #     x, y = batch
+    #     y_pred = self(x)
+    #     batch_loss = self.loss(y_pred, y)
+    #     loss = batch_loss.sum(dim=0)
+    #     return loss
--- a/prototorch/models/unsupervised.py
+++ b/prototorch/models/unsupervised.py
@@ -0,0 +1,146 @@
+"""Unsupervised prototype learning algorithms."""
+
+import numpy as np
+import torch
+
+from ..core.competitions import wtac
+from ..core.distances import squared_euclidean_distance
+from ..core.losses import NeuralGasEnergy
+from ..nn.wrappers import LambdaLayer
+from .abstract import NonGradientMixin, UnsupervisedPrototypeModel
+from .callbacks import GNGCallback
+from .extras import ConnectionTopology
+
+
+class KohonenSOM(NonGradientMixin, UnsupervisedPrototypeModel):
+    """Kohonen Self-Organizing-Map.
+
+    The prototypes are arranged on an `h x w` grid given by `hparams["shape"]`
+    and are updated directly in `training_step` (no gradient step).
+
+    TODO Allow non-2D grids
+
+    """
+    def __init__(self, hparams, **kwargs):
+        # Unpacking fails if "shape" is missing, because `get` returns None.
+        h, w = hparams.get("shape")
+        # Ignore `num_prototypes`
+        # (this overwrites the entry in the mapping passed by the caller)
+        hparams["num_prototypes"] = h * w
+        distance_fn = kwargs.pop("distance_fn", squared_euclidean_distance)
+        super().__init__(hparams, distance_fn=distance_fn, **kwargs)
+
+        # Hyperparameters
+        self.save_hyperparameters(hparams)
+
+        # Default hparams
+        self.hparams.setdefault("alpha", 0.3)
+        self.hparams.setdefault("sigma", max(h, w) / 2.0)
+
+        # Additional parameters
+        # `_grid` holds the (row, column) coordinate of every grid cell,
+        # stacked along the last axis.
+        x, y = torch.arange(h), torch.arange(w)
+        grid = torch.stack(torch.meshgrid(x, y), dim=-1)
+        self.register_buffer("_grid", grid)
+        # Working copies; `_sigma` is updated in `training_epoch_end`.
+        self._sigma = self.hparams.sigma
+        self._lr = self.hparams.lr
+
+    def predict_from_distances(self, distances):
+        """Return `wtac` applied to the distances and the grid coordinates.
+
+        The flattened grid coordinates are passed where labels would
+        normally go.
+
+        """
+        grid = self._grid.view(-1, 2)
+        wp = wtac(distances, grid)
+        return wp
+
+    def training_step(self, train_batch, batch_idx):
+        """Move the prototypes towards the batch, weighted by grid proximity.
+
+        Returns None; the prototypes are written back via `load_state_dict`.
+
+        """
+        # x = train_batch
+        # TODO Check if the batch has labels
+        x = train_batch[0]
+        d = self.compute_distances(x)
+        wp = self.predict_from_distances(d)
+        grid = self._grid.view(-1, 2)
+        # Distance on the grid between each winner and every grid cell.
+        gd = squared_euclidean_distance(wp, grid)
+        # Neighborhood weight decays with grid distance, scaled by sigma^2.
+        nh = torch.exp(-gd / self._sigma**2)
+        protos = self.proto_layer.components
+        # Difference of every sample to every prototype
+        # (assumes x is (batch, dim) - TODO confirm).
+        diff = x.unsqueeze(dim=1) - protos
+        delta = self._lr * self.hparams.alpha * nh.unsqueeze(-1) * diff
+        # Contributions are summed over the first (sample) axis.
+        updated_protos = protos + delta.sum(dim=0)
+        self.proto_layer.load_state_dict({"_components": updated_protos},
+                                         strict=False)
+
+    def training_epoch_end(self, training_step_outputs):
+        """Shrink `_sigma` exponentially with `current_epoch / max_epochs`."""
+        self._sigma = self.hparams.sigma * np.exp(
+            -self.current_epoch / self.trainer.max_epochs)
+
+    def extra_repr(self):
+        """Report the shape of the grid buffer in the module repr."""
+        return f"(grid): (shape: {tuple(self._grid.shape)})"
+
+
+class HeskesSOM(UnsupervisedPrototypeModel):
+    """Heskes SOM stub; `training_step` is not implemented yet."""
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+
+    def training_step(self, train_batch, batch_idx):
+        """Always raises `NotImplementedError`."""
+        # TODO Implement me!
+        raise NotImplementedError
+
+
+class NeuralGas(UnsupervisedPrototypeModel):
+    """Neural Gas: minimizes `NeuralGasEnergy` of the prototype distances.
+
+    A `ConnectionTopology` layer is additionally fed the distances on every
+    training step.
+
+    """
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+
+        # Hyperparameters
+        self.save_hyperparameters(hparams)
+
+        # Default hparams
+        self.hparams.setdefault("agelimit", 10)
+        self.hparams.setdefault("lm", 1)
+
+        self.energy_layer = NeuralGasEnergy(lm=self.hparams.lm)
+        self.topology_layer = ConnectionTopology(
+            agelimit=self.hparams.agelimit,
+            num_prototypes=self.hparams.num_prototypes,
+        )
+
+    def training_step(self, train_batch, batch_idx):
+        """Return the energy of the batch and update the topology layer."""
+        # x = train_batch
+        # TODO Check if the batch has labels
+        # Only the first element of the batch is used as input.
+        x = train_batch[0]
+        d = self.compute_distances(x)
+        # The second value returned by the energy layer is not needed here.
+        loss, _ = self.energy_layer(d)
+        self.topology_layer(d)
+        self.log("loss", loss)
+        return loss
+
+    # def training_epoch_end(self, training_step_outputs):
+    #     print(f"{self.trainer.lr_schedulers}")
+    #     print(f"{self.trainer.lr_schedulers[0]['scheduler'].optimizer}")
+
+
+class GrowingNeuralGas(NeuralGas):
+    """Neural Gas that tracks a per-prototype error and adds a GNGCallback."""
+    def __init__(self, hparams, **kwargs):
+        super().__init__(hparams, **kwargs)
+
+        # Defaults
+        for key, value in (("step_reduction", 0.5),
+                           ("insert_reduction", 0.1),
+                           ("insert_freq", 10)):
+            self.hparams.setdefault(key, value)
+
+        # One accumulated error value per prototype.
+        self.register_buffer(
+            "errors",
+            torch.zeros(self.hparams.num_prototypes, device=self.device))
+
+    def training_step(self, train_batch, _batch_idx):
+        """Return the energy of the batch and update the `errors` buffer."""
+        # TODO Check if the batch has labels
+        inputs = train_batch[0]
+        distances = self.compute_distances(inputs)
+        loss, order = self.energy_layer(distances)
+
+        # Keep only each sample's distance to its first-ranked prototype.
+        rows = torch.arange(len(distances))
+        winner_mask = torch.zeros_like(distances)
+        winner_mask[rows, order[:, 0]] = 1.0
+        winner_dist = distances * winner_mask
+
+        # Accumulate the squared winner distances, then scale the buffer.
+        self.errors += (winner_dist * winner_dist).sum(dim=0)
+        self.errors *= self.hparams.step_reduction
+
+        self.topology_layer(distances)
+        self.log("loss", loss)
+        return loss
+
+    def configure_callbacks(self):
+        """Return a single `GNGCallback` configured from the hparams."""
+        callback = GNGCallback(reduction=self.hparams.insert_reduction,
+                               freq=self.hparams.insert_freq)
+        return [callback]
--- a/prototorch/models/vis.py
+++ b/prototorch/models/vis.py
@@ -1,265 +1,13 @@
-import os
+"""Visualization Callbacks."""

 import numpy as np
 import pytorch_lightning as pl
 import torch
+import torchvision
 from matplotlib import pyplot as plt
-from matplotlib.offsetbox import AnchoredText
-from prototorch.utils.celluloid import Camera
-from prototorch.utils.colors import color_scheme
-from prototorch.utils.utils import (gif_from_dir, make_directory,
-                                    prettify_string)
 from torch.utils.data import DataLoader, Dataset

-
-class VisWeights(pl.Callback):
-    """Abstract weight visualization callback."""
-    def __init__(
-        self,
-        data=None,
-        ignore_last_output_row=False,
-        label_map=None,
-        project_mesh=False,
-        project_protos=False,
-        voronoi=False,
-        axis_off=True,
-        cmap="viridis",
-        show=True,
-        display_logs=True,
-        display_logs_settings={},
-        pause_time=0.5,
-        border=1,
-        resolution=10,
-        interval=False,
-        save=False,
-        snap=True,
-        save_dir="./img",
-        make_gif=False,
-        make_mp4=False,
-        verbose=True,
-        dpi=500,
-        fps=5,
-        figsize=(11, 8.5),  # standard paper in inches
-        prefix="",
-        distance_layer_index=-1,
-        **kwargs,
-    ):
-        super().__init__(**kwargs)
-        self.data = data
-        self.ignore_last_output_row = ignore_last_output_row
-        self.label_map = label_map
-        self.voronoi = voronoi
-        self.axis_off = True
-        self.project_mesh = project_mesh
-        self.project_protos = project_protos
-        self.cmap = cmap
-        self.show = show
-        self.display_logs = display_logs
-        self.display_logs_settings = display_logs_settings
-        self.pause_time = pause_time
-        self.border = border
-        self.resolution = resolution
-        self.interval = interval
-        self.save = save
-        self.snap = snap
-        self.save_dir = save_dir
-        self.make_gif = make_gif
-        self.make_mp4 = make_mp4
-        self.verbose = verbose
-        self.dpi = dpi
-        self.fps = fps
-        self.figsize = figsize
-        self.prefix = prefix
-        self.distance_layer_index = distance_layer_index
-        self.title = "Weights Visualization"
-        make_directory(self.save_dir)
-
-    def _skip_epoch(self, epoch):
-        if self.interval:
-            if epoch % self.interval != 0:
-                return True
-        return False
-
-    def _clean_and_setup_ax(self):
-        ax = self.ax
-        if not self.snap:
-            ax.cla()
-        ax.set_title(self.title)
-        if self.axis_off:
-            ax.axis("off")
-
-    def _savefig(self, fignum, orientation="horizontal"):
-        figname = f"{self.save_dir}/{self.prefix}{fignum:05d}.png"
-        figsize = self.figsize
-        if orientation == "vertical":
-            figsize = figsize[::-1]
-        elif orientation == "horizontal":
-            pass
-        else:
-            pass
-        self.fig.set_size_inches(figsize, forward=False)
-        self.fig.savefig(figname, dpi=self.dpi)
-
-    def _show_and_save(self, epoch):
-        if self.show:
-            plt.pause(self.pause_time)
-        if self.save:
-            self._savefig(epoch)
-        if self.snap:
-            self.camera.snap()
-
-    def _display_logs(self, ax, epoch, logs):
-        if self.display_logs:
-            settings = dict(
-                loc="lower right",
-                # padding between the text and bounding box
-                pad=0.5,
-                # padding between the bounding box and the axes
-                borderpad=1.0,
-                # https://matplotlib.org/api/text_api.html#matplotlib.text.Text
-                prop=dict(
-                    fontfamily="monospace",
-                    fontweight="medium",
-                    fontsize=12,
-                ),
-            )
-
-            # Override settings with self.display_logs_settings.
-            settings = {**settings, **self.display_logs_settings}
-
-            log_string = f"""Epoch: {epoch:04d},
-            val_loss: {logs.get('val_loss', np.nan):.03f},
-            val_acc: {logs.get('val_acc', np.nan):.03f},
-            loss: {logs.get('loss', np.nan):.03f},
-            acc: {logs.get('acc', np.nan):.03f}
-            """
-            log_string = prettify_string(log_string, end="")
-            # https://matplotlib.org/api/offsetbox_api.html#matplotlib.offsetbox.AnchoredText
-            anchored_text = AnchoredText(log_string, **settings)
-            self.ax.add_artist(anchored_text)
-
-    def on_train_start(self, trainer, pl_module, logs={}):
-        self.fig = plt.figure(self.title)
-        self.fig.set_size_inches(self.figsize, forward=False)
-        self.ax = self.fig.add_subplot(111)
-        self.camera = Camera(self.fig)
-
-    def on_train_end(self, trainer, pl_module, logs={}):
-        if self.make_gif:
-            gif_from_dir(directory=self.save_dir,
-                         prefix=self.prefix,
-                         duration=1.0 / self.fps)
-        if self.snap and self.make_mp4:
-            animation = self.camera.animate()
-            vid = os.path.join(self.save_dir, f"{self.prefix}animation.mp4")
-            if self.verbose:
-                print(f"Saving mp4 under {vid}.")
-            animation.save(vid, fps=self.fps, dpi=self.dpi)
-
-
-class VisPointProtos(VisWeights):
-    """Visualization of prototypes.
-    .. TODO::
-        Still in Progress.
-    """
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        self.title = "Point Prototypes Visualization"
-        self.data_scatter_settings = {
-            "marker": "o",
-            "s": 30,
-            "edgecolor": "k",
-            "cmap": self.cmap,
-        }
-        self.protos_scatter_settings = {
-            "marker": "D",
-            "s": 50,
-            "edgecolor": "k",
-            "cmap": self.cmap,
-        }
-
-    def on_epoch_start(self, trainer, pl_module, logs={}):
-        epoch = trainer.current_epoch
-        if self._skip_epoch(epoch):
-            return True
-
-        self._clean_and_setup_ax()
-
-        protos = pl_module.prototypes
-        labels = pl_module.proto_layer.prototype_labels.detach().cpu().numpy()
-
-        if self.project_protos:
-            protos = self.model.projection(protos).numpy()
-
-        color_map = color_scheme(n=len(set(labels)),
-                                 cmap=self.cmap,
-                                 zero_indexed=True)
-        # TODO Get rid of the assumption y values in [0, num_of_classes]
-        label_colors = [color_map[l] for l in labels]
-
-        if self.data is not None:
-            x, y = self.data
-            # TODO Get rid of the assumption y values in [0, num_of_classes]
-            y_colors = [color_map[l] for l in y]
-            # x = self.model.projection(x)
-            if not isinstance(x, np.ndarray):
-                x = x.numpy()
-
-            # Plot data points.
-            self.ax.scatter(x[:, 0],
-                            x[:, 1],
-                            c=y_colors,
-                            **self.data_scatter_settings)
-
-            # Paint decision regions.
-            if self.voronoi:
-                border = self.border
-                resolution = self.resolution
-                x = np.vstack((x, protos))
-                x_min, x_max = x[:, 0].min(), x[:, 0].max()
-                y_min, y_max = x[:, 1].min(), x[:, 1].max()
-                x_min, x_max = x_min - border, x_max + border
-                y_min, y_max = y_min - border, y_max + border
-                try:
-                    xx, yy = np.meshgrid(
-                        np.arange(x_min, x_max, (x_max - x_min) / resolution),
-                        np.arange(y_min, y_max, (x_max - x_min) / resolution),
-                    )
-                except ValueError as ve:
-                    print(ve)
-                    raise ValueError(f"x_min: {x_min}, x_max: {x_max}. "
-                                     f"x_min - x_max is {x_max - x_min}.")
-                except MemoryError as me:
-                    print(me)
-                    raise ValueError("Too many points. "
-                                     "Try reducing the resolution.")
-                mesh_input = np.c_[xx.ravel(), yy.ravel()]
-
-                # Predict mesh labels.
-                if self.project_mesh:
-                    mesh_input = self.model.projection(mesh_input)
-
-                y_pred = pl_module.predict(torch.Tensor(mesh_input))
-                y_pred = y_pred.reshape(xx.shape)
-
-                # Plot voronoi regions.
-                self.ax.contourf(xx, yy, y_pred, cmap=self.cmap, alpha=0.35)
-
-                self.ax.set_xlim(left=x_min + 0, right=x_max - 0)
-                self.ax.set_ylim(bottom=y_min + 0, top=y_max - 0)
-
-        # Plot prototypes.
-        self.ax.scatter(protos[:, 0],
-                        protos[:, 1],
-                        c=label_colors,
-                        **self.protos_scatter_settings)
-
-        # self._show_and_save(epoch)
-
-    def on_epoch_end(self, trainer, pl_module, logs={}):
-        epoch = trainer.current_epoch
-        self._display_logs(self.ax, epoch, logs)
-        self._show_and_save(epoch)
+from ..utils.utils import mesh2d


 class Vis2DAbstract(pl.Callback):
@@ -267,9 +15,12 @@ class Vis2DAbstract(pl.Callback):
                 data,
                 title="Prototype Visualization",
                 cmap="viridis",
-                 border=1,
-                 resolution=50,
+                 border=0.1,
+                 resolution=100,
+                 flatten_data=True,
+                 axis_off=False,
                 show_protos=True,
+                 show=True,
                 tensorboard=False,
                 show_last_only=False,
                 pause_time=0.1,
@@ -278,9 +29,18 @@ class Vis2DAbstract(pl.Callback):

        if isinstance(data, Dataset):
            x, y = next(iter(DataLoader(data, batch_size=len(data))))
-            x = x.view(len(data), -1)  # flatten
+        elif isinstance(data, torch.utils.data.DataLoader):
+            x = torch.tensor([])
+            y = torch.tensor([])
+            for x_b, y_b in data:
+                x = torch.cat([x, x_b])
+                y = torch.cat([y, y_b])
        else:
            x, y = data
+
+        if flatten_data:
+            x = x.reshape(len(x), -1)
+
        self.x_train = x
        self.y_train = y

@@ -289,7 +49,9 @@ class Vis2DAbstract(pl.Callback):
        self.cmap = cmap
        self.border = border
        self.resolution = resolution
+        self.axis_off = axis_off
        self.show_protos = show_protos
+        self.show = show
        self.tensorboard = tensorboard
        self.show_last_only = show_last_only
        self.pause_time = pause_time
@@ -298,27 +60,21 @@ class Vis2DAbstract(pl.Callback):
    def precheck(self, trainer):
        if self.show_last_only:
            if trainer.current_epoch != trainer.max_epochs - 1:
-                return
+                return False
+        return True

    def setup_ax(self, xlabel=None, ylabel=None):
        ax = self.fig.gca()
        ax.cla()
        ax.set_title(self.title)
-        ax.axis("off")
        if xlabel:
            ax.set_xlabel("Data dimension 1")
        if ylabel:
            ax.set_ylabel("Data dimension 2")
+        if self.axis_off:
+            ax.axis("off")
        return ax

-    def get_mesh_input(self, x):
-        x_min, x_max = x[:, 0].min() - self.border, x[:, 0].max() + self.border
-        y_min, y_max = x[:, 1].min() - self.border, x[:, 1].max() + self.border
-        xx, yy = np.meshgrid(np.arange(x_min, x_max, 1 / self.resolution),
-                             np.arange(y_min, y_max, 1 / self.resolution))
-        mesh_input = np.c_[xx.ravel(), yy.ravel()]
-        return mesh_input, xx, yy
-
    def plot_data(self, ax, x, y):
        ax.scatter(
            x[:, 0],
@@ -351,18 +107,20 @@ class Vis2DAbstract(pl.Callback):
    def log_and_display(self, trainer, pl_module):
        if self.tensorboard:
            self.add_to_tensorboard(trainer, pl_module)
-        if not self.block:
-            plt.pause(self.pause_time)
-        else:
-            plt.show(block=True)
+        if self.show:
+            if not self.block:
+                plt.pause(self.pause_time)
+            else:
+                plt.show(block=self.block)

    def on_train_end(self, trainer, pl_module):
-        plt.show()
+        plt.close()


 class VisGLVQ2D(Vis2DAbstract):
    def on_epoch_end(self, trainer, pl_module):
-        self.precheck(trainer)
+        if not self.precheck(trainer):
+            return True

        protos = pl_module.prototypes
        plabels = pl_module.prototype_labels
@@ -372,9 +130,11 @@ class VisGLVQ2D(Vis2DAbstract):
        self.plot_data(ax, x_train, y_train)
        self.plot_protos(ax, protos, plabels)
        x = np.vstack((x_train, protos))
-        mesh_input, xx, yy = self.get_mesh_input(x)
-        y_pred = pl_module.predict(torch.Tensor(mesh_input))
-        y_pred = y_pred.reshape(xx.shape)
+        mesh_input, xx, yy = mesh2d(x, self.border, self.resolution)
+        _components = pl_module.proto_layer._components
+        mesh_input = torch.from_numpy(mesh_input).type_as(_components)
+        y_pred = pl_module.predict(mesh_input)
+        y_pred = y_pred.cpu().reshape(xx.shape)
        ax.contourf(xx, yy, y_pred, cmap=self.cmap, alpha=0.35)

        self.log_and_display(trainer, pl_module)
@@ -386,24 +146,33 @@ class VisSiameseGLVQ2D(Vis2DAbstract):
        self.map_protos = map_protos

    def on_epoch_end(self, trainer, pl_module):
-        self.precheck(trainer)
+        if not self.precheck(trainer):
+            return True

        protos = pl_module.prototypes
        plabels = pl_module.prototype_labels
        x_train, y_train = self.x_train, self.y_train
-        x_train = pl_module.backbone(torch.Tensor(x_train)).detach()
+        device = pl_module.device
+        with torch.no_grad():
+            x_train = pl_module.backbone(torch.Tensor(x_train).to(device))
+            x_train = x_train.cpu().detach()
        if self.map_protos:
-            protos = pl_module.backbone(torch.Tensor(protos)).detach()
+            with torch.no_grad():
+                protos = pl_module.backbone(torch.Tensor(protos).to(device))
+                protos = protos.cpu().detach()
        ax = self.setup_ax()
        self.plot_data(ax, x_train, y_train)
        if self.show_protos:
            self.plot_protos(ax, protos, plabels)
            x = np.vstack((x_train, protos))
-            mesh_input, xx, yy = self.get_mesh_input(x)
+            mesh_input, xx, yy = mesh2d(x, self.border, self.resolution)
        else:
-            mesh_input, xx, yy = self.get_mesh_input(x_train)
-        y_pred = pl_module.predict_latent(torch.Tensor(mesh_input))
-        y_pred = y_pred.reshape(xx.shape)
+            mesh_input, xx, yy = mesh2d(x_train, self.border, self.resolution)
+        _components = pl_module.proto_layer._components
+        mesh_input = torch.Tensor(mesh_input).type_as(_components)
+        y_pred = pl_module.predict_latent(mesh_input,
+                                          map_protos=self.map_protos)
+        y_pred = y_pred.cpu().reshape(xx.shape)
        ax.contourf(xx, yy, y_pred, cmap=self.cmap, alpha=0.35)

        self.log_and_display(trainer, pl_module)
@@ -411,18 +180,21 @@ class VisSiameseGLVQ2D(Vis2DAbstract):

 class VisCBC2D(Vis2DAbstract):
    def on_epoch_end(self, trainer, pl_module):
-        self.precheck(trainer)
+        if not self.precheck(trainer):
+            return True

        x_train, y_train = self.x_train, self.y_train
        protos = pl_module.components
        ax = self.setup_ax(xlabel="Data dimension 1",
                           ylabel="Data dimension 2")
        self.plot_data(ax, x_train, y_train)
-        self.plot_protos(ax, protos, plabels)
+        self.plot_protos(ax, protos, "w")
        x = np.vstack((x_train, protos))
-        mesh_input, xx, yy = self.get_mesh_input(x)
-        y_pred = pl_module.predict(torch.Tensor(mesh_input))
-        y_pred = y_pred.reshape(xx.shape)
+        mesh_input, xx, yy = mesh2d(x, self.border, self.resolution)
+        _components = pl_module.components_layer._components
+        y_pred = pl_module.predict(
+            torch.Tensor(mesh_input).type_as(_components))
+        y_pred = y_pred.cpu().reshape(xx.shape)

        ax.contourf(xx, yy, y_pred, cmap=self.cmap, alpha=0.35)

@@ -431,7 +203,8 @@ class VisCBC2D(Vis2DAbstract):

 class VisNG2D(Vis2DAbstract):
    def on_epoch_end(self, trainer, pl_module):
-        self.precheck(trainer)
+        if not self.precheck(trainer):
+            return True

        x_train, y_train = self.x_train, self.y_train
        protos = pl_module.prototypes
@@ -453,3 +226,71 @@ class VisNG2D(Vis2DAbstract):
                    )

        self.log_and_display(trainer, pl_module)
+
+
+class VisImgComp(Vis2DAbstract):
+    def __init__(self,
+                 *args,
+                 random_data=0,
+                 dataformats="CHW",
+                 num_columns=2,
+                 add_embedding=False,
+                 embedding_data=100,
+                 **kwargs):
+        super().__init__(*args, **kwargs)
+        self.random_data = random_data
+        self.dataformats = dataformats
+        self.num_columns = num_columns
+        self.add_embedding = add_embedding
+        self.embedding_data = embedding_data
+
+    def on_train_start(self, trainer, pl_module):
+        tb = pl_module.logger.experiment
+        if self.add_embedding:
+            ind = np.random.choice(len(self.x_train),
+                                   size=self.embedding_data,
+                                   replace=False)
+            data = self.x_train[ind]
+            # print(f"{data.shape=}")
+            # print(f"{self.y_train[ind].shape=}")
+            tb.add_embedding(data.view(len(ind), -1),
+                             label_img=data,
+                             global_step=None,
+                             tag="Data Embedding",
+                             metadata=self.y_train[ind],
+                             metadata_header=None)
+
+        if self.random_data:
+            ind = np.random.choice(len(self.x_train),
+                                   size=self.random_data,
+                                   replace=False)
+            data = self.x_train[ind]
+            grid = torchvision.utils.make_grid(data, nrow=self.num_columns)
+            tb.add_image(tag="Data",
+                         img_tensor=grid,
+                         global_step=None,
+                         dataformats=self.dataformats)
+
+    def add_to_tensorboard(self, trainer, pl_module):
+        tb = pl_module.logger.experiment
+
+        components = pl_module.components
+        grid = torchvision.utils.make_grid(components, nrow=self.num_columns)
+        tb.add_image(
+            tag="Components",
+            img_tensor=grid,
+            global_step=trainer.current_epoch,
+            dataformats=self.dataformats,
+        )
+
+    def on_epoch_end(self, trainer, pl_module):
+        if not self.precheck(trainer):
+            return True
+
+        if self.show:
+            components = pl_module.components
+            grid = torchvision.utils.make_grid(components,
+                                               nrow=self.num_columns)
+            plt.imshow(grid.permute((1, 2, 0)).cpu(), cmap=self.cmap)
+
+        self.log_and_display(trainer, pl_module)
--- a/setup.cfg
+++ b/setup.cfg
@@ -0,0 +1,8 @@
+[isort]
+profile = hug
+src_paths = isort, test
+
+[yapf]
+based_on_style = pep8
+spaces_before_comment = 2
+split_before_logical_operator = true
--- a/setup.py
+++ b/setup.py
@@ -1,10 +1,12 @@
 """
-  _____           _     _______             _
- |  __ \         | |   |__   __|           | |
- | |__) | __ ___ | |_ ___ | | ___  _ __ ___| |__
- |  ___/ '__/ _ \| __/ _ \| |/ _ \| '__/ __| '_ \
- | |   | | | (_) | || (_) | | (_) | | | (__| | | |
- |_|   |_|  \___/ \__\___/|_|\___/|_|  \___|_| |_|Plugin
+
+ ######
+ #     # #####   ####  #####  ####  #####  ####  #####   ####  #    #
+ #     # #    # #    #   #   #    #   #   #    # #    # #    # #    #
+ ######  #    # #    #   #   #    #   #   #    # #    # #      ######
+ #       #####  #    #   #   #    #   #   #    # #####  #      #    #
+ #       #   #  #    #   #   #    #   #   #    # #   #  #    # #    #
+ #       #    #  ####    #    ####    #    ####  #    #  ####  #    #Plugin

 ProtoTorch models Plugin Package
 """
@@ -19,15 +21,39 @@ DOWNLOAD_URL = "https://github.com/si-cim/prototorch_models.git"
 with open("README.md", "r") as fh:
    long_description = fh.read()

-INSTALL_REQUIRES = ["prototorch>=0.4.1", "pytorch_lightning", "torchmetrics"]
-DEV = ["bumpversion"]
-EXAMPLES = ["matplotlib", "scikit-learn"]
-TESTS = ["codecov", "pytest"]
-ALL = DEV + EXAMPLES + TESTS
+INSTALL_REQUIRES = [
+    "prototorch>=0.6.0",
+    "pytorch_lightning>=1.3.5",
+    "torchmetrics",
+]
+CLI = [
+    "jsonargparse",
+]
+DEV = [
+    "bumpversion",
+    "pre-commit",
+]
+DOCS = [
+    "recommonmark",
+    "sphinx",
+    "nbsphinx",
+    "sphinx_rtd_theme",
+    "sphinxcontrib-katex",
+    "sphinxcontrib-bibtex",
+]
+EXAMPLES = [
+    "matplotlib",
+    "scikit-learn",
+]
+TESTS = [
+    "codecov",
+    "pytest",
+]
+ALL = CLI + DEV + DOCS + EXAMPLES + TESTS

 setup(
    name=safe_name("prototorch_" + PLUGIN_NAME),
-    version="0.1.6",
+    version="0.2.0",
    description="Pre-packaged prototype-based "
    "machine learning models using ProtoTorch and PyTorch-Lightning.",
    long_description=long_description,
@@ -37,6 +63,7 @@ setup(
    url=PROJECT_URL,
    download_url=DOWNLOAD_URL,
    license="MIT",
+    python_requires=">=3.9",
    install_requires=INSTALL_REQUIRES,
    extras_require={
        "dev": DEV,
@@ -52,9 +79,6 @@ setup(
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Operating System :: OS Independent",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
--- a/tests/test_.py
+++ b/tests/test_.py
@@ -0,0 +1,14 @@
+"""prototorch.models test suite."""
+
+import unittest
+
+
+class TestDummy(unittest.TestCase):  # placeholder case with no assertions
+    def setUp(self):  # no fixtures are created
+        pass
+
+    def test_dummy(self):  # empty body, so this test always passes
+        pass
+
+    def tearDown(self):  # nothing to clean up
+        pass
--- a/tests/test_dummy.py
+++ b/tests/test_dummy.py
@@ -1,6 +0,0 @@
-import unittest
-
-
-class TestDummy(unittest.TestCase):
-    def test_one(self):
-        self.assertEqual(True, True)
--- a/tests/test_examples.sh
+++ b/tests/test_examples.sh
@@ -0,0 +1,19 @@
+#! /bin/bash
+# Usage: test_examples.sh DIR -- runs every DIR/*.py with "--fast_dev_run 1".
+failed=0
+
+for example in $(find "$1" -maxdepth 1 -name "*.py")
+do
+    echo -n "$example ... "
+    # Run with an empty DISPLAY (presumably so no GUI window can open).
+    if DISPLAY= python "$example" --fast_dev_run 1 &> run_log.txt; then
+        echo "SUCCESS!"
+    else
+        echo "FAILED!!"
+        cat run_log.txt  # show the captured output of the failing example
+        failed=1
+    fi
+    rm run_log.txt
+done
+
+exit "$failed"