Improve documentation.

2021-04-21 14:55:54 +02:00 · 2021-04-21 14:55:54 +02:00 · 34973808b8
commit 34973808b8
parent c42df6e203
5 changed files with 164 additions and 102 deletions
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@ -11,8 +11,26 @@ Datasets

 Functions
 --------------------------------------
-.. automodule:: prototorch.functions
+
+**Dimensions:**
+
+- :math:`B` ... Batch size
+- :math:`P` ... Number of prototypes
+- :math:`n_x` ... Data dimension for vectorial data
+- :math:`n_w` ... Data dimension for vectorial prototypes
+
+Activations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: prototorch.functions.activations
   :members:
+   :exclude-members: register_activation, get_activation
+   :undoc-members:
+
+Distances
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: prototorch.functions.distances
+   :members:
+   :exclude-members: sed
   :undoc-members:

 Modules
--- a/prototorch/datasets/abstract.py
+++ b/prototorch/datasets/abstract.py
@ -14,6 +14,7 @@ import torch

 class Dataset(torch.utils.data.Dataset):
    """Abstract dataset class to be inherited."""
+
    _repr_indent = 2

    def __init__(self, root):
@ -30,8 +31,9 @@ class Dataset(torch.utils.data.Dataset):

 class ProtoDataset(Dataset):
    """Abstract dataset class to be inherited."""
-    training_file = 'training.pt'
-    test_file = 'test.pt'
+
+    training_file = "training.pt"
+    test_file = "test.pt"

    def __init__(self, root, train=True, download=True, verbose=True):
        super().__init__(root)
@ -39,43 +41,44 @@ class ProtoDataset(Dataset):
        self.verbose = verbose

        if download:
-            self.download()
+            self._download()

        if not self._check_exists():
-            raise RuntimeError('Dataset not found. '
-                               'You can use download=True to download it')
+            raise RuntimeError(
+                "Dataset not found. " "You can use download=True to download it"
+            )

        data_file = self.training_file if self.train else self.test_file

        self.data, self.targets = torch.load(
-            os.path.join(self.processed_folder, data_file))
+            os.path.join(self.processed_folder, data_file)
+        )

    @property
    def raw_folder(self):
-        return os.path.join(self.root, self.__class__.__name__, 'raw')
+        return os.path.join(self.root, self.__class__.__name__, "raw")

    @property
    def processed_folder(self):
-        return os.path.join(self.root, self.__class__.__name__, 'processed')
+        return os.path.join(self.root, self.__class__.__name__, "processed")

    @property
    def class_to_idx(self):
        return {_class: i for i, _class in enumerate(self.classes)}

    def _check_exists(self):
-        return (os.path.exists(
-            os.path.join(self.processed_folder, self.training_file))
-                and os.path.exists(
-                    os.path.join(self.processed_folder, self.test_file)))
+        return os.path.exists(
+            os.path.join(self.processed_folder, self.training_file)
+        ) and os.path.exists(os.path.join(self.processed_folder, self.test_file))

    def __repr__(self):
-        head = 'Dataset ' + self.__class__.__name__
-        body = ['Number of datapoints: {}'.format(self.__len__())]
+        head = "Dataset " + self.__class__.__name__
+        body = ["Number of datapoints: {}".format(self.__len__())]
        if self.root is not None:
-            body.append('Root location: {}'.format(self.root))
+            body.append("Root location: {}".format(self.root))
        body += self.extra_repr().splitlines()
-        lines = [head] + [' ' * self._repr_indent + line for line in body]
-        return '\n'.join(lines)
+        lines = [head] + [" " * self._repr_indent + line for line in body]
+        return "\n".join(lines)

    def extra_repr(self):
        return f"Split: {'Train' if self.train is True else 'Test'}"
@ -83,5 +86,5 @@ class ProtoDataset(Dataset):
    def __len__(self):
        return len(self.data)

-    def download(self):
+    def _download(self):
        raise NotImplementedError
--- a/prototorch/datasets/tecator.py
+++ b/prototorch/datasets/tecator.py
@ -46,42 +46,45 @@ from prototorch.datasets.abstract import ProtoDataset


 class Tecator(ProtoDataset):
-    """Tecator dataset for classification."""
-    resources = [
-        ('1MMuUK8V41IgNpnPDbg3E-QAL6wlErTk0',
-         'ba5607c580d0f91bb27dc29d13c2f8df'),
+    """
+    `Tecator Dataset <http://lib.stat.cmu.edu/datasets/tecator>`__
+    for classification.
+    """
+
+    _resources = [
+        ("1MMuUK8V41IgNpnPDbg3E-QAL6wlErTk0", "ba5607c580d0f91bb27dc29d13c2f8df"),
    ]  # (google_storage_id, md5hash)
-    classes = ['0 - low_fat', '1 - high_fat']
+    classes = ["0 - low_fat", "1 - high_fat"]

    def __getitem__(self, index):
        img, target = self.data[index], int(self.targets[index])
        return img, target

-    def download(self):
+    def _download(self):
        """Download the data if it doesn't exist in already."""
        if self._check_exists():
            return

        if self.verbose:
-            print('Making directories...')
+            print("Making directories...")
        os.makedirs(self.raw_folder, exist_ok=True)
        os.makedirs(self.processed_folder, exist_ok=True)

        if self.verbose:
-            print('Downloading...')
-        for fileid, md5 in self.resources:
-            filename = 'tecator.npz'
-            download_file_from_google_drive(fileid,
-                                            root=self.raw_folder,
-                                            filename=filename,
-                                            md5=md5)
+            print("Downloading...")
+        for fileid, md5 in self._resources:
+            filename = "tecator.npz"
+            download_file_from_google_drive(
+                fileid, root=self.raw_folder, filename=filename, md5=md5
+            )

        if self.verbose:
-            print('Processing...')
-        with np.load(os.path.join(self.raw_folder, 'tecator.npz'),
-                     allow_pickle=False) as f:
-            x_train, y_train = f['x_train'], f['y_train']
-            x_test, y_test = f['x_test'], f['y_test']
+            print("Processing...")
+        with np.load(
+            os.path.join(self.raw_folder, "tecator.npz"), allow_pickle=False
+        ) as f:
+            x_train, y_train = f["x_train"], f["y_train"]
+            x_test, y_test = f["x_test"], f["y_test"]
        training_set = [
            torch.tensor(x_train, dtype=torch.float32),
            torch.tensor(y_train),
@ -91,12 +94,10 @@ class Tecator(ProtoDataset):
            torch.tensor(y_test),
        ]

-        with open(os.path.join(self.processed_folder, self.training_file),
-                  'wb') as f:
+        with open(os.path.join(self.processed_folder, self.training_file), "wb") as f:
            torch.save(training_set, f)
-        with open(os.path.join(self.processed_folder, self.test_file),
-                  'wb') as f:
+        with open(os.path.join(self.processed_folder, self.test_file), "wb") as f:
            torch.save(test_set, f)

        if self.verbose:
-            print('Done!')
+            print("Done!")
--- a/prototorch/functions/distances.py
+++ b/prototorch/functions/distances.py
@ -1,15 +1,24 @@
 """ProtoTorch distance functions."""

 import torch
-from prototorch.functions.helper import equal_int_shape, _int_and_mixed_shape, _check_shapes
+from prototorch.functions.helper import (
+    equal_int_shape,
+    _int_and_mixed_shape,
+    _check_shapes,
+)
 import numpy as np


 def squared_euclidean_distance(x, y):
-    """Compute the squared Euclidean distance between :math:`x` and :math:`y`.
+    r"""Compute the squared Euclidean distance between :math:`\bm x` and :math:`\bm y`.

-    Expected dimension of x is 2.
-    Expected dimension of y is 2.
+    Compute :math:`{\| \bm x - \bm y \|}_2^2`
+
+    :param `torch.tensor` x: Two dimensional tensor
+    :param `torch.tensor` y: Two dimensional tensor
+
+    **Alias:**
+    ``prototorch.functions.distances.sed``
    """
    expanded_x = x.unsqueeze(dim=1)
    batchwise_difference = y - expanded_x
@ -19,10 +28,15 @@ def squared_euclidean_distance(x, y):


 def euclidean_distance(x, y):
-    """Compute the Euclidean distance between :math:`x` and :math:`y`.
+    r"""Compute the Euclidean distance between :math:`x` and :math:`y`.

-    Expected dimension of x is 2.
-    Expected dimension of y is 2.
+    Compute :math:`{\| \bm x - \bm y \|}_2`
+
+    :param `torch.tensor` x: Input Tensor of shape :math:`X \times N`
+    :param `torch.tensor` y: Input Tensor of shape :math:`Y \times N`
+
+    :returns: Distance Tensor of shape :math:`X \times Y`
+    :rtype: `torch.tensor`
    """
    distances_raised = squared_euclidean_distance(x, y)
    distances = torch.sqrt(distances_raised)
@ -30,10 +44,17 @@ def euclidean_distance(x, y):


 def lpnorm_distance(x, y, p):
-    r"""Compute :math:`{\langle x, y \rangle}_p`.
+    r"""
+    Calculates the lp-norm between :math:`\bm x` and :math:`\bm y`.
+    Also known as Minkowski distance.

-    Expected dimension of x is 2.
-    Expected dimension of y is 2.
+    Compute :math:`{\| \bm x - \bm y \|}_p`.
+
+    Calls ``torch.cdist``
+
+    :param `torch.tensor` x: Two dimensional tensor
+    :param `torch.tensor` y: Two dimensional tensor
+    :param p: p parameter of the lp norm
    """
    distances = torch.cdist(x, y, p=p)
    return distances
@ -42,11 +63,11 @@ def lpnorm_distance(x, y, p):
 def omega_distance(x, y, omega):
    r"""Omega distance.

-    Compute :math:`{\langle \Omega x, \Omega y \rangle}_p`
+    Compute :math:`{\| \Omega \bm x - \Omega \bm y \|}_p`

-    Expected dimension of x is 2.
-    Expected dimension of y is 2.
-    Expected dimension of omega is 2.
+    :param `torch.tensor` x: Two dimensional tensor
+    :param `torch.tensor` y: Two dimensional tensor
+    :param `torch.tensor` omega: Two dimensional matrix
    """
    projected_x = x @ omega
    projected_y = y @ omega
@ -57,11 +78,11 @@ def omega_distance(x, y, omega):
 def lomega_distance(x, y, omegas):
    r"""Localized Omega distance.

-    Compute :math:`{\langle \Omega_k x, \Omega_k y_k \rangle}_p`
+    Compute :math:`{\| \Omega_k \bm x - \Omega_k \bm y_k \|}_p`

-    Expected dimension of x is 2.
-    Expected dimension of y is 2.
-    Expected dimension of omegas is 3.
+    :param `torch.tensor` x: Two dimensional tensor
+    :param `torch.tensor` y: Two dimensional tensor
+    :param `torch.tensor` omegas: Three dimensional tensor
    """
    projected_x = x @ omegas
    projected_y = torch.diagonal(y @ omegas).T
@ -74,31 +95,38 @@ def lomega_distance(x, y, omegas):


 def euclidean_distance_matrix(x, y, squared=False, epsilon=1e-10):
-    r""" Computes an euclidean distanes matrix given two distinct vectors.
+    r"""Computes a Euclidean distance matrix given two distinct sets of vectors.
    last dimension must be the vector dimension!
    compute the distance via the identity of the dot product. This avoids the memory overhead due to the subtraction!

-    x.shape = (number_of_x_vectors, vector_dim)
-    y.shape = (number_of_y_vectors, vector_dim)
+    - ``x.shape = (number_of_x_vectors, vector_dim)``
+    - ``y.shape = (number_of_y_vectors, vector_dim)``

    output: matrix of distances (number_of_x_vectors, number_of_y_vectors)
    """
    for tensor in [x, y]:
        if tensor.ndim != 2:
            raise ValueError(
-                'The tensor dimension must be two. You provide: tensor.ndim=' +
-                str(tensor.ndim) + '.')
+                "The tensor dimension must be two. You provide: tensor.ndim="
+                + str(tensor.ndim)
+                + "."
+            )
    if not equal_int_shape([tuple(x.shape)[1]], [tuple(y.shape)[1]]):
        raise ValueError(
-            'The vector shape must be equivalent in both tensors. You provide: tuple(y.shape)[1]='
-            + str(tuple(x.shape)[1]) + ' and  tuple(y.shape)(y)[1]=' +
-            str(tuple(y.shape)[1]) + '.')
+            "The vector shape must be equivalent in both tensors. You provide: tuple(y.shape)[1]="
+            + str(tuple(x.shape)[1])
+            + " and  tuple(y.shape)(y)[1]="
+            + str(tuple(y.shape)[1])
+            + "."
+        )

    y = torch.transpose(y)

-    diss = torch.sum(x**2, axis=1,
-                     keepdims=True) - 2 * torch.dot(x, y) + torch.sum(
-                         y**2, axis=0, keepdims=True)
+    diss = (
+        torch.sum(x ** 2, axis=1, keepdims=True)
+        - 2 * torch.dot(x, y)
+        + torch.sum(y ** 2, axis=0, keepdims=True)
+    )

    if not squared:
        if epsilon == 0:
@ -111,12 +139,18 @@ def euclidean_distance_matrix(x, y, squared=False, epsilon=1e-10):

 def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10):
    r"""Tangent distances based on the tensorflow implementation of Sascha Saralajews
-    For more info about Tangen distances see DOI:10.1109/IJCNN.2016.7727534.
+
+    For more info about Tangent distances see
+
+    DOI:10.1109/IJCNN.2016.7727534.
+
    The subspaces is always assumed as transposed and must be orthogonal!
    For local non sparse signals subspaces must be provided!
-    shape(signals): batch x proto_number x channels x dim1 x dim2 x ... x dimN
-    shape(protos): proto_number x dim1 x dim2 x ... x dimN
-    shape(subspaces): (optional [proto_number]) x prod(dim1 * dim2 * ... * dimN)  x prod(projected_atom_shape)
+
+    - shape(signals): batch x proto_number x channels x dim1 x dim2 x ... x dimN
+    - shape(protos): proto_number x dim1 x dim2 x ... x dimN
+    - shape(subspaces): (optional [proto_number]) x prod(dim1 * dim2 * ... * dimN)  x prod(projected_atom_shape)
+
    subspace should be orthogonalized
    Pytorch implementation of Sascha Saralajew's tensorflow code.
    Translation by Christoph Raab
@ -139,18 +173,19 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10):
        if subspaces.ndim == 2:
            # clean solution without map if the matrix_scope is global
            projectors = torch.eye(subspace_int_shape[-2]) - torch.dot(
-                subspaces, torch.transpose(subspaces))
+                subspaces, torch.transpose(subspaces)
+            )

            projected_signals = torch.dot(signals, projectors)
            projected_protos = torch.dot(protos, projectors)

-            diss = euclidean_distance_matrix(projected_signals,
-                                             projected_protos,
-                                             squared=squared,
-                                             epsilon=epsilon)
+            diss = euclidean_distance_matrix(
+                projected_signals, projected_protos, squared=squared, epsilon=epsilon
+            )

            diss = torch.reshape(
-                diss, [signal_shape[0], signal_shape[2], proto_shape[0]])
+                diss, [signal_shape[0], signal_shape[2], proto_shape[0]]
+            )

            return torch.permute(diss, [0, 2, 1])

@ -158,18 +193,21 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10):

            # no solution without map possible --> memory efficient but slow!
            projectors = torch.eye(subspace_int_shape[-2]) - torch.bmm(
-                subspaces,
-                subspaces)  #K.batch_dot(subspaces, subspaces, [2, 2])
+                subspaces, subspaces
+            )  # K.batch_dot(subspaces, subspaces, [2, 2])

-            projected_protos = (protos @ subspaces
+            projected_protos = (
+                protos @ subspaces
            ).T  # K.batch_dot(projectors, protos, [1, 1]))

            def projected_norm(projector):
                return torch.sum(torch.dot(signals, projector) ** 2, axis=1)

-            diss = torch.transpose(map(projected_norm, projectors)) \
-                    - 2 * torch.dot(signals, projected_protos) \
+            diss = (
+                torch.transpose(map(projected_norm, projectors))
+                - 2 * torch.dot(signals, projected_protos)
                + torch.sum(projected_protos ** 2, axis=0, keepdims=True)
+            )

            if not squared:
                if epsilon == 0:
@ -178,7 +216,8 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10):
                    diss = torch.sqrt(torch.max(diss, epsilon))

            diss = torch.reshape(
-                diss, [signal_shape[0], signal_shape[2], proto_shape[0]])
+                diss, [signal_shape[0], signal_shape[2], proto_shape[0]]
+            )

            return torch.permute(diss, [0, 2, 1])

@ -194,12 +233,13 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10):

            # Scope: Tangentspace Projections
            diff = torch.reshape(
-                diff, (signal_shape[0] * signal_shape[2], signal_shape[1], -1))
+                diff, (signal_shape[0] * signal_shape[2], signal_shape[1], -1)
+            )
            projected_diff = diff @ projectors
            projected_diff = torch.reshape(
                projected_diff,
-                (signal_shape[0], signal_shape[2], signal_shape[1]) +
-                signal_shape[3:])
+                (signal_shape[0], signal_shape[2], signal_shape[1]) + signal_shape[3:],
+            )

            diss = torch.norm(projected_diff, 2, dim=-1)
            return diss.permute([0, 2, 1])
@ -211,13 +251,14 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10):

            # Scope: Tangentspace Projections
            diff = torch.reshape(
-                diff, (signal_shape[0] * signal_shape[2], signal_shape[1], -1))
+                diff, (signal_shape[0] * signal_shape[2], signal_shape[1], -1)
+            )
            diff = diff.permute([1, 0, 2])
            projected_diff = torch.bmm(diff, projectors)
            projected_diff = torch.reshape(
                projected_diff,
-                (signal_shape[1], signal_shape[0], signal_shape[2]) +
-                signal_shape[3:])
+                (signal_shape[1], signal_shape[0], signal_shape[2]) + signal_shape[3:],
+            )

            diss = torch.norm(projected_diff, 2, dim=-1)
            return diss.permute([1, 0, 2]).squeeze(-1)
--- a/setup.py
+++ b/setup.py
@ -8,7 +8,6 @@

 ProtoTorch Core Package
 """
-
 from setuptools import setup
 from setuptools import find_packages