Improve documentation.
This commit is contained in:
		| @@ -11,8 +11,26 @@ Datasets | ||||
|  | ||||
| Functions | ||||
| -------------------------------------- | ||||
| .. automodule:: prototorch.functions | ||||
|  | ||||
| **Dimensions:** | ||||
|  | ||||
| - :math:`B` ... Batch size | ||||
| - :math:`P` ... Number of prototypes | ||||
| - :math:`n_x` ... Data dimension for vectorial data | ||||
| - :math:`n_w` ... Data dimension for vectorial prototypes | ||||
|  | ||||
| Activations | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| .. automodule:: prototorch.functions.activations | ||||
|    :members: | ||||
|    :exclude-members: register_activation, get_activation | ||||
|    :undoc-members: | ||||
|  | ||||
| Distances | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| .. automodule:: prototorch.functions.distances | ||||
|    :members: | ||||
|    :exclude-members: sed | ||||
|    :undoc-members: | ||||
|  | ||||
| Modules | ||||
|   | ||||
| @@ -14,6 +14,7 @@ import torch | ||||
|  | ||||
| class Dataset(torch.utils.data.Dataset): | ||||
|     """Abstract dataset class to be inherited.""" | ||||
|  | ||||
|     _repr_indent = 2 | ||||
|  | ||||
|     def __init__(self, root): | ||||
| @@ -30,8 +31,9 @@ class Dataset(torch.utils.data.Dataset): | ||||
|  | ||||
| class ProtoDataset(Dataset): | ||||
|     """Abstract dataset class to be inherited.""" | ||||
|     training_file = 'training.pt' | ||||
|     test_file = 'test.pt' | ||||
|  | ||||
|     training_file = "training.pt" | ||||
|     test_file = "test.pt" | ||||
|  | ||||
|     def __init__(self, root, train=True, download=True, verbose=True): | ||||
|         super().__init__(root) | ||||
| @@ -39,43 +41,44 @@ class ProtoDataset(Dataset): | ||||
|         self.verbose = verbose | ||||
|  | ||||
|         if download: | ||||
|             self.download() | ||||
|             self._download() | ||||
|  | ||||
|         if not self._check_exists(): | ||||
|             raise RuntimeError('Dataset not found. ' | ||||
|                                'You can use download=True to download it') | ||||
|             raise RuntimeError( | ||||
|                 "Dataset not found. " "You can use download=True to download it" | ||||
|             ) | ||||
|  | ||||
|         data_file = self.training_file if self.train else self.test_file | ||||
|  | ||||
|         self.data, self.targets = torch.load( | ||||
|             os.path.join(self.processed_folder, data_file)) | ||||
|             os.path.join(self.processed_folder, data_file) | ||||
|         ) | ||||
|  | ||||
|     @property | ||||
|     def raw_folder(self): | ||||
|         return os.path.join(self.root, self.__class__.__name__, 'raw') | ||||
|         return os.path.join(self.root, self.__class__.__name__, "raw") | ||||
|  | ||||
|     @property | ||||
|     def processed_folder(self): | ||||
|         return os.path.join(self.root, self.__class__.__name__, 'processed') | ||||
|         return os.path.join(self.root, self.__class__.__name__, "processed") | ||||
|  | ||||
|     @property | ||||
|     def class_to_idx(self): | ||||
|         return {_class: i for i, _class in enumerate(self.classes)} | ||||
|  | ||||
|     def _check_exists(self): | ||||
|         return (os.path.exists( | ||||
|             os.path.join(self.processed_folder, self.training_file)) | ||||
|                 and os.path.exists( | ||||
|                     os.path.join(self.processed_folder, self.test_file))) | ||||
|         return os.path.exists( | ||||
|             os.path.join(self.processed_folder, self.training_file) | ||||
|         ) and os.path.exists(os.path.join(self.processed_folder, self.test_file)) | ||||
|  | ||||
|     def __repr__(self): | ||||
|         head = 'Dataset ' + self.__class__.__name__ | ||||
|         body = ['Number of datapoints: {}'.format(self.__len__())] | ||||
|         head = "Dataset " + self.__class__.__name__ | ||||
|         body = ["Number of datapoints: {}".format(self.__len__())] | ||||
|         if self.root is not None: | ||||
|             body.append('Root location: {}'.format(self.root)) | ||||
|             body.append("Root location: {}".format(self.root)) | ||||
|         body += self.extra_repr().splitlines() | ||||
|         lines = [head] + [' ' * self._repr_indent + line for line in body] | ||||
|         return '\n'.join(lines) | ||||
|         lines = [head] + [" " * self._repr_indent + line for line in body] | ||||
|         return "\n".join(lines) | ||||
|  | ||||
|     def extra_repr(self): | ||||
|         return f"Split: {'Train' if self.train is True else 'Test'}" | ||||
| @@ -83,5 +86,5 @@ class ProtoDataset(Dataset): | ||||
|     def __len__(self): | ||||
|         return len(self.data) | ||||
|  | ||||
|     def download(self): | ||||
|     def _download(self): | ||||
|         raise NotImplementedError | ||||
|   | ||||
| @@ -46,42 +46,45 @@ from prototorch.datasets.abstract import ProtoDataset | ||||
|  | ||||
|  | ||||
| class Tecator(ProtoDataset): | ||||
|     """Tecator dataset for classification.""" | ||||
|     resources = [ | ||||
|         ('1MMuUK8V41IgNpnPDbg3E-QAL6wlErTk0', | ||||
|          'ba5607c580d0f91bb27dc29d13c2f8df'), | ||||
|     """ | ||||
|     `Tecator Dataset <http://lib.stat.cmu.edu/datasets/tecator>`__ | ||||
|     for classification. | ||||
|     """ | ||||
|  | ||||
|     _resources = [ | ||||
|         ("1MMuUK8V41IgNpnPDbg3E-QAL6wlErTk0", "ba5607c580d0f91bb27dc29d13c2f8df"), | ||||
|     ]  # (google_storage_id, md5hash) | ||||
|     classes = ['0 - low_fat', '1 - high_fat'] | ||||
|     classes = ["0 - low_fat", "1 - high_fat"] | ||||
|  | ||||
|     def __getitem__(self, index): | ||||
|         img, target = self.data[index], int(self.targets[index]) | ||||
|         return img, target | ||||
|  | ||||
|     def download(self): | ||||
|     def _download(self): | ||||
|         """Download the data if it doesn't exist already.""" | ||||
|         if self._check_exists(): | ||||
|             return | ||||
|  | ||||
|         if self.verbose: | ||||
|             print('Making directories...') | ||||
|             print("Making directories...") | ||||
|         os.makedirs(self.raw_folder, exist_ok=True) | ||||
|         os.makedirs(self.processed_folder, exist_ok=True) | ||||
|  | ||||
|         if self.verbose: | ||||
|             print('Downloading...') | ||||
|         for fileid, md5 in self.resources: | ||||
|             filename = 'tecator.npz' | ||||
|             download_file_from_google_drive(fileid, | ||||
|                                             root=self.raw_folder, | ||||
|                                             filename=filename, | ||||
|                                             md5=md5) | ||||
|             print("Downloading...") | ||||
|         for fileid, md5 in self._resources: | ||||
|             filename = "tecator.npz" | ||||
|             download_file_from_google_drive( | ||||
|                 fileid, root=self.raw_folder, filename=filename, md5=md5 | ||||
|             ) | ||||
|  | ||||
|         if self.verbose: | ||||
|             print('Processing...') | ||||
|         with np.load(os.path.join(self.raw_folder, 'tecator.npz'), | ||||
|                      allow_pickle=False) as f: | ||||
|             x_train, y_train = f['x_train'], f['y_train'] | ||||
|             x_test, y_test = f['x_test'], f['y_test'] | ||||
|             print("Processing...") | ||||
|         with np.load( | ||||
|             os.path.join(self.raw_folder, "tecator.npz"), allow_pickle=False | ||||
|         ) as f: | ||||
|             x_train, y_train = f["x_train"], f["y_train"] | ||||
|             x_test, y_test = f["x_test"], f["y_test"] | ||||
|         training_set = [ | ||||
|             torch.tensor(x_train, dtype=torch.float32), | ||||
|             torch.tensor(y_train), | ||||
| @@ -91,12 +94,10 @@ class Tecator(ProtoDataset): | ||||
|             torch.tensor(y_test), | ||||
|         ] | ||||
|  | ||||
|         with open(os.path.join(self.processed_folder, self.training_file), | ||||
|                   'wb') as f: | ||||
|         with open(os.path.join(self.processed_folder, self.training_file), "wb") as f: | ||||
|             torch.save(training_set, f) | ||||
|         with open(os.path.join(self.processed_folder, self.test_file), | ||||
|                   'wb') as f: | ||||
|         with open(os.path.join(self.processed_folder, self.test_file), "wb") as f: | ||||
|             torch.save(test_set, f) | ||||
|  | ||||
|         if self.verbose: | ||||
|             print('Done!') | ||||
|             print("Done!") | ||||
|   | ||||
| @@ -1,15 +1,24 @@ | ||||
| """ProtoTorch distance functions.""" | ||||
|  | ||||
| import torch | ||||
| from prototorch.functions.helper import equal_int_shape, _int_and_mixed_shape, _check_shapes | ||||
| from prototorch.functions.helper import ( | ||||
|     equal_int_shape, | ||||
|     _int_and_mixed_shape, | ||||
|     _check_shapes, | ||||
| ) | ||||
| import numpy as np | ||||
|  | ||||
|  | ||||
| def squared_euclidean_distance(x, y): | ||||
|     """Compute the squared Euclidean distance between :math:`x` and :math:`y`. | ||||
|     r"""Compute the squared Euclidean distance between :math:`\bm x` and :math:`\bm y`. | ||||
|  | ||||
|     Expected dimension of x is 2. | ||||
|     Expected dimension of y is 2. | ||||
|     Compute :math:`{\| \bm x - \bm y \|}_2^2` | ||||
|  | ||||
|     :param `torch.tensor` x: Two-dimensional tensor | ||||
|     :param `torch.tensor` y: Two-dimensional tensor | ||||
|  | ||||
|     **Alias:** | ||||
|     ``prototorch.functions.distances.sed`` | ||||
|     """ | ||||
|     expanded_x = x.unsqueeze(dim=1) | ||||
|     batchwise_difference = y - expanded_x | ||||
| @@ -19,10 +28,15 @@ def squared_euclidean_distance(x, y): | ||||
|  | ||||
|  | ||||
| def euclidean_distance(x, y): | ||||
|     """Compute the Euclidean distance between :math:`x` and :math:`y`. | ||||
|     r"""Compute the Euclidean distance between :math:`x` and :math:`y`. | ||||
|  | ||||
|     Expected dimension of x is 2. | ||||
|     Expected dimension of y is 2. | ||||
|     Compute :math:`\sqrt{{\| \bm x - \bm y \|}_2^2}` | ||||
|  | ||||
|     :param `torch.tensor` x: Input Tensor of shape :math:`X \times N` | ||||
|     :param `torch.tensor` y: Input Tensor of shape :math:`Y \times N` | ||||
|  | ||||
|     :returns: Distance Tensor of shape :math:`X \times Y` | ||||
|     :rtype: `torch.tensor` | ||||
|     """ | ||||
|     distances_raised = squared_euclidean_distance(x, y) | ||||
|     distances = torch.sqrt(distances_raised) | ||||
| @@ -30,10 +44,17 @@ def euclidean_distance(x, y): | ||||
|  | ||||
|  | ||||
| def lpnorm_distance(x, y, p): | ||||
|     r"""Compute :math:`{\langle x, y \rangle}_p`. | ||||
|     r""" | ||||
|     Calculates the lp-norm between :math:`\bm x` and :math:`\bm y`. | ||||
|     Also known as Minkowski distance. | ||||
|  | ||||
|     Expected dimension of x is 2. | ||||
|     Expected dimension of y is 2. | ||||
|     Compute :math:`{\| \bm x - \bm y \|}_p`. | ||||
|  | ||||
|     Calls ``torch.cdist`` | ||||
|  | ||||
|     :param `torch.tensor` x: Two-dimensional tensor | ||||
|     :param `torch.tensor` y: Two-dimensional tensor | ||||
|     :param p: p parameter of the lp norm | ||||
|     """ | ||||
|     distances = torch.cdist(x, y, p=p) | ||||
|     return distances | ||||
| @@ -42,11 +63,11 @@ def lpnorm_distance(x, y, p): | ||||
| def omega_distance(x, y, omega): | ||||
|     r"""Omega distance. | ||||
|  | ||||
|     Compute :math:`{\langle \Omega x, \Omega y \rangle}_p` | ||||
|     Compute :math:`{\| \Omega \bm x - \Omega \bm y \|}_p` | ||||
|  | ||||
|     Expected dimension of x is 2. | ||||
|     Expected dimension of y is 2. | ||||
|     Expected dimension of omega is 2. | ||||
|     :param `torch.tensor` x: Two-dimensional tensor | ||||
|     :param `torch.tensor` y: Two-dimensional tensor | ||||
|     :param `torch.tensor` omega: Two-dimensional matrix | ||||
|     """ | ||||
|     projected_x = x @ omega | ||||
|     projected_y = y @ omega | ||||
| @@ -57,11 +78,11 @@ def omega_distance(x, y, omega): | ||||
| def lomega_distance(x, y, omegas): | ||||
|     r"""Localized Omega distance. | ||||
|  | ||||
|     Compute :math:`{\langle \Omega_k x, \Omega_k y_k \rangle}_p` | ||||
|     Compute :math:`{\| \Omega_k \bm x - \Omega_k \bm y_k \|}_p` | ||||
|  | ||||
|     Expected dimension of x is 2. | ||||
|     Expected dimension of y is 2. | ||||
|     Expected dimension of omegas is 3. | ||||
|     :param `torch.tensor` x: Two-dimensional tensor | ||||
|     :param `torch.tensor` y: Two-dimensional tensor | ||||
|     :param `torch.tensor` omegas: Three-dimensional tensor | ||||
|     """ | ||||
|     projected_x = x @ omegas | ||||
|     projected_y = torch.diagonal(y @ omegas).T | ||||
| @@ -74,31 +95,38 @@ def lomega_distance(x, y, omegas): | ||||
|  | ||||
|  | ||||
| def euclidean_distance_matrix(x, y, squared=False, epsilon=1e-10): | ||||
|     r""" Computes an euclidean distanes matrix given two distinct vectors. | ||||
|     r"""Compute a Euclidean distance matrix given two sets of vectors. | ||||
|     The last dimension must be the vector dimension! | ||||
|     The distance is computed via the dot-product identity, which avoids the memory overhead of the explicit subtraction! | ||||
|  | ||||
|     x.shape = (number_of_x_vectors, vector_dim) | ||||
|     y.shape = (number_of_y_vectors, vector_dim) | ||||
|     - ``x.shape = (number_of_x_vectors, vector_dim)`` | ||||
|     - ``y.shape = (number_of_y_vectors, vector_dim)`` | ||||
|  | ||||
|     output: matrix of distances (number_of_x_vectors, number_of_y_vectors) | ||||
|     """ | ||||
|     for tensor in [x, y]: | ||||
|         if tensor.ndim != 2: | ||||
|             raise ValueError( | ||||
|                 'The tensor dimension must be two. You provide: tensor.ndim=' + | ||||
|                 str(tensor.ndim) + '.') | ||||
|                 "The tensor dimension must be two. You provide: tensor.ndim=" | ||||
|                 + str(tensor.ndim) | ||||
|                 + "." | ||||
|             ) | ||||
|     if not equal_int_shape([tuple(x.shape)[1]], [tuple(y.shape)[1]]): | ||||
|         raise ValueError( | ||||
|             'The vector shape must be equivalent in both tensors. You provide: tuple(y.shape)[1]=' | ||||
|             + str(tuple(x.shape)[1]) + ' and  tuple(y.shape)(y)[1]=' + | ||||
|             str(tuple(y.shape)[1]) + '.') | ||||
|             "The vector shape must be equivalent in both tensors. You provide: tuple(y.shape)[1]=" | ||||
|             + str(tuple(x.shape)[1]) | ||||
|             + " and  tuple(y.shape)(y)[1]=" | ||||
|             + str(tuple(y.shape)[1]) | ||||
|             + "." | ||||
|         ) | ||||
|  | ||||
|     y = torch.transpose(y) | ||||
|  | ||||
|     diss = torch.sum(x**2, axis=1, | ||||
|                      keepdims=True) - 2 * torch.dot(x, y) + torch.sum( | ||||
|                          y**2, axis=0, keepdims=True) | ||||
|     diss = ( | ||||
|         torch.sum(x ** 2, axis=1, keepdims=True) | ||||
|         - 2 * torch.dot(x, y) | ||||
|         + torch.sum(y ** 2, axis=0, keepdims=True) | ||||
|     ) | ||||
|  | ||||
|     if not squared: | ||||
|         if epsilon == 0: | ||||
| @@ -111,12 +139,18 @@ def euclidean_distance_matrix(x, y, squared=False, epsilon=1e-10): | ||||
|  | ||||
| def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10): | ||||
|     r"""Tangent distances based on the tensorflow implementation of Sascha Saralajew. | ||||
|     For more info about Tangen distances see DOI:10.1109/IJCNN.2016.7727534. | ||||
|  | ||||
|     For more info about Tangent distances see | ||||
|  | ||||
|     DOI:10.1109/IJCNN.2016.7727534. | ||||
|  | ||||
|     The subspaces are always assumed to be transposed and must be orthogonal! | ||||
|     For local non sparse signals subspaces must be provided! | ||||
|     shape(signals): batch x proto_number x channels x dim1 x dim2 x ... x dimN | ||||
|     shape(protos): proto_number x dim1 x dim2 x ... x dimN | ||||
|     shape(subspaces): (optional [proto_number]) x prod(dim1 * dim2 * ... * dimN)  x prod(projected_atom_shape) | ||||
|  | ||||
|     - shape(signals): batch x proto_number x channels x dim1 x dim2 x ... x dimN | ||||
|     - shape(protos): proto_number x dim1 x dim2 x ... x dimN | ||||
|     - shape(subspaces): (optional [proto_number]) x prod(dim1 * dim2 * ... * dimN)  x prod(projected_atom_shape) | ||||
|  | ||||
|     subspace should be orthogonalized | ||||
|     Pytorch implementation of Sascha Saralajew's tensorflow code. | ||||
|     Translation by Christoph Raab | ||||
| @@ -139,18 +173,19 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10): | ||||
|         if subspaces.ndim == 2: | ||||
|             # clean solution without map if the matrix_scope is global | ||||
|             projectors = torch.eye(subspace_int_shape[-2]) - torch.dot( | ||||
|                 subspaces, torch.transpose(subspaces)) | ||||
|                 subspaces, torch.transpose(subspaces) | ||||
|             ) | ||||
|  | ||||
|             projected_signals = torch.dot(signals, projectors) | ||||
|             projected_protos = torch.dot(protos, projectors) | ||||
|  | ||||
|             diss = euclidean_distance_matrix(projected_signals, | ||||
|                                              projected_protos, | ||||
|                                              squared=squared, | ||||
|                                              epsilon=epsilon) | ||||
|             diss = euclidean_distance_matrix( | ||||
|                 projected_signals, projected_protos, squared=squared, epsilon=epsilon | ||||
|             ) | ||||
|  | ||||
|             diss = torch.reshape( | ||||
|                 diss, [signal_shape[0], signal_shape[2], proto_shape[0]]) | ||||
|                 diss, [signal_shape[0], signal_shape[2], proto_shape[0]] | ||||
|             ) | ||||
|  | ||||
|             return torch.permute(diss, [0, 2, 1]) | ||||
|  | ||||
| @@ -158,18 +193,21 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10): | ||||
|  | ||||
|             # no solution without map possible --> memory efficient but slow! | ||||
|             projectors = torch.eye(subspace_int_shape[-2]) - torch.bmm( | ||||
|                 subspaces, | ||||
|                 subspaces)  #K.batch_dot(subspaces, subspaces, [2, 2]) | ||||
|                 subspaces, subspaces | ||||
|             )  # K.batch_dot(subspaces, subspaces, [2, 2]) | ||||
|  | ||||
|             projected_protos = (protos @ subspaces | ||||
|             projected_protos = ( | ||||
|                 protos @ subspaces | ||||
|             ).T  # K.batch_dot(projectors, protos, [1, 1])) | ||||
|  | ||||
|             def projected_norm(projector): | ||||
|                 return torch.sum(torch.dot(signals, projector) ** 2, axis=1) | ||||
|  | ||||
|             diss = torch.transpose(map(projected_norm, projectors)) \ | ||||
|                     - 2 * torch.dot(signals, projected_protos) \ | ||||
|             diss = ( | ||||
|                 torch.transpose(map(projected_norm, projectors)) | ||||
|                 - 2 * torch.dot(signals, projected_protos) | ||||
|                 + torch.sum(projected_protos ** 2, axis=0, keepdims=True) | ||||
|             ) | ||||
|  | ||||
|             if not squared: | ||||
|                 if epsilon == 0: | ||||
| @@ -178,7 +216,8 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10): | ||||
|                     diss = torch.sqrt(torch.max(diss, epsilon)) | ||||
|  | ||||
|             diss = torch.reshape( | ||||
|                 diss, [signal_shape[0], signal_shape[2], proto_shape[0]]) | ||||
|                 diss, [signal_shape[0], signal_shape[2], proto_shape[0]] | ||||
|             ) | ||||
|  | ||||
|             return torch.permute(diss, [0, 2, 1]) | ||||
|  | ||||
| @@ -194,12 +233,13 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10): | ||||
|  | ||||
|             # Scope: Tangentspace Projections | ||||
|             diff = torch.reshape( | ||||
|                 diff, (signal_shape[0] * signal_shape[2], signal_shape[1], -1)) | ||||
|                 diff, (signal_shape[0] * signal_shape[2], signal_shape[1], -1) | ||||
|             ) | ||||
|             projected_diff = diff @ projectors | ||||
|             projected_diff = torch.reshape( | ||||
|                 projected_diff, | ||||
|                 (signal_shape[0], signal_shape[2], signal_shape[1]) + | ||||
|                 signal_shape[3:]) | ||||
|                 (signal_shape[0], signal_shape[2], signal_shape[1]) + signal_shape[3:], | ||||
|             ) | ||||
|  | ||||
|             diss = torch.norm(projected_diff, 2, dim=-1) | ||||
|             return diss.permute([0, 2, 1]) | ||||
| @@ -211,13 +251,14 @@ def tangent_distance(signals, protos, subspaces, squared=False, epsilon=1e-10): | ||||
|  | ||||
|             # Scope: Tangentspace Projections | ||||
|             diff = torch.reshape( | ||||
|                 diff, (signal_shape[0] * signal_shape[2], signal_shape[1], -1)) | ||||
|                 diff, (signal_shape[0] * signal_shape[2], signal_shape[1], -1) | ||||
|             ) | ||||
|             diff = diff.permute([1, 0, 2]) | ||||
|             projected_diff = torch.bmm(diff, projectors) | ||||
|             projected_diff = torch.reshape( | ||||
|                 projected_diff, | ||||
|                 (signal_shape[1], signal_shape[0], signal_shape[2]) + | ||||
|                 signal_shape[3:]) | ||||
|                 (signal_shape[1], signal_shape[0], signal_shape[2]) + signal_shape[3:], | ||||
|             ) | ||||
|  | ||||
|             diss = torch.norm(projected_diff, 2, dim=-1) | ||||
|             return diss.permute([1, 0, 2]).squeeze(-1) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user