Merge branch 'master' into kernel_distances

This commit is contained in:
Alexander Engelsberger 2021-05-11 16:10:56 +02:00
commit 09c80e2d54
22 changed files with 319 additions and 160 deletions

View File

@ -1,20 +1,11 @@
[bumpversion]
current_version = 0.3.0-dev0
current_version = 0.4.2
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)
serialize =
{major}.{minor}.{patch}-{release}{build}
{major}.{minor}.{patch}
[bumpversion:part:release]
optional_value = prod
first_value = dev
values =
dev
rc
prod
[bumpversion:file:setup.py]
[bumpversion:file:./prototorch/__init__.py]

31
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@ -0,0 +1,31 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Install Prototorch by running '...'
2. Run script '...'
3. See errors
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Desktop (please complete the following information):**
- OS: [e.g. Ubuntu 20.10]
- Prototorch Version: [e.g. v0.4.0]
- Python Version: [e.g. 3.9.5]
**Additional context**
Add any other context about the problem here.

View File

@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

View File

@ -23,10 +23,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .
- name: Install extras
run: |
pip install -r requirements.txt
pip install .[all]
- name: Lint with flake8
run: |
pip install flake8

View File

@ -5,10 +5,8 @@ python: 3.8
cache:
directories:
- "./tests/artifacts"
# - "$HOME/.prototorch/datasets"
install:
- pip install . --progress-bar off
- pip install -r requirements.txt
- pip install .[all] --progress-bar off
# Generate code coverage report
script:

View File

@ -31,15 +31,15 @@ To also install the extras, use
pip install -U prototorch[all]
```
*Note: If you're using [ZSH](https://www.zsh.org/), the square brackets `[ ]`
have to be escaped like so: `\[\]`, making the install command `pip install -U
prototorch\[all\]`.*
*Note: If you're using [ZSH](https://www.zsh.org/) (which is also the default
shell on macOS now), the square brackets `[ ]` have to be escaped like so:
`\[\]`, making the install command `pip install -U prototorch\[all\]`.*
To install the bleeding-edge features and improvements:
```bash
git clone https://github.com/si-cim/prototorch.git
git checkout dev
cd prototorch
git checkout dev
pip install -e .[all]
```

View File

@ -23,7 +23,7 @@ author = "Jensun Ravichandran"
# The full version, including alpha/beta/rc tags
#
release = "0.3.0-dev0"
release = "0.4.2"
# -- General configuration ---------------------------------------------------

View File

@ -1,6 +1,9 @@
#
"""This example script shows the usage of the new components architecture.
Serialization/deserialization also works as expected.
"""
# DATASET
#
import torch
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
@ -15,9 +18,7 @@ x_train = torch.Tensor(x_train)
y_train = torch.Tensor(y_train)
num_classes = len(torch.unique(y_train))
#
# CREATE NEW COMPONENTS
#
from prototorch.components import *
from prototorch.components.initializers import *
@ -32,9 +33,7 @@ components = ReasoningComponents(
(3, 6), StratifiedSelectionInitializer(x_train, y_train))
print(components())
#
# TEST SERIALIZATION
#
import io
save = io.BytesIO()
@ -52,8 +51,8 @@ serialized_prototypes = torch.load(save)
assert torch.all(prototypes.components == serialized_prototypes.components
), "Serialization of Components failed."
assert torch.all(prototypes.labels == serialized_prototypes.labels
), "Serialization of Components failed."
assert torch.all(prototypes.component_labels == serialized_prototypes.
component_labels), "Serialization of Components failed."
save = io.BytesIO()
torch.save(components, save)

View File

@ -1,9 +1,7 @@
"""ProtoTorch package."""
# #############################################
# Core Setup
# #############################################
__version__ = "0.3.0-dev0"
__version__ = "0.4.2"
__all_core__ = [
"datasets",
@ -11,9 +9,9 @@ __all_core__ = [
"modules",
]
# #############################################
from .datasets import *
# Plugin Loader
# #############################################
import pkgutil
import pkg_resources

View File

@ -1,11 +1,2 @@
from prototorch.components.components import (
Components,
LabeledComponents,
ReasoningComponents,
)
__all__ = [
"Components",
"LabeledComponents",
"ReasoningComponents",
]
from prototorch.components.components import *
from prototorch.components.initializers import *

View File

@ -1,48 +1,52 @@
"""ProtoTorch components modules."""
import warnings
from typing import Tuple
import torch
from prototorch.components.initializers import (ClassAwareInitializer,
ComponentsInitializer,
EqualLabelsInitializer,
UnequalLabelsInitializer,
ZeroReasoningsInitializer)
from prototorch.functions.initializers import get_initializer
from torch.nn.parameter import Parameter
from prototorch.components.initializers import (
EqualLabelInitializer,
ZeroReasoningsInitializer,
)
class Components(torch.nn.Module):
"""
Components is a set of learnable Tensors.
"""
def __init__(
self,
"""Components is a set of learnable Tensors."""
def __init__(self,
number_of_components=None,
initializer=None,
*,
initialized_components=None,
dtype=torch.float32,
):
dtype=torch.float32):
super().__init__()
# Ignore all initialization settings if initialized_components is given.
if initialized_components is not None:
self._components = Parameter(initialized_components)
if number_of_components is not None or initializer is not None:
warnings.warn(
"Arguments ignored while initializing Components")
wmsg = "Arguments ignored while initializing Components"
warnings.warn(wmsg)
else:
self._initialize_components(number_of_components, initializer)
def _precheck_initializer(self, initializer):
    """Validate that `initializer` is a `ComponentsInitializer`.

    Args:
        initializer: Object expected to be an instance of (a subclass of)
            `ComponentsInitializer`.

    Raises:
        TypeError: If `initializer` is not a `ComponentsInitializer`
            instance.
    """
    if not isinstance(initializer, ComponentsInitializer):
        # The `{initializer=}` shorthand is a SyntaxError before Python 3.8.
        # Spelling it out as `initializer={...!r}` renders the same text and
        # keeps the module importable on older interpreters.
        emsg = f"`initializer` has to be some subtype of " \
               f"{ComponentsInitializer}. " \
               f"You have provided: initializer={initializer!r} instead."
        raise TypeError(emsg)
def _initialize_components(self, number_of_components, initializer):
self._precheck_initializer(initializer)
self._components = Parameter(
initializer.generate(number_of_components))
@property
def components(self):
"""
Tensor containing the component tensors.
"""
"""Tensor containing the component tensors."""
return self._components.detach().cpu()
def forward(self):
@ -53,12 +57,12 @@ class Components(torch.nn.Module):
class LabeledComponents(Components):
"""
LabeledComponents generate a set of components and a set of labels.
"""LabeledComponents generate a set of components and a set of labels.
Every Component has a label assigned.
"""
def __init__(self,
labels=None,
distribution=None,
initializer=None,
*,
initialized_components=None):
@ -66,22 +70,32 @@ class LabeledComponents(Components):
super().__init__(initialized_components=initialized_components[0])
self._labels = initialized_components[1]
else:
self._initialize_labels(labels, initializer)
self._initialize_labels(distribution)
super().__init__(number_of_components=len(self._labels),
initializer=initializer)
def _initialize_labels(self, labels, initializer):
if type(labels) == tuple:
num_classes, prototypes_per_class = labels
labels = EqualLabelInitializer(num_classes, prototypes_per_class)
def _initialize_components(self, number_of_components, initializer):
    """Create the components, forwarding the label distribution if needed.

    A `ClassAwareInitializer` additionally receives `self.distribution`, so
    it can generate a class-specific number of components. Every other
    initializer is handled by the parent implementation.

    Args:
        number_of_components: Total number of components to generate.
        initializer: Initializer that produces the component tensor.
    """
    if isinstance(initializer, ClassAwareInitializer):
        self._precheck_initializer(initializer)
        self._components = Parameter(
            initializer.generate(number_of_components, self.distribution))
    else:
        # `super()` is already bound to this instance. Passing `self` again
        # shifts every argument by one and raises a TypeError.
        super()._initialize_components(number_of_components, initializer)
def _initialize_labels(self, distribution):
if type(distribution) == tuple:
num_classes, prototypes_per_class = distribution
labels = EqualLabelsInitializer(num_classes, prototypes_per_class)
elif type(distribution) == list:
labels = UnequalLabelsInitializer(distribution)
self.distribution = labels.distribution
self._labels = labels.generate()
@property
def labels(self):
"""
Tensor containing the component tensors.
"""
def component_labels(self):
"""Tensor containing the component tensors."""
return self._labels.detach().cpu()
def forward(self):
@ -89,16 +103,19 @@ class LabeledComponents(Components):
class ReasoningComponents(Components):
"""
ReasoningComponents generate a set of components and a set of reasoning matrices.
"""ReasoningComponents generate a set of components and a set of reasoning matrices.
Every Component has a reasoning matrix assigned.
A reasoning matrix is a Nx2 matrix, where N is the number of Classes.
The first element is called positive reasoning :math:`p`, the second negative reasoning :math:`n`.
A components can reason in favour (positive) of a class, against (negative) a class or not at all (neutral).
A reasoning matrix is a Nx2 matrix, where N is the number of Classes. The
first element is called positive reasoning :math:`p`, the second negative
reasoning :math:`n`. A component can reason in favour (positive) of a
class, against (negative) a class or not at all (neutral).
It holds that :math:`0 \leq n \leq 1`, :math:`0 \leq p \leq 1` and :math:`0
\leq n+p \leq 1`. Therefore :math:`n` and :math:`p` are two elements of a
three element probability distribution.
It holds that :math:`0 \leq n \leq 1`, :math:`0 \leq p \leq 1` and :math:`0 \leq n+p \leq 1`.
Therefore :math:`n` and :math:`p` are two elements of a three element probability distribution.
"""
def __init__(self,
reasonings=None,
@ -123,10 +140,10 @@ class ReasoningComponents(Components):
@property
def reasonings(self):
"""
Returns Reasoning Matrix.
"""Returns Reasoning Matrix.
Dimension NxCx2
"""
return self._reasonings.detach().cpu()

View File

@ -1,12 +1,33 @@
"""ProtoTroch Initializers."""
import warnings
from collections.abc import Iterable
from itertools import chain
import torch
from torch.utils.data import DataLoader, Dataset
def parse_init_arg(arg):
    """Split an initializer argument into a `(data, labels)` pair.

    `arg` is either a `Dataset`, which is read out as one batch of size
    `len(arg)`, or a `(data, labels)` pair. Members of the pair that are not
    tensors yet are converted with `torch.Tensor`, each with a warning.
    """
    if isinstance(arg, Dataset):
        # NOTE: the samples are returned as loaded; they are not flattened.
        loader = DataLoader(arg, batch_size=len(arg))
        data, labels = next(iter(loader))
        return data, labels

    data, labels = arg
    if not isinstance(data, torch.Tensor):
        warnings.warn(f"Converting data to {torch.Tensor}.")
        data = torch.Tensor(data)
    if not isinstance(labels, torch.Tensor):
        warnings.warn(f"Converting labels to {torch.Tensor}.")
        labels = torch.Tensor(labels)
    return data, labels
# Components
class ComponentsInitializer:
class ComponentsInitializer(object):
def generate(self, number_of_components):
pass
raise NotImplementedError("Subclasses should implement this!")
class DimensionAwareInitializer(ComponentsInitializer):
@ -39,7 +60,7 @@ class UniformInitializer(DimensionAwareInitializer):
def generate(self, length):
gen_dims = (length, ) + self.components_dims
return torch.FloatTensor(gen_dims).uniform_(self.min, self.max)
return torch.ones(gen_dims).uniform_(self.min, self.max)
class PositionAwareInitializer(ComponentsInitializer):
@ -62,58 +83,95 @@ class MeanInitializer(PositionAwareInitializer):
class ClassAwareInitializer(ComponentsInitializer):
def __init__(self, positions, classes):
def __init__(self, arg):
super().__init__()
self.data = positions
self.classes = classes
data, labels = parse_init_arg(arg)
self.data = data
self.labels = labels
self.names = torch.unique(self.classes)
self.num_classes = len(self.names)
self.clabels = torch.unique(self.labels)
self.num_classes = len(self.clabels)
def _get_samples_from_initializer(self, length, dist):
if not dist:
per_class = length // self.num_classes
dist = self.num_classes * [per_class]
samples_list = [
init.generate(n) for init, n in zip(self.initializers, dist)
]
return torch.vstack(samples_list)
class StratifiedMeanInitializer(ClassAwareInitializer):
def __init__(self, positions, classes):
super().__init__(positions, classes)
def __init__(self, arg):
super().__init__(arg)
self.initializers = []
for name in self.names:
class_data = self.data[self.classes == name]
for clabel in self.clabels:
class_data = self.data[self.labels == clabel]
class_initializer = MeanInitializer(class_data)
self.initializers.append(class_initializer)
def generate(self, length):
per_class = length // self.num_classes
return torch.vstack(
[init.generate(per_class) for init in self.initializers])
def generate(self, length, dist=None):
    """Return the stacked per-class samples.

    Args:
        length: Total number of components; only used to derive an even
            split when no distribution is given.
        dist: Optional number of components per class. `None` (the default)
            or an empty list requests the even split.

    Returns:
        Whatever `_get_samples_from_initializer` produces for these
        arguments.
    """
    # A `None` default replaces the shared mutable `[]` default; the helper
    # still receives an empty list when nothing was passed.
    dist = [] if dist is None else dist
    return self._get_samples_from_initializer(length, dist)
class StratifiedSelectionInitializer(ClassAwareInitializer):
def __init__(self, positions, classes):
super().__init__(positions, classes)
def __init__(self, arg, *, noise=None):
super().__init__(arg)
self.noise = noise
self.initializers = []
for name in self.names:
class_data = self.data[self.classes == name]
for clabel in self.clabels:
class_data = self.data[self.labels == clabel]
class_initializer = SelectionInitializer(class_data)
self.initializers.append(class_initializer)
def generate(self, length):
per_class = length // self.num_classes
return torch.vstack(
[init.generate(per_class) for init in self.initializers])
def add_noise(self, x):
    """Randomly shift entries of `x` by `-self.noise`, `0` or `+self.noise`.

    Two independent Bernoulli draws per entry are subtracted from each
    other, which yields a mask with values in {-1, 0, 1}.
    """
    up_prob = torch.rand_like(x)
    down_prob = torch.rand_like(x)
    direction = torch.bernoulli(up_prob) - torch.bernoulli(down_prob)
    shift = self.noise * direction
    return x + shift
def generate(self, length, dist=None):
    """Return the stacked per-class samples, offset by `self.noise` if set.

    Args:
        length: Total number of components; only used to derive an even
            split when no distribution is given.
        dist: Optional number of components per class. `None` (the default)
            or an empty list requests the even split.

    Returns:
        The samples from `_get_samples_from_initializer`, with `self.noise`
        added when it is not `None`.
    """
    # A `None` default replaces the shared mutable `[]` default; the helper
    # still receives an empty list when nothing was passed.
    dist = [] if dist is None else dist
    samples = self._get_samples_from_initializer(length, dist)
    if self.noise is not None:
        # NOTE: the noise is added as a plain offset; the random masking of
        # `add_noise` is not applied here.
        samples = samples + self.noise
    return samples
# Labels
class LabelsInitializer:
def generate(self):
pass
raise NotImplementedError("Subclasses should implement this!")
class EqualLabelInitializer(LabelsInitializer):
class UnequalLabelsInitializer(LabelsInitializer):
    """Label initializer for classes with differing numbers of components.

    `dist[i]` is the number of labels generated for class index `i`.
    """
    def __init__(self, dist):
        self.dist = dist

    @property
    def distribution(self):
        """The per-class counts passed at construction."""
        return self.dist

    def generate(self):
        """Return a tensor in which class index `i` occurs `dist[i]` times."""
        labels = [
            clabel
            for clabel, count in zip(range(len(self.dist)), self.dist)
            for _ in range(count)
        ]
        return torch.tensor(labels)
class EqualLabelsInitializer(LabelsInitializer):
    """Label initializer that gives every class the same number of labels."""
    def __init__(self, classes, per_class):
        self.classes = classes
        self.per_class = per_class

    @property
    def distribution(self):
        """List holding `per_class` once for each of the `classes`."""
        return self.classes * [self.per_class]

    def generate(self):
        """Return the class indices, each repeated `per_class` times in a row."""
        # One row of class indices per repetition; transposing before
        # flattening groups equal labels next to each other.
        grid = torch.arange(self.classes).repeat(self.per_class, 1)
        return grid.t().flatten()
@ -121,7 +179,7 @@ class EqualLabelInitializer(LabelsInitializer):
# Reasonings
class ReasoningsInitializer:
def generate(self, length):
pass
raise NotImplementedError("Subclasses should implement this!")
class ZeroReasoningsInitializer(ReasoningsInitializer):
@ -131,3 +189,9 @@ class ZeroReasoningsInitializer(ReasoningsInitializer):
def generate(self):
return torch.zeros((self.length, self.classes, 2))
# Aliases
SSI = StratifiedSampleInitializer = StratifiedSelectionInitializer
SMI = StratifiedMeanInitializer
Random = RandomInitializer = UniformInitializer

View File

@ -1,7 +1,11 @@
"""ProtoTorch datasets."""
from .abstract import NumpyDataset
from .spiral import Spiral
from .tecator import Tecator
__all__ = [
"NumpyDataset",
"Spiral",
"Tecator",
]

View File

@ -13,6 +13,7 @@ import torch
class NumpyDataset(torch.utils.data.TensorDataset):
    """PyTorch `TensorDataset` built from NumPy arrays."""
    def __init__(self, *arrays):
        """Convert every array with `torch.Tensor` and pass them to the parent."""
        super().__init__(*(torch.Tensor(array) for array in arrays))

View File

@ -0,0 +1,33 @@
"""Spiral dataset for binary classification."""
import numpy as np
import torch
def make_spiral(n_samples=500, noise=0.3):
    """Generate a two-armed spiral for binary classification.

    Each arm gets `n_samples // 2` points; the second arm uses the same
    angles shifted by pi. Every coordinate is offset by an independent draw
    of `np.random.rand` (NumPy's global random state) scaled by `noise`.

    Args:
        n_samples: Requested number of points. An odd value yields
            `n_samples - 1` points because both arms have equal size.
        noise: Scale of the uniform jitter added to each coordinate.

    Returns:
        Tuple `(x, y)`: `x` has shape `(2 * (n_samples // 2), 2)` and `y`
        holds `0.0` for the first arm and `1.0` for the second.
    """
    def get_samples(n, delta_t):
        # Vectorized over the point index. The jitter is drawn as one (n, 2)
        # block, so the draws are consumed as x0, y0, x1, y1, ... exactly as
        # a per-point loop would consume them. An empty arm (n == 0) yields
        # a (0, 2) array instead of failing.
        index = np.arange(n)
        r = index / n_samples * 5
        t = 1.75 * index / n * 2 * np.pi + delta_t
        jitter = np.random.rand(n, 2) * noise
        return np.stack([r * np.sin(t), r * np.cos(t)], axis=1) + jitter

    n = n_samples // 2
    positive = get_samples(n=n, delta_t=0)
    negative = get_samples(n=n, delta_t=np.pi)
    x = np.concatenate([positive, negative], axis=0)
    y = np.concatenate([np.zeros(n), np.ones(n)])
    return x, y
class Spiral(torch.utils.data.TensorDataset):
    """Two-class spiral dataset wrapped as a `TensorDataset`."""
    def __init__(self, n_samples=500, noise=0.3):
        """Generate the spiral with `make_spiral` and store it as tensors.

        The samples go through `torch.Tensor`, the labels through
        `torch.LongTensor`.
        """
        points, labels = make_spiral(n_samples, noise)
        features = torch.Tensor(points)
        targets = torch.LongTensor(labels)
        super().__init__(features, targets)

View File

@ -52,7 +52,7 @@ class Tecator(ProtoDataset):
"""
_resources = [
("1MMuUK8V41IgNpnPDbg3E-QAL6wlErTk0",
("1P9WIYnyxFPh6f1vqAbnKfK8oYmUgyV83",
"ba5607c580d0f91bb27dc29d13c2f8df"),
] # (google_storage_id, md5hash)
classes = ["0 - low_fat", "1 - high_fat"]

View File

@ -16,40 +16,43 @@ def register_activation(function):
@register_activation
# @torch.jit.script
def identity(x, beta=torch.tensor(0)):
def identity(x, beta=0.0):
"""Identity activation function.
Definition:
:math:`f(x) = x`
Keyword Arguments:
beta (`float`): Ignored.
"""
return x
@register_activation
# @torch.jit.script
def sigmoid_beta(x, beta=torch.tensor(10)):
def sigmoid_beta(x, beta=10.0):
r"""Sigmoid activation function with scaling.
Definition:
:math:`f(x) = \frac{1}{1 + e^{-\beta x}}`
Keyword Arguments:
beta (`torch.tensor`): Scaling parameter :math:`\beta`
beta (`float`): Scaling parameter :math:`\beta`
"""
out = torch.reciprocal(1.0 + torch.exp(-int(beta.item()) * x))
out = 1.0 / (1.0 + torch.exp(-1.0 * beta * x))
return out
@register_activation
# @torch.jit.script
def swish_beta(x, beta=torch.tensor(10)):
def swish_beta(x, beta=10.0):
r"""Swish activation function with scaling.
Definition:
:math:`f(x) = \frac{x}{1 + e^{-\beta x}}`
Keyword Arguments:
beta (`torch.tensor`): Scaling parameter :math:`\beta`
beta (`float`): Scaling parameter :math:`\beta`
"""
out = x * sigmoid_beta(x, beta=beta)
return out

View File

@ -3,12 +3,19 @@
import torch
def _get_dp_dm(distances, targets, plabels):
matcher = torch.eq(targets.unsqueeze(dim=1), plabels)
if plabels.ndim == 2:
def _get_matcher(targets, labels):
"""Returns a boolean tensor."""
matcher = torch.eq(targets.unsqueeze(dim=1), labels)
if labels.ndim == 2:
# if the labels are one-hot vectors
nclasses = targets.size()[1]
matcher = torch.eq(torch.sum(matcher, dim=-1), nclasses)
return matcher
def _get_dp_dm(distances, targets, plabels):
"""Returns the d+ and d- values for a batch of distances."""
matcher = _get_matcher(targets, plabels)
not_matcher = torch.bitwise_not(matcher)
inf = torch.full_like(distances, fill_value=float("inf"))
@ -24,3 +31,26 @@ def glvq_loss(distances, target_labels, prototype_labels):
dp, dm = _get_dp_dm(distances, target_labels, prototype_labels)
mu = (dp - dm) / (dp + dm)
return mu
def lvq1_loss(distances, target_labels, prototype_labels):
    """LVQ1 loss function with support for one-hot labels.

    With `dp` and `dm` as returned by `_get_dp_dm`, the loss is `dp` where
    `dp <= dm` and `-dm` where `dp > dm`.

    See Section 4 [Sado&Yamada]
    https://papers.nips.cc/paper/1995/file/9c3b1830513cc3b8fc4b76635d32e692-Paper.pdf
    """
    dp, dm = _get_dp_dm(distances, target_labels, prototype_labels)
    # `torch.where` builds a new tensor, so `dp` is not modified in place
    # through an alias and the comparison is evaluated only once.
    mu = torch.where(dp > dm, -dm, dp)
    return mu
def lvq21_loss(distances, target_labels, prototype_labels):
    """LVQ2.1 loss function with support for one-hot labels.

    Returns the difference `dp - dm` of the two values produced by
    `_get_dp_dm`.

    See Section 4 [Sado&Yamada]
    https://papers.nips.cc/paper/1995/file/9c3b1830513cc3b8fc4b76635d32e692-Paper.pdf
    """
    d_plus, d_minus = _get_dp_dm(distances, target_labels, prototype_labels)
    return d_plus - d_minus

View File

@ -1,5 +0,0 @@
matplotlib==3.1.2
pytest==5.3.4
requests==2.22.0
codecov==2.0.22
tqdm==4.44.1

View File

@ -21,27 +21,28 @@ INSTALL_REQUIRES = [
"torchvision>=0.5.0",
"numpy>=1.9.1",
]
DATASETS = [
"requests",
"tqdm",
]
DEV = ["bumpversion"]
DOCS = [
"recommonmark",
"sphinx",
"sphinx_rtd_theme",
"sphinxcontrib-katex",
]
DATASETS = [
"requests",
"tqdm",
]
EXAMPLES = [
"sklearn",
"matplotlib",
"torchinfo",
]
TESTS = ["pytest"]
ALL = DOCS + DATASETS + EXAMPLES + TESTS
TESTS = ["codecov", "pytest"]
ALL = DATASETS + DEV + DOCS + EXAMPLES + TESTS
setup(
name="prototorch",
version="0.3.0-dev0",
version="0.4.2",
description="Highly extensible, GPU-supported "
"Learning Vector Quantization (LVQ) toolbox "
"built using PyTorch and its nn API.",
@ -71,6 +72,7 @@ setup(
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Operating System :: OS Independent",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development :: Libraries",

View File

@ -57,7 +57,7 @@ class TestActivations(unittest.TestCase):
self.assertIsNone(mismatch)
def test_sigmoid_beta1(self):
actual = activations.sigmoid_beta(self.x, beta=torch.tensor(1))
actual = activations.sigmoid_beta(self.x, beta=1.0)
desired = torch.sigmoid(self.x)
mismatch = np.testing.assert_array_almost_equal(actual,
desired,
@ -65,7 +65,7 @@ class TestActivations(unittest.TestCase):
self.assertIsNone(mismatch)
def test_swish_beta1(self):
actual = activations.swish_beta(self.x, beta=torch.tensor(1))
actual = activations.swish_beta(self.x, beta=1.0)
desired = self.x * torch.sigmoid(self.x)
mismatch = np.testing.assert_array_almost_equal(actual,
desired,

15
tox.ini
View File

@ -1,15 +0,0 @@
# tox (https://tox.readthedocs.io/) is a tool for running tests
# in multiple virtualenvs. This configuration file will run the
# test suite on all supported python versions. To use it, "pip install tox"
# and then run "tox" from this directory.
[tox]
envlist = py36,py37,py38
[testenv]
deps =
pytest
coverage
commands =
pip install -e .
coverage run -m pytest