From 2a85c94b55c2b4cd09f9d605fb04f47b9ee716fc Mon Sep 17 00:00:00 2001
From: Alexander Engelsberger
Date: Tue, 17 May 2022 11:56:18 +0200
Subject: [PATCH] chore: minor changes and version updates

---
 examples/cbc_iris.py           |   8 ++-
 prototorch/datasets/sklearn.py | 112 +++++++++++++++++++--------------
 prototorch/datasets/tecator.py |  13 ++--
 3 files changed, 75 insertions(+), 58 deletions(-)

diff --git a/examples/cbc_iris.py b/examples/cbc_iris.py
index b90429c..c20566e 100644
--- a/examples/cbc_iris.py
+++ b/examples/cbc_iris.py
@@ -1,5 +1,7 @@
 """ProtoTorch CBC example using 2D Iris data."""
 
+import logging
+
 import torch
 from matplotlib import pyplot as plt
 
@@ -34,7 +36,7 @@ class VisCBC2D():
         self.resolution = 100
         self.cmap = "viridis"
 
-    def on_epoch_end(self):
+    def on_train_epoch_end(self):
         x_train, y_train = self.x_train, self.y_train
         _components = self.model.components_layer._components.detach()
         ax = self.fig.gca()
@@ -94,5 +96,5 @@ if __name__ == "__main__":
             correct += (y_pred.argmax(1) == y).float().sum(0)
 
         acc = 100 * correct / len(train_ds)
-        print(f"Epoch: {epoch} Accuracy: {acc:05.02f}%")
-        vis.on_epoch_end()
+        logging.info(f"Epoch: {epoch} Accuracy: {acc:05.02f}%")
+        vis.on_train_epoch_end()
diff --git a/prototorch/datasets/sklearn.py b/prototorch/datasets/sklearn.py
index a71ce34..bb5e530 100644
--- a/prototorch/datasets/sklearn.py
+++ b/prototorch/datasets/sklearn.py
@@ -5,8 +5,10 @@ URL:
 
 """
 
+from __future__ import annotations
+
 import warnings
-from typing import Sequence, Union
+from typing import Sequence
 
 from sklearn.datasets import (
     load_iris,
@@ -41,9 +43,9 @@ class Iris(NumpyDataset):
     :param dims: select a subset of dimensions
     """
 
-    def __init__(self, dims: Sequence[int] = None):
+    def __init__(self, dims: Sequence[int] | None = None):
         x, y = load_iris(return_X_y=True)
-        if dims:
+        if dims is not None:
             x = x[:, dims]
         super().__init__(x, y)
 
@@ -56,15 +58,19 @@ class Blobs(NumpyDataset):
 
     """
 
-    def __init__(self,
-                 num_samples: int = 300,
-                 num_features: int = 2,
-                 seed: Union[None, int] = 0):
-        x, y = make_blobs(num_samples,
-                          num_features,
-                          centers=None,
-                          random_state=seed,
-                          shuffle=False)
+    def __init__(
+        self,
+        num_samples: int = 300,
+        num_features: int = 2,
+        seed: None | int = 0,
+    ):
+        x, y = make_blobs(
+            num_samples,
+            num_features,
+            centers=None,
+            random_state=seed,
+            shuffle=False,
+        )
         super().__init__(x, y)
 
 
@@ -77,29 +83,33 @@ class Random(NumpyDataset):
     Note: n_classes * n_clusters_per_class <= 2**n_informative must satisfy.
     """
 
-    def __init__(self,
-                 num_samples: int = 300,
-                 num_features: int = 2,
-                 num_classes: int = 2,
-                 num_clusters: int = 2,
-                 num_informative: Union[None, int] = None,
-                 separation: float = 1.0,
-                 seed: Union[None, int] = 0):
+    def __init__(
+        self,
+        num_samples: int = 300,
+        num_features: int = 2,
+        num_classes: int = 2,
+        num_clusters: int = 2,
+        num_informative: None | int = None,
+        separation: float = 1.0,
+        seed: None | int = 0,
+    ):
         if not num_informative:
             import math
             num_informative = math.ceil(math.log2(num_classes * num_clusters))
         if num_features < num_informative:
             warnings.warn("Generating more features than requested.")
             num_features = num_informative
-        x, y = make_classification(num_samples,
-                                   num_features,
-                                   n_informative=num_informative,
-                                   n_redundant=0,
-                                   n_classes=num_classes,
-                                   n_clusters_per_class=num_clusters,
-                                   class_sep=separation,
-                                   random_state=seed,
-                                   shuffle=False)
+        x, y = make_classification(
+            num_samples,
+            num_features,
+            n_informative=num_informative,
+            n_redundant=0,
+            n_classes=num_classes,
+            n_clusters_per_class=num_clusters,
+            class_sep=separation,
+            random_state=seed,
+            shuffle=False,
+        )
         super().__init__(x, y)
 
 
@@ -113,16 +123,20 @@ class Circles(NumpyDataset):
 
     """
 
-    def __init__(self,
-                 num_samples: int = 300,
-                 noise: float = 0.3,
-                 factor: float = 0.8,
-                 seed: Union[None, int] = 0):
-        x, y = make_circles(num_samples,
-                            noise=noise,
-                            factor=factor,
-                            random_state=seed,
-                            shuffle=False)
+    def __init__(
+        self,
+        num_samples: int = 300,
+        noise: float = 0.3,
+        factor: float = 0.8,
+        seed: None | int = 0,
+    ):
+        x, y = make_circles(
+            num_samples,
+            noise=noise,
+            factor=factor,
+            random_state=seed,
+            shuffle=False,
+        )
         super().__init__(x, y)
 
 
@@ -136,12 +150,16 @@ class Moons(NumpyDataset):
 
     """
 
-    def __init__(self,
-                 num_samples: int = 300,
-                 noise: float = 0.3,
-                 seed: Union[None, int] = 0):
-        x, y = make_moons(num_samples,
-                          noise=noise,
-                          random_state=seed,
-                          shuffle=False)
+    def __init__(
+        self,
+        num_samples: int = 300,
+        noise: float = 0.3,
+        seed: None | int = 0,
+    ):
+        x, y = make_moons(
+            num_samples,
+            noise=noise,
+            random_state=seed,
+            shuffle=False,
+        )
         super().__init__(x, y)
diff --git a/prototorch/datasets/tecator.py b/prototorch/datasets/tecator.py
index 46e8f47..fe85fbf 100644
--- a/prototorch/datasets/tecator.py
+++ b/prototorch/datasets/tecator.py
@@ -36,6 +36,7 @@ Description:
     are determined by analytic chemistry.
 """
 
+import logging
 import os
 
 import numpy as np
@@ -81,13 +82,11 @@ class Tecator(ProtoDataset):
         if self._check_exists():
             return
 
-        if self.verbose:
-            print("Making directories...")
+        logging.debug("Making directories...")
         os.makedirs(self.raw_folder, exist_ok=True)
         os.makedirs(self.processed_folder, exist_ok=True)
 
-        if self.verbose:
-            print("Downloading...")
+        logging.debug("Downloading...")
         for fileid, md5 in self._resources:
             filename = "tecator.npz"
             download_file_from_google_drive(fileid,
                                             download_root=self.raw_folder,
                                             filename=filename,
                                             md5=md5)
 
-        if self.verbose:
-            print("Processing...")
+        logging.debug("Processing...")
         with np.load(os.path.join(self.raw_folder, "tecator.npz"),
                      allow_pickle=False) as f:
             x_train, y_train = f["x_train"], f["y_train"]
@@ -117,5 +115,4 @@
                   "wb") as f:
             torch.save(test_set, f)
 
-        if self.verbose:
-            print("Done!")
+        logging.debug("Done!")