From 134a3ce1268b8b964041b3e0c99d72f7d8a0c396 Mon Sep 17 00:00:00 2001 From: Lukas Tatzel Date: Mon, 6 Sep 2021 15:45:43 +0200 Subject: [PATCH 1/3] [ADD] Noise-free quadratic with an example --- deepobs/pytorch/testproblems/__init__.py | 1 + .../testproblems/noise_free_quadratic.py | 242 ++++++++++++++++++ examples/runner_noise_free_quadratic.py | 19 ++ 3 files changed, 262 insertions(+) create mode 100644 deepobs/pytorch/testproblems/noise_free_quadratic.py create mode 100644 examples/runner_noise_free_quadratic.py diff --git a/deepobs/pytorch/testproblems/__init__.py b/deepobs/pytorch/testproblems/__init__.py index 3b8bd446..02c90fbc 100644 --- a/deepobs/pytorch/testproblems/__init__.py +++ b/deepobs/pytorch/testproblems/__init__.py @@ -18,6 +18,7 @@ from .mnist_mlp import mnist_mlp from .mnist_vae import mnist_vae from .quadratic_deep import quadratic_deep +from .noise_free_quadratic import noise_free_quadratic from .svhn_3c3d import svhn_3c3d from .svhn_wrn164 import svhn_wrn164 from .testproblem import TestProblem diff --git a/deepobs/pytorch/testproblems/noise_free_quadratic.py b/deepobs/pytorch/testproblems/noise_free_quadratic.py new file mode 100644 index 00000000..94e2433a --- /dev/null +++ b/deepobs/pytorch/testproblems/noise_free_quadratic.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- +"""TODO Description of the testproblem.""" + +import scipy.linalg +import torch +from torch.utils import data + +from ..datasets import dataset +from .testproblem import UnregularizedTestproblem + + +# The data set +class data_noise_free_quadratic(dataset.DataSet): + """Dataset class for the noise-free quadratic + + The inputs and corresponding labels for training (and similarly for + validation and testing) are basically both tensors of size + ``train_size`` x ``dim``, where ``dim`` is the dimensionality of the + problem. The input data is arbitrary (because it is later multiplied by + zero), the labels are zero. 
The methods below yield the respective + data loaders for all three sets. + """ + + def __init__( + self, batch_size, dim, train_size=128, valid_size=128, test_size=128, + ): + self._dim = dim + + # Check batch size + assert batch_size <= min( + [train_size, valid_size, test_size] + ), "Batch size exceeds size of training/validation/test set" + + self._train_size = train_size + self._valid_size = valid_size + self._test_size = test_size + + # This attribute is needed by _make_train_eval_dataloader + self._train_eval_size = self._train_size + + super(data_noise_free_quadratic, self).__init__(batch_size) + + def _make_train_and_valid_dataloader(self): + """Creates the training and validation data loader.""" + + # Training data + train_data = torch.rand(self._train_size, self._dim) + train_labels = torch.zeros(self._train_size, self._dim) + train_dataset = data.TensorDataset(train_data, train_labels) + train_loader = self._make_dataloader(train_dataset, shuffle=True) + + # Validation data + valid_data = torch.rand(self._valid_size, self._dim) + valid_labels = torch.zeros(self._valid_size, self._dim) + valid_dataset = data.TensorDataset(valid_data, valid_labels) + valid_loader = self._make_dataloader(valid_dataset) + + return train_loader, valid_loader + + def _make_test_dataloader(self): + """Creates the test data loader.""" + + # Test data + test_data = torch.rand(self._test_size, self._dim) + test_labels = torch.zeros(self._test_size, self._dim) + test_dataset = data.TensorDataset(test_data, test_labels) + test_loader = self._make_dataloader(test_dataset) + + return test_loader + + +# Some helper functions +def set_weights(linear_layer, weights, req_grad): + """Set weights in linear layer and choose if these parameters are + trainable. 
+ """ + w = linear_layer.weight + if weights.shape == w.data.shape: + w.data = weights + w.requires_grad = req_grad + else: + raise ValueError("Weights don't have the right shape") + + +def set_biases(linear_layer, biases, req_grad): + """Set biases in linear layer and choose if these parameters are + trainable. + """ + b = linear_layer.bias + if biases.shape == b.data.shape: + b.data = biases + b.requires_grad = req_grad + else: + raise ValueError("Biases don't have the right shape") + + +def torch_to_numpy(tensor): + """Convert a torch tensor to a numpy array""" + return tensor.detach().cpu().numpy() + + +def numpy_to_torch(array): + """Convert a numpy array to a torch float tensor""" + return (torch.from_numpy(array)).to(torch.float32) + + +# The network +def get_noise_free_quadratic_net(H, theta): + """Build the network for the noise-free quadratic + + The network is based on the Hessian ``H`` and the vector ``theta``. It + is designed such that the MSE loss of the network (which is parameterized + by ``theta``) is ``theta.T @ H @ theta`` for arbitrary inputs with labels + that are zero. + """ + + dim = H.shape[0] + + # Use the matrix square root from scipy + H_sqrt = numpy_to_torch(scipy.linalg.sqrtm(torch_to_numpy(H), disp=True)) + + # First layer returns ``0 @ x + theta = theta`` + L1 = torch.nn.Linear(dim, dim, bias=True) + set_weights(L1, weights=torch.zeros(dim, dim), req_grad=False) + set_biases(L1, biases=theta.reshape(dim), req_grad=True) + + # Second layer returns ``H_sqrt @ theta`` + L2 = torch.nn.Linear(dim, dim, bias=False) + set_weights(L2, weights=H_sqrt, req_grad=False) + + return torch.nn.Sequential(L1, L2) + + +# The problem class +class noise_free_quadratic(UnregularizedTestproblem): + """Problem class for the noise-free quadratic + + The problem (determined by the Hessian and initial network parameters) is + defined in the constructor. 
It is a quadratic problem of the form + ``theta.T @ H @ theta``, where ``H`` is the Hessian and ``theta`` + corresponds to the trainable parameters of the network. They are initially + set to ``theta_init``. + """ + + def __init__( + self, batch_size, weight_decay=None, + ): + """Here, the quadratic problem is defined. Note that the batch size + is arbitrary: since the problem is noise-free, the batch size has no + impact on the resulting loss. + """ + + super(noise_free_quadratic, self).__init__(batch_size, weight_decay) + + # Define quadratic problem + D = 20 + H_diag = torch.Tensor([i ** 2 for i in range(1, D + 1)]) + self._H = torch.diagflat(H_diag) + self._theta_init = 100 * torch.ones(D) + + # Check problem + self.check_problem() + self._dim = self._H.shape[0] + + def check_problem(self): + """Make sure that the attributes ``self._H`` and ``self._theta_init`` + "match" (dimensions) and that the Hessian is symmetric pos. definite. + """ + + H = self._H + theta_init = self._theta_init + + # Check dimensions + dim1, dim2 = H.shape + assert dim1 == dim2, "Hessian has to be square" + assert theta_init.shape == torch.Size( + [dim1] + ), "`theta_init` has to be 1D Tensor of the right size" + + # Check symmetric positive definite + assert torch.allclose(H.T, H), "Hessian has to be symmetric" + H_eigvals, _ = torch.symeig(H, eigenvectors=False) + assert torch.all(H_eigvals > 0), "Hessian has to be positive definite" + + def set_up(self): + """Initialize the global attributes ``net``, ``data`` and + ``loss_function``. 
+ """ + + # Network + H_net = self._dim * self._H + self.net = get_noise_free_quadratic_net(H_net, self._theta_init) + self.net.to(self._device) + + # Data set + self.data = data_noise_free_quadratic(self._batch_size, dim=self._dim) + + # Loss function + self.loss_function = torch.nn.MSELoss + + # Create regularization groups (in our case no regularization is used) + self.regularization_groups = self.get_regularization_groups() + + def get_batch_loss_and_accuracy_func( + self, reduction="mean", add_regularization_if_available=True + ): + """The original method from the base class doesn't work here + (especially for the accuracy), so we overwrite it. It is basically a + copy of the original method, but we set the accuracy to zero instead + of trying to computing it. Note that the accuracy does't make sense + as a metric for our particular problem. + """ + + inputs, labels = self._get_next_batch() + inputs = inputs.to(self._device) + labels = labels.to(self._device) + + loss_function = self.loss_function(reduction=reduction) + + def forward_func(): + + # Evaluate loss: In evaluation phase no gradient is needed + if self.phase in ["train_eval", "test", "valid"]: + with torch.no_grad(): + outputs = self.net(inputs) + loss = loss_function(outputs, labels) + else: + outputs = self.net(inputs) + loss = loss_function(outputs, labels) + + # Evaluate regularizer loss + if add_regularization_if_available: + regularizer_loss = self.get_regularization_loss() + else: + regularizer_loss = torch.zeros(1).to(self._device) + + # Accuracy + accuracy = 0.0 + + return loss + regularizer_loss, accuracy + + return forward_func diff --git a/examples/runner_noise_free_quadratic.py b/examples/runner_noise_free_quadratic.py new file mode 100644 index 00000000..9c0214fd --- /dev/null +++ b/examples/runner_noise_free_quadratic.py @@ -0,0 +1,19 @@ +"""This script runs SGD on the noise-free quadratic problem.""" + +import torch +from deepobs import pytorch as pt + +optimizer_class = 
torch.optim.SGD +hyperparams = { + "lr": {"type": float}, + "momentum": {"type": float, "default": 0.99}, + "nesterov": {"type": bool, "default": False}, +} +runner = pt.runners.StandardRunner(optimizer_class, hyperparams) + +runner.run( + testproblem="noise_free_quadratic", + hyperparams={"lr": 1e-3}, + batch_size=8, + num_epochs=10, +) From bade1a906c8954a013b2e138601b19188b465ec4 Mon Sep 17 00:00:00 2001 From: Lukas Tatzel Date: Mon, 6 Sep 2021 15:46:03 +0200 Subject: [PATCH 2/3] [ADD] Test case for the noise-free quadratic problem --- .../testproblems/test_noise_free_quadratic.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/pytorch/testproblems/test_noise_free_quadratic.py diff --git a/tests/pytorch/testproblems/test_noise_free_quadratic.py b/tests/pytorch/testproblems/test_noise_free_quadratic.py new file mode 100644 index 00000000..2892d835 --- /dev/null +++ b/tests/pytorch/testproblems/test_noise_free_quadratic.py @@ -0,0 +1,69 @@ +"""This script tests if the loss, defined by the problem class, actually +corresponds to a quadratic loss. We do this by repeatedly defining problems +(determined by a random Hessian and an initial network parameter vector) and +checking if the model's loss corresponds to the manually computed quadratic +loss ``theta_init.T @ H @ theta_init``. It is assumed that the loss-function is +used with ``reduction="mean"``. +""" + +import pytest +import torch +from deepobs.pytorch.testproblems import noise_free_quadratic + +NOF_BATCHES = 2 + +DIMS = [1, 5, 50] +IDS_DIMS = [f"dimension={dim}" for dim in DIMS] + +SEEDS = [0, 1, 42] +IDS_SEEDS = [f"seed_value={seed}" for seed in SEEDS] + + +@pytest.mark.parametrize("seed", SEEDS, ids=IDS_SEEDS) +@pytest.mark.parametrize("dim", DIMS, ids=IDS_DIMS) +def test_func(seed, dim): + + # Initialize testproblem + nf_quadratic = noise_free_quadratic(batch_size=8) + + # Create random symmetric pos. 
definite Hessian and `theta_init` + torch.manual_seed(seed) + + theta_init = torch.rand(dim) + R = torch.rand(dim, dim) + H = R @ R.T + 0.01 * torch.diag(torch.ones(dim)) + + # Set up the problem + nf_quadratic._dim = dim + nf_quadratic._H = H + nf_quadratic._theta_init = theta_init + nf_quadratic.check_problem() + nf_quadratic.set_up() + + # Extract dataset, net, loss function, device + data = nf_quadratic.data + train_loader, _ = data._make_train_and_valid_dataloader() + net = nf_quadratic.net + loss_function = nf_quadratic.loss_function(reduction="mean") + device = torch.device(nf_quadratic._device) + + for batch_idx in range(NOF_BATCHES): + + # Get some data (the inputs shouldn't affect the loss at all) + input, labels = list(train_loader)[batch_idx] + input = input.to(device) + labels = labels.to(device) + + # Compare the model's loss with the manually computed loss + loss_model = loss_function(net(input), labels) + loss_manually = theta_init.T @ H @ theta_init + + assert torch.allclose( + loss_model, loss_manually + ), "The model's loss and the manually computed quadratic loss deviate." 
+ + +if __name__ == "__main__": + + # For debugging + test_func(seed=0, dim=10) From 3df764ce14f53c3599b68a5b5d62480804a9322c Mon Sep 17 00:00:00 2001 From: Lukas Tatzel Date: Wed, 27 Oct 2021 10:12:37 +0200 Subject: [PATCH 3/3] [REF] Incorporated PR feedback --- .../testproblems/noise_free_quadratic.py | 61 +++++++------------ .../testproblems/test_noise_free_quadratic.py | 12 +--- 2 files changed, 25 insertions(+), 48 deletions(-) diff --git a/deepobs/pytorch/testproblems/noise_free_quadratic.py b/deepobs/pytorch/testproblems/noise_free_quadratic.py index 94e2433a..7c0f3c46 100644 --- a/deepobs/pytorch/testproblems/noise_free_quadratic.py +++ b/deepobs/pytorch/testproblems/noise_free_quadratic.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- -"""TODO Description of the testproblem.""" +"""Noise-free quadratic problem (network with two linear layers and MSE-loss)""" +from contextlib import nullcontext import scipy.linalg import torch from torch.utils import data @@ -28,7 +29,7 @@ def __init__( # Check batch size assert batch_size <= min( - [train_size, valid_size, test_size] + train_size, valid_size, test_size ), "Batch size exceeds size of training/validation/test set" self._train_size = train_size @@ -38,7 +39,7 @@ def __init__( # This attribute is needed by _make_train_eval_dataloader self._train_eval_size = self._train_size - super(data_noise_free_quadratic, self).__init__(batch_size) + super().__init__(batch_size) def _make_train_and_valid_dataloader(self): """Creates the training and validation data loader.""" @@ -70,28 +71,17 @@ def _make_test_dataloader(self): # Some helper functions -def set_weights(linear_layer, weights, req_grad): - """Set weights in linear layer and choose if these parameters are - trainable. +def set_param(linear_layer, param, param_str, req_grad): + """Set weights (`param_str = weight`) or biases (`param_str = bias`) in + linear layer and choose if these parameters are trainable. 
""" - w = linear_layer.weight - if weights.shape == w.data.shape: - w.data = weights - w.requires_grad = req_grad - else: - raise ValueError("Weights don't have the right shape") + p = getattr(linear_layer, param_str) + if param.shape != p.shape: + raise ValueError("parameters don't have the right shape") -def set_biases(linear_layer, biases, req_grad): - """Set biases in linear layer and choose if these parameters are - trainable. - """ - b = linear_layer.bias - if biases.shape == b.data.shape: - b.data = biases - b.requires_grad = req_grad - else: - raise ValueError("Biases don't have the right shape") + p.data = param + p.requires_grad = req_grad def torch_to_numpy(tensor): @@ -113,7 +103,6 @@ def get_noise_free_quadratic_net(H, theta): by ``theta``) is ``theta.T @ H @ theta`` for arbitrary inputs with labels that are zero. """ - dim = H.shape[0] # Use the matrix square root from scipy @@ -121,12 +110,12 @@ def get_noise_free_quadratic_net(H, theta): # First layer returns ``0 @ x + theta = theta`` L1 = torch.nn.Linear(dim, dim, bias=True) - set_weights(L1, weights=torch.zeros(dim, dim), req_grad=False) - set_biases(L1, biases=theta.reshape(dim), req_grad=True) + set_param(L1, torch.zeros(dim, dim), "weight", req_grad=False) + set_param(L1, theta.reshape(dim), "bias", req_grad=True) # Second layer returns ``H_sqrt @ theta`` L2 = torch.nn.Linear(dim, dim, bias=False) - set_weights(L2, weights=H_sqrt, req_grad=False) + set_param(L2, H_sqrt, "weight", req_grad=False) return torch.nn.Sequential(L1, L2) @@ -142,15 +131,12 @@ class noise_free_quadratic(UnregularizedTestproblem): set to ``theta_init``. """ - def __init__( - self, batch_size, weight_decay=None, - ): + def __init__(self, batch_size, weight_decay=None): """Here, the quadratic problem is defined. Note that the batch size is arbitrary: since the problem is noise-free, the batch size has no impact on the resulting loss. 
""" - - super(noise_free_quadratic, self).__init__(batch_size, weight_decay) + super().__init__(batch_size, weight_decay) # Define quadratic problem D = 20 @@ -166,7 +152,6 @@ def check_problem(self): """Make sure that the attributes ``self._H`` and ``self._theta_init`` "match" (dimensions) and that the Hessian is symmetric pos. definite. """ - H = self._H theta_init = self._theta_init @@ -186,7 +171,6 @@ def set_up(self): """Initialize the global attributes ``net``, ``data`` and ``loss_function``. """ - # Network H_net = self._dim * self._H self.net = get_noise_free_quadratic_net(H_net, self._theta_init) @@ -210,7 +194,6 @@ def get_batch_loss_and_accuracy_func( of trying to computing it. Note that the accuracy does't make sense as a metric for our particular problem. """ - inputs, labels = self._get_next_batch() inputs = inputs.to(self._device) labels = labels.to(self._device) @@ -220,11 +203,11 @@ def get_batch_loss_and_accuracy_func( def forward_func(): # Evaluate loss: In evaluation phase no gradient is needed - if self.phase in ["train_eval", "test", "valid"]: - with torch.no_grad(): - outputs = self.net(inputs) - loss = loss_function(outputs, labels) - else: + with torch.no_grad() if self.phase in [ + "train_eval", + "test", + "valid", + ] else nullcontext(): outputs = self.net(inputs) loss = loss_function(outputs, labels) diff --git a/tests/pytorch/testproblems/test_noise_free_quadratic.py b/tests/pytorch/testproblems/test_noise_free_quadratic.py index 2892d835..a05dc207 100644 --- a/tests/pytorch/testproblems/test_noise_free_quadratic.py +++ b/tests/pytorch/testproblems/test_noise_free_quadratic.py @@ -23,15 +23,15 @@ @pytest.mark.parametrize("dim", DIMS, ids=IDS_DIMS) def test_func(seed, dim): + torch.manual_seed(seed) + # Initialize testproblem nf_quadratic = noise_free_quadratic(batch_size=8) # Create random symmetric pos. 
definite Hessian and `theta_init` - torch.manual_seed(seed) - theta_init = torch.rand(dim) R = torch.rand(dim, dim) - H = R @ R.T + 0.01 * torch.diag(torch.ones(dim)) + H = R @ R.T + 0.01 * torch.eye(dim) # Set up the problem nf_quadratic._dim = dim @@ -61,9 +61,3 @@ def test_func(seed, dim): assert torch.allclose( loss_model, loss_manually ), "The model's loss and the manually computed quadratic loss deviate." - - -if __name__ == "__main__": - - # For debugging - test_func(seed=0, dim=10)