diff --git a/flik/trustregion/__init__.py b/flik/trustregion/__init__.py new file mode 100644 index 0000000..ed63d8d --- /dev/null +++ b/flik/trustregion/__init__.py @@ -0,0 +1 @@ +"""Module for optimization using trust region.""" diff --git a/flik/trustregion/cauchy.py b/flik/trustregion/cauchy.py new file mode 100644 index 0000000..1d122b5 --- /dev/null +++ b/flik/trustregion/cauchy.py @@ -0,0 +1,96 @@ +"""Cauchy-point method for the subproblem of the trust-region method.""" + +import numpy as np + +from flik.trustregion.sub_problem import SubProblem + +__all__ = ['Cauchy'] + + +class Cauchy(SubProblem): + """The Cauchy-point method. + + Attributes + ---------- + point: np.ndarray (1-dimensional) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hess: callable + Hessian of the objective function. + radius: float + The trust region radius. + current_gradient: np.ndarray (1-dimensional) + The gradient evaluated at current point. + current_hessian: np.ndarray (1-dimensional) + The hessian evaluated at current point. + + Methods + ------- + __init__(self, point, func, grad, hess, radius, *params) + Initialize the data corresponding to the given point. + get_scaling(self) + Compute the scaling factor. + solver(self) + Subproblem Dogleg solver. + + """ + + def __init__(self, point, func, grad, hess, radius): + """Initialize the method. + + Parameters + ---------- + point: np.ndarray((N,)) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hess: callable + Hessian of the objective function. + radius: float + The trust region radius. + + """ + super().__init__(point, func, grad, hess, radius) + self.current_gradient = self.grad(self.point) + self.current_hessian = self.hess(self.point) + + def get_scaling(self): + r"""Define the scaling-factor of the step. + + :math:`\tau = 1` if :math:`g^T B g \leq 0` + :math:`\tau = \min(||g||^3/(\Delta g^T B g), 1)` otherwise. + + where :math:`g` is the gradient, :math:`B` the Hessian, and + :math:`\Delta` the trust-region radius. + + Returns: + -------- + tau: float + The scaling factor. + + """ + gbg = np.dot(self.current_gradient.T, np.dot(self.current_hessian, self.current_gradient)) + if gbg <= 0.: + tau = 1.0 + else: + gnorm_cubic = np.power(np.linalg.norm(self.current_gradient), 3.0) + tau = min(gnorm_cubic/(self.radius*gbg), 1.0) + return tau + + def solver(self): + """Solve the sub-problem using the Cauchy-point. + + Returns + ------- + step: np.ndarray((N,)) + Correction added to the current point. + + """ + tau = self.get_scaling() + step = - tau * (self.radius/np.linalg.norm(self.current_gradient)) * self.current_gradient + return step diff --git a/flik/trustregion/dogleg.py b/flik/trustregion/dogleg.py new file mode 100644 index 0000000..e20e282 --- /dev/null +++ b/flik/trustregion/dogleg.py @@ -0,0 +1,184 @@ +"""Dogleg method for the subproblem of the trust-region method.""" + +import numpy as np + +from flik.trustregion.sub_problem import SubProblem + + +__all__ = ['Dogleg', 'get_tau'] + + +class Dogleg(SubProblem): + r"""The Dogleg method. + + Attributes + ---------- + point: np.ndarray (1-dimensional) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hessian: callable + Hessian of the objective function. + radius: float + The trust region radius. 
+ full_step: np.ndarray (1-dimensional) + Full step :math:`p^B = - B^{-1} g`. + steepest_step: np.ndarray (1-dimensional) + Steepest descent direction step. + + Methods + ------- + __init__(self, point, function, grad, hessian, radius, *params) + Initialize the data corresponding to the given point. + initialize_attributes(self) + Initialize internal attributes. + update_attr(self, attr, value) + Update one attribute and then all the attributes in the class. + compute_tau(self) + Solve for the segment-division parameter. + compute_dogleg_step(self, tau) + Compute the mixed segment step. + solver(self) + Subproblem Dogleg solver. + + """ + + def __init__(self, point, func, grad, hess, radius): + """Initialize the method. + + Parameters + ---------- + point: np.ndarray((N,)) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hess: callable + Hessian of the objective function. + radius: float + The trust region radius. + + """ + super().__init__(point, func, grad, hess, radius) + self.initialize_attributes() + + def initialize_attributes(self): + """Initialize internal attributes.""" + current_gradient = self.grad(self.point) + current_hessian = self.hess(self.point) + # Check Hessian is positive definite + evals = np.linalg.eigvalsh(current_hessian) + if (evals < 0.).any(): + raise ValueError("The Hessian matrix must be positive-definite") + # Compute full step + hessian_inv = np.linalg.inv(current_hessian) + self.full_step = - np.dot(hessian_inv, current_gradient) + # Compute steepest-descent direction step + grad_squared = np.dot(current_gradient.T, current_gradient) + transformed_hessian = np.dot(current_gradient.T, np.dot(current_hessian, current_gradient)) + self.steepest_step = - (grad_squared / transformed_hessian) * current_gradient + + def update_attr(self, attr, value): + """Update all attributes after one of the changes. + + Arguments: + ---------- + attr: str + Attribute name. + value: + New value of the attribute. + + """ + setattr(self, attr, value) + if attr in ['fun', 'grad', 'hess', 'point']: + self.initialize_attributes() + + def compute_tau(self): + r"""Obtain the division parameter :math:`\tau`. + + Solve the quadratic equation: + :math:`\tau^2 ||p^B-p^U||^2 + 2 \tau (p^B-p^U)p^B + ||p^B||^2 - \Delta^2 = 0` + """ + # Define polynomial coefficients + step_diff = self.full_step - self.steepest_step + qterm = np.dot(step_diff, step_diff) + lterm = 2.0 * np.dot(self.steepest_step, step_diff) + steepest_squared = np.dot(self.steepest_step, self.steepest_step) + cterm = steepest_squared - self.radius**2.0 + tau = get_tau(qterm, lterm, cterm) + return tau + + def compute_dogleg_step(self, tau): + r"""Compute the two-line segments step. + + :math:`\tilde{p}(\tau) = \tau p^u if 0 \leq \tau \leq 1` + :math:`\tilde{p}(\tau) = p^U + (\tau+1)(p^B - p^U)` otherwise + + Arguments: + ---------- + tau: float + Line-segments division parameter + + """ + step = self.steepest_step + tau * (self.full_step - self.steepest_step) + return step + + def solver(self): + """Find new step. + + Returns + ------- + step: np.ndarray((N,)) + Correction added to the current point. 
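+
+        Notes
+        -----
+        Sketch of the logic implemented below: if the steepest-descent (Cauchy)
+        step already reaches outside the trust region, it is scaled back to the
+        boundary; if the full Newton step lies inside the region, it is taken
+        unchanged; otherwise the step lies on the segment joining the two points,
+        at the value of tau returned by compute_tau.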
+ + """ + if np.linalg.norm(self.steepest_step) >= self.radius: + step = (self.radius/np.linalg.norm(self.steepest_step))*self.steepest_step + elif np.linalg.norm(self.full_step) <= self.radius: + step = self.full_step + else: + tau = self.compute_tau() + step = self.compute_dogleg_step(tau) + return step + + +def get_tau(qterm, lterm, cterm): + r"""Compute the value of :math:`\tau`. + + Solve the quadratic equation with numpy.roots + :math:`a x^2 + b x + c = 0` + + Parameters: + ---------- + qterm: float + Coefficient of the quadratic term. + lterm: float + Coefficient of the linear term. + cterm: float + Constant term. + + Returns: + -------- + tau: float + Reasonable solution of the quadratic equation + + """ + if not isinstance(qterm, float): + raise TypeError("Coefficient qterm should be a float.") + if not isinstance(lterm, float): + raise TypeError("Coefficient lterm should be a float.") + if not isinstance(cterm, float): + raise TypeError("Coefficient cterm should be a float.") + roots = np.roots([qterm, lterm, cterm]) + if np.iscomplex(roots).all(): + raise ValueError("The roots of the quadratic equation are complex") + # Use the root with reasonable value + loc = np.where(roots >= 0.0) + if roots[loc][0] <= 2.0: + tau = roots[loc][0] + else: + raise ValueError("Neither value of tau is in the right range.") + return tau diff --git a/flik/trustregion/gradient_descent.py b/flik/trustregion/gradient_descent.py new file mode 100644 index 0000000..2967acc --- /dev/null +++ b/flik/trustregion/gradient_descent.py @@ -0,0 +1,79 @@ +"""Gradient descent method for the subproblem of the trust-region method.""" + +import numpy as np + +from flik.trustregion.sub_problem import SubProblem + + +__all__ = ['GradientDescent'] + + +class GradientDescent(SubProblem): + r"""Gradient Descent method. + + Attributes + ---------- + point: np.ndarray (1-dimensional) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hess: callable + Hessian of the objective function. + current_gradient: np.ndarray (1-dimensional) + Gradient of the objective function at the current point. + current_hessian: np.ndarray (2-dimensional) + Hessian of the objective function at the current point. + radius: float + The trust region radius. + + Methods + ------- + __init__(self, point, function, grad, hessian, radius, *params) + Initialize the data corresponding to the given point. + solver(self) + Subproblem Gradient descent solver. + + """ + + def __init__(self, point, func, grad, hess, radius): + """Initialize the method. + + Parameters + ---------- + point: np.ndarray((N,)) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hess: callable + Hessian of the objective function. + radius: float + The trust region radius. + + """ + super().__init__(point, func, grad, hess, radius) + self.current_gradient = self.grad(self.point) + self.current_hessian = self.hess(self.point) + + def solver(self): + """Find new step. + + Returns + ------- + step: np.ndarray((N,)) + Correction added to the current point. 
+ + """ + # Compute steepest-descent direction step + grad_squared = np.dot(self.current_gradient.T, self.current_gradient) + hessian_term = np.dot(self.current_hessian, self.current_gradient) + hessian_term = np.dot(self.current_gradient.T, hessian_term) + steepest_step = - (grad_squared / hessian_term) * self.current_gradient + if np.linalg.norm(steepest_step) >= self.radius: + step = (self.radius/np.linalg.norm(steepest_step))*steepest_step + else: + step = steepest_step + return step diff --git a/flik/trustregion/sub_problem.py b/flik/trustregion/sub_problem.py new file mode 100644 index 0000000..7f8d37f --- /dev/null +++ b/flik/trustregion/sub_problem.py @@ -0,0 +1,63 @@ +"""Base class for the sub-problem solvers.""" +import abc + + +class SubProblem(abc.ABC): + """Provide the functionality common to the sub-problem solvers. + + Attributes + ---------- + point: np.ndarray((N,)) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hess: callable + Hessian of the objective function. + radius: float + The trust region radius. + + Methods + ------- + __init__(self, point, function, grad, hessian, radius, *params) + Initialize the data corresponding to the given point. + solver(self) + Sub-problem solver. + + """ + + def __init__(self, point, func, grad, hess, radius): + """Initialize the data corresponding to the given point. + + Parameters + ---------- + point: np.ndarray((N,)) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hess: callable + Hessian of the objective function. + radius: float + The trust region radius. + + """ + self.point = point + self.func = func + self.grad = grad + self.hess = hess + self.radius = radius + + @abc.abstractmethod + def solver(self): + """Sub-problem solver. + + Returns + ------- + step: np.ndarray((N,)) + Correction added to the current point. + + """ + pass diff --git a/flik/trustregion/subspace.py b/flik/trustregion/subspace.py new file mode 100644 index 0000000..8b69c3d --- /dev/null +++ b/flik/trustregion/subspace.py @@ -0,0 +1,256 @@ +"""Subspace solver for the sub-problem.""" +import numpy as np +from flik.trustregion.sub_problem import SubProblem +from flik.trustregion.cauchy import Cauchy + + +class Subspace(SubProblem): + """Subspace solver. + + Attributes + ---------- + point: np.ndarray((N,)) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hess: callable + Hessian of the objective function. + radius: float + The trust region radius. + gradient: np.ndarray((N,)) + Gradient at the current point. + hessian: np.ndarray((N,N,)) + Hessian at the current point. + min_hess: float + Minimum eigenvalue of the hessian. + + Methods + ------- + __init__(self, point, function, grad, hessian, radius) + Initialize the data corresponding to the given point. + _aux_grad(self, ahess) + Calculate auxiliary gradient. + _aux_hess(self, ahess) + Calculate auxiliary hessian. + _aux_norm(self, ahess) + Calculate auxiliary matrix. + free_step(ahess, agrad) + Unconstrained step (static method). + solution_norm(ahess, grad) + Weighted norm of a vector (static method). + aux_polynomial(self, mhess, nhess, agrad, radius) + Form auxiliary polynomial (static method). + _solution(self, ssol, ahess) + Solution in original basis + solver(self) + Sub-problem subspace solver. 
+ + """ + + def __init__(self, point, func, grad, hess, radius): + """Initialize the data corresponding to the given point. + + Parameters + ---------- + point: np.ndarray((N,)) + Initial point of the optimization. + func: callable + Objective function. + grad: callable + Gradient of the objective function. + hess: callable + Hessian of the objective function. + radius: float + The trust region radius. + + """ + super().__init__(point, func, grad, hess, radius) + self.gradient = self.grad(self.point) + self.hessian = self.hess(self.point) + self.min_hess = min(np.linalg.eig(self.hessian)[0]) + + def _aux_grad(self, ahess): + """Calculate auxiliary gradient. + + A modified gradient in the basis spanning the subspace. + + Parameters + ---------- + ahess: np.ndarray((N,N,)) + Matrix (approximated hessian) used to modify the gradient. + + Returns + ------- + np.ndarray((N,)): + The modified gradient. + + """ + return np.array([np.linalg.norm(self.gradient), + self.gradient.dot(np.linalg.inv(ahess).dot(self.gradient))]) + + def _aux_hess(self, ahess): + """Calculate auxiliary hessian. + + A modified hessian in the basis spanning the subspace. + + Parameters + ---------- + ahess: np.ndarray((N,N,)) + (Approximated) hessian. + + Returns + ------- + np.ndarray((N,N,)) + The modified hessian. + + """ + return np.array([[self.gradient.dot(ahess.dot(self.gradient)), + np.linalg.norm(self.gradient)], + [np.linalg.norm(self.gradient), + self.gradient.dot(np.linalg.inv(ahess).dot(self.gradient))]]) + + def _aux_norm(self, ahess): + """Calculate auxiliary matrix. + + A matrix used to calculate the norm of a vector + given in the basis spanning the subspace. + + Parameters + ---------- + ahess: np.ndarray((N,N,)) + (Approximated) hessian. + + Returns + ------- + np.ndarray((N,N,)) + The matrix used to calculate the norm. + + + """ + return np.array([[(np.linalg.norm(self.gradient))**2, + self.gradient.dot(np.linalg.inv(ahess).dot(self.gradient))], + [self.gradient.dot(np.linalg.inv(ahess).dot(self.gradient)), + np.linalg.norm(np.linalg.inv(ahess).dot(self.gradient))]]) + + @staticmethod + def free_step(ahess, agrad): + """Unconstrained step. + + The solution of the Newton problem. + + Parameters + ---------- + ahess: np.ndarray((N,N,)) + (Approximated) hessian. + agrad: np.ndarray((N,)) + (Approximated) gradient. + + Returns + ------- + np.ndarray((N,)) + Unconstrained optimization step. + + """ + return -np.linalg.inv(ahess).dot(agrad) + + @staticmethod + def solution_norm(ahess, agrad): + """Weighted norm of a vector. + + Parameters + ---------- + ahess: np.ndarray((N,N,)) + (Approximated) hessian. + agrad: np.ndarray((N,)) + (Approximated) gradient. + + Returns + ------- + float + Weighted norm. + + """ + return 0.5*agrad.dot(ahess.dot(agrad)) + + @staticmethod + def aux_polynomial(mhess, nhess, agrad, radius): + """Form auxiliary polynomial. + + Parameters + ---------- + mhess: np.ndarray((N,N,)) + Modified hessian. + nhess: np.ndarray((N,N,)) + Matrix used to calculate the norm. + agrad: np.ndarray((N,)) + (Approximated) gradient. + radius: float + Trust region radius + + Returns + ------- + list + A list with the coefficients of the 2nd order polynomial. 
+ + """ + qterm = (nhess.dot(agrad)).dot(nhess.dot(nhess.dot(agrad))) + lterm = ((mhess.dot(agrad)).dot(nhess.dot(nhess.dot(agrad))) + + (nhess.dot(agrad)).dot(nhess.dot(mhess.dot(agrad)))) + cterm = ((mhess.dot(agrad)).dot(nhess.dot(mhess.dot(agrad))) - + 2*radius**2) + return [qterm, lterm, cterm] + + def _solution(self, ssol, ahess): + """Solution in original basis. + + Parameters + ---------- + ssol: np.ndarray((N,)) + Solution in the basis spanning the subspace. + ahess: np.ndarray((N,N,)) + (Approximated) hessian. + + Returns + ------- + np.ndarray((N,)) + Solution in the original basis. + + """ + return ssol[0]*self.gradient+ssol[1]*np.linalg.inv(ahess).dot(self.gradient) + + def solver(self): + """Subspace sub-problem solver. + + Returns + ------- + step: np.ndarray((N,)) + Correction added to the current point. + + """ + if self.min_hess == 0: + cauchy = Cauchy(self.point, self.func, self.grad, + self.hess, self.radius) + return cauchy.solver() + elif self.min_hess > 0: + new_hess = self.hessian + else: + alpha = -1.5*self.min_hess + new_hess = self.hessian + alpha*np.identity(self.hessian.ndim) + + mhess = self._aux_hess(new_hess) + nhess = self._aux_norm(new_hess) + new_grad = self._aux_grad(new_hess) + u_step = Subspace.free_step(mhess, new_grad) + sol_norm = Subspace.solution_norm(nhess, u_step) + if sol_norm < self.radius**2: + return self._solution(u_step, new_hess) + else: + roots = np.roots(self.aux_polynomial(mhess, nhess, + new_grad, self.radius)) + solutions = [-np.linalg.inv(mhess+root*nhess).dot(new_grad) + for root in roots] + values = [solution.dot(new_grad)+0.5*solution.dot(mhess.dot(solution)) + for solution in solutions] + return self._solution(solutions[int(np.argmin(values))], new_hess) diff --git a/flik/trustregion/test/test_cauchy.py b/flik/trustregion/test/test_cauchy.py new file mode 100644 index 0000000..272fc23 --- /dev/null +++ b/flik/trustregion/test/test_cauchy.py @@ -0,0 +1,37 @@ +"""Tests for the Cauchy point method.""" + +import numpy as np +from flik.trustregion.cauchy import Cauchy + + +def fun(x): + """Test function.""" + return x**3.0 + + +def grad(x): + """Gradient of the test function.""" + return 3.0 * x**2.0 + + +def hess(x): + """Hessian of the test function.""" + return np.array([[6.0*x[0], 0.], [0., 6.0*x[1]]]) + + +def test_cauchy(): + """Test the Cauchy point method.""" + point = np.array([-0.2, -0.1]) + radius = 0.7 + # Initialize class + cauchy = Cauchy(point, fun, grad, hess, radius) + # Test get_scaling + tau = cauchy.get_scaling() + assert tau == 1.0 + point = np.array([0.2, -0.1]) + cauchy = Cauchy(point, fun, grad, hess, radius) + tau = cauchy.get_scaling() + assert abs(tau - 0.161504) < 1.0e-6 + # Test the method + result = cauchy.solver() + assert np.allclose(result, [-0.109677, -0.0274193], 1.0e-5) diff --git a/flik/trustregion/test/test_dogleg.py b/flik/trustregion/test/test_dogleg.py new file mode 100644 index 0000000..ee7764c --- /dev/null +++ b/flik/trustregion/test/test_dogleg.py @@ -0,0 +1,86 @@ +"""Tests for the Dogleg method.""" + +import numpy as np +from numpy.testing import assert_raises +from flik.trustregion.dogleg import Dogleg, get_tau + + +def test_get_tau(): + """Test tau has the right value.""" + qterm = [1] + lterm = 0.5 + cterm = -3.0 + # Check parameters + assert_raises(TypeError, get_tau, qterm, lterm, cterm) + qterm = 1.0 + lterm = 2 + assert_raises(TypeError, get_tau, qterm, lterm, cterm) + lterm = 2.0 + cterm = np.array(3) + assert_raises(TypeError, get_tau, qterm, lterm, cterm) + cterm = 5.0 + # Check 
tau value + assert_raises(ValueError, get_tau, qterm, lterm, cterm) + # Check actual value of tau + qterm = 1.0 + lterm = 0.5 + cterm = -3.0 + # Tau not in range (0., 2.) + assert_raises(ValueError, get_tau, qterm, lterm, -6.0) + # Right tau + assert get_tau(1.0, 0.5, -3.0) == 1.5 + + +def fun(x): + """Test function.""" + return x**3.0 + + +def grad(x): + """Gradient of the test function.""" + return 3.0 * x**2.0 + + +def hess(x): + """Hessian of the test function modified to be positive.""" + return np.array([[6.0*x[0], 0.], [0., -6.0*x[1]]]) + + +def test_dogleg(): + """Test Dogleg method.""" + point = np.array([1.5, -0.4]) + radius = 0.7 + # Initialize class + # Check hessian is positive-definite + + def neg_hess(x): + """Negative-definite Hessian.""" + return np.array([[-6.0*x[0], 0.], [0., 6.0*x[1]]]) + + assert_raises(ValueError, Dogleg, point, fun, grad, neg_hess, radius) + # Test update_attr + dogleg = Dogleg(point, fun, grad, hess, radius) + dogleg.update_attr('point', np.array([1.2, -0.4])) + assert np.allclose(dogleg.point, np.array([1.2, -0.4]), 1.0e-6) + assert np.allclose(dogleg.full_step, [-0.6, -0.2], 1.0e-6) + assert np.allclose(dogleg.steepest_step, [-0.604918, -0.0672131], 1.0e-6) + # Test when radius the first segment (steepest descent step) + dogleg.update_attr('point', np.array([1.5, -0.4])) + result = dogleg.solver() + assert np.allclose(result, [-0.698237, -0.0496524], atol=1.0e-6) + # Test when Cauchy point is the solution + dogleg.radius = 0.8 + result = dogleg.solver() + assert np.allclose(result, [-0.75, -0.2], atol=1.0e-6) + # Test when step crosses the second segment + # Test compute tau + dogleg.update_attr('point', np.array([1.8, -0.1])) + dogleg.update_attr('radius', 0.901) + tau = dogleg.compute_tau() + assert abs(tau - 0.8392824) < 1.0e-5 + # Test compute_dogleg_step + step = dogleg.compute_dogleg_step(tau) + assert np.allclose(step, [-0.900001, -0.0424106], atol=1.0e-6) + # Test whole Method + step = dogleg.solver() + assert np.allclose(step, [-0.900001, -0.0424106], atol=1.0e-6) diff --git a/flik/trustregion/test/test_gradient_descent.py b/flik/trustregion/test/test_gradient_descent.py new file mode 100644 index 0000000..35bb15a --- /dev/null +++ b/flik/trustregion/test/test_gradient_descent.py @@ -0,0 +1,34 @@ +"""Tests for the Gradient-descent method.""" + +import numpy as np +from flik.trustregion.gradient_descent import GradientDescent + + +def fun(x): + """Test function.""" + return x**3.0 + + +def grad(x): + """Gradient of the test function.""" + return 3.0 * x**2.0 + + +def hess(x): + """Hessian of the test function modified to be positive.""" + return np.array([[6.0*x[0], 0.], [0., -6.0*x[1]]]) + + +def test_dogleg(): + """Test Dogleg method.""" + point = np.array([1.5, -0.4]) + radius = 0.7 + # Initialize class + gradient_descent = GradientDescent(point, fun, grad, hess, radius) + # Test when radius crosses steepest descent step + result = gradient_descent.solver() + assert np.allclose(result, [-0.698237, -0.0496524], atol=1.0e-6) + # Test when it takes the steepest descent step + gradient_descent.radius = 0.8 + result = gradient_descent.solver() + assert np.allclose(result, [-0.752777, -0.0535308], atol=1.0e-6) diff --git a/flik/trustregion/test/test_sub_problem.py b/flik/trustregion/test/test_sub_problem.py new file mode 100644 index 0000000..2128ce0 --- /dev/null +++ b/flik/trustregion/test/test_sub_problem.py @@ -0,0 +1,49 @@ +"""Tests for flik.trustregion.sub_problem.""" +import numpy as np +from flik.trustregion.sub_problem import 
SubProblem + + +def test_sub_problem(): + """Test the SubProblem base class.""" + # Set parameters + point = np.array([1., 2.]) + + def func(point): + """Objective function.""" + return point**3 + + def grad(point): + """Gradient of the objective function.""" + return 3*point**2 + + def hess(point): + """Hessian of the objective function.""" + return np.array([[6*point[0], 0.], [0., 6*point[1]]]) + radius = 0.9 + + class EmptySubProblem(SubProblem): + """Class with no solver method.""" + + pass + + try: + EmptySubProblem(point, func, grad, hess, radius) + except TypeError: + pass + # "Child class does not have a solver method" + + class SimpleSubProblem(SubProblem): + """Class to test the SubProblem base class.""" + + def solver(self): + """Sub-problem solver.""" + hessian = self.hess(self.point) + gradient = self.grad(self.point) + return -np.linalg.inv(hessian).dot(gradient) + + a = SimpleSubProblem(point, func, grad, hess, radius) + assert np.array_equal(a.point, np.array([1., 2.])) + assert np.array_equal(a.func(a.point), np.array([1., 8.])) + assert np.array_equal(a.grad(a.point), np.array([3., 12.])) + assert np.array_equal(a.hess(a.point), np.array([[6., 0.], [0., 12.]])) + assert np.allclose(a.solver(), -np.array([0.5, 1.])) diff --git a/flik/trustregion/test/test_subspace.py b/flik/trustregion/test/test_subspace.py new file mode 100644 index 0000000..faa9464 --- /dev/null +++ b/flik/trustregion/test/test_subspace.py @@ -0,0 +1,69 @@ +"""Test for flik.trustregion.subspace.""" +import numpy as np +from flik.trustregion.subspace import Subspace +from flik.trustregion.cauchy import Cauchy + + +def test_subspace(): + """Test the subspace solver.""" + # Set parameters + point = np.array([1., 2.]) # Leading to positive definite hessian + zero_point = np.array([0., 1/6]) # Leading to hessian with 0 eigenvalue + neg_point = np.array([1/6, -1/6]) # Leading tot negative definite hessian + + def func(point): + """Objective function.""" + return point**3 + + def grad(point): + """Gradient of the objective function.""" + return 3*point**2 + + def hess(point): + """Hessian of the objective function.""" + return np.array([[6*point[0], 0.], [0., 6*point[1]]]) + radius = 1. + sub_sol = Subspace(point, func, grad, hess, radius) + zero_sol = Subspace(zero_point, func, grad, hess, radius) + neg_sol = Subspace(neg_point, func, grad, hess, radius) + cauchy = Cauchy(zero_point, sub_sol.func, sub_sol.grad, + sub_sol.hess, radius) + + assert np.array_equal(sub_sol.point, np.array([1., 2.])) + assert np.array_equal(sub_sol.func(sub_sol.point), np.array([1., 8.])) + assert np.array_equal(sub_sol.grad(sub_sol.point), np.array([3., 12.])) + assert np.array_equal(sub_sol.hess(sub_sol.point), + np.array([[6., 0.], [0., 12.]])) + assert np.array_equal(sub_sol.gradient, np.array([3., 12.])) + assert np.array_equal(sub_sol.hessian, np.array([[6., 0.], [0., 12.]])) + assert sub_sol.min_hess == 6.0 + assert np.allclose(sub_sol._aux_grad(sub_sol.hessian), + np.array([np.sqrt(153.), 13.5])) + assert np.allclose(sub_sol._aux_hess(sub_sol.hessian), + np.array([[1782., np.sqrt(153.)], + [np.sqrt(153.), 13.5]])) + assert np.allclose(sub_sol._aux_norm(sub_sol.hessian), + np.array([[153., 13.5], [13.5, 1.11803399]])) + assert np.array_equal(sub_sol.free_step(hess(point), grad(point)), + np.array([-0.5, -1.])) + assert sub_sol.solution_norm(hess(point), grad(point)) == 891. + assert sub_sol.aux_polynomial(sub_sol.hessian, sub_sol.hessian, + sub_sol.gradient, + sub_sol.radius) == [250776., + 501552., + 250774.] 
+ assert np.allclose(sub_sol._solution(np.array([0.1, 0.2]), sub_sol.hessian), + np.array([0.4, 1.4])) + # Hessian with 0 eigenvalue + assert np.allclose(zero_sol.solver(), cauchy.solver()) + # Positive hessian, inside trust region + assert np.allclose(sub_sol.solver(), np.array([-0.5, -1.])) + # Positive hessian, small trust region + small_region = Subspace(point, func, grad, hess, radius=0.4) + assert np.allclose(small_region.solver(), np.array([0.42390953, 1.47969983])) + # Negative hessian, inside trust region + assert np.allclose(neg_sol.solver(), np.array([-1/30, -1/6])) + # Negative hessian, small trust region + small_region = Subspace(neg_point, func, grad, hess, radius=0.2) + assert np.allclose(small_region.solver(), np.array([-0.00568462, + -0.00480769])) diff --git a/flik/trustregion/test/test_tools.py b/flik/trustregion/test/test_tools.py new file mode 100644 index 0000000..4d27498 --- /dev/null +++ b/flik/trustregion/test/test_tools.py @@ -0,0 +1,113 @@ +"""Test for flik.trustregion tools.""" + +import numpy as np +from nose.tools import assert_raises +from flik.trustregion.tools import check_input + + +def test_input_control(): + """Test flik.trustregion.tools.check_input.""" + # check var + assert_raises(TypeError, check_input, var=[1]) + assert_raises(TypeError, check_input, var=np.array([[1, 2]])) + check_input(var=0) + check_input(var=0.0) + check_input(var=np.array([0])) + + # check func + assert_raises(TypeError, check_input, func='1') + check_input(func=lambda x: x) + + # check grad + assert_raises(TypeError, check_input, grad='1') + check_input(grad=lambda x: x) + + # check hess + assert_raises(TypeError, check_input, hess='1') + + def test_hess(x, y): + return np.array([[x, 2*y**2], [2*y**2, x*y]]) + check_input(hess=test_hess) + + # check func_val + assert_raises(TypeError, check_input, func_val=np.array([1])) + check_input(func_val=1) + check_input(func_val=1.0) + + # check grad_val + assert_raises(TypeError, check_input, grad_val=[1]) + check_input(grad_val=1) + check_input(grad_val=1.0) + check_input(grad_val=np.array([1])) + + # check hess_val + assert_raises(TypeError, check_input, hess_val=[1]) + assert_raises(TypeError, check_input, hess_val=np.array([1, 2.0])) + assert_raises(ValueError, check_input, hess_val=np.array([[1, 2.0], [-2.0, 3]])) + check_input(hess_val=1) + check_input(hess_val=1.0) + check_input(hess_val=np.array([[1, 2.0], [2.0, 3]])) + + # check step + assert_raises(TypeError, check_input, step=[1]) + assert_raises(TypeError, check_input, step=np.array(1)) + check_input(step=1) + check_input(step=1.0) + check_input(step=np.array([1])) + + # check var and step + assert_raises(ValueError, check_input, var=1, step=np.array([2, 3])) + assert_raises(ValueError, check_input, var=np.array([1]), step=np.array([2, 3])) + check_input(var=1, step=np.array([2])) + + # check var and grad_val + assert_raises(ValueError, check_input, var=np.array([2, 3]), grad_val=1) + assert_raises(ValueError, check_input, var=np.array([2, 3]), grad_val=np.array([1])) + check_input(var=np.array([2]), grad_val=1) + + # check grad_val and step + assert_raises(ValueError, check_input, grad_val=1, step=np.array([2, 3])) + assert_raises(ValueError, check_input, grad_val=np.array([1]), step=np.array([2, 3])) + check_input(grad_val=1, step=np.array([2])) + + # check var, grad_val and step + assert_raises(ValueError, check_input, var=1, grad_val=1, step=np.array([2, 3])) + check_input(var=0, grad_val=1, step=np.array([2])) + + # check hess_val and step + assert_raises(ValueError, 
check_input, hess_val=np.array([[1, 2.0], [2.0, 3]]), + step=np.array([2.0])) + check_input(hess_val=np.array([[1, 2.0], [2.0, 3]]), step=np.array([2.0, 0.0])) + + # check maximum_trust_region_radius + assert_raises(TypeError, check_input, maximum_trust_region_radius=[1]) + assert_raises(ValueError, check_input, maximum_trust_region_radius=-0.5) + check_input(maximum_trust_region_radius=3.0) + + # check initial_trust_region_radius + assert_raises(TypeError, check_input, initial_trust_region_radius=[1]) + assert_raises(ValueError, check_input, initial_trust_region_radius=-0.5) + assert_raises(ValueError, check_input, initial_trust_region_radius=0.03, + maximum_trust_region_radius=0.02) + check_input(initial_trust_region_radius=0.001, maximum_trust_region_radius=0.02) + + # check reduction_factor_threshold + assert_raises(TypeError, check_input, reduction_factor_threshold=[1]) + assert_raises(ValueError, check_input, reduction_factor_threshold=-0.5) + assert_raises(ValueError, check_input, reduction_factor_threshold=0.5) + check_input(reduction_factor_threshold=0.1) + + # check convergence_threshold + assert_raises(TypeError, check_input, convergence_threshold=[1]) + assert_raises(ValueError, check_input, convergence_threshold=-0.5) + check_input(convergence_threshold=0.1) + + # check maximum_number_of_iterations + assert_raises(TypeError, check_input, maximum_number_of_iterations=[1]) + assert_raises(TypeError, check_input, maximum_number_of_iterations=2.5) + assert_raises(ValueError, check_input, maximum_number_of_iterations=0) + assert_raises(ValueError, check_input, maximum_number_of_iterations=-2) + check_input(maximum_number_of_iterations=12) + + # do nothing + check_input() diff --git a/flik/trustregion/test/test_trustregion.py b/flik/trustregion/test/test_trustregion.py new file mode 100644 index 0000000..f97ea4a --- /dev/null +++ b/flik/trustregion/test/test_trustregion.py @@ -0,0 +1,35 @@ +"""Test for flik.trustregion.trustregion.""" + +import numpy as np +from nose.tools import assert_raises +from flik.trustregion.trustregion import trust_region_solve + + +def test_trustregion(): + """Test the trustregion algorithm.""" + def func(x): + """Return the value of the function at x.""" + return np.dot(x, x) + + def grad(x): + """Return the value of the gradient of the function at x.""" + return 2 * x + + def hess(x): + """Return the value of the hessian of the function at x. 
Only for this particular case.""" + return np.array([[2, 0], [0, 2]]) + + x = np.array([1.0, 0.5]) + solution = np.array([0, 0]) + + # Test input for trustregion + assert_raises(NotImplementedError, + trust_region_solve, x, func, grad, hess, 'Iterative', 0.2, 10**(-5)) + assert_raises(ValueError, + trust_region_solve, x, func, grad, hess, 'invalid', 0.2, 10**(-5)) + + # Test actual convergence + assert np.allclose(trust_region_solve(x, func, grad, hess, 'Cauchy', 0.2, 10**(-5)), solution) + assert np.allclose(trust_region_solve(x, func, grad, hess, 'Dogleg', 0.2, 10**(-5)), solution) + assert np.allclose(trust_region_solve(x, func, grad, hess, '2D-subspace', 0.2, 10**(-5)), + solution) diff --git a/flik/trustregion/tools.py b/flik/trustregion/tools.py new file mode 100644 index 0000000..eafca61 --- /dev/null +++ b/flik/trustregion/tools.py @@ -0,0 +1,171 @@ +"""Tools for the trust region module.""" + +import itertools as it +import numpy as np + + +class NotConvergedError(Exception): + """A custom exception for convergence errors.""" + + +def check_input(*, var=None, func=None, grad=None, hess=None, func_val=None, grad_val=None, + hess_val=None, step=None, maximum_trust_region_radius=None, + initial_trust_region_radius=None, reduction_factor_threshold=None, + convergence_threshold=None, maximum_number_of_iterations=None): + r"""Test input used in trust region module. + + Parameters + ---------- + var: {int, float, np.array((N,)), None} + Variables' current values. + func: {callable, None} + Function :math:`f(x)`. + grad: {callable, None} + Gradient of the function :math:`\nabla f(x)`. + hess: {callable, None} + Hessian of the function :math:`\nabla f(x)` or an approximation to it. + func_val: {int, float, None} + Value of the function evaluated at var. + grad_val: {int, float, np.array((N,)), None} + Gradient evaluated at var. + hess_val: {float, np.array((N,N,)), None} + Hessian evaluated at var. + step: {int, float, np.ndarray((N,))} + Direction vector for next step. + maximum_trust_region_radius: {int, double} + The maximum radius the trust region can obtain. + initial_trust_region_radius: {int, double} + The initial trust region radius, should be in [0, maximum_trust_region_radius[ + reduction_factor_threshold: {int, double} + The threshold used for accepting a reduction factor. + convergence_threshold: {int, double} + The threshold used to determine convergence. + maximum_number_of_iterations: int + A maximum of the number of iterations the trust region solver should perform. + + + Raises + ------ + TypeError + If var is not an int, float, or one dimensional numpy array. + If func is not a callable. + If grad is not a callable. + If hess is not callable. + If func_val is not an int or float. + If grad_val is not an int, float, or one-dimensional numpy array. + If hess_val is not an int, float, or two-dimensional numpy array. + If step is not an int, float, or one dimensional numpy array. + ValueError + If var and direction do not have the same shape. + If var and grad_val do not have the same shape. + If step and grad_val do not have the same shape. + If hess_val is incompatible with step. + If hess_val is not a symmetric matrix. + If maximum_trust_region_radius is smaller than zero. + If initial_trust_region_radius is not in the interval ]0, maximum_trust_region_radius[. + If reduction_factor_threshold is not in the interval [0, 0.25[ + If maximum_number_of_iterations is not larger than zero. 
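+        If convergence_threshold is smaller than zero.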
+ + """ + if var is None: + pass + elif isinstance(var, (int, float)): + var = np.array(var, dtype=float, ndmin=1) + elif not (isinstance(var, np.ndarray) and var.ndim == 1): + raise TypeError("Variable vector should be a float or a 1-D numpy.ndarray") + + if func is None: + pass + elif not callable(func): + raise TypeError("func must be callable") + + if grad is None: + pass + elif not callable(grad): + raise TypeError("grad must be callable") + + if hess is None: + pass + elif not callable(hess): + raise TypeError("hess must be callable") + + if func_val is None: + pass + elif not isinstance(func_val, (int, float)): + raise TypeError('func_val must be a float.') + + if grad_val is None: + pass + elif isinstance(grad_val, (int, float)): + grad_val = np.array(grad_val, dtype=float, ndmin=1) + elif not (isinstance(grad_val, np.ndarray) and grad_val.ndim == 1): + raise TypeError('grad_val must be a one dimensional numpy array.') + + if hess_val is None: + pass + elif isinstance(hess_val, (int, float)): + hess_val = np.array(hess_val, dtype=float, ndmin=2) + elif not (isinstance(hess_val, np.ndarray) and hess_val.ndim == 2): + raise TypeError('hess_val must be a two dimensional numpy array.') + elif not np.allclose(hess_val, hess_val.T): + raise ValueError("The current Hessian must be a symmetric matrix.") + + if step is None: + pass + elif isinstance(step, (int, float)): + step = np.array(step, dtype=float, ndmin=1) + elif not (isinstance(step, np.ndarray) and step.ndim == 1): + raise TypeError("The direction vector should be provided as a float or a numpy array") + + name_dict = {'var': var, 'direction': step, 'grad_val': grad_val} + for name1, name2 in it.combinations(name_dict, 2): + array1 = name_dict[name1] + array2 = name_dict[name2] + + if array1 is None or array2 is None: + continue + if array1.shape != array2.shape: + raise ValueError(f'{name1} and {name2} must have the same shape.') + + if hess_val is not None and step is not None: + hess_val_dim_1, hess_val_dim_2 = hess_val.shape + step_dim = step.shape[0] # step is already checked to have dimension (1,) + if (hess_val_dim_1 != step_dim) or (hess_val_dim_2 != step_dim): + raise ValueError("hess_val and step are not compatible.") + + if maximum_trust_region_radius is None: + pass + elif not isinstance(maximum_trust_region_radius, (int, float)): + raise TypeError("maximum_trust_region_radius must be an int or a float.") + elif maximum_trust_region_radius <= 0: + raise ValueError("maximum_trust_region_radius must be larger than zero.") + + if initial_trust_region_radius is None: + pass + elif not isinstance(initial_trust_region_radius, (int, float)): + raise TypeError("initial_trust_region_radius must be an int or a float.") + elif (initial_trust_region_radius <= 0) or \ + (maximum_trust_region_radius < initial_trust_region_radius): + raise ValueError("initial_trust_region_radius must be in the interval ]0, " + "maximum_trust_region_radius[") + + if reduction_factor_threshold is None: + pass + elif not isinstance(reduction_factor_threshold, (int, float)): + raise TypeError("reduction_factor_threshold must be an int or a float.") + elif (reduction_factor_threshold > 0.25) or (reduction_factor_threshold < 0): + raise ValueError("reduction_factor_threshold must be in the interval [0, 0.25[") + + if convergence_threshold is None: + pass + elif not isinstance(convergence_threshold, (int, float)): + raise TypeError("convergence_threshold must be an int or a float.") + elif convergence_threshold < 0: + raise 
ValueError("convergence_threshold must be larger than 0.") + + if maximum_number_of_iterations is None: + pass + elif not isinstance(maximum_number_of_iterations, int): + raise TypeError("maximum_number_of_iterations must be an int.") + elif maximum_number_of_iterations <= 0: + raise ValueError("maximum_number_of_iterations must be larger than zero.") diff --git a/flik/trustregion/trustregion.py b/flik/trustregion/trustregion.py new file mode 100644 index 0000000..25139c4 --- /dev/null +++ b/flik/trustregion/trustregion.py @@ -0,0 +1,112 @@ +"""A trust region solver.""" + +from flik.trustregion.tools import NotConvergedError, check_input +from flik.trustregion.cauchy import Cauchy +from flik.trustregion.dogleg import Dogleg +from flik.trustregion.subspace import Subspace +import numpy as np + + +def trust_region_solve(x_0, func, grad, hess, subproblem_str, reduction_factor_threshold, + convergence_threshold, initial_trust_region_radius=1.0, + maximum_trust_region_radius=1.5, maximum_number_of_iterations=128): + r"""Minimize a scalar function with the trust region method. + + Parameters + ---------- + x_0: np.array((N,)) + An initial guess for the solution. + func: callable + The scalar function that has to be minimized. + grad: callable + The gradient of the function that has to be minimized. + hess: callable + The Hessian of the function that has to be minimized, or a symmetric approximation to it. + subproblem_str: string + A keyword corresponding to the subproblem that should be used to solve the trust region + sub-problem. + maximum_trust_region_radius: {int, double} + The maximum radius the trust region can obtain. + initial_trust_region_radius: {int, double} + The initial trust region radius, should be in [0, maximum_trust_region_radius[ + reduction_factor_threshold: {int, double} + The threshold used for accepting a reduction factor. + convergence_threshold: {int, double} + The threshold used to determine convergence. + maximum_number_of_iterations: int + A maximum of the number of iterations the trust region solver should perform. + + + Returns + ------- + If converged, returns the found minimizer of func. (np.array((N,))) + + + Raises + ------ + ConvergenceError + if the algorithm hasn't converged within the specified maximum number of iterations + NotImplementedError + for subproblem_str that correspond to subproblem solvers that haven't been implemented + ValueError + if a wrong subproblem_str was given + + """ + # Input checking + check_input(var=x_0, func=func, grad=grad, hess=hess, + maximum_trust_region_radius=maximum_trust_region_radius, + initial_trust_region_radius=initial_trust_region_radius, + reduction_factor_threshold=reduction_factor_threshold, + maximum_number_of_iterations=maximum_number_of_iterations) + + # Do the actual algorithm + x_k = x_0 + trust_region_radius = initial_trust_region_radius + for k in range(1, maximum_number_of_iterations+1): # end is not included + + # Instantiate the sub-problem solver + if subproblem_str == 'Cauchy': + subproblem_solver = Cauchy(x_k, func, grad, hess, trust_region_radius) + elif subproblem_str == 'Dogleg': + subproblem_solver = Dogleg(x_k, func, grad, hess, trust_region_radius) + elif subproblem_str == '2D-subspace': + subproblem_solver = Subspace(x_k, func, grad, hess, trust_region_radius) + elif subproblem_str == 'Iterative': + raise NotImplementedError + else: + raise ValueError( + "The subproblem string did not activate a current implementation of the " + "subproblem solvers. 
Did you mistype?") + + # Solve the subproblem to obtain a new step + step = subproblem_solver.solver() + + # Calculate the reduction factor + f_k = func(x_k) + g_k = grad(x_k) + hessian_k = hess(x_k) + + m0 = f_k # m(0) + mp_k = f_k + np.inner(g_k, step) + 0.5 * np.inner(step, np.dot(hessian_k, step)) # m(step) + + reduction_factor = (f_k - func(x_k + step)) / (m0 - mp_k) + + # Update the trust region + if reduction_factor < 0.25: + trust_region_radius = 0.25 * trust_region_radius + else: + if (reduction_factor > 0.75) and (np.linalg.norm(step) == trust_region_radius): + trust_region_radius = min(2 * trust_region_radius, maximum_trust_region_radius) + # The 'else' in the algorithm is redundant in code + + # Accept (or don't) the new step + if reduction_factor > reduction_factor_threshold: + x_k = x_k + step + # The 'else' in the algorithm is redundant in code + + # Check for convergence + if np.linalg.norm(grad(x_k)) < convergence_threshold: + return x_k + + # Getting out of the loop means we have reached the maximum number of iterations + raise NotConvergedError
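
Usage sketch (not part of the diff): a minimal example of driving the new solver on a
convex quadratic, mirroring flik/trustregion/test/test_trustregion.py.  The import path
and the argument order (initial point, func, grad, hess, sub-problem keyword,
reduction-factor threshold, convergence threshold) follow the signatures introduced
above; everything else is illustrative only.

    import numpy as np
    from flik.trustregion.trustregion import trust_region_solve

    def func(x):
        """Convex quadratic f(x) = x.x, minimized at the origin."""
        return np.dot(x, x)

    def grad(x):
        """Gradient of the quadratic, 2x."""
        return 2 * x

    def hess(x):
        """Constant Hessian, 2I."""
        return np.array([[2.0, 0.0], [0.0, 2.0]])

    x0 = np.array([1.0, 0.5])
    # 'Cauchy', 'Dogleg' or '2D-subspace' selects the sub-problem solver.
    x_min = trust_region_solve(x0, func, grad, hess, 'Dogleg',
                               reduction_factor_threshold=0.2,
                               convergence_threshold=1e-5)
    print(x_min)  # expected to be close to [0., 0.]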