Source code for metaheuristic_designer.strategies.classic.CMA_ES

"""
CMA-ES (Covariance Matrix Adaptation Evolution Strategy) implementation.

.. warning::
   The current implementation is architecturally a temporary solution.
   It will be refactored once the EDA (Distribution-based) interface is
   finalized.
"""

from __future__ import annotations
from typing import Optional
import logging
import numpy as np
import scipy as sp

from metaheuristic_designer.objective_function import ObjectiveFunc

from ...parent_selection import create_parent_selection
from ...population import Population
from ...initializer import Initializer
from ...schedulable_parameter import SchedulableParameter
from ...survivor_selection_base import SurvivorSelection
from ...operators import create_operator
from ...utils import VectorLike, check_rng
from ..eda_strategy import EDAStrategy

logger = logging.getLogger(__name__)



[docs]
class CMA_ES(EDAStrategy):
    """
    Covariance Matrix Adaptation Evolution Strategy (CMA-ES).

    This is a population-based algorithm that samples new solutions
    from a multivariate normal distribution whose mean and covariance
    are adapted each generation based on the best individuals.

    .. note::
       The architecture of this class is provisional.  It currently
       overrides :meth:`initialize` and :meth:`perturb` directly.
       Once the distribution-based (EDA) abstraction is in place,
       CMA-ES will be rewritten to use that common interface.

    Parameters
    ----------
    initializer : Initializer
        Provides population size and genotype shape, but does **not**
        generate the initial solutions.
    survivor_sel : SurvivorSelection, optional
        How survivors are selected.  Defaults to the strategy's
        default (generational).
    name : str, optional
        Display name (default ``"CMA-ES"``).
    offspring_size : int or SchedulableParameter, optional
        Number of offspring per generation.  If ``None``, the
        initializer's population size is used.
    rng : RNGLike, optional
        Random number generator.
    mean : VectorLike, optional
        Initial mean vector.  If not given, it is computed from the
        objective's bounds (or randomly if no bounds exist).
    sigma : VectorLike, optional
        Initial step size.  If not given, a default is computed.
    \\*\\*kwargs
        Forwarded to :class:`VariablePopulation`.
    """


[docs]
    def __init__(
        self,
        initializer: Initializer,
        survivor_sel: SurvivorSelection = None,
        name: str = "CMA-ES",
        offspring_size: Optional[int | SchedulableParameter] = None,
        rng=None,
        mean: Optional[VectorLike] = None,
        sigma: Optional[VectorLike] = None,
        cond_tol: float = 1e8,
        sigma_tol: float = 1e-10,
        **kwargs,
    ):
        rng = check_rng(rng)

        logger.info(
            "In CMA-ES the initializer does not generate solutions, it merely indicates the population size and encoding. Don't expect different results from changing the initializer."
        )

        self.offspring_size = offspring_size if offspring_size is not None else initializer.population_size
        self.cond_tol = cond_tol
        self.sigma_tol = sigma_tol

        super().__init__(
            initializer,
            operator=create_operator(
                "mutation.full_resampling", distribution="multivariate_normal", mean=None, cov=None, allow_singular=True, rng=rng
            ),
            parent_sel=create_parent_selection("best", amount=initializer.population_size, rng=rng),
            survivor_sel=survivor_sel,
            name=name,
            rng=rng,
            # Forced kwargs
            sigma=sigma,
            **kwargs,
        )

        self._initialize_parameters()


    def _initialize_parameters(self):
        self._cov = np.eye(self.initializer.dimension)

        # initialize weights
        self.mu = self.initializer.population_size
        self.lambda_ = self.offspring_size

        val_range = np.arange(self.mu) + 1
        weights = np.log(self.mu + 0.5) - np.log(val_range)
        self._weights = weights / np.sum(weights)

        # Initialize internal parameters
        self._effective_pop_size = 1 / np.sum(self._weights**2)

        n = self.initializer.dimension
        norm_eff_pop = self._effective_pop_size / n
        term1 = 4 + norm_eff_pop
        term2 = n + 4 + 2 * norm_eff_pop
        self._cc = term1 / term2

        term1 = self._effective_pop_size + 2
        term2 = n + self._effective_pop_size + 5
        self._csigma = term1 / term2

        term1_a = 1 / self._effective_pop_size
        term1_b = 2 / (n + np.sqrt(2)) ** 2
        term1 = term1_a * term1_b

        term2_a = 1 - term1_a
        term3_a = 2 * self._effective_pop_size - 1
        term3_b = (n + 2) ** 2 + self._effective_pop_size
        term3 = term3_a / term3_b
        term2_b = np.minimum(term3, 1)
        term2 = term2_a * term2_b
        self._ccov = term1 + term2

        term1_a = (self._effective_pop_size - 1) / (n + 1)
        term1_b = np.sqrt(term1_a) - 1
        term1 = 2 * np.maximum(term1_b, 0)
        self._dsigma = 1 + term1 + self._csigma

        self._A = np.eye(n)

        self._xin = np.sqrt(n) * (1 - (1 / (4 * n)) + (1 / (21 * n * n)))

        # Declare internal parameters, assign dummy values
        self._path_cov = np.zeros(n)
        self._path_sigma = np.zeros(n)

        self.n = n


[docs]
    def initialize(self, objfunc: ObjectiveFunc) -> Population:
        """Create the initial population by sampling from the current distribution.

        Parameters
        ----------
        objfunc : ObjectiveFunc
            The objective function, used to infer bounds if *mean* or
            *sigma* are not provided.

        Returns
        -------
        Population
            A freshly sampled population with unevaluated fitness.
        """

        if self.operator.params.mean is None:
            if hasattr(objfunc, "lower_bound") and hasattr(objfunc, "upper_bound"):
                computed_mean = 0.5 * (objfunc.upper_bound + objfunc.lower_bound)
            else:
                logger.warning(
                    "Using random mean since no lower bounds could be found in the objective function. This can lead to bad convergence properties."
                )
                computed_mean = self.initializer.generate_individual()

            if np.asarray(computed_mean).ndim == 0:
                computed_mean = np.repeat(computed_mean, self.initializer.dimension)

            self.update_kwargs(mean=np.atleast_1d(computed_mean).astype(float))

        if self.params.sigma is None:
            if hasattr(objfunc, "lower_bound") and hasattr(objfunc, "upper_bound"):
                # Recommendation from: The CMA Evolution Strategy: A Tutorial (Nikolaus Hansen)
                sigma = 0.3 * (objfunc.upper_bound - objfunc.lower_bound)
            else:
                sigma = 0.5
            self.update_kwargs(sigma=np.atleast_1d(sigma).astype(float))

        # In CMA-ES the initialization is done from random sampling of the distribution, the initializer is not used.
        mean = self.params.mean
        sigma = self.params.sigma
        cov_matrix = sigma * sigma * self._cov
        genotype = self.rng.multivariate_normal(mean=mean, cov=cov_matrix, size=(self.offspring_size,))

        # Update the operator's parameters since they were undefined in the constructor
        self.operator.update_kwargs(mean=mean, cov=cov_matrix)

        initial_population = Population(genotype, encoding=self.initializer.encoding)
        initial_population = objfunc.calculate_fitness(initial_population)

        return initial_population



[docs]
    def estimate_parameters(self, population):
        """Update the distribution parameters

        The parents (the best μ individuals from the previous generation)
        are used to update *mean*, *sigma*, *covariance*, and the
        evolution paths.

        Parameters
        ----------
        population : Population
            The selected parents (must be already evaluated).

        Returns
        -------
        Population
            Offspring population of size *offspring_size*.
        """

        pop_order = np.argsort(population.fitness)[::-1]
        pop_matrix = population.genotype_matrix[pop_order, :]

        new_mean = np.average(pop_matrix, axis=0, weights=self._weights)

        y_best = (pop_matrix - self.params.mean) / self.params.sigma
        mean_diff = (new_mean - self.params.mean) / self.params.sigma

        # Compute path values
        term1 = (1 - self._cc) * self._path_cov
        term2_a = self._cc * (2 - self._cc) * self._effective_pop_size
        term2 = np.sqrt(term2_a) * mean_diff
        self._path_cov = term1 + term2

        w = sp.linalg.solve_triangular(self._A.T, mean_diff, lower=False)

        term1 = (1 - self._csigma) * self._path_sigma
        term2_a = self._csigma * (2 - self._csigma) * self._effective_pop_size
        term2 = np.sqrt(term2_a) * w
        self._path_sigma = term1 + term2

        term1 = (1 - self._ccov) * self._cov
        term2 = (self._ccov / self._effective_pop_size) * np.outer(self._path_cov, self._path_cov)
        term3_a = self._ccov * (1 - (1 / self._effective_pop_size))
        term_b = np.zeros((self.n, self.n))
        for i in range(self.mu):
            term_b += self._weights[i] * np.outer(y_best[i], y_best[i])
        term3 = term3_a * term_b
        self._cov = term1 + term2 + term3

        self._A = np.linalg.cholesky(self._cov)

        # update sigma
        term1_a = np.linalg.norm(self._path_sigma) - self._xin
        term1_b = self._dsigma * self._xin
        new_sigma = self.params.sigma * np.exp(term1_a / term1_b)

        # Breaks to ensure numerical stability
        if np.all(new_sigma < self.sigma_tol):
            self.finish = True

        if np.linalg.cond(self._cov) > self.cond_tol:
            self.finish = True

        self.update_kwargs(sigma=new_sigma)
        self.operator.update_kwargs(mean=new_mean, cov=new_sigma * new_sigma * self._cov)

        return self.operator