"""
CMA-ES (Covariance Matrix Adaptation Evolution Strategy) implementation.
.. warning::
The current implementation is architecturally a temporary solution.
It will be refactored once the EDA (Distribution-based) interface is
finalised.
"""
from __future__ import annotations
from typing import Optional
import logging
import numpy as np
import scipy as sp
from metaheuristic_designer.objective_function import ObjectiveFunc
from ...search_strategy import SearchStrategy
from ...parent_selection import create_parent_selection
from ...population import Population
from ...initializer import Initializer
from ...schedulable_parameter import SchedulableParameter
from ...survivor_selection_base import SurvivorSelection
from ...operators import create_operator
from ...utils import VectorLike, check_random_state
logger = logging.getLogger(__name__)
[docs]
class CMA_ES(SearchStrategy):
"""
Covariance Matrix Adaptation Evolution Strategy (CMA-ES).
This is a population-based algorithm that samples new solutions
from a multivariate normal distribution whose mean and covariance
are adapted each generation based on the best individuals.
.. note::
The architecture of this class is provisional. It currently
overrides :meth:`initialize` and :meth:`perturb` directly.
Once the distribution-based (EDA) abstraction is in place,
CMA-ES will be rewritten to use that common interface.
Parameters
----------
initializer : Initializer
Provides population size and genotype shape, but does **not**
generate the initial solutions.
survivor_sel : SurvivorSelection, optional
How survivors are selected. Defaults to the strategy's
default (generational).
name : str, optional
Display name (default ``"CMA-ES"``).
offspring_size : int or SchedulableParameter, optional
Number of offspring per generation. If ``None``, the
initializer's population size is used.
random_state : RNGLike, optional
Random number generator.
mean : VectorLike, optional
Initial mean vector. If not given, it is computed from the
objective's bounds (or randomly if no bounds exist).
sigma : VectorLike, optional
Initial step size. If not given, a default is computed.
**kwargs
Forwarded to :class:`VariablePopulation`.
"""
def __init__(
self,
initializer: Initializer,
survivor_sel: SurvivorSelection = None,
name: str = "CMA-ES",
offspring_size: Optional[int | SchedulableParameter] = None,
random_state=None,
mean: Optional[VectorLike] = None,
sigma: Optional[VectorLike] = None,
**kwargs,
):
self.random_state = check_random_state(random_state)
logger.info(
"In CMA-ES the initializer does not generate solutions, it merely indicates the population size and encoding. Don't expect different results from changing the initializer."
)
super().__init__(
initializer,
operator=create_operator("mutation.full_resampling", distribution="multivariate_normal", mean=None, cov=None, allow_singular=True),
parent_sel=create_parent_selection("best", amount=initializer.population_size),
survivor_sel=survivor_sel,
name=name,
# Forced kwargs
mean=mean,
sigma=sigma,
**kwargs,
)
self.offspring_size = offspring_size
self._cov = np.eye(initializer.dimension)
# intialize weights
self.mu = initializer.population_size
self.lambda_ = offspring_size
val_range = np.arange(self.mu) + 1
weights = np.log(self.mu + 0.5) - np.log(val_range)
self._weights = weights / np.sum(weights)
# Initialize internal parameters
self._effective_pop_size = 1 / np.sum(self._weights**2)
n = self.initializer.dimension
norm_eff_pop = self._effective_pop_size / n
term1 = 4 + norm_eff_pop
term2 = n + 4 + 2 * norm_eff_pop
self._cc = term1 / term2
term1 = self._effective_pop_size + 2
term2 = n + self._effective_pop_size + 5
self._csigma = term1 / term2
term1_a = 1 / self._effective_pop_size
term1_b = 2 / (n + np.sqrt(2)) ** 2
term1 = term1_a * term1_b
term2_a = 1 - term1_a
term3_a = 2 * self._effective_pop_size - 1
term3_b = (n + 2) ** 2 + self._effective_pop_size
term3 = term3_a / term3_b
term2_b = np.minimum(term3, 1)
term2 = term2_a * term2_b
self._ccov = term1 + term2
term1_a = (self._effective_pop_size - 1) / (n + 1)
term1_b = np.sqrt(term1_a) - 1
term1 = 2 * np.maximum(term1_b, 0)
self._dsigma = 1 + term1 + self._csigma
self._A = np.eye(n)
self._xin = np.sqrt(n) * (1 - (1 / (4 * n)) + (1 / (21 * n * n)))
# Declare internal parameters, assign dummy values
self._path_cov = np.zeros(n)
self._path_sigma = np.zeros(n)
self.n = n
[docs]
def initialize(self, objfunc: ObjectiveFunc) -> Population:
"""Create the initial population by sampling from the current distribution.
Parameters
----------
objfunc : ObjectiveFunc
The objective function, used to infer bounds if *mean* or
*sigma* are not provided.
Returns
-------
Population
A freshly sampled population with unevaluated fitness.
"""
if self.params.mean is None:
if hasattr(objfunc, "lower_bound") and hasattr(objfunc, "upper_bound"):
computed_mean = 0.5 * (objfunc.upper_bound + objfunc.lower_bound)
else:
logger.warning(
"Using random mean since no lower bounds could be found in the objective function. This can lead to bad convergence properties."
)
computed_mean = self.initializer.generate_individual()
if np.asarray(computed_mean).ndim == 0:
computed_mean = np.repeat(computed_mean, self.initializer.dimension)
self.update_kwargs(mean=np.atleast_1d(computed_mean).astype(float))
if self.params.sigma is None:
if hasattr(objfunc, "lower_bound") and hasattr(objfunc, "upper_bound"):
# Recommendation from: The CMA Evolution Strategy: A Tutorial (Nikolaus Hansen)
sigma = 0.3 * (objfunc.upper_bound - objfunc.lower_bound)
else:
sigma = 0.5
self.update_kwargs(sigma=np.atleast_1d(sigma).astype(float))
# In CMA-ES the initialization is done from random sampling of the distribution, the initializer is not used.
mean = self.params.mean
sigma = self.params.sigma
cov_matrix = sigma * sigma * self._cov
genotype = np.random.multivariate_normal(mean=mean, cov=cov_matrix, size=self.offspring_size)
# Update the operator's parameters since they were undefined in the constructor
self.operator.update_kwargs(mean=mean, cov=cov_matrix)
return Population(objfunc, genotype, encoding=self.initializer.encoding)
[docs]
def perturb(self, parents: Population, **kwargs) -> Population:
"""Update the distribution parameters and generate offspring.
The parents (the best μ individuals from the previous generation)
are used to update *mean*, *sigma*, *covariance*, and the
evolution paths. A new offspring population is then sampled from
the updated distribution.
Parameters
----------
parents : Population
The selected parents (must be already evaluated).
**kwargs
Forwarded to the parent's :meth:`perturb`.
Returns
-------
Population
Offspring population of size *offspring_size*.
"""
pop_order = np.argsort(parents.fitness)[::-1]
pop_matrix = parents.genotype_matrix[pop_order, :]
new_mean = np.average(pop_matrix, axis=0, weights=self._weights)
y_best = (pop_matrix - self.params.mean) / self.params.sigma
mean_diff = (new_mean - self.params.mean) / self.params.sigma
# Compute path values
term1 = (1 - self._cc) * self._path_cov
term2_a = self._cc * (2 - self._cc) * self._effective_pop_size
term2 = np.sqrt(term2_a) * mean_diff
self._path_cov = term1 + term2
w = sp.linalg.solve_triangular(self._A.T, mean_diff, lower=False)
term1 = (1 - self._csigma) * self._path_sigma
term2_a = self._csigma * (2 - self._csigma) * self._effective_pop_size
term2 = np.sqrt(term2_a) * w
self._path_sigma = term1 + term2
term1 = (1 - self._ccov) * self._cov
term2 = (self._ccov / self._effective_pop_size) * np.outer(self._path_cov, self._path_cov)
term3_a = self._ccov * (1 - (1 / self._effective_pop_size))
term_b = np.zeros((self.n, self.n))
for i in range(self.mu):
term_b += self._weights[i] * np.outer(y_best[i], y_best[i])
term3 = term3_a * term_b
self._cov = term1 + term2 + term3
self._A = np.linalg.cholesky(self._cov)
# update sigma
term1_a = np.linalg.norm(self._path_sigma) - self._xin
term1_b = self._dsigma * self._xin
new_sigma = self.params.sigma * np.exp(term1_a / term1_b)
# Breaks to ensure numerical stability
if new_sigma < 1e-10:
self.finish = True
if np.linalg.cond(self._cov) > 1e14:
self.finish = True
self.update_kwargs(mean=new_mean, sigma=new_sigma)
self.operator.update_kwargs(mean=new_mean, cov=new_sigma * new_sigma * self._cov)
return super().perturb(parents, **kwargs)