# Source code for metaheuristic_designer.history_tracker

"""Module for recording per-generation metrics and exporting them as pandas DataFrames."""

from __future__ import annotations
from abc import ABC, abstractmethod
import logging
from typing import TYPE_CHECKING
import numpy as np
import pandas as pd

if TYPE_CHECKING:
    from .algorithm import Algorithm

logger = logging.getLogger(__name__)



# ("[docs]" link marker left over from the rendered documentation page; not part of the module.)
class HistoryTracker(ABC):
    """Abstract interface for objects that record the history of a run.

    Concrete trackers must implement :meth:`restart`, :meth:`update`,
    :meth:`to_pandas` and :meth:`get_state`.  Overriding
    :meth:`to_pandas_full_objective` is optional: the default implementation
    logs a warning and returns an empty DataFrame.
    """

    @abstractmethod
    def restart(self):
        """Clear all recorded data.

        Call this when an algorithm is reset to start a fresh run.
        """

    @abstractmethod
    def update(self, algorithm: Algorithm):
        """Record metrics for the current generation.

        Parameters
        ----------
        algorithm : Algorithm
            The running algorithm from which the current population,
            fitness, objective, and parameters are extracted.
        """

    @abstractmethod
    def to_pandas(self) -> pd.DataFrame:
        """Return a DataFrame with per-generation summary metrics.

        Returns
        -------
        pandas.DataFrame
        """

    def to_pandas_full_objective(self) -> pd.DataFrame:
        """Return a wide-format DataFrame of all individual objective values.

        Each column ``Individual_0``, ``Individual_1``, ... holds the
        objective of one member of the population across generations.
        This is useful for boxplots or distribution plots of fitness.

        This base implementation does not track anything: it emits a
        warning through the module logger and returns an empty DataFrame.
        Subclasses that store the full objective history should override it.

        Returns
        -------
        pandas.DataFrame
            Empty by default.
        """

        logger.warning("Full objective tracking not implemented.")
        return pd.DataFrame()

    @abstractmethod
    def get_state(self) -> dict:
        """Return a dictionary containing the recorded history.

        Returns
        -------
        dict
        """





# ("[docs]" link marker left over from the rendered documentation page; not part of the module.)
class ConfigurableHistoryTracker(HistoryTracker):
    """Record per-generation metrics and export them as pandas DataFrames.

    :meth:`update` is intended to be called once per generation and stores
    only the statistics that were enabled at construction time.  After the
    run the data can be retrieved with :meth:`to_pandas` (a summary of best,
    median, worst, diversity, and scheduled parameters),
    :meth:`to_pandas_full_objective` (the full objective vector of every
    individual at each generation) or :meth:`get_state` (a plain dictionary).

    Parameters
    ----------
    track_best : bool, optional
        Record the best objective and solution (default ``True``).
    track_median : bool, optional
        Record the median objective and solution (default ``False``).
    track_worst : bool, optional
        Record the worst objective and solution (default ``False``).
    track_full_objective : bool, optional
        Store the complete objective vector of the population at every
        generation.  Enables :meth:`to_pandas_full_objective`
        (default ``False``).
    track_full_population : bool, optional
        Store the entire decoded population at every generation.
        This can consume a lot of memory (default ``False``).
    track_parameters : bool, optional
        Record the value returned by ``algorithm.gather_parameters()`` at
        every generation, e.g. mutation strength or branch probability
        (default ``False``).
    track_diversity : bool, optional
        Compute and store a simple diversity metric (average
        Euclidean distance from the centroid) (default ``False``).
    """

    def __init__(
        self,
        track_best: bool = True,
        track_median: bool = False,
        track_worst: bool = False,
        track_full_objective: bool = False,
        track_full_population: bool = False,
        track_parameters: bool = False,
        track_diversity: bool = False,
    ):
        self.track_best = track_best
        self.track_median = track_median
        self.track_worst = track_worst
        self.track_full_objective = track_full_objective
        self.track_full_population = track_full_population
        self.track_diversity = track_diversity
        self.track_parameters = track_parameters

        # The history containers are created by the same helper that
        # ``restart`` uses, so the two can never drift apart.
        self._clear_history()

    def _clear_history(self):
        """Create (or replace) every history container with an empty list."""

        self.best_solutions = []
        self.median_solutions = []
        self.worst_solutions = []

        self.best_objective = []
        self.median_objective = []
        self.worst_objective = []

        self.complete_population = []
        self.complete_objective = []
        self.diversity = []
        self.parameters = []

        self.recorded_iterations = []

    def restart(self):
        """Clear all recorded data.

        Call this when an algorithm is reset to start a fresh run.  The
        ``track_*`` configuration flags are left untouched.
        """

        self._clear_history()

    def update(self, algorithm: Algorithm):
        """Record metrics for the current generation.

        The iteration counter is always recorded; every other metric is
        stored only if the corresponding ``track_*`` flag is set.

        Parameters
        ----------
        algorithm : Algorithm
            The running algorithm from which the current population,
            fitness, objective, and parameters are extracted.
        """

        population = algorithm.population
        solutions = population.decode()
        fitness_array = population.fitness
        objective_array = population.objective

        # Ascending order of fitness: position 0 is the lowest fitness
        # (recorded as "worst") and position -1 the highest ("best").
        fitness_order = np.argsort(fitness_array)

        self.recorded_iterations.append(algorithm.stopping_condition.iterations)

        if self.track_full_objective:
            # Store a snapshot rather than a reference so that later in-place
            # changes to the population's objective array cannot alter
            # generations that were already recorded.
            self.complete_objective.append(np.copy(objective_array))

        if self.track_full_population:
            self.complete_population.append(solutions)

        if self.track_best:
            best_idx = fitness_order[-1]
            self.best_solutions.append(solutions[best_idx])
            self.best_objective.append(objective_array[best_idx])

        if self.track_median:
            # Lower median: for an even-sized population this picks the
            # individual just below the middle, for an odd-sized one the
            # exact middle.  An actual individual is always selected, never
            # an average of two.
            median_idx = fitness_order[(len(fitness_array) - 1) // 2]
            self.median_solutions.append(solutions[median_idx])
            self.median_objective.append(objective_array[median_idx])

        if self.track_worst:
            worst_idx = fitness_order[0]
            self.worst_solutions.append(solutions[worst_idx])
            self.worst_objective.append(objective_array[worst_idx])

        if self.track_diversity:
            # WIP: for now this is a basic Euclidean-distance-based metric
            # (mean distance of each genotype to the population centroid);
            # more flexible methods are expected in future versions.
            genotype_matrix = population.genotype_matrix
            centroid = np.mean(genotype_matrix, axis=0)
            dists = np.sqrt(np.sum((genotype_matrix - centroid) ** 2, axis=1))

            self.diversity.append(np.mean(dists))

        if self.track_parameters:
            self.parameters.append(algorithm.gather_parameters())

    def to_pandas(self) -> pd.DataFrame:
        """Return a DataFrame with per-generation summary metrics.

        The ``iteration`` column is always present.  ``best_objective``,
        ``median_objective``, ``worst_objective`` and ``diversity`` are
        included when the matching ``track_*`` flag is set, and one column
        per recorded parameter is added when ``track_parameters`` is set.
        The DataFrame is intended for easy plotting with seaborn or
        matplotlib.

        Returns
        -------
        pandas.DataFrame
        """

        data_dict = {"iteration": np.asarray(self.recorded_iterations)}

        if self.track_best:
            data_dict["best_objective"] = self.best_objective

        if self.track_median:
            data_dict["median_objective"] = self.median_objective

        if self.track_worst:
            data_dict["worst_objective"] = self.worst_objective

        if self.track_diversity:
            data_dict["diversity"] = self.diversity

        if self.track_parameters:
            # Let pandas turn the list of per-generation records into
            # columns, then merge those columns into the summary dictionary.
            param_df = pd.DataFrame(self.parameters)
            data_dict.update(param_df.to_dict(orient="list"))

        return pd.DataFrame.from_dict(data_dict)

    def to_pandas_full_objective(self) -> pd.DataFrame:
        """Return a wide-format DataFrame of all individual objective values.

        Each column ``Individual_0``, ``Individual_1``, ... holds the
        objective of one member of the population across generations.
        This is useful for boxplots or distribution plots of fitness.

        Returns
        -------
        pandas.DataFrame
            Empty DataFrame if *track_full_objective* was not enabled.
        """

        if not self.track_full_objective:
            logger.warning("Tried to extract the full objective history but it was not being tracked.")
            return pd.DataFrame()

        data_dict = {"iteration": np.asarray(self.recorded_iterations)}

        # Rows are generations; transposing lets us iterate per individual.
        complete_objective_arr = np.asarray(self.complete_objective)
        data_dict.update(
            {f"Individual_{idx:d}": objective_values for idx, objective_values in enumerate(complete_objective_arr.T)}
        )

        return pd.DataFrame.from_dict(data_dict)

    def get_state(self) -> dict:
        """Return a dictionary containing the recorded history.

        Returns
        -------
        dict
            Always contains ``class_name``.  Keys such as
            ``best_objective``, ``best_solutions``, ``complete_objectives``,
            ``populations`` and ``diversity`` are present only for the
            metrics that were enabled.
        """

        data = {
            "class_name": self.__class__.__name__,
        }

        if self.track_best:
            data["best_solutions"] = self.best_solutions
            data["best_objective"] = self.best_objective

        if self.track_median:
            data["median_solutions"] = self.median_solutions
            data["median_objective"] = self.median_objective

        if self.track_worst:
            data["worst_solutions"] = self.worst_solutions
            data["worst_objective"] = self.worst_objective

        # Gate on the configuration flag, like every other metric, so the key
        # is present (possibly as an empty list) whenever tracking is enabled.
        if self.track_full_objective:
            data["complete_objectives"] = self.complete_objective

        if self.track_full_population:
            data["populations"] = self.complete_population

        if self.track_diversity:
            data["diversity"] = self.diversity

        # NOTE(review): recorded iterations and tracked parameters are not
        # exported here -- confirm whether that is intentional.
        return data