Source code for metaheuristic_designer.history_tracker

"""
Module for recording per-generation metrics and exporting them as pandas DataFrames.
"""

from __future__ import annotations

import copy
import logging
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd

if TYPE_CHECKING:
    from .algorithm import Algorithm

logger = logging.getLogger(__name__)


class HistoryTracker:
    """Record per-generation metrics and export them as pandas DataFrames.

    The tracker is called once per generation (via :meth:`step`) and stores
    the requested statistics. After the run the data can be retrieved with
    :meth:`to_pandas` (a summary of best, median, worst, diversity, and
    scheduled parameters) or :meth:`to_pandas_full_objective` (the full
    objective vector of every individual at each generation).

    Parameters
    ----------
    track_best : bool, optional
        Record the best objective and solution (default ``True``).
    track_median : bool, optional
        Record the median objective (default ``False``).
    track_worst : bool, optional
        Record the worst objective (default ``False``).
    track_full_objective : bool, optional
        Store the complete objective vector of the population at every
        generation. Enables :meth:`to_pandas_full_objective`.
    track_full_population : bool, optional
        Store the entire decoded population at every generation. This can
        consume a lot of memory.
    track_parameters : bool, optional
        Record the current value of all scheduled parameters (e.g., mutation
        strength, branch probability).
    track_diversity : bool, optional
        Compute and store a simple diversity metric (average Euclidean
        distance from the centroid).
    """

    def __init__(
        self,
        track_best: bool = True,
        track_median: bool = False,
        track_worst: bool = False,
        track_full_objective: bool = False,
        track_full_population: bool = False,
        track_parameters: bool = False,
        track_diversity: bool = False,
    ):
        self.track_best = track_best
        self.track_median = track_median
        self.track_worst = track_worst
        self.track_full_objective = track_full_objective
        self.track_full_population = track_full_population
        self.track_diversity = track_diversity
        self.track_parameters = track_parameters

        self._reset_buffers()

    def _reset_buffers(self) -> None:
        """Create (or replace) every history buffer with a fresh empty list."""
        self.best_solutions = []
        self.median_solutions = []
        self.worst_solutions = []

        self.best_objective = []
        self.median_objective = []
        self.worst_objective = []

        self.complete_population = []
        self.complete_objective = []

        self.diversity = []
        self.parameters = []
        self.recorded_iterations = []

    @staticmethod
    def _snapshot(value):
        """Return a shallow copy of *value* so the history does not alias live data.

        For NumPy arrays (including row views) ``copy.copy`` produces an
        independent array; for other containers only the outer container is
        duplicated.
        """
        return copy.copy(value)

    def restart(self) -> None:
        """Clear all recorded data.

        Call this when an algorithm is reset to start a fresh run. The
        ``track_*`` flags are left untouched.
        """
        self._reset_buffers()

    def step(self, algorithm: "Algorithm") -> None:
        """Record metrics for the current generation.

        Values taken from the population are stored as shallow snapshots, so
        a later in-place update of the population's arrays does not rewrite
        generations that were already recorded.

        Parameters
        ----------
        algorithm : Algorithm
            The running algorithm from which the current population, fitness,
            objective, and parameters are extracted.
        """
        population = algorithm.population
        solutions = population.decode()
        fitness_array = population.fitness
        objective_array = population.objective

        # Ascending order: index 0 is the lowest fitness (recorded as worst),
        # index -1 the highest fitness (recorded as best).
        fitness_order = np.argsort(fitness_array)

        self.recorded_iterations.append(algorithm.stopping_condition.iterations)

        if self.track_full_objective:
            self.complete_objective.append(self._snapshot(objective_array))

        if self.track_full_population:
            self.complete_population.append(self._snapshot(solutions))

        if self.track_best:
            best_idx = fitness_order[-1]
            self.best_solutions.append(self._snapshot(solutions[best_idx]))
            self.best_objective.append(self._snapshot(objective_array[best_idx]))

        if self.track_median:
            # The median is always an actual individual; for an even-sized
            # population the lower of the two middle individuals is used.
            half_size = len(fitness_array) // 2
            if len(fitness_array) % 2 == 0:
                median_idx = fitness_order[half_size - 1]
            else:
                median_idx = fitness_order[half_size]
            self.median_solutions.append(self._snapshot(solutions[median_idx]))
            self.median_objective.append(self._snapshot(objective_array[median_idx]))

        if self.track_worst:
            worst_idx = fitness_order[0]
            self.worst_solutions.append(self._snapshot(solutions[worst_idx]))
            self.worst_objective.append(self._snapshot(objective_array[worst_idx]))

        if self.track_diversity:
            # WIP: basic Euclidean-distance-based metric (mean distance of each
            # genotype to the population centroid); more flexible methods are
            # expected in later versions.
            genotype_matrix = population.genotype_matrix
            centroid = np.mean(genotype_matrix, axis=0)
            dists = np.sqrt(np.sum((genotype_matrix - centroid) ** 2, axis=1))
            self.diversity.append(np.mean(dists))

        if self.track_parameters:
            self.parameters.append(algorithm.gather_parameters())

    def to_pandas(self) -> pd.DataFrame:
        """Return a DataFrame with per-generation summary metrics.

        Columns include ``iteration`` and, for each metric that is enabled,
        ``best_objective``, ``median_objective``, ``worst_objective``,
        ``diversity``, and one column per scheduled parameter. The DataFrame
        is intended for easy plotting with seaborn or matplotlib.

        Returns
        -------
        pandas.DataFrame
        """
        data_dict = {"iteration": np.asarray(self.recorded_iterations)}

        if self.track_best:
            data_dict["best_objective"] = self.best_objective

        if self.track_median:
            data_dict["median_objective"] = self.median_objective

        if self.track_worst:
            data_dict["worst_objective"] = self.worst_objective

        if self.track_diversity:
            data_dict["diversity"] = self.diversity

        if self.track_parameters:
            # Let pandas turn the list of per-generation parameter dicts into
            # columns, then convert back to a dictionary of lists.
            param_df = pd.DataFrame(self.parameters)
            data_dict.update(param_df.to_dict(orient="list"))

        return pd.DataFrame.from_dict(data_dict)

    def to_pandas_full_objective(self) -> pd.DataFrame:
        """Return a wide-format DataFrame of all individual objective values.

        Each column ``Individual_0``, ``Individual_1``, ... holds the
        objective of one member of the population across generations. This is
        useful for boxplots or distribution plots of fitness.

        Returns
        -------
        pandas.DataFrame
            Empty DataFrame if *track_full_objective* was not enabled.
        """
        if not self.track_full_objective:
            logger.warning("Tried to extract the full objective history but it was not being tracked.")
            return pd.DataFrame()

        data_dict = {"iteration": np.asarray(self.recorded_iterations)}

        # Rows are generations; transposing iterates once per individual.
        complete_objective_arr = np.asarray(self.complete_objective)
        data_dict.update(
            {f"Individual_{idx:d}": objective_values for idx, objective_values in enumerate(complete_objective_arr.T)}
        )

        return pd.DataFrame.from_dict(data_dict)

    def get_state(self) -> dict:
        """Return a dictionary containing the recorded history.

        Returns
        -------
        dict
            Always contains ``class_name``. Keys such as ``best_objective``,
            ``best_solutions``, ``complete_objectives``, ``populations`` and
            ``diversity`` are present only when the matching ``track_*`` flag
            is enabled.
        """
        data = {
            "class_name": self.__class__.__name__,
        }

        if self.track_best:
            data["best_solutions"] = self.best_solutions
            data["best_objective"] = self.best_objective

        if self.track_median:
            data["median_solutions"] = self.median_solutions
            data["median_objective"] = self.median_objective

        if self.track_worst:
            data["worst_solutions"] = self.worst_solutions
            data["worst_objective"] = self.worst_objective

        # Gate on the flag, like every other metric, so an enabled tracker
        # that has not recorded anything yet still exposes the (empty) entry.
        if self.track_full_objective:
            data["complete_objectives"] = self.complete_objective

        if self.track_full_population:
            data["populations"] = self.complete_population

        if self.track_diversity:
            data["diversity"] = self.diversity
            # Legacy misspelled key, kept so existing consumers keep working.
            data["divesity"] = self.diversity

        return data