123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464 |
- /*
- * Copyright Nick Thompson, 2024
- * Use, modification and distribution are subject to the
- * Boost Software License, Version 1.0. (See accompanying file
- * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- */
- #ifndef BOOST_MATH_OPTIMIZATION_JSO_HPP
- #define BOOST_MATH_OPTIMIZATION_JSO_HPP
- #include <atomic>
- #include <boost/math/optimization/detail/common.hpp>
- #include <cmath>
- #include <iostream>
- #include <limits>
- #include <random>
- #include <sstream>
- #include <stdexcept>
- #include <thread>
- #include <type_traits>
- #include <utility>
- #include <vector>
- namespace boost::math::optimization {
- #ifndef BOOST_MATH_DEBUG_JSO
- #define BOOST_MATH_DEBUG_JSO 0
- #endif
- // Follows: Brest, Janez, Mirjam Sepesy Maucec, and Borko Boskovic. "Single
- // objective real-parameter optimization: Algorithm jSO." 2017 IEEE congress on
- // evolutionary computation (CEC). IEEE, 2017. In the sequel, this wil be
- // referred to as "the reference". Note that the reference is rather difficult
- // to understand without also reading: Zhang, J., & Sanderson, A. C. (2009).
- // JADE: adaptive differential evolution with optional external archive.
- // IEEE Transactions on evolutionary computation, 13(5), 945-958."
- template <typename ArgumentContainer> struct jso_parameters {
- using Real = typename ArgumentContainer::value_type;
- using DimensionlessReal = decltype(Real()/Real());
- ArgumentContainer lower_bounds;
- ArgumentContainer upper_bounds;
- // Population in the first generation.
- // This defaults to 0, which indicates "use the default specified in the
- // referenced paper". To wit, initial population size
- // =ceil(25log(D+1)sqrt(D)), where D is the dimension of the problem.
- size_t initial_population_size = 0;
- // Maximum number of function evaluations.
- // The default of 0 indicates "use max_function_evaluations = 10,000D", where
- // D is the dimension of the problem.
- size_t max_function_evaluations = 0;
- size_t threads = std::thread::hardware_concurrency();
- ArgumentContainer const *initial_guess = nullptr;
- };
- template <typename ArgumentContainer>
- void validate_jso_parameters(jso_parameters<ArgumentContainer> &jso_params) {
- using std::isfinite;
- using std::isnan;
- std::ostringstream oss;
- if (jso_params.threads == 0) {
- oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
- oss << ": Requested zero threads to perform the calculation, but at least "
- "1 is required.";
- throw std::invalid_argument(oss.str());
- }
- detail::validate_bounds(jso_params.lower_bounds, jso_params.upper_bounds);
- auto const dimension = jso_params.lower_bounds.size();
- if (jso_params.initial_population_size == 0) {
- // Ever so slightly different than the reference-the dimension can be 1,
- // but if we followed the reference, the population size would then be zero.
- jso_params.initial_population_size = static_cast<size_t>(
- std::ceil(25 * std::log(dimension + 1.0) * sqrt(dimension)));
- }
- if (jso_params.max_function_evaluations == 0) {
- // Recommended value from the reference:
- jso_params.max_function_evaluations = 10000 * dimension;
- }
- if (jso_params.initial_population_size < 4) {
- oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
- oss << ": The population size must be at least 4, but requested population "
- "size of "
- << jso_params.initial_population_size << ".";
- throw std::invalid_argument(oss.str());
- }
- if (jso_params.threads > jso_params.initial_population_size) {
- oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
- oss << ": There must be more individuals in the population than threads.";
- throw std::invalid_argument(oss.str());
- }
- if (jso_params.initial_guess) {
- detail::validate_initial_guess(*jso_params.initial_guess,
- jso_params.lower_bounds,
- jso_params.upper_bounds);
- }
- }
- template <typename ArgumentContainer, class Func, class URBG>
- ArgumentContainer
- jso(const Func cost_function, jso_parameters<ArgumentContainer> &jso_params,
- URBG &gen,
- std::invoke_result_t<Func, ArgumentContainer> target_value =
- std::numeric_limits<
- std::invoke_result_t<Func, ArgumentContainer>>::quiet_NaN(),
- std::atomic<bool> *cancellation = nullptr,
- std::atomic<std::invoke_result_t<Func, ArgumentContainer>>
- *current_minimum_cost = nullptr,
- std::vector<std::pair<ArgumentContainer,
- std::invoke_result_t<Func, ArgumentContainer>>>
- *queries = nullptr) {
- using Real = typename ArgumentContainer::value_type;
- using DimensionlessReal = decltype(Real()/Real());
- validate_jso_parameters(jso_params);
- using ResultType = std::invoke_result_t<Func, ArgumentContainer>;
- using std::abs;
- using std::cauchy_distribution;
- using std::clamp;
- using std::isnan;
- using std::max;
- using std::round;
- using std::isfinite;
- using std::uniform_real_distribution;
- // Refer to the referenced paper, pseudocode on page 1313:
- // Algorithm 1, jSO, Line 1:
- std::vector<ArgumentContainer> archive;
- // Algorithm 1, jSO, Line 2
- // "Initialize population P_g = (x_0,g, ..., x_{np-1},g) randomly.
- auto population = detail::random_initial_population(
- jso_params.lower_bounds, jso_params.upper_bounds,
- jso_params.initial_population_size, gen);
- if (jso_params.initial_guess) {
- population[0] = *jso_params.initial_guess;
- }
- size_t dimension = jso_params.lower_bounds.size();
- // Don't force the user to initialize to sane value:
- if (current_minimum_cost) {
- *current_minimum_cost = std::numeric_limits<ResultType>::infinity();
- }
- std::atomic<bool> target_attained = false;
- std::vector<ResultType> cost(jso_params.initial_population_size,
- std::numeric_limits<ResultType>::quiet_NaN());
- for (size_t i = 0; i < cost.size(); ++i) {
- cost[i] = cost_function(population[i]);
- if (!isnan(target_value) && cost[i] <= target_value) {
- target_attained = true;
- }
- if (current_minimum_cost && cost[i] < *current_minimum_cost) {
- *current_minimum_cost = cost[i];
- }
- if (queries) {
- queries->push_back(std::make_pair(population[i], cost[i]));
- }
- }
- std::vector<size_t> indices = detail::best_indices(cost);
- std::atomic<size_t> function_evaluations = population.size();
- #if BOOST_MATH_DEBUG_JSO
- {
- auto const &min_cost = cost[indices[0]];
- auto const &location = population[indices[0]];
- std::cout << __FILE__ << ":" << __LINE__ << ":" << __func__;
- std::cout << "\n\tMinimum cost after evaluation of initial random "
- "population of size "
- << population.size() << " is " << min_cost << "."
- << "\n\tLocation {";
- for (size_t i = 0; i < location.size() - 1; ++i) {
- std::cout << location[i] << ", ";
- }
- std::cout << location.back() << "}.\n";
- }
- #endif
- // Algorithm 1: jSO algorithm, Line 3:
- // "Set all values in M_F to 0.5":
- // I believe this is a typo: Compare with "Section B. Experimental Results",
- // last bullet, which claims this should be set to 0.3. The reference
- // implementation also does 0.3:
- size_t H = 5;
- std::vector<DimensionlessReal> M_F(H, static_cast<DimensionlessReal>(0.3));
- // Algorithm 1: jSO algorithm, Line 4:
- // "Set all values in M_CR to 0.8":
- std::vector<DimensionlessReal> M_CR(H, static_cast<DimensionlessReal>(0.8));
- std::uniform_real_distribution<DimensionlessReal> unif01(DimensionlessReal(0), DimensionlessReal(1));
- bool keep_going = !target_attained;
- if (cancellation && *cancellation) {
- keep_going = false;
- }
- // k from:
- // http://metahack.org/CEC2014-Tanabe-Fukunaga.pdf, Algorithm 1:
- // Determines where Lehmer means are stored in the memory:
- size_t k = 0;
- size_t minimum_population_size = (max)(size_t(4), size_t(jso_params.threads));
- while (keep_going) {
- // Algorithm 1, jSO, Line 7:
- std::vector<DimensionlessReal> S_CR;
- std::vector<DimensionlessReal> S_F;
- // Equation 9 of L-SHADE:
- std::vector<ResultType> delta_f;
- for (size_t i = 0; i < population.size(); ++i) {
- // Algorithm 1, jSO, Line 9:
- auto ri = gen() % H;
- // Algorithm 1, jSO, Line 10-13:
- // Again, what is written in the pseudocode is not quite right.
- // What they mean is mu_F = 0.9-the historical memory is not used.
- // I confess I find it weird to store the historical memory if we're just
- // gonna ignore it, but that's what the paper and the reference
- // implementation says!
- DimensionlessReal mu_F = static_cast<DimensionlessReal>(0.9);
- DimensionlessReal mu_CR = static_cast<DimensionlessReal>(0.9);
- if (ri != H - 1) {
- mu_F = M_F[ri];
- mu_CR = M_CR[ri];
- }
- // Algorithm 1, jSO, Line 14-18:
- DimensionlessReal crossover_probability = static_cast<DimensionlessReal>(0);
- if (mu_CR >= 0) {
- using std::normal_distribution;
- normal_distribution<DimensionlessReal> normal(mu_CR, static_cast<DimensionlessReal>(0.1));
- crossover_probability = normal(gen);
- // Clamp comes from L-SHADE description:
- crossover_probability = clamp(crossover_probability, DimensionlessReal(0), DimensionlessReal(1));
- }
- // Algorithm 1, jSO, Line 19-23:
- // Note that the pseudocode uses a "max_generations parameter",
- // but the reference implementation does not.
- // Since we already require specification of max_function_evaluations,
- // the pseudocode adds an unnecessary parameter.
- if (4 * function_evaluations < jso_params.max_function_evaluations) {
- crossover_probability = (max)(crossover_probability, DimensionlessReal(0.7));
- } else if (2 * function_evaluations <
- jso_params.max_function_evaluations) {
- crossover_probability = (max)(crossover_probability, DimensionlessReal(0.6));
- }
- // Algorithm 1, jSO, Line 24-27:
- // Note the adjustments to the pseudocode given in the reference
- // implementation.
- cauchy_distribution<DimensionlessReal> cauchy(mu_F, static_cast<DimensionlessReal>(0.1));
- DimensionlessReal F;
- do {
- F = cauchy(gen);
- if (F > 1) {
- F = 1;
- }
- } while (F <= 0);
- DimensionlessReal threshold = static_cast<DimensionlessReal>(7) / static_cast<DimensionlessReal>(10);
- if ((10 * function_evaluations <
- 6 * jso_params.max_function_evaluations) &&
- (F > threshold)) {
- F = threshold;
- }
- // > p value for mutation strategy linearly decreases from pmax to pmin
- // during the evolutionary process, > where pmax = 0.25 in jSO and pmin =
- // pmax/2.
- DimensionlessReal p = DimensionlessReal(0.25) * (1 - static_cast<DimensionlessReal>(function_evaluations) /
- (2 * jso_params.max_function_evaluations));
- // Equation (4) of the reference:
- DimensionlessReal Fw = static_cast<DimensionlessReal>(1.2) * F;
- if (10 * function_evaluations < 4 * jso_params.max_function_evaluations) {
- if (10 * function_evaluations <
- 2 * jso_params.max_function_evaluations) {
- Fw = static_cast<DimensionlessReal>(0.7) * F;
- } else {
- Fw = static_cast<DimensionlessReal>(0.8) * F;
- }
- }
- // Algorithm 1, jSO, Line 28:
- // "ui,g := current-to-pBest-w/1/bin using Eq. (3)"
- // This is not explained in the reference, but "current-to-pBest"
- // strategy means "select randomly among the best values available."
- // See:
- // Zhang, J., & Sanderson, A. C. (2009).
- // JADE: adaptive differential evolution with optional external archive.
- // IEEE Transactions on evolutionary computation, 13(5), 945-958.
- // > As a generalization of DE/current-to- best,
- // > DE/current-to-pbest utilizes not only the best solution information
- // > but also the information of other good solutions.
- // > To be specific, any of the top 100p%, p in (0, 1],
- // > solutions can be randomly chosen in DE/current-to-pbest to play the
- // role > designed exclusively for the single best solution in
- // DE/current-to-best."
- size_t max_p_index = static_cast<size_t>(std::ceil(p * indices.size()));
- size_t p_best_idx = gen() % max_p_index;
- // We now need r1, r2 so that r1 != r2 != i:
- size_t r1;
- do {
- r1 = gen() % population.size();
- } while (r1 == i);
- size_t r2;
- do {
- r2 = gen() % (population.size() + archive.size());
- } while (r2 == r1 || r2 == i);
- ArgumentContainer trial_vector;
- if constexpr (detail::has_resize_v<ArgumentContainer>) {
- trial_vector.resize(dimension);
- }
- auto const &xi = population[i];
- auto const &xr1 = population[r1];
- ArgumentContainer xr2;
- if (r2 < population.size()) {
- xr2 = population[r2];
- } else {
- xr2 = archive[r2 - population.size()];
- }
- auto const &x_p_best = population[p_best_idx];
- for (size_t j = 0; j < dimension; ++j) {
- auto guaranteed_changed_idx = gen() % dimension;
- if (unif01(gen) < crossover_probability ||
- j == guaranteed_changed_idx) {
- auto tmp = xi[j] + Fw * (x_p_best[j] - xi[j]) + F * (xr1[j] - xr2[j]);
- auto const &lb = jso_params.lower_bounds[j];
- auto const &ub = jso_params.upper_bounds[j];
- // Some others recommend regenerating the indices rather than
- // clamping; I dunno seems like it could get stuck regenerating . . .
- // Another suggestion is provided in:
- // "JADE: Adaptive Differential Evolution with Optional External
- // Archive" page 947. Perhaps we should implement it!
- trial_vector[j] = clamp(tmp, lb, ub);
- } else {
- trial_vector[j] = population[i][j];
- }
- }
- auto trial_cost = cost_function(trial_vector);
- function_evaluations++;
- if (isnan(trial_cost)) {
- continue;
- }
- if (queries) {
- queries->push_back(std::make_pair(trial_vector, trial_cost));
- }
- // Successful trial:
- if (trial_cost < cost[i] || isnan(cost[i])) {
- if (!isnan(target_value) && trial_cost <= target_value) {
- target_attained = true;
- }
- if (current_minimum_cost && trial_cost < *current_minimum_cost) {
- *current_minimum_cost = trial_cost;
- }
- // Can't decide on improvement if the previous evaluation was a NaN:
- if (!isnan(cost[i])) {
- if (crossover_probability > 1 || crossover_probability < 0) {
- throw std::domain_error("Crossover probability is weird.");
- }
- if (F > 1 || F < 0) {
- throw std::domain_error("Scale factor (F) is weird.");
- }
- S_CR.push_back(crossover_probability);
- S_F.push_back(F);
- delta_f.push_back(abs(cost[i] - trial_cost));
- }
- // Build the historical archive:
- if (archive.size() < cost.size()) {
- archive.push_back(trial_vector);
- } else {
- // If it's already built, then put the successful trial in a random index:
- archive.resize(cost.size());
- auto idx = gen() % archive.size();
- archive[idx] = trial_vector;
- }
- cost[i] = trial_cost;
- population[i] = trial_vector;
- }
- }
- indices = detail::best_indices(cost);
- // If there are no successful updates this generation, we do not update the
- // historical memory:
- if (S_CR.size() > 0) {
- std::vector<DimensionlessReal> weights(S_CR.size(),
- std::numeric_limits<DimensionlessReal>::quiet_NaN());
- ResultType delta_sum = static_cast<ResultType>(0);
- for (auto const &delta : delta_f) {
- delta_sum += delta;
- }
- if (delta_sum <= 0 || !isfinite(delta_sum)) {
- std::ostringstream oss;
- oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
- oss << "\n\tYou've hit a bug: The sum of improvements must be strictly "
- "positive, but got "
- << delta_sum << " improvement from " << S_CR.size()
- << " successful updates.\n";
- oss << "\tImprovements: {" << std::hexfloat;
- for (size_t l = 0; l < delta_f.size() -1; ++l) {
- oss << delta_f[l] << ", ";
- }
- oss << delta_f.back() << "}.\n";
- throw std::logic_error(oss.str());
- }
- for (size_t i = 0; i < weights.size(); ++i) {
- weights[i] = delta_f[i] / delta_sum;
- }
- M_CR[k] = detail::weighted_lehmer_mean(S_CR, weights);
- M_F[k] = detail::weighted_lehmer_mean(S_F, weights);
- }
- k++;
- if (k == M_F.size()) {
- k = 0;
- }
- if (target_attained) {
- break;
- }
- if (cancellation && *cancellation) {
- break;
- }
- if (function_evaluations >= jso_params.max_function_evaluations) {
- break;
- }
- // Linear population size reduction:
- size_t new_population_size = static_cast<size_t>(round(
- -double(jso_params.initial_population_size - minimum_population_size) *
- function_evaluations /
- static_cast<double>(jso_params.max_function_evaluations) +
- jso_params.initial_population_size));
- size_t num_erased = population.size() - new_population_size;
- std::vector<size_t> indices_to_erase(num_erased);
- size_t j = 0;
- for (size_t i = population.size() - 1; i >= new_population_size; --i) {
- indices_to_erase[j++] = indices[i];
- }
- std::sort(indices_to_erase.rbegin(), indices_to_erase.rend());
- for (auto const &idx : indices_to_erase) {
- population.erase(population.begin() + idx);
- cost.erase(cost.begin() + idx);
- }
- indices.resize(new_population_size);
- }
- #if BOOST_MATH_DEBUG_JSO
- {
- auto const &min_cost = cost[indices[0]];
- auto const &location = population[indices[0]];
- std::cout << __FILE__ << ":" << __LINE__ << ":" << __func__;
- std::cout << "\n\tMinimum cost after completion is " << min_cost
- << ".\n\tLocation: {";
- for (size_t i = 0; i < location.size() - 1; ++i) {
- std::cout << location[i] << ", ";
- }
- std::cout << location.back() << "}.\n";
- std::cout << "\tRequired " << function_evaluations
- << " function calls out of "
- << jso_params.max_function_evaluations << " allowed.\n";
- if (target_attained) {
- std::cout << "\tReason for return: Target value attained.\n";
- }
- std::cout << "\tHistorical crossover probabilities (M_CR): {";
- for (size_t i = 0; i < M_CR.size() - 1; ++i) {
- std::cout << M_CR[i] << ", ";
- }
- std::cout << M_CR.back() << "}.\n";
- std::cout << "\tHistorical scale factors (M_F): {";
- for (size_t i = 0; i < M_F.size() - 1; ++i) {
- std::cout << M_F[i] << ", ";
- }
- std::cout << M_F.back() << "}.\n";
- std::cout << "\tFinal population size: " << population.size() << ".\n";
- }
- #endif
- return population[indices[0]];
- }
- } // namespace boost::math::optimization
- #endif
|