/*
 * Copyright Nick Thompson, 2024
 * Use, modification and distribution are subject to the
 * Boost Software License, Version 1.0. (See accompanying file
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 */
#ifndef BOOST_MATH_OPTIMIZATION_JSO_HPP
#define BOOST_MATH_OPTIMIZATION_JSO_HPP
#include <algorithm>
#include <atomic>
#include <boost/math/optimization/detail/common.hpp>
#include <cmath>
#include <iostream>
#include <limits>
#include <random>
#include <sstream>
#include <stdexcept>
#include <thread>
#include <type_traits>
#include <utility>
#include <vector>
namespace boost::math::optimization {
#ifndef BOOST_MATH_DEBUG_JSO
#define BOOST_MATH_DEBUG_JSO 0
#endif
// Follows: Brest, Janez, Mirjam Sepesy Maucec, and Borko Boskovic. "Single
// objective real-parameter optimization: Algorithm jSO." 2017 IEEE Congress on
// Evolutionary Computation (CEC). IEEE, 2017. In the sequel, this will be
// referred to as "the reference". Note that the reference is rather difficult
// to understand without also reading: Zhang, J., & Sanderson, A. C. (2009).
// "JADE: Adaptive differential evolution with optional external archive."
// IEEE Transactions on Evolutionary Computation, 13(5), 945-958.
template <typename ArgumentContainer> struct jso_parameters {
  using Real = typename ArgumentContainer::value_type;
  using DimensionlessReal = decltype(Real() / Real());
  ArgumentContainer lower_bounds;
  ArgumentContainer upper_bounds;
  // Population size in the first generation.
  // This defaults to 0, which indicates "use the default specified in the
  // referenced paper". To wit, initial population size
  // = ceil(25*log(D+1)*sqrt(D)), where D is the dimension of the problem.
  size_t initial_population_size = 0;
  // Maximum number of function evaluations.
  // The default of 0 indicates "use max_function_evaluations = 10,000*D",
  // where D is the dimension of the problem.
  size_t max_function_evaluations = 0;
  size_t threads = std::thread::hardware_concurrency();
  ArgumentContainer const *initial_guess = nullptr;
};
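// Example usage (an illustrative sketch only, not part of the library; the
// sphere objective, dimension, bounds, and RNG seed below are arbitrary
// choices):
//
//   #include <boost/math/optimization/jso.hpp>
//   #include <random>
//   #include <vector>
//
//   double sphere(std::vector<double> const &x) {
//     double s = 0;
//     for (auto xi : x) { s += xi * xi; }
//     return s;
//   }
//
//   int main() {
//     using namespace boost::math::optimization;
//     jso_parameters<std::vector<double>> params;
//     params.lower_bounds = std::vector<double>(4, -10.0);
//     params.upper_bounds = std::vector<double>(4, 10.0);
//     std::mt19937_64 gen(12345);
//     // Runs until the evaluation budget (10,000*D by default) is spent:
//     auto best = jso(sphere, params, gen);
//   }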
template <typename ArgumentContainer>
void validate_jso_parameters(jso_parameters<ArgumentContainer> &jso_params) {
  using std::isfinite;
  using std::isnan;
  std::ostringstream oss;
  if (jso_params.threads == 0) {
    oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
    oss << ": Requested zero threads to perform the calculation, but at least "
           "1 is required.";
    throw std::invalid_argument(oss.str());
  }
  detail::validate_bounds(jso_params.lower_bounds, jso_params.upper_bounds);
  auto const dimension = jso_params.lower_bounds.size();
  if (jso_params.initial_population_size == 0) {
    // Ever so slightly different from the reference: the dimension can be 1,
    // but if we followed the reference, the population size would then be zero.
    jso_params.initial_population_size = static_cast<size_t>(
        std::ceil(25 * std::log(dimension + 1.0) * std::sqrt(dimension)));
  }
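  // For example, for a 10-dimensional problem the default above works out to
  // ceil(25*log(11)*sqrt(10)) = ceil(189.57...) = 190 individuals.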
  if (jso_params.max_function_evaluations == 0) {
    // Recommended value from the reference:
    jso_params.max_function_evaluations = 10000 * dimension;
  }
  if (jso_params.initial_population_size < 4) {
    oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
    oss << ": The population size must be at least 4, but requested population "
           "size of "
        << jso_params.initial_population_size << ".";
    throw std::invalid_argument(oss.str());
  }
  if (jso_params.threads > jso_params.initial_population_size) {
    oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
    oss << ": There must be at least as many individuals in the population as "
           "there are threads.";
    throw std::invalid_argument(oss.str());
  }
  if (jso_params.initial_guess) {
    detail::validate_initial_guess(*jso_params.initial_guess,
                                   jso_params.lower_bounds,
                                   jso_params.upper_bounds);
  }
}
template <typename ArgumentContainer, class Func, class URBG>
ArgumentContainer
jso(const Func cost_function, jso_parameters<ArgumentContainer> &jso_params,
    URBG &gen,
    std::invoke_result_t<Func, ArgumentContainer> target_value =
        std::numeric_limits<
            std::invoke_result_t<Func, ArgumentContainer>>::quiet_NaN(),
    std::atomic<bool> *cancellation = nullptr,
    std::atomic<std::invoke_result_t<Func, ArgumentContainer>>
        *current_minimum_cost = nullptr,
    std::vector<std::pair<ArgumentContainer,
                          std::invoke_result_t<Func, ArgumentContainer>>>
        *queries = nullptr) {
  using Real = typename ArgumentContainer::value_type;
  using DimensionlessReal = decltype(Real() / Real());
  validate_jso_parameters(jso_params);
  using ResultType = std::invoke_result_t<Func, ArgumentContainer>;
  using std::abs;
  using std::cauchy_distribution;
  using std::clamp;
  using std::isnan;
  using std::max;
  using std::round;
  using std::isfinite;
  using std::uniform_real_distribution;
  // Refer to the referenced paper, pseudocode on page 1313:
  // Algorithm 1, jSO, Line 1:
  std::vector<ArgumentContainer> archive;
  // Algorithm 1, jSO, Line 2:
  // "Initialize population P_g = (x_0,g, ..., x_{np-1},g) randomly."
  auto population = detail::random_initial_population(
      jso_params.lower_bounds, jso_params.upper_bounds,
      jso_params.initial_population_size, gen);
  if (jso_params.initial_guess) {
    population[0] = *jso_params.initial_guess;
  }
  size_t dimension = jso_params.lower_bounds.size();
  // Don't force the user to initialize to a sane value:
  if (current_minimum_cost) {
    *current_minimum_cost = std::numeric_limits<ResultType>::infinity();
  }
  std::atomic<bool> target_attained = false;
  std::vector<ResultType> cost(jso_params.initial_population_size,
                               std::numeric_limits<ResultType>::quiet_NaN());
  for (size_t i = 0; i < cost.size(); ++i) {
    cost[i] = cost_function(population[i]);
    if (!isnan(target_value) && cost[i] <= target_value) {
      target_attained = true;
    }
    if (current_minimum_cost && cost[i] < *current_minimum_cost) {
      *current_minimum_cost = cost[i];
    }
    if (queries) {
      queries->push_back(std::make_pair(population[i], cost[i]));
    }
  }
  std::vector<size_t> indices = detail::best_indices(cost);
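  // detail::best_indices sorts the population indices by cost, so indices[0]
  // refers to the best (lowest-cost) individual found so far; the trailing
  // (worst) indices are pruned during population size reduction below.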
  std::atomic<size_t> function_evaluations = population.size();
#if BOOST_MATH_DEBUG_JSO
  {
    auto const &min_cost = cost[indices[0]];
    auto const &location = population[indices[0]];
    std::cout << __FILE__ << ":" << __LINE__ << ":" << __func__;
    std::cout << "\n\tMinimum cost after evaluation of initial random "
                 "population of size "
              << population.size() << " is " << min_cost << "."
              << "\n\tLocation {";
    for (size_t i = 0; i < location.size() - 1; ++i) {
      std::cout << location[i] << ", ";
    }
    std::cout << location.back() << "}.\n";
  }
#endif
  // Algorithm 1: jSO algorithm, Line 3:
  // "Set all values in M_F to 0.5":
  // I believe this is a typo: Compare with "Section B. Experimental Results",
  // last bullet, which claims this should be set to 0.3. The reference
  // implementation also uses 0.3:
  size_t H = 5;
  std::vector<DimensionlessReal> M_F(H, static_cast<DimensionlessReal>(0.3));
  // Algorithm 1: jSO algorithm, Line 4:
  // "Set all values in M_CR to 0.8":
  std::vector<DimensionlessReal> M_CR(H, static_cast<DimensionlessReal>(0.8));
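  // M_F and M_CR are SHADE-style historical memories: circular buffers of H
  // scale factors and crossover probabilities. At the end of each generation
  // with at least one successful trial, entry k is overwritten with a weighted
  // Lehmer mean of the parameters that produced improvements (see the update
  // near the end of the main loop).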
  std::uniform_real_distribution<DimensionlessReal> unif01(
      DimensionlessReal(0), DimensionlessReal(1));
  bool keep_going = !target_attained;
  if (cancellation && *cancellation) {
    keep_going = false;
  }
  // k from:
  // http://metahack.org/CEC2014-Tanabe-Fukunaga.pdf, Algorithm 1:
  // Determines where Lehmer means are stored in the memory:
  size_t k = 0;
  size_t minimum_population_size = (max)(size_t(4), size_t(jso_params.threads));
  while (keep_going) {
    // Algorithm 1, jSO, Line 7:
    std::vector<DimensionlessReal> S_CR;
    std::vector<DimensionlessReal> S_F;
    // Equation 9 of L-SHADE:
    std::vector<ResultType> delta_f;
    for (size_t i = 0; i < population.size(); ++i) {
      // Algorithm 1, jSO, Line 9:
      auto ri = gen() % H;
      // Algorithm 1, jSO, Lines 10-13:
      // Again, what is written in the pseudocode is not quite right.
      // What they mean is mu_F = 0.9; the historical memory is not used.
      // I confess I find it weird to store the historical memory if we're just
      // going to ignore it, but that's what the paper and the reference
      // implementation say!
      DimensionlessReal mu_F = static_cast<DimensionlessReal>(0.9);
      DimensionlessReal mu_CR = static_cast<DimensionlessReal>(0.9);
      if (ri != H - 1) {
        mu_F = M_F[ri];
        mu_CR = M_CR[ri];
      }
      // Algorithm 1, jSO, Lines 14-18:
      DimensionlessReal crossover_probability = static_cast<DimensionlessReal>(0);
      if (mu_CR >= 0) {
        using std::normal_distribution;
        normal_distribution<DimensionlessReal> normal(
            mu_CR, static_cast<DimensionlessReal>(0.1));
        crossover_probability = normal(gen);
        // The clamp comes from the L-SHADE description:
        crossover_probability =
            clamp(crossover_probability, DimensionlessReal(0), DimensionlessReal(1));
      }
      // Algorithm 1, jSO, Lines 19-23:
      // Note that the pseudocode uses a "max_generations" parameter,
      // but the reference implementation does not.
      // Since we already require specification of max_function_evaluations,
      // the pseudocode adds an unnecessary parameter.
      if (4 * function_evaluations < jso_params.max_function_evaluations) {
        crossover_probability = (max)(crossover_probability, DimensionlessReal(0.7));
      } else if (2 * function_evaluations < jso_params.max_function_evaluations) {
        crossover_probability = (max)(crossover_probability, DimensionlessReal(0.6));
      }
      // Algorithm 1, jSO, Lines 24-27:
      // Note the adjustments to the pseudocode given in the reference
      // implementation.
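      // The next few lines draw the scale factor F from a Cauchy(mu_F, 0.1)
      // distribution, truncated to 1 from above and redrawn until it is
      // positive, as in SHADE/L-SHADE: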
      cauchy_distribution<DimensionlessReal> cauchy(
          mu_F, static_cast<DimensionlessReal>(0.1));
      DimensionlessReal F;
      do {
        F = cauchy(gen);
        if (F > 1) {
          F = 1;
        }
      } while (F <= 0);
      DimensionlessReal threshold =
          static_cast<DimensionlessReal>(7) / static_cast<DimensionlessReal>(10);
      if ((10 * function_evaluations < 6 * jso_params.max_function_evaluations) &&
          (F > threshold)) {
        F = threshold;
      }
      // > p value for mutation strategy linearly decreases from pmax to pmin
      // > during the evolutionary process, where pmax = 0.25 in jSO and
      // > pmin = pmax/2.
      DimensionlessReal p =
          DimensionlessReal(0.25) *
          (1 - static_cast<DimensionlessReal>(function_evaluations) /
                   (2 * jso_params.max_function_evaluations));
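      // With this formula, p starts at 0.25 when no evaluations have been
      // spent and decreases linearly to 0.125 when the budget is exhausted.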
      // Equation (4) of the reference:
      DimensionlessReal Fw = static_cast<DimensionlessReal>(1.2) * F;
      if (10 * function_evaluations < 4 * jso_params.max_function_evaluations) {
        if (10 * function_evaluations < 2 * jso_params.max_function_evaluations) {
          Fw = static_cast<DimensionlessReal>(0.7) * F;
        } else {
          Fw = static_cast<DimensionlessReal>(0.8) * F;
        }
      }
      // Algorithm 1, jSO, Line 28:
      // "u_{i,g} := current-to-pBest-w/1/bin using Eq. (3)"
      // This is not explained in the reference, but the "current-to-pBest"
      // strategy means "select randomly among the best values available."
      // See:
      // Zhang, J., & Sanderson, A. C. (2009).
      // "JADE: Adaptive differential evolution with optional external archive."
      // IEEE Transactions on Evolutionary Computation, 13(5), 945-958:
      // > As a generalization of DE/current-to-best,
      // > DE/current-to-pbest utilizes not only the best solution information
      // > but also the information of other good solutions.
      // > To be specific, any of the top 100p%, p in (0, 1],
      // > solutions can be randomly chosen in DE/current-to-pbest to play the
      // > role designed exclusively for the single best solution in
      // > DE/current-to-best.
      size_t max_p_index = static_cast<size_t>(std::ceil(p * indices.size()));
      size_t p_best_idx = gen() % max_p_index;
      // We now need r1, r2 so that r1 != r2 != i:
      size_t r1;
      do {
        r1 = gen() % population.size();
      } while (r1 == i);
      size_t r2;
      do {
        r2 = gen() % (population.size() + archive.size());
      } while (r2 == r1 || r2 == i);
      ArgumentContainer trial_vector;
      if constexpr (detail::has_resize_v<ArgumentContainer>) {
        trial_vector.resize(dimension);
      }
      auto const &xi = population[i];
      auto const &xr1 = population[r1];
      ArgumentContainer xr2;
      if (r2 < population.size()) {
        xr2 = population[r2];
      } else {
        xr2 = archive[r2 - population.size()];
      }
      auto const &x_p_best = population[p_best_idx];
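      // The loop below builds the trial vector via the DE/current-to-pBest-w/1
      // mutation, u_j = x_{i,j} + Fw*(x_{pbest,j} - x_{i,j}) + F*(x_{r1,j} - x_{r2,j}),
      // combined with binomial crossover: each coordinate is taken from the
      // mutant with probability crossover_probability, and one randomly chosen
      // coordinate is forced from the mutant so the trial differs from x_i.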
      auto guaranteed_changed_idx = gen() % dimension;
      for (size_t j = 0; j < dimension; ++j) {
        if (unif01(gen) < crossover_probability || j == guaranteed_changed_idx) {
          auto tmp = xi[j] + Fw * (x_p_best[j] - xi[j]) + F * (xr1[j] - xr2[j]);
          auto const &lb = jso_params.lower_bounds[j];
          auto const &ub = jso_params.upper_bounds[j];
          // Some others recommend regenerating the indices rather than
          // clamping; I'm not sure, since it seems like it could get stuck
          // regenerating. Another suggestion is provided in:
          // "JADE: Adaptive Differential Evolution with Optional External
          // Archive", page 947. Perhaps we should implement it!
          trial_vector[j] = clamp(tmp, lb, ub);
        } else {
          trial_vector[j] = population[i][j];
        }
      }
      auto trial_cost = cost_function(trial_vector);
      function_evaluations++;
      if (isnan(trial_cost)) {
        continue;
      }
      if (queries) {
        queries->push_back(std::make_pair(trial_vector, trial_cost));
      }
      // Successful trial:
      if (trial_cost < cost[i] || isnan(cost[i])) {
        if (!isnan(target_value) && trial_cost <= target_value) {
          target_attained = true;
        }
        if (current_minimum_cost && trial_cost < *current_minimum_cost) {
          *current_minimum_cost = trial_cost;
        }
        // Can't decide on improvement if the previous evaluation was a NaN:
        if (!isnan(cost[i])) {
          if (crossover_probability > 1 || crossover_probability < 0) {
            throw std::domain_error("Crossover probability is weird.");
          }
          if (F > 1 || F < 0) {
            throw std::domain_error("Scale factor (F) is weird.");
          }
          S_CR.push_back(crossover_probability);
          S_F.push_back(F);
          delta_f.push_back(abs(cost[i] - trial_cost));
        }
        // Build the historical archive:
        if (archive.size() < cost.size()) {
          archive.push_back(trial_vector);
        } else {
          // If it's already full, put the successful trial at a random index:
          archive.resize(cost.size());
          auto idx = gen() % archive.size();
          archive[idx] = trial_vector;
        }
        cost[i] = trial_cost;
        population[i] = trial_vector;
      }
    }
    indices = detail::best_indices(cost);
    // If there are no successful updates this generation, we do not update the
    // historical memory:
    if (S_CR.size() > 0) {
      std::vector<DimensionlessReal> weights(
          S_CR.size(), std::numeric_limits<DimensionlessReal>::quiet_NaN());
      ResultType delta_sum = static_cast<ResultType>(0);
      for (auto const &delta : delta_f) {
        delta_sum += delta;
      }
      if (delta_sum <= 0 || !isfinite(delta_sum)) {
        std::ostringstream oss;
        oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
        oss << "\n\tYou've hit a bug: The sum of improvements must be strictly "
               "positive, but got "
            << delta_sum << " improvement from " << S_CR.size()
            << " successful updates.\n";
        oss << "\tImprovements: {" << std::hexfloat;
        for (size_t l = 0; l < delta_f.size() - 1; ++l) {
          oss << delta_f[l] << ", ";
        }
        oss << delta_f.back() << "}.\n";
        throw std::logic_error(oss.str());
      }
      for (size_t i = 0; i < weights.size(); ++i) {
        weights[i] = delta_f[i] / delta_sum;
      }
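      // The weighted Lehmer mean (as in SHADE), sum(w_i*s_i^2)/sum(w_i*s_i),
      // biases the memory toward the larger of the recently successful values: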
      M_CR[k] = detail::weighted_lehmer_mean(S_CR, weights);
      M_F[k] = detail::weighted_lehmer_mean(S_F, weights);
    }
    k++;
    if (k == M_F.size()) {
      k = 0;
    }
    if (target_attained) {
      break;
    }
    if (cancellation && *cancellation) {
      break;
    }
    if (function_evaluations >= jso_params.max_function_evaluations) {
      break;
    }
    // Linear population size reduction:
    size_t new_population_size = static_cast<size_t>(round(
        -double(jso_params.initial_population_size - minimum_population_size) *
            function_evaluations /
            static_cast<double>(jso_params.max_function_evaluations) +
        jso_params.initial_population_size));
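    // This interpolates linearly between the initial population size (at zero
    // function evaluations) and minimum_population_size (at the full budget);
    // e.g., halfway through the budget the target size is halfway between the
    // two.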
    size_t num_erased = population.size() - new_population_size;
    std::vector<size_t> indices_to_erase(num_erased);
    size_t j = 0;
    for (size_t i = population.size() - 1; i >= new_population_size; --i) {
      indices_to_erase[j++] = indices[i];
    }
    std::sort(indices_to_erase.rbegin(), indices_to_erase.rend());
    for (auto const &idx : indices_to_erase) {
      population.erase(population.begin() + idx);
      cost.erase(cost.begin() + idx);
    }
    indices.resize(new_population_size);
  }
#if BOOST_MATH_DEBUG_JSO
  {
    auto const &min_cost = cost[indices[0]];
    auto const &location = population[indices[0]];
    std::cout << __FILE__ << ":" << __LINE__ << ":" << __func__;
    std::cout << "\n\tMinimum cost after completion is " << min_cost
              << ".\n\tLocation: {";
    for (size_t i = 0; i < location.size() - 1; ++i) {
      std::cout << location[i] << ", ";
    }
    std::cout << location.back() << "}.\n";
    std::cout << "\tRequired " << function_evaluations
              << " function calls out of "
              << jso_params.max_function_evaluations << " allowed.\n";
    if (target_attained) {
      std::cout << "\tReason for return: Target value attained.\n";
    }
    std::cout << "\tHistorical crossover probabilities (M_CR): {";
    for (size_t i = 0; i < M_CR.size() - 1; ++i) {
      std::cout << M_CR[i] << ", ";
    }
    std::cout << M_CR.back() << "}.\n";
    std::cout << "\tHistorical scale factors (M_F): {";
    for (size_t i = 0; i < M_F.size() - 1; ++i) {
      std::cout << M_F[i] << ", ";
    }
    std::cout << M_F.back() << "}.\n";
    std::cout << "\tFinal population size: " << population.size() << ".\n";
  }
#endif
  return population[indices[0]];
}
} // namespace boost::math::optimization
#endif