//////////////////////////////////////////////////////////////////////////////
//
// (C) Copyright Ion Gaztanaga 2015-2016.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// See http://www.boost.org/libs/move for documentation.
//
//////////////////////////////////////////////////////////////////////////////
#ifndef BHO_MOVE_ADAPTIVE_SORT_HPP
#define BHO_MOVE_ADAPTIVE_SORT_HPP

#include <asio2/bho/move/detail/config_begin.hpp>
#include <asio2/bho/move/algo/detail/adaptive_sort_merge.hpp>
#include <cassert>

#if defined(BHO_CLANG) || (defined(BHO_GCC) && (BHO_GCC >= 40600))
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-conversion"
#endif

namespace bho {
namespace movelib {

///@cond
namespace detail_adaptive {

template<class RandIt>
void move_data_backward( RandIt cur_pos
                       , typename iter_size<RandIt>::type const l_data
                       , RandIt new_pos
                       , bool const xbuf_used)
{
   //Move buffer to the total combination right
   if(xbuf_used){
      bho::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
   }
   else{
      bho::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
      //Rotate does fewer moves but it seems slower due to cache issues
      //rotate_gcd(first-l_block, first+len-l_block, first+len);
   }
}

template<class RandIt>
void move_data_forward( RandIt cur_pos
                      , typename iter_size<RandIt>::type const l_data
                      , RandIt new_pos
                      , bool const xbuf_used)
{
   //Move buffer to the total combination right
   if(xbuf_used){
      bho::move(cur_pos, cur_pos+l_data, new_pos);
   }
   else{
      bho::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
      //Rotate does fewer moves but it seems slower due to cache issues
      //rotate_gcd(first-l_block, first+len-l_block, first+len);
   }
}

// build blocks of length 2*l_build_buf. l_build_buf is a power of two
// input:  [0, l_build_buf) elements are buffer, rest unsorted elements
// output: [0, l_build_buf) elements are buffer, blocks of 2*l_build_buf and the last subblock sorted
//
// First elements are merged from right to left until elements start
// at first. All old elements [first, first + l_build_buf) are placed at the end
// [first+len-l_build_buf, first+len). To achieve this:
// - If we have external memory to merge, we save elements from the buffer
//   so that a non-swapping merge is used. Buffer elements are restored
//   at the end of the buffer from the external memory.
//
// - When the external memory is not available or it is insufficient
//   for a merge operation, left swap merging is used.
//
// Once elements are merged left to right in blocks of l_build_buf, then a single left
// to right merge step is performed to achieve merged blocks of size 2K.
// If external memory is available, usual merge is used, swap merging otherwise.
//
// As a last step, if auxiliary memory is available, in-place merging is performed
// until all is merged or the auxiliary memory is not large enough.
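//
// Illustrative walk-through of the run-length progression (hypothetical sizes,
// assuming l_base = 16 and l_build_buf = 64, so l_build_buf/l_base is a power
// of two as asserted below):
//
//    insertion sort step   -> runs of l_base        = 16
//    merge-to-left pass 1  -> runs of 32
//    merge-to-left pass 2  -> runs of l_build_buf   = 64
//    merge-to-right pass   -> runs of 2*l_build_buf = 128 (the value returned,
//                             capped by the number of elements outside the buffer)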
template<class RandIt, class Compare, class XBuf>
typename iter_size<RandIt>::type
   adaptive_sort_build_blocks
      ( RandIt const first
      , typename iter_size<RandIt>::type const len
      , typename iter_size<RandIt>::type const l_base
      , typename iter_size<RandIt>::type const l_build_buf
      , XBuf & xbuf
      , Compare comp)
{
   typedef typename iter_size<RandIt>::type size_type;

   assert(l_build_buf <= len);
   assert(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));

   //Place the start pointer after the buffer
   RandIt first_block = first + l_build_buf;
   size_type const elements_in_blocks = size_type(len - l_build_buf);

   //////////////////////////////////
   // Start of merge to left step
   //////////////////////////////////
   size_type l_merged = 0u;

   assert(l_build_buf);
   //If there is not enough buffer for the insertion sort step, just avoid the external buffer
   size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
   kbuf = kbuf < l_base ? 0 : kbuf;

   if(kbuf){
      //Backup internal buffer values in external buffer so they can be overwritten
      xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
      l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());

      //Now combine them using the buffer. Elements from buffer can be
      //overwritten since they've been saved to xbuf
      l_merged = op_merge_left_step_multiple
         ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, size_type(kbuf - l_merged), comp, move_op());

      //Restore internal buffer from external buffer unless kbuf was l_build_buf;
      //in that case restoration will happen later
      if(kbuf != l_build_buf){
         bho::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
      }
   }
   else{
      l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
      rotate_gcd(first_block-l_merged, first_block, first_block+elements_in_blocks);
   }

   //Now combine elements using the buffer. Elements from buffer can't be
   //overwritten since xbuf was not big enough, so merge swapping elements.
   l_merged = op_merge_left_step_multiple
      (first_block-l_merged, elements_in_blocks, l_merged, l_build_buf, size_type(l_build_buf - l_merged), comp, swap_op());

   assert(l_merged == l_build_buf);

   //////////////////////////////////
   // Start of merge to right step
   //////////////////////////////////

   //If kbuf is l_build_buf then we can merge right without swapping
   //Saved data is still in xbuf
   if(kbuf && kbuf == l_build_buf){
      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
      //Restore internal buffer from external buffer if kbuf was l_build_buf,
      //as this operation was previously delayed.
      bho::move(xbuf.data(), xbuf.data() + kbuf, first);
   }
   else{
      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
   }
   xbuf.clear();
   //2*l_build_buf or total already merged
   return min_value<size_type>(elements_in_blocks, size_type(2u*l_build_buf));
}

template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
void adaptive_sort_combine_blocks
   ( RandItKeys const keys
   , KeyCompare key_comp
   , RandIt const first
   , typename iter_size<RandIt>::type const len
   , typename iter_size<RandIt>::type const l_prev_merged
   , typename iter_size<RandIt>::type const l_block
   , bool const use_buf
   , bool const xbuf_used
   , XBuf & xbuf
   , Compare comp
   , bool merge_left)
{
   bho::movelib::ignore(xbuf);
   typedef typename iter_size<RandIt>::type size_type;

   size_type const l_reg_combined = size_type(2u*l_prev_merged);
   size_type l_irreg_combined = 0;
   size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
   size_type const n_reg_combined = len/l_reg_combined;
   RandIt combined_first = first;

   bho::movelib::ignore(l_total_combined);
   assert(l_total_combined <= len);

   size_type const max_i = size_type(n_reg_combined + (l_irreg_combined != 0));

   if(merge_left || !use_buf) {
      for( size_type combined_i = 0; combined_i != max_i; ) {
         //Now merge blocks
         bool const is_last = combined_i==n_reg_combined;
         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;

         range_xbuf<RandIt, size_type, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, key_comp, l_cur_combined
                       , l_prev_merged, l_block, rbuf
                       , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         BHO_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
         BHO_MOVE_ADAPTIVE_SORT_INVARIANT(bho::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
         BHO_MOVE_ADAPTIVE_SORT_INVARIANT(bho::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
         if(!use_buf){
            merge_blocks_bufferless
               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
         }
         else{
            merge_blocks_left
               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
         }
         BHO_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block);
         ++combined_i;
         if(combined_i != max_i)
            combined_first += l_reg_combined;
      }
   }
   else{
      combined_first += size_type(l_reg_combined*(max_i-1u));
      for( size_type combined_i = max_i; combined_i; ) {
         --combined_i;
         bool const is_last = combined_i==n_reg_combined;
         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;

         RandIt const combined_last(combined_first+l_cur_combined);
         range_xbuf<RandIt, size_type, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, key_comp, l_cur_combined
                       , l_prev_merged, l_block, rbuf
                       , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         BHO_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
         BHO_MOVE_ADAPTIVE_SORT_INVARIANT(bho::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
         BHO_MOVE_ADAPTIVE_SORT_INVARIANT(bho::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
         merge_blocks_right
            (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
         BHO_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block);
         if(combined_i)
            combined_first -= l_reg_combined;
      }
   }
}

//Returns true if buffer is placed in
//[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is
//[buffer, buffer+l_intbuf)
template<class RandIt, class Compare, class XBuf>
bool adaptive_sort_combine_all_blocks
   ( RandIt keys
   , typename iter_size<RandIt>::type &n_keys
   , RandIt const buffer
   , typename iter_size<RandIt>::type const l_buf_plus_data
   , typename iter_size<RandIt>::type l_merged
   , typename iter_size<RandIt>::type &l_intbuf
   , XBuf & xbuf
   , Compare comp)
{
   typedef typename iter_size<RandIt>::type size_type;

   RandIt const first = buffer + l_intbuf;
   size_type const l_data = size_type(l_buf_plus_data - l_intbuf);
   size_type const l_unique = size_type(l_intbuf + n_keys);
   //Backup data to external buffer once if possible
   bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
   if(common_xbuf){
      xbuf.move_assign(buffer, l_intbuf);
   }

   bool prev_merge_left = true;
   size_type l_prev_total_combined = l_merged, l_prev_block = 0;
   bool prev_use_internal_buf = true;

   for( size_type n = 0; l_data > l_merged
      ; l_merged = size_type(2u*l_merged)
      , ++n){
      //If l_intbuf is non-zero, use that internal buffer.
      //    Implies l_block == l_intbuf && use_internal_buf == true
      //If l_intbuf is zero, see if half of the keys can be reused as a reduced emergency buffer.
      //    Implies l_block == n_keys/2 && use_internal_buf == true
      //Otherwise, just give up and use all keys to merge using rotations (use_internal_buf = false)
      bool use_internal_buf = false;
      size_type const l_block = lblock_for_combine(l_intbuf, n_keys, size_type(2*l_merged), use_internal_buf);
      assert(!l_intbuf || (l_block == l_intbuf));
      assert(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
      assert(n == 0 || (!use_internal_buf || l_prev_block == l_block) );

      bool const is_merge_left = (n&1) == 0;
      size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
      if(n && prev_use_internal_buf && prev_merge_left){
         if(is_merge_left || !use_internal_buf){
            move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
         }
         else{
            //Put the buffer just after l_total_combined
            RandIt const buf_end = first+l_prev_total_combined;
            RandIt const buf_beg = buf_end-l_block;
            if(l_prev_total_combined > l_total_combined){
               size_type const l_diff = size_type(l_prev_total_combined - l_total_combined);
               move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
            }
            else if(l_prev_total_combined < l_total_combined){
               size_type const l_diff = size_type(l_total_combined - l_prev_total_combined);
               move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
            }
         }
         BHO_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf);
      }

      //Combine to form l_merged*2 segments
      if(n_keys){
         size_type upper_n_keys_this_iter = size_type(2u*l_merged/l_block);
         if(upper_n_keys_this_iter > 256){
            adaptive_sort_combine_blocks
               ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
               , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
         }
         else{
            unsigned char uint_keys[256];
            adaptive_sort_combine_blocks
               ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
               , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
         }
      }
      else{
         size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
         adaptive_sort_combine_blocks
            ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
            , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
      }

      BHO_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf);
      prev_merge_left = is_merge_left;
      l_prev_total_combined = l_total_combined;
      l_prev_block = l_block;
      prev_use_internal_buf = use_internal_buf;
   }

   assert(l_prev_total_combined == l_data);
   bool const buffer_right = prev_use_internal_buf && prev_merge_left;

   l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
   n_keys = size_type(l_unique - l_intbuf);
   //Restore data from the external common buffer if used
   if(common_xbuf){
      if(buffer_right){
         bho::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
      }
      else{
         bho::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
      }
   }
   return buffer_right;
}

template<class RandIt, class Compare, class XBuf>
void adaptive_sort_final_merge( bool buffer_right
                              , RandIt const first
                              , typename iter_size<RandIt>::type const l_intbuf
                              , typename iter_size<RandIt>::type const n_keys
                              , typename iter_size<RandIt>::type const len
                              , XBuf & xbuf
                              , Compare comp)
{
   //assert(n_keys || xbuf.size() == l_intbuf);
   xbuf.clear();

   typedef typename iter_size<RandIt>::type size_type;

   size_type const n_key_plus_buf = size_type(l_intbuf+n_keys);
   if(buffer_right){
      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
      stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
      stable_merge( first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
      unstable_sort(first, first+n_keys, comp, xbuf);
      stable_merge(first, first+n_keys, first+len, comp, xbuf);
   }
   else{
      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
      stable_sort(first, first+n_key_plus_buf, comp, xbuf);
      if(xbuf.capacity() >= n_key_plus_buf){
         buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
      else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
         stable_merge( first+n_keys, first+n_key_plus_buf
                     , first+len, comp, xbuf);
         stable_merge(first, first+n_keys, first+len, comp, xbuf);
      }
      else{
         stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
   }
   BHO_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len);
}

template<class RandIt, class Compare, class Unsigned, class XBuf>
bool adaptive_sort_build_params
   (RandIt first, Unsigned const len, Compare comp
   , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
   , XBuf & xbuf
   )
{
   typedef typename iter_size<RandIt>::type size_type;

   //Calculate ideal parameters and try to collect needed unique keys
   l_base = 0u;

   //Try to find a value near sqrt(len) that is 2^N*l_base where
   //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
   //as build_blocks merges to the left iteratively doubling the
   //merged size and all the buffer must be used just before the final
   //merge to right step. This guarantees "build_blocks" produces
   //segments of size l_build_buf*2, maximizing the classic merge phase.
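   //Illustration (hypothetical numbers): for len = 10000, sqrt(len) = 100, so a
   //value such as 128 = 2^3 * 16 fits the 2^N*l_base form, assuming l_base = 16
   //does not exceed AdaptiveSortInsertionSortThreshold.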
   l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));

   //The internal buffer can be expanded if there is enough external memory
   while(xbuf.capacity() >= l_intbuf*2){
      l_intbuf = size_type(2u*l_intbuf);
   }

   //This is the minimum number of keys to implement the ideal algorithm
   //
   //l_intbuf is used as buffer plus the key count
   size_type n_min_ideal_keys = size_type(l_intbuf-1u);
   while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
      --n_min_ideal_keys;
   }
   ++n_min_ideal_keys;
   assert(n_min_ideal_keys <= l_intbuf);

   if(xbuf.template supports_aligned_trailing<size_type>
      (l_intbuf, size_type((size_type(len-l_intbuf)-1u)/l_intbuf+1u))){
      n_keys = 0u;
      l_build_buf = l_intbuf;
   }
   else{
      //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
      //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
      //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
      //
      //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed
      //(to be used for keys in combine_all_blocks) as the whole l_build_buf
      //will be backed up in the buffer during build_blocks.
      bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
      size_type const to_collect = non_unique_buf ? n_min_ideal_keys : size_type(l_intbuf*2u);
      size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);

      //If available memory is 2*sqrt(l), then for "build_params"
      //the situation is the same as if 2*l_intbuf were collected.
      if(non_unique_buf && collected == n_min_ideal_keys){
         l_build_buf = l_intbuf;
         n_keys = n_min_ideal_keys;
      }
      else if(collected == 2*l_intbuf){
         //l_intbuf*2 elements found. Use all of them in the build phase
         l_build_buf = size_type(l_intbuf*2);
         n_keys = l_intbuf;
      }
      else if(collected >= (n_min_ideal_keys+l_intbuf)){
         l_build_buf = l_intbuf;
         n_keys = size_type(collected - l_intbuf);
      }
      //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
      //is possible (due to very few unique keys), then go to a slow sort based on rotations.
      else{
         assert(collected < (n_min_ideal_keys+l_intbuf));
         if(collected < 4){   //No combination possible with fewer than 4 keys
            return false;
         }
         n_keys = l_intbuf;
         while(n_keys & (n_keys-1u)){
            n_keys &= size_type(n_keys-1u);  //make it a power of 2
         }
         while(n_keys > collected){
            n_keys/=2;
         }
         //AdaptiveSortInsertionSortThreshold is always a power of two, so the minimum is a power of two
         l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
         l_intbuf = 0;
         l_build_buf = n_keys;
      }
      assert((n_keys+l_intbuf) >= l_build_buf);
   }

   return true;
}

// Main explanation of the sort algorithm.
//
// csqrtlen = ceil(sqrt(len));
//
// * First, 2*csqrtlen unique elements are extracted from the elements to be
//   sorted and placed at the beginning of the range.
//
// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
//   will be used as auxiliary memory, so the trailing len-2*csqrtlen elements
//   are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
//   2*csqrtlen unique elements are again the leading elements of the whole range.
//
// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
//   ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
//   "build_blocks" step and is repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
//   elements, etc.) until all trailing (len-2*csqrtlen) elements are merged.
//
//   In "combine_blocks", len/csqrtlen elements are used as "keys" (markers) to
//   know if elements belong to the first or second block to be merged, and another
//   csqrtlen leading elements are used as buffer. Explanation of the "combine_blocks" step:
//
//   Iteratively until all trailing (len-2*csqrtlen) elements are merged:
//      Iteratively for each pair of previously merged blocks:
//         * Blocks are divided into groups of csqrtlen elements and
//           2*merged_block/csqrtlen keys are sorted to be used as markers
//         * Groups are selection-sorted by their first or last element (depending on whether they
//           are going to be merged to the left or right) and keys are reordered accordingly as an
//           imitation buffer.
//         * Elements of each block pair are merged using the csqrtlen buffer, taking into account
//           whether they belong to the first or second half (marked by the key).
//
// * In the final merge step the leading elements (2*csqrtlen) are sorted and merged with
//   rotations with the rest of the elements sorted in the "combine_blocks" step.
//
// Corner cases:
//
// * If no 2*csqrtlen elements can be extracted:
//
//    * If csqrtlen+len/csqrtlen elements are extracted, then only csqrtlen elements are used
//      as buffer in the "build_blocks" step, forming blocks of 2*csqrtlen elements. This
//      means that an additional "combine_blocks" step will be needed to merge all elements.
//
//    * If fewer than csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
//      then the number of elements used as buffer and keys in the "build_blocks"
//      and "combine_blocks" steps is reduced. If "combine_blocks" does not have enough keys
//      due to this reduction, a rotation-based smart merge is used instead.
//
// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
//
// * If auxiliary memory is more than or equal to ceil(len/2), half-copying mergesort is used.
//
// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
//   then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
//   keys to combine blocks.
//
// * If auxiliary memory is available, "build_blocks" will be extended to build bigger blocks
//   using classic merge and "combine_blocks" will use bigger blocks when merging.
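//
// Worked example of the sizes involved (illustrative numbers only): for
// len = 1000000, csqrtlen = ceil(sqrt(1000000)) = 1000, so ideally
// 2*csqrtlen = 2000 unique elements are extracted; "build_blocks" turns the
// remaining 998000 elements into sorted runs of 4*csqrtlen = 4000 elements;
// "combine_blocks" then doubles the run length (8000, 16000, 32000, ...) until
// the trailing elements form a single sorted run, and the final merge step
// merges the leading 2000 elements back in using rotations.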
template<class RandIt, class Compare, class XBuf>
void adaptive_sort_impl
   ( RandIt first
   , typename iter_size<RandIt>::type const len
   , Compare comp
   , XBuf & xbuf
   )
{
   typedef typename iter_size<RandIt>::type size_type;

   //Small sorts go directly to insertion sort
   if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
      insertion_sort(first, first + len, comp);
   }
   else if((len-len/2) <= xbuf.capacity()){
      merge_sort(first, first+len, comp, xbuf.data());
   }
   else{
      //Make sure it is at least four
      BHO_MOVE_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);

      size_type l_base = 0;
      size_type l_intbuf = 0;
      size_type n_keys = 0;
      size_type l_build_buf = 0;

      //Calculate and extract needed unique elements. If a minimum is not achieved,
      //fall back to a slow stable sort
      if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
         stable_sort(first, first+len, comp, xbuf);
      }
      else{
         assert(l_build_buf);
         //Otherwise, continue the adaptive_sort
         BHO_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len);
         size_type const n_key_plus_buf = size_type(l_intbuf+n_keys);
         //l_build_buf is always a power of two if l_intbuf is zero
         assert(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));

         //Classic merge sort until internal buffer and xbuf are exhausted
         size_type const l_merged = adaptive_sort_build_blocks
            ( first + n_key_plus_buf-l_build_buf
            , size_type(len-n_key_plus_buf+l_build_buf)
            , l_base, l_build_buf, xbuf, comp);
         BHO_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len);

         //Non-trivial merge
         bool const buffer_right = adaptive_sort_combine_all_blocks
            (first, n_keys, first+n_keys, size_type(len-n_keys), l_merged, l_intbuf, xbuf, comp);

         //Sort keys and buffer and merge the whole sequence
         adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
      }
   }
}

}  //namespace detail_adaptive {

///@endcond

//! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according
//!   to comparison functor "comp". The sort is stable (order of equal elements
//!   is guaranteed to be preserved). Performance is improved if additional raw storage is
//!   provided.
//!
//! <b>Requires</b>:
//!   - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
//!   - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
//!
//! <b>Parameters</b>:
//!   - first, last: the range of elements to sort
//!   - comp: comparison function object which returns true if the first argument is ordered before the second.
//!   - uninitialized, uninitialized_len: raw storage starting at "uninitialized", able to hold "uninitialized_len"
//!      elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
//!      is ceil(std::distance(first, last)/2).
//!
//! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
//!   of dereferenced RandIt throws.
//!
//! <b>Complexity</b>: Always K x O(Nxlog(N)) comparisons and move assignments/constructors/swaps.
//!   Comparisons are close to minimum even with no additional memory. The constant factor for data movement is
//!   minimized when uninitialized_len is ceil(std::distance(first, last)/2). Pretty good performance is achieved
//!   when uninitialized_len is ceil(sqrt(std::distance(first, last)))*2.
//!
//! <b>Caution</b>: Experimental implementation, not production-ready.
template<class RandIt, class RandRawIt, class Compare>
void adaptive_sort( RandIt first, RandIt last, Compare comp
                  , RandRawIt uninitialized
                  , typename iter_size<RandIt>::type uninitialized_len)
{
   typedef typename iter_size<RandIt>::type size_type;
   typedef typename iterator_traits<RandIt>::value_type value_type;

   ::bho::movelib::adaptive_xbuf<value_type, RandRawIt, size_type> xbuf(uninitialized, uninitialized_len);
   ::bho::movelib::detail_adaptive::adaptive_sort_impl(first, size_type(last - first), comp, xbuf);
}

template<class RandIt, class Compare>
void adaptive_sort( RandIt first, RandIt last, Compare comp)
{
   typedef typename iterator_traits<RandIt>::value_type value_type;
   adaptive_sort(first, last, comp, (value_type*)0, 0u);
}
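
// Illustrative usage sketch (not part of the header; the example function and
// variable names below are hypothetical). It shows both overloads defined
// above; as documented, raw storage for about half of the elements gives the
// best performance:
//
//    #include <functional>
//    #include <vector>
//
//    void example_sort()
//    {
//       std::vector<int> v = {5, 1, 4, 1, 5, 9, 2, 6};
//
//       //Raw storage for ceil(v.size()/2) elements (int needs no construction)
//       int raw_storage[4];
//       bho::movelib::adaptive_sort
//          (v.begin(), v.end(), std::less<int>(), raw_storage, 4u);
//
//       //Bufferless overload: same stable result, potentially more data movement
//       bho::movelib::adaptive_sort(v.begin(), v.end(), std::less<int>());
//    }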

}  //namespace movelib {
}  //namespace bho {

#include <asio2/bho/move/detail/config_end.hpp>

#if defined(BHO_CLANG) || (defined(BHO_GCC) && (BHO_GCC >= 40600))
#pragma GCC diagnostic pop
#endif

#endif   //#define BHO_MOVE_ADAPTIVE_SORT_HPP