123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
- #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
- #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
- #include <boost/compute/context.hpp>
- #include <boost/compute/command_queue.hpp>
- #include <boost/compute/container/vector.hpp>
- #include <boost/compute/algorithm/reduce.hpp>
- #include <boost/compute/functional/detail/nvidia_ballot.hpp>
- #include <boost/compute/functional/detail/nvidia_popcount.hpp>
- #include <boost/compute/detail/meta_kernel.hpp>
- namespace boost {
- namespace compute {
- namespace detail {
- template<class InputIterator, class Predicate>
- inline size_t count_if_with_ballot(InputIterator first,
- InputIterator last,
- Predicate predicate,
- command_queue &queue)
- {
- size_t count = iterator_range_size(first, last);
- size_t block_size = 32;
- size_t block_count = count / block_size;
- if(block_count * block_size != count){
- block_count++;
- }
- const ::boost::compute::context &context = queue.get_context();
- ::boost::compute::vector<uint_> counts(block_count, context);
- ::boost::compute::detail::nvidia_popcount<uint_> popc;
- ::boost::compute::detail::nvidia_ballot<uint_> ballot;
- meta_kernel k("count_if_with_ballot");
- k <<
- "const uint gid = get_global_id(0);\n" <<
- "bool value = false;\n" <<
- "if(gid < count)\n" <<
- " value = " << predicate(first[k.var<const uint_>("gid")]) << ";\n" <<
- "uint bits = " << ballot(k.var<const uint_>("value")) << ";\n" <<
- "if(get_local_id(0) == 0)\n" <<
- counts.begin()[k.var<uint_>("get_group_id(0)") ]
- << " = " << popc(k.var<uint_>("bits")) << ";\n";
- k.add_set_arg<const uint_>("count", count);
- k.exec_1d(queue, 0, block_size * block_count, block_size);
- uint_ result;
- ::boost::compute::reduce(
- counts.begin(),
- counts.end(),
- &result,
- queue
- );
- return result;
- }
- }
- }
- }
- #endif
|