123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
- #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
- #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
- #include <numeric>
- #include <boost/compute/detail/meta_kernel.hpp>
- #include <boost/compute/container/vector.hpp>
- namespace boost {
- namespace compute {
- namespace detail {
- template<class InputIterator, class Predicate>
- class count_if_with_threads_kernel : meta_kernel
- {
- public:
- typedef typename
- std::iterator_traits<InputIterator>::value_type
- value_type;
- count_if_with_threads_kernel()
- : meta_kernel("count_if_with_threads")
- {
- }
- void set_args(InputIterator first,
- InputIterator last,
- Predicate predicate)
- {
- typedef typename std::iterator_traits<InputIterator>::value_type T;
- m_size = detail::iterator_range_size(first, last);
- m_size_arg = add_arg<const ulong_>("size");
- m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts");
- *this <<
-
- "const uint gid = get_global_id(0);\n" <<
- "const uint block_size = size / get_global_size(0);\n" <<
- "const uint start = block_size * gid;\n" <<
- "uint end = 0;\n" <<
- "if(gid == get_global_size(0) - 1)\n" <<
- " end = size;\n" <<
- "else\n" <<
- " end = block_size * gid + block_size;\n" <<
-
- "uint count = 0;\n" <<
- "for(uint i = start; i < end; i++){\n" <<
- decl<const T>("value") << "="
- << first[expr<uint_>("i")] << ";\n" <<
- if_(predicate(var<const T>("value"))) << "{\n" <<
- "count++;\n" <<
- "}\n" <<
- "}\n" <<
-
- "counts[gid] = count;\n";
- }
- size_t exec(command_queue &queue)
- {
- const device &device = queue.get_device();
- const context &context = queue.get_context();
- size_t threads = device.compute_units();
- const size_t minimum_block_size = 2048;
- if(m_size / threads < minimum_block_size){
- threads = static_cast<size_t>(
- (std::max)(
- std::ceil(float(m_size) / minimum_block_size),
- 1.0f
- )
- );
- }
-
- ::boost::compute::vector<ulong_> counts(threads, context);
-
- set_arg(m_size_arg, static_cast<ulong_>(m_size));
- set_arg(m_counts_arg, counts.get_buffer());
- exec_1d(queue, 0, threads, 1);
-
- std::vector<ulong_> host_counts(threads);
- ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue);
-
- return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0));
- }
- private:
- size_t m_size;
- size_t m_size_arg;
- size_t m_counts_arg;
- };
- template<class InputIterator, class Predicate>
- inline size_t count_if_with_threads(InputIterator first,
- InputIterator last,
- Predicate predicate,
- command_queue &queue)
- {
- count_if_with_threads_kernel<InputIterator, Predicate> kernel;
- kernel.set_args(first, last, predicate);
- return kernel.exec(queue);
- }
- }
- }
- }
- #endif
|