123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881 |
- #ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP
- #define BOOST_COMPUTE_ALGORITHM_COPY_HPP
- #include <algorithm>
- #include <iterator>
- #include <boost/utility/enable_if.hpp>
- #include <boost/mpl/and.hpp>
- #include <boost/mpl/not.hpp>
- #include <boost/mpl/or.hpp>
- #include <boost/compute/buffer.hpp>
- #include <boost/compute/system.hpp>
- #include <boost/compute/command_queue.hpp>
- #include <boost/compute/algorithm/detail/copy_on_device.hpp>
- #include <boost/compute/algorithm/detail/copy_to_device.hpp>
- #include <boost/compute/algorithm/detail/copy_to_host.hpp>
- #include <boost/compute/async/future.hpp>
- #include <boost/compute/container/mapped_view.hpp>
- #include <boost/compute/detail/device_ptr.hpp>
- #include <boost/compute/detail/is_contiguous_iterator.hpp>
- #include <boost/compute/detail/iterator_range_size.hpp>
- #include <boost/compute/detail/parameter_cache.hpp>
- #include <boost/compute/iterator/buffer_iterator.hpp>
- #include <boost/compute/type_traits/type_name.hpp>
- #include <boost/compute/type_traits/is_device_iterator.hpp>
- namespace boost {
- namespace compute {
- namespace detail {
- namespace mpl = boost::mpl;
- template<class InputIterator, class OutputIterator>
- struct can_copy_with_copy_buffer :
- mpl::and_<
- mpl::or_<
- boost::is_same<
- InputIterator,
- buffer_iterator<typename InputIterator::value_type>
- >,
- boost::is_same<
- InputIterator,
- detail::device_ptr<typename InputIterator::value_type>
- >
- >,
- mpl::or_<
- boost::is_same<
- OutputIterator,
- buffer_iterator<typename OutputIterator::value_type>
- >,
- boost::is_same<
- OutputIterator,
- detail::device_ptr<typename OutputIterator::value_type>
- >
- >,
- boost::is_same<
- typename InputIterator::value_type,
- typename OutputIterator::value_type
- >
- >::type {};
- template<class HostIterator, class DeviceIterator>
- struct is_same_value_type :
- boost::is_same<
- typename boost::remove_cv<
- typename std::iterator_traits<HostIterator>::value_type
- >::type,
- typename boost::remove_cv<
- typename DeviceIterator::value_type
- >::type
- >::type {};
- template<class HostIterator>
- struct is_bool_value_type :
- boost::is_same<
- typename boost::remove_cv<
- typename std::iterator_traits<HostIterator>::value_type
- >::type,
- bool
- >::type {};
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- is_same_value_type<InputIterator, OutputIterator>
- >
- >::type* = 0)
- {
- BOOST_STATIC_ASSERT_MSG(
- is_contiguous_iterator<InputIterator>::value,
- "copy_async() is only supported for contiguous host iterators"
- );
- return copy_to_device_async(first, last, result, queue, events);
- }
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- is_same_value_type<InputIterator, OutputIterator>
- >
- >
- >::type* = 0)
- {
- BOOST_STATIC_ASSERT_MSG(
- is_contiguous_iterator<InputIterator>::value,
- "copy_async() is only supported for contiguous host iterators"
- );
- typedef typename std::iterator_traits<InputIterator>::value_type input_type;
- const context &context = queue.get_context();
- size_t count = iterator_range_size(first, last);
- if(count < size_t(1)) {
- return future<OutputIterator>();
- }
-
-
- ::boost::compute::mapped_view<input_type> mapped_host(
-
-
- const_cast<const input_type*>(
- ::boost::addressof(*first)
- ),
- count,
- context
- );
- return copy_on_device_async(
- mapped_host.begin(), mapped_host.end(), result, queue, events
- );
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- is_same_value_type<InputIterator, OutputIterator>,
- is_contiguous_iterator<InputIterator>
- >
- >::type* = 0)
- {
- return copy_to_device(first, last, result, queue, events);
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- is_same_value_type<InputIterator, OutputIterator>
- >,
- is_contiguous_iterator<InputIterator>
- >
- >::type* = 0)
- {
- typedef typename OutputIterator::value_type output_type;
- typedef typename std::iterator_traits<InputIterator>::value_type input_type;
- const device &device = queue.get_device();
-
- std::string cache_key =
- std::string("__boost_compute_copy_to_device_")
- + type_name<input_type>() + "_" + type_name<output_type>();
- boost::shared_ptr<parameter_cache> parameters =
- detail::parameter_cache::get_global_cache(device);
- uint_ map_copy_threshold;
- uint_ direct_copy_threshold;
-
- if (device.type() & device::gpu) {
-
- map_copy_threshold = 524288;
- direct_copy_threshold = 52428800;
- }
- else {
-
- map_copy_threshold = 134217728;
- direct_copy_threshold = 0;
- }
-
- map_copy_threshold =
- parameters->get(
- cache_key, "map_copy_threshold", map_copy_threshold
- );
- direct_copy_threshold =
- parameters->get(
- cache_key, "direct_copy_threshold", direct_copy_threshold
- );
-
- size_t count = iterator_range_size(first, last);
- size_t input_size_bytes = count * sizeof(input_type);
-
- if(input_size_bytes < map_copy_threshold) {
- return copy_to_device_map(first, last, result, queue, events);
- }
- // [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
-
- else if(input_size_bytes < direct_copy_threshold) {
- std::vector<output_type> vector(first, last);
- return copy_to_device(
- vector.begin(), vector.end(), result, queue, events
- );
- }
-
-
-
-
-
-
-
- return dispatch_copy_async(first, last, result, queue, events).get();
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- is_contiguous_iterator<InputIterator>
- >
- >
- >::type* = 0)
- {
- typedef typename OutputIterator::value_type output_type;
- typedef typename std::iterator_traits<InputIterator>::value_type input_type;
- const device &device = queue.get_device();
-
- std::string cache_key =
- std::string("__boost_compute_copy_to_device_")
- + type_name<input_type>() + "_" + type_name<output_type>();
- boost::shared_ptr<parameter_cache> parameters =
- detail::parameter_cache::get_global_cache(device);
- uint_ map_copy_threshold;
- uint_ direct_copy_threshold;
-
- if (device.type() & device::gpu) {
-
- map_copy_threshold = 524288;
- direct_copy_threshold = 52428800;
- }
- else {
-
- map_copy_threshold = 134217728;
- direct_copy_threshold = 0;
- }
-
- map_copy_threshold =
- parameters->get(
- cache_key, "map_copy_threshold", map_copy_threshold
- );
- direct_copy_threshold =
- parameters->get(
- cache_key, "direct_copy_threshold", direct_copy_threshold
- );
-
- size_t input_size = iterator_range_size(first, last);
- size_t input_size_bytes = input_size * sizeof(input_type);
-
-
-
-
- if(input_size_bytes < map_copy_threshold
- || direct_copy_threshold <= map_copy_threshold) {
- return copy_to_device_map(first, last, result, queue, events);
- }
- // [map_copy_threshold; inf) -> convert [first; last)
-
- std::vector<output_type> vector(first, last);
- return copy_to_device(vector.begin(), vector.end(), result, queue, events);
- }
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- is_same_value_type<OutputIterator, InputIterator>
- >
- >::type* = 0)
- {
- BOOST_STATIC_ASSERT_MSG(
- is_contiguous_iterator<OutputIterator>::value,
- "copy_async() is only supported for contiguous host iterators"
- );
- return copy_to_host_async(first, last, result, queue, events);
- }
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- mpl::not_<
- is_same_value_type<OutputIterator, InputIterator>
- >
- >
- >::type* = 0)
- {
- BOOST_STATIC_ASSERT_MSG(
- is_contiguous_iterator<OutputIterator>::value,
- "copy_async() is only supported for contiguous host iterators"
- );
- typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
- const context &context = queue.get_context();
- size_t count = iterator_range_size(first, last);
- if(count < size_t(1)) {
- return future<OutputIterator>();
- }
-
- buffer mapped_host(
- context,
- count * sizeof(output_type),
- buffer::write_only | buffer::use_host_ptr,
- static_cast<void*>(
- ::boost::addressof(*result)
- )
- );
-
- ::boost::compute::future<buffer_iterator<output_type> > future =
- copy_on_device_async(
- first,
- last,
- make_buffer_iterator<output_type>(mapped_host),
- queue,
- events
- );
-
- event map_event;
- void* ptr = queue.enqueue_map_buffer_async(
- mapped_host,
- CL_MAP_READ,
- 0,
- count * sizeof(output_type),
- map_event,
- future.get_event()
- );
- event unmap_event =
- queue.enqueue_unmap_buffer(mapped_host, ptr, map_event);
- return make_future(result + count, unmap_event);
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- is_same_value_type<OutputIterator, InputIterator>,
- is_contiguous_iterator<OutputIterator>,
- mpl::not_<
- is_bool_value_type<OutputIterator>
- >
- >
- >::type* = 0)
- {
- return copy_to_host(first, last, result, queue, events);
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- mpl::or_<
- mpl::not_<
- is_contiguous_iterator<OutputIterator>
- >,
- is_bool_value_type<OutputIterator>
- >
- >
- >::type* = 0)
- {
- typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
- typedef typename InputIterator::value_type input_type;
- const device &device = queue.get_device();
-
- std::string cache_key =
- std::string("__boost_compute_copy_to_host_")
- + type_name<input_type>() + "_" + type_name<output_type>();
- boost::shared_ptr<parameter_cache> parameters =
- detail::parameter_cache::get_global_cache(device);
- uint_ map_copy_threshold;
- uint_ direct_copy_threshold;
-
- if (device.type() & device::gpu) {
-
- map_copy_threshold = 33554432;
- direct_copy_threshold = 0;
- }
- else {
-
- map_copy_threshold = 134217728;
- direct_copy_threshold = 0;
- }
-
- map_copy_threshold =
- parameters->get(
- cache_key, "map_copy_threshold", map_copy_threshold
- );
- direct_copy_threshold =
- parameters->get(
- cache_key, "direct_copy_threshold", direct_copy_threshold
- );
-
- size_t count = iterator_range_size(first, last);
- size_t input_size_bytes = count * sizeof(input_type);
-
-
-
-
- if(input_size_bytes < map_copy_threshold
- || direct_copy_threshold <= map_copy_threshold) {
- return copy_to_host_map(first, last, result, queue, events);
- }
- // [map_copy_threshold; inf) -> copy [first;last) to temporary vector
-
- std::vector<input_type> vector(count);
- copy_to_host(first, last, vector.begin(), queue, events);
- return std::copy(vector.begin(), vector.end(), result);
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- mpl::not_<
- is_same_value_type<OutputIterator, InputIterator>
- >,
- is_contiguous_iterator<OutputIterator>,
- mpl::not_<
- is_bool_value_type<OutputIterator>
- >
- >
- >::type* = 0)
- {
- typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
- typedef typename InputIterator::value_type input_type;
- const device &device = queue.get_device();
-
- std::string cache_key =
- std::string("__boost_compute_copy_to_host_")
- + type_name<input_type>() + "_" + type_name<output_type>();
- boost::shared_ptr<parameter_cache> parameters =
- detail::parameter_cache::get_global_cache(device);
- uint_ map_copy_threshold;
- uint_ direct_copy_threshold;
-
- if (device.type() & device::gpu) {
-
- map_copy_threshold = 524288;
- direct_copy_threshold = 52428800;
- }
- else {
-
- map_copy_threshold = 134217728;
- direct_copy_threshold = 0;
- }
-
- map_copy_threshold =
- parameters->get(
- cache_key, "map_copy_threshold", map_copy_threshold
- );
- direct_copy_threshold =
- parameters->get(
- cache_key, "direct_copy_threshold", direct_copy_threshold
- );
-
- size_t count = iterator_range_size(first, last);
- size_t input_size_bytes = count * sizeof(input_type);
-
- if(input_size_bytes < map_copy_threshold) {
- return copy_to_host_map(first, last, result, queue, events);
- }
- // [map_copy_threshold; direct_copy_threshold) -> copy [first;last) to
-
- else if(input_size_bytes < direct_copy_threshold) {
- std::vector<input_type> vector(count);
- copy_to_host(first, last, vector.begin(), queue, events);
- return std::copy(vector.begin(), vector.end(), result);
- }
-
-
-
-
-
-
-
- return dispatch_copy_async(first, last, result, queue, events).get();
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- can_copy_with_copy_buffer<
- InputIterator, OutputIterator
- >
- >
- >
- >::type* = 0)
- {
- return copy_on_device(first, last, result, queue, events);
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- is_device_iterator<OutputIterator>,
- can_copy_with_copy_buffer<
- InputIterator, OutputIterator
- >
- >
- >::type* = 0)
- {
- typedef typename std::iterator_traits<InputIterator>::value_type value_type;
- typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
- difference_type n = std::distance(first, last);
- if(n < 1){
- // nothing to copy
- return result;
- }
- queue.enqueue_copy_buffer(first.get_buffer(),
- result.get_buffer(),
- first.get_index() * sizeof(value_type),
- result.get_index() * sizeof(value_type),
- static_cast<size_t>(n) * sizeof(value_type),
- events);
- return result + n;
- }
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- can_copy_with_copy_buffer<
- InputIterator, OutputIterator
- >
- >
- >
- >::type* = 0)
- {
- return copy_on_device_async(first, last, result, queue, events);
- }
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- is_device_iterator<OutputIterator>,
- can_copy_with_copy_buffer<
- InputIterator, OutputIterator
- >
- >
- >::type* = 0)
- {
- typedef typename std::iterator_traits<InputIterator>::value_type value_type;
- typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
- difference_type n = std::distance(first, last);
- if(n < 1){
- // nothing to copy
- return make_future(result, event());
- }
- event event_ =
- queue.enqueue_copy_buffer(
- first.get_buffer(),
- result.get_buffer(),
- first.get_index() * sizeof(value_type),
- result.get_index() * sizeof(value_type),
- static_cast<size_t>(n) * sizeof(value_type),
- events
- );
- return make_future(result + n, event_);
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if_c<
- !is_device_iterator<InputIterator>::value &&
- !is_device_iterator<OutputIterator>::value
- >::type* = 0)
- {
- (void) queue;
- (void) events;
- return std::copy(first, last, result);
- }
- }
- template<class InputIterator, class OutputIterator>
- inline OutputIterator copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue = system::default_queue(),
- const wait_list &events = wait_list())
- {
- return detail::dispatch_copy(first, last, result, queue, events);
- }
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue = system::default_queue(),
- const wait_list &events = wait_list())
- {
- return detail::dispatch_copy_async(first, last, result, queue, events);
- }
- }
- }
- #endif
|