#ifndef vtk_m_exec_cuda_internal_ThrustPatches_h
#define vtk_m_exec_cuda_internal_ThrustPatches_h

#include <vtkm/Types.h>

#ifdef VTKM_ENABLE_CUDA

//Thrust version macros, used to guard the patches below.
#include <thrust/version.h>
#if THRUST_VERSION >= 100900 && THRUST_VERSION < 100906
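//Thrust 1.9.0 through 1.9.5 ships an aligned_reinterpret_cast that is not
//marked __host__ __device__, which breaks device-side compilation of code
//that ends up calling it. The specializations generated below re-declare it
//with the correct markup for the types VTK-m uses.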
namespace thrust
{
namespace detail
{
//Primary template; the specializations generated below supply the
//__host__ __device__ overloads.
template <typename T, typename U>
T aligned_reinterpret_cast(U u);
#define ALIGN_RE_T(RT)                                                      \
  template <>                                                               \
  inline __host__ __device__ RT* aligned_reinterpret_cast(void* u)          \
  { return reinterpret_cast<RT*>(reinterpret_cast<void*>(u)); }             \
  template <>                                                               \
  inline __host__ __device__ RT* aligned_reinterpret_cast(vtkm::UInt8* u)   \
  { return reinterpret_cast<RT*>(reinterpret_cast<void*>(u)); }             \
  struct SwallowSemicolon
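//The trailing 'struct SwallowSemicolon' declaration lets every macro
//invocation below be terminated with a semicolon without warnings.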
#define ALIGN_RE_VEC(RT)                                                                         \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<RT, 2>* aligned_reinterpret_cast(void* u)                 \
  { return reinterpret_cast<vtkm::Vec<RT, 2>*>(reinterpret_cast<void*>(u)); }                    \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<RT, 3>* aligned_reinterpret_cast(void* u)                 \
  { return reinterpret_cast<vtkm::Vec<RT, 3>*>(reinterpret_cast<void*>(u)); }                    \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<RT, 4>* aligned_reinterpret_cast(void* u)                 \
  { return reinterpret_cast<vtkm::Vec<RT, 4>*>(reinterpret_cast<void*>(u)); }                    \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<vtkm::Vec<RT, 3>, 2>* aligned_reinterpret_cast(void* u)   \
  { return reinterpret_cast<vtkm::Vec<vtkm::Vec<RT, 3>, 2>*>(reinterpret_cast<void*>(u)); }      \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<vtkm::Vec<RT, 9>, 2>* aligned_reinterpret_cast(void* u)   \
  { return reinterpret_cast<vtkm::Vec<vtkm::Vec<RT, 9>, 2>*>(reinterpret_cast<void*>(u)); }      \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<RT, 2>* aligned_reinterpret_cast(vtkm::UInt8* u)          \
  { return reinterpret_cast<vtkm::Vec<RT, 2>*>(reinterpret_cast<void*>(u)); }                    \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<RT, 3>* aligned_reinterpret_cast(vtkm::UInt8* u)          \
  { return reinterpret_cast<vtkm::Vec<RT, 3>*>(reinterpret_cast<void*>(u)); }                    \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<RT, 4>* aligned_reinterpret_cast(vtkm::UInt8* u)          \
  { return reinterpret_cast<vtkm::Vec<RT, 4>*>(reinterpret_cast<void*>(u)); }                    \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<vtkm::Vec<RT, 2>, 2>* aligned_reinterpret_cast(           \
    vtkm::UInt8* u)                                                                              \
  { return reinterpret_cast<vtkm::Vec<vtkm::Vec<RT, 2>, 2>*>(reinterpret_cast<void*>(u)); }      \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<vtkm::Vec<RT, 3>, 2>* aligned_reinterpret_cast(           \
    vtkm::UInt8* u)                                                                              \
  { return reinterpret_cast<vtkm::Vec<vtkm::Vec<RT, 3>, 2>*>(reinterpret_cast<void*>(u)); }      \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<vtkm::Vec<RT, 4>, 2>* aligned_reinterpret_cast(           \
    vtkm::UInt8* u)                                                                              \
  { return reinterpret_cast<vtkm::Vec<vtkm::Vec<RT, 4>, 2>*>(reinterpret_cast<void*>(u)); }      \
  template <>                                                                                    \
  inline __host__ __device__ vtkm::Vec<vtkm::Vec<RT, 9>, 2>* aligned_reinterpret_cast(           \
    vtkm::UInt8* u)                                                                              \
  { return reinterpret_cast<vtkm::Vec<vtkm::Vec<RT, 9>, 2>*>(reinterpret_cast<void*>(u)); }      \
  struct SwallowSemicolon
#define ALIGN_RE_PAIR(T, U)                                                              \
  template <>                                                                            \
  inline __host__ __device__ vtkm::Pair<T, U>* aligned_reinterpret_cast(void* u)         \
  { return reinterpret_cast<vtkm::Pair<T, U>*>(reinterpret_cast<void*>(u)); }            \
  template <>                                                                            \
  inline __host__ __device__ vtkm::Pair<T, U>* aligned_reinterpret_cast(vtkm::UInt8* u)  \
  { return reinterpret_cast<vtkm::Pair<T, U>*>(reinterpret_cast<void*>(u)); }            \
  struct SwallowSemicolon
#ifndef VTKM_DONT_FIX_THRUST
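//Instantiate the patched casts for the scalar, Vec, and Pair types that
//VTK-m needs.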
ALIGN_RE_T(unsigned long);
ALIGN_RE_T(vtkm::Int64);
ALIGN_RE_T(vtkm::UInt64);

ALIGN_RE_VEC(unsigned long);
ALIGN_RE_VEC(vtkm::Int64);
ALIGN_RE_VEC(vtkm::UInt64);

ALIGN_RE_PAIR(vtkm::Int64, vtkm::Int64);
#endif //VTKM_DONT_FIX_THRUST

#undef ALIGN_RE_T
#undef ALIGN_RE_VEC
#undef ALIGN_RE_PAIR

} //namespace detail
} //namespace thrust

#endif //THRUST_VERSION >= 100900 && THRUST_VERSION < 100906
#if THRUST_VERSION >= 100904
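//Thrust 1.9.4+ provides thrust::mr::stateless_resource_allocator, whose
//constructors and destructor are not usable from device-compiled code. The
//partial specialization below for the CUDA memory_resource re-declares them
//with explicit __host__ / __host__ __device__ markup.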
VTKM_THIRDPARTY_PRE_INCLUDE
#include <thrust/mr/allocator.h>
#include <thrust/system/cuda/memory_resource.h>
VTKM_THIRDPARTY_POST_INCLUDE
namespace thrust
{
namespace mr
{

template <typename T>
class stateless_resource_allocator<T, ::thrust::system::cuda::memory_resource>
  : public thrust::mr::allocator<T, ::thrust::system::cuda::memory_resource>
{
  typedef ::thrust::system::cuda::memory_resource Upstream;
  typedef thrust::mr::allocator<T, Upstream> base;

public:
  //Rebinding to another value type keeps the same upstream CUDA memory
  //resource.
  template <typename U>
  struct rebind
  {
    typedef stateless_resource_allocator<U, Upstream> other;
  };
  //Default constructor: initializes the base allocator with the global
  //instance of the upstream CUDA memory resource.
  __thrust_exec_check_disable__
  __host__ stateless_resource_allocator()
    : base(get_global_resource<Upstream>())
  {
  }
  //Copy constructor, usable from device code.
  __host__ __device__ stateless_resource_allocator(const stateless_resource_allocator& other)
    : base(other)
  {
  }
  //Converting constructor from an allocator for a different value type.
  template <typename U>
  __host__ __device__
  stateless_resource_allocator(const stateless_resource_allocator<U, Upstream>& other)
    : base(other)
  {
  }
  __host__ __device__ ~stateless_resource_allocator() {}
};

} //namespace mr
} //namespace thrust

#endif //THRUST_VERSION >= 100904
#if THRUST_VERSION < 100900
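//Thrust versions before 1.9.0 mis-handle certain binary operators in their
//scan implementation. The overload of bulk's destructive_accumulate_n below
//is selected whenever the operator is VTK-m's WrappedBinaryOperator, so the
//per-group accumulation runs through this implementation instead.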
//Forward declare VTK-m's operator wrapper so the overload below can name it.
namespace vtkm { namespace exec { namespace cuda { namespace internal {
template <typename T, typename F>
class WrappedBinaryOperator;
} } } } //namespace vtkm::exec::cuda::internal
//In the upstream header this overload sits inside Thrust's internal bulk
//machinery so that it is found in place of the stock implementation.
namespace thrust { namespace system { namespace cuda { namespace detail { namespace bulk_ {
namespace detail { namespace accumulate_detail {
template <typename ConcurrentGroup,
          typename RandomAccessIterator,
          typename Size,
          typename T,
          typename F>
__device__ T
destructive_accumulate_n(ConcurrentGroup& g,
                         RandomAccessIterator first,
                         Size n,
                         T init,
                         vtkm::exec::cuda::internal::WrappedBinaryOperator<T, F> binary_op)
{
  using size_type = typename ConcurrentGroup::size_type;

  size_type tid = g.this_exec.index();

  //Each in-range thread seeds its running value from the input.
  T x = init;
  if (tid < n)
  {
    x = first[tid];
  }
  g.wait();
  //Doubling-offset inclusive scan across the group: fold in the value from
  //'offset' threads to the left, then publish the running value.
  for (size_type offset = 1; offset < g.size(); offset += offset)
  {
    if (tid >= offset && tid - offset < n)
    {
      x = binary_op(first[tid - offset], x);
    }

    g.wait();

    if (tid < n)
    {
      first[tid] = x;
    }

    g.wait();
  }

  T result = binary_op(init, first[n - 1]);

  g.wait();

  return result;
}

} } } } } } } //namespace thrust::system::cuda::detail::bulk_::detail::accumulate_detail
#endif //THRUST_VERSION < 100900

#endif //CUDA enabled

#endif //vtk_m_exec_cuda_internal_ThrustPatches_h