VTK-m  2.0
ArrayPortalBasicCuda.h
Go to the documentation of this file.
1 //============================================================================
2 // Copyright (c) Kitware, Inc.
3 // All rights reserved.
4 // See LICENSE.txt for details.
5 //
6 // This software is distributed WITHOUT ANY WARRANTY; without even
7 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8 // PURPOSE. See the above copyright notice for more information.
9 //============================================================================
10 #ifndef vtk_m_exec_cuda_internal_ArrayPortalBasicCuda_h
11 #define vtk_m_exec_cuda_internal_ArrayPortalBasicCuda_h
12 
13 // This file provides specializations of ArrayPortalBasic that use texture loading
14 // intrinsics to load data faster from read-only arrays. These intrinsics
15 // are only available with compute capabilities >= 3.5, so only compile this code if
16 // we are compiling for that.
17 #if __CUDA_ARCH__ >= 350
18 
19 #include <vtkm/Types.h>
20 
21 namespace vtkm
22 {
23 namespace internal
24 {
25 namespace detail
26 {
27 
28 // Forward declaration (declared in vtkm/internal/ArrayPortalBasic.h)
29 template <typename T>
30 VTKM_EXEC_CONT static inline T ArrayPortalBasicReadGet(const T* const data);
31 
32 // Use the __ldg intrinsic to load read-only arrays through texture reads.
33 // Currently CUDA doesn't support texture loading of signed chars, which is why
34 // you don't see vtkm::Int8 in any of the lists.
35 
36 VTKM_EXEC_CONT static inline vtkm::UInt8 ArrayPortalBasicReadGet(const vtkm::UInt8* const data)
37 {
38  return __ldg(data);
39 }
40 VTKM_EXEC_CONT static inline vtkm::Int16 ArrayPortalBasicReadGet(const vtkm::Int16* const data)
41 {
42  return __ldg(data);
43 }
44 VTKM_EXEC_CONT static inline vtkm::UInt16 ArrayPortalBasicReadGet(const vtkm::UInt16* const data)
45 {
46  return __ldg(data);
47 }
48 VTKM_EXEC_CONT static inline vtkm::Int32 ArrayPortalBasicReadGet(const vtkm::Int32* const data)
49 {
50  return __ldg(data);
51 }
52 VTKM_EXEC_CONT static inline vtkm::UInt32 ArrayPortalBasicReadGet(const vtkm::UInt32* const data)
53 {
54  return __ldg(data);
55 }
56 VTKM_EXEC_CONT static inline vtkm::Float32 ArrayPortalBasicReadGet(const vtkm::Float32* const data)
57 {
58  return __ldg(data);
59 }
60 VTKM_EXEC_CONT static inline vtkm::Float64 ArrayPortalBasicReadGet(const vtkm::Float64* const data)
61 {
62  return __ldg(data);
63 }
64 
65 // CUDA can do some vector texture loads, but only for its own types, so we have to convert
66 // to the CUDA type first.
67 
68 VTKM_EXEC_CONT static inline vtkm::Vec2i_32 ArrayPortalBasicReadGet(
69  const vtkm::Vec2i_32* const data)
70 {
71  const int2 temp = __ldg(reinterpret_cast<const int2*>(data));
72  return vtkm::Vec2i_32(temp.x, temp.y);
73 }
74 VTKM_EXEC_CONT static inline vtkm::Vec2ui_32 ArrayPortalBasicReadGet(
75  const vtkm::Vec2ui_32* const data)
76 {
77  const uint2 temp = __ldg(reinterpret_cast<const uint2*>(data));
78  return vtkm::Vec2ui_32(temp.x, temp.y);
79 }
80 VTKM_EXEC_CONT static inline vtkm::Vec2f_32 ArrayPortalBasicReadGet(
81  const vtkm::Vec2f_32* const data)
82 {
83  const float2 temp = __ldg(reinterpret_cast<const float2*>(data));
84  return vtkm::Vec2f_32(temp.x, temp.y);
85 }
86 VTKM_EXEC_CONT static inline vtkm::Vec2f_64 ArrayPortalBasicReadGet(
87  const vtkm::Vec2f_64* const data)
88 {
89  const double2 temp = __ldg(reinterpret_cast<const double2*>(data));
90  return vtkm::Vec2f_64(temp.x, temp.y);
91 }
92 
93 VTKM_EXEC_CONT static inline vtkm::Vec4i_32 ArrayPortalBasicReadGet(
94  const vtkm::Vec4i_32* const data)
95 {
96  const int4 temp = __ldg(reinterpret_cast<const int4*>(data));
97  return vtkm::Vec4i_32(temp.x, temp.y, temp.z, temp.w);
98 }
99 VTKM_EXEC_CONT static inline vtkm::Vec4ui_32 ArrayPortalBasicReadGet(
100  const vtkm::Vec4ui_32* const data)
101 {
102  const uint4 temp = __ldg(reinterpret_cast<const uint4*>(data));
103  return vtkm::Vec4ui_32(temp.x, temp.y, temp.z, temp.w);
104 }
105 VTKM_EXEC_CONT static inline vtkm::Vec4f_32 ArrayPortalBasicReadGet(
106  const vtkm::Vec4f_32* const data)
107 {
108  const float4 temp = __ldg(reinterpret_cast<const float4*>(data));
109  return vtkm::Vec4f_32(temp.x, temp.y, temp.z, temp.w);
110 }
111 
112 // CUDA does not support loading many of the vector types we use including 3-wide vectors.
113 // Support these using multiple scalar loads.
114 
115 template <typename T, vtkm::IdComponent N>
116 VTKM_EXEC_CONT static inline vtkm::Vec<T, N> ArrayPortalBasicReadGet(
117  const vtkm::Vec<T, N>* const data)
118 {
119  const T* recastedData = reinterpret_cast<const T*>(data);
120  vtkm::Vec<T, N> result;
121 #pragma unroll
122  for (vtkm::IdComponent i = 0; i < N; ++i)
123  {
124  result[i] = ArrayPortalBasicReadGet(recastedData + i);
125  }
126  return result;
127 }
128 }
129 }
130 } // namespace vtkm::internal::detail
131 
132 #endif // __CUDA_ARCH__ >= 350
133 
134 #endif //vtk_m_exec_cuda_internal_ArrayPortalBasicCuda_h
vtkm
Groups connected points that have the same field value.
Definition: Atomic.h:19
vtkm::Vec4i_32
vtkm::Vec< vtkm::Int32, 4 > Vec4i_32
Vec4i_32 corresponds to a 4-dimensional vector of 32-bit integer values.
Definition: Types.h:1166
Types.h
VTKM_EXEC_CONT
#define VTKM_EXEC_CONT
Definition: ExportMacros.h:52
vtkm::IdComponent
vtkm::Int32 IdComponent
Represents a component ID (index of component in a vector).
Definition: Types.h:168
vtkm::Vec2ui_32
vtkm::Vec< vtkm::UInt32, 2 > Vec2ui_32
Vec2ui_32 corresponds to a 2-dimensional vector of 32-bit unsigned integer values.
Definition: Types.h:966
vtkm::Int16
int16_t Int16
Definition: Types.h:158
vtkm::Vec4f_32
vtkm::Vec< vtkm::Float32, 4 > Vec4f_32
Vec4f_32 corresponds to a 4-dimensional vector of 32-bit floating point values.
Definition: Types.h:1136
vtkm::Vec2i_32
vtkm::Vec< vtkm::Int32, 2 > Vec2i_32
Vec2i_32 corresponds to a 2-dimensional vector of 32-bit integer values.
Definition: Types.h:932
vtkm::Vec2f_32
vtkm::Vec< vtkm::Float32, 2 > Vec2f_32
Vec2f_32 corresponds to a 2-dimensional vector of 32-bit floating point values.
Definition: Types.h:902
vtkm::Vec2f_64
vtkm::Vec< vtkm::Float64, 2 > Vec2f_64
Vec2f_64 corresponds to a 2-dimensional vector of 64-bit floating point values.
Definition: Types.h:908
vtkm::UInt8
uint8_t UInt8
Definition: Types.h:157
vtkm::Vec
A short fixed-length array.
Definition: Types.h:767
vtkm::UInt32
uint32_t UInt32
Definition: Types.h:161
vtkm::Float32
float Float32
Definition: Types.h:154
vtkm::Int32
int32_t Int32
Definition: Types.h:160
vtkm::Float64
double Float64
Definition: Types.h:155
vtkm::UInt16
uint16_t UInt16
Definition: Types.h:159
vtkm::Vec4ui_32
vtkm::Vec< vtkm::UInt32, 4 > Vec4ui_32
Vec4ui_32 corresponds to a 4-dimensional vector of 32-bit unsigned integer values.
Definition: Types.h:1200