#ifndef vtk_m_Atomic_h
#define vtk_m_Atomic_h
VTKM_EXEC_CONT inline std::memory_order StdAtomicMemOrder(vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed: return std::memory_order_relaxed;
    case vtkm::MemoryOrder::Acquire: return std::memory_order_acquire;
    case vtkm::MemoryOrder::Release: return std::memory_order_release;
    case vtkm::MemoryOrder::AcquireAndRelease: return std::memory_order_acq_rel;
    case vtkm::MemoryOrder::SequentiallyConsistent: return std::memory_order_seq_cst;
  }
  return std::memory_order_seq_cst; // unreachable; quiets missing-return warnings
}
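// vtkm::MemoryOrder mirrors the std::memory_order semantics (Relaxed, Acquire,
// Release, AcquireAndRelease, SequentiallyConsistent). The helper above converts
// a requested ordering so it can be forwarded to the standard library, e.g.
//
//   std::atomic_thread_fence(internal::StdAtomicMemOrder(order));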
#if defined(VTKM_CUDA_DEVICE_PASS)
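// CUDA device pass: the detail::Atomic*Impl functions below wrap the native CUDA
// intrinsics (atomicAdd, atomicAnd, atomicOr, atomicXor, atomicCAS) and issue
// __threadfence()-based store/load fences around them to honor the requested
// vtkm::MemoryOrder.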
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
  volatile T* const vaddr = addr; /* volatile read bypasses the cache */
  if (order == vtkm::MemoryOrder::SequentiallyConsistent)
  {
    __threadfence();
  }
  const T value = *vaddr;
  AtomicLoadFence(order); /* order subsequent reads after this load */
  return value;
}
template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  volatile T* vaddr = addr; /* volatile write bypasses the cache */
  AtomicStoreFence(order); /* make earlier writes visible before this store */
  *vaddr = value;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicAdd(addr, arg);
  AtomicLoadFence(order);
  return result;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicAnd(addr, mask);
  AtomicLoadFence(order);
  return result;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicOr(addr, mask);
  AtomicLoadFence(order);
  return result;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicXor(addr, mask);
  AtomicLoadFence(order);
  return result;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}
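// AtomicNot is expressed as an atomic XOR against an all-ones mask, which flips
// every bit: for an 8-bit value, 0b01100101 ^ 0b11111111 == 0b10011010.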
template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr, T* expected, T desired, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicCAS(addr, *expected, desired);
  AtomicLoadFence(order);
  if (result == *expected) { return true; }
  *expected = result; // on failure, report back the value actually observed
  return false;
}
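// On failure the exchange reports the value actually observed back through
// `expected`, so a caller can retry without reloading. Illustrative sketch only
// (not part of this header):
//
//   T current = AtomicLoadImpl(addr, order);
//   T newValue;
//   do { newValue = current + 1; } while (!AtomicCompareExchangeImpl(addr, &current, newValue, order));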
#if __CUDA_ARCH__ < 200
// Pre-Fermi devices lack a native 32-bit float atomicAdd; emulate it with a CAS loop.
VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* address,
                                                  vtkm::Float32 value,
                                                  vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  vtkm::UInt32 assumed;
  vtkm::UInt32 old = __float_as_int(*address);
  do
  {
    assumed = old;
    old = atomicCAS(reinterpret_cast<vtkm::UInt32*>(address),
                    assumed,
                    __float_as_int(__int_as_float(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __int_as_float(old);
}
#endif
#if __CUDA_ARCH__ < 600
// Devices older than Pascal lack a native 64-bit float atomicAdd; emulate it with a CAS loop.
VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* address,
                                                  vtkm::Float64 value,
                                                  vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  vtkm::UInt64 assumed;
  vtkm::UInt64 old = __double_as_longlong(*address);
  do
  {
    assumed = old;
    old = atomicCAS(reinterpret_cast<vtkm::UInt64*>(address),
                    assumed,
                    __double_as_longlong(__longlong_as_double(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __longlong_as_double(old);
}
#endif
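// Both overloads above follow the standard CUDA recipe for emulating a
// floating-point atomic add on hardware that lacks one: reinterpret the float's
// bits as an integer, compute the updated value on a local copy, and retry
// atomicCAS until no other thread has modified the location in between.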
#elif defined(VTKM_ENABLE_KOKKOS)

VTKM_THIRDPARTY_PRE_INCLUDE
// Superhack: Kokkos_Macros.hpp decorates Kokkos functions with backend modifiers
// (e.g. __device__) based on KokkosCore_config.h. Pre-load that configuration
// here and strip the CUDA/HIP backends when VTK-m itself is not being compiled
// for them, so that the Kokkos atomics remain usable from this translation unit.
#ifndef KOKKOS_MACROS_HPP
#define KOKKOS_MACROS_HPP
#include <KokkosCore_config.h>
#undef KOKKOS_MACROS_HPP
#define KOKKOS_DONT_INCLUDE_CORE_CONFIG_H

#if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA)
#undef KOKKOS_ENABLE_CUDA

// Newer Kokkos versions also need the CUDA setup header guard predefined to
// keep their CUDA code paths out of non-CUDA builds.
#if KOKKOS_VERSION >= 30401
#define KOKKOS_CUDA_SETUP_HPP_
#endif
#endif

#if defined(KOKKOS_ENABLE_HIP) && !defined(VTKM_HIP)
#undef KOKKOS_ENABLE_HIP
#endif

#endif //KOKKOS_MACROS_HPP not loaded

#include <Kokkos_Atomic.hpp>
VTKM_THIRDPARTY_POST_INCLUDE
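// Kokkos backend: the fence helpers below map onto Kokkos::memory_fence(), and
// the detail::Atomic*Impl functions dispatch to Kokkos::atomic_fetch_* and the
// Kokkos::Impl::atomic_load/atomic_store primitives with the closest matching
// memory order.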
  Kokkos::memory_fence(); // body of AtomicStoreFence(order)

  Kokkos::memory_fence(); // body of AtomicLoadFence(order)
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed: return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_relaxed);
    case vtkm::MemoryOrder::Acquire: case vtkm::MemoryOrder::Release: // a load can only acquire
    case vtkm::MemoryOrder::AcquireAndRelease: return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_acquire);
    case vtkm::MemoryOrder::SequentiallyConsistent: return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_seq_cst);
  }
  return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_seq_cst); // unreachable
}
template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed: Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_relaxed); break;
    case vtkm::MemoryOrder::Acquire: case vtkm::MemoryOrder::Release: case vtkm::MemoryOrder::AcquireAndRelease: // a store can only release
      Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_release); break;
    case vtkm::MemoryOrder::SequentiallyConsistent: Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_seq_cst); break;
  }
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_add(addr, arg);
  AtomicLoadFence(order);
  return result;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_and(addr, mask);
  AtomicLoadFence(order);
  return result;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_or(addr, mask);
  AtomicLoadFence(order);
  return result;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_xor(addr, mask);
  AtomicLoadFence(order);
  return result;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}
template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr, T* expected, T desired, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired);
  AtomicLoadFence(order);
  if (oldValue == *expected)
  {
    return true;
  }
  *expected = oldValue; // on failure, report back the value actually observed
  return false;
}
#elif defined(VTKM_MSVC)
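// MSVC branch: atomics are implemented with the Interlocked* intrinsics. Those
// intrinsics are typed on CHAR/SHORT/LONG/LONG64, so a memcpy-based BitCast
// shuttles values between the VTK-m unsigned integer types and the Windows types.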
template <typename To, typename From>
VTKM_EXEC_CONT inline To BitCast(const From& src)
{
  VTKM_STATIC_ASSERT(sizeof(From) == sizeof(To));
  To dst;
  std::memcpy(&dst, &src, sizeof(From)); // memcpy avoids the undefined behavior of type punning via reinterpret_cast
  return dst;
}
template <typename T>
VTKM_EXEC_CONT inline T BitCast(T&& src)
{
  return std::forward<T>(src); // same-type overload is a pass-through
}
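// Example: BitCast<LONG>(1.0f) yields the LONG holding the IEEE-754 bit pattern
// of 1.0f (0x3F800000). A reinterpret_cast between unrelated types would be
// undefined behavior, while the memcpy is well defined and typically optimized away.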
VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(vtkm::UInt8* const addr, vtkm::MemoryOrder order)
{
  // A plain read through a volatile pointer is atomic for an aligned value;
  // the fence afterwards provides the requested ordering.
  auto result = *static_cast<volatile vtkm::UInt8* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(vtkm::UInt16* const addr, vtkm::MemoryOrder order)
{
  auto result = *static_cast<volatile vtkm::UInt16* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(vtkm::UInt32* const addr, vtkm::MemoryOrder order)
{
  auto result = *static_cast<volatile vtkm::UInt32* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(vtkm::UInt64* const addr, vtkm::MemoryOrder order)
{
  auto result = *static_cast<volatile vtkm::UInt64* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt8* addr, vtkm::UInt8 val, vtkm::MemoryOrder order)
{
  // No dedicated 8/16-bit atomic store intrinsic; exchange and discard the result.
  _InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
}
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt16* addr, vtkm::UInt16 val, vtkm::MemoryOrder order)
{
  _InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
}
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order)); // in AtomicStoreImpl for vtkm::UInt32

  std::atomic_thread_fence(internal::StdAtomicMemOrder(order)); // in AtomicStoreImpl for vtkm::UInt64
#define VTKM_ATOMIC_OP(vtkmName, winName, vtkmType, winType, suffix) \
  VTKM_EXEC_CONT inline vtkmType vtkmName(vtkmType* addr, vtkmType arg, vtkm::MemoryOrder order) \
  { \
    return BitCast<vtkmType>( \
      winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg))); \
  }

#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, vtkmType, winType, suffix) \
  VTKM_EXEC_CONT inline vtkmType AtomicNotImpl(vtkmType* addr, vtkm::MemoryOrder order) \
  { \
    return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order); \
  } \
  VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl( \
    vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder vtkmNotUsed(order)) \
  { \
    vtkmType result = BitCast<vtkmType>( \
      _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
                                          BitCast<winType>(desired), \
                                          BitCast<winType>(*expected))); \
    if (result == *expected) \
    { \
      return true; \
    } \
    else \
    { \
      *expected = result; \
      return false; \
    } \
  }
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)

#undef VTKM_ATOMIC_OPS_FOR_TYPE
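// Each VTKM_ATOMIC_OPS_FOR_TYPE expansion above instantiates the full set of
// Add/And/Or/Xor/Not/CompareExchange implementations for one width; for example,
// the vtkm::UInt32 expansion produces an AtomicAddImpl that forwards to
// _InterlockedExchangeAdd on a volatile LONG*.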
VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* address,
                                                  vtkm::Float32 value,
                                                  vtkm::MemoryOrder vtkmNotUsed(order))
{
  LONG assumed;
  LONG old = BitCast<LONG>(*address);
  do
  {
    assumed = old;
    old = _InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(address),
                                      BitCast<LONG>(BitCast<vtkm::Float32>(assumed) + value),
                                      assumed);
  } while (assumed != old);
  return BitCast<vtkm::Float32>(old);
}
VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* address,
                                                  vtkm::Float64 value,
                                                  vtkm::MemoryOrder vtkmNotUsed(order))
{
  LONG64 assumed;
  LONG64 old = BitCast<LONG64>(*address);
  do
  {
    assumed = old;
    old = _InterlockedCompareExchange64(reinterpret_cast<volatile LONG64*>(address),
                                        BitCast<LONG64>(BitCast<vtkm::Float64>(assumed) + value),
                                        assumed);
  } while (assumed != old);
  return BitCast<vtkm::Float64>(old);
}
#else // gcc/clang for CPU
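// GCC/Clang branch: atomics are implemented with the __atomic_* builtins, which
// take the __ATOMIC_* ordering constants produced by GccAtomicMemOrder below.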
VTKM_EXEC_CONT inline int GccAtomicMemOrder(vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed: return __ATOMIC_RELAXED;
    case vtkm::MemoryOrder::Acquire: return __ATOMIC_ACQUIRE;
    case vtkm::MemoryOrder::Release: return __ATOMIC_RELEASE;
    case vtkm::MemoryOrder::AcquireAndRelease: return __ATOMIC_ACQ_REL;
    case vtkm::MemoryOrder::SequentiallyConsistent: return __ATOMIC_SEQ_CST;
  }
  return __ATOMIC_SEQ_CST; // unreachable; quiets missing-return warnings
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
  return __atomic_load_n(addr, GccAtomicMemOrder(order));
}
template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  return __atomic_store_n(addr, value, GccAtomicMemOrder(order));
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  return __atomic_fetch_add(addr, arg, GccAtomicMemOrder(order));
}
#include <vtkmstd/bit_cast.h>
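// There is no __atomic_fetch_add for floating-point types, so the Float32 and
// Float64 overloads below emulate it with a compare-exchange loop over the
// value's bit pattern, using vtkmstd::bit_cast to move between float and
// integer representations.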
VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* addr, vtkm::Float32 arg, vtkm::MemoryOrder order)
{
  vtkm::UInt32 expected = vtkmstd::bit_cast<vtkm::UInt32>(*addr);
  vtkm::UInt32 desired;
  do
  {
    desired = vtkmstd::bit_cast<vtkm::UInt32>(vtkmstd::bit_cast<vtkm::Float32>(expected) + arg);
  } while (
    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt32*>(addr),
                                 &expected, // on failure, refreshed with the current value of *addr
                                 desired,
                                 false,
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
  return vtkmstd::bit_cast<vtkm::Float32>(expected); // the value present before the successful add
}
VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* addr, vtkm::Float64 arg, vtkm::MemoryOrder order)
{
  vtkm::UInt64 expected = vtkmstd::bit_cast<vtkm::UInt64>(*addr);
  vtkm::UInt64 desired;
  do
  {
    desired = vtkmstd::bit_cast<vtkm::UInt64>(vtkmstd::bit_cast<vtkm::Float64>(expected) + arg);
  } while (
    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt64*>(addr),
                                 &expected, // on failure, refreshed with the current value of *addr
                                 desired,
                                 false,
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
  return vtkmstd::bit_cast<vtkm::Float64>(expected); // the value present before the successful add
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  return __atomic_fetch_and(addr, mask, GccAtomicMemOrder(order));
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  return __atomic_fetch_or(addr, mask, GccAtomicMemOrder(order));
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  return __atomic_fetch_xor(addr, mask, GccAtomicMemOrder(order));
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}
template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr, T* expected, T desired, vtkm::MemoryOrder order)
{
  return __atomic_compare_exchange_n(
    addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
}
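// Unlike the CUDA, Kokkos, and MSVC implementations above, no manual fix-up of
// *expected is needed here: __atomic_compare_exchange_n already writes the
// observed value back into *expected when the exchange fails.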
template <typename T>
using OppositeSign = typename std::conditional<std::is_signed<T>::value,
                                               typename std::make_unsigned<T>::type,
                                               typename std::make_signed<T>::type>::type;
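// OppositeSign<T> lets the public functions below also accept an operand whose
// signedness differs from the pointee, which is handy for unsigned counters
// (illustrative only):
//
//   vtkm::UInt32 counter = 10;
//   vtkm::AtomicAdd(&counter, -1); // the vtkm::Int32 operand is cast to vtkm::UInt32; counter becomes 9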
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoad(T* const pointer, vtkm::MemoryOrder order = vtkm::MemoryOrder::Acquire)
{
  return detail::AtomicLoadImpl(pointer, order);
}
template <typename T>
VTKM_EXEC_CONT inline void AtomicStore(T* pointer, T value, vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
{
  detail::AtomicStoreImpl(pointer, value, order);
}
template <typename T>
VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
                                       detail::OppositeSign<T> value,
                                       vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
{
  detail::AtomicStoreImpl(pointer, static_cast<T>(value), order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAdd(T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAddImpl(pointer, operand, order);
}
template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
VTKM_EXEC_CONT inline T AtomicAdd(T* pointer,
                                  detail::OppositeSign<T> operand,
                                  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAddImpl(pointer, static_cast<T>(operand), order);
}
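// Typical use (illustrative only): atomically increment a shared counter and
// obtain the value it held before the increment.
//
//   vtkm::Int32 count = 0;
//   vtkm::Int32 oldCount = vtkm::AtomicAdd(&count, 1); // oldCount == 0, count == 1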
template <typename T>
VTKM_EXEC_CONT inline T AtomicAnd(T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAndImpl(pointer, operand, order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAnd(T* pointer,
                                  detail::OppositeSign<T> operand,
                                  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAndImpl(pointer, static_cast<T>(operand), order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicOr(T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicOrImpl(pointer, operand, order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicOr(T* pointer,
                                 detail::OppositeSign<T> operand,
                                 vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicOrImpl(pointer, static_cast<T>(operand), order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicXor(T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicXorImpl(pointer, operand, order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicXor(T* pointer,
                                  detail::OppositeSign<T> operand,
                                  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicXorImpl(pointer, static_cast<T>(operand), order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicNot(T* pointer, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicNotImpl(pointer, order);
}
template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchange(
  T* shared, T* expected, T desired, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicCompareExchangeImpl(shared, expected, desired, order);
}
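// AtomicCompareExchange returns true when *shared matched *expected and was
// replaced with desired; on failure it stores the value it observed in *expected.
// A common pattern is a retry loop, e.g. an atomic minimum (illustrative only):
//
//   vtkm::Int32 current = vtkm::AtomicLoad(addr);
//   while ((desired < current) && !vtkm::AtomicCompareExchange(addr, &current, desired))
//   {
//   }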
#endif //vtk_m_Atomic_h