#ifndef vtk_m_Atomic_h
#define vtk_m_Atomic_h
VTKM_EXEC_CONT inline std::memory_order StdAtomicMemOrder(vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed: return std::memory_order_relaxed;
    case vtkm::MemoryOrder::Acquire: return std::memory_order_acquire;
    case vtkm::MemoryOrder::Release: return std::memory_order_release;
    case vtkm::MemoryOrder::AcquireAndRelease: return std::memory_order_acq_rel;
    case vtkm::MemoryOrder::SequentiallyConsistent: return std::memory_order_seq_cst;
  }
  // Unreachable, but the compiler cannot prove the switch is exhaustive; default to the
  // strongest ordering.
  return std::memory_order_seq_cst;
}
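// Usage sketch (not part of the original header): this helper lets a backend translate a
// vtkm::MemoryOrder into a standard fence, as the MSVC path further below does, e.g.
//
//   std::atomic_thread_fence(internal::StdAtomicMemOrder(vtkm::MemoryOrder::Release));
//
// which issues a release fence when a vtkm::MemoryOrder value is all the caller has.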
#if defined(VTKM_CUDA_DEVICE_PASS)
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
  // Read through a volatile pointer so the access bypasses the incoherent L1 cache.
  volatile T* const vaddr = addr;
  const T value = *vaddr;
  // Fence so that later dependent reads are ordered after this load.
  AtomicLoadFence(order);
  return value;
}

template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  volatile T* vaddr = addr;
  // Fence so that earlier writes are visible before the value is published.
  AtomicStoreFence(order);
  *vaddr = value;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicAdd(addr, arg);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicAnd(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicOr(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicXor(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}
template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr, T* expected, T desired, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicCAS(addr, *expected, desired);
  AtomicLoadFence(order);
  if (result == *expected)
  {
    return true;
  }
  *expected = result; // CAS failed; report the value actually observed back to the caller.
  return false;
}
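// CUDA's atomicCAS returns the word that was in memory before the operation, so equality with
// *expected signals success and, on failure, *expected is refreshed with the observed value.
// A caller-side retry sketch (hypothetical helper ComputeNewValue, for illustration only):
//
//   T expected = AtomicLoadImpl(addr, order);
//   while (!AtomicCompareExchangeImpl(addr, &expected, ComputeNewValue(expected), order))
//   {
//     // expected was updated with the current contents of *addr; try again.
//   }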
#if __CUDA_ARCH__ < 200
// Devices below compute capability 2.0 have no native atomicAdd for 32-bit floats;
// emulate it with a compare-and-swap loop over the value's bit pattern.
VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* address,
                                                  vtkm::Float32 value,
                                                  vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  vtkm::UInt32 assumed;
  vtkm::UInt32 old = __float_as_int(*address);
  do
  {
    assumed = old;
    old = atomicCAS(reinterpret_cast<vtkm::UInt32*>(address),
                    assumed,
                    __float_as_int(__int_as_float(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __int_as_float(old);
}
#endif
#if __CUDA_ARCH__ < 600
// Native atomicAdd for 64-bit floats requires compute capability 6.0; emulate with a CAS loop.
VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* address,
                                                  vtkm::Float64 value,
                                                  vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  vtkm::UInt64 assumed;
  vtkm::UInt64 old = __double_as_longlong(*address);
  do
  {
    assumed = old;
    old = atomicCAS(reinterpret_cast<vtkm::UInt64*>(address),
                    assumed,
                    __double_as_longlong(__longlong_as_double(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __longlong_as_double(old);
}
#endif
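// Both floating-point overloads above follow the same recipe: reinterpret the float's bits as
// an integer, compute the new value, and retry atomicCAS until no other thread has modified the
// word between the read and the exchange. The bits returned are those that were in memory before
// the successful exchange, matching atomicAdd's return convention.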
#elif defined(VTKM_ENABLE_KOKKOS)
#ifndef KOKKOS_MACROS_HPP
#define KOKKOS_MACROS_HPP
#include <KokkosCore_config.h>
#undef KOKKOS_MACROS_HPP
#define KOKKOS_DONT_INCLUDE_CORE_CONFIG_H

#if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA)
#undef KOKKOS_ENABLE_CUDA

// Keep newer Kokkos versions from loading their CUDA setup header now that CUDA has been
// disabled for this translation unit.
#if KOKKOS_VERSION >= 30401
#define KOKKOS_CUDA_SETUP_HPP_
#endif
#endif

#if defined(KOKKOS_ENABLE_HIP) && !defined(VTKM_HIP)
#undef KOKKOS_ENABLE_HIP
#endif

#endif //KOKKOS_MACROS_HPP not loaded

#include <Kokkos_Atomic.hpp>
// Fence helpers used by the Kokkos-backed implementations below.
VTKM_EXEC_CONT inline void AtomicStoreFence(vtkm::MemoryOrder order)
{
  if ((order == vtkm::MemoryOrder::Release) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
      (order == vtkm::MemoryOrder::SequentiallyConsistent))
  {
    Kokkos::memory_fence();
  }
}

VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
{
  if ((order == vtkm::MemoryOrder::Acquire) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
      (order == vtkm::MemoryOrder::SequentiallyConsistent))
  {
    Kokkos::memory_fence();
  }
}
#ifdef KOKKOS_INTERNAL_NOT_PARALLEL
#define VTKM_DESUL_MEM_SCOPE desul::MemoryScopeCaller()
#else
#define VTKM_DESUL_MEM_SCOPE desul::MemoryScopeDevice()
#endif
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed:
      return desul::atomic_load(addr, desul::MemoryOrderRelaxed(), VTKM_DESUL_MEM_SCOPE);
    case vtkm::MemoryOrder::Acquire:
    case vtkm::MemoryOrder::Release:           // Release makes no sense for a load; use Acquire.
    case vtkm::MemoryOrder::AcquireAndRelease:
      return desul::atomic_load(addr, desul::MemoryOrderAcquire(), VTKM_DESUL_MEM_SCOPE);
    case vtkm::MemoryOrder::SequentiallyConsistent:
      return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VTKM_DESUL_MEM_SCOPE);
  }
  // Unreachable; keep compilers quiet.
  return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VTKM_DESUL_MEM_SCOPE);
}
template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed:
      desul::atomic_store(addr, value, desul::MemoryOrderRelaxed(), VTKM_DESUL_MEM_SCOPE);
      break;
    case vtkm::MemoryOrder::Acquire:           // Acquire makes no sense for a store; use Release.
    case vtkm::MemoryOrder::Release:
    case vtkm::MemoryOrder::AcquireAndRelease:
      desul::atomic_store(addr, value, desul::MemoryOrderRelease(), VTKM_DESUL_MEM_SCOPE);
      break;
    case vtkm::MemoryOrder::SequentiallyConsistent:
      desul::atomic_store(addr, value, desul::MemoryOrderSeqCst(), VTKM_DESUL_MEM_SCOPE);
      break;
  }
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_add(addr, arg);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_and(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_or(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_xor(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}
template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr, T* expected, T desired, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired);
  AtomicLoadFence(order);
  if (oldValue == *expected)
  {
    return true;
  }
  *expected = oldValue; // CAS failed; report the value actually observed back to the caller.
  return false;
}
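// Kokkos::atomic_compare_exchange returns the value observed at addr, not a bool, so the
// comparison against *expected above converts it to the bool-plus-updated-expected contract
// shared by every backend in this header.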
#elif defined(VTKM_MSVC)
// Bit-for-bit reinterpretation between same-sized types; the memcpy is elided by optimizing
// compilers and sidesteps aliasing problems.
template <typename To, typename From>
VTKM_EXEC_CONT inline To BitCast(const From& src)
{
  To dst;
  std::memcpy(&dst, &src, sizeof(From));
  return dst;
}

// Pass-through overload for when the value already has the requested type.
template <typename T>
VTKM_EXEC_CONT inline T BitCast(T&& src)
{
  return std::forward<T>(src);
}
VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(vtkm::UInt8* const addr, vtkm::MemoryOrder order)
{
  // A plain read of an aligned value is assumed to be atomic here; the fence supplies the
  // requested memory ordering.
  auto result = *static_cast<volatile vtkm::UInt8* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}

VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(vtkm::UInt16* const addr, vtkm::MemoryOrder order)
{
  auto result = *static_cast<volatile vtkm::UInt16* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}

VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(vtkm::UInt32* const addr, vtkm::MemoryOrder order)
{
  auto result = *static_cast<volatile vtkm::UInt32* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}

VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(vtkm::UInt64* const addr, vtkm::MemoryOrder order)
{
  auto result = *static_cast<volatile vtkm::UInt64* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt8* addr, vtkm::UInt8 val, vtkm::MemoryOrder order)
{
  // The Windows API has no dedicated atomic store for small types; exchange and discard the result.
  _InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
}

VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt16* addr, vtkm::UInt16 val, vtkm::MemoryOrder order)
{
  _InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
}

VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt32* addr, vtkm::UInt32 val, vtkm::MemoryOrder order)
{
  // Aligned stores are assumed atomic; fence first so earlier writes are published with the value.
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  *addr = val;
}

VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt64* addr, vtkm::UInt64 val, vtkm::MemoryOrder order)
{
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  *addr = val;
}
#define VTKM_ATOMIC_OP(vtkmName, winName, vtkmType, winType, suffix) \
  VTKM_EXEC_CONT inline vtkmType vtkmName(vtkmType* addr, vtkmType arg, vtkm::MemoryOrder order) \
  { \
    return BitCast<vtkmType>( \
      winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg))); \
  }

#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, vtkmType, winType, suffix) \
  VTKM_EXEC_CONT inline vtkmType AtomicNotImpl(vtkmType* addr, vtkm::MemoryOrder order) \
  { \
    return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order); \
  } \
  VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl( \
    vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder vtkmNotUsed(order)) \
  { \
    vtkmType result = BitCast<vtkmType>( \
      _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
                                          BitCast<winType>(desired), \
                                          BitCast<winType>(*expected))); \
    if (result == *expected) \
    { \
      return true; \
    } \
    else \
    { \
      *expected = result; \
      return false; \
    } \
  }
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)

#undef VTKM_ATOMIC_OPS_FOR_TYPE
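// For reference, VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, ) expands to AtomicAddImpl,
// AtomicAndImpl, AtomicOrImpl, AtomicXorImpl, AtomicNotImpl, and AtomicCompareExchangeImpl
// overloads for vtkm::UInt32, each forwarding to the corresponding _Interlocked* intrinsic
// (e.g. _InterlockedExchangeAdd) through BitCast. The empty third argument is intentional:
// the 32-bit intrinsics carry no suffix, while the 16- and 64-bit variants do.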
VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* address,
                                                  vtkm::Float32 value,
                                                  vtkm::MemoryOrder vtkmNotUsed(order))
{
  LONG assumed;
  LONG old = BitCast<LONG>(*address);
  do
  {
    assumed = old;
    old = _InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(address),
                                      BitCast<LONG>(BitCast<vtkm::Float32>(assumed) + value),
                                      assumed);
  } while (assumed != old);
  return BitCast<vtkm::Float32>(old);
}
VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* address,
                                                  vtkm::Float64 value,
                                                  vtkm::MemoryOrder vtkmNotUsed(order))
{
  LONG64 assumed;
  LONG64 old = BitCast<LONG64>(*address);
  do
  {
    assumed = old;
    old = _InterlockedCompareExchange64(reinterpret_cast<volatile LONG64*>(address),
                                        BitCast<LONG64>(BitCast<vtkm::Float64>(assumed) + value),
                                        assumed);
  } while (assumed != old);
  return BitCast<vtkm::Float64>(old);
}
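// As in the CUDA path, floating-point add on MSVC is emulated with a compare-exchange loop over
// the value's bit pattern. The Interlocked intrinsics act as full memory barriers, which is
// presumably why the memory-order argument goes unused in these two overloads.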
#else // gcc/clang for CPU
VTKM_EXEC_CONT inline int GccAtomicMemOrder(vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed: return __ATOMIC_RELAXED;
    case vtkm::MemoryOrder::Acquire: return __ATOMIC_ACQUIRE;
    case vtkm::MemoryOrder::Release: return __ATOMIC_RELEASE;
    case vtkm::MemoryOrder::AcquireAndRelease: return __ATOMIC_ACQ_REL;
    case vtkm::MemoryOrder::SequentiallyConsistent: return __ATOMIC_SEQ_CST;
  }
  // Unreachable; default to the strongest ordering to silence warnings.
  return __ATOMIC_SEQ_CST;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
  return __atomic_load_n(addr, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  return __atomic_store_n(addr, value, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  return __atomic_fetch_add(addr, arg, GccAtomicMemOrder(order));
}
#include <vtkmstd/bit_cast.h>
VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* addr,
                                                  vtkm::Float32 arg,
                                                  vtkm::MemoryOrder order)
{
  vtkm::UInt32 expected = vtkmstd::bit_cast<vtkm::UInt32>(*addr);
  vtkm::UInt32 desired;
  do
  {
    desired = vtkmstd::bit_cast<vtkm::UInt32>(vtkmstd::bit_cast<vtkm::Float32>(expected) + arg);
  } while (
    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt32*>(addr),
                                 &expected, // on failure, reloaded with the value found in *addr
                                 desired,
                                 false,
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
  // expected now holds the bits that were in memory before the successful exchange.
  return vtkmstd::bit_cast<vtkm::Float32>(expected);
}
VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* addr,
                                                  vtkm::Float64 arg,
                                                  vtkm::MemoryOrder order)
{
  vtkm::UInt64 expected = vtkmstd::bit_cast<vtkm::UInt64>(*addr);
  vtkm::UInt64 desired;
  do
  {
    desired = vtkmstd::bit_cast<vtkm::UInt64>(vtkmstd::bit_cast<vtkm::Float64>(expected) + arg);
  } while (
    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt64*>(addr),
                                 &expected, // on failure, reloaded with the value found in *addr
                                 desired,
                                 false,
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
  return vtkmstd::bit_cast<vtkm::Float64>(expected);
}
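// __atomic_compare_exchange_n writes the value it actually found back into expected when the
// exchange fails, so the loops above never reread *addr explicitly; the failed CAS itself
// supplies the refreshed bit pattern for the next iteration.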
template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  return __atomic_fetch_and(addr, mask, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  return __atomic_fetch_or(addr, mask, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  return __atomic_fetch_xor(addr, mask, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}
template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr, T* expected, T desired, vtkm::MemoryOrder order)
{
  return __atomic_compare_exchange_n(
    addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
}
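// The false argument selects the strong compare-exchange variant (no spurious failures), and the
// same memory order is passed for both the success and failure paths.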
template <typename T>
using OppositeSign = typename std::conditional<std::is_signed<T>::value,
                                               typename std::make_unsigned<T>::type,
                                               typename std::make_signed<T>::type>::type;
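// A quick illustration (not in the original header): OppositeSign<vtkm::Int32> is vtkm::UInt32
// and OppositeSign<vtkm::UInt32> is vtkm::Int32. The overloads below use it so that a call such
// as vtkm::AtomicAdd(p, -1) with a vtkm::UInt32* p compiles (the signed operand is cast back to
// T, wrapping as unsigned arithmetic does) instead of failing template argument deduction.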
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoad(T* const pointer,
                                   vtkm::MemoryOrder order = vtkm::MemoryOrder::Acquire)
{
  return detail::AtomicLoadImpl(pointer, order);
}

template <typename T>
VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
                                       T value,
                                       vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
{
  detail::AtomicStoreImpl(pointer, value, order);
}

template <typename T>
VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
                                       detail::OppositeSign<T> value,
                                       vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
{
  detail::AtomicStoreImpl(pointer, static_cast<T>(value), order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAdd(
  T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAddImpl(pointer, operand, order);
}
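// Minimal usage sketch (hypothetical counter, not part of this header):
//
//   vtkm::UInt32* counter = /* device- or host-visible address */;
//   vtkm::UInt32 before = vtkm::AtomicAdd(counter, 1u); // *counter is now before + 1
//
// AtomicAdd and the other read-modify-write functions return the value that was stored at the
// address before the update.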
template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
VTKM_EXEC_CONT inline T AtomicAdd(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAddImpl(pointer, static_cast<T>(operand), order);
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAnd(
  T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAndImpl(pointer, operand, order);
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAnd(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAndImpl(pointer, static_cast<T>(operand), order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicOr(
  T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicOrImpl(pointer, operand, order);
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicOr(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicOrImpl(pointer, static_cast<T>(operand), order);
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicXor(
  T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicXorImpl(pointer, operand, order);
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicXor(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicXorImpl(pointer, static_cast<T>(operand), order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicNot(
  T* pointer, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicNotImpl(pointer, order);
}
template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchange(
  T* shared, T* expected, T desired,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicCompareExchangeImpl(shared, expected, desired, order);
}
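// Usage sketch (hypothetical names addr and desired, not part of this header). On failure,
// AtomicCompareExchange refreshes expected with the value it observed, which keeps retry loops
// short. For example, an atomic "store the maximum" could look like:
//
//   vtkm::UInt32 expected = vtkm::AtomicLoad(addr);
//   while ((desired > expected) && !vtkm::AtomicCompareExchange(addr, &expected, desired))
//   {
//     // expected now holds the newer value seen at addr; the loop re-tests the condition.
//   }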
#endif //vtk_m_Atomic_h