#ifndef vtk_m_Atomic_h
#define vtk_m_Atomic_h
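// Portable atomic operations for VTK-m: load, store, add, and, or, xor, not,
// and compare-exchange, each taking a vtkm::MemoryOrder argument. The blocks
// below select an implementation per backend: CUDA device code, Kokkos/desul,
// MSVC _Interlocked* intrinsics, or the GCC/Clang __atomic builtins.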
      return std::memory_order_relaxed;

      return std::memory_order_acquire;

      return std::memory_order_release;

      return std::memory_order_acq_rel;

      return std::memory_order_seq_cst;

  return std::memory_order_seq_cst;
 
#if defined(VTKM_CUDA_DEVICE_PASS)
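// CUDA device backend: loads and stores go through volatile pointers, and the
// integer read-modify-write operations map directly onto the native
// atomicAdd/atomicAnd/atomicOr/atomicXor/atomicCAS intrinsics, which are
// relaxed. AtomicStoreFence(order) is issued before and AtomicLoadFence(order)
// after each operation so the requested vtkm::MemoryOrder is still honored.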
template <typename T>

  volatile T* const vaddr = addr;

  const T value = *vaddr;

  AtomicLoadFence(order);

template <typename T>

  volatile T* vaddr = addr;

  AtomicStoreFence(order);

template <typename T>

  AtomicStoreFence(order);
  auto result = atomicAdd(addr, arg);
  AtomicLoadFence(order);

template <typename T>

  AtomicStoreFence(order);
  auto result = atomicAnd(addr, mask);
  AtomicLoadFence(order);

template <typename T>

  AtomicStoreFence(order);
  auto result = atomicOr(addr, mask);
  AtomicLoadFence(order);

template <typename T>

  AtomicStoreFence(order);
  auto result = atomicXor(addr, mask);
  AtomicLoadFence(order);

template <typename T>

  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);

template <typename T>

  AtomicStoreFence(order);
  auto result = atomicCAS(addr, *expected, desired);
  AtomicLoadFence(order);
  if (result == *expected)
 
#if __CUDA_ARCH__ < 200
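// Compute capability < 2.0 has no native atomicAdd for 32-bit floats, so the
// add is emulated with an atomicCAS loop over the bit pattern, converting with
// __float_as_int / __int_as_float until the exchange succeeds.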
  AtomicStoreFence(order);

    old = atomicCAS(reinterpret_cast<vtkm::UInt32*>(address),

                    __float_as_int(__int_as_float(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __int_as_float(old);
 
#if __CUDA_ARCH__ < 600
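// Likewise, native atomicAdd for 64-bit doubles only exists on compute
// capability >= 6.0; older architectures emulate it with an atomicCAS loop
// using __double_as_longlong / __longlong_as_double.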
  AtomicStoreFence(order);

    old = atomicCAS(reinterpret_cast<vtkm::UInt64*>(address),

                    __double_as_longlong(__longlong_as_double(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __longlong_as_double(old);
 
#elif defined(VTKM_ENABLE_KOKKOS)
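// Kokkos backend: loads and stores go through desul::atomic_load /
// desul::atomic_store with an explicit memory order and scope, while the
// read-modify-write operations use Kokkos::atomic_fetch_* bracketed by memory
// fences. The preprocessor dance below pulls in KokkosCore_config.h without
// the rest of the Kokkos macro machinery, and disables the Kokkos CUDA/HIP
// paths when VTK-m itself is not being compiled for those devices.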
#ifndef KOKKOS_MACROS_HPP
#define KOKKOS_MACROS_HPP
#include <KokkosCore_config.h>
#undef KOKKOS_MACROS_HPP
#define KOKKOS_DONT_INCLUDE_CORE_CONFIG_H

#if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA)
#undef KOKKOS_ENABLE_CUDA

#if KOKKOS_VERSION >= 30401
#define KOKKOS_CUDA_SETUP_HPP_

#if defined(KOKKOS_ENABLE_HIP) && !defined(VTKM_HIP)
#undef KOKKOS_ENABLE_HIP

#endif //KOKKOS_MACROS_HPP not loaded

#include <Kokkos_Atomic.hpp>
    Kokkos::memory_fence();

    Kokkos::memory_fence();

#ifdef KOKKOS_INTERNAL_NOT_PARALLEL
#define VTKM_DESUL_MEM_SCOPE desul::MemoryScopeCaller()
#else
#define VTKM_DESUL_MEM_SCOPE desul::MemoryScopeDevice()
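// Serial Kokkos builds (KOKKOS_INTERNAL_NOT_PARALLEL) use
// desul::MemoryScopeCaller(); parallel builds use desul::MemoryScopeDevice()
// for the desul calls below.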
template <typename T>

      return desul::atomic_load(addr, desul::MemoryOrderRelaxed(), VTKM_DESUL_MEM_SCOPE);

      return desul::atomic_load(addr, desul::MemoryOrderAcquire(), VTKM_DESUL_MEM_SCOPE);

      return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VTKM_DESUL_MEM_SCOPE);

  return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VTKM_DESUL_MEM_SCOPE);

template <typename T>

      desul::atomic_store(addr, value, desul::MemoryOrderRelaxed(), VTKM_DESUL_MEM_SCOPE);

      desul::atomic_store(addr, value, desul::MemoryOrderRelease(), VTKM_DESUL_MEM_SCOPE);

      desul::atomic_store(addr, value, desul::MemoryOrderSeqCst(), VTKM_DESUL_MEM_SCOPE);
 
template <typename T>

  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_add(addr, arg);
  AtomicLoadFence(order);

template <typename T>

  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_and(addr, mask);
  AtomicLoadFence(order);

template <typename T>

  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_or(addr, mask);
  AtomicLoadFence(order);

template <typename T>

  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_xor(addr, mask);
  AtomicLoadFence(order);

template <typename T>

  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
 
template <typename T>

  AtomicStoreFence(order);
  T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired);
  AtomicLoadFence(order);
  if (oldValue == *expected)

    *expected = oldValue;
 
#elif defined(VTKM_MSVC)
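// MSVC backend: operations are built on the _Interlocked* intrinsics. Values
// are converted between the vtkm types and the corresponding Windows types
// (CHAR, SHORT, LONG, LONG64) with a memcpy-based BitCast, since the
// intrinsics are only defined for the Windows integer types.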
template <typename To, typename From>

  std::memcpy(&dst, &src, sizeof(From));

template <typename T>

  return std::forward<T>(src);
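// Atomic loads below are plain volatile reads of the matching unsigned type
// (UInt8/16/32/64), each followed by a std::atomic_thread_fence with the
// mapped memory order; 8- and 16-bit stores go through _InterlockedExchange8/16.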
 
  auto result = *static_cast<volatile vtkm::UInt8* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

  auto result = *static_cast<volatile vtkm::UInt16* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

  auto result = *static_cast<volatile vtkm::UInt32* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

  auto result = *static_cast<volatile vtkm::UInt64* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

  _InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));

  _InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));

  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));

  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
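// The VTKM_ATOMIC_OP / VTKM_ATOMIC_OPS_FOR_TYPE macros below stamp out the
// add/and/or/xor/not/compare-exchange implementations for each integer width
// from the corresponding _InterlockedExchangeAdd / _InterlockedAnd /
// _InterlockedOr / _InterlockedXor / _InterlockedCompareExchange intrinsics.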
 
#define VTKM_ATOMIC_OP(vtkmName, winName, vtkmType, winType, suffix)                             \
  VTKM_EXEC_CONT inline vtkmType vtkmName(vtkmType* addr, vtkmType arg, vtkm::MemoryOrder order) \

    return BitCast<vtkmType>(                                                                    \
      winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg)));        \

#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix)                                     \
  VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkmType, winType, suffix)             \
  VTKM_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, vtkmType, winType, suffix)                     \
  VTKM_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, vtkmType, winType, suffix)                       \
  VTKM_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, vtkmType, winType, suffix)                     \
  VTKM_EXEC_CONT inline vtkmType AtomicNotImpl(vtkmType* addr, vtkm::MemoryOrder order)         \

    return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order);                  \

  VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(                                         \
    vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder vtkmNotUsed(order)) \

    vtkmType result = BitCast<vtkmType>(                                                        \
      _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr),            \
                                          BitCast<winType>(desired),                            \
                                          BitCast<winType>(*expected)));                        \
    if (result == *expected)                                                                    \

      *expected = result;                                                                       \
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)

#undef VTKM_ATOMIC_OPS_FOR_TYPE
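// There is no _Interlocked add for floating point, so Float32/Float64 adds are
// CAS loops: read the current bits, add in floating point, and retry with
// _InterlockedCompareExchange(64) until no other thread has intervened.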
  LONG old = BitCast<LONG>(*address);

    old = _InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(address),
                                      BitCast<LONG>(BitCast<vtkm::Float32>(assumed) + value),

  } while (assumed != old);
  return BitCast<vtkm::Float32>(old);

  LONG64 old = BitCast<LONG64>(*address);

    old = _InterlockedCompareExchange64(reinterpret_cast<volatile LONG64*>(address),
                                        BitCast<LONG64>(BitCast<vtkm::Float64>(assumed) + value),

  } while (assumed != old);
  return BitCast<vtkm::Float64>(old);
 
#else // gcc/clang for CPU
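// GCC/Clang backend: everything maps onto the __atomic_* builtins, with
// GccAtomicMemOrder translating vtkm::MemoryOrder into the __ATOMIC_* constants
// (falling back to __ATOMIC_SEQ_CST).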
      return __ATOMIC_RELAXED;

      return __ATOMIC_ACQUIRE;

      return __ATOMIC_RELEASE;

      return __ATOMIC_ACQ_REL;

      return __ATOMIC_SEQ_CST;

  return __ATOMIC_SEQ_CST;
 
template <typename T>

  return __atomic_load_n(addr, GccAtomicMemOrder(order));

template <typename T>

  return __atomic_store_n(addr, value, GccAtomicMemOrder(order));

template <typename T>

  return __atomic_fetch_add(addr, arg, GccAtomicMemOrder(order));
 
#include <vtkmstd/bit_cast.h>
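// Floating-point add again has no direct builtin: the value is reinterpreted
// with vtkmstd::bit_cast, the sum is computed in Float32/Float64, and
// __atomic_compare_exchange_n retries on the integer representation until the
// exchange succeeds.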
  vtkm::UInt32 expected = vtkmstd::bit_cast<vtkm::UInt32>(*addr);

    desired = vtkmstd::bit_cast<vtkm::UInt32>(vtkmstd::bit_cast<vtkm::Float32>(expected) + arg);

    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt32*>(addr),

                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));

  return vtkmstd::bit_cast<vtkm::Float32>(expected);

  vtkm::UInt64 expected = vtkmstd::bit_cast<vtkm::UInt64>(*addr);

    desired = vtkmstd::bit_cast<vtkm::UInt64>(vtkmstd::bit_cast<vtkm::Float64>(expected) + arg);

    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt64*>(addr),

                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));

  return vtkmstd::bit_cast<vtkm::Float64>(expected);
 
template <typename T>

  return __atomic_fetch_and(addr, mask, GccAtomicMemOrder(order));

template <typename T>

  return __atomic_fetch_or(addr, mask, GccAtomicMemOrder(order));

template <typename T>

  return __atomic_fetch_xor(addr, mask, GccAtomicMemOrder(order));

template <typename T>

  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);

template <typename T>

  return __atomic_compare_exchange_n(
    addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
 
template <typename T>
using OppositeSign = typename std::conditional<std::is_signed<T>::value,
                                               typename std::make_unsigned<T>::type,
                                               typename std::make_signed<T>::type>::type;
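// OppositeSign<T> is the same-width integer type with the opposite signedness;
// it lets each public wrapper below also accept an operand of the opposite
// sign and cast it back to T before calling the detail::*Impl functions.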
 
template <typename T>

  return detail::AtomicLoadImpl(pointer, order);

template <typename T>

  detail::AtomicStoreImpl(pointer, value, order);

template <typename T>

                                       detail::OppositeSign<T> value,

  detail::AtomicStoreImpl(pointer, static_cast<T>(value), order);
 
template <typename T>

  return detail::AtomicAddImpl(pointer, operand, order);

template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>

  detail::OppositeSign<T> operand,

  return detail::AtomicAddImpl(pointer, static_cast<T>(operand), order);
 
template <typename T>

  return detail::AtomicAndImpl(pointer, operand, order);

template <typename T>

  detail::OppositeSign<T> operand,

  return detail::AtomicAndImpl(pointer, static_cast<T>(operand), order);
 
template <typename T>

  return detail::AtomicOrImpl(pointer, operand, order);

template <typename T>

  detail::OppositeSign<T> operand,

  return detail::AtomicOrImpl(pointer, static_cast<T>(operand), order);
 
template <typename T>

  return detail::AtomicXorImpl(pointer, operand, order);

template <typename T>

  detail::OppositeSign<T> operand,

  return detail::AtomicXorImpl(pointer, static_cast<T>(operand), order);
 
template <typename T>

  return detail::AtomicNotImpl(pointer, order);
 
template <typename T>

  return detail::AtomicCompareExchangeImpl(shared, expected, desired, order);
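// A minimal usage sketch (comments only, not compiled here). The public
// wrapper names and enumerators used below (vtkm::AtomicAdd,
// vtkm::AtomicCompareExchange, vtkm::Id, vtkm::MemoryOrder::Relaxed,
// vtkm::MemoryOrder::SequentiallyConsistent) are assumed from the VTK-m API
// and are not all visible in this fragment:
//
//   // Count hits into a histogram bin; a relaxed counter needs no ordering.
//   VTKM_EXEC_CONT inline void CountHit(vtkm::UInt32* bins, vtkm::Id bin)
//   {
//     vtkm::AtomicAdd(bins + bin, vtkm::UInt32{ 1 }, vtkm::MemoryOrder::Relaxed);
//   }
//
//   // Claim a flag exactly once: only the thread that swaps 0 -> 1 gets true.
//   // On failure, the compare-exchange writes the observed value to expected.
//   VTKM_EXEC_CONT inline bool TryClaim(vtkm::UInt32* flag)
//   {
//     vtkm::UInt32 expected = 0;
//     return vtkm::AtomicCompareExchange(
//       flag, &expected, vtkm::UInt32{ 1 }, vtkm::MemoryOrder::SequentiallyConsistent);
//   }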
 
#endif //vtk_m_Atomic_h