VTK-m 2.0
Atomic.h
1 //============================================================================
2 // Copyright (c) Kitware, Inc.
3 // All rights reserved.
4 // See LICENSE.txt for details.
5 //
6 // This software is distributed WITHOUT ANY WARRANTY; without even
7 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8 // PURPOSE. See the above copyright notice for more information.
9 //============================================================================
10 #ifndef vtk_m_Atomic_h
11 #define vtk_m_Atomic_h
12 
13 #include <vtkm/List.h>
14 
15 #include <vtkm/internal/Windows.h>
16 
17 #include <atomic>
18 
19 namespace vtkm
20 {
21 
56 enum class MemoryOrder
57 {
62  Relaxed,
63 
67  Acquire,
68 
72  Release,
73 
80  AcquireAndRelease,
81 
86  SequentiallyConsistent
87 };
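
For orientation, the following editor's sketch (not part of Atomic.h) shows the intended pairing of a Release store with an Acquire load, using the vtkm::AtomicStore and vtkm::AtomicLoad functions declared later in this header; the flag and payload pointers are hypothetical names.

// Editor's sketch: Release/Acquire handshake (host-side illustration only).
inline void ProducerSketch(vtkm::UInt32* payload, vtkm::UInt32* readyFlag)
{
  *payload = 42; // plain, non-atomic write
  // Release ensures the payload write above is visible before the flag is seen as 1.
  vtkm::AtomicStore(readyFlag, vtkm::UInt32{ 1 }, vtkm::MemoryOrder::Release);
}

inline vtkm::UInt32 ConsumerSketch(vtkm::UInt32* payload, vtkm::UInt32* readyFlag)
{
  // Acquire ensures that once the flag reads 1, the producer's payload write is visible.
  while (vtkm::AtomicLoad(readyFlag, vtkm::MemoryOrder::Acquire) == 0)
  {
  }
  return *payload;
}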
88 
89 namespace internal
90 {
91 
92 VTKM_EXEC_CONT inline std::memory_order StdAtomicMemOrder(vtkm::MemoryOrder order)
93 {
94  switch (order)
95  {
96  case vtkm::MemoryOrder::Relaxed:
97  return std::memory_order_relaxed;
98  case vtkm::MemoryOrder::Acquire:
99  return std::memory_order_acquire;
100  case vtkm::MemoryOrder::Release:
101  return std::memory_order_release;
102  case vtkm::MemoryOrder::AcquireAndRelease:
103  return std::memory_order_acq_rel;
104  case vtkm::MemoryOrder::SequentiallyConsistent:
105  return std::memory_order_seq_cst;
106  }
107 
108  // Should never reach here, but avoid compiler warnings
109  return std::memory_order_seq_cst;
110 }
111 
112 } // namespace internal
113 
114 } // namespace vtkm
115 
116 
117 #if defined(VTKM_CUDA_DEVICE_PASS)
118 
119 namespace vtkm
120 {
121 namespace detail
122 {
123 
124 // Fence to ensure that previous non-atomic stores are visible to other threads.
125 VTKM_EXEC_CONT inline void AtomicStoreFence(vtkm::MemoryOrder order)
126 {
127  if ((order == vtkm::MemoryOrder::Release) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
128  (order == vtkm::MemoryOrder::SequentiallyConsistent))
129  {
130  __threadfence();
131  }
132 }
133 
134 // Fence to ensure that stores from other threads are visible to subsequent non-atomic loads.
135 VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
136 {
137  if ((order == vtkm::MemoryOrder::Acquire) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
138  (order == vtkm::MemoryOrder::SequentiallyConsistent))
139  {
140  __threadfence();
141  }
142 }
143 
144 template <typename T>
145 VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
146 {
147  volatile T* const vaddr = addr; /* volatile to bypass cache*/
148  if (order == vtkm::MemoryOrder::SequentiallyConsistent)
149  {
150  __threadfence();
151  }
152  const T value = *vaddr;
153  /* fence to ensure that dependent reads are correctly ordered */
154  AtomicLoadFence(order);
155  return value;
156 }
157 
158 template <typename T>
159 VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
160 {
161  volatile T* vaddr = addr; /* volatile to bypass cache */
162  /* fence to ensure that previous non-atomic stores are visible to other threads */
163  AtomicStoreFence(order);
164  *vaddr = value;
165 }
166 
167 template <typename T>
168 VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
169 {
170  AtomicStoreFence(order);
171  auto result = atomicAdd(addr, arg);
172  AtomicLoadFence(order);
173  return result;
174 }
175 
176 template <typename T>
177 VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
178 {
179  AtomicStoreFence(order);
180  auto result = atomicAnd(addr, mask);
181  AtomicLoadFence(order);
182  return result;
183 }
184 
185 template <typename T>
186 VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
187 {
188  AtomicStoreFence(order);
189  auto result = atomicOr(addr, mask);
190  AtomicLoadFence(order);
191  return result;
192 }
193 
194 template <typename T>
195 VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
196 {
197  AtomicStoreFence(order);
198  auto result = atomicXor(addr, mask);
199  AtomicLoadFence(order);
200  return result;
201 }
202 
203 template <typename T>
204 VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
205 {
206  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
207 }
208 
209 template <typename T>
210 VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
211  T* expected,
212  T desired,
213  vtkm::MemoryOrder order)
214 {
215  AtomicStoreFence(order);
216  auto result = atomicCAS(addr, *expected, desired);
217  AtomicLoadFence(order);
218  if (result == *expected)
219  {
220  return true;
221  }
222  else
223  {
224  *expected = result;
225  return false;
226  }
227 }
228 #if __CUDA_ARCH__ < 200
229 VTKM_EXEC_CONT inline vtkm::Float32 vtkmAtomicAddImpl(vtkm::Float32* address,
230  vtkm::Float32 value,
231  vtkm::MemoryOrder order)
232 {
233  AtomicStoreFence(order);
234  vtkm::UInt32 assumed;
235  vtkm::UInt32 old = __float_as_int(*address);
236  do
237  {
238  assumed = old;
239  old = atomicCAS(reinterpret_cast<vtkm::UInt32*>(address),
240  assumed,
241  __float_as_int(__int_as_float(assumed) + value));
242  } while (assumed != old);
243  AtomicLoadFence(order);
244  return __int_as_float(old);
245 }
246 #endif
247 #if __CUDA_ARCH__ < 600
248 VTKM_EXEC_CONT inline vtkm::Float64 vtkmAtomicAdd(vtkm::Float64* address,
249  vtkm::Float64 value,
250  vtkm::MemoryOrder order)
251 {
252  AtomicStoreFence(order);
253  vtkm::UInt64 assumed;
254  vtkm::UInt64 old = __double_as_longlong(*address);
255  do
256  {
257  assumed = old;
258  old = atomicCAS(reinterpret_cast<vtkm::UInt64*>(address),
259  assumed,
260  __double_as_longlong(__longlong_as_double(assumed) + value));
261  } while (assumed != old);
262  AtomicLoadFence(order);
263  return __longlong_as_double(old);
264 }
265 #endif
266 }
267 } // namespace vtkm::detail
268 
269 #elif defined(VTKM_ENABLE_KOKKOS)
270 
271 VTKM_THIRDPARTY_PRE_INCLUDE
272 // Superhack! Kokkos_Macros.hpp defines macros to include modifiers like __device__.
273 // However, we don't want to actually use those if compiling this with a standard
274 // C++ compiler (because this particular code does not run on a device). Thus,
275 // we want to disable that behavior when not using the device compiler. To do that,
276 // we are going to have to load the KokkosCore_config.h file (which you are not
277 // supposed to do), then undefine the device enables if necessary, then load
278 // Kokkos_Macros.hpp to finish the state.
279 #ifndef KOKKOS_MACROS_HPP
280 #define KOKKOS_MACROS_HPP
281 #include <KokkosCore_config.h>
282 #undef KOKKOS_MACROS_HPP
283 #define KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
284 
285 #if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA)
286 #undef KOKKOS_ENABLE_CUDA
287 
288 // In later versions we need to directly deactivate Kokkos_Setup_Cuda.hpp
289 #if KOKKOS_VERSION >= 30401
290 #define KOKKOS_CUDA_SETUP_HPP_
291 #endif
292 #endif
293 
294 #if defined(KOKKOS_ENABLE_HIP) && !defined(VTKM_HIP)
295 #undef KOKKOS_ENABLE_HIP
296 #endif
297 
298 #endif //KOKKOS_MACROS_HPP not loaded
299 
300 #include <Kokkos_Atomic.hpp>
301 VTKM_THIRDPARTY_POST_INCLUDE
302 
303 namespace vtkm
304 {
305 namespace detail
306 {
307 
308 // Fence to ensure that previous non-atomic stores are visible to other threads.
309 VTKM_EXEC_CONT inline void AtomicStoreFence(vtkm::MemoryOrder order)
310 {
311  if ((order == vtkm::MemoryOrder::Release) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
312  (order == vtkm::MemoryOrder::SequentiallyConsistent))
313  {
314  Kokkos::memory_fence();
315  }
316 }
317 
318 // Fence to ensure that stores from other threads are visible to subsequent non-atomic loads.
319 VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
320 {
321  if ((order == vtkm::MemoryOrder::Acquire) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
322  (order == vtkm::MemoryOrder::SequentiallyConsistent))
323  {
324  Kokkos::memory_fence();
325  }
326 }
327 
328 template <typename T>
329 VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
330 {
331  switch (order)
332  {
333  case vtkm::MemoryOrder::Relaxed:
334  return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_relaxed);
335  case vtkm::MemoryOrder::Acquire:
336  case vtkm::MemoryOrder::Release: // Release doesn't make sense. Use Acquire.
337  case vtkm::MemoryOrder::AcquireAndRelease: // Release doesn't make sense. Use Acquire.
338  return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_acquire);
339  case vtkm::MemoryOrder::SequentiallyConsistent:
340  return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_seq_cst);
341  }
342 
343  // Should never reach here, but avoid compiler warnings
344  return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_seq_cst);
345 }
346 
347 template <typename T>
348 VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
349 {
350  switch (order)
351  {
352  case vtkm::MemoryOrder::Relaxed:
353  Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_relaxed);
354  break;
355  case vtkm::MemoryOrder::Acquire: // Acquire doesn't make sense. Use Release.
356  case vtkm::MemoryOrder::Release:
357  case vtkm::MemoryOrder::AcquireAndRelease: // Acquire doesn't make sense. Use Release.
358  Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_release);
359  break;
360  case vtkm::MemoryOrder::SequentiallyConsistent:
361  Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_seq_cst);
362  break;
363  }
364 }
365 
366 template <typename T>
367 VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
368 {
369  AtomicStoreFence(order);
370  T result = Kokkos::atomic_fetch_add(addr, arg);
371  AtomicLoadFence(order);
372  return result;
373 }
374 
375 template <typename T>
376 VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
377 {
378  AtomicStoreFence(order);
379  T result = Kokkos::atomic_fetch_and(addr, mask);
380  AtomicLoadFence(order);
381  return result;
382 }
383 
384 template <typename T>
385 VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
386 {
387  AtomicStoreFence(order);
388  T result = Kokkos::atomic_fetch_or(addr, mask);
389  AtomicLoadFence(order);
390  return result;
391 }
392 
393 template <typename T>
394 VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
395 {
396  AtomicStoreFence(order);
397  T result = Kokkos::atomic_fetch_xor(addr, mask);
398  AtomicLoadFence(order);
399  return result;
400 }
401 
402 template <typename T>
403 VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
404 {
405  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
406 }
407 
408 template <typename T>
409 VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
410  T* expected,
411  T desired,
412  vtkm::MemoryOrder order)
413 {
414  AtomicStoreFence(order);
415  T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired);
416  AtomicLoadFence(order);
417  if (oldValue == *expected)
418  {
419  return true;
420  }
421  else
422  {
423  *expected = oldValue;
424  return false;
425  }
426 }
427 }
428 } // namespace vtkm::detail
429 
430 #elif defined(VTKM_MSVC)
431 
432 // Supports vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64
433 
434 #include <cstdint>
435 #include <cstring>
436 #include <intrin.h> // For MSVC atomics
437 
438 namespace vtkm
439 {
440 namespace detail
441 {
442 
443 template <typename To, typename From>
444 VTKM_EXEC_CONT inline To BitCast(const From& src)
445 {
446  // The memcpy should be removed by the compiler when possible, but this
447  // works around a host of issues with bitcasting using reinterpret_cast.
448  VTKM_STATIC_ASSERT(sizeof(From) == sizeof(To));
449  To dst;
450  std::memcpy(&dst, &src, sizeof(From));
451  return dst;
452 }
453 
454 template <typename T>
455 VTKM_EXEC_CONT inline T BitCast(T&& src)
456 {
457  return std::forward<T>(src);
458 }
459 
460 // Note about Load and Store implementations:
461 //
462 // "Simple reads and writes to properly-aligned 32-bit variables are atomic
463 // operations"
464 //
465 // "Simple reads and writes to properly aligned 64-bit variables are atomic on
466 // 64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
467 // atomic on 32-bit Windows."
468 //
469 // "Reads and writes to variables of other sizes [than 32 or 64 bits] are not
470 // guaranteed to be atomic on any platform."
471 //
472 // https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access
473 
474 VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(vtkm::UInt8* const addr, vtkm::MemoryOrder order)
475 {
476  // This assumes that the memory interface is smart enough to load a 32-bit
477  // word atomically and a properly aligned 8-bit word from it.
478  // We could build address masks and do shifts to perform this manually if
479  // this assumption is incorrect.
480  auto result = *static_cast<volatile vtkm::UInt8* const>(addr);
481  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
482  return result;
483 }
484 VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(vtkm::UInt16* const addr, vtkm::MemoryOrder order)
485 {
486  // This assumes that the memory interface is smart enough to load a 32-bit
487  // word atomically and a properly aligned 16-bit word from it.
488  // We could build address masks and do shifts to perform this manually if
489  // this assumption is incorrect.
490  auto result = *static_cast<volatile vtkm::UInt16* const>(addr);
491  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
492  return result;
493 }
494 VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(vtkm::UInt32* const addr, vtkm::MemoryOrder order)
495 {
496  auto result = *static_cast<volatile vtkm::UInt32* const>(addr);
497  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
498  return result;
499 }
500 VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(vtkm::UInt64* const addr, vtkm::MemoryOrder order)
501 {
502  auto result = *static_cast<volatile vtkm::UInt64* const>(addr);
503  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
504  return result;
505 }
506 
507 VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt8* addr,
508  vtkm::UInt8 val,
509  vtkm::MemoryOrder vtkmNotUsed(order))
510 {
511  // There doesn't seem to be an atomic store instruction in the windows
512  // API, so just exchange and discard the result.
513  _InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
514 }
515 VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt16* addr,
516  vtkm::UInt16 val,
517  vtkm::MemoryOrder vtkmNotUsed(order))
518 {
519  // There doesn't seem to be an atomic store instruction in the windows
520  // API, so just exchange and discard the result.
521  _InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
522 }
523 VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt32* addr,
524  vtkm::UInt32 val,
525  vtkm::MemoryOrder order)
526 {
527  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
528  *addr = val;
529 }
530 VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt64* addr,
531  vtkm::UInt64 val,
532  vtkm::MemoryOrder order)
533 {
534  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
535  *addr = val;
536 }
537 
538 #define VTKM_ATOMIC_OP(vtkmName, winName, vtkmType, winType, suffix) \
539  VTKM_EXEC_CONT inline vtkmType vtkmName(vtkmType* addr, vtkmType arg, vtkm::MemoryOrder order) \
540  { \
541  return BitCast<vtkmType>( \
542  winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg))); \
543  }
544 
545 #define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
546  VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkmType, winType, suffix) \
547  VTKM_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, vtkmType, winType, suffix) \
548  VTKM_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, vtkmType, winType, suffix) \
549  VTKM_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, vtkmType, winType, suffix) \
550  VTKM_EXEC_CONT inline vtkmType AtomicNotImpl(vtkmType* addr, vtkm::MemoryOrder order) \
551  { \
552  return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order); \
553  } \
554  VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl( \
555  vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder vtkmNotUsed(order)) \
556  { \
557  vtkmType result = BitCast<vtkmType>( \
558  _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
559  BitCast<winType>(desired), \
560  BitCast<winType>(*expected))); \
561  if (result == *expected) \
562  { \
563  return true; \
564  } \
565  else \
566  { \
567  *expected = result; \
568  return false; \
569  } \
570  }
571 
572 VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
573 VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
574 VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
575 VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)
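
For reference, expanding the first invocation above by hand, VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkm::UInt8, CHAR, 8) produces roughly the following (editor's expansion for illustration, not literal preprocessor output):

VTKM_EXEC_CONT inline vtkm::UInt8 AtomicAddImpl(vtkm::UInt8* addr, vtkm::UInt8 arg, vtkm::MemoryOrder order)
{
  // _InterlockedExchangeAdd ## 8 pastes to the 8-bit intrinsic; the order argument is unused here.
  return BitCast<vtkm::UInt8>(
    _InterlockedExchangeAdd8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(arg)));
}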
576 
577 #undef VTKM_ATOMIC_OPS_FOR_TYPE
578 
579 VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* address,
580  vtkm::Float32 value,
581  vtkm::MemoryOrder vtkmNotUsed(order))
582 {
583  LONG assumed;
584  LONG old = BitCast<LONG>(*address);
585  do
586  {
587  assumed = old;
588  old = _InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(address),
589  BitCast<LONG>(BitCast<vtkm::Float32>(assumed) + value),
590  assumed);
591  } while (assumed != old);
592  return BitCast<vtkm::Float32>(old);
593 }
594 
595 VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* address,
596  vtkm::Float64 value,
597  vtkm::MemoryOrder vtkmNotUsed(order))
598 {
599  LONG64 assumed;
600  LONG64 old = BitCast<LONG64>(*address);
601  do
602  {
603  assumed = old;
604  old = _InterlockedCompareExchange64(reinterpret_cast<volatile LONG64*>(address),
605  BitCast<LONG64>(BitCast<vtkm::Float64>(assumed) + value),
606  assumed);
607  } while (assumed != old);
608  return BitCast<vtkm::Float64>(old);
609 }
610 
611 }
612 } // namespace vtkm::detail
613 
614 #else // gcc/clang for CPU
615 
616 // Supports vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64
617 
618 #include <cstdint>
619 #include <cstring>
620 
621 namespace vtkm
622 {
623 namespace detail
624 {
625 
626 VTKM_EXEC_CONT inline int GccAtomicMemOrder(vtkm::MemoryOrder order)
627 {
628  switch (order)
629  {
630  case vtkm::MemoryOrder::Relaxed:
631  return __ATOMIC_RELAXED;
632  case vtkm::MemoryOrder::Acquire:
633  return __ATOMIC_ACQUIRE;
634  case vtkm::MemoryOrder::Release:
635  return __ATOMIC_RELEASE;
636  case vtkm::MemoryOrder::AcquireAndRelease:
637  return __ATOMIC_ACQ_REL;
638  case vtkm::MemoryOrder::SequentiallyConsistent:
639  return __ATOMIC_SEQ_CST;
640  }
641 
642  // Should never reach here, but avoid compiler warnings
643  return __ATOMIC_SEQ_CST;
644 }
645 
646 template <typename T>
647 VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
648 {
649  return __atomic_load_n(addr, GccAtomicMemOrder(order));
650 }
651 
652 template <typename T>
653 VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
654 {
655  return __atomic_store_n(addr, value, GccAtomicMemOrder(order));
656 }
657 
658 template <typename T>
659 VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
660 {
661  return __atomic_fetch_add(addr, arg, GccAtomicMemOrder(order));
662 }
663 
664 #include <vtkmstd/bit_cast.h>
665 
666 // TODO: Use enable_if to write one version for both Float32 and Float64.
667 VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* addr,
668  vtkm::Float32 arg,
669  vtkm::MemoryOrder order)
670 {
671  vtkm::UInt32 expected = vtkmstd::bit_cast<vtkm::UInt32>(*addr);
672  vtkm::UInt32 desired;
673 
674  do
675  {
676  desired = vtkmstd::bit_cast<vtkm::UInt32>(vtkmstd::bit_cast<vtkm::Float32>(expected) + arg);
677  } while (
678  !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt32*>(addr),
679  &expected, // reloads expected with *addr prior to the operation
680  desired,
681  false,
682  GccAtomicMemOrder(order),
683  GccAtomicMemOrder(order)));
684  // return the "old" value that was in the memory.
685  return vtkmstd::bit_cast<vtkm::Float32>(expected);
686 }
687 
688 // TODO: Use enable_if to write one version for both Float32 and Float64.
689 VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* addr,
690  vtkm::Float64 arg,
691  vtkm::MemoryOrder order)
692 {
693  vtkm::UInt64 expected = vtkmstd::bit_cast<vtkm::UInt64>(*addr);
694  vtkm::UInt64 desired;
695 
696  do
697  {
698  desired = vtkmstd::bit_cast<vtkm::UInt64>(vtkmstd::bit_cast<vtkm::Float64>(expected) + arg);
699  } while (
700  !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt64*>(addr),
701  &expected, // reloads expected with *addr prior to the operation
702  desired,
703  false,
704  GccAtomicMemOrder(order),
705  GccAtomicMemOrder(order)));
706  // return the "old" value that was in the memory.
707  return vtkmstd::bit_cast<vtkm::Float64>(expected);
708 }
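
Responding to the TODO above, both floating-point overloads could be folded into one template along these lines (editor's sketch, not part of Atomic.h; the alias UnsignedOfSameSize is hypothetical and <type_traits> is assumed to be available):

// Editor's sketch: one CAS loop for both Float32 and Float64.
template <typename T>
using UnsignedOfSameSize =
  typename std::conditional<sizeof(T) == sizeof(vtkm::UInt32), vtkm::UInt32, vtkm::UInt64>::type;

template <typename T, typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
VTKM_EXEC_CONT inline T AtomicAddFloatSketch(T* addr, T arg, vtkm::MemoryOrder order)
{
  using UIntT = UnsignedOfSameSize<T>;
  UIntT expected = vtkmstd::bit_cast<UIntT>(*addr);
  UIntT desired;
  do
  {
    desired = vtkmstd::bit_cast<UIntT>(vtkmstd::bit_cast<T>(expected) + arg);
  } while (!__atomic_compare_exchange_n(reinterpret_cast<UIntT*>(addr),
                                        &expected, // reloaded with *addr on failure
                                        desired,
                                        false,
                                        GccAtomicMemOrder(order),
                                        GccAtomicMemOrder(order)));
  return vtkmstd::bit_cast<T>(expected); // the value that was in memory before the add
}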
709 
710 template <typename T>
711 VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
712 {
713  return __atomic_fetch_and(addr, mask, GccAtomicMemOrder(order));
714 }
715 
716 template <typename T>
717 VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
718 {
719  return __atomic_fetch_or(addr, mask, GccAtomicMemOrder(order));
720 }
721 
722 template <typename T>
723 VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
724 {
725  return __atomic_fetch_xor(addr, mask, GccAtomicMemOrder(order));
726 }
727 
728 template <typename T>
729 VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
730 {
731  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
732 }
733 
734 template <typename T>
735 VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
736  T* expected,
737  T desired,
738  vtkm::MemoryOrder order)
739 {
740  return __atomic_compare_exchange_n(
741  addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
742 }
743 }
744 } // namespace vtkm::detail
745 
746 #endif // gcc/clang
747 
748 namespace vtkm
749 {
750 
751 namespace detail
752 {
753 
754 template <typename T>
755 using OppositeSign = typename std::conditional<std::is_signed<T>::value,
756  typename std::make_unsigned<T>::type,
757  typename std::make_signed<T>::type>::type;
758 
759 } // namespace detail
760 
763 using AtomicTypePreferred = vtkm::UInt32;
764 
772 using AtomicTypesSupported = vtkm::List<vtkm::UInt32, vtkm::UInt64>;
773 
780 template <typename T>
781 VTKM_EXEC_CONT inline T AtomicLoad(T* const pointer,
782  vtkm::MemoryOrder order = vtkm::MemoryOrder::Acquire)
783 {
784  return detail::AtomicLoadImpl(pointer, order);
785 }
786 
794 template <typename T>
795 VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
796  T value,
797  vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
798 {
799  detail::AtomicStoreImpl(pointer, value, order);
800 }
801 template <typename T>
802 VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
803  detail::OppositeSign<T> value,
804  vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
805 {
806  detail::AtomicStoreImpl(pointer, static_cast<T>(value), order);
807 }
809 
823 template <typename T>
824 VTKM_EXEC_CONT inline T AtomicAdd(
825  T* pointer,
826  T operand,
827  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
828 {
829  return detail::AtomicAddImpl(pointer, operand, order);
830 }
831 template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
832 VTKM_EXEC_CONT inline T AtomicAdd(
833  T* pointer,
834  detail::OppositeSign<T> operand,
835  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
836 {
837  return detail::AtomicAddImpl(pointer, static_cast<T>(operand), order);
838 }
840 
854 template <typename T>
855 VTKM_EXEC_CONT inline T AtomicAnd(
856  T* pointer,
857  T operand,
858  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
859 {
860  return detail::AtomicAndImpl(pointer, operand, order);
861 }
862 template <typename T>
863 VTKM_EXEC_CONT inline T AtomicAnd(
864  T* pointer,
865  detail::OppositeSign<T> operand,
866  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
867 {
868  return detail::AtomicAndImpl(pointer, static_cast<T>(operand), order);
869 }
871 
885 template <typename T>
886 VTKM_EXEC_CONT inline T
887 AtomicOr(T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
888 {
889  return detail::AtomicOrImpl(pointer, operand, order);
890 }
891 template <typename T>
892 VTKM_EXEC_CONT inline T AtomicOr(
893  T* pointer,
894  detail::OppositeSign<T> operand,
895  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
896 {
897  return detail::AtomicOrImpl(pointer, static_cast<T>(operand), order);
898 }
900 
913 template <typename T>
914 VTKM_EXEC_CONT inline T AtomicXor(
915  T* pointer,
916  T operand,
917  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
918 {
919  return detail::AtomicXorImpl(pointer, operand, order);
920 }
921 template <typename T>
922 VTKM_EXEC_CONT inline T AtomicXor(
923  T* pointer,
924  detail::OppositeSign<T> operand,
925  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
926 {
927  return detail::AtomicXorImpl(pointer, static_cast<T>(operand), order);
928 }
930 
940 template <typename T>
941 VTKM_EXEC_CONT inline T AtomicNot(
942  T* pointer,
943  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
944 {
945  return detail::AtomicNotImpl(pointer, order);
946 }
947 
970 template <typename T>
971 VTKM_EXEC_CONT inline bool AtomicCompareExchange(
972  T* shared,
973  T* expected,
974  T desired,
975  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
976 {
977  return detail::AtomicCompareExchangeImpl(shared, expected, desired, order);
978 }
979 
980 } // namespace vtkm
981 
982 #endif //vtk_m_Atomic_h
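
As a usage example of the public interface above, an atomic fetch-min can be composed from AtomicLoad and AtomicCompareExchange; this is an editor's sketch (the function name AtomicMinSketch is hypothetical) that relies only on the signatures declared in this header.

#include <vtkm/Atomic.h>

VTKM_EXEC_CONT inline vtkm::UInt32 AtomicMinSketch(vtkm::UInt32* addr, vtkm::UInt32 value)
{
  vtkm::UInt32 expected = vtkm::AtomicLoad(addr, vtkm::MemoryOrder::Relaxed);
  // On failure, AtomicCompareExchange reloads expected with the current *addr,
  // so the loop retries against the freshest value; on success it exits.
  while ((value < expected) && !vtkm::AtomicCompareExchange(addr, &expected, value))
  {
  }
  return expected; // previous minimum stored at addr
}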
Referenced symbols:
- vtkm::AtomicAdd: VTKM_EXEC_CONT T AtomicAdd(T *pointer, T operand, vtkm::MemoryOrder order=vtkm::MemoryOrder::SequentiallyConsistent). Atomic function to add a value to a shared memory location. Definition: Atomic.h:824
- vtkm: the vtkm namespace. Definition: Atomic.h:19
- VTKM_EXEC_CONT: #define VTKM_EXEC_CONT. Definition: ExportMacros.h:52
- vtkm::AtomicStore: VTKM_EXEC_CONT void AtomicStore(T *pointer, T value, vtkm::MemoryOrder order=vtkm::MemoryOrder::Release). Atomic function to save a value to a shared memory location. Definition: Atomic.h:795
- vtkm::MemoryOrder::Release: A store operation with Release memory order will enforce that any local read or write operations listed…
- vtkm::AtomicOr: VTKM_EXEC_CONT T AtomicOr(T *pointer, T operand, vtkm::MemoryOrder order=vtkm::MemoryOrder::SequentiallyConsistent). Atomic function to OR bits to a shared memory location. Definition: Atomic.h:887
- vtkm::MemoryOrder::Relaxed: An atomic operation with Relaxed memory order enforces no synchronization or ordering constraints on…
- vtkm::MemoryOrder::AcquireAndRelease: A read-modify-write operation with AcquireAndRelease memory order will enforce that any local read or…
- vtkm::MemoryOrder: Specifies memory order semantics for atomic operations. Definition: Atomic.h:56
- vtkm::AtomicLoad: VTKM_EXEC_CONT T AtomicLoad(T *const pointer, vtkm::MemoryOrder order=vtkm::MemoryOrder::Acquire). Atomic function to load a value from a shared memory location. Definition: Atomic.h:781
- vtkm::AtomicCompareExchange: VTKM_EXEC_CONT bool AtomicCompareExchange(T *shared, T *expected, T desired, vtkm::MemoryOrder order=vtkm::MemoryOrder::SequentiallyConsistent). Atomic function that replaces a value given a condition. Definition: Atomic.h:971
- vtkm::MemoryOrder::SequentiallyConsistent: An atomic with SequentiallyConsistent memory order will enforce any appropriate semantics as Acquire,…
- VTKM_STATIC_ASSERT: #define VTKM_STATIC_ASSERT(condition). Definition: StaticAssert.h:16
- vtkm::AtomicXor: VTKM_EXEC_CONT T AtomicXor(T *pointer, T operand, vtkm::MemoryOrder order=vtkm::MemoryOrder::SequentiallyConsistent). Atomic function to XOR bits to a shared memory location. Definition: Atomic.h:914
- vtkm::UInt8: uint8_t UInt8. Definition: Types.h:157
- vtkmNotUsed: #define vtkmNotUsed(parameter_name). Simple macro to identify a parameter as unused. Definition: ExportMacros.h:128
- vtkm::AtomicTypePreferred: vtkm::UInt32 AtomicTypePreferred. The preferred type to use for atomic operations. Definition: Atomic.h:763
- vtkm::UInt32: uint32_t UInt32. Definition: Types.h:161
- vtkm::Float32: float Float32. Definition: Types.h:154
- vtkm::List: Definition: List.h:34
- vtkm::Float64: double Float64. Definition: Types.h:155
- vtkm::MemoryOrder::Acquire: A load operation with Acquire memory order will enforce that any local read or write operations listed…
- vtkm::UInt16: uint16_t UInt16. Definition: Types.h:159
- vtkm::AtomicAnd: VTKM_EXEC_CONT T AtomicAnd(T *pointer, T operand, vtkm::MemoryOrder order=vtkm::MemoryOrder::SequentiallyConsistent). Atomic function to AND bits to a shared memory location. Definition: Atomic.h:855
- Windows.h: included as vtkm/internal/Windows.h.
- vtkm::AtomicNot: VTKM_EXEC_CONT T AtomicNot(T *pointer, vtkm::MemoryOrder order=vtkm::MemoryOrder::SequentiallyConsistent). Atomic function to NOT bits to a shared memory location. Definition: Atomic.h:941
- List.h: included as vtkm/List.h.