VTK-m  2.1
Atomic.h
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_Atomic_h
#define vtk_m_Atomic_h

#include <vtkm/List.h>

#include <vtkm/internal/Windows.h>

#include <atomic>

namespace vtkm
{

/// \brief Specifies memory order semantics for atomic operations.
///
/// These orders map onto the corresponding std::memory_order values
/// (see internal::StdAtomicMemOrder below).
enum class MemoryOrder
{
  /// An atomic operation with Relaxed memory order enforces no synchronization or ordering
  /// constraints on surrounding reads and writes; only the atomicity of the operation itself
  /// is guaranteed.
  Relaxed,

  /// A load operation with Acquire memory order will enforce that any local read or write
  /// operations listed in the program after the atomic will happen after the atomic.
  Acquire,

  /// A store operation with Release memory order will enforce that any local read or write
  /// operations listed in the program before the atomic will happen before the atomic.
  Release,

  /// A read-modify-write operation with AcquireAndRelease memory order will enforce that any
  /// local read or write operations listed before the atomic happen before it (as with
  /// Release) and any listed after it happen after it (as with Acquire).
  AcquireAndRelease,

  /// An atomic with SequentiallyConsistent memory order will enforce any appropriate
  /// semantics as Acquire, Release, and AcquireAndRelease, and additionally enforces a
  /// consistent ordering of atomic operations across all threads.
  SequentiallyConsistent
};
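
// Illustrative sketch (not part of the original header): a typical acquire/release pairing
// using the vtkm::AtomicStore/vtkm::AtomicLoad functions declared later in this file. The
// variable names below are hypothetical.
//
//   vtkm::Int32 data;       // shared payload
//   vtkm::UInt32 readyFlag; // shared flag, initially 0
//
//   // Producer thread:
//   data = 42;
//   vtkm::AtomicStore(&readyFlag, vtkm::UInt32{ 1 }, vtkm::MemoryOrder::Release);
//
//   // Consumer thread:
//   while (vtkm::AtomicLoad(&readyFlag, vtkm::MemoryOrder::Acquire) == 0)
//   {
//   }
//   // The write to data is now guaranteed to be visible here.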

namespace internal
{

VTKM_EXEC_CONT inline std::memory_order StdAtomicMemOrder(vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed:
      return std::memory_order_relaxed;
    case vtkm::MemoryOrder::Acquire:
      return std::memory_order_acquire;
    case vtkm::MemoryOrder::Release:
      return std::memory_order_release;
    case vtkm::MemoryOrder::AcquireAndRelease:
      return std::memory_order_acq_rel;
    case vtkm::MemoryOrder::SequentiallyConsistent:
      return std::memory_order_seq_cst;
  }

  // Should never reach here, but avoid compiler warnings
  return std::memory_order_seq_cst;
}

} // namespace internal

} // namespace vtkm

#if defined(VTKM_CUDA_DEVICE_PASS)

namespace vtkm
{
namespace detail
{

// Fence to ensure that previous non-atomic stores are visible to other threads.
VTKM_EXEC_CONT inline void AtomicStoreFence(vtkm::MemoryOrder order)
{
  if ((order == vtkm::MemoryOrder::Release) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
      (order == vtkm::MemoryOrder::SequentiallyConsistent))
  {
    __threadfence();
  }
}

// Fence to ensure that subsequent non-atomic loads are correctly ordered after the atomic.
VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
{
  if ((order == vtkm::MemoryOrder::Acquire) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
      (order == vtkm::MemoryOrder::SequentiallyConsistent))
  {
    __threadfence();
  }
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
  volatile T* const vaddr = addr; /* volatile to bypass cache */
  if (order == vtkm::MemoryOrder::SequentiallyConsistent)
  {
    __threadfence();
  }
  const T value = *vaddr;
  /* fence to ensure that dependent reads are correctly ordered */
  AtomicLoadFence(order);
  return value;
}

template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  volatile T* vaddr = addr; /* volatile to bypass cache */
  /* fence to ensure that previous non-atomic stores are visible to other threads */
  AtomicStoreFence(order);
  *vaddr = value;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicAdd(addr, arg);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicAnd(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicOr(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicXor(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}

template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
                                                     T* expected,
                                                     T desired,
                                                     vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  auto result = atomicCAS(addr, *expected, desired);
  AtomicLoadFence(order);
  if (result == *expected)
  {
    return true;
  }
  else
  {
    *expected = result;
    return false;
  }
}
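
// Illustrative sketch (not part of the original header): the compare-exchange primitive
// above is the building block for read-modify-write operations that have no dedicated
// hardware instruction. A hypothetical atomic "max", for example, can be written as a retry
// loop in the same style as the floating-point adds below:
//
//   template <typename T>
//   VTKM_EXEC_CONT inline T AtomicMaxViaCasSketch(T* addr, T value, vtkm::MemoryOrder order)
//   {
//     T expected = AtomicLoadImpl(addr, vtkm::MemoryOrder::Relaxed);
//     while (!AtomicCompareExchangeImpl(
//       addr, &expected, (value > expected) ? value : expected, order))
//     {
//       // expected now holds the current value at addr; try again.
//     }
//     return expected;
//   }
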
#if __CUDA_ARCH__ < 200
VTKM_EXEC_CONT inline vtkm::Float32 vtkmAtomicAddImpl(vtkm::Float32* address,
                                                      vtkm::Float32 value,
                                                      vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  vtkm::UInt32 assumed;
  vtkm::UInt32 old = __float_as_int(*address);
  do
  {
    assumed = old;
    old = atomicCAS(reinterpret_cast<vtkm::UInt32*>(address),
                    assumed,
                    __float_as_int(__int_as_float(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __int_as_float(old);
}
#endif
#if __CUDA_ARCH__ < 600
VTKM_EXEC_CONT inline vtkm::Float64 vtkmAtomicAdd(vtkm::Float64* address,
                                                  vtkm::Float64 value,
                                                  vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  vtkm::UInt64 assumed;
  vtkm::UInt64 old = __double_as_longlong(*address);
  do
  {
    assumed = old;
    old = atomicCAS(reinterpret_cast<vtkm::UInt64*>(address),
                    assumed,
                    __double_as_longlong(__longlong_as_double(assumed) + value));
  } while (assumed != old);
  AtomicLoadFence(order);
  return __longlong_as_double(old);
}
#endif
}
} // namespace vtkm::detail

#elif defined(VTKM_ENABLE_KOKKOS)

VTKM_THIRDPARTY_PRE_INCLUDE
// Superhack! Kokkos_Macros.hpp defines macros to include modifiers like __device__.
// However, we don't want to actually use those if compiling this with a standard
// C++ compiler (because this particular code does not run on a device). Thus,
// we want to disable that behavior when not using the device compiler. To do that,
// we are going to have to load the KokkosCore_config.h file (which you are not
// supposed to do), then undefine the device enables if necessary, then load
// Kokkos_Macros.hpp to finish the state.
#ifndef KOKKOS_MACROS_HPP
#define KOKKOS_MACROS_HPP
#include <KokkosCore_config.h>
#undef KOKKOS_MACROS_HPP
#define KOKKOS_DONT_INCLUDE_CORE_CONFIG_H

#if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA)
#undef KOKKOS_ENABLE_CUDA

// In later versions we need to directly deactivate Kokkos_Setup_Cuda.hpp
#if KOKKOS_VERSION >= 30401
#define KOKKOS_CUDA_SETUP_HPP_
#endif
#endif

#if defined(KOKKOS_ENABLE_HIP) && !defined(VTKM_HIP)
#undef KOKKOS_ENABLE_HIP
#endif

#endif //KOKKOS_MACROS_HPP not loaded

#include <Kokkos_Atomic.hpp>
VTKM_THIRDPARTY_POST_INCLUDE

namespace vtkm
{
namespace detail
{

// Fence to ensure that previous non-atomic stores are visible to other threads.
VTKM_EXEC_CONT inline void AtomicStoreFence(vtkm::MemoryOrder order)
{
  if ((order == vtkm::MemoryOrder::Release) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
      (order == vtkm::MemoryOrder::SequentiallyConsistent))
  {
    Kokkos::memory_fence();
  }
}

// Fence to ensure that subsequent non-atomic loads are correctly ordered after the atomic.
VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
{
  if ((order == vtkm::MemoryOrder::Acquire) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
      (order == vtkm::MemoryOrder::SequentiallyConsistent))
  {
    Kokkos::memory_fence();
  }
}
#ifdef KOKKOS_INTERNAL_NOT_PARALLEL
#define VTKM_DESUL_MEM_SCOPE desul::MemoryScopeCaller()
#else
#define VTKM_DESUL_MEM_SCOPE desul::MemoryScopeDevice()
#endif

template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed:
      return desul::atomic_load(addr, desul::MemoryOrderRelaxed(), VTKM_DESUL_MEM_SCOPE);
    case vtkm::MemoryOrder::Acquire:
    case vtkm::MemoryOrder::Release:           // Release doesn't make sense. Use Acquire.
    case vtkm::MemoryOrder::AcquireAndRelease: // Release doesn't make sense. Use Acquire.
      return desul::atomic_load(addr, desul::MemoryOrderAcquire(), VTKM_DESUL_MEM_SCOPE);
    case vtkm::MemoryOrder::SequentiallyConsistent:
      return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VTKM_DESUL_MEM_SCOPE);
  }

  // Should never reach here, but avoid compiler warnings
  return desul::atomic_load(addr, desul::MemoryOrderSeqCst(), VTKM_DESUL_MEM_SCOPE);
}

template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed:
      desul::atomic_store(addr, value, desul::MemoryOrderRelaxed(), VTKM_DESUL_MEM_SCOPE);
      break;
    case vtkm::MemoryOrder::Acquire:           // Acquire doesn't make sense. Use Release.
    case vtkm::MemoryOrder::Release:
    case vtkm::MemoryOrder::AcquireAndRelease: // Acquire doesn't make sense. Use Release.
      desul::atomic_store(addr, value, desul::MemoryOrderRelease(), VTKM_DESUL_MEM_SCOPE);
      break;
    case vtkm::MemoryOrder::SequentiallyConsistent:
      desul::atomic_store(addr, value, desul::MemoryOrderSeqCst(), VTKM_DESUL_MEM_SCOPE);
      break;
  }
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_add(addr, arg);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_and(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_or(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T result = Kokkos::atomic_fetch_xor(addr, mask);
  AtomicLoadFence(order);
  return result;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}

template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
                                                     T* expected,
                                                     T desired,
                                                     vtkm::MemoryOrder order)
{
  AtomicStoreFence(order);
  T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired);
  AtomicLoadFence(order);
  if (oldValue == *expected)
  {
    return true;
  }
  else
  {
    *expected = oldValue;
    return false;
  }
}
}
} // namespace vtkm::detail

#elif defined(VTKM_MSVC)

// Supports vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64

#include <cstdint>
#include <cstring>
#include <intrin.h> // For MSVC atomics

namespace vtkm
{
namespace detail
{

template <typename To, typename From>
VTKM_EXEC_CONT inline To BitCast(const From& src)
{
  // The memcpy should be removed by the compiler when possible, but this
  // works around a host of issues with bitcasting using reinterpret_cast.
  VTKM_STATIC_ASSERT(sizeof(From) == sizeof(To));
  To dst;
  std::memcpy(&dst, &src, sizeof(From));
  return dst;
}

template <typename T>
VTKM_EXEC_CONT inline T BitCast(T&& src)
{
  return std::forward<T>(src);
}
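
// Illustrative note (not part of the original header): BitCast is used below to move bit
// patterns between the VTK-m types and the Windows interlocked types without violating
// strict aliasing, e.g.
//
//   LONG bits = BitCast<LONG>(1.0f);                 // reinterpret the 32-bit float pattern
//   vtkm::Float32 f = BitCast<vtkm::Float32>(bits);  // and back again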

// Note about Load and Store implementations:
//
// "Simple reads and writes to properly-aligned 32-bit variables are atomic
// operations"
//
// "Simple reads and writes to properly aligned 64-bit variables are atomic on
// 64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
// atomic on 32-bit Windows."
//
// "Reads and writes to variables of other sizes [than 32 or 64 bits] are not
// guaranteed to be atomic on any platform."
//
// https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access

VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(vtkm::UInt8* const addr, vtkm::MemoryOrder order)
{
  // This assumes that the memory interface is smart enough to load a 32-bit
  // word atomically and a properly aligned 8-bit word from it.
  // We could build address masks and do shifts to perform this manually if
  // this assumption is incorrect.
  auto result = *static_cast<volatile vtkm::UInt8* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(vtkm::UInt16* const addr, vtkm::MemoryOrder order)
{
  // This assumes that the memory interface is smart enough to load a 32-bit
  // word atomically and a properly aligned 16-bit word from it.
  // We could build address masks and do shifts to perform this manually if
  // this assumption is incorrect.
  auto result = *static_cast<volatile vtkm::UInt16* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(vtkm::UInt32* const addr, vtkm::MemoryOrder order)
{
  auto result = *static_cast<volatile vtkm::UInt32* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}
VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(vtkm::UInt64* const addr, vtkm::MemoryOrder order)
{
  auto result = *static_cast<volatile vtkm::UInt64* const>(addr);
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  return result;
}

VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt8* addr,
                                           vtkm::UInt8 val,
                                           vtkm::MemoryOrder vtkmNotUsed(order))
{
  // There doesn't seem to be an atomic store instruction in the windows
  // API, so just exchange and discard the result.
  _InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
}
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt16* addr,
                                           vtkm::UInt16 val,
                                           vtkm::MemoryOrder vtkmNotUsed(order))
{
  // There doesn't seem to be an atomic store instruction in the windows
  // API, so just exchange and discard the result.
  _InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
}
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt32* addr,
                                           vtkm::UInt32 val,
                                           vtkm::MemoryOrder order)
{
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  *addr = val;
}
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt64* addr,
                                           vtkm::UInt64 val,
                                           vtkm::MemoryOrder order)
{
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  *addr = val;
}

#define VTKM_ATOMIC_OP(vtkmName, winName, vtkmType, winType, suffix)                             \
  VTKM_EXEC_CONT inline vtkmType vtkmName(vtkmType* addr, vtkmType arg, vtkm::MemoryOrder order) \
  {                                                                                              \
    return BitCast<vtkmType>(                                                                    \
      winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg)));        \
  }

#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix)                                      \
  VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkmType, winType, suffix)              \
  VTKM_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, vtkmType, winType, suffix)                      \
  VTKM_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, vtkmType, winType, suffix)                        \
  VTKM_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, vtkmType, winType, suffix)                      \
  VTKM_EXEC_CONT inline vtkmType AtomicNotImpl(vtkmType* addr, vtkm::MemoryOrder order)          \
  {                                                                                              \
    return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order);                   \
  }                                                                                              \
  VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(                                          \
    vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder vtkmNotUsed(order))  \
  {                                                                                              \
    vtkmType result = BitCast<vtkmType>(                                                         \
      _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr),             \
                                          BitCast<winType>(desired),                             \
                                          BitCast<winType>(*expected)));                         \
    if (result == *expected)                                                                     \
    {                                                                                            \
      return true;                                                                               \
    }                                                                                            \
    else                                                                                         \
    {                                                                                            \
      *expected = result;                                                                        \
      return false;                                                                              \
    }                                                                                            \
  }

VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)

#undef VTKM_ATOMIC_OPS_FOR_TYPE
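
// Illustrative note (not part of the original header): each VTKM_ATOMIC_OPS_FOR_TYPE
// invocation above stamps out the fetch-and-op functions for one integer width by mapping
// them onto the matching _Interlocked* intrinsics. For example, the vtkm::UInt32 invocation
// effectively produces
//
//   VTKM_EXEC_CONT inline vtkm::UInt32 AtomicAddImpl(vtkm::UInt32* addr,
//                                                    vtkm::UInt32 arg,
//                                                    vtkm::MemoryOrder order)
//   {
//     return BitCast<vtkm::UInt32>(
//       _InterlockedExchangeAdd(reinterpret_cast<volatile LONG*>(addr), BitCast<LONG>(arg)));
//   }
//
// along with the corresponding And/Or/Xor/Not/CompareExchange functions.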

VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* address,
                                                  vtkm::Float32 value,
                                                  vtkm::MemoryOrder vtkmNotUsed(order))
{
  LONG assumed;
  LONG old = BitCast<LONG>(*address);
  do
  {
    assumed = old;
    old = _InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(address),
                                      BitCast<LONG>(BitCast<vtkm::Float32>(assumed) + value),
                                      assumed);
  } while (assumed != old);
  return BitCast<vtkm::Float32>(old);
}

VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* address,
                                                  vtkm::Float64 value,
                                                  vtkm::MemoryOrder vtkmNotUsed(order))
{
  LONG64 assumed;
  LONG64 old = BitCast<LONG64>(*address);
  do
  {
    assumed = old;
    old = _InterlockedCompareExchange64(reinterpret_cast<volatile LONG64*>(address),
                                        BitCast<LONG64>(BitCast<vtkm::Float64>(assumed) + value),
                                        assumed);
  } while (assumed != old);
  return BitCast<vtkm::Float64>(old);
}

}
} // namespace vtkm::detail

#else // gcc/clang for CPU

// Supports vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64

#include <cstdint>
#include <cstring>

namespace vtkm
{
namespace detail
{

VTKM_EXEC_CONT inline int GccAtomicMemOrder(vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed:
      return __ATOMIC_RELAXED;
    case vtkm::MemoryOrder::Acquire:
      return __ATOMIC_ACQUIRE;
    case vtkm::MemoryOrder::Release:
      return __ATOMIC_RELEASE;
    case vtkm::MemoryOrder::AcquireAndRelease:
      return __ATOMIC_ACQ_REL;
    case vtkm::MemoryOrder::SequentiallyConsistent:
      return __ATOMIC_SEQ_CST;
  }

  // Should never reach here, but avoid compiler warnings
  return __ATOMIC_SEQ_CST;
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
  return __atomic_load_n(addr, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  return __atomic_store_n(addr, value, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  return __atomic_fetch_add(addr, arg, GccAtomicMemOrder(order));
}

#include <vtkmstd/bit_cast.h>

// TODO: Use enable_if to write one version for both Float32 and Float64.
VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* addr,
                                                  vtkm::Float32 arg,
                                                  vtkm::MemoryOrder order)
{
  vtkm::UInt32 expected = vtkmstd::bit_cast<vtkm::UInt32>(*addr);
  vtkm::UInt32 desired;

  do
  {
    desired = vtkmstd::bit_cast<vtkm::UInt32>(vtkmstd::bit_cast<vtkm::Float32>(expected) + arg);
  } while (
    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt32*>(addr),
                                 &expected, // reloads expected with *addr prior to the operation
                                 desired,
                                 false,
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
  // return the "old" value that was in the memory.
  return vtkmstd::bit_cast<vtkm::Float32>(expected);
}

// TODO: Use enable_if to write one version for both Float32 and Float64.
VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* addr,
                                                  vtkm::Float64 arg,
                                                  vtkm::MemoryOrder order)
{
  vtkm::UInt64 expected = vtkmstd::bit_cast<vtkm::UInt64>(*addr);
  vtkm::UInt64 desired;

  do
  {
    desired = vtkmstd::bit_cast<vtkm::UInt64>(vtkmstd::bit_cast<vtkm::Float64>(expected) + arg);
  } while (
    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt64*>(addr),
                                 &expected, // reloads expected with *addr prior to the operation
                                 desired,
                                 false,
                                 GccAtomicMemOrder(order),
                                 GccAtomicMemOrder(order)));
  // return the "old" value that was in the memory.
  return vtkmstd::bit_cast<vtkm::Float64>(expected);
}
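
// Illustrative sketch (not part of the original header): the TODO above could be addressed
// with a single enable_if-constrained template that handles both floating-point widths by
// dispatching on a same-sized unsigned integer type, e.g.
//
//   template <typename T,
//             typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
//   VTKM_EXEC_CONT inline T AtomicAddImplSketch(T* addr, T arg, vtkm::MemoryOrder order)
//   {
//     using Bits = typename std::conditional<sizeof(T) == 4, vtkm::UInt32, vtkm::UInt64>::type;
//     Bits expected = vtkmstd::bit_cast<Bits>(*addr);
//     Bits desired;
//     do
//     {
//       desired = vtkmstd::bit_cast<Bits>(vtkmstd::bit_cast<T>(expected) + arg);
//     } while (!__atomic_compare_exchange_n(reinterpret_cast<Bits*>(addr),
//                                           &expected,
//                                           desired,
//                                           false,
//                                           GccAtomicMemOrder(order),
//                                           GccAtomicMemOrder(order)));
//     return vtkmstd::bit_cast<T>(expected);
//   }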

template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  return __atomic_fetch_and(addr, mask, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  return __atomic_fetch_or(addr, mask, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  return __atomic_fetch_xor(addr, mask, GccAtomicMemOrder(order));
}

template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}

template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
                                                     T* expected,
                                                     T desired,
                                                     vtkm::MemoryOrder order)
{
  return __atomic_compare_exchange_n(
    addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
}
}
} // namespace vtkm::detail

#endif // gcc/clang

namespace vtkm
{

namespace detail
{

template <typename T>
using OppositeSign = typename std::conditional<std::is_signed<T>::value,
                                               typename std::make_unsigned<T>::type,
                                               typename std::make_signed<T>::type>::type;

} // namespace detail

/// \brief The preferred type to use for atomic operations.
using AtomicTypePreferred = vtkm::UInt32;

/// \brief A list of types that can be used with atomic operations.
using AtomicTypesSupported = vtkm::List<vtkm::UInt32, vtkm::UInt64>;

/// \brief Atomic function to load a value from a shared memory location.
///
/// If other threads are simultaneously writing to the same location, the returned value is
/// consistent with a value that was present either before or after such a write.
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoad(T* const pointer,
                                   vtkm::MemoryOrder order = vtkm::MemoryOrder::Acquire)
{
  return detail::AtomicLoadImpl(pointer, order);
}

/// \brief Atomic function to save a value to a shared memory location.
///
template <typename T>
VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
                                       T value,
                                       vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
{
  detail::AtomicStoreImpl(pointer, value, order);
}
template <typename T>
VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
                                       detail::OppositeSign<T> value,
                                       vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
{
  detail::AtomicStoreImpl(pointer, static_cast<T>(value), order);
}

/// \brief Atomic function to add a value to a shared memory location.
///
/// Returns the value that was originally in the memory location.
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicAdd(
  T* pointer,
  T operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAddImpl(pointer, operand, order);
}
template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
VTKM_EXEC_CONT inline T AtomicAdd(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAddImpl(pointer, static_cast<T>(operand), order);
}

/// \brief Atomic function to AND bits to a shared memory location.
///
/// Returns the value that was originally in the memory location.
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicAnd(
  T* pointer,
  T operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAndImpl(pointer, operand, order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAnd(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicAndImpl(pointer, static_cast<T>(operand), order);
}

/// \brief Atomic function to OR bits to a shared memory location.
///
/// Returns the value that was originally in the memory location.
///
template <typename T>
VTKM_EXEC_CONT inline T
AtomicOr(T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicOrImpl(pointer, operand, order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicOr(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicOrImpl(pointer, static_cast<T>(operand), order);
}

/// \brief Atomic function to XOR bits to a shared memory location.
///
/// Returns the value that was originally in the memory location.
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicXor(
  T* pointer,
  T operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicXorImpl(pointer, operand, order);
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicXor(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicXorImpl(pointer, static_cast<T>(operand), order);
}

/// \brief Atomic function to NOT bits of a shared memory location.
///
/// Returns the value that was originally in the memory location.
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicNot(
  T* pointer,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicNotImpl(pointer, order);
}

/// \brief Atomic function that replaces a value given a condition.
///
/// If the value pointed to by `shared` equals `*expected`, it is replaced with `desired` and
/// `true` is returned. Otherwise, `*expected` is updated with the value currently in `shared`
/// and `false` is returned.
///
template <typename T>
VTKM_EXEC_CONT inline bool AtomicCompareExchange(
  T* shared,
  T* expected,
  T desired,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  return detail::AtomicCompareExchangeImpl(shared, expected, desired, order);
}

} // namespace vtkm

#endif //vtk_m_Atomic_h
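
The free functions above are the public entry points of this header. As a quick orientation, here is an illustrative usage sketch (not part of Atomic.h itself); the helper names CountAndClaim and TryClaimFlag are hypothetical:

#include <vtkm/Atomic.h>

// Count occurrences and claim a unique slot: AtomicAdd returns the previous value,
// so each caller receives a distinct index.
VTKM_EXEC_CONT inline vtkm::UInt32 CountAndClaim(vtkm::UInt32* counter)
{
  return vtkm::AtomicAdd(counter, vtkm::UInt32{ 1 });
}

// Set a flag exactly once using compare-exchange. Exactly one thread succeeds; the
// others observe the updated value through 'expected'.
VTKM_EXEC_CONT inline bool TryClaimFlag(vtkm::UInt32* flag)
{
  vtkm::UInt32 expected = 0;
  return vtkm::AtomicCompareExchange(flag, &expected, vtkm::UInt32{ 1 });
}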