VTK-m  2.2
DescriptiveStatistics.h
Go to the documentation of this file.
1 //============================================================================
2 // Copyright (c) Kitware, Inc.
3 // All rights reserved.
4 // See LICENSE.txt for details.
5 //
6 // This software is distributed WITHOUT ANY WARRANTY; without even
7 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8 // PURPOSE. See the above copyright notice for more information.
9 //============================================================================
10 #ifndef vtk_m_worklet_DescriptiveStatistics_h
11 #define vtk_m_worklet_DescriptiveStatistics_h
12 
13 #include <vtkm/cont/Algorithm.h>
14 #include <vtkm/cont/ArrayCopy.h>
17 
18 namespace vtkm
19 {
20 namespace worklet
21 {
23 {
24 public:
25  template <typename T>
26  struct StatState
27  {
30  : n_(0)
31  , min_(std::numeric_limits<T>::max())
32  , max_(std::numeric_limits<T>::lowest())
33  , sum_(0)
34  , mean_(0)
35  , M2_(0)
36  , M3_(0)
37  , M4_(0)
38  {
39  }
40 
42  StatState(T value)
43  : n_(1)
44  , min_(value)
45  , max_(value)
46  , sum_(value)
47  , mean_(value)
48  , M2_(0)
49  , M3_(0)
50  , M4_(0)
51  {
52  }
53 
55  StatState(T n, T min, T max, T sum, T mean, T M2, T M3, T M4)
56  : n_(n)
57  , min_(min)
58  , max_(max)
59  , sum_(sum)
60  , mean_(mean)
61  , M2_(M2)
62  , M3_(M3)
63  , M4_(M4)
64  {
65  }
66 
69  {
70  const StatState<T>& x = *this;
71  if (y.n_ == 0)
72  {
73  return x;
74  }
75  if (x.n_ == 0)
76  {
77  return y;
78  }
79 
80  StatState result;
81  result.n_ = x.n_ + y.n_;
82 
83  result.min_ = vtkm::Min(x.min_, y.min_);
84  result.max_ = vtkm::Max(x.max_, y.max_);
85 
86  // TODO: consider implementing compensated sum
87  // https://en.wikipedia.org/wiki/Kahan_summation_algorithm
88  result.sum_ = x.sum_ + y.sum_;
89 
90  // It is tempting to try to deviate from the literature and calculate
91  // mean in each "reduction" from sum and n. This saves one multiplication.
92  // However, RESIST THE TEMPTATION!!! This takes us back to the naive
93  // algorithm (mean = sum of a bunch of numbers / N) that actually
94  // accumulates more error and causes problem when calculating M2
95  // (and thus variance).
96  // TODO: Verify that FieldStatistics exhibits the same problem since
97  // it is using a "parallel" version of the naive algorithm as well.
98  // TODO: or better, just deprecate FieldStatistics.
99  T delta = y.mean_ - x.mean_;
100  result.mean_ = x.mean_ + delta * y.n_ / result.n_;
101 
102  T delta2 = delta * delta;
103  result.M2_ = x.M2_ + y.M2_ + delta2 * x.n_ * y.n_ / result.n_;
104 
105  T delta3 = delta * delta2;
106  T n2 = result.n_ * result.n_;
107  result.M3_ = x.M3_ + y.M3_;
108  result.M3_ += delta3 * x.n_ * y.n_ * (x.n_ - y.n_) / n2;
109  result.M3_ += T(3.0) * delta * (x.n_ * y.M2_ - y.n_ * x.M2_) / result.n_;
110 
111  T delta4 = delta2 * delta2;
112  T n3 = result.n_ * n2;
113  result.M4_ = x.M4_ + y.M4_;
114  result.M4_ += delta4 * x.n_ * y.n_ * (x.n_ * x.n_ - x.n_ * y.n_ + y.n_ * y.n_) / n3;
115  result.M4_ += T(6.0) * delta2 * (x.n_ * x.n_ * y.M2_ + y.n_ * y.n_ * x.M2_) / n2;
116  result.M4_ += T(4.0) * delta * (x.n_ * y.M3_ - y.n_ * x.M3_) / result.n_;
117 
118  return result;
119  }
120 
121  VTKM_EXEC_CONT T N() const { return this->n_; }
122 
124  T Min() const { return this->min_; }
125 
127  T Max() const { return this->max_; }
128 
130  T Sum() const { return this->sum_; }
131 
133  T Mean() const { return this->mean_; }
134 
136  T M2() const { return this->M2_; }
137 
139  T M3() const { return this->M3_; }
140 
142  T M4() const { return this->M4_; }
143 
145  T SampleStddev() const { return vtkm::Sqrt(this->SampleVariance()); }
146 
148  T PopulationStddev() const { return vtkm::Sqrt(this->PopulationVariance()); }
149 
151  T SampleVariance() const
152  {
153  if (this->n_ <= 1)
154  {
155  return 0;
156  }
157  return this->M2_ / (this->n_ - 1);
158  }
159 
162  {
163  if (this->M2_ == 0 || this->n_ == 0)
164  {
165  return T(0);
166  }
167  return this->M2_ / this->n_;
168  }
169 
171  T Skewness() const
172  {
173  if (this->M2_ == 0 || this->n_ == 0)
174  // Shamelessly swiped from Boost Math
175  // The limit is technically undefined, but the interpretation here is clear:
176  // A constant dataset has no skewness.
177  return T(0);
178  else
179  return vtkm::Sqrt(this->n_) * this->M3_ / vtkm::Pow(this->M2_, T{ 1.5 });
180  }
181 
183  T Kurtosis() const
184  {
185  if (this->M2_ == 0 || this->n_ == 0)
186  // Shamelessly swiped from Boost Math
187  // The limit is technically undefined, but the interpretation here is clear:
188  // A constant dataset has no kurtosis.
189  return T(0);
190  else
191  return this->n_ * this->M4_ / (this->M2_ * this->M2_);
192  }
193 
194  private:
195  // GCC4.8 is not happy about initializing data members here.
196  T n_;
197  T min_;
198  T max_;
199  T sum_;
200  T mean_;
201  T M2_;
202  T M3_;
203  T M4_;
204  }; // StatState
205 
207  {
208  template <typename T>
210  {
212  }
213  };
214 
224  template <typename FieldType, typename Storage>
227  {
228  using Algorithm = vtkm::cont::Algorithm;
229 
230  // Essentially a TransformReduce. Do we have that convenience in VTKm?
232  return Algorithm::Reduce(states, StatState<FieldType>{});
233  }
234 
235  template <typename KeyType, typename ValueType, typename KeyInStorage, typename ValueInStorage>
240  {
241  using Algorithm = vtkm::cont::Algorithm;
242 
243  // Make a copy of the input arrays so we don't modify them
245  vtkm::cont::ArrayCopy(keys, keys_copy);
246 
248  vtkm::cont::ArrayCopy(values, values_copy);
249 
250  // Gather values of the same key by sorting them according to keys
251  Algorithm::SortByKey(keys_copy, values_copy);
252 
253  auto states = vtkm::cont::make_ArrayHandleTransform(values_copy, MakeStatState{});
255 
257  Algorithm::ReduceByKey(keys_copy, states, keys_out, results, vtkm::Add{});
258 
259  return vtkm::cont::make_ArrayHandleZip(keys_out, results);
260  }
261 }; // DescriptiveStatistics
262 
263 } // worklet
264 } // vtkm
265 #endif // vtk_m_worklet_DescriptiveStatistics_h
vtkm::worklet::DescriptiveStatistics::StatState::Kurtosis
T Kurtosis() const
Definition: DescriptiveStatistics.h:183
vtkm::worklet::DescriptiveStatistics::StatState::StatState
StatState(T value)
Definition: DescriptiveStatistics.h:42
vtkm::cont::ArrayHandle
Manages an array-worth of data.
Definition: ArrayHandle.h:300
vtkm::worklet::DescriptiveStatistics::StatState::M3
T M3() const
Definition: DescriptiveStatistics.h:139
vtkm::worklet::DescriptiveStatistics::StatState::Min
T Min() const
Definition: DescriptiveStatistics.h:124
vtkm::worklet::DescriptiveStatistics::StatState::sum_
T sum_
Definition: DescriptiveStatistics.h:199
vtkm::worklet::DescriptiveStatistics::StatState::StatState
StatState()
Definition: DescriptiveStatistics.h:29
vtkm::worklet::DescriptiveStatistics::StatState::M2
T M2() const
Definition: DescriptiveStatistics.h:136
vtkm
Groups connected points that have the same field value.
Definition: Atomic.h:19
vtkm::Sqrt
vtkm::Float32 Sqrt(vtkm::Float32 x)
Definition: Math.h:943
VTKM_EXEC_CONT
#define VTKM_EXEC_CONT
Definition: ExportMacros.h:52
vtkm::worklet::DescriptiveStatistics::StatState::M4_
T M4_
Definition: DescriptiveStatistics.h:203
vtkm::worklet::DescriptiveStatistics::StatState::operator+
StatState operator+(const StatState< T > &y) const
Definition: DescriptiveStatistics.h:68
ArrayHandleTransform.h
vtkm::worklet::DescriptiveStatistics::StatState::M4
T M4() const
Definition: DescriptiveStatistics.h:142
vtkm::worklet::DescriptiveStatistics::StatState::Sum
T Sum() const
Definition: DescriptiveStatistics.h:130
vtkm::worklet::DescriptiveStatistics::StatState::mean_
T mean_
Definition: DescriptiveStatistics.h:200
vtkm::worklet::DescriptiveStatistics::StatState::Mean
T Mean() const
Definition: DescriptiveStatistics.h:133
vtkm::worklet::DescriptiveStatistics::StatState::N
T N() const
Definition: DescriptiveStatistics.h:121
vtkm::worklet::DescriptiveStatistics::StatState::Skewness
T Skewness() const
Definition: DescriptiveStatistics.h:171
ArrayCopy.h
vtkm::worklet::DescriptiveStatistics::StatState::Max
T Max() const
Definition: DescriptiveStatistics.h:127
ArrayHandleZip.h
vtkm::Add
Definition: Types.h:260
vtkm::cont::make_ArrayHandleZip
vtkm::cont::ArrayHandleZip< FirstHandleType, SecondHandleType > make_ArrayHandleZip(const FirstHandleType &first, const SecondHandleType &second)
A convenience function for creating an ArrayHandleZip.
Definition: ArrayHandleZip.h:290
vtkm::worklet::DescriptiveStatistics::MakeStatState
Definition: DescriptiveStatistics.h:206
vtkm::worklet::DescriptiveStatistics::StatState::SampleVariance
T SampleVariance() const
Definition: DescriptiveStatistics.h:151
vtkm::cont::ArrayHandleZip
ArrayHandleZip is a specialization of ArrayHandle.
Definition: ArrayHandleZip.h:253
Algorithm.h
vtkm::worklet::DescriptiveStatistics::StatState::StatState
StatState(T n, T min, T max, T sum, T mean, T M2, T M3, T M4)
Definition: DescriptiveStatistics.h:55
vtkm::cont::ArrayCopy
void ArrayCopy(const SourceArrayType &source, DestArrayType &destination)
Does a deep copy from one array to another array.
Definition: ArrayCopy.h:120
vtkm::worklet::DescriptiveStatistics::StatState
Definition: DescriptiveStatistics.h:26
VTKM_CONT
#define VTKM_CONT
Definition: ExportMacros.h:57
vtkm::worklet::DescriptiveStatistics::StatState::max_
T max_
Definition: DescriptiveStatistics.h:198
vtkm::worklet::DescriptiveStatistics::StatState::M3_
T M3_
Definition: DescriptiveStatistics.h:202
vtkm::cont::Algorithm
Definition: Algorithm.h:386
vtkm::worklet::DescriptiveStatistics::StatState::n_
T n_
Definition: DescriptiveStatistics.h:196
vtkm::worklet::DescriptiveStatistics::Run
static auto Run(const vtkm::cont::ArrayHandle< KeyType, KeyInStorage > &keys, const vtkm::cont::ArrayHandle< ValueType, ValueInStorage > &values) -> vtkm::cont::ArrayHandleZip< vtkm::cont::ArrayHandle< KeyType >, vtkm::cont::ArrayHandle< StatState< ValueType >>>
Definition: DescriptiveStatistics.h:236
vtkm::worklet::DescriptiveStatistics
Definition: DescriptiveStatistics.h:22
vtkm::worklet::DescriptiveStatistics::StatState::PopulationVariance
T PopulationVariance() const
Definition: DescriptiveStatistics.h:161
vtkm::worklet::DescriptiveStatistics::StatState::min_
T min_
Definition: DescriptiveStatistics.h:197
vtkm::worklet::DescriptiveStatistics::Run
static StatState< FieldType > Run(const vtkm::cont::ArrayHandle< FieldType, Storage > &field)
Calculate various summary statistics for the input ArrayHandle.
Definition: DescriptiveStatistics.h:225
vtkm::cont::make_ArrayHandleTransform
vtkm::cont::ArrayHandleTransform< HandleType, FunctorType > make_ArrayHandleTransform(HandleType handle, FunctorType functor)
make_ArrayHandleTransform is convenience function to generate an ArrayHandleTransform.
Definition: ArrayHandleTransform.h:480
vtkm::worklet::DescriptiveStatistics::StatState::M2_
T M2_
Definition: DescriptiveStatistics.h:201
vtkm::worklet::DescriptiveStatistics::MakeStatState::operator()
vtkm::worklet::DescriptiveStatistics::StatState< T > operator()(T value) const
Definition: DescriptiveStatistics.h:209
vtkm::worklet::DescriptiveStatistics::StatState::SampleStddev
T SampleStddev() const
Definition: DescriptiveStatistics.h:145
vtkm::worklet::DescriptiveStatistics::StatState::PopulationStddev
T PopulationStddev() const
Definition: DescriptiveStatistics.h:148