Ignore:
Timestamp:
12/02/20 00:15:24 (6 months ago)
Author:
bburlacu
Message:

#3090: Implement extension to Welford's algorithm by Schubert et al in the OnlinePearsonsRCalculator.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlinePearsonsRCalculator.cs

    r17180 r17787  
    2626namespace HeuristicLab.Problems.DataAnalysis {
    2727  public class OnlinePearsonsRCalculator : DeepCloneable, IOnlineCalculator {
    28     private OnlineCovarianceCalculator covCalculator = new OnlineCovarianceCalculator();
    29     private OnlineMeanAndVarianceCalculator sxCalculator = new OnlineMeanAndVarianceCalculator();
    30     private OnlineMeanAndVarianceCalculator syCalculator = new OnlineMeanAndVarianceCalculator();
     28    private double sumX;
     29    private double sumY;
     30    private double sumWe;
     31
     32    private double sumXX;
     33    private double sumYY;
     34    private double sumXY;
     35
     36    private OnlineCalculatorError errorState;
    3137
    3238    public double R {
    3339      get {
    34         double xVar = sxCalculator.PopulationVariance;
    35         double yVar = syCalculator.PopulationVariance;
    36         if (xVar.IsAlmost(0.0) || yVar.IsAlmost(0.0)) {
    37           return 0.0;
    38         } else {
    39           var r = covCalculator.Covariance / (Math.Sqrt(xVar) * Math.Sqrt(yVar));
    40           if (r < -1.0) r = -1.0;
    41           else if (r > 1.0) r = 1.0;
    42           return r;
     40        if (!(sumXX > 0.0 && sumYY > 0.0)) {
     41          return (sumXX == sumYY) ? 1.0 : 0.0;
    4342        }
     43        return sumXY / Math.Sqrt(sumXX * sumYY);
    4444      }
    4545    }
     46
     47    public double MeanX { get { return sumX / sumWe; } }
     48
     49    public double MeanY { get { return sumY / sumWe; } }
     50
     51    public double NaiveCovariance { get { return sumXY / sumWe; } }
     52
     53    public double SampleCovariance {
     54      get {
     55        if (sumWe > 1.0) {
     56          errorState = OnlineCalculatorError.None;
     57          return sumXY / (sumWe - 1);
     58        }
     59        errorState = OnlineCalculatorError.InsufficientElementsAdded;
     60        return double.NaN;
     61      }
     62    }
     63
     64    public double NaiveVarianceX { get { return sumXX / sumWe; } }
     65
     66    public double SampleVarianceX {
     67      get {
     68        if (sumWe > 1.0) {
     69          errorState = OnlineCalculatorError.None;
     70          return sumXX / (sumWe - 1);
     71        }
     72        errorState = OnlineCalculatorError.InsufficientElementsAdded;
     73        return double.NaN;
     74      }
     75    }
     76
     77    public double NaiveStdevX { get { return Math.Sqrt(NaiveVarianceY); } }
     78
     79    public double SampleStdevX { get { return Math.Sqrt(SampleVarianceX); } }
     80
     81    public double NaiveVarianceY { get { return sumYY / sumWe; } }
     82
     83    public double SampleVarianceY {
     84      get {
     85        if (sumWe > 1.0) {
     86          errorState = OnlineCalculatorError.None;
     87          return sumYY / (sumWe - 1);
     88        }
     89        errorState = OnlineCalculatorError.InsufficientElementsAdded;
     90        return double.NaN;
     91      }
     92    }
     93
     94    public double NaiveStdevY { get { return Math.Sqrt(NaiveVarianceY); } }
     95
     96    public double SampleStdevY { get { return Math.Sqrt(SampleVarianceX); } }
    4697
    4798    public OnlinePearsonsRCalculator() { }
     
    49100    protected OnlinePearsonsRCalculator(OnlinePearsonsRCalculator original, Cloner cloner)
    50101      : base(original, cloner) {
    51       covCalculator = cloner.Clone(original.covCalculator);
    52       sxCalculator = cloner.Clone(original.sxCalculator);
    53       syCalculator = cloner.Clone(original.syCalculator);
     102      sumX = original.sumX;
     103      sumY = original.sumY;
     104      sumXX = original.sumXX;
     105      sumYY = original.sumYY;
     106      sumXY = original.sumXY;
     107      sumWe = original.sumWe;
     108      errorState = original.ErrorState;
    54109    }
    55110    public override IDeepCloneable Clone(Cloner cloner) {
     
    59114    #region IOnlineCalculator Members
    60115    public OnlineCalculatorError ErrorState {
    61       get { return covCalculator.ErrorState | sxCalculator.PopulationVarianceErrorState | syCalculator.PopulationVarianceErrorState; }
     116      get { return errorState; }
    62117    }
    63118    public double Value {
     
    65120    }
    66121    public void Reset() {
    67       covCalculator.Reset();
    68       sxCalculator.Reset();
    69       syCalculator.Reset();
     122      sumXX = sumYY = sumXY = sumX = sumY = sumWe = 0.0;
     123      errorState = OnlineCalculatorError.None;
    70124    }
    71125
    72126    public void Add(double x, double y) {
    73       // no need to check validity of values explicitly here as it is checked in all three evaluators
    74       covCalculator.Add(x, y);
    75       sxCalculator.Add(x);
    76       syCalculator.Add(y);
     127      if (sumWe <= 0.0) {
     128        sumX = x;
     129        sumY = y;
     130        sumWe = 1;
     131        return;
     132      }
     133      // Delta to previous mean
     134      double deltaX = x * sumWe - sumX, deltaY = y * sumWe - sumY;
     135      double oldWe = sumWe;
     136      // Incremental update
     137      sumWe += 1;
     138      double f = 1.0 / (sumWe * oldWe);
     139      // Update
     140      sumXX += f * deltaX * deltaX;
     141      sumYY += f * deltaY * deltaY;
     142      // should equal weight * deltaY * neltaX!
     143      sumXY += f * deltaX * deltaY;
     144      // Update means
     145      sumX += x;
     146      sumY += y;
    77147    }
    78148
     
    80150
    81151    public static double Calculate(IEnumerable<double> first, IEnumerable<double> second, out OnlineCalculatorError errorState) {
    82       IEnumerator<double> firstEnumerator = first.GetEnumerator();
    83       IEnumerator<double> secondEnumerator = second.GetEnumerator();
    84       var calculator = new OnlinePearsonsRCalculator();
     152      var x = first.GetEnumerator(); x.MoveNext();
     153      var y = second.GetEnumerator(); y.MoveNext();
     154      double sumXX = 0.0, sumYY = 0.0, sumXY = 0.0;
     155      double sumX = x.Current, sumY = y.Current;
     156      int i = 1;
    85157
    86       // always move forward both enumerators (do not use short-circuit evaluation!)
    87       while (firstEnumerator.MoveNext() & secondEnumerator.MoveNext()) {
    88         double original = firstEnumerator.Current;
    89         double estimated = secondEnumerator.Current;
    90         calculator.Add(original, estimated);
    91         if (calculator.ErrorState != OnlineCalculatorError.None) break;
     158      // Inlined computation of Pearson correlation, to avoid allocating objects
     159      // This is a numerically stabilized version, avoiding sum-of-squares.
     160      while (x.MoveNext() & y.MoveNext()) {
     161        double xv = x.Current, yv = y.Current;
     162        // Delta to previous mean
     163        double deltaX = xv * i - sumX, deltaY = yv * i - sumY;
     164        // Increment count first
     165        double oldi = i; // Convert to double!
     166        ++i;
     167        double f = 1.0 / (i * oldi);
     168        // Update
     169        sumXX += f * deltaX * deltaX;
     170        sumYY += f * deltaY * deltaY;
     171        // should equal deltaY * deltaX!
     172        sumXY += f * deltaX * deltaY;
     173        // Update sums
     174        sumX += xv;
     175        sumY += yv;
    92176      }
    93177
    94       // check if both enumerators are at the end to make sure both enumerations have the same length
    95       if (calculator.ErrorState == OnlineCalculatorError.None &&
    96            (secondEnumerator.MoveNext() || firstEnumerator.MoveNext())) {
    97         throw new ArgumentException("Number of elements in first and second enumeration doesn't match.");
    98       } else {
    99         errorState = calculator.ErrorState;
    100         return calculator.R;
    101       }
     178      errorState = OnlineCalculatorError.None;
     179      // One or both series were constant:
     180      return !(sumXX > 0.0 && sumYY > 0.0) ? sumXX == sumYY ? 1.0 : 0.0 : //
     181          sumXY / Math.Sqrt(sumXX * sumYY);
    102182    }
    103183  }
Note: See TracChangeset for help on using the changeset viewer.