Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/20/10 20:31:23 (15 years ago)
Author:
gkronber
Message:

Added tracking of the best-of-run solution (based on the validation set) and calculation of MSE, R² and relative error on the training and test sets. #938 (Data types and operators for regression problems)

File:
1 copied

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleRSquaredEvaluator.cs

    r3441 r3452  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 *
     5 * This file is part of HeuristicLab.
     6 *
     7 * HeuristicLab is free software: you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation, either version 3 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * HeuristicLab is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     19 */
     20#endregion
     21
     22using System;
    223using System.Collections.Generic;
    324using System.Linq;
    425using System.Text;
    526using HeuristicLab.Core;
     27using HeuristicLab.Common;
    628using HeuristicLab.Data;
    7 using HeuristicLab.DataAnalysis;
     29using HeuristicLab.Parameters;
    830
    9 namespace HeuristicLab.Modeling {
    10   public class SimpleStableCorrelationCoefficientEvaluator : SimpleEvaluatorBase {
    11 
    12     public override string OutputVariableName {
    13       get {
    14         return "R2";
    15       }
     31namespace HeuristicLab.Problems.DataAnalysis.Evaluators {
     32  public class SimpleRSquaredEvaluator : SimpleEvaluator {
     33    public ILookupParameter<DoubleValue> RSquaredParameter {
     34      get { return (ILookupParameter<DoubleValue>)Parameters["RSquared"]; }
    1635    }
    1736
    18     public override double Evaluate(double[,] values) {
    19       try {
    20         return Calculate(values);
    21       }
    22       catch (ArgumentException) {
    23         return double.NegativeInfinity;
    24       }
     37    public SimpleRSquaredEvaluator() {
     38      Parameters.Add(new LookupParameter<DoubleValue>("RSquared", "The squared Pearson's Product Moment Correlation (R²) of estimated values and original values."));
    2539    }
    2640
    27     public static double Calculate(double[,] values) {
     41    protected override void Apply(DoubleMatrix values) {
     42      var original = from i in Enumerable.Range(0, values.Rows)
     43                     select values[i, ORIGINAL_INDEX];
     44      var estimated = from i in Enumerable.Range(0, values.Rows)
     45                      select values[i, ESTIMATION_INDEX];
     46      RSquaredParameter.ActualValue = new DoubleValue(Calculate(original, estimated));
     47    }
     48
     49
     50    public static double Calculate(IEnumerable<double> original, IEnumerable<double> estimated) {
     51      var originalEnumerator = original.GetEnumerator();
     52      var estimatedEnumerator = estimated.GetEnumerator();
     53      originalEnumerator.MoveNext();
     54      estimatedEnumerator.MoveNext();
     55      double e = estimatedEnumerator.Current;
     56      double o = originalEnumerator.Current;
     57
     58      // stable and iterative calculation of R² in one pass over original and estimated
    2859      double sum_sq_x = 0.0;
    2960      double sum_sq_y = 0.0;
    3061      double sum_coproduct = 0.0;
    31       if (IsInvalidValue(values[0, ORIGINAL_INDEX]) || IsInvalidValue(values[0, ESTIMATION_INDEX])) {
    32         throw new ArgumentException("Correlation coefficient is not defined for variables with NaN or infinity values.");
     62      if (IsInvalidValue(o) || IsInvalidValue(e)) {
     63        throw new ArgumentException(" is not defined for variables with NaN or infinity values.");
    3364      }
    34       double mean_x = values[0, ORIGINAL_INDEX];
    35       double mean_y = values[0, ESTIMATION_INDEX];
    36       for (int i = 1; i < values.GetLength(0); i++) {
    37         double sweep = (i - 1.0) / i;
    38         if (IsInvalidValue(values[i, ORIGINAL_INDEX]) || IsInvalidValue(values[i, ESTIMATION_INDEX])) {
     65      double mean_x = o;
     66      double mean_y = e;
     67      int n = 1;
     68      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
     69        e = estimatedEnumerator.Current;
     70        o = originalEnumerator.Current;
     71        double sweep = (n - 1.0) / n;
     72        if (IsInvalidValue(o) || IsInvalidValue(e)) {
    3973          throw new ArgumentException("Correlation coefficient is not defined for variables with NaN or infinity values.");
    4074        }
    41         double delta_x = values[i, ORIGINAL_INDEX] - mean_x;
    42         double delta_y = values[i, ESTIMATION_INDEX] - mean_y;
     75        double delta_x = o - mean_x;
     76        double delta_y = e - mean_y;
    4377        sum_sq_x += delta_x * delta_x * sweep;
    4478        sum_sq_y += delta_y * delta_y * sweep;
    4579        sum_coproduct += delta_x * delta_y * sweep;
    46         mean_x += delta_x / i;
    47         mean_y += delta_y / i;
     80        mean_x += delta_x / n;
     81        mean_y += delta_y / n;
     82        n++;
    4883      }
    49       double pop_sd_x = Math.Sqrt(sum_sq_x / values.GetLength(0));
    50       double pop_sd_y = Math.Sqrt(sum_sq_y / values.GetLength(0));
    51       double cov_x_y = sum_coproduct / values.GetLength(0);
     84      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
     85        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
     86      } else {
     87        double pop_sd_x = Math.Sqrt(sum_sq_x / n);
     88        double pop_sd_y = Math.Sqrt(sum_sq_y / n);
     89        double cov_x_y = sum_coproduct / n;
    5290
    53       if (pop_sd_x == 0.0 || pop_sd_y == 0.0)
    54         return 0.0;
    55       else {
    56         double r = cov_x_y / (pop_sd_x * pop_sd_y);
    57         return r * r;
     91        if (pop_sd_x.IsAlmost(0.0) || pop_sd_y.IsAlmost(0.0))
     92          return 0.0;
     93        else {
     94          double r = cov_x_y / (pop_sd_x * pop_sd_y);
     95          return r * r;
     96        }
    5897      }
    5998    }
Note: See TracChangeset for help on using the changeset viewer.