Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/20/10 20:31:23 (15 years ago)
Author:
gkronber
Message:

Included tracking of the best-of-run solution (based on the validation set) and calculation of MSE, R² and relative error on the training and test sets. #938 (Data types and operators for regression problems)

File:
1 copied

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Statistics.cs

    r3441 r3452  
    2323using System.Collections.Generic;
    2424using System.Text;
     25using System.Linq;
    2526
    26 namespace HeuristicLab.DataAnalysis {
    27   public class Statistics {
    28 
     27namespace HeuristicLab.Problems.DataAnalysis {
     28  public static class StatisticExtensions {
    2929    /// <summary>
    30     /// Minimum returns the smallest entry of values.
    31     /// Throws an exception if values is empty.
    32     /// </summary>
    33     /// <typeparam name="T"></typeparam>
    34     /// <param name="values"></param>
    35     /// <returns></returns>
    36     public static T Minimum<T>(IEnumerable<T> values) where T : struct, IComparable, IComparable<T> {
    37       IEnumerator<T> enumerator = values.GetEnumerator();
    38 
    39       // this will throw an exception if the values collection is empty
    40       enumerator.MoveNext();
    41       T minimum = enumerator.Current;
    42 
    43       while (enumerator.MoveNext()) {
    44         T current = enumerator.Current;
    45         if (current.CompareTo(minimum) < 0) {
    46           minimum = current;
    47         }
    48       }
    49 
    50       return minimum;
    51     }
    52 
    53     /// <summary>
    54     /// Maximum returns the largest entry of values.
    55     /// Throws an exception if values is empty.
    56     /// </summary>
    57     /// <typeparam name="T"></typeparam>
    58     /// <param name="values"></param>
    59     /// <returns></returns>
    60     public static T Maximum<T>(IEnumerable<T> values) where T : struct, IComparable, IComparable<T> {
    61       IEnumerator<T> enumerator = values.GetEnumerator();
    62 
    63       // this will throw an exception if the values collection is empty
    64       enumerator.MoveNext();
    65       T maximum = enumerator.Current;
    66 
    67       while (enumerator.MoveNext()) {
    68         T current = enumerator.Current;
    69         if (current.CompareTo(maximum) > 0) {
    70           maximum = current;
    71         }
    72       }
    73 
    74       return maximum;
    75     }
    76 
    77     /// <summary>
    78     /// Range calculates the difference between the largest and smallest entry of values.
     30    /// Calculates the median element of the enumeration.
    7931    /// </summary>
    8032    /// <param name="values"></param>
    8133    /// <returns></returns>
    82     public static double Range(double[] values) {
    83       return Range(values, 0, values.Length);
    84     }
     34    public static double Median(this IEnumerable<double> values) {
     35      int n = values.Count();
     36      if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");
    8537
    86     /// <summary>
    87     /// Range calculates the difference between the largest and smallest entry of values.
    88     /// </summary>
    89     public static double Range(List<double> values) {
    90       return Range(values.ToArray(), 0, values.Count);
    91     }
     38      double[] sortedValues = new double[n];
     39      int i = 0;
     40      foreach (double x in values)
     41        sortedValues[i++] = x;
    9242
    93     /// <summary>
    94     /// Range calculates the difference between the largest and smallest entry of values between start and end.
    95     /// </summary>
    96     /// <param name="values">collection of values</param>
    97     /// <param name="start">start index (inclusive)</param>
    98     /// <param name="end">end index (exclusive)</param>
    99     /// <returns></returns>
    100     public static double Range(double[] values, int start, int end) {
    101       if (start < 0 || start > values.Length || end < 0 || end > values.Length || start > end) {
    102         throw new InvalidOperationException();
    103       }
    104 
    105       double minimum = double.PositiveInfinity;
    106       double maximum = double.NegativeInfinity;
    107       for (int i = start; i < end; i++) {
    108         if (!double.IsNaN(values[i])) {
    109           if (values[i] > maximum) {
    110             maximum = values[i];
    111           }
    112           if (values[i] < minimum) {
    113             minimum = values[i];
    114           }
    115         }
    116       }
    117       return (maximum - minimum);
    118     }
    119 
    120     /// <summary>
    121     /// Calculates the mean of all values.
    122     /// </summary>
    123     /// <param name="values"></param>
    124     /// <returns></returns>
    125     public static double Mean(List<double> values) {
    126       return Mean(values.ToArray(), 0, values.Count);
    127     }
    128 
    129     // Calculates the mean of all values.
    130     public static double Mean(double[] values) {
    131       return Mean(values, 0, values.Length);
    132     }
    133 
    134     /// <summary>
    135     /// Calculates the mean of the values between start and end.
    136     /// </summary>
    137     /// <param name="values"></param>
    138     /// <param name="start">start index (inclusive)</param>
    139     /// <param name="end">end index(exclusive)</param>
    140     /// <returns></returns>
    141     public static double Mean(double[] values, int start, int end) {
    142       if (values.Length == 0) throw new ArgumentException("Values is empty.");
    143       if(end <=start) throw new ArgumentException("End is smaller or equal start");
    144       double sum = 0.0;
    145       int n = 0;
    146       for (int i = start; i < end; i++) {
    147         if (!double.IsNaN(values[i])) {
    148           sum += values[i];
    149           n++;
    150         }
    151       }
    152       if (n > 0)
    153         return sum / n;
    154       else throw new ArgumentException("Only NaN elements in values");
    155     }
    156 
    157     /// <summary>
    158     /// Calculates the median of the values.
    159     /// </summary>
    160     /// <param name="values"></param>
    161     /// <returns></returns>
    162     public static double Median(double[] values) {
    163       if (values.Length == 0) throw new InvalidOperationException();
    164       int n = values.Length;
    165       double[] sortedValues = new double[n];
    166 
    167       Array.Copy(values, sortedValues, n);
    16843      Array.Sort(sortedValues);
    16944
     
    17247        return sortedValues[n / 2];
    17348      } else {
    174         return (sortedValues[(n / 2)-1] + sortedValues[n / 2 ]) / 2.0;
     49        return (sortedValues[(n / 2) - 1] + sortedValues[n / 2]) / 2.0;
    17550      }
    17651    }
     
    18257    /// <param name="values"></param>
    18358    /// <returns></returns>
    184     public static double StandardDeviation(double[] values) {
     59    public static double StandardDeviation(this IEnumerable<double> values) {
    18560      return Math.Sqrt(Variance(values));
    18661    }
    18762
    18863    /// <summary>
    189     /// Calculates the variance of values.
     64    /// Calculates the variance of values. (sum (x - x_mean)² / n)
    19065    /// </summary>
    19166    /// <param name="values"></param>
    19267    /// <returns></returns>
    193     public static double Variance(double[] values) {
    194       return Variance(values, 0, values.Length);
    195     }
     68    public static double Variance(this IEnumerable<double> values) {
     69      IList<double> list = values as IList<double>;
     70      if (list == null) {
     71        list = values.ToList();
     72      }
     73      if (list.Count == 0) throw new ArgumentException("Enumeration contains no elements.");
    19674
    197 
    198     /// <summary>
    199     /// Calculates the variance of the entries of values between start and end.
    200     /// </summary>
    201     /// <param name="values"></param>
    202     /// <param name="start">start index (inclusive)</param>
    203     /// <param name="end">end index (exclusive)</param>
    204     /// <returns></returns>
    205     public static double Variance(double[] values, int start, int end) {
    206       if (values.Length == 0) throw new ArgumentException("Values is empty.");
    207       if (end <= start) throw new ArgumentException("End is smaller or equal start");
    208       if (end - start == 1)
    209         return 0.0;
    210 
    211       double mean = Mean(values, start, end);
     75      double mean = list.Average();
    21276      double squaredErrorsSum = 0.0;
    213 
    214       int n = 0;
    215       for (int i = start; i < end; i++) {
    216         if (!double.IsNaN(values[i])) {
    217           double d = values[i] - mean;
     77      int n = list.Count;
     78      int s = 0;
     79      for (int i = 0; i < n; i++) {
     80        if (!double.IsNaN(list[i])) {
     81          double d = list[i] - mean;
    21882          squaredErrorsSum += d * d;
    219           n++;
     83          s++;
    22084        }
    22185      }
    222       if (n < 2) {
    223         throw new ArgumentException("Only one non-NaN element in values");
     86      if (s == 0) {
     87        throw new ArgumentException("Enumeration contains no non-NaN elements.");
    22488      }
    225       return squaredErrorsSum / (n - 1);
     89      return squaredErrorsSum / n;
    22690    }
    22791  }
Note: See TracChangeset for help on using the changeset viewer.