Changeset 10183


Ignore:
Timestamp:
12/04/13 13:19:17 (6 years ago)
Author:
rstoll
Message:

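Switched the StatisticInfo / IStatisticInfo methods from addressing columns by columnIndex (int) to addressing them by variableName (string).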

Location:
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticInfo.cs

    r10182 r10183  
    11using System;
    22using System.Linq;
    3 using HeuristicLab.DataPreprocessing;
    43
    54namespace HeuristicLab.DataPreprocessing {
    65
    7   class StatisticInfo : IStatisticInfo {
     6  public class StatisticInfo : IStatisticInfo {
    87
    98    private IPreprocessingData preprocessingData;
     
    2322    public int GetNumericColumnCount() {
    2423      int count = 0;
    25       for (int i = 0; i < preprocessingData.Columns; ++i) {
    26         if (preprocessingData.IsType<double>(i)) {
     24      foreach (var variableName in preprocessingData.VariableNames) {
     25        if (preprocessingData.IsType<double>(variableName)) {
    2726          ++count;
    2827        }
     
    3736    public int GetMissingValueCount() {
    3837      int count = 0;
    39       for (int i = 0; i < preprocessingData.Columns; ++i) {
    40         count += GetMissingValueCount(i);
     38      foreach (var variableName in preprocessingData.VariableNames) {
     39        count += GetMissingValueCount(variableName);
    4140      }
    4241      return count;
    4342    }
    4443
    45     public int GetMissingValueCount(int columnIndex) {
    46       if (preprocessingData.IsType<double>(columnIndex)) {
    47         return preprocessingData.GetValues<double>(columnIndex).Count(x => double.IsNaN(x));
    48       } else if (preprocessingData.IsType<string>(columnIndex)) {
    49         return preprocessingData.GetValues<string>(columnIndex).Count(x => string.IsNullOrEmpty(x));
    50       } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
    51         return preprocessingData.GetValues<DateTime>(columnIndex).Count(x => x.Equals(DateTime.MinValue));
     44    public int GetMissingValueCount(string variableName) {
     45      if (preprocessingData.IsType<double>(variableName)) {
     46        return preprocessingData.GetValues<double>(variableName).Count(x => double.IsNaN(x));
     47      } else if (preprocessingData.IsType<string>(variableName)) {
     48        return preprocessingData.GetValues<string>(variableName).Count(x => string.IsNullOrEmpty(x));
     49      } else if (preprocessingData.IsType<DateTime>(variableName)) {
     50        return preprocessingData.GetValues<DateTime>(variableName).Count(x => x.Equals(DateTime.MinValue));
    5251      } else {
    53         throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type.");
     52        throw new ArgumentException("column with index: " + variableName + " contains a non supported type.");
    5453      }
    5554    }
    5655
    57     public T GetMin<T>(int columnIndex) where T : IComparable<T> {
    58       return preprocessingData.GetValues<T>(columnIndex).Min();
     56    public T GetMin<T>(string variableName) where T : IComparable<T> {
     57      return preprocessingData.GetValues<T>(variableName).Min();
    5958    }
    6059
    61     public T GetMax<T>(int columnIndex) where T : IComparable<T> {
    62       return preprocessingData.GetValues<T>(columnIndex).Max();
     60    public T GetMax<T>(string variableName) where T : IComparable<T> {
     61      return preprocessingData.GetValues<T>(variableName).Max();
    6362    }
    6463
    65     public double GetMedian(int columnIndex) {
     64    public double GetMedian(string variableName) {
    6665      double median = double.NaN;
    67       if (preprocessingData.IsType<double>(columnIndex)) {
    68         median = preprocessingData.GetValues<double>(columnIndex).OrderBy(x => x).ElementAt(preprocessingData.Rows / 2);
     66      if (preprocessingData.IsType<double>(variableName)) {
     67        median = preprocessingData.GetValues<double>(variableName).OrderBy(x => x).ElementAt(preprocessingData.Rows / 2);
    6968      }
    7069      return median;
    7170    }
    7271
    73     public double GetAverage(int columnIndex) {
     72    public double GetAverage(string variableName) {
    7473      double avg = double.NaN;
    75       if (preprocessingData.IsType<double>(columnIndex)) {
    76         avg = preprocessingData.GetValues<double>(columnIndex).Average();
     74      if (preprocessingData.IsType<double>(variableName)) {
     75        avg = preprocessingData.GetValues<double>(variableName).Average();
    7776      }
    7877      return avg;
    7978    }
    8079
    81     public T GetMostCommonValue<T>(int columnIndex) {
    82       return preprocessingData.GetValues<T>(columnIndex)
     80    public T GetMostCommonValue<T>(string variableName) {
     81      return preprocessingData.GetValues<T>(variableName)
    8382                              .GroupBy(x => x)
    8483                              .OrderByDescending(g => g.Count())
     
    8887
    8988
    90     public double GetStandardDeviation(int columnIndex) {
     89    public double GetStandardDeviation(string variableName) {
    9190      double stdDev = double.NaN;
    92       if (preprocessingData.IsType<double>(columnIndex)) {
    93         double avg = GetAverage(columnIndex);
    94         stdDev = Math.Sqrt(preprocessingData.GetValues<double>(columnIndex).Sum(x => (x - avg) * (x - avg)) / (preprocessingData.Rows - 1));
     91      if (preprocessingData.IsType<double>(variableName)) {
     92        double avg = GetAverage(variableName);
     93        stdDev = Math.Sqrt(preprocessingData.GetValues<double>(variableName).Sum(x => (x - avg) * (x - avg)) / (preprocessingData.Rows - 1));
    9594      }
    9695      return stdDev;
    9796    }
    9897
    99     public int GetDifferentValuesCount<T>(int columnIndex) {
    100       return preprocessingData.GetValues<T>(columnIndex).GroupBy(x => x).Count();
     98    public int GetDifferentValuesCount<T>(string variableName) {
     99      return preprocessingData.GetValues<T>(variableName).GroupBy(x => x).Count();
    101100    }
    102101  }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IStatisticInfo.cs

    r10182 r10183  
    99    int GetNominalColumnCount();
    1010    int GetMissingValueCount();
    11     int GetMissingValueCount(int columnIndex);
    12     T GetMin<T>(int columnIndex) where T : IComparable<T>;
    13     T GetMax<T>(int columnIndex) where T : IComparable<T>;
    14     double GetMedian(int columnIndex);
    15     double GetAverage(int columnIndex);
    16     T GetMostCommonValue<T>(int columnIndex);
    17     double GetStandardDeviation(int columnIndex);
    18     int GetDifferentValuesCount<T>(int columnIndex);
     11    int GetMissingValueCount(string variableName);
     12    T GetMin<T>(string variableName) where T : IComparable<T>;
     13    T GetMax<T>(string variableName) where T : IComparable<T>;
     14    double GetMedian(string variableName);
     15    double GetAverage(string variableName);
     16    T GetMostCommonValue<T>(string variableName);
     17    double GetStandardDeviation(string variableName);
     18    int GetDifferentValuesCount<T>(string variableName);
    1919  }
    2020}
Note: See TracChangeset for help on using the changeset viewer.