Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticInfo.cs @ 10188

Last change on this file since 10188 was 10183, checked in by rstoll, 11 years ago

Switched from columnIndex to variableName

File size: 3.5 KB
RevLine 
[10148]1using System;
[10165]2using System.Linq;
[10148]3
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes simple descriptive statistics (counts, min/max, median, average,
  /// standard deviation, mode) over the columns of an <see cref="IPreprocessingData"/>.
  /// Columns are addressed by variable name.
  /// </summary>
  public class StatisticInfo : IStatisticInfo {

    // Data source for every statistic; assigned once in the constructor.
    private readonly IPreprocessingData preprocessingData;

    /// <summary>
    /// Creates a statistics view over the given preprocessing data.
    /// </summary>
    /// <param name="thePreprocessingData">The data all statistics are computed from.</param>
    public StatisticInfo(IPreprocessingData thePreprocessingData) {
      preprocessingData = thePreprocessingData;
    }

    /// <summary>Returns the number of columns reported by the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows reported by the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Returns the number of columns whose values are of type <c>double</c>.</summary>
    public int GetNumericColumnCount() {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        if (preprocessingData.IsType<double>(variableName)) {
          ++count;
        }
      }
      return count;
    }

    /// <summary>
    /// Returns the number of non-numeric columns, i.e. the total column count
    /// minus <see cref="GetNumericColumnCount"/>.
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the number of missing values summed over all columns.</summary>
    /// <exception cref="ArgumentException">A column holds values of an unsupported type.</exception>
    public int GetMissingValueCount() {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        count += GetMissingValueCount(variableName);
      }
      return count;
    }

    /// <summary>
    /// Returns the number of missing values in one column. A value counts as missing when it is
    /// NaN (double), null or empty (string), or <see cref="DateTime.MinValue"/> (DateTime).
    /// </summary>
    /// <param name="variableName">Name of the column to inspect.</param>
    /// <exception cref="ArgumentException">The column is neither double, string nor DateTime.</exception>
    public int GetMissingValueCount(string variableName) {
      if (preprocessingData.IsType<double>(variableName)) {
        return preprocessingData.GetValues<double>(variableName).Count(x => double.IsNaN(x));
      }
      if (preprocessingData.IsType<string>(variableName)) {
        return preprocessingData.GetValues<string>(variableName).Count(x => string.IsNullOrEmpty(x));
      }
      if (preprocessingData.IsType<DateTime>(variableName)) {
        return preprocessingData.GetValues<DateTime>(variableName).Count(x => x.Equals(DateTime.MinValue));
      }
      // The column is identified by its name, not by an index.
      throw new ArgumentException("column with name: " + variableName + " contains a non supported type.");
    }

    /// <summary>Returns the smallest value of the column.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the column.</param>
    public T GetMin<T>(string variableName) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(variableName).Min();
    }

    /// <summary>Returns the largest value of the column.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the column.</param>
    public T GetMax<T>(string variableName) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(variableName).Max();
    }

    /// <summary>
    /// Returns the median of a numeric column: the middle element for an odd number of values,
    /// the mean of the two middle elements for an even number.
    /// </summary>
    /// <param name="variableName">Name of the column.</param>
    /// <returns>The median, or NaN when the column is not of type double or holds no values.</returns>
    public double GetMedian(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      // NOTE(review): NaN entries are not filtered out; the default double ordering
      // places them before all numbers, so missing values shift the result.
      double[] sorted = preprocessingData.GetValues<double>(variableName).OrderBy(x => x).ToArray();
      if (sorted.Length == 0) {
        return double.NaN;
      }

      int mid = sorted.Length / 2;
      if (sorted.Length % 2 == 1) {
        return sorted[mid];
      }
      return (sorted[mid - 1] + sorted[mid]) / 2.0;
    }

    /// <summary>Returns the arithmetic mean of a numeric column.</summary>
    /// <param name="variableName">Name of the column.</param>
    /// <returns>The mean, or NaN when the column is not of type double or holds no values.</returns>
    public double GetAverage(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }
      // DefaultIfEmpty turns an empty column into a single NaN so Average() yields NaN
      // instead of throwing InvalidOperationException.
      return preprocessingData.GetValues<double>(variableName).DefaultIfEmpty(double.NaN).Average();
    }

    /// <summary>Returns the value that occurs most often in the column.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the column.</param>
    /// <exception cref="InvalidOperationException">The column holds no values.</exception>
    public T GetMostCommonValue<T>(string variableName) {
      return preprocessingData.GetValues<T>(variableName)
                              .GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .First();
    }

    /// <summary>
    /// Returns the sample standard deviation (divisor n - 1) of a numeric column.
    /// </summary>
    /// <param name="variableName">Name of the column.</param>
    /// <returns>
    /// The standard deviation, or NaN when the column is not of type double or
    /// holds fewer than two values.
    /// </returns>
    public double GetStandardDeviation(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      // Snapshot the column once so mean, squared deviations and divisor all refer to the same values.
      double[] values = preprocessingData.GetValues<double>(variableName).ToArray();
      if (values.Length < 2) {
        return double.NaN;
      }

      double avg = values.Average();
      double squaredDeviations = values.Sum(x => (x - avg) * (x - avg));
      return Math.Sqrt(squaredDeviations / (values.Length - 1));
    }

    /// <summary>Returns the number of distinct values in the column.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the column.</param>
    public int GetDifferentValuesCount<T>(string variableName) {
      return preprocessingData.GetValues<T>(variableName).Distinct().Count();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.