Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticInfo.cs @ 10204

Last change on this file since 10204 was 10191, checked in by rstoll, 11 years ago
  • GetRowMissingValueCount implemented
  • IsMissingValue for cell implemented
File size: 3.2 KB
RevLine 
[10148]1using System;
[10165]2using System.Linq;
[10148]3
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes simple descriptive statistics (counts, extrema, median, mean,
  /// standard deviation, mode) over the data exposed by an
  /// <see cref="IPreprocessingData"/> instance.
  /// </summary>
  public class StatisticInfo : IStatisticInfo {

    // Data source for every statistic; assigned once in the constructor.
    private readonly IPreprocessingData preprocessingData;

    /// <summary>
    /// Creates a statistics provider for the given preprocessing data.
    /// </summary>
    /// <param name="thePreprocessingData">The data all statistics are computed from.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="thePreprocessingData"/> is <c>null</c>.
    /// </exception>
    public StatisticInfo(IPreprocessingData thePreprocessingData) {
      // Fail at construction time instead of with a NullReferenceException on first use.
      if (thePreprocessingData == null) {
        throw new ArgumentNullException("thePreprocessingData");
      }
      preprocessingData = thePreprocessingData;
    }

    /// <summary>Returns the number of columns reported by the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows reported by the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>
    /// Counts the variables whose column type is <see cref="double"/>.
    /// </summary>
    public int GetNumericColumnCount() {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        if (preprocessingData.IsType<double>(variableName)) {
          ++count;
        }
      }
      return count;
    }

    /// <summary>
    /// Returns the number of columns that are not of type <see cref="double"/>,
    /// computed as total columns minus numeric columns.
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>
    /// Returns the total number of missing values, summed over all variables.
    /// </summary>
    public int GetMissingValueCount() {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        count += GetMissingValueCount(variableName);
      }
      return count;
    }

    /// <summary>
    /// Returns the number of missing values in the given variable's column.
    /// </summary>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    public int GetMissingValueCount(string variableName) {
      return preprocessingData.GetMissingValueIndices(variableName).Count();
    }

    /// <summary>
    /// Returns the smallest value of the given variable's column.
    /// </summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    public T GetMin<T>(string variableName) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(variableName).Min();
    }

    /// <summary>
    /// Returns the largest value of the given variable's column.
    /// </summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    public T GetMax<T>(string variableName) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(variableName).Max();
    }

    /// <summary>
    /// Returns the median of a <see cref="double"/> column.
    /// </summary>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    /// <returns>
    /// The middle value of the sorted column for an odd number of values, the
    /// mean of the two middle values for an even number, or
    /// <see cref="double.NaN"/> if the column is not of type
    /// <see cref="double"/> or contains no values.
    /// </returns>
    public double GetMedian(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      double[] sorted = preprocessingData.GetValues<double>(variableName)
                                         .OrderBy(x => x)
                                         .ToArray();
      if (sorted.Length == 0) {
        return double.NaN;
      }

      // Index by the number of values actually retrieved rather than by Rows.
      int mid = sorted.Length / 2;
      if (sorted.Length % 2 == 1) {
        return sorted[mid];
      }
      // Even number of values: the median is the mean of the two central elements.
      return (sorted[mid - 1] + sorted[mid]) / 2.0;
    }

    /// <summary>
    /// Returns the arithmetic mean of a <see cref="double"/> column.
    /// </summary>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    /// <returns>
    /// The mean, or <see cref="double.NaN"/> if the column is not of type
    /// <see cref="double"/> or contains no values.
    /// </returns>
    public double GetAverage(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }
      // Average() throws on an empty sequence; substituting a single NaN makes
      // the empty case yield NaN, matching the "not available" result above.
      return preprocessingData.GetValues<double>(variableName)
                              .DefaultIfEmpty(double.NaN)
                              .Average();
    }

    /// <summary>
    /// Returns the most frequently occurring value of the given variable's column.
    /// </summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    public T GetMostCommonValue<T>(string variableName) {
      return preprocessingData.GetValues<T>(variableName)
                              .GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .First();
    }

    /// <summary>
    /// Returns the sample standard deviation (divisor n - 1) of a
    /// <see cref="double"/> column.
    /// </summary>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    /// <returns>
    /// The standard deviation, or <see cref="double.NaN"/> if the column is not
    /// of type <see cref="double"/> or contains fewer than two values.
    /// </returns>
    public double GetStandardDeviation(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      // Materialize once so mean and squared deviations use the same snapshot.
      double[] values = preprocessingData.GetValues<double>(variableName).ToArray();
      if (values.Length < 2) {
        // n - 1 would be zero (or negative); the deviation is undefined.
        return double.NaN;
      }

      double avg = values.Average();
      double squaredDeviations = values.Sum(x => (x - avg) * (x - avg));
      return Math.Sqrt(squaredDeviations / (values.Length - 1));
    }

    /// <summary>
    /// Returns the number of distinct values in the given variable's column.
    /// </summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    public int GetDifferentValuesCount<T>(string variableName) {
      return preprocessingData.GetValues<T>(variableName).Distinct().Count();
    }

    /// <summary>
    /// Returns the number of variables that have a missing value in the given row.
    /// </summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        if (preprocessingData.IsMissingValue(variableName, rowIndex)) {
          ++count;
        }
      }
      return count;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.