Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticInfo.cs @ 10191

Last change on this file since 10191 was 10191, checked in by rstoll, 11 years ago
  • GetRowMissingValueCount implemented
  • IsMissingValue for cell implemented
File size: 3.2 KB
Line 
1using System;
2using System.Linq;
3
4namespace HeuristicLab.DataPreprocessing {
5
6  public class StatisticInfo : IStatisticInfo {
7
8    private IPreprocessingData preprocessingData;
9
10    public StatisticInfo(IPreprocessingData thePreprocessingData) {
11      preprocessingData = thePreprocessingData;
12    }
13
14    public int GetColumnCount() {
15      return preprocessingData.Columns;
16    }
17
18    public int GetRowCount() {
19      return preprocessingData.Rows;
20    }
21
22    public int GetNumericColumnCount() {
23      int count = 0;
24      foreach (var variableName in preprocessingData.VariableNames) {
25        if (preprocessingData.IsType<double>(variableName)) {
26          ++count;
27        }
28      }
29      return count;
30    }
31
32    public int GetNominalColumnCount() {
33      return preprocessingData.Columns - GetNumericColumnCount();
34    }
35
36    public int GetMissingValueCount() {
37      int count = 0;
38      foreach (var variableName in preprocessingData.VariableNames) {
39        count += GetMissingValueCount(variableName);
40      }
41      return count;
42    }
43
44    public int GetMissingValueCount(string variableName) {
45      return preprocessingData.GetMissingValueIndices(variableName).Count();
46    }
47
48    public T GetMin<T>(string variableName) where T : IComparable<T> {
49      return preprocessingData.GetValues<T>(variableName).Min();
50    }
51
52    public T GetMax<T>(string variableName) where T : IComparable<T> {
53      return preprocessingData.GetValues<T>(variableName).Max();
54    }
55
56    public double GetMedian(string variableName) {
57      double median = double.NaN;
58      if (preprocessingData.IsType<double>(variableName)) {
59        median = preprocessingData.GetValues<double>(variableName).OrderBy(x => x).ElementAt(preprocessingData.Rows / 2);
60      }
61      return median;
62    }
63
64    public double GetAverage(string variableName) {
65      double avg = double.NaN;
66      if (preprocessingData.IsType<double>(variableName)) {
67        avg = preprocessingData.GetValues<double>(variableName).Average();
68      }
69      return avg;
70    }
71
72    public T GetMostCommonValue<T>(string variableName) {
73      return preprocessingData.GetValues<T>(variableName)
74                              .GroupBy(x => x)
75                              .OrderByDescending(g => g.Count())
76                              .Select(g => g.Key)
77                              .First();
78    }
79
80
81    public double GetStandardDeviation(string variableName) {
82      double stdDev = double.NaN;
83      if (preprocessingData.IsType<double>(variableName)) {
84        double avg = GetAverage(variableName);
85        stdDev = Math.Sqrt(preprocessingData.GetValues<double>(variableName).Sum(x => (x - avg) * (x - avg)) / (preprocessingData.Rows - 1));
86      }
87      return stdDev;
88    }
89
90    public int GetDifferentValuesCount<T>(string variableName) {
91      return preprocessingData.GetValues<T>(variableName).GroupBy(x => x).Count();
92    }
93
94    public int GetRowMissingValueCount(int rowIndex) {
95      int count = 0;
96      foreach (var variableName in preprocessingData.VariableNames) {
97        if (preprocessingData.IsMissingValue(variableName, rowIndex)) {
98          ++count;
99        }
100      }
101      return count;
102    }
103  }
104}
Note: See TracBrowser for help on using the repository browser.