Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticInfo.cs @ 10189

Last change on this file since 10189 was 10189, checked in by rstoll, 11 years ago

Added GetMissingValueIndices to IPreprocessingData since it will be used in many places
Removed columnIndex specific method from PreprocessingData

File size: 2.9 KB
Line 
1using System;
2using System.Linq;
3
4namespace HeuristicLab.DataPreprocessing {
5
6  public class StatisticInfo : IStatisticInfo {
7
8    private IPreprocessingData preprocessingData;
9
10    public StatisticInfo(IPreprocessingData thePreprocessingData) {
11      preprocessingData = thePreprocessingData;
12    }
13
14    public int GetColumnCount() {
15      return preprocessingData.Columns;
16    }
17
18    public int GetRowCount() {
19      return preprocessingData.Rows;
20    }
21
22    public int GetNumericColumnCount() {
23      int count = 0;
24      foreach (var variableName in preprocessingData.VariableNames) {
25        if (preprocessingData.IsType<double>(variableName)) {
26          ++count;
27        }
28      }
29      return count;
30    }
31
32    public int GetNominalColumnCount() {
33      return preprocessingData.Columns - GetNumericColumnCount();
34    }
35
36    public int GetMissingValueCount() {
37      int count = 0;
38      foreach (var variableName in preprocessingData.VariableNames) {
39        count += GetMissingValueCount(variableName);
40      }
41      return count;
42    }
43
44    public int GetMissingValueCount(string variableName) {
45      return preprocessingData.GetMissingValueIndices(variableName).Count();
46    }
47
48    public T GetMin<T>(string variableName) where T : IComparable<T> {
49      return preprocessingData.GetValues<T>(variableName).Min();
50    }
51
52    public T GetMax<T>(string variableName) where T : IComparable<T> {
53      return preprocessingData.GetValues<T>(variableName).Max();
54    }
55
56    public double GetMedian(string variableName) {
57      double median = double.NaN;
58      if (preprocessingData.IsType<double>(variableName)) {
59        median = preprocessingData.GetValues<double>(variableName).OrderBy(x => x).ElementAt(preprocessingData.Rows / 2);
60      }
61      return median;
62    }
63
64    public double GetAverage(string variableName) {
65      double avg = double.NaN;
66      if (preprocessingData.IsType<double>(variableName)) {
67        avg = preprocessingData.GetValues<double>(variableName).Average();
68      }
69      return avg;
70    }
71
72    public T GetMostCommonValue<T>(string variableName) {
73      return preprocessingData.GetValues<T>(variableName)
74                              .GroupBy(x => x)
75                              .OrderByDescending(g => g.Count())
76                              .Select(g => g.Key)
77                              .First();
78    }
79
80
81    public double GetStandardDeviation(string variableName) {
82      double stdDev = double.NaN;
83      if (preprocessingData.IsType<double>(variableName)) {
84        double avg = GetAverage(variableName);
85        stdDev = Math.Sqrt(preprocessingData.GetValues<double>(variableName).Sum(x => (x - avg) * (x - avg)) / (preprocessingData.Rows - 1));
86      }
87      return stdDev;
88    }
89
90    public int GetDifferentValuesCount<T>(string variableName) {
91      return preprocessingData.GetValues<T>(variableName).GroupBy(x => x).Count();
92    }
93  }
94}
Note: See TracBrowser for help on using the repository browser.