Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticInfo.cs @ 10234

Last change on this file since 10234 was 10216, checked in by rstoll, 11 years ago

Replaced the custom median and standard deviation implementations with the ones from the common library.
Added GetVariance (also using the common library).

File size: 3.4 KB
Line 
1using System;
2using System.Linq;
3using HeuristicLab.Common;
4
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Provides summary statistics (counts, extrema, central tendency and spread)
  /// over the columns and rows of an <see cref="IPreprocessingData"/> instance.
  /// </summary>
  public class StatisticInfo : IStatisticInfo {

    // Assigned once in the constructor and never replaced afterwards.
    private readonly IPreprocessingData preprocessingData;

    /// <summary>
    /// Creates a statistics view over the given preprocessing data.
    /// </summary>
    /// <param name="thePreprocessingData">The data every statistic is computed from.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="thePreprocessingData"/> is <c>null</c>.
    /// </exception>
    public StatisticInfo(IPreprocessingData thePreprocessingData) {
      // Fail fast here instead of with a NullReferenceException in some later query.
      if (thePreprocessingData == null) {
        throw new ArgumentNullException("thePreprocessingData");
      }
      preprocessingData = thePreprocessingData;
    }

    /// <summary>Returns the number of columns reported by the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows reported by the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>
    /// Returns the number of variables whose values are of type <see cref="double"/>.
    /// </summary>
    public int GetNumericColumnCount() {
      return preprocessingData.VariableNames
                              .Count(variableName => preprocessingData.IsType<double>(variableName));
    }

    /// <summary>
    /// Returns the number of columns that are not numeric, i.e. the total column
    /// count minus <see cref="GetNumericColumnCount"/>.
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>
    /// Returns the total number of missing values, summed over all variables.
    /// </summary>
    public int GetMissingValueCount() {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        count += GetMissingValueCount(variableName);
      }
      return count;
    }

    /// <summary>
    /// Returns the number of missing values of a single variable.
    /// </summary>
    /// <param name="variableName">Name of the variable to inspect.</param>
    public int GetMissingValueCount(string variableName) {
      return preprocessingData.GetMissingValueIndices(variableName).Count();
    }

    /// <summary>
    /// Returns the smallest value of the given variable, as determined by
    /// <c>Enumerable.Min</c>.
    /// </summary>
    /// <typeparam name="T">Type of the values stored for the variable.</typeparam>
    /// <param name="variableName">Name of the variable to inspect.</param>
    public T GetMin<T>(string variableName) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(variableName).Min();
    }

    /// <summary>
    /// Returns the largest value of the given variable, as determined by
    /// <c>Enumerable.Max</c>.
    /// </summary>
    /// <typeparam name="T">Type of the values stored for the variable.</typeparam>
    /// <param name="variableName">Name of the variable to inspect.</param>
    public T GetMax<T>(string variableName) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(variableName).Max();
    }

    /// <summary>
    /// Returns the median of a <see cref="double"/> variable.
    /// </summary>
    /// <param name="variableName">Name of the variable to inspect.</param>
    /// <returns>
    /// The result of the <c>Median()</c> extension applied to the variable's values,
    /// or <see cref="double.NaN"/> if the variable is not of type <see cref="double"/>.
    /// </returns>
    public double GetMedian(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(variableName).Median();
    }

    /// <summary>
    /// Returns the arithmetic mean of a <see cref="double"/> variable.
    /// </summary>
    /// <param name="variableName">Name of the variable to inspect.</param>
    /// <returns>
    /// The average of the variable's values, or <see cref="double.NaN"/> if the
    /// variable is not of type <see cref="double"/>.
    /// </returns>
    public double GetAverage(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(variableName).Average();
    }

    /// <summary>
    /// Returns the value that occurs most often in the given variable.
    /// </summary>
    /// <typeparam name="T">Type of the values stored for the variable.</typeparam>
    /// <param name="variableName">Name of the variable to inspect.</param>
    /// <returns>The key of the largest group of equal values.</returns>
    /// <exception cref="InvalidOperationException">
    /// The variable has no values (raised by <c>First()</c>).
    /// </exception>
    public T GetMostCommonValue<T>(string variableName) {
      // OrderByDescending is a stable sort, so among equally frequent values
      // the one whose group was produced first by GroupBy is returned.
      return preprocessingData.GetValues<T>(variableName)
                              .GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .First();
    }

    /// <summary>
    /// Returns the standard deviation of a <see cref="double"/> variable.
    /// </summary>
    /// <param name="variableName">Name of the variable to inspect.</param>
    /// <returns>
    /// The result of the <c>StandardDeviation()</c> extension applied to the
    /// variable's values, or <see cref="double.NaN"/> if the variable is not of
    /// type <see cref="double"/>.
    /// </returns>
    public double GetStandardDeviation(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(variableName).StandardDeviation();
    }

    /// <summary>
    /// Returns the variance of a <see cref="double"/> variable.
    /// </summary>
    /// <param name="variableName">Name of the variable to inspect.</param>
    /// <returns>
    /// The result of the <c>Variance()</c> extension applied to the variable's
    /// values, or <see cref="double.NaN"/> if the variable is not of type
    /// <see cref="double"/>.
    /// </returns>
    public double GetVariance(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(variableName).Variance();
    }

    /// <summary>
    /// Returns the number of distinct values of the given variable, compared with
    /// the default equality comparer of <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">Type of the values stored for the variable.</typeparam>
    /// <param name="variableName">Name of the variable to inspect.</param>
    public int GetDifferentValuesCount<T>(string variableName) {
      return preprocessingData.GetValues<T>(variableName).Distinct().Count();
    }

    /// <summary>
    /// Returns the number of variables that have a missing value in the given row.
    /// </summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      return preprocessingData.VariableNames
                              .Count(variableName => preprocessingData.IsMissingValue(variableName, rowIndex));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.