Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticInfo.cs @ 10183

Last change on this file since 10183 was 10183, checked in by rstoll, 10 years ago

Switched from columnIndex to variableName

File size: 3.5 KB
Line 
using System;
using System.Collections.Generic;
using System.Linq;

namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes descriptive statistics (counts, missing values, min/max, median,
  /// average, standard deviation, ...) over the columns of an
  /// <see cref="IPreprocessingData"/> instance. Columns are addressed by variable name.
  /// </summary>
  /// <remarks>
  /// The missing-value markers used throughout this class are the ones
  /// <see cref="GetMissingValueCount(string)"/> counts: <c>double.NaN</c> for numeric
  /// columns, null/empty for string columns and <c>DateTime.MinValue</c> for date columns.
  /// </remarks>
  public class StatisticInfo : IStatisticInfo {

    private readonly IPreprocessingData preprocessingData;

    /// <summary>
    /// Creates a statistics view over the given data.
    /// </summary>
    /// <param name="thePreprocessingData">The data to compute statistics on.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="thePreprocessingData"/> is null.
    /// </exception>
    public StatisticInfo(IPreprocessingData thePreprocessingData) {
      // Fail at construction instead of with a NullReferenceException on first use.
      if (thePreprocessingData == null) {
        throw new ArgumentNullException("thePreprocessingData");
      }
      preprocessingData = thePreprocessingData;
    }

    /// <summary>Returns the number of columns of the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows of the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Returns the number of columns whose values are of type double.</summary>
    public int GetNumericColumnCount() {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        if (preprocessingData.IsType<double>(variableName)) {
          ++count;
        }
      }
      return count;
    }

    /// <summary>
    /// Returns the number of non-numeric columns, i.e. every column that is not of
    /// type double (this includes DateTime columns).
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the total number of missing values over all columns.</summary>
    /// <exception cref="ArgumentException">A column has an unsupported type.</exception>
    public int GetMissingValueCount() {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        count += GetMissingValueCount(variableName);
      }
      return count;
    }

    /// <summary>
    /// Returns the number of missing values in the given column. A value counts as
    /// missing when it is NaN (double), null or empty (string) or
    /// <c>DateTime.MinValue</c> (DateTime).
    /// </summary>
    /// <param name="variableName">Name of the column to inspect.</param>
    /// <exception cref="ArgumentException">
    /// The column is neither of type double, string nor DateTime.
    /// </exception>
    public int GetMissingValueCount(string variableName) {
      if (preprocessingData.IsType<double>(variableName)) {
        return preprocessingData.GetValues<double>(variableName).Count(x => double.IsNaN(x));
      } else if (preprocessingData.IsType<string>(variableName)) {
        return preprocessingData.GetValues<string>(variableName).Count(x => string.IsNullOrEmpty(x));
      } else if (preprocessingData.IsType<DateTime>(variableName)) {
        return preprocessingData.GetValues<DateTime>(variableName).Count(x => x.Equals(DateTime.MinValue));
      } else {
        // The column is identified by its name, not by an index.
        throw new ArgumentException("column with name: " + variableName + " contains a non supported type.");
      }
    }

    /// <summary>
    /// Returns the smallest value of the given column. Missing-value markers are not
    /// filtered out here.
    /// </summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the column to inspect.</param>
    public T GetMin<T>(string variableName) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(variableName).Min();
    }

    /// <summary>
    /// Returns the largest value of the given column. Missing-value markers are not
    /// filtered out here.
    /// </summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the column to inspect.</param>
    public T GetMax<T>(string variableName) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(variableName).Max();
    }

    /// <summary>
    /// Returns the median of the non-missing values of a numeric column. For an even
    /// number of values the mean of the two central values is returned.
    /// </summary>
    /// <param name="variableName">Name of the column to inspect.</param>
    /// <returns>
    /// The median, or <c>double.NaN</c> if the column is not of type double or
    /// contains no non-missing value.
    /// </returns>
    public double GetMedian(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      // NaN sorts before every other double and would shift the central position,
      // so missing values are removed before sorting.
      List<double> sorted = GetValidValues(variableName);
      if (sorted.Count == 0) {
        return double.NaN;
      }
      sorted.Sort();

      int mid = sorted.Count / 2;
      if (sorted.Count % 2 == 1) {
        return sorted[mid];
      }
      return (sorted[mid - 1] + sorted[mid]) / 2.0;
    }

    /// <summary>
    /// Returns the arithmetic mean of the non-missing values of a numeric column.
    /// </summary>
    /// <param name="variableName">Name of the column to inspect.</param>
    /// <returns>
    /// The mean, or <c>double.NaN</c> if the column is not of type double or
    /// contains no non-missing value.
    /// </returns>
    public double GetAverage(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      List<double> values = GetValidValues(variableName);
      // Enumerable.Average throws on an empty sequence; report NaN instead.
      return values.Count == 0 ? double.NaN : values.Average();
    }

    /// <summary>
    /// Returns the value that occurs most often in the given column. Among equally
    /// frequent values the one occurring first in the column is returned.
    /// </summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the column to inspect.</param>
    /// <exception cref="InvalidOperationException">The column contains no values.</exception>
    public T GetMostCommonValue<T>(string variableName) {
      return preprocessingData.GetValues<T>(variableName)
                              .GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .First();
    }

    /// <summary>
    /// Returns the sample standard deviation (divisor n - 1) of the non-missing
    /// values of a numeric column.
    /// </summary>
    /// <param name="variableName">Name of the column to inspect.</param>
    /// <returns>
    /// The standard deviation, or <c>double.NaN</c> if the column is not of type
    /// double or contains fewer than two non-missing values.
    /// </returns>
    public double GetStandardDeviation(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      // Materialize once so mean, squared deviations and divisor all refer to the
      // same set of values.
      List<double> values = GetValidValues(variableName);
      if (values.Count < 2) {
        return double.NaN;
      }

      double avg = values.Average();
      double sumOfSquares = values.Sum(x => (x - avg) * (x - avg));
      return Math.Sqrt(sumOfSquares / (values.Count - 1));
    }

    /// <summary>Returns the number of distinct values in the given column.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(string variableName) {
      return preprocessingData.GetValues<T>(variableName).Distinct().Count();
    }

    /// <summary>
    /// Returns the values of a double column with all NaN (missing) entries removed.
    /// The caller must have verified that the column is of type double.
    /// </summary>
    private List<double> GetValidValues(string variableName) {
      return preprocessingData.GetValues<double>(variableName)
                              .Where(x => !double.IsNaN(x))
                              .ToList();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.