Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticInfo.cs @ 10182

Last change on this file since 10182 was 10182, checked in by sbreuer, 10 years ago
  • removed unnecessary namespace distinctions
  • moved statisticInfo to implementations folder
File size: 3.4 KB
Line 
1using System;
2using System.Linq;
3using HeuristicLab.DataPreprocessing;
4
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes simple descriptive statistics (counts, min/max, median, average,
  /// standard deviation, most common value) over the columns of an
  /// <see cref="IPreprocessingData"/> instance.
  /// </summary>
  /// <remarks>
  /// For <c>double</c> columns a value of <see cref="double.NaN"/> is treated as a
  /// missing value (see <see cref="GetMissingValueCount(int)"/>). The numeric
  /// statistics (median, average, standard deviation) ignore such entries so that a
  /// single missing value does not skew or invalidate the result.
  /// </remarks>
  class StatisticInfo : IStatisticInfo {

    private readonly IPreprocessingData preprocessingData;

    /// <summary>
    /// Creates a statistics provider that reads from the given preprocessing data.
    /// </summary>
    /// <param name="thePreprocessingData">The data set the statistics are computed on.</param>
    public StatisticInfo(IPreprocessingData thePreprocessingData) {
      preprocessingData = thePreprocessingData;
    }

    /// <summary>Returns the number of columns of the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows of the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Returns the number of columns whose values are of type <c>double</c>.</summary>
    public int GetNumericColumnCount() {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(i => preprocessingData.IsType<double>(i));
    }

    /// <summary>
    /// Returns the number of columns that are not numeric, i.e. all columns
    /// that are not of type <c>double</c>.
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the total number of missing values over all columns.</summary>
    /// <exception cref="ArgumentException">A column has an unsupported value type.</exception>
    public int GetMissingValueCount() {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Sum(i => GetMissingValueCount(i));
    }

    /// <summary>
    /// Returns the number of missing values in the given column. A value counts as
    /// missing when it is NaN (<c>double</c>), null or empty (<c>string</c>), or
    /// equal to <see cref="DateTime.MinValue"/> (<c>DateTime</c>).
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <exception cref="ArgumentException">
    /// The column is neither of type <c>double</c>, <c>string</c> nor <c>DateTime</c>.
    /// </exception>
    public int GetMissingValueCount(int columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        return preprocessingData.GetValues<double>(columnIndex).Count(x => double.IsNaN(x));
      }
      if (preprocessingData.IsType<string>(columnIndex)) {
        return preprocessingData.GetValues<string>(columnIndex).Count(x => string.IsNullOrEmpty(x));
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return preprocessingData.GetValues<DateTime>(columnIndex).Count(x => x.Equals(DateTime.MinValue));
      }
      throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type.");
    }

    /// <summary>Returns the smallest value of the given column.</summary>
    /// <typeparam name="T">Value type of the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Min();
    }

    /// <summary>Returns the largest value of the given column.</summary>
    /// <typeparam name="T">Value type of the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Max();
    }

    /// <summary>
    /// Returns the median of a <c>double</c> column, ignoring missing (NaN) values.
    /// For an even number of values the mean of the two middle values is returned.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The median, or <see cref="double.NaN"/> if the column is not of type
    /// <c>double</c> or contains no non-missing values.
    /// </returns>
    public double GetMedian(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }

      double[] sorted = GetValidValues(columnIndex).OrderBy(x => x).ToArray();
      if (sorted.Length == 0) {
        return double.NaN;
      }

      int middle = sorted.Length / 2;
      if (sorted.Length % 2 == 1) {
        return sorted[middle];
      }
      // even number of values: the median lies between the two central elements
      return (sorted[middle - 1] + sorted[middle]) / 2.0;
    }

    /// <summary>
    /// Returns the arithmetic mean of a <c>double</c> column, ignoring missing (NaN) values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The average, or <see cref="double.NaN"/> if the column is not of type
    /// <c>double</c> or contains no non-missing values.
    /// </returns>
    public double GetAverage(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }

      double[] values = GetValidValues(columnIndex);
      // Enumerable.Average throws on an empty sequence, so guard explicitly
      return values.Length == 0 ? double.NaN : values.Average();
    }

    /// <summary>
    /// Returns the value that occurs most often in the given column. If several
    /// values share the highest frequency, the one encountered first is returned.
    /// </summary>
    /// <typeparam name="T">Value type of the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <exception cref="InvalidOperationException">The column contains no values.</exception>
    public T GetMostCommonValue<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex)
                              .GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .First();
    }

    /// <summary>
    /// Returns the sample standard deviation (divisor n - 1) of a <c>double</c>
    /// column, ignoring missing (NaN) values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The standard deviation, or <see cref="double.NaN"/> if the column is not of
    /// type <c>double</c> or contains fewer than two non-missing values.
    /// </returns>
    public double GetStandardDeviation(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }

      double[] values = GetValidValues(columnIndex);
      // the sample standard deviation is undefined for fewer than two observations
      if (values.Length < 2) {
        return double.NaN;
      }

      double avg = values.Average();
      double squaredDeviations = values.Sum(x => (x - avg) * (x - avg));
      return Math.Sqrt(squaredDeviations / (values.Length - 1));
    }

    /// <summary>Returns the number of distinct values in the given column.</summary>
    /// <typeparam name="T">Value type of the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>
    /// Returns the values of a <c>double</c> column with all missing (NaN) entries removed.
    /// </summary>
    private double[] GetValidValues(int columnIndex) {
      return preprocessingData.GetValues<double>(columnIndex)
                              .Where(x => !double.IsNaN(x))
                              .ToArray();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.