Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs @ 10367

Last change on this file since 10367 was 10367, checked in by rstoll, 10 years ago
  • Modified PreprocessingData to use columnIndex instead of variableName (faster and more convenient); marked the variableName-based methods as Obsolete
  • Updated SearchLogic, DataGridLogic, StatisticsLogic, and PreprocessingDataManipulation accordingly

*

File size: 3.6 KB
RevLine 
[10148]1using System;
[10165]2using System.Linq;
[10216]3using HeuristicLab.Common;
[10148]4
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes summary statistics over the columns and rows exposed by an
  /// <see cref="IPreprocessingData"/> instance, delegating missing-value
  /// detection to an <see cref="ISearchLogic"/>.
  /// </summary>
  public class StatisticsLogic : IStatisticsLogic {

    private readonly IPreprocessingData preprocessingData;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates a statistics helper bound to the given data and search logic.
    /// </summary>
    /// <param name="thePreprocessingData">Data whose columns and rows are analysed.</param>
    /// <param name="theSearchLogic">Search logic used to find missing values.</param>
    public StatisticsLogic(IPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the number of columns reported by the data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows reported by the data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>
    /// Counts the columns whose values are of type <see cref="double"/>.
    /// </summary>
    /// <returns>The number of columns for which <c>IsType&lt;double&gt;</c> is true.</returns>
    public int GetNumericColumnCount() {
      // Query by column index, consistent with every other method in this class,
      // rather than going through the variable-name based lookup.
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(columnIndex => preprocessingData.IsType<double>(columnIndex));
    }

    /// <summary>
    /// Counts the columns that are not numeric, i.e. the total column count
    /// minus <see cref="GetNumericColumnCount"/>.
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>
    /// Returns the total number of missing values, summed over all columns.
    /// </summary>
    public int GetMissingValueCount() {
      int count = 0;
      for (int i = 0; i < preprocessingData.Columns; ++i) {
        count += GetMissingValueCount(i);
      }
      return count;
    }

    /// <summary>
    /// Returns the number of missing values in a single column, as reported by the search logic.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>Returns the smallest value of the column.</summary>
    /// <typeparam name="T">Element type used to read the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown by <c>Enumerable.Min</c> when the column is empty and <typeparamref name="T"/> is a non-nullable value type.
    /// </exception>
    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Min();
    }

    /// <summary>Returns the largest value of the column.</summary>
    /// <typeparam name="T">Element type used to read the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown by <c>Enumerable.Max</c> when the column is empty and <typeparamref name="T"/> is a non-nullable value type.
    /// </exception>
    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Max();
    }

    /// <summary>Returns the median of a numeric column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The result of the <c>Median()</c> extension on the column values, or
    /// <see cref="double.NaN"/> if the column is not of type <see cref="double"/>.
    /// </returns>
    public double GetMedian(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(columnIndex).Median();
    }

    /// <summary>Returns the arithmetic mean of a numeric column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The average of the column values, or <see cref="double.NaN"/> if the column
    /// is not of type <see cref="double"/> or contains no values.
    /// </returns>
    public double GetAverage(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      // Enumerable.Average throws on an empty sequence; substituting a single NaN
      // yields NaN for an empty column and leaves non-empty columns unaffected.
      return preprocessingData.GetValues<double>(columnIndex)
                              .DefaultIfEmpty(double.NaN)
                              .Average();
    }

    /// <summary>Returns the value that occurs most often in the column.</summary>
    /// <typeparam name="T">Element type used to read the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <exception cref="InvalidOperationException">Thrown when the column contains no values.</exception>
    public T GetMostCommonValue<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex)
                              .GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .First();
    }

    /// <summary>Returns the standard deviation of a numeric column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The result of the <c>StandardDeviation()</c> extension on the column values, or
    /// <see cref="double.NaN"/> if the column is not of type <see cref="double"/>.
    /// </returns>
    public double GetStandardDeviation(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(columnIndex).StandardDeviation();
    }

    /// <summary>Returns the variance of a numeric column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The result of the <c>Variance()</c> extension on the column values, or
    /// <see cref="double.NaN"/> if the column is not of type <see cref="double"/>.
    /// </returns>
    public double GetVariance(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(columnIndex).Variance();
    }

    /// <summary>Returns the number of distinct values in the column.</summary>
    /// <typeparam name="T">Element type used to read the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>
    /// Returns the number of columns in which the given row holds a missing value.
    /// </summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(columnIndex => searchLogic.IsMissingValue(columnIndex, rowIndex));
    }

    /// <summary>Returns the variable name of the column at the given index.</summary>
    /// <param name="columnIndex">Index of the column to look up.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>
    /// Reports whether the column at the given index holds values of type <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">Element type to test for.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool IsType<T>(int columnIndex) {
      return preprocessingData.IsType<T>(columnIndex);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.