Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs @ 10535

Last change on this file since 10535 was 10534, checked in by rstoll, 11 years ago
  • StatisticsView column specific information added
File size: 5.3 KB
RevLine 
[10148]1using System;
[10383]2using System.Collections.Generic;
[10165]3using System.Linq;
[10216]4using HeuristicLab.Common;
[10148]5
6namespace HeuristicLab.DataPreprocessing {
[10165]7
[10249]8  public class StatisticsLogic : IStatisticsLogic {
[10148]9
[10236]10    private readonly IPreprocessingData preprocessingData;
11    private readonly ISearchLogic searchLogic;
[10148]12
[10249]13    public StatisticsLogic(IPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
[10165]14      preprocessingData = thePreprocessingData;
[10236]15      searchLogic = theSearchLogic;
[10148]16    }
17
18    public int GetColumnCount() {
[10159]19      return preprocessingData.Columns;
[10148]20    }
21
22    public int GetRowCount() {
[10159]23      return preprocessingData.Rows;
[10148]24    }
25
26    public int GetNumericColumnCount() {
[10159]27      int count = 0;
[10369]28
29      for (int i = 0; i < preprocessingData.Columns; ++i) {
30        if (preprocessingData.IsType<double>(i)) {
[10159]31          ++count;
32        }
33      }
34      return count;
[10148]35    }
36
37    public int GetNominalColumnCount() {
[10159]38      return preprocessingData.Columns - GetNumericColumnCount();
[10148]39    }
40
41    public int GetMissingValueCount() {
42      int count = 0;
[10369]43      for (int i = 0; i < preprocessingData.Columns; ++i) {
[10367]44        count += GetMissingValueCount(i);
[10148]45      }
46      return count;
47    }
48
[10367]49    public int GetMissingValueCount(int columnIndex) {
50      return searchLogic.GetMissingValueIndices(columnIndex).Count();
[10148]51    }
52
[10367]53    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
54      return preprocessingData.GetValues<T>(columnIndex).Min();
[10148]55    }
56
[10367]57    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
58      return preprocessingData.GetValues<T>(columnIndex).Max();
[10148]59    }
60
[10367]61    public double GetMedian(int columnIndex) {
[10166]62      double median = double.NaN;
[10367]63      if (preprocessingData.IsType<double>(columnIndex)) {
64        median = preprocessingData.GetValues<double>(columnIndex).Median();
[10166]65      }
66      return median;
[10148]67    }
68
[10367]69    public double GetAverage(int columnIndex) {
[10166]70      double avg = double.NaN;
[10367]71      if (preprocessingData.IsType<double>(columnIndex)) {
72        avg = preprocessingData.GetValues<double>(columnIndex).Average();
[10166]73      }
74      return avg;
[10148]75    }
76
[10383]77    public DateTime GetMedianDateTime(int columnIndex) {
[10381]78      DateTime median = new DateTime();
[10383]79      if (preprocessingData.IsType<DateTime>(columnIndex)) {
80        median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median());
[10381]81      }
82      return median;
83    }
84
[10383]85    public DateTime GetAverageDateTime(int columnIndex) {
[10381]86      DateTime avg = new DateTime();
[10383]87      if (preprocessingData.IsType<DateTime>(columnIndex)) {
88        avg = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Average());
[10381]89      }
90      return avg;
91    }
92
[10367]93    public T GetMostCommonValue<T>(int columnIndex) {
[10534]94      var t = preprocessingData.GetValues<T>(columnIndex);
95      var t2 = t.GroupBy(x => x);
96      var t3 = t2.Select(g => g.Key);
97
[10367]98      return preprocessingData.GetValues<T>(columnIndex)
[10381]99
[10180]100                              .GroupBy(x => x)
101                              .OrderByDescending(g => g.Count())
102                              .Select(g => g.Key)
103                              .First();
[10148]104    }
105
[10167]106
[10367]107    public double GetStandardDeviation(int columnIndex) {
[10169]108      double stdDev = double.NaN;
[10367]109      if (preprocessingData.IsType<double>(columnIndex)) {
110        stdDev = preprocessingData.GetValues<double>(columnIndex).StandardDeviation();
[10532]111      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
112        stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
[10169]113      }
114      return stdDev;
[10148]115    }
116
[10367]117    public double GetVariance(int columnIndex) {
[10383]118      double variance = double.NaN;
[10367]119      if (preprocessingData.IsType<double>(columnIndex)) {
[10381]120        variance = preprocessingData.GetValues<double>(columnIndex).Variance();
[10532]121      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
122        variance = GetDateTimeAsSeconds(columnIndex).Variance();
[10216]123      }
[10381]124      return variance;
[10216]125    }
126
[10367]127    public int GetDifferentValuesCount<T>(int columnIndex) {
128      return preprocessingData.GetValues<T>(columnIndex).GroupBy(x => x).Count();
[10179]129    }
[10191]130
131    public int GetRowMissingValueCount(int rowIndex) {
132      int count = 0;
[10369]133      for (int i = 0; i < preprocessingData.Columns; ++i) {
[10367]134        if (searchLogic.IsMissingValue(i, rowIndex)) {
[10191]135          ++count;
136        }
137      }
138      return count;
139    }
[10367]140
141    public string GetVariableName(int columnIndex) {
142      return preprocessingData.GetVariableName(columnIndex);
143    }
144
145    public bool IsType<T>(int columnIndex) {
146      return preprocessingData.IsType<T>(columnIndex);
147    }
[10371]148
149    public string GetColumnTypeAsString(int columnIndex) {
150      if (preprocessingData.IsType<double>(columnIndex)) {
151        return "double";
152      } else if (preprocessingData.IsType<string>(columnIndex)) {
153        return "string";
154      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
155        return "DateTime";
156      }
157      return "Unknown Type";
158    }
[10383]159    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
160      return preprocessingData.GetValues<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
[10381]161    }
162
[10383]163    private DateTime GetSecondsAsDateTime(double seconds) {
164      DateTime dateTime = new DateTime();
165      return dateTime.Add(new TimeSpan(0, 0, (int)seconds));
[10381]166    }
[10148]167  }
168}
Note: See TracBrowser for help on using the repository browser.