Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs @ 10534

Last change on this file since 10534 was 10534, checked in by rstoll, 10 years ago
  • StatisticsView column specific information added
File size: 5.3 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using HeuristicLab.Common;
5
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes summary statistics (counts, extrema, averages, dispersion) over the
  /// columns and rows of an <see cref="IPreprocessingData"/> instance.
  /// </summary>
  public class StatisticsLogic : IStatisticsLogic {

    private readonly IPreprocessingData preprocessingData;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates a statistics provider on top of the given data and search logic.
    /// </summary>
    /// <param name="thePreprocessingData">Data whose columns and rows are analyzed.</param>
    /// <param name="theSearchLogic">Search logic used to locate missing values.</param>
    public StatisticsLogic(IPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the number of columns of the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows of the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Returns how many columns are of type <c>double</c>.</summary>
    public int GetNumericColumnCount() {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(i => preprocessingData.IsType<double>(i));
    }

    /// <summary>
    /// Returns how many columns are not of type <c>double</c>.
    /// </summary>
    /// <remarks>
    /// Every non-double column is counted, so DateTime columns are included as well.
    /// </remarks>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the total number of missing values summed over all columns.</summary>
    public int GetMissingValueCount() {
      int count = 0;
      for (int i = 0; i < preprocessingData.Columns; ++i) {
        count += GetMissingValueCount(i);
      }
      return count;
    }

    /// <summary>Returns the number of missing values in a single column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>Returns the minimum of the values stored in the given column.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Min();
    }

    /// <summary>Returns the maximum of the values stored in the given column.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Max();
    }

    /// <summary>
    /// Returns the median of a <c>double</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The median, or <c>double.NaN</c> if the column is not of type <c>double</c>.</returns>
    public double GetMedian(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(columnIndex).Median();
    }

    /// <summary>
    /// Returns the arithmetic mean of a <c>double</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The average, or <c>double.NaN</c> if the column is not of type <c>double</c>.</returns>
    public double GetAverage(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(columnIndex).Average();
    }

    /// <summary>
    /// Returns the median of a <c>DateTime</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The median point in time, or the default <c>DateTime</c> value if the column
    /// is not of type <c>DateTime</c>.
    /// </returns>
    public DateTime GetMedianDateTime(int columnIndex) {
      if (!preprocessingData.IsType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median());
    }

    /// <summary>
    /// Returns the average of a <c>DateTime</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The average point in time, or the default <c>DateTime</c> value if the column
    /// is not of type <c>DateTime</c>.
    /// </returns>
    public DateTime GetAverageDateTime(int columnIndex) {
      if (!preprocessingData.IsType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Average());
    }

    /// <summary>
    /// Returns the value that occurs most often in the given column.
    /// </summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The most frequent value, or <c>default(T)</c> if the column contains no values.
    /// </returns>
    public T GetMostCommonValue<T>(int columnIndex) {
      // FirstOrDefault instead of First: an empty column yields default(T)
      // rather than an InvalidOperationException.
      return preprocessingData.GetValues<T>(columnIndex)
                              .GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .FirstOrDefault();
    }

    /// <summary>
    /// Returns the standard deviation of a <c>double</c> or <c>DateTime</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The standard deviation (in seconds for <c>DateTime</c> columns), or
    /// <c>double.NaN</c> for any other column type.
    /// </returns>
    public double GetStandardDeviation(int columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        return preprocessingData.GetValues<double>(columnIndex).StandardDeviation();
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).StandardDeviation();
      }
      return double.NaN;
    }

    /// <summary>
    /// Returns the variance of a <c>double</c> or <c>DateTime</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The variance (computed on seconds for <c>DateTime</c> columns), or
    /// <c>double.NaN</c> for any other column type.
    /// </returns>
    public double GetVariance(int columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        return preprocessingData.GetValues<double>(columnIndex).Variance();
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).Variance();
      }
      return double.NaN;
    }

    /// <summary>Returns the number of distinct values in the given column.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>Returns the number of missing values in a single row across all columns.</summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(i => searchLogic.IsMissingValue(i, rowIndex));
    }

    /// <summary>Returns the variable name of the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>Tells whether the given column is of type <typeparamref name="T"/>.</summary>
    /// <typeparam name="T">Type to test the column against.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool IsType<T>(int columnIndex) {
      return preprocessingData.IsType<T>(columnIndex);
    }

    /// <summary>
    /// Returns a display name for the type of the given column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// "double", "string" or "DateTime"; "Unknown Type" if the column is none of these.
    /// </returns>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        return "double";
      }
      if (preprocessingData.IsType<string>(columnIndex)) {
        return "string";
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    /// <summary>
    /// Projects the values of a <c>DateTime</c> column to seconds (tick count divided
    /// by ticks per second), so that numeric statistics can be applied to them.
    /// </summary>
    /// <param name="columnIndex">Index of the column to convert.</param>
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
      return preprocessingData.GetValues<DateTime>(columnIndex)
                              .Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    /// <summary>
    /// Converts a number of seconds, as produced by <see cref="GetDateTimeAsSeconds"/>,
    /// back to a <c>DateTime</c>.
    /// </summary>
    /// <param name="seconds">Seconds relative to the default <c>DateTime</c> value.</param>
    /// <returns>
    /// The corresponding point in time, or the default <c>DateTime</c> value if
    /// <paramref name="seconds"/> is NaN or infinite.
    /// </returns>
    private DateTime GetSecondsAsDateTime(double seconds) {
      // NaN and infinity have no DateTime representation; fall back to the same
      // default value the callers use for "not available".
      if (double.IsNaN(seconds) || double.IsInfinity(seconds)) {
        return new DateTime();
      }
      // Do not cast to int: int.MaxValue seconds is only about 68 years, so the
      // value for any realistic date is out of range and the cast result is unspecified.
      return new DateTime().AddSeconds(seconds);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.