Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Logic/StatisticsLogic.cs @ 15269

Last change on this file since 15269 was 15269, checked in by pfleck, 7 years ago

#2809: Removed SearchLogic

File size: 8.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Computes descriptive statistics (counts, min/max, median, average, variance, quantiles, ...)
  /// over the columns and rows of an <see cref="ITransactionalPreprocessingData"/> instance.
  /// </summary>
  public class StatisticsLogic {
    private readonly ITransactionalPreprocessingData preprocessingData;

    /// <summary>
    /// Creates a statistics helper that reads from the given preprocessing data.
    /// </summary>
    /// <param name="preprocessingData">The data all statistics are computed on.</param>
    public StatisticsLogic(ITransactionalPreprocessingData preprocessingData) {
      this.preprocessingData = preprocessingData;
    }

    /// <summary>Returns the number of columns of the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows of the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Returns the number of columns whose type is <see cref="double"/>.</summary>
    public int GetNumericColumnCount() {
      int count = 0;

      for (int i = 0; i < preprocessingData.Columns; ++i) {
        if (preprocessingData.VariableHasType<double>(i)) {
          ++count;
        }
      }
      return count;
    }

    /// <summary>
    /// Returns the number of columns that are not counted by <see cref="GetNumericColumnCount"/>,
    /// i.e. every column that is not reported as <see cref="double"/>.
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the total number of empty cells, summed over all columns.</summary>
    public int GetMissingValueCount() {
      int count = 0;
      for (int i = 0; i < preprocessingData.Columns; ++i) {
        count += GetMissingValueCount(i);
      }
      return count;
    }

    /// <summary>Returns the number of empty cells in the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return Enumerable.Range(0, preprocessingData.Rows).Count(rowIndex => preprocessingData.IsCellEmpty(columnIndex, rowIndex));
    }

    /// <summary>Returns the number of empty cells in the given row.</summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      var columnIndexes = Enumerable.Range(0, preprocessingData.Columns);
      // Count the cells that ARE empty, mirroring GetMissingValueCount(int) for columns.
      return columnIndexes.Count(columnIndex => preprocessingData.IsCellEmpty(columnIndex, rowIndex));
    }

    /// <summary>
    /// Returns the values of a column without the entries that
    /// <c>PreprocessingData.IsMissingValue</c> reports as missing. The result is materialized,
    /// so it can be enumerated multiple times without re-reading the data.
    /// </summary>
    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection = false) {
      return preprocessingData.GetValues<T>(columnIndex, considerSelection).Where(x => !PreprocessingData.IsMissingValue(x)).ToList();
    }

    /// <summary>
    /// Returns the smallest non-missing value of the column, or <paramref name="defaultValue"/>
    /// if the column is not of type <typeparamref name="T"/> or has no non-missing values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="defaultValue">Value returned when no minimum can be determined.</param>
    /// <param name="considerSelection">Forwarded to the data's <c>GetValues</c> call.</param>
    public T GetMin<T>(int columnIndex, T defaultValue, bool considerSelection = false) where T : IComparable<T> {
      var min = defaultValue;
      if (preprocessingData.VariableHasType<T>(columnIndex)) {
        var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
        if (values.Any()) {
          min = values.Min();
        }
      }
      return min;
    }

    /// <summary>
    /// Returns the largest non-missing value of the column, or <paramref name="defaultValue"/>
    /// if the column is not of type <typeparamref name="T"/> or has no non-missing values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="defaultValue">Value returned when no maximum can be determined.</param>
    /// <param name="considerSelection">Forwarded to the data's <c>GetValues</c> call.</param>
    public T GetMax<T>(int columnIndex, T defaultValue, bool considerSelection = false) where T : IComparable<T> {
      var max = defaultValue;
      if (preprocessingData.VariableHasType<T>(columnIndex)) {
        var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
        if (values.Any()) {
          max = values.Max();
        }
      }
      return max;
    }

    /// <summary>
    /// Returns the median of the non-missing values of a double column, or
    /// <see cref="double.NaN"/> if the column is not of type double or has no non-missing values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the data's <c>GetValues</c> call.</param>
    public double GetMedian(int columnIndex, bool considerSelection = false) {
      double median = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
        if (values.Any()) {
          median = values.Median();
        }
      }
      return median;
    }

    /// <summary>
    /// Returns the arithmetic mean of the non-missing values of a double column, or
    /// <see cref="double.NaN"/> if the column is not of type double or has no non-missing values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the data's <c>GetValues</c> call.</param>
    public double GetAverage(int columnIndex, bool considerSelection = false) {
      double avg = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
        if (values.Any()) {
          avg = values.Average();
        }
      }
      return avg;
    }

    /// <summary>
    /// Returns the median of the non-missing values of a DateTime column, or the default
    /// <see cref="DateTime"/> if the column is not of type DateTime or has no non-missing values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the data's <c>GetValues</c> call.</param>
    public DateTime GetMedianDateTime(int columnIndex, bool considerSelection = false) {
      DateTime median = new DateTime();
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        // Materialize once: the sequence is checked for emptiness and then aggregated.
        var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection).ToList();
        // Guard against empty input, consistent with GetMedian.
        if (seconds.Any()) {
          median = GetSecondsAsDateTime(seconds.Median());
        }
      }
      return median;
    }

    /// <summary>
    /// Returns the average of the non-missing values of a DateTime column, or the default
    /// <see cref="DateTime"/> if the column is not of type DateTime or has no non-missing values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the data's <c>GetValues</c> call.</param>
    public DateTime GetAverageDateTime(int columnIndex, bool considerSelection = false) {
      DateTime avg = new DateTime();
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        // Materialize once: the sequence is checked for emptiness and then aggregated.
        var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection).ToList();
        // Enumerable.Average throws on an empty sequence, so guard like GetAverage does.
        if (seconds.Any()) {
          avg = GetSecondsAsDateTime(seconds.Average());
        }
      }
      return avg;
    }

    /// <summary>
    /// Returns the value that occurs most often among the non-missing values of the column, or
    /// <paramref name="defaultValue"/> if there are no non-missing values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="defaultValue">Value returned when the column has no non-missing values.</param>
    /// <param name="considerSelection">Forwarded to the data's <c>GetValues</c> call.</param>
    public T GetMostCommonValue<T>(int columnIndex, T defaultValue, bool considerSelection = false) {
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
      if (!values.Any())
        return defaultValue;
      // OrderByDescending is a stable sort, so among equally frequent values
      // the one that appears first in the column wins.
      return values.GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .First();
    }

    /// <summary>
    /// Returns the standard deviation of the non-missing values of a double column, or of a
    /// DateTime column expressed in seconds. Returns <see cref="double.NaN"/> for other column types.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetStandardDeviation(int columnIndex) {
      double stdDev = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
      }
      return stdDev;
    }

    /// <summary>
    /// Returns the variance of the non-missing values of a double column, or of a
    /// DateTime column expressed in seconds. Returns <see cref="double.NaN"/> for other column types.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetVariance(int columnIndex) {
      double variance = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        variance = GetValuesWithoutNaN<double>(columnIndex).Variance();
      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        variance = GetDateTimeAsSeconds(columnIndex).Variance();
      }
      return variance;
    }

    /// <summary>
    /// Returns the 0.25 quantile of the non-missing values of a double column, or of a
    /// DateTime column expressed in seconds. Returns <see cref="double.NaN"/> for other column types.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetOneQuarterPercentile(int columnIndex) {
      double percentile = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        // DefaultIfEmpty feeds a single NaN to Quantile when there are no values.
        percentile = GetValuesWithoutNaN<double>(columnIndex).DefaultIfEmpty(double.NaN).Quantile(0.25);
      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        percentile = GetDateTimeAsSeconds(columnIndex).DefaultIfEmpty(double.NaN).Quantile(0.25);
      }
      return percentile;
    }

    /// <summary>
    /// Returns the 0.75 quantile of the non-missing values of a double column, or of a
    /// DateTime column expressed in seconds. Returns <see cref="double.NaN"/> for other column types.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetThreeQuarterPercentile(int columnIndex) {
      double percentile = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        // DefaultIfEmpty feeds a single NaN to Quantile when there are no values.
        percentile = GetValuesWithoutNaN<double>(columnIndex).DefaultIfEmpty(double.NaN).Quantile(0.75);
      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        percentile = GetDateTimeAsSeconds(columnIndex).DefaultIfEmpty(double.NaN).Quantile(0.75);
      }
      return percentile;
    }

    /// <summary>
    /// Returns the number of distinct values in the column. Values are taken as returned by the
    /// data's <c>GetValues</c> call; no missing-value filtering is applied here.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>Returns whether the given column holds values of type <typeparamref name="T"/>.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool VariableHasType<T>(int columnIndex) {
      return preprocessingData.VariableHasType<T>(columnIndex);
    }

    /// <summary>
    /// Returns a display name for the column's type: "double", "string", "DateTime",
    /// or "Unknown Type" if none of these match.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return "double";
      } else if (preprocessingData.VariableHasType<string>(columnIndex)) {
        return "string";
      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    /// <summary>
    /// Projects the non-missing DateTime values of a column to seconds
    /// (<c>Ticks / TimeSpan.TicksPerSecond</c> as a double, so fractions of a second are kept).
    /// The returned sequence is a lazy projection.
    /// </summary>
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex, bool considerSelection = false) {
      return GetValuesWithoutNaN<DateTime>(columnIndex, considerSelection).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    /// <summary>
    /// Converts a number of seconds back to a <see cref="DateTime"/> by adding them to the
    /// default DateTime value; the inverse of the projection in <see cref="GetDateTimeAsSeconds"/>.
    /// </summary>
    private static DateTime GetSecondsAsDateTime(double seconds) {
      DateTime dateTime = new DateTime();
      return dateTime.AddSeconds(seconds);
    }

    /// <summary>
    /// Forwards subscriptions to the <c>Changed</c> event of the underlying preprocessing data.
    /// </summary>
    public event DataPreprocessingChangedEventHandler Changed {
      add { preprocessingData.Changed += value; }
      remove { preprocessingData.Changed -= value; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.