Free cookie consent management tool by TermsFeed Policy Generator

source: branches/crossvalidation-2434/HeuristicLab.DataPreprocessing/3.4/Implementations/StatisticsLogic.cs @ 12779

Last change on this file since 12779 was 12676, checked in by mkommend, 10 years ago

#2335: Merged changes into trunk.

File size: 7.3 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Provides column- and row-level statistics over preprocessing data by
  /// delegating to an <see cref="ITransactionalPreprocessingData"/> (type and
  /// value access) and an <see cref="ISearchLogic"/> (dimensions, missing
  /// values and NaN-free value access).
  /// </summary>
  public class StatisticsLogic : IStatisticsLogic {

    private readonly ITransactionalPreprocessingData preprocessingData;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates the statistics logic on top of the given data and search logic.
    /// </summary>
    /// <param name="thePreprocessingData">Data store queried for variable types, names and values.</param>
    /// <param name="theSearchLogic">Search logic queried for dimensions, missing values and NaN-free values.</param>
    public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the number of columns reported by the search logic.</summary>
    public int GetColumnCount() {
      return searchLogic.Columns;
    }

    /// <summary>Returns the number of rows reported by the search logic.</summary>
    public int GetRowCount() {
      return searchLogic.Rows;
    }

    /// <summary>Returns the number of columns whose variable type is <c>double</c>.</summary>
    public int GetNumericColumnCount() {
      return Enumerable.Range(0, searchLogic.Columns)
                       .Count(column => preprocessingData.VariableHasType<double>(column));
    }

    /// <summary>
    /// Returns the number of columns that are not of type <c>double</c>
    /// (total column count minus the numeric column count).
    /// </summary>
    public int GetNominalColumnCount() {
      return searchLogic.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the number of missing values summed over all columns.</summary>
    public int GetMissingValueCount() {
      return Enumerable.Range(0, searchLogic.Columns)
                       .Sum(column => GetMissingValueCount(column));
    }

    /// <summary>
    /// Returns the number of missing-value indices the search logic reports for one column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>
    /// Returns the smallest value of a column, or <c>default(T)</c> when the
    /// column is not of type <typeparamref name="T"/> or has no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic's value lookup.</param>
    public T GetMin<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
      if (!preprocessingData.VariableHasType<T>(columnIndex)) {
        return default(T);
      }
      // Materialize once: the sequence may be lazy and would otherwise be
      // enumerated twice (emptiness check + aggregate).
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection).ToList();
      return values.Count > 0 ? values.Min() : default(T);
    }

    /// <summary>
    /// Returns the largest value of a column, or <c>default(T)</c> when the
    /// column is not of type <typeparamref name="T"/> or has no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic's value lookup.</param>
    public T GetMax<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
      if (!preprocessingData.VariableHasType<T>(columnIndex)) {
        return default(T);
      }
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection).ToList();
      return values.Count > 0 ? values.Max() : default(T);
    }

    /// <summary>
    /// Returns the median of a <c>double</c> column, or <see cref="double.NaN"/>
    /// when the column is not of type <c>double</c> or has no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic's value lookup.</param>
    public double GetMedian(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<double>(columnIndex)) {
        return double.NaN;
      }
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection).ToList();
      // Median() is an extension method, presumably from HeuristicLab.Common.
      return values.Count > 0 ? values.Median() : double.NaN;
    }

    /// <summary>
    /// Returns the arithmetic mean of a <c>double</c> column, or
    /// <see cref="double.NaN"/> when the column is not of type <c>double</c>
    /// or has no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic's value lookup.</param>
    public double GetAverage(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<double>(columnIndex)) {
        return double.NaN;
      }
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection).ToList();
      return values.Count > 0 ? values.Average() : double.NaN;
    }

    /// <summary>
    /// Returns the median of a <see cref="DateTime"/> column. Returns
    /// <c>new DateTime()</c> when the column is not of type
    /// <see cref="DateTime"/> or has no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic's value lookup.</param>
    public DateTime GetMedianDateTime(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection).ToList();
      // Guard against empty input, as GetMedian does, so an empty column
      // yields the default value instead of failing in the aggregate.
      if (seconds.Count == 0) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(seconds.Median());
    }

    /// <summary>
    /// Returns the mean of a <see cref="DateTime"/> column. Returns
    /// <c>new DateTime()</c> when the column is not of type
    /// <see cref="DateTime"/> or has no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic's value lookup.</param>
    public DateTime GetAverageDateTime(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection).ToList();
      // Enumerable.Average throws InvalidOperationException on an empty
      // sequence, so an empty column returns the default value instead.
      if (seconds.Count == 0) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(seconds.Average());
    }

    /// <summary>
    /// Returns the value that occurs most often in a column, or
    /// <c>default(T)</c> when the column has no usable values. On ties the
    /// value encountered first wins, because the ordering is stable.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic's value lookup.</param>
    public T GetMostCommonValue<T>(int columnIndex, bool considerSelection) {
      // FirstOrDefault yields default(T) for an empty sequence, so no separate
      // emptiness check (and no second enumeration) is needed.
      return GetValuesWithoutNaN<T>(columnIndex, considerSelection)
        .GroupBy(x => x)
        .OrderByDescending(g => g.Count())
        .Select(g => g.Key)
        .FirstOrDefault();
    }

    /// <summary>
    /// Returns the standard deviation of a <c>double</c> column, or of a
    /// <see cref="DateTime"/> column expressed in seconds. Returns
    /// <see cref="double.NaN"/> for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetStandardDeviation(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).StandardDeviation();
      }
      return double.NaN;
    }

    /// <summary>
    /// Returns the variance of a <c>double</c> column, or of a
    /// <see cref="DateTime"/> column expressed in seconds. Returns
    /// <see cref="double.NaN"/> for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetVariance(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return GetValuesWithoutNaN<double>(columnIndex).Variance();
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).Variance();
      }
      return double.NaN;
    }

    /// <summary>
    /// Returns the number of distinct values in a column, using the default
    /// equality comparer of <typeparamref name="T"/>.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>Returns the number of columns in which the given row has a missing value.</summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      // NOTE(review): this iterates preprocessingData.Columns while the other
      // counters use searchLogic.Columns - presumably identical; confirm.
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(column => searchLogic.IsMissingValue(column, rowIndex));
    }

    /// <summary>Returns the variable name of the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>Reports whether the given column holds values of type <typeparamref name="T"/>.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool VariableHasType<T>(int columnIndex) {
      return preprocessingData.VariableHasType<T>(columnIndex);
    }

    /// <summary>
    /// Returns a display name for the column's type: "double", "string",
    /// "DateTime", or "Unknown Type" when none of these match.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return "double";
      }
      if (preprocessingData.VariableHasType<string>(columnIndex)) {
        return "string";
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    /// <summary>
    /// Projects the usable <see cref="DateTime"/> values of a column to seconds
    /// (ticks divided by <see cref="TimeSpan.TicksPerSecond"/>).
    /// </summary>
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex, bool considerSelection = false) {
      return GetValuesWithoutNaN<DateTime>(columnIndex, considerSelection).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    /// <summary>Forwards to the search logic's NaN-free value lookup for the column.</summary>
    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection = false) {
      return searchLogic.GetValuesWithoutNaN<T>(columnIndex, considerSelection);
    }

    /// <summary>
    /// Converts a seconds value back into a <see cref="DateTime"/> by adding it
    /// to a default-constructed <see cref="DateTime"/>; the inverse of
    /// <see cref="GetDateTimeAsSeconds"/>.
    /// </summary>
    private DateTime GetSecondsAsDateTime(double seconds) {
      DateTime dateTime = new DateTime();
      return dateTime.AddSeconds(seconds);
    }

    /// <summary>
    /// Raised when the underlying preprocessing data changes; subscriptions are
    /// forwarded directly to the data object's <c>Changed</c> event.
    /// </summary>
    public event DataPreprocessingChangedEventHandler Changed {
      add { preprocessingData.Changed += value; }
      remove { preprocessingData.Changed -= value; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.