Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs @ 10704

Last change on this file since 10704 was 10663, checked in by rstoll, 11 years ago

removed unnecessary check for IsNan in GetAverage

File size: 6.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using System.Collections;
27
28namespace HeuristicLab.DataPreprocessing {
29
30  public class StatisticsLogic : IStatisticsLogic {
31
32    private readonly ITransactionalPreprocessingData preprocessingData;
33    private readonly ISearchLogic searchLogic;
34
35    public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
36      preprocessingData = thePreprocessingData;
37      searchLogic = theSearchLogic;
38    }
39
40    public int GetColumnCount() {
41      return preprocessingData.Columns;
42    }
43
44    public int GetRowCount() {
45      return preprocessingData.Rows;
46    }
47
48    public int GetNumericColumnCount() {
49      int count = 0;
50
51      for (int i = 0; i < preprocessingData.Columns; ++i) {
52        if (preprocessingData.IsType<double>(i)) {
53          ++count;
54        }
55      }
56      return count;
57    }
58
59    public int GetNominalColumnCount() {
60      return preprocessingData.Columns - GetNumericColumnCount();
61    }
62
63    public int GetMissingValueCount() {
64      int count = 0;
65      for (int i = 0; i < preprocessingData.Columns; ++i) {
66        count += GetMissingValueCount(i);
67      }
68      return count;
69    }
70
71    public int GetMissingValueCount(int columnIndex) {
72      return searchLogic.GetMissingValueIndices(columnIndex).Count();
73    }
74
75    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
76      return preprocessingData.GetValues<T>(columnIndex).Min();
77    }
78
79    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
80      return preprocessingData.GetValues<T>(columnIndex).Max();
81    }
82
83    public double GetMedian(int columnIndex) {
84      double median = double.NaN;
85      if (preprocessingData.IsType<double>(columnIndex)) {
86        median = GetValuesWithoutNaN<double>(columnIndex).Median();
87      }
88      return median;
89    }
90
91    public double GetAverage(int columnIndex) {
92      double avg = double.NaN;
93      if (preprocessingData.IsType<double>(columnIndex)) {
94        avg = GetValuesWithoutNaN<double>(columnIndex).Average();
95      }
96      return avg;
97    }
98
99    public DateTime GetMedianDateTime(int columnIndex) {
100      DateTime median = new DateTime();
101      if (preprocessingData.IsType<DateTime>(columnIndex)) {
102        median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median());
103      }
104      return median;
105    }
106
107    public DateTime GetAverageDateTime(int columnIndex) {
108      DateTime avg = new DateTime();
109      if (preprocessingData.IsType<DateTime>(columnIndex)) {
110        avg = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Average());
111      }
112      return avg;
113    }
114
115    public T GetMostCommonValue<T>(int columnIndex) {
116      var t = preprocessingData.GetValues<T>(columnIndex);
117      var t2 = t.GroupBy(x => x);
118      var t3 = t2.Select(g => g.Key);
119
120      return preprocessingData.GetValues<T>(columnIndex)
121                              .GroupBy(x => x)
122                              .OrderByDescending(g => g.Count())
123                              .Select(g => g.Key)
124                              .First();
125    }
126
127
128    public double GetStandardDeviation(int columnIndex) {
129      double stdDev = double.NaN;
130      if (preprocessingData.IsType<double>(columnIndex)) {
131        stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
132      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
133        stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
134      }
135      return stdDev;
136    }
137
138    public double GetVariance(int columnIndex) {
139      double variance = double.NaN;
140      if (preprocessingData.IsType<double>(columnIndex)) {
141        variance = preprocessingData.GetValues<double>(columnIndex).Variance();
142      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
143        variance = GetDateTimeAsSeconds(columnIndex).Variance();
144      }
145      return variance;
146    }
147
148    public int GetDifferentValuesCount<T>(int columnIndex) {
149      return preprocessingData.GetValues<T>(columnIndex).GroupBy(x => x).Count();
150    }
151
152    public int GetRowMissingValueCount(int rowIndex) {
153      int count = 0;
154      for (int i = 0; i < preprocessingData.Columns; ++i) {
155        if (searchLogic.IsMissingValue(i, rowIndex)) {
156          ++count;
157        }
158      }
159      return count;
160    }
161
162    public string GetVariableName(int columnIndex) {
163      return preprocessingData.GetVariableName(columnIndex);
164    }
165
166    public bool IsType<T>(int columnIndex) {
167      return preprocessingData.IsType<T>(columnIndex);
168    }
169
170    public string GetColumnTypeAsString(int columnIndex) {
171      if (preprocessingData.IsType<double>(columnIndex)) {
172        return "double";
173      } else if (preprocessingData.IsType<string>(columnIndex)) {
174        return "string";
175      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
176        return "DateTime";
177      }
178      return "Unknown Type";
179    }
180
181    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
182      return GetValuesWithoutNaN<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
183    }
184
185    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex) {
186      return searchLogic.GetValuesWithoutNaN<T>(columnIndex);
187    }
188
189    private DateTime GetSecondsAsDateTime(double seconds) {
190      DateTime dateTime = new DateTime();
191      return dateTime.AddSeconds(seconds);
192    }
193
194    public event DataPreprocessingChangedEventHandler Changed {
195      add { preprocessingData.Changed += value; }
196      remove { preprocessingData.Changed -= value; }
197    }
198  }
199}
Note: See TracBrowser for help on using the repository browser.