Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs @ 10611

Last change on this file since 10611 was 10586, checked in by tsteinre, 11 years ago
  • divided/refactored PreprocessingData into TransactionalPreprocessingData and preprocessingData
File size: 6.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26
27namespace HeuristicLab.DataPreprocessing {
28
29  public class StatisticsLogic : IStatisticsLogic {
30
31    private readonly ITransactionalPreprocessingData preprocessingData;
32    private readonly ISearchLogic searchLogic;
33
34    public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
35      preprocessingData = thePreprocessingData;
36      searchLogic = theSearchLogic;
37    }
38
39    public int GetColumnCount() {
40      return preprocessingData.Columns;
41    }
42
43    public int GetRowCount() {
44      return preprocessingData.Rows;
45    }
46
47    public int GetNumericColumnCount() {
48      int count = 0;
49
50      for (int i = 0; i < preprocessingData.Columns; ++i) {
51        if (preprocessingData.IsType<double>(i)) {
52          ++count;
53        }
54      }
55      return count;
56    }
57
58    public int GetNominalColumnCount() {
59      return preprocessingData.Columns - GetNumericColumnCount();
60    }
61
62    public int GetMissingValueCount() {
63      int count = 0;
64      for (int i = 0; i < preprocessingData.Columns; ++i) {
65        count += GetMissingValueCount(i);
66      }
67      return count;
68    }
69
70    public int GetMissingValueCount(int columnIndex) {
71      return searchLogic.GetMissingValueIndices(columnIndex).Count();
72    }
73
74    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
75      return preprocessingData.GetValues<T>(columnIndex).Min();
76    }
77
78    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
79      return preprocessingData.GetValues<T>(columnIndex).Max();
80    }
81
82    public double GetMedian(int columnIndex) {
83      double median = double.NaN;
84      if (preprocessingData.IsType<double>(columnIndex)) {
85        median = preprocessingData.GetValues<double>(columnIndex).Median();
86      }
87      return median;
88    }
89
90    public double GetAverage(int columnIndex) {
91      double avg = double.NaN;
92      if (preprocessingData.IsType<double>(columnIndex)) {
93        avg = preprocessingData.GetValues<double>(columnIndex).Average();
94      }
95      return avg;
96    }
97
98    public DateTime GetMedianDateTime(int columnIndex) {
99      DateTime median = new DateTime();
100      if (preprocessingData.IsType<DateTime>(columnIndex)) {
101        median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median());
102      }
103      return median;
104    }
105
106    public DateTime GetAverageDateTime(int columnIndex) {
107      DateTime avg = new DateTime();
108      if (preprocessingData.IsType<DateTime>(columnIndex)) {
109        avg = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Average());
110      }
111      return avg;
112    }
113
114    public T GetMostCommonValue<T>(int columnIndex) {
115      var t = preprocessingData.GetValues<T>(columnIndex);
116      var t2 = t.GroupBy(x => x);
117      var t3 = t2.Select(g => g.Key);
118
119      return preprocessingData.GetValues<T>(columnIndex)
120                              .GroupBy(x => x)
121                              .OrderByDescending(g => g.Count())
122                              .Select(g => g.Key)
123                              .First();
124    }
125
126
127    public double GetStandardDeviation(int columnIndex) {
128      double stdDev = double.NaN;
129      if (preprocessingData.IsType<double>(columnIndex)) {
130        stdDev = preprocessingData.GetValues<double>(columnIndex).StandardDeviation();
131      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
132        stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
133      }
134      return stdDev;
135    }
136
137    public double GetVariance(int columnIndex) {
138      double variance = double.NaN;
139      if (preprocessingData.IsType<double>(columnIndex)) {
140        variance = preprocessingData.GetValues<double>(columnIndex).Variance();
141      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
142        variance = GetDateTimeAsSeconds(columnIndex).Variance();
143      }
144      return variance;
145    }
146
147    public int GetDifferentValuesCount<T>(int columnIndex) {
148      return preprocessingData.GetValues<T>(columnIndex).GroupBy(x => x).Count();
149    }
150
151    public int GetRowMissingValueCount(int rowIndex) {
152      int count = 0;
153      for (int i = 0; i < preprocessingData.Columns; ++i) {
154        if (searchLogic.IsMissingValue(i, rowIndex)) {
155          ++count;
156        }
157      }
158      return count;
159    }
160
161    public string GetVariableName(int columnIndex) {
162      return preprocessingData.GetVariableName(columnIndex);
163    }
164
165    public bool IsType<T>(int columnIndex) {
166      return preprocessingData.IsType<T>(columnIndex);
167    }
168
169    public string GetColumnTypeAsString(int columnIndex) {
170      if (preprocessingData.IsType<double>(columnIndex)) {
171        return "double";
172      } else if (preprocessingData.IsType<string>(columnIndex)) {
173        return "string";
174      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
175        return "DateTime";
176      }
177      return "Unknown Type";
178    }
179    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
180      return preprocessingData.GetValues<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
181    }
182
183    private DateTime GetSecondsAsDateTime(double seconds) {
184      DateTime dateTime = new DateTime();
185      return dateTime.Add(new TimeSpan(0, 0, (int)seconds));
186    }
187
188    public event DataPreprocessingChangedEventHandler Changed {
189      add { preprocessingData.Changed += value; }
190      remove { preprocessingData.Changed -= value; }
191    }
192  }
193}
Note: See TracBrowser for help on using the repository browser.