Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.DataPreprocessing/3.4/Implementations/StatisticsLogic.cs @ 13329

Last change on this file since 13329 was 13151, checked in by gkronber, 9 years ago

#2491: merged r13034 and r13051 from trunk to stable branch

File size: 8.2 KB
RevLine 
[10539]1#region License Information
2/* HeuristicLab
[12009]3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[10539]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[10383]23using System.Collections.Generic;
[10165]24using System.Linq;
[10216]25using HeuristicLab.Common;
[10148]26
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes descriptive statistics (counts, min/max, median, average, spread, quartiles)
  /// for the columns and rows of an <see cref="ITransactionalPreprocessingData"/> instance.
  /// Dimensions, missing-value information and NaN-filtered values are obtained from the
  /// supplied <see cref="ISearchLogic"/>.
  /// </summary>
  public class StatisticsLogic : IStatisticsLogic {

    private readonly ITransactionalPreprocessingData preprocessingData;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates a statistics logic operating on the given data and search logic.
    /// </summary>
    /// <param name="thePreprocessingData">Data whose column types, values and names are queried.</param>
    /// <param name="theSearchLogic">Search logic used for dimensions, missing values and filtered values.</param>
    public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the number of columns reported by the search logic.</summary>
    public int GetColumnCount() {
      return searchLogic.Columns;
    }

    /// <summary>Returns the number of rows reported by the search logic.</summary>
    public int GetRowCount() {
      return searchLogic.Rows;
    }

    /// <summary>Returns the number of columns whose variable type is <c>double</c>.</summary>
    public int GetNumericColumnCount() {
      return Enumerable.Range(0, searchLogic.Columns)
                       .Count(column => preprocessingData.VariableHasType<double>(column));
    }

    /// <summary>
    /// Returns the number of columns that are not of type <c>double</c>
    /// (i.e. every non-numeric column, whatever its type, is counted here).
    /// </summary>
    public int GetNominalColumnCount() {
      return searchLogic.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the total number of missing values, summed over all columns.</summary>
    public int GetMissingValueCount() {
      return Enumerable.Range(0, searchLogic.Columns)
                       .Sum(column => GetMissingValueCount(column));
    }

    /// <summary>Returns the number of missing-value indices the search logic reports for one column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>
    /// Returns the smallest value of the column, or <c>default(T)</c> when the column is not
    /// of type <typeparamref name="T"/> or has no values left after filtering.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public T GetMin<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
      if (!preprocessingData.VariableHasType<T>(columnIndex)) {
        return default(T);
      }
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
      // Enumerable.Min throws on an empty sequence of a non-nullable value type.
      return values.Any() ? values.Min() : default(T);
    }

    /// <summary>
    /// Returns the largest value of the column, or <c>default(T)</c> when the column is not
    /// of type <typeparamref name="T"/> or has no values left after filtering.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public T GetMax<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
      if (!preprocessingData.VariableHasType<T>(columnIndex)) {
        return default(T);
      }
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
      // Enumerable.Max throws on an empty sequence of a non-nullable value type.
      return values.Any() ? values.Max() : default(T);
    }

    /// <summary>
    /// Returns the median of a <c>double</c> column, or <see cref="double.NaN"/> when the
    /// column is not of type <c>double</c> or has no values left after filtering.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public double GetMedian(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<double>(columnIndex)) {
        return double.NaN;
      }
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
      return values.Any() ? values.Median() : double.NaN;
    }

    /// <summary>
    /// Returns the arithmetic mean of a <c>double</c> column, or <see cref="double.NaN"/> when
    /// the column is not of type <c>double</c> or has no values left after filtering.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public double GetAverage(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<double>(columnIndex)) {
        return double.NaN;
      }
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
      return values.Any() ? values.Average() : double.NaN;
    }

    /// <summary>
    /// Returns the median of a <see cref="DateTime"/> column. Returns <c>new DateTime()</c>
    /// when the column is not of type <see cref="DateTime"/> or has no values left after filtering.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public DateTime GetMedianDateTime(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection);
      // Guard the empty case like GetMedian does; a NaN result would make AddSeconds throw.
      return seconds.Any() ? GetSecondsAsDateTime(seconds.Median()) : new DateTime();
    }

    /// <summary>
    /// Returns the average of a <see cref="DateTime"/> column. Returns <c>new DateTime()</c>
    /// when the column is not of type <see cref="DateTime"/> or has no values left after filtering.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public DateTime GetAverageDateTime(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection);
      // Enumerable.Average throws InvalidOperationException on an empty sequence.
      return seconds.Any() ? GetSecondsAsDateTime(seconds.Average()) : new DateTime();
    }

    /// <summary>
    /// Returns the value that occurs most often in the column, or <c>default(T)</c> when the
    /// column has no values left after filtering.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public T GetMostCommonValue<T>(int columnIndex, bool considerSelection) {
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
      if (!values.Any()) {
        return default(T);
      }
      // OrderByDescending is a stable sort, so ties keep the order in which values first appear.
      return values.GroupBy(x => x)
                   .OrderByDescending(g => g.Count())
                   .Select(g => g.Key)
                   .First();
    }

    /// <summary>
    /// Returns the standard deviation of a <c>double</c> column, or of a <see cref="DateTime"/>
    /// column expressed in seconds; <see cref="double.NaN"/> for other types or empty columns.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetStandardDeviation(int columnIndex) {
      return ComputeNumericStatistic(columnIndex, values => values.StandardDeviation());
    }

    /// <summary>
    /// Returns the variance of a <c>double</c> column, or of a <see cref="DateTime"/>
    /// column expressed in seconds; <see cref="double.NaN"/> for other types or empty columns.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetVariance(int columnIndex) {
      return ComputeNumericStatistic(columnIndex, values => values.Variance());
    }

    /// <summary>
    /// Returns the 0.25 quantile of a <c>double</c> column, or of a <see cref="DateTime"/>
    /// column expressed in seconds; <see cref="double.NaN"/> for other types or empty columns.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetOneQuarterPercentile(int columnIndex) {
      return ComputeNumericStatistic(columnIndex, values => values.Quantile(0.25));
    }

    /// <summary>
    /// Returns the 0.75 quantile of a <c>double</c> column, or of a <see cref="DateTime"/>
    /// column expressed in seconds; <see cref="double.NaN"/> for other types or empty columns.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetThreeQuarterPercentile(int columnIndex) {
      return ComputeNumericStatistic(columnIndex, values => values.Quantile(0.75));
    }

    /// <summary>
    /// Returns the number of distinct values in the column. Unlike the other statistics this
    /// reads the raw values from the preprocessing data, not the NaN-filtered ones.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>Returns the number of columns that have a missing value in the given row.</summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      // NOTE(review): iterates preprocessingData.Columns while the other counters use
      // searchLogic.Columns; kept as in the original - confirm both always agree.
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(column => searchLogic.IsMissingValue(column, rowIndex));
    }

    /// <summary>Returns the variable name of the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>Returns whether the given column holds values of type <typeparamref name="T"/>.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool VariableHasType<T>(int columnIndex) {
      return preprocessingData.VariableHasType<T>(columnIndex);
    }

    /// <summary>
    /// Returns a display name for the column type: "double", "string", "DateTime",
    /// or "Unknown Type" when the column is none of these.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return "double";
      }
      if (preprocessingData.VariableHasType<string>(columnIndex)) {
        return "string";
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    /// <summary>
    /// Applies <paramref name="statistic"/> to the values of a <c>double</c> column, or to the
    /// values of a <see cref="DateTime"/> column converted to seconds (selection not considered).
    /// Returns <see cref="double.NaN"/> for any other column type and for columns without
    /// values, matching what GetMedian and GetAverage return in those cases.
    /// </summary>
    private double ComputeNumericStatistic(int columnIndex, Func<IEnumerable<double>, double> statistic) {
      IEnumerable<double> values;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        values = GetValuesWithoutNaN<double>(columnIndex);
      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        values = GetDateTimeAsSeconds(columnIndex);
      } else {
        return double.NaN;
      }
      return values.Any() ? statistic(values) : double.NaN;
    }

    /// <summary>
    /// Returns the filtered <see cref="DateTime"/> values of a column as seconds
    /// (ticks divided by <see cref="TimeSpan.TicksPerSecond"/>).
    /// </summary>
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex, bool considerSelection = false) {
      return GetValuesWithoutNaN<DateTime>(columnIndex, considerSelection)
        .Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    /// <summary>Fetches the column values from the search logic's <c>GetValuesWithoutNaN</c>.</summary>
    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection = false) {
      return searchLogic.GetValuesWithoutNaN<T>(columnIndex, considerSelection);
    }

    /// <summary>
    /// Inverse of <see cref="GetDateTimeAsSeconds"/>: adds the given number of seconds to
    /// <c>new DateTime()</c>.
    /// </summary>
    private static DateTime GetSecondsAsDateTime(double seconds) {
      return new DateTime().AddSeconds(seconds);
    }

    /// <summary>
    /// Forwards subscriptions to the <c>Changed</c> event of the underlying preprocessing data.
    /// </summary>
    public event DataPreprocessingChangedEventHandler Changed {
      add { preprocessingData.Changed += value; }
      remove { preprocessingData.Changed -= value; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.