Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.DataPreprocessing/3.4/Logic/StatisticsLogic.cs @ 14077

Last change on this file since 14077 was 14077, checked in by mkommend, 8 years ago

#2616: Merged r13934, r13935 into stable.

File size: 8.4 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
[10383]23using System.Collections.Generic;
[10165]24using System.Linq;
[10216]25using HeuristicLab.Common;
[10148]26
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Computes per-column and per-row statistics on top of an
  /// <see cref="ITransactionalPreprocessingData"/> instance, using a
  /// <see cref="SearchLogic"/> to obtain dimensions, missing-value information
  /// and the value sequences the statistics are calculated from.
  /// </summary>
  public class StatisticsLogic {

    private readonly ITransactionalPreprocessingData preprocessingData;
    private readonly SearchLogic searchLogic;

    /// <summary>
    /// Creates a statistics facade over the given data and search logic.
    /// </summary>
    /// <param name="thePreprocessingData">Data whose columns are inspected.</param>
    /// <param name="theSearchLogic">Search logic used to query dimensions and values.</param>
    public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, SearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the column count reported by the search logic.</summary>
    public int GetColumnCount() {
      return searchLogic.Columns;
    }

    /// <summary>Returns the row count reported by the search logic.</summary>
    public int GetRowCount() {
      return searchLogic.Rows;
    }

    /// <summary>Counts the columns whose variable type is <see cref="double"/>.</summary>
    public int GetNumericColumnCount() {
      int numericColumns = 0;
      for (int column = 0; column < searchLogic.Columns; ++column) {
        if (preprocessingData.VariableHasType<double>(column)) {
          ++numericColumns;
        }
      }
      return numericColumns;
    }

    /// <summary>
    /// Returns the number of columns that are not of type <see cref="double"/>,
    /// i.e. the total column count minus <see cref="GetNumericColumnCount"/>.
    /// </summary>
    public int GetNominalColumnCount() {
      return searchLogic.Columns - GetNumericColumnCount();
    }

    /// <summary>Sums the per-column missing value counts over all columns.</summary>
    public int GetMissingValueCount() {
      int total = 0;
      for (int column = 0; column < searchLogic.Columns; ++column) {
        total += GetMissingValueCount(column);
      }
      return total;
    }

    /// <summary>
    /// Returns how many missing-value indices the search logic reports for the column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>
    /// Returns the smallest value of the column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="defaultValue">Returned when the column is not of type <typeparamref name="T"/> or yields no values.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public T GetMin<T>(int columnIndex, T defaultValue, bool considerSelection = false) where T : IComparable<T> {
      if (!preprocessingData.VariableHasType<T>(columnIndex)) {
        return defaultValue;
      }
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
      return values.Any() ? values.Min() : defaultValue;
    }

    /// <summary>
    /// Returns the largest value of the column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="defaultValue">Returned when the column is not of type <typeparamref name="T"/> or yields no values.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public T GetMax<T>(int columnIndex, T defaultValue, bool considerSelection = false) where T : IComparable<T> {
      if (!preprocessingData.VariableHasType<T>(columnIndex)) {
        return defaultValue;
      }
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
      return values.Any() ? values.Max() : defaultValue;
    }

    /// <summary>
    /// Returns the median of a <see cref="double"/> column, or <see cref="double.NaN"/>
    /// when the column has a different type or yields no values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public double GetMedian(int columnIndex, bool considerSelection = false) {
      if (!preprocessingData.VariableHasType<double>(columnIndex)) {
        return double.NaN;
      }
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
      return values.Any() ? values.Median() : double.NaN;
    }

    /// <summary>
    /// Returns the arithmetic mean of a <see cref="double"/> column, or
    /// <see cref="double.NaN"/> when the column has a different type or yields no values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public double GetAverage(int columnIndex, bool considerSelection = false) {
      if (!preprocessingData.VariableHasType<double>(columnIndex)) {
        return double.NaN;
      }
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
      return values.Any() ? values.Average() : double.NaN;
    }

    /// <summary>
    /// Returns the median of a <see cref="DateTime"/> column, computed on the values
    /// expressed in seconds. Returns the default <see cref="DateTime"/> when the column
    /// has a different type or yields no values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public DateTime GetMedianDateTime(int columnIndex, bool considerSelection = false) {
      if (!preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection);
      // Same empty-input guard as GetMedian: never aggregate an empty sequence.
      if (!seconds.Any()) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(seconds.Median());
    }

    /// <summary>
    /// Returns the arithmetic mean of a <see cref="DateTime"/> column, computed on the
    /// values expressed in seconds. Returns the default <see cref="DateTime"/> when the
    /// column has a different type or yields no values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public DateTime GetAverageDateTime(int columnIndex, bool considerSelection = false) {
      if (!preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection);
      // Enumerable.Average throws InvalidOperationException on an empty sequence,
      // so an empty column is answered with the default value instead.
      if (!seconds.Any()) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(seconds.Average());
    }

    /// <summary>
    /// Returns the value that occurs most often in the column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="defaultValue">Returned when the column yields no values.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public T GetMostCommonValue<T>(int columnIndex, T defaultValue, bool considerSelection = false) {
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
      if (!values.Any()) {
        return defaultValue;
      }
      // OrderByDescending is a stable sort, so among equally frequent values
      // the one encountered first wins.
      var groupsByFrequency = values.GroupBy(x => x).OrderByDescending(g => g.Count());
      return groupsByFrequency.First().Key;
    }

    /// <summary>
    /// Returns the standard deviation of a <see cref="double"/> column, or of a
    /// <see cref="DateTime"/> column expressed in seconds. Returns
    /// <see cref="double.NaN"/> for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetStandardDeviation(int columnIndex) {
      var values = GetValuesAsDoubles(columnIndex);
      return values == null ? double.NaN : values.StandardDeviation();
    }

    /// <summary>
    /// Returns the variance of a <see cref="double"/> column, or of a
    /// <see cref="DateTime"/> column expressed in seconds. Returns
    /// <see cref="double.NaN"/> for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetVariance(int columnIndex) {
      var values = GetValuesAsDoubles(columnIndex);
      return values == null ? double.NaN : values.Variance();
    }

    /// <summary>
    /// Returns the 0.25 quantile of a <see cref="double"/> column, or of a
    /// <see cref="DateTime"/> column expressed in seconds. Returns
    /// <see cref="double.NaN"/> for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetOneQuarterPercentile(int columnIndex) {
      return GetQuantile(columnIndex, 0.25);
    }

    /// <summary>
    /// Returns the 0.75 quantile of a <see cref="double"/> column, or of a
    /// <see cref="DateTime"/> column expressed in seconds. Returns
    /// <see cref="double.NaN"/> for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetThreeQuarterPercentile(int columnIndex) {
      return GetQuantile(columnIndex, 0.75);
    }

    /// <summary>
    /// Counts the distinct values of the column, using the default equality
    /// comparer of <typeparamref name="T"/>.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>
    /// Counts the columns for which the search logic reports a missing value in the given row.
    /// </summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      int missing = 0;
      for (int column = 0; column < preprocessingData.Columns; ++column) {
        if (searchLogic.IsMissingValue(column, rowIndex)) {
          ++missing;
        }
      }
      return missing;
    }

    /// <summary>Returns the variable name of the column as stored in the preprocessing data.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>
    /// Tells whether the preprocessing data reports the column as being of type
    /// <typeparamref name="T"/>.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool VariableHasType<T>(int columnIndex) {
      return preprocessingData.VariableHasType<T>(columnIndex);
    }

    /// <summary>
    /// Returns a display name for the column type: "double", "string", "DateTime",
    /// or "Unknown Type" when the column is none of these.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return "double";
      }
      if (preprocessingData.VariableHasType<string>(columnIndex)) {
        return "string";
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    /// <summary>
    /// Returns the column's values as doubles: the values themselves for a
    /// <see cref="double"/> column, the values converted to seconds for a
    /// <see cref="DateTime"/> column, and <c>null</c> for any other column type.
    /// </summary>
    private IEnumerable<double> GetValuesAsDoubles(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return GetValuesWithoutNaN<double>(columnIndex);
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex);
      }
      return null;
    }

    /// <summary>
    /// Computes the requested quantile for a double or DateTime column; an empty
    /// column is replaced by a single NaN before the quantile is taken.
    /// </summary>
    private double GetQuantile(int columnIndex, double quantile) {
      var values = GetValuesAsDoubles(columnIndex);
      if (values == null) {
        return double.NaN;
      }
      return values.DefaultIfEmpty(double.NaN).Quantile(quantile);
    }

    /// <summary>Converts the column's DateTime values to seconds (ticks / ticks-per-second).</summary>
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex, bool considerSelection = false) {
      return GetValuesWithoutNaN<DateTime>(columnIndex, considerSelection).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    /// <summary>Fetches the column's values through <c>SearchLogic.GetValuesWithoutNaN</c>.</summary>
    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection = false) {
      return searchLogic.GetValuesWithoutNaN<T>(columnIndex, considerSelection);
    }

    /// <summary>Converts a second count back to a DateTime by adding it to the default DateTime.</summary>
    private DateTime GetSecondsAsDateTime(double seconds) {
      DateTime origin = new DateTime();
      return origin.AddSeconds(seconds);
    }

    /// <summary>
    /// Forwards subscriptions to the <c>Changed</c> event of the underlying preprocessing data.
    /// </summary>
    public event DataPreprocessingChangedEventHandler Changed {
      add { preprocessingData.Changed += value; }
      remove { preprocessingData.Changed -= value; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.