Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/StatisticsLogic.cs @ 12199

Last change on this file since 12199 was 12012, checked in by ascheibe, 10 years ago

#2212 merged r12008, r12009, r12010 back into trunk

File size: 7.0 KB
RevLine 
[10539]1#region License Information
2/* HeuristicLab
[12012]3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[10539]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[10383]23using System.Collections.Generic;
[10165]24using System.Linq;
[10216]25using HeuristicLab.Common;
[10148]26
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes column- and row-level statistics over the data exposed by an
  /// <see cref="ITransactionalPreprocessingData"/>. Missing-value look-ups and
  /// filtering are delegated to an <see cref="ISearchLogic"/>.
  /// </summary>
  public class StatisticsLogic : IStatisticsLogic {

    private readonly ITransactionalPreprocessingData preprocessingData;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates a statistics logic bound to the given data and search logic.
    /// </summary>
    /// <param name="thePreprocessingData">Data source all statistics are computed on.</param>
    /// <param name="theSearchLogic">Helper used for missing-value queries.</param>
    public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the number of columns reported by the data source.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows reported by the data source.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Returns the number of columns whose variable type is <c>double</c>.</summary>
    public int GetNumericColumnCount() {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(i => preprocessingData.VariableHasType<double>(i));
    }

    /// <summary>
    /// Returns the number of columns that are not of type <c>double</c>
    /// (total column count minus <see cref="GetNumericColumnCount"/>).
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the number of missing values summed over all columns.</summary>
    public int GetMissingValueCount() {
      int total = 0;
      for (int column = 0; column < preprocessingData.Columns; ++column) {
        total += GetMissingValueCount(column);
      }
      return total;
    }

    /// <summary>
    /// Returns the number of missing-value indices the search logic reports
    /// for the given column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>
    /// Returns the minimum of the values of the given column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the data source's <c>GetValues</c>.</param>
    /// <remarks>
    /// NOTE(review): the values are read directly from the data source and are not
    /// passed through the missing-value filter used by the other statistics; if
    /// missing entries are represented as NaN they take part in the comparison - confirm
    /// that this is intended.
    /// </remarks>
    public T GetMin<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex, considerSelection).Min();
    }

    /// <summary>
    /// Returns the maximum of the values of the given column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the data source's <c>GetValues</c>.</param>
    /// <remarks>
    /// NOTE(review): as with <see cref="GetMin{T}"/>, the values are not filtered
    /// for missing entries before the comparison - confirm that this is intended.
    /// </remarks>
    public T GetMax<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex, considerSelection).Max();
    }

    /// <summary>
    /// Returns the median of a <c>double</c> column, computed on the values
    /// returned by the search logic's <c>GetValuesWithoutNaN</c>.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic.</param>
    /// <returns>
    /// The median, or <see cref="double.NaN"/> if the column is not of type
    /// <c>double</c> or no values remain after filtering.
    /// </returns>
    public double GetMedian(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<double>(columnIndex)) {
        return double.NaN;
      }
      // Materialise once so the emptiness check and the median do not enumerate the source twice.
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection).ToList();
      return values.Count > 0 ? values.Median() : double.NaN;
    }

    /// <summary>
    /// Returns the arithmetic mean of a <c>double</c> column, computed on the
    /// values returned by the search logic's <c>GetValuesWithoutNaN</c>.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic.</param>
    /// <returns>
    /// The mean, or <see cref="double.NaN"/> if the column is not of type
    /// <c>double</c> or no values remain after filtering.
    /// </returns>
    public double GetAverage(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<double>(columnIndex)) {
        return double.NaN;
      }
      // Enumerable.Average throws InvalidOperationException on an empty sequence;
      // report "not computable" with the same sentinel used for non-numeric columns instead.
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection).ToList();
      return values.Count > 0 ? values.Average() : double.NaN;
    }

    /// <summary>
    /// Returns the median of a <see cref="DateTime"/> column. The median is
    /// computed on the values converted to seconds and converted back afterwards.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic.</param>
    /// <returns>
    /// The median, or a default-constructed <see cref="DateTime"/> if the column is
    /// not of type <see cref="DateTime"/> or no values remain after filtering.
    /// </returns>
    public DateTime GetMedianDateTime(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection).ToList();
      return seconds.Count > 0 ? GetSecondsAsDateTime(seconds.Median()) : new DateTime();
    }

    /// <summary>
    /// Returns the mean of a <see cref="DateTime"/> column. The mean is
    /// computed on the values converted to seconds and converted back afterwards.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the search logic.</param>
    /// <returns>
    /// The mean, or a default-constructed <see cref="DateTime"/> if the column is
    /// not of type <see cref="DateTime"/> or no values remain after filtering.
    /// </returns>
    public DateTime GetAverageDateTime(int columnIndex, bool considerSelection) {
      if (!preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      // Same empty-sequence guard as GetAverage: Enumerable.Average throws on no elements.
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection).ToList();
      return seconds.Count > 0 ? GetSecondsAsDateTime(seconds.Average()) : new DateTime();
    }

    /// <summary>
    /// Returns the value that occurs most often in the given column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the data source's <c>GetValues</c>.</param>
    /// <exception cref="InvalidOperationException">The column yields no values.</exception>
    public T GetMostCommonValue<T>(int columnIndex, bool considerSelection) {
      // OrderByDescending is a stable sort, so among equally frequent values
      // the one whose group was created first wins.
      return preprocessingData.GetValues<T>(columnIndex, considerSelection)
                              .GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .First();
    }

    /// <summary>
    /// Returns the standard deviation of a <c>double</c> or <see cref="DateTime"/>
    /// column (the latter computed on the values converted to seconds).
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The standard deviation, or <see cref="double.NaN"/> for any other column type.</returns>
    public double GetStandardDeviation(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).StandardDeviation();
      }
      return double.NaN;
    }

    /// <summary>
    /// Returns the variance of a <c>double</c> or <see cref="DateTime"/> column
    /// (the latter computed on the values converted to seconds).
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The variance, or <see cref="double.NaN"/> for any other column type.</returns>
    public double GetVariance(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return GetValuesWithoutNaN<double>(columnIndex).Variance();
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).Variance();
      }
      return double.NaN;
    }

    /// <summary>
    /// Returns the number of distinct values in the given column, using the
    /// default equality comparer of <typeparamref name="T"/>.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      // Distinct yields one element per key without building the per-key groups.
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>
    /// Returns the number of columns for which the search logic reports a
    /// missing value in the given row.
    /// </summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(column => searchLogic.IsMissingValue(column, rowIndex));
    }

    /// <summary>Returns the variable name the data source reports for the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>
    /// Returns whether the data source reports type <typeparamref name="T"/>
    /// for the given column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool VariableHasType<T>(int columnIndex) {
      return preprocessingData.VariableHasType<T>(columnIndex);
    }

    /// <summary>
    /// Returns a display name for the column's type: <c>"double"</c>,
    /// <c>"string"</c>, <c>"DateTime"</c>, or <c>"Unknown Type"</c> otherwise.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return "double";
      }
      if (preprocessingData.VariableHasType<string>(columnIndex)) {
        return "string";
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    // Converts the filtered DateTime values of a column to seconds (ticks / ticks-per-second).
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex, bool considerSelection = false) {
      return GetValuesWithoutNaN<DateTime>(columnIndex, considerSelection).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    // Thin wrapper around the search logic's filtered value access.
    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection = false) {
      return searchLogic.GetValuesWithoutNaN<T>(columnIndex, considerSelection);
    }

    // Inverse of GetDateTimeAsSeconds: offsets a default-constructed DateTime by the given seconds.
    private DateTime GetSecondsAsDateTime(double seconds) {
      return new DateTime().AddSeconds(seconds);
    }

    /// <summary>
    /// Change notification; subscriptions are forwarded to the underlying
    /// data source's <c>Changed</c> event.
    /// </summary>
    public event DataPreprocessingChangedEventHandler Changed {
      add { preprocessingData.Changed += value; }
      remove { preprocessingData.Changed -= value; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.