Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs @ 10783

Last change on this file since 10783 was 10663, checked in by rstoll, 11 years ago

Removed unnecessary check for IsNaN in GetAverage.

File size: 6.5 KB
RevLine 
[10539]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[10383]23using System.Collections.Generic;
[10165]24using System.Linq;
[10216]25using HeuristicLab.Common;
[10661]26using System.Collections;
[10148]27
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes column- and row-level statistics over an
  /// <see cref="ITransactionalPreprocessingData"/> instance. Missing-value detection and
  /// filtering are delegated to the supplied <see cref="ISearchLogic"/>.
  /// </summary>
  public class StatisticsLogic : IStatisticsLogic {

    private readonly ITransactionalPreprocessingData preprocessingData;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates a statistics provider for the given data.
    /// </summary>
    /// <param name="thePreprocessingData">The data the statistics are computed on.</param>
    /// <param name="theSearchLogic">Used to locate and filter out missing values.</param>
    public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the number of columns of the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows of the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Returns the number of columns whose values are of type <c>double</c>.</summary>
    public int GetNumericColumnCount() {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(column => preprocessingData.IsType<double>(column));
    }

    /// <summary>
    /// Returns the number of columns that are not of type <c>double</c>.
    /// Every non-double column is counted here, including <c>DateTime</c> columns.
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the number of missing values summed over all columns.</summary>
    public int GetMissingValueCount() {
      int total = 0;
      for (int column = 0; column < preprocessingData.Columns; ++column) {
        total += GetMissingValueCount(column);
      }
      return total;
    }

    /// <summary>Returns the number of missing values in the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>Returns the smallest value of the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
      // NOTE(review): unlike the double statistics below, missing values are not filtered
      // out here - confirm whether that is intended.
      return preprocessingData.GetValues<T>(columnIndex).Min();
    }

    /// <summary>Returns the largest value of the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
      // NOTE(review): missing values are not filtered out here - confirm whether that is intended.
      return preprocessingData.GetValues<T>(columnIndex).Max();
    }

    /// <summary>
    /// Returns the median of the non-missing values of a <c>double</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The median, or <see cref="double.NaN"/> if the column is not of type <c>double</c>
    /// or contains no non-missing values.
    /// </returns>
    public double GetMedian(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      // Materialize once so the emptiness check and the statistic share one enumeration.
      List<double> values = GetValuesWithoutNaN<double>(columnIndex).ToList();
      return values.Count > 0 ? values.Median() : double.NaN;
    }

    /// <summary>
    /// Returns the arithmetic mean of the non-missing values of a <c>double</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The mean, or <see cref="double.NaN"/> if the column is not of type <c>double</c>
    /// or contains no non-missing values.
    /// </returns>
    public double GetAverage(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      // Enumerable.Average throws on an empty sequence, which happens when every value
      // in the column is missing; report NaN in that case instead.
      List<double> values = GetValuesWithoutNaN<double>(columnIndex).ToList();
      return values.Count > 0 ? values.Average() : double.NaN;
    }

    /// <summary>
    /// Returns the median of the non-missing values of a <c>DateTime</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The median, or the default <see cref="DateTime"/> if the column is not of type
    /// <c>DateTime</c> or contains no non-missing values.
    /// </returns>
    public DateTime GetMedianDateTime(int columnIndex) {
      if (!preprocessingData.IsType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      List<double> seconds = GetDateTimeAsSeconds(columnIndex).ToList();
      return seconds.Count > 0 ? GetSecondsAsDateTime(seconds.Median()) : new DateTime();
    }

    /// <summary>
    /// Returns the arithmetic mean of the non-missing values of a <c>DateTime</c> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The mean, or the default <see cref="DateTime"/> if the column is not of type
    /// <c>DateTime</c> or contains no non-missing values.
    /// </returns>
    public DateTime GetAverageDateTime(int columnIndex) {
      if (!preprocessingData.IsType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      // Enumerable.Average throws on an empty sequence; fall back to the default DateTime.
      List<double> seconds = GetDateTimeAsSeconds(columnIndex).ToList();
      return seconds.Count > 0 ? GetSecondsAsDateTime(seconds.Average()) : new DateTime();
    }

    /// <summary>
    /// Returns the value that occurs most often in the given column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The most frequent value, or <c>default(T)</c> if the column contains no values.
    /// </returns>
    public T GetMostCommonValue<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex)
                              .GroupBy(value => value)
                              .OrderByDescending(group => group.Count())
                              .Select(group => group.Key)
                              .FirstOrDefault();
    }

    /// <summary>
    /// Returns the standard deviation of the non-missing values of a <c>double</c> or
    /// <c>DateTime</c> column. <c>DateTime</c> values are measured in seconds.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The standard deviation, or <see cref="double.NaN"/> for any other column type.
    /// </returns>
    public double GetStandardDeviation(int columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        return GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).StandardDeviation();
      }
      return double.NaN;
    }

    /// <summary>
    /// Returns the variance of the non-missing values of a <c>double</c> or
    /// <c>DateTime</c> column. <c>DateTime</c> values are measured in seconds.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>
    /// The variance, or <see cref="double.NaN"/> for any other column type.
    /// </returns>
    public double GetVariance(int columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        // Filter missing values exactly like GetStandardDeviation does, so that the two
        // statistics are computed over the same set of values.
        return GetValuesWithoutNaN<double>(columnIndex).Variance();
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).Variance();
      }
      return double.NaN;
    }

    /// <summary>Returns the number of distinct values in the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>Returns the number of missing values in the given row, over all columns.</summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(column => searchLogic.IsMissingValue(column, rowIndex));
    }

    /// <summary>Returns the variable name of the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>Reports whether the given column holds values of type <typeparamref name="T"/>.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool IsType<T>(int columnIndex) {
      return preprocessingData.IsType<T>(columnIndex);
    }

    /// <summary>
    /// Returns a display name for the type of the given column: "double", "string",
    /// "DateTime", or "Unknown Type" for anything else.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        return "double";
      }
      if (preprocessingData.IsType<string>(columnIndex)) {
        return "string";
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    /// <summary>
    /// Returns the non-missing values of a <c>DateTime</c> column converted to seconds
    /// (ticks divided by <see cref="TimeSpan.TicksPerSecond"/>).
    /// </summary>
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
      return GetValuesWithoutNaN<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    /// <summary>Returns the values of the given column as filtered by the search logic.</summary>
    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex) {
      return searchLogic.GetValuesWithoutNaN<T>(columnIndex);
    }

    /// <summary>
    /// Converts a number of seconds, counted from the default <see cref="DateTime"/>
    /// (tick 0), back into a <see cref="DateTime"/>. Inverse of the conversion done in
    /// <see cref="GetDateTimeAsSeconds"/>.
    /// </summary>
    private DateTime GetSecondsAsDateTime(double seconds) {
      return new DateTime().AddSeconds(seconds);
    }

    /// <summary>
    /// Forwards subscriptions to the <c>Changed</c> event of the underlying data.
    /// </summary>
    public event DataPreprocessingChangedEventHandler Changed {
      add { preprocessingData.Changed += value; }
      remove { preprocessingData.Changed -= value; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.