1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Common;


26 


namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Provides summary statistics (counts, extrema, central tendency, dispersion) over the
  /// columns and rows of a preprocessing data set. Value access and missing-value detection
  /// are delegated to the injected <see cref="ISearchLogic"/>; type queries, variable names
  /// and change notifications are delegated to the injected preprocessing data.
  /// </summary>
  public class StatisticsLogic : IStatisticsLogic {

    private readonly ITransactionalPreprocessingData preprocessingData;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates a statistics facade over the given data and search logic.
    /// </summary>
    /// <param name="thePreprocessingData">Data set that is queried for types, names and values.</param>
    /// <param name="theSearchLogic">Helper used for dimensions, missing values and filtered values.</param>
    public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the number of columns as reported by the search logic.</summary>
    public int GetColumnCount() {
      return searchLogic.Columns;
    }

    /// <summary>Returns the number of rows as reported by the search logic.</summary>
    public int GetRowCount() {
      return searchLogic.Rows;
    }

    /// <summary>Returns the number of columns whose variable type is <see cref="double"/>.</summary>
    public int GetNumericColumnCount() {
      int count = 0;

      for (int i = 0; i < searchLogic.Columns; ++i) {
        if (preprocessingData.VariableHasType<double>(i)) {
          ++count;
        }
      }
      return count;
    }

    /// <summary>
    /// Returns the number of columns that are not of type <see cref="double"/>,
    /// i.e. the total column count minus the numeric column count.
    /// </summary>
    public int GetNominalColumnCount() {
      return searchLogic.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the total number of missing values, summed over all columns.</summary>
    public int GetMissingValueCount() {
      int count = 0;
      for (int i = 0; i < searchLogic.Columns; ++i) {
        count += GetMissingValueCount(i);
      }
      return count;
    }

    /// <summary>Returns the number of missing values in the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>
    /// Returns the smallest value of the column, or <c>default(T)</c> if the column is not
    /// of type <typeparamref name="T"/> or contains no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public T GetMin<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
      var min = default(T);
      if (preprocessingData.VariableHasType<T>(columnIndex)) {
        var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
        if (values.Any()) {
          min = values.Min();
        }
      }
      return min;
    }

    /// <summary>
    /// Returns the largest value of the column, or <c>default(T)</c> if the column is not
    /// of type <typeparamref name="T"/> or contains no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public T GetMax<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
      var max = default(T);
      if (preprocessingData.VariableHasType<T>(columnIndex)) {
        var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
        if (values.Any()) {
          max = values.Max();
        }
      }
      return max;
    }

    /// <summary>
    /// Returns the median of a <see cref="double"/> column, or <see cref="double.NaN"/> if the
    /// column has a different type or contains no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public double GetMedian(int columnIndex, bool considerSelection) {
      double median = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
        if (values.Any()) {
          median = values.Median();
        }
      }
      return median;
    }

    /// <summary>
    /// Returns the arithmetic mean of a <see cref="double"/> column, or <see cref="double.NaN"/>
    /// if the column has a different type or contains no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public double GetAverage(int columnIndex, bool considerSelection) {
      double avg = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
        if (values.Any()) {
          avg = values.Average();
        }
      }
      return avg;
    }

    /// <summary>
    /// Returns the median of a <see cref="DateTime"/> column, computed on the values expressed
    /// in seconds. Returns the default <see cref="DateTime"/> if the column has a different
    /// type or contains no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public DateTime GetMedianDateTime(int columnIndex, bool considerSelection) {
      DateTime median = new DateTime();
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection);
        // Same empty-input guard as GetMedian: never aggregate an empty sequence.
        if (seconds.Any()) {
          median = GetSecondsAsDateTime(seconds.Median());
        }
      }
      return median;
    }

    /// <summary>
    /// Returns the arithmetic mean of a <see cref="DateTime"/> column, computed on the values
    /// expressed in seconds. Returns the default <see cref="DateTime"/> if the column has a
    /// different type or contains no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public DateTime GetAverageDateTime(int columnIndex, bool considerSelection) {
      DateTime avg = new DateTime();
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection);
        // Same empty-input guard as GetAverage: averaging an empty sequence is not defined.
        if (seconds.Any()) {
          avg = GetSecondsAsDateTime(seconds.Average());
        }
      }
      return avg;
    }

    /// <summary>
    /// Returns the value that occurs most often in the column, or <c>default(T)</c> if the
    /// column contains no usable values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
    public T GetMostCommonValue<T>(int columnIndex, bool considerSelection) {
      // FirstOrDefault yields default(T) for an empty sequence, so no separate
      // emptiness check (and no second enumeration of the values) is required.
      return GetValuesWithoutNaN<T>(columnIndex, considerSelection)
        .GroupBy(x => x)
        .OrderByDescending(g => g.Count())
        .Select(g => g.Key)
        .FirstOrDefault();
    }

    /// <summary>
    /// Returns the standard deviation of a <see cref="double"/> column, or of a
    /// <see cref="DateTime"/> column expressed in seconds. Returns <see cref="double.NaN"/>
    /// for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetStandardDeviation(int columnIndex) {
      double stdDev = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
      }
      return stdDev;
    }

    /// <summary>
    /// Returns the variance of a <see cref="double"/> column, or of a <see cref="DateTime"/>
    /// column expressed in seconds. Returns <see cref="double.NaN"/> for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetVariance(int columnIndex) {
      double variance = double.NaN;
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        variance = GetValuesWithoutNaN<double>(columnIndex).Variance();
      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        variance = GetDateTimeAsSeconds(columnIndex).Variance();
      }
      return variance;
    }

    /// <summary>
    /// Returns the number of distinct values in the column. The count is taken over the
    /// values returned by the preprocessing data directly; no missing-value filtering is
    /// applied in this method.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>Returns the number of columns that have a missing value in the given row.</summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      int count = 0;
      // NOTE(review): this loop is bounded by preprocessingData.Columns while the other
      // loops use searchLogic.Columns - presumably the same value; confirm.
      for (int i = 0; i < preprocessingData.Columns; ++i) {
        if (searchLogic.IsMissingValue(i, rowIndex)) {
          ++count;
        }
      }
      return count;
    }

    /// <summary>Returns the variable name of the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>Returns whether the given column holds values of type <typeparamref name="T"/>.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool VariableHasType<T>(int columnIndex) {
      return preprocessingData.VariableHasType<T>(columnIndex);
    }

    /// <summary>
    /// Returns a display name for the column type: "double", "string" or "DateTime",
    /// and "Unknown Type" for anything else.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return "double";
      } else if (preprocessingData.VariableHasType<string>(columnIndex)) {
        return "string";
      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    /// <summary>
    /// Projects the usable <see cref="DateTime"/> values of a column to seconds
    /// (ticks divided by <see cref="TimeSpan.TicksPerSecond"/>).
    /// </summary>
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex, bool considerSelection = false) {
      return GetValuesWithoutNaN<DateTime>(columnIndex, considerSelection).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    /// <summary>Fetches the column values through the search logic's GetValuesWithoutNaN.</summary>
    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection = false) {
      return searchLogic.GetValuesWithoutNaN<T>(columnIndex, considerSelection);
    }

    /// <summary>
    /// Converts a number of seconds back to a <see cref="DateTime"/> by adding it to the
    /// default <see cref="DateTime"/>; the inverse of the projection in GetDateTimeAsSeconds.
    /// </summary>
    private DateTime GetSecondsAsDateTime(double seconds) {
      DateTime dateTime = new DateTime();
      return dateTime.AddSeconds(seconds);
    }

    /// <summary>
    /// Forwards subscriptions to the Changed event of the underlying preprocessing data.
    /// </summary>
    public event DataPreprocessingChangedEventHandler Changed {
      add { preprocessingData.Changed += value; }
      // Unsubscribe only the given handler; other subscribers must remain attached.
      remove { preprocessingData.Changed -= value; }
    }
  }
}

