#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Computes descriptive statistics (counts, extrema, averages, dispersion and
/// quantiles) for the columns and rows of the wrapped preprocessing data.
/// </summary>
public class StatisticsLogic {
  private readonly ITransactionalPreprocessingData preprocessingData;
  private readonly SearchLogic searchLogic;

  /// <summary>
  /// Creates the statistics logic on top of the given data and search logic.
  /// </summary>
  /// <param name="thePreprocessingData">Data whose column types, values and change event are used.</param>
  /// <param name="theSearchLogic">Search logic used for dimensions, missing values and filtered values.</param>
  public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, SearchLogic theSearchLogic) {
    preprocessingData = thePreprocessingData;
    searchLogic = theSearchLogic;
  }

  /// <summary>Returns the number of columns reported by the search logic.</summary>
  public int GetColumnCount() {
    return searchLogic.Columns;
  }

  /// <summary>Returns the number of rows reported by the search logic.</summary>
  public int GetRowCount() {
    return searchLogic.Rows;
  }

  /// <summary>Returns the number of columns whose values are of type <c>double</c>.</summary>
  public int GetNumericColumnCount() {
    int count = 0;
    for (int i = 0; i < searchLogic.Columns; ++i) {
      if (preprocessingData.VariableHasType<double>(i)) {
        ++count;
      }
    }
    return count;
  }

  /// <summary>Returns the number of columns that are not of type <c>double</c>.</summary>
  public int GetNominalColumnCount() {
    return searchLogic.Columns - GetNumericColumnCount();
  }

  /// <summary>Returns the total number of missing values summed over all columns.</summary>
  public int GetMissingValueCount() {
    int count = 0;
    for (int i = 0; i < searchLogic.Columns; ++i) {
      count += GetMissingValueCount(i);
    }
    return count;
  }

  /// <summary>Returns the number of missing-value indices the search logic reports for one column.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public int GetMissingValueCount(int columnIndex) {
    return searchLogic.GetMissingValueIndices(columnIndex).Count();
  }

  /// <summary>
  /// Returns the smallest value of a column of type <typeparamref name="T"/>.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
  /// <returns>
  /// The minimum, or <c>default(T)</c> when the column is not of type
  /// <typeparamref name="T"/> or the search logic yields no values for it.
  /// </returns>
  public T GetMin<T>(int columnIndex, bool considerSelection = false) where T : IComparable<T> {
    var min = default(T);
    if (preprocessingData.VariableHasType<T>(columnIndex)) {
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
      if (values.Any()) {
        min = values.Min();
      }
    }
    return min;
  }

  /// <summary>
  /// Returns the largest value of a column of type <typeparamref name="T"/>.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
  /// <returns>
  /// The maximum, or <c>default(T)</c> when the column is not of type
  /// <typeparamref name="T"/> or the search logic yields no values for it.
  /// </returns>
  public T GetMax<T>(int columnIndex, bool considerSelection = false) where T : IComparable<T> {
    var max = default(T);
    if (preprocessingData.VariableHasType<T>(columnIndex)) {
      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
      if (values.Any()) {
        max = values.Max();
      }
    }
    return max;
  }

  /// <summary>Returns the median of a <c>double</c> column.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
  /// <returns><c>double.NaN</c> when the column is not of type <c>double</c> or has no values.</returns>
  public double GetMedian(int columnIndex, bool considerSelection = false) {
    double median = double.NaN;
    if (preprocessingData.VariableHasType<double>(columnIndex)) {
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
      if (values.Any()) {
        median = values.Median();
      }
    }
    return median;
  }

  /// <summary>Returns the arithmetic mean of a <c>double</c> column.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
  /// <returns><c>double.NaN</c> when the column is not of type <c>double</c> or has no values.</returns>
  public double GetAverage(int columnIndex, bool considerSelection = false) {
    double avg = double.NaN;
    if (preprocessingData.VariableHasType<double>(columnIndex)) {
      var values = GetValuesWithoutNaN<double>(columnIndex, considerSelection);
      if (values.Any()) {
        avg = values.Average();
      }
    }
    return avg;
  }

  /// <summary>Returns the median of a <c>DateTime</c> column, computed on the values expressed in seconds.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
  /// <returns><c>new DateTime()</c> when the column is not of type <c>DateTime</c> or has no values.</returns>
  public DateTime GetMedianDateTime(int columnIndex, bool considerSelection = false) {
    DateTime median = new DateTime();
    if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
      // Materialize once so the emptiness check and the aggregate share one enumeration.
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection).ToList();
      // Same empty-column guard as GetMedian; keeps the default instead of aggregating nothing.
      if (seconds.Count > 0) {
        median = GetSecondsAsDateTime(seconds.Median());
      }
    }
    return median;
  }

  /// <summary>Returns the mean of a <c>DateTime</c> column, computed on the values expressed in seconds.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
  /// <returns><c>new DateTime()</c> when the column is not of type <c>DateTime</c> or has no values.</returns>
  public DateTime GetAverageDateTime(int columnIndex, bool considerSelection = false) {
    DateTime avg = new DateTime();
    if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
      var seconds = GetDateTimeAsSeconds(columnIndex, considerSelection).ToList();
      // Enumerable.Average throws InvalidOperationException on an empty sequence.
      if (seconds.Count > 0) {
        avg = GetSecondsAsDateTime(seconds.Average());
      }
    }
    return avg;
  }

  /// <summary>Returns the value that occurs most often in a column.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <param name="considerSelection">Forwarded to the search logic when fetching the values.</param>
  /// <returns>
  /// The most frequent value, or <c>default(T)</c> when there are no values.
  /// On a tie the value encountered first wins, because the ordering is stable.
  /// </returns>
  public T GetMostCommonValue<T>(int columnIndex, bool considerSelection = false) {
    var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
    if (!values.Any())
      return default(T);
    return values.GroupBy(x => x)
                 .OrderByDescending(g => g.Count())
                 .Select(g => g.Key)
                 .First();
  }

  /// <summary>
  /// Returns the standard deviation of a <c>double</c> column, or of a
  /// <c>DateTime</c> column expressed in seconds.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <returns><c>double.NaN</c> for columns of any other type.</returns>
  public double GetStandardDeviation(int columnIndex) {
    double stdDev = double.NaN;
    if (preprocessingData.VariableHasType<double>(columnIndex)) {
      stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
    } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
      stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
    }
    return stdDev;
  }

  /// <summary>
  /// Returns the variance of a <c>double</c> column, or of a <c>DateTime</c>
  /// column expressed in seconds.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <returns><c>double.NaN</c> for columns of any other type.</returns>
  public double GetVariance(int columnIndex) {
    double variance = double.NaN;
    if (preprocessingData.VariableHasType<double>(columnIndex)) {
      variance = GetValuesWithoutNaN<double>(columnIndex).Variance();
    } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
      variance = GetDateTimeAsSeconds(columnIndex).Variance();
    }
    return variance;
  }

  /// <summary>
  /// Returns the 0.25 quantile of a <c>double</c> column, or of a
  /// <c>DateTime</c> column expressed in seconds.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <returns><c>double.NaN</c> for columns of any other type.</returns>
  public double GetOneQuarterPercentile(int columnIndex) {
    double percentile = double.NaN;
    if (preprocessingData.VariableHasType<double>(columnIndex)) {
      percentile = GetValuesWithoutNaN<double>(columnIndex).Quantile(0.25);
    } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
      percentile = GetDateTimeAsSeconds(columnIndex).Quantile(0.25);
    }
    return percentile;
  }

  /// <summary>
  /// Returns the 0.75 quantile of a <c>double</c> column, or of a
  /// <c>DateTime</c> column expressed in seconds.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <returns><c>double.NaN</c> for columns of any other type.</returns>
  public double GetThreeQuarterPercentile(int columnIndex) {
    double percentile = double.NaN;
    if (preprocessingData.VariableHasType<double>(columnIndex)) {
      percentile = GetValuesWithoutNaN<double>(columnIndex).Quantile(0.75);
    } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
      percentile = GetDateTimeAsSeconds(columnIndex).Quantile(0.75);
    }
    return percentile;
  }

  /// <summary>
  /// Returns the number of distinct values in a column, using the default
  /// equality comparer of <typeparamref name="T"/>. All values of the column
  /// are considered, as returned by the preprocessing data.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public int GetDifferentValuesCount<T>(int columnIndex) {
    return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
  }

  /// <summary>Returns the number of cells in a row that the search logic reports as missing.</summary>
  /// <param name="rowIndex">Index of the row to inspect.</param>
  public int GetRowMissingValueCount(int rowIndex) {
    int count = 0;
    // NOTE(review): iterates preprocessingData.Columns while the other counters use
    // searchLogic.Columns - presumably the same number; confirm.
    for (int i = 0; i < preprocessingData.Columns; ++i) {
      if (searchLogic.IsMissingValue(i, rowIndex)) {
        ++count;
      }
    }
    return count;
  }

  /// <summary>Returns the variable name of the given column.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public string GetVariableName(int columnIndex) {
    return preprocessingData.GetVariableName(columnIndex);
  }

  /// <summary>Returns whether the given column holds values of type <typeparamref name="T"/>.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public bool VariableHasType<T>(int columnIndex) {
    return preprocessingData.VariableHasType<T>(columnIndex);
  }

  /// <summary>Returns a display name for the value type of the given column.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <returns>"double", "string", "DateTime", or "Unknown Type" for anything else.</returns>
  public string GetColumnTypeAsString(int columnIndex) {
    if (preprocessingData.VariableHasType<double>(columnIndex)) {
      return "double";
    } else if (preprocessingData.VariableHasType<string>(columnIndex)) {
      return "string";
    } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
      return "DateTime";
    }
    return "Unknown Type";
  }

  /// <summary>
  /// Converts the values of a <c>DateTime</c> column to seconds
  /// (ticks divided by <c>TimeSpan.TicksPerSecond</c>).
  /// </summary>
  private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex, bool considerSelection = false) {
    return GetValuesWithoutNaN<DateTime>(columnIndex, considerSelection).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
  }

  /// <summary>Fetches the values of a column through <c>SearchLogic.GetValuesWithoutNaN</c>.</summary>
  private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection = false) {
    return searchLogic.GetValuesWithoutNaN<T>(columnIndex, considerSelection);
  }

  /// <summary>Converts seconds back to a <c>DateTime</c> by adding them to <c>new DateTime()</c>.</summary>
  private DateTime GetSecondsAsDateTime(double seconds) {
    DateTime dateTime = new DateTime();
    return dateTime.AddSeconds(seconds);
  }

  /// <summary>Forwards subscriptions to the <c>Changed</c> event of the underlying preprocessing data.</summary>
  public event DataPreprocessingChangedEventHandler Changed {
    add { preprocessingData.Changed += value; }
    remove { preprocessingData.Changed -= value; }
  }
}
}