#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
namespace HeuristicLab.DataPreprocessing
{
/// <summary>
/// Computes descriptive statistics (counts, extrema, median, average, spread) over the
/// columns and rows of a preprocessing data set. Missing values are located through the
/// supplied search logic and excluded from the numeric and date/time statistics.
/// </summary>
public class StatisticsLogic : IStatisticsLogic
{
  private readonly ITransactionalPreprocessingData preprocessingData;
  private readonly ISearchLogic searchLogic;

  /// <summary>
  /// Creates the statistics logic for the given data.
  /// </summary>
  /// <param name="thePreprocessingData">Data set the statistics are computed on.</param>
  /// <param name="theSearchLogic">Search logic used to find missing values.</param>
  public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic)
  {
    preprocessingData = thePreprocessingData;
    searchLogic = theSearchLogic;
  }

  /// <summary>Returns the number of columns of the data set.</summary>
  public int GetColumnCount()
  {
    return preprocessingData.Columns;
  }

  /// <summary>Returns the number of rows of the data set.</summary>
  public int GetRowCount()
  {
    return preprocessingData.Rows;
  }

  /// <summary>Returns the number of columns whose values are of type double.</summary>
  public int GetNumericColumnCount()
  {
    int count = 0;
    for (int i = 0; i < preprocessingData.Columns; ++i)
    {
      if (preprocessingData.IsType<double>(i))
      {
        ++count;
      }
    }
    return count;
  }

  /// <summary>
  /// Returns the number of columns that are not of type double
  /// (every non-numeric column is counted, whatever its type).
  /// </summary>
  public int GetNominalColumnCount()
  {
    return preprocessingData.Columns - GetNumericColumnCount();
  }

  /// <summary>Returns the total number of missing values over all columns.</summary>
  public int GetMissingValueCount()
  {
    int count = 0;
    for (int i = 0; i < preprocessingData.Columns; ++i)
    {
      count += GetMissingValueCount(i);
    }
    return count;
  }

  /// <summary>Returns the number of missing values in the given column.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public int GetMissingValueCount(int columnIndex)
  {
    return searchLogic.GetMissingValueIndices(columnIndex).Count();
  }

  // NOTE(review): the generic constraint on GetMin/GetMax must match the declaration in
  // IStatisticsLogic exactly (it may be IComparable<T> there) - confirm against the interface.

  /// <summary>Returns the smallest value of the given column.</summary>
  /// <typeparam name="T">Element type of the column.</typeparam>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public T GetMin<T>(int columnIndex) where T : IComparable
  {
    return preprocessingData.GetValues<T>(columnIndex).Min();
  }

  /// <summary>Returns the largest value of the given column.</summary>
  /// <typeparam name="T">Element type of the column.</typeparam>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public T GetMax<T>(int columnIndex) where T : IComparable
  {
    return preprocessingData.GetValues<T>(columnIndex).Max();
  }

  /// <summary>
  /// Returns the median of the non-missing values of a double column,
  /// or <c>double.NaN</c> if the column is not of type double.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public double GetMedian(int columnIndex)
  {
    double median = double.NaN;
    if (preprocessingData.IsType<double>(columnIndex))
    {
      median = GetValuesWithoutNaN<double>(columnIndex).Median();
    }
    return median;
  }

  /// <summary>
  /// Returns the arithmetic mean of the non-missing, non-NaN values of a double column.
  /// Returns <c>double.NaN</c> if the column is not of type double or has no usable values.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public double GetAverage(int columnIndex)
  {
    if (!preprocessingData.IsType<double>(columnIndex))
    {
      return double.NaN;
    }

    List<double> values = GetValuesWithoutNaN<double>(columnIndex)
      .Where(x => !double.IsNaN(x))
      .ToList();

    // Enumerable.Average throws on an empty sequence; report "not available" instead.
    return values.Count > 0 ? values.Average() : double.NaN;
  }

  /// <summary>
  /// Returns the median of the non-missing values of a DateTime column,
  /// or a default <see cref="DateTime"/> if the column is not of type DateTime.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public DateTime GetMedianDateTime(int columnIndex)
  {
    DateTime median = new DateTime();
    if (preprocessingData.IsType<DateTime>(columnIndex))
    {
      median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median());
    }
    return median;
  }

  /// <summary>
  /// Returns the average of the non-missing values of a DateTime column. Returns a default
  /// <see cref="DateTime"/> if the column is not of type DateTime or has no usable values.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public DateTime GetAverageDateTime(int columnIndex)
  {
    DateTime avg = new DateTime();
    if (preprocessingData.IsType<DateTime>(columnIndex))
    {
      List<double> seconds = GetDateTimeAsSeconds(columnIndex).ToList();
      // Enumerable.Average throws on an empty sequence; keep the default value instead.
      if (seconds.Count > 0)
      {
        avg = GetSecondsAsDateTime(seconds.Average());
      }
    }
    return avg;
  }

  /// <summary>
  /// Returns the value that occurs most often in the given column.
  /// </summary>
  /// <typeparam name="T">Element type of the column.</typeparam>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  /// <exception cref="InvalidOperationException">The column contains no values.</exception>
  public T GetMostCommonValue<T>(int columnIndex)
  {
    return preprocessingData.GetValues<T>(columnIndex)
      .GroupBy(x => x)
      .OrderByDescending(g => g.Count())
      .Select(g => g.Key)
      .First();
  }

  /// <summary>
  /// Returns the standard deviation of the non-missing values of a double column, or of a
  /// DateTime column expressed in seconds. Returns <c>double.NaN</c> for any other column type.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public double GetStandardDeviation(int columnIndex)
  {
    double stdDev = double.NaN;
    if (preprocessingData.IsType<double>(columnIndex))
    {
      stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
    }
    else if (preprocessingData.IsType<DateTime>(columnIndex))
    {
      stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
    }
    return stdDev;
  }

  /// <summary>
  /// Returns the variance of the non-missing values of a double column, or of a DateTime
  /// column expressed in seconds. Returns <c>double.NaN</c> for any other column type.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public double GetVariance(int columnIndex)
  {
    double variance = double.NaN;
    if (preprocessingData.IsType<double>(columnIndex))
    {
      // Exclude missing values, consistent with GetStandardDeviation.
      variance = GetValuesWithoutNaN<double>(columnIndex).Variance();
    }
    else if (preprocessingData.IsType<DateTime>(columnIndex))
    {
      variance = GetDateTimeAsSeconds(columnIndex).Variance();
    }
    return variance;
  }

  // NOTE(review): the type parameter is reconstructed from the generic GetValues call;
  // confirm it matches the declaration in IStatisticsLogic.

  /// <summary>Returns the number of distinct values in the given column.</summary>
  /// <typeparam name="T">Element type of the column.</typeparam>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public int GetDifferentValuesCount<T>(int columnIndex)
  {
    return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
  }

  /// <summary>Returns the number of missing values in the given row, over all columns.</summary>
  /// <param name="rowIndex">Index of the row to inspect.</param>
  public int GetRowMissingValueCount(int rowIndex)
  {
    int count = 0;
    for (int i = 0; i < preprocessingData.Columns; ++i)
    {
      if (searchLogic.IsMissingValue(i, rowIndex))
      {
        ++count;
      }
    }
    return count;
  }

  /// <summary>Returns the variable name of the given column.</summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public string GetVariableName(int columnIndex)
  {
    return preprocessingData.GetVariableName(columnIndex);
  }

  /// <summary>Returns whether the given column holds values of type <typeparamref name="T"/>.</summary>
  /// <typeparam name="T">Element type to test for.</typeparam>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public bool IsType<T>(int columnIndex)
  {
    return preprocessingData.IsType<T>(columnIndex);
  }

  /// <summary>
  /// Returns a display name for the column's element type: "double", "string", "DateTime",
  /// or "Unknown Type" if it is none of these.
  /// </summary>
  /// <param name="columnIndex">Index of the column to inspect.</param>
  public string GetColumnTypeAsString(int columnIndex)
  {
    if (preprocessingData.IsType<double>(columnIndex))
    {
      return "double";
    }
    else if (preprocessingData.IsType<string>(columnIndex))
    {
      return "string";
    }
    else if (preprocessingData.IsType<DateTime>(columnIndex))
    {
      return "DateTime";
    }
    return "Unknown Type";
  }

  /// <summary>
  /// Returns the values of the given column, skipping every row index that the search
  /// logic reports as missing.
  /// </summary>
  private List<T> GetValuesWithoutNaN<T>(int columnIndex)
  {
    // A set gives constant-time lookups instead of a linear scan per value.
    var missingIndices = new HashSet<int>(searchLogic.GetMissingValueIndices(columnIndex));
    return preprocessingData.GetValues<T>(columnIndex)
      .Where((value, index) => !missingIndices.Contains(index))
      .ToList();
  }

  /// <summary>
  /// Returns the non-missing values of a DateTime column converted to seconds
  /// (ticks divided by <see cref="TimeSpan.TicksPerSecond"/>).
  /// </summary>
  private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex)
  {
    return GetValuesWithoutNaN<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
  }

  /// <summary>Converts a number of seconds back into a DateTime offset from the default DateTime.</summary>
  private DateTime GetSecondsAsDateTime(double seconds)
  {
    DateTime dateTime = new DateTime();
    return dateTime.AddSeconds(seconds);
  }

  /// <summary>Forwards subscriptions to the Changed event of the underlying data.</summary>
  public event DataPreprocessingChangedEventHandler Changed
  {
    add { preprocessingData.Changed += value; }
    remove { preprocessingData.Changed -= value; }
  }
}
}