using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;

namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes per-column and whole-table statistics over an <see cref="IPreprocessingData"/>
  /// instance, delegating missing-value detection to an <see cref="ISearchLogic"/>.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the type arguments used below (double / string / DateTime, and the
  /// IComparable&lt;T&gt; constraint on GetMin/GetMax) are assumed to match the signatures declared by
  /// IPreprocessingData and IStatisticsLogic - confirm against those interfaces.
  /// </remarks>
  public class StatisticsLogic : IStatisticsLogic {

    private readonly IPreprocessingData preprocessingData;
    private readonly ISearchLogic searchLogic;

    /// <summary>Creates the statistics logic on top of the given data and search logic.</summary>
    /// <param name="thePreprocessingData">Data source whose columns are analysed.</param>
    /// <param name="theSearchLogic">Helper used to find missing values.</param>
    public StatisticsLogic(IPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the number of columns reported by the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows reported by the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Counts the columns whose element type is <c>double</c>.</summary>
    public int GetNumericColumnCount() {
      int numericColumns = 0;
      for (int column = 0; column < preprocessingData.Columns; ++column) {
        if (preprocessingData.IsType<double>(column)) {
          ++numericColumns;
        }
      }
      return numericColumns;
    }

    /// <summary>
    /// Counts every column that is not numeric, i.e. total columns minus
    /// <see cref="GetNumericColumnCount"/>. DateTime columns are therefore included.
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Sums the missing-value counts of all columns.</summary>
    public int GetMissingValueCount() {
      int missing = 0;
      for (int column = 0; column < preprocessingData.Columns; ++column) {
        missing += GetMissingValueCount(column);
      }
      return missing;
    }

    /// <summary>Counts the missing-value indices the search logic reports for one column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>Returns the smallest value of the column, read as <typeparamref name="T"/>.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Min();
    }

    /// <summary>Returns the largest value of the column, read as <typeparamref name="T"/>.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Max();
    }

    /// <summary>Returns the median of a <c>double</c> column, or NaN for any other column type.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetMedian(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(columnIndex).Median();
    }

    /// <summary>Returns the mean of a <c>double</c> column, or NaN for any other column type.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public double GetAverage(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(columnIndex).Average();
    }

    /// <summary>
    /// Returns the median of a DateTime column (computed on seconds, truncated to whole seconds),
    /// or <c>new DateTime()</c> for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public DateTime GetMedianDateTime(int columnIndex) {
      if (!preprocessingData.IsType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median());
    }

    /// <summary>
    /// Returns the mean of a DateTime column (computed on seconds, truncated to whole seconds),
    /// or <c>new DateTime()</c> for any other column type.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public DateTime GetAverageDateTime(int columnIndex) {
      if (!preprocessingData.IsType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Average());
    }

    /// <summary>
    /// Returns the value that occurs most often in the column. <c>First()</c> throws
    /// <see cref="InvalidOperationException"/> when the column yields no values.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMostCommonValue<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex)
                              .GroupBy(x => x)
                              .OrderByDescending(g => g.Count())
                              .Select(g => g.Key)
                              .First();
    }

    /// <summary>Not implemented yet: always returns NaN.</summary>
    /// <param name="columnIndex">Index of the column to inspect (currently unused).</param>
    public double GetStandardDeviation(int columnIndex) {
      // TODO: fix me - the disabled draft called StandardDeviation() on the column's double values,
      // or on GetDateTimeAsSeconds(...) for date columns, but referenced an undefined `variableName`
      // instead of columnIndex and therefore never compiled.
      return double.NaN;
    }

    /// <summary>Not implemented yet: always returns NaN.</summary>
    /// <param name="columnIndex">Index of the column to inspect (currently unused).</param>
    public double GetVariance(int columnIndex) {
      // TODO: fix me - the disabled draft called Variance() on the column's double values,
      // or on GetDateTimeAsSeconds(...) for date columns, but referenced an undefined `variableName`
      // instead of columnIndex and therefore never compiled.
      return double.NaN;
    }

    /// <summary>Counts the distinct values of the column, read as <typeparamref name="T"/>.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      // Distinct() uses the same default equality comparer as GroupBy(x => x), without building groups.
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>Counts the cells of one row that the search logic flags as missing.</summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      int missing = 0;
      for (int column = 0; column < preprocessingData.Columns; ++column) {
        if (searchLogic.IsMissingValue(column, rowIndex)) {
          ++missing;
        }
      }
      return missing;
    }

    /// <summary>Returns the variable name the underlying data reports for the column.</summary>
    /// <param name="columnIndex">Index of the column.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>Forwards the type check for <typeparamref name="T"/> to the underlying data.</summary>
    /// <param name="columnIndex">Index of the column.</param>
    public bool IsType<T>(int columnIndex) {
      return preprocessingData.IsType<T>(columnIndex);
    }

    /// <summary>
    /// Returns "double", "string" or "DateTime" depending on the column's element type, or
    /// "Unknown Type" when none of the three matches.
    /// </summary>
    /// <param name="columnIndex">Index of the column.</param>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        return "double";
      }
      if (preprocessingData.IsType<string>(columnIndex)) {
        return "string";
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    /// <summary>Projects a DateTime column to seconds (ticks divided by ticks-per-second).</summary>
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
      return preprocessingData.GetValues<DateTime>(columnIndex)
                              .Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    /// <summary>
    /// Converts a number of seconds (as produced by <see cref="GetDateTimeAsSeconds"/>) back to a
    /// DateTime, truncated to whole seconds.
    /// </summary>
    private DateTime GetSecondsAsDateTime(double seconds) {
      // No meaningful date exists for NaN; return the same default the callers use for "not applicable".
      if (double.IsNaN(seconds)) {
        return new DateTime();
      }
      // Work in 64-bit ticks: present-day dates are tens of billions of seconds past year 1,
      // which does not fit the int parameters of TimeSpan(int, int, int).
      long wholeSeconds = (long)seconds;
      return new DateTime(wholeSeconds * TimeSpan.TicksPerSecond);
    }
  }
}