using System;
using System.Linq;

namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Computes simple descriptive statistics (counts, extrema, median, mean,
  /// standard deviation, mode) over the data exposed by an
  /// <see cref="IPreprocessingData"/> instance.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the generic type arguments used below (the type parameters on
  /// GetMin/GetMax/GetMostCommonValue/GetDifferentValuesCount and the "double"
  /// arguments passed to IsType/GetValues) were reconstructed, because the file
  /// as received had lost everything between angle brackets and did not compile.
  /// Confirm them against IPreprocessingData and IStatisticInfo.
  /// </remarks>
  public class StatisticInfo : IStatisticInfo {

    // Data source every statistic is computed from; assigned once in the constructor.
    private readonly IPreprocessingData preprocessingData;

    /// <summary>Creates a statistics provider over the given preprocessing data.</summary>
    /// <param name="thePreprocessingData">The data all statistics are read from.</param>
    public StatisticInfo(IPreprocessingData thePreprocessingData) {
      preprocessingData = thePreprocessingData;
    }

    /// <summary>Returns the number of columns reported by the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows reported by the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Counts the variables whose column is typed as double.</summary>
    public int GetNumericColumnCount() {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        if (preprocessingData.IsType<double>(variableName)) {
          ++count;
        }
      }
      return count;
    }

    /// <summary>
    /// Returns the number of columns that are not typed as double, i.e. the
    /// total column count minus <see cref="GetNumericColumnCount"/>.
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the number of missing values summed over all variables.</summary>
    public int GetMissingValueCount() {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        count += GetMissingValueCount(variableName);
      }
      return count;
    }

    /// <summary>Returns the number of missing-value indices reported for one variable.</summary>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    public int GetMissingValueCount(string variableName) {
      return preprocessingData.GetMissingValueIndices(variableName).Count();
    }

    /// <summary>Returns the smallest value of the given variable.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    public T GetMin<T>(string variableName) where T : IComparable {
      return preprocessingData.GetValues<T>(variableName).Min();
    }

    /// <summary>Returns the largest value of the given variable.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    public T GetMax<T>(string variableName) where T : IComparable {
      return preprocessingData.GetValues<T>(variableName).Max();
    }

    /// <summary>Returns the median of a double-typed variable.</summary>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    /// <returns>
    /// The middle value of the sorted column, or the mean of the two middle
    /// values when the column has an even number of entries.
    /// <see cref="double.NaN"/> if the column is not typed as double or is empty.
    /// </returns>
    public double GetMedian(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      // Materialize once so the length used for indexing is the length of the
      // sequence actually sorted, not a separately reported row count.
      double[] sorted = preprocessingData.GetValues<double>(variableName)
        .OrderBy(x => x)
        .ToArray();
      if (sorted.Length == 0) {
        return double.NaN;
      }

      int mid = sorted.Length / 2;
      if (sorted.Length % 2 == 1) {
        return sorted[mid];
      }
      // Even number of entries: the median lies halfway between the two central values.
      return (sorted[mid - 1] + sorted[mid]) / 2.0;
    }

    /// <summary>Returns the arithmetic mean of a double-typed variable.</summary>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    /// <returns>
    /// The mean, or <see cref="double.NaN"/> if the column is not typed as double
    /// or contains no values.
    /// </returns>
    public double GetAverage(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      double[] values = preprocessingData.GetValues<double>(variableName).ToArray();
      // Enumerable.Average throws on an empty sequence; report "undefined" instead.
      return values.Length == 0 ? double.NaN : values.Average();
    }

    /// <summary>Returns the value that occurs most often in the given variable.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    public T GetMostCommonValue<T>(string variableName) {
      return preprocessingData.GetValues<T>(variableName)
        .GroupBy(x => x)
        .OrderByDescending(g => g.Count())
        .Select(g => g.Key)
        .First();
    }

    /// <summary>
    /// Returns the sample standard deviation (divisor n - 1) of a double-typed variable.
    /// </summary>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    /// <returns>
    /// The standard deviation, or <see cref="double.NaN"/> if the column is not
    /// typed as double or holds fewer than two values.
    /// </returns>
    public double GetStandardDeviation(string variableName) {
      if (!preprocessingData.IsType<double>(variableName)) {
        return double.NaN;
      }

      double[] values = preprocessingData.GetValues<double>(variableName).ToArray();
      // With fewer than two values the n - 1 divisor is zero or negative,
      // so the sample standard deviation is undefined.
      if (values.Length < 2) {
        return double.NaN;
      }

      double avg = values.Average();
      double sumOfSquaredDeviations = values.Sum(x => (x - avg) * (x - avg));
      return Math.Sqrt(sumOfSquaredDeviations / (values.Length - 1));
    }

    /// <summary>Returns the number of distinct values in the given variable.</summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the variable (column) to inspect.</param>
    // NOTE(review): the type parameter here is reconstructed by analogy with
    // GetMostCommonValue; verify that IStatisticInfo declares this method as generic.
    public int GetDifferentValuesCount<T>(string variableName) {
      return preprocessingData.GetValues<T>(variableName).GroupBy(x => x).Count();
    }

    /// <summary>Counts the variables that have a missing value in the given row.</summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      int count = 0;
      foreach (var variableName in preprocessingData.VariableNames) {
        if (preprocessingData.IsMissingValue(variableName, rowIndex)) {
          ++count;
        }
      }
      return count;
    }
  }
}