Changeset 10183
- Timestamp: 12/04/13 13:19:17 (11 years ago)
- Location: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticInfo.cs
r10182 r10183 1 1 using System; 2 2 using System.Linq; 3 using HeuristicLab.DataPreprocessing;4 3 5 4 namespace HeuristicLab.DataPreprocessing { 6 5 7 class StatisticInfo : IStatisticInfo {6 public class StatisticInfo : IStatisticInfo { 8 7 9 8 private IPreprocessingData preprocessingData; … … 23 22 public int GetNumericColumnCount() { 24 23 int count = 0; 25 for (int i = 0; i < preprocessingData.Columns; ++i) {26 if (preprocessingData.IsType<double>( i)) {24 foreach (var variableName in preprocessingData.VariableNames) { 25 if (preprocessingData.IsType<double>(variableName)) { 27 26 ++count; 28 27 } … … 37 36 public int GetMissingValueCount() { 38 37 int count = 0; 39 for (int i = 0; i < preprocessingData.Columns; ++i) {40 count += GetMissingValueCount( i);38 foreach (var variableName in preprocessingData.VariableNames) { 39 count += GetMissingValueCount(variableName); 41 40 } 42 41 return count; 43 42 } 44 43 45 public int GetMissingValueCount( int columnIndex) {46 if (preprocessingData.IsType<double>( columnIndex)) {47 return preprocessingData.GetValues<double>( columnIndex).Count(x => double.IsNaN(x));48 } else if (preprocessingData.IsType<string>( columnIndex)) {49 return preprocessingData.GetValues<string>( columnIndex).Count(x => string.IsNullOrEmpty(x));50 } else if (preprocessingData.IsType<DateTime>( columnIndex)) {51 return preprocessingData.GetValues<DateTime>( columnIndex).Count(x => x.Equals(DateTime.MinValue));44 public int GetMissingValueCount(string variableName) { 45 if (preprocessingData.IsType<double>(variableName)) { 46 return preprocessingData.GetValues<double>(variableName).Count(x => double.IsNaN(x)); 47 } else if (preprocessingData.IsType<string>(variableName)) { 48 return preprocessingData.GetValues<string>(variableName).Count(x => string.IsNullOrEmpty(x)); 49 } else if (preprocessingData.IsType<DateTime>(variableName)) { 50 return preprocessingData.GetValues<DateTime>(variableName).Count(x => x.Equals(DateTime.MinValue)); 52 51 } else { 
53 throw new ArgumentException("column with index: " + columnIndex+ " contains a non supported type.");52 throw new ArgumentException("column with index: " + variableName + " contains a non supported type."); 54 53 } 55 54 } 56 55 57 public T GetMin<T>( int columnIndex) where T : IComparable<T> {58 return preprocessingData.GetValues<T>( columnIndex).Min();56 public T GetMin<T>(string variableName) where T : IComparable<T> { 57 return preprocessingData.GetValues<T>(variableName).Min(); 59 58 } 60 59 61 public T GetMax<T>( int columnIndex) where T : IComparable<T> {62 return preprocessingData.GetValues<T>( columnIndex).Max();60 public T GetMax<T>(string variableName) where T : IComparable<T> { 61 return preprocessingData.GetValues<T>(variableName).Max(); 63 62 } 64 63 65 public double GetMedian( int columnIndex) {64 public double GetMedian(string variableName) { 66 65 double median = double.NaN; 67 if (preprocessingData.IsType<double>( columnIndex)) {68 median = preprocessingData.GetValues<double>( columnIndex).OrderBy(x => x).ElementAt(preprocessingData.Rows / 2);66 if (preprocessingData.IsType<double>(variableName)) { 67 median = preprocessingData.GetValues<double>(variableName).OrderBy(x => x).ElementAt(preprocessingData.Rows / 2); 69 68 } 70 69 return median; 71 70 } 72 71 73 public double GetAverage( int columnIndex) {72 public double GetAverage(string variableName) { 74 73 double avg = double.NaN; 75 if (preprocessingData.IsType<double>( columnIndex)) {76 avg = preprocessingData.GetValues<double>( columnIndex).Average();74 if (preprocessingData.IsType<double>(variableName)) { 75 avg = preprocessingData.GetValues<double>(variableName).Average(); 77 76 } 78 77 return avg; 79 78 } 80 79 81 public T GetMostCommonValue<T>( int columnIndex) {82 return preprocessingData.GetValues<T>( columnIndex)80 public T GetMostCommonValue<T>(string variableName) { 81 return preprocessingData.GetValues<T>(variableName) 83 82 .GroupBy(x => x) 84 83 .OrderByDescending(g => g.Count()) 
… … 88 87 89 88 90 public double GetStandardDeviation( int columnIndex) {89 public double GetStandardDeviation(string variableName) { 91 90 double stdDev = double.NaN; 92 if (preprocessingData.IsType<double>( columnIndex)) {93 double avg = GetAverage( columnIndex);94 stdDev = Math.Sqrt(preprocessingData.GetValues<double>( columnIndex).Sum(x => (x - avg) * (x - avg)) / (preprocessingData.Rows - 1));91 if (preprocessingData.IsType<double>(variableName)) { 92 double avg = GetAverage(variableName); 93 stdDev = Math.Sqrt(preprocessingData.GetValues<double>(variableName).Sum(x => (x - avg) * (x - avg)) / (preprocessingData.Rows - 1)); 95 94 } 96 95 return stdDev; 97 96 } 98 97 99 public int GetDifferentValuesCount<T>( int columnIndex) {100 return preprocessingData.GetValues<T>( columnIndex).GroupBy(x => x).Count();98 public int GetDifferentValuesCount<T>(string variableName) { 99 return preprocessingData.GetValues<T>(variableName).GroupBy(x => x).Count(); 101 100 } 102 101 } -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IStatisticInfo.cs
r10182 r10183 9 9 int GetNominalColumnCount(); 10 10 int GetMissingValueCount(); 11 int GetMissingValueCount( int columnIndex);12 T GetMin<T>( int columnIndex) where T : IComparable<T>;13 T GetMax<T>( int columnIndex) where T : IComparable<T>;14 double GetMedian( int columnIndex);15 double GetAverage( int columnIndex);16 T GetMostCommonValue<T>( int columnIndex);17 double GetStandardDeviation( int columnIndex);18 int GetDifferentValuesCount<T>( int columnIndex);11 int GetMissingValueCount(string variableName); 12 T GetMin<T>(string variableName) where T : IComparable<T>; 13 T GetMax<T>(string variableName) where T : IComparable<T>; 14 double GetMedian(string variableName); 15 double GetAverage(string variableName); 16 T GetMostCommonValue<T>(string variableName); 17 double GetStandardDeviation(string variableName); 18 int GetDifferentValuesCount<T>(string variableName); 19 19 } 20 20 }
Note: See TracChangeset for help on using the changeset viewer.