Changeset 10165
- Timestamp: 11/27/13 16:34:23 (11 years ago)
- Location: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IPreprocessingData.cs
r10163 r10165 27 27 28 28 public interface IPreprocessingData : INamedItem { 29 T GetCell<T>(int columnIndex, int rowIndex);30 void SetCell<T>(int columnIndex, int rowIndex, T value);29 T GetCell<T>(int rowIndex, int columnIndex); 30 void SetCell<T>(int rowIndex, int columnIndex, T value); 31 31 32 32 IList<T> GetValues<T>(int columnIndex); -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/PreprocessingDataStatisticInfo.cs
r10162 r10165 1 1 using System; 2 using System.Linq; 2 3 using HeuristicLab.DataPreprocessing.Interfaces; 3 4 4 5 namespace HeuristicLab.DataPreprocessing { 6 5 7 class PreprocessingDataStatisticInfo : IPreprocessingDataStatisticInfo { 6 8 7 9 private IPreprocessingData preprocessingData; 8 10 9 public PreprocessingDataStatisticInfo(IPreprocessingData the DataSet) {10 preprocessingData = the DataSet;11 public PreprocessingDataStatisticInfo(IPreprocessingData thePreprocessingData) { 12 preprocessingData = thePreprocessingData; 11 13 } 12 13 14 14 15 public int GetColumnCount() { … … 43 44 44 45 public int GetMissingValueCount(int columnIndex) { 45 throw new System.NotImplementedException(); 46 //Func<dynamic, bool> isMissingValueFunc; 47 //if (preprocessingData.IsType<double>(columnIndex)) { 48 // isMissingValueFunc = IsMissingDoubleValue; 49 //} else if (preprocessingData.IsType<string>(columnIndex)) { 50 // isMissingValueFunc = IsMissingStringValue; 51 //} else if (preprocessingData.IsType<DateTime>(columnIndex)) { 52 // isMissingValueFunc = isMissingDateTimeValue; 53 //} else { 54 // throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type."); 55 //} 56 57 //int count = 0; 58 //for (int i = 0; i < preprocessingData.Rows; ++i) { 59 // if (isMissingValueFunc(preprocessingData.GetCell(i, columnIndex))) { 60 // ++count; 61 // } 62 //} 63 //return count; 64 } 65 66 private bool IsMissingDoubleValue(string value) { 67 double dummy; 68 bool couldNotParse = !double.TryParse(value, out dummy); 69 return couldNotParse || double.IsNaN(dummy); 70 } 71 72 private bool IsMissingStringValue(string value) { 73 return string.IsNullOrEmpty(value); 74 } 75 76 private bool isMissingDateTimeValue(string value) { 77 DateTime dateTime; 78 bool couldNotParse = DateTime.TryParse(value, out dateTime); 79 return couldNotParse || dateTime.Equals(DateTime.MinValue); 46 if (preprocessingData.IsType<double>(columnIndex)) { 47 return 
preprocessingData.GetValues<double>(columnIndex).Count(x => double.IsNaN(x)); 48 } else if (preprocessingData.IsType<string>(columnIndex)) { 49 return preprocessingData.GetValues<string>(columnIndex).Count(x => string.IsNullOrEmpty(x)); 50 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 51 return preprocessingData.GetValues<DateTime>(columnIndex).Count(x => x.Equals(DateTime.MinValue)); 52 } else { 53 throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type."); 54 } 80 55 } 81 56 82 57 public T GetMin<T>(int columnIndex) where T : IComparable<T> { 83 if (!preprocessingData.IsType<double>(columnIndex)) { 84 throw new ArgumentException("column with index: " + columnIndex + " was assumed to be of type " + typeof(T).Name + " but was different."); 85 } 86 if (typeof(T) == typeof(double)) { 87 return (dynamic)GetMin(columnIndex, double.MaxValue, IsMissingDoubleValue, double.Parse); ; 88 } else if (typeof(T) == typeof(DateTime)) { 89 return (dynamic)GetMin(columnIndex, DateTime.MaxValue, IsMissingDoubleValue, DateTime.Parse); 90 } else { 91 throw new ArgumentException("type of T is not supported"); 92 } 58 return preprocessingData.GetValues<T>(columnIndex).Min(); 93 59 } 94 60 95 61 public T GetMax<T>(int columnIndex) where T : IComparable<T> { 96 if (!preprocessingData.IsType<double>(columnIndex)) { 97 throw new ArgumentException("column with index: " + columnIndex + " was assumed to be of type " + typeof(T).Name + " but was different."); 98 } 99 if (typeof(T) == typeof(double)) { 100 return (dynamic)GetMax(columnIndex, double.MinValue, IsMissingDoubleValue, double.Parse); ; 101 } else if (typeof(T) == typeof(DateTime)) { 102 return (dynamic)GetMax(columnIndex, DateTime.MinValue, IsMissingDoubleValue, DateTime.Parse); 103 } else { 104 throw new ArgumentException("type of T is not supported"); 105 } 62 return preprocessingData.GetValues<T>(columnIndex).Max(); 106 63 } 107 108 private T GetMin<T>(int columnIndex, T 
max, Func<string, bool> isMissingValueFunc, Func<string, T> parseFunc) where T : IComparable<T> {109 throw new System.NotImplementedException();110 //T min = max;111 //for (int i = 0; i < preprocessingData.Rows; ++i) {112 // var value = preprocessingData.GetValue(i, columnIndex);113 // if (!isMissingValueFunc(value)) {114 // T parsedValue = parseFunc(value);115 // if (parsedValue.CompareTo(min) < 0) {116 // min = parsedValue;117 // }118 // }119 //}120 //return min;121 }122 123 private T GetMax<T>(int columnIndex, T min, Func<string, bool> isMissingValueFunc, Func<string, T> parseFunc) where T : IComparable<T> {124 throw new System.NotImplementedException();125 //T max = min;126 //for (int i = 0; i < preprocessingData.Rows; ++i) {127 // var value = preprocessingData.GetValue(i, columnIndex);128 // if (!isMissingValueFunc(value)) {129 // T parsedValue = parseFunc(value);130 // if (parsedValue.CompareTo(min) > 0) {131 // max = parsedValue;132 // }133 // }134 //}135 //return max;136 }137 138 139 140 141 64 142 65 public double GetMedian(int columnIndex) {
Note: See TracChangeset for help on using the changeset viewer.