Changeset 10624 for branches/DataPreprocessing
- Timestamp: 03/19/14 14:02:37 (11 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs
r10613 r10624 25 25 using HeuristicLab.Common; 26 26 27 namespace HeuristicLab.DataPreprocessing { 28 29 public class StatisticsLogic : IStatisticsLogic { 27 namespace HeuristicLab.DataPreprocessing 28 { 29 30 public class StatisticsLogic : IStatisticsLogic 31 { 30 32 31 33 private readonly ITransactionalPreprocessingData preprocessingData; 32 34 private readonly ISearchLogic searchLogic; 33 35 34 public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) { 36 public StatisticsLogic(ITransactionalPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) 37 { 35 38 preprocessingData = thePreprocessingData; 36 39 searchLogic = theSearchLogic; 37 40 } 38 41 39 public int GetColumnCount() { 42 public int GetColumnCount() 43 { 40 44 return preprocessingData.Columns; 41 45 } 42 46 43 public int GetRowCount() { 47 public int GetRowCount() 48 { 44 49 return preprocessingData.Rows; 45 50 } 46 51 47 public int GetNumericColumnCount() { 52 public int GetNumericColumnCount() 53 { 48 54 int count = 0; 49 55 50 for (int i = 0; i < preprocessingData.Columns; ++i) { 51 if (preprocessingData.IsType<double>(i)) { 56 for (int i = 0; i < preprocessingData.Columns; ++i) 57 { 58 if (preprocessingData.IsType<double>(i)) 59 { 52 60 ++count; 53 61 } … … 56 64 } 57 65 58 public int GetNominalColumnCount() { 66 public int GetNominalColumnCount() 67 { 59 68 return preprocessingData.Columns - GetNumericColumnCount(); 60 69 } 61 70 62 public int GetMissingValueCount() { 71 public int GetMissingValueCount() 72 { 63 73 int count = 0; 64 for (int i = 0; i < preprocessingData.Columns; ++i) { 74 for (int i = 0; i < preprocessingData.Columns; ++i) 75 { 65 76 count += GetMissingValueCount(i); 66 77 } … … 68 79 } 69 80 70 public int GetMissingValueCount(int columnIndex) { 81 public int GetMissingValueCount(int columnIndex) 82 { 71 83 return searchLogic.GetMissingValueIndices(columnIndex).Count(); 72 84 } 73 85 74 public T GetMin<T>(int 
columnIndex) where T : IComparable<T> { 86 public T GetMin<T>(int columnIndex) where T : IComparable<T> 87 { 75 88 return preprocessingData.GetValues<T>(columnIndex).Min(); 76 89 } 77 90 78 public T GetMax<T>(int columnIndex) where T : IComparable<T> { 91 public T GetMax<T>(int columnIndex) where T : IComparable<T> 92 { 79 93 return preprocessingData.GetValues<T>(columnIndex).Max(); 80 94 } 81 95 82 public double GetMedian(int columnIndex) { 96 public double GetMedian(int columnIndex) 97 { 83 98 double median = double.NaN; 84 if (preprocessingData.IsType<double>(columnIndex)) { 85 median = preprocessingData.GetValues<double>(columnIndex).Median(); 99 if (preprocessingData.IsType<double>(columnIndex)) 100 { 101 median = GetValuesWithoutNaN<double>(columnIndex).Median(); 86 102 } 87 103 return median; 88 104 } 89 105 90 public double GetAverage(int columnIndex) { 106 public double GetAverage(int columnIndex) 107 { 91 108 double avg = double.NaN; 92 if (preprocessingData.IsType<double>(columnIndex)) { 93 avg = preprocessingData.GetValues<double>(columnIndex).Average(); 109 if (preprocessingData.IsType<double>(columnIndex)) 110 { 111 avg = GetValuesWithoutNaN<double>(columnIndex).Where(x => !double.IsNaN(x)).Average(); 94 112 } 95 113 return avg; 96 114 } 97 115 98 public DateTime GetMedianDateTime(int columnIndex) { 116 public DateTime GetMedianDateTime(int columnIndex) 117 { 99 118 DateTime median = new DateTime(); 100 if (preprocessingData.IsType<DateTime>(columnIndex)) { 119 if (preprocessingData.IsType<DateTime>(columnIndex)) 120 { 101 121 median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median()); 102 122 } … … 104 124 } 105 125 106 public DateTime GetAverageDateTime(int columnIndex) { 126 public DateTime GetAverageDateTime(int columnIndex) 127 { 107 128 DateTime avg = new DateTime(); 108 if (preprocessingData.IsType<DateTime>(columnIndex)) { 129 if (preprocessingData.IsType<DateTime>(columnIndex)) 130 { 109 131 avg = 
GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Average()); 110 132 } … … 112 134 } 113 135 114 public T GetMostCommonValue<T>(int columnIndex) { 136 public T GetMostCommonValue<T>(int columnIndex) 137 { 115 138 var t = preprocessingData.GetValues<T>(columnIndex); 116 139 var t2 = t.GroupBy(x => x); … … 125 148 126 149 127 public double GetStandardDeviation(int columnIndex) { 150 public double GetStandardDeviation(int columnIndex) 151 { 128 152 double stdDev = double.NaN; 129 if (preprocessingData.IsType<double>(columnIndex)) { 130 stdDev = preprocessingData.GetValues<double>(columnIndex).StandardDeviation(); 131 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 153 if (preprocessingData.IsType<double>(columnIndex)) 154 { 155 stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation(); 156 } 157 else if (preprocessingData.IsType<DateTime>(columnIndex)) 158 { 132 159 stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation(); 133 160 } … … 135 162 } 136 163 137 public double GetVariance(int columnIndex) { 164 public double GetVariance(int columnIndex) 165 { 138 166 double variance = double.NaN; 139 if (preprocessingData.IsType<double>(columnIndex)) { 167 if (preprocessingData.IsType<double>(columnIndex)) 168 { 140 169 variance = preprocessingData.GetValues<double>(columnIndex).Variance(); 141 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 170 } 171 else if (preprocessingData.IsType<DateTime>(columnIndex)) 172 { 142 173 variance = GetDateTimeAsSeconds(columnIndex).Variance(); 143 174 } … … 145 176 } 146 177 147 public int GetDifferentValuesCount<T>(int columnIndex) { 178 public int GetDifferentValuesCount<T>(int columnIndex) 179 { 148 180 return preprocessingData.GetValues<T>(columnIndex).GroupBy(x => x).Count(); 149 181 } 150 182 151 public int GetRowMissingValueCount(int rowIndex) { 183 public int GetRowMissingValueCount(int rowIndex) 184 { 152 185 int count = 0; 153 for (int i = 0; i < preprocessingData.Columns; ++i) 
{ 154 if (searchLogic.IsMissingValue(i, rowIndex)) { 186 for (int i = 0; i < preprocessingData.Columns; ++i) 187 { 188 if (searchLogic.IsMissingValue(i, rowIndex)) 189 { 155 190 ++count; 156 191 } … … 159 194 } 160 195 161 public string GetVariableName(int columnIndex) { 196 public string GetVariableName(int columnIndex) 197 { 162 198 return preprocessingData.GetVariableName(columnIndex); 163 199 } 164 200 165 public bool IsType<T>(int columnIndex) { 201 public bool IsType<T>(int columnIndex) 202 { 166 203 return preprocessingData.IsType<T>(columnIndex); 167 204 } 168 205 169 public string GetColumnTypeAsString(int columnIndex) { 170 if (preprocessingData.IsType<double>(columnIndex)) { 206 public string GetColumnTypeAsString(int columnIndex) 207 { 208 if (preprocessingData.IsType<double>(columnIndex)) 209 { 171 210 return "double"; 172 } else if (preprocessingData.IsType<string>(columnIndex)) { 211 } 212 else if (preprocessingData.IsType<string>(columnIndex)) 213 { 173 214 return "string"; 174 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 215 } 216 else if (preprocessingData.IsType<DateTime>(columnIndex)) 217 { 175 218 return "DateTime"; 176 219 } 177 220 return "Unknown Type"; 178 221 } 179 private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) { 180 return preprocessingData.GetValues<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond); 181 } 182 183 private DateTime GetSecondsAsDateTime(double seconds) { 222 223 private List<T> GetValuesWithoutNaN<T>(int columnIndex) 224 { 225 IEnumerable<int> missing = searchLogic.GetMissingValueIndices(columnIndex); 226 return (List<T>)preprocessingData.GetValues<T>(columnIndex).Select((v, i) => new { i, v }).Where(x => !missing.Contains(x.i)); 227 } 228 private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) 229 { 230 return GetValuesWithoutNaN<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond); 231 } 232 233 private DateTime 
GetSecondsAsDateTime(double seconds) 234 { 184 235 DateTime dateTime = new DateTime(); 185 236 return dateTime.AddSeconds(seconds); 186 237 } 187 238 188 public event DataPreprocessingChangedEventHandler Changed { 239 public event DataPreprocessingChangedEventHandler Changed 240 { 189 241 add { preprocessingData.Changed += value; } 190 242 remove { preprocessingData.Changed -= value; }
Note: See TracChangeset for help on using the changeset viewer.