- Timestamp:
- 05/07/14 15:22:41 (11 years ago)
- Location:
- branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs
r10811 r10820 109 109 } 110 110 111 IList<Tuple<int, int>> startEndings = GetStartAndEndingsForInterpolation(column); 112 foreach (var tuple in startEndings) { 113 Interpolate(column, tuple.Item1, tuple.Item2); 114 } 115 } 116 }); 117 } 118 119 private List<Tuple<int, int>> GetStartAndEndingsForInterpolation(KeyValuePair<int, IList<int>> column) { 120 List<Tuple<int, int>> startEndings = new List<Tuple<int, int>>(); 121 var rowIndices = column.Value; 122 rowIndices = rowIndices.OrderBy(x => x).ToList(); 123 var count = rowIndices.Count; 124 int start = int.MinValue; 125 for (int i = 0; i < count; ++i) { 126 if (start == int.MinValue) { 127 start = indexOfPrevPresentValue(column.Key, rowIndices[i]); 128 } 129 if (i + 1 == count || (i + 1 < count && rowIndices[i + 1] - rowIndices[i] > 1)) { 130 int next = indexOfNextPresentValue(column.Key, rowIndices[i]); 131 if (start > 0 && next < preprocessingData.Rows) { 132 startEndings.Add(new Tuple<int, int>(start, next)); 133 } 134 start = int.MinValue; 135 } 136 } 137 return startEndings; 138 } 139 140 public void ReplaceIndicesBySmoothing(IDictionary<int, IList<int>> cells) { 141 preprocessingData.InTransaction(() => { 142 foreach (var column in cells) { 143 int countValues = preprocessingData.Rows; 144 111 145 foreach (int index in column.Value) { 112 146 // dont replace first or last values … … 116 150 117 151 // no neighbours found 118 if (prevIndex < 0 &&nextIndex >= countValues) {152 if (prevIndex < 0 || nextIndex >= countValues) { 119 153 continue; 120 154 } 121 155 122 int valuesToInterpolate = nextIndex - prevIndex; 123 124 if (preprocessingData.IsType<double>(column.Key)) { 125 double prev = preprocessingData.GetCell<double>(column.Key, prevIndex); 126 double next = preprocessingData.GetCell<double>(column.Key, nextIndex); 127 double interpolationStep = (next - prev) / valuesToInterpolate; 128 129 for (int i = prevIndex; i < nextIndex; ++i) { 130 double interpolated = prev + (interpolationStep * (i - prevIndex)); 
131 preprocessingData.SetCell<double>(column.Key, i, interpolated); 132 } 133 } else if (preprocessingData.IsType<DateTime>(column.Key)) { 134 DateTime prev = preprocessingData.GetCell<DateTime>(column.Key, prevIndex); 135 DateTime next = preprocessingData.GetCell<DateTime>(column.Key, nextIndex); 136 double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate; 137 138 for (int i = prevIndex; i < nextIndex; ++i) { 139 DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex)); 140 preprocessingData.SetCell<DateTime>(column.Key, i, interpolated); 141 } 142 } 156 Interpolate(column, prevIndex, nextIndex); 143 157 } 144 158 } 145 159 } 146 160 }); 161 } 162 163 private void Interpolate(KeyValuePair<int, IList<int>> column, int prevIndex, int nextIndex) { 164 int valuesToInterpolate = nextIndex - prevIndex; 165 166 if (preprocessingData.IsType<double>(column.Key)) { 167 double prev = preprocessingData.GetCell<double>(column.Key, prevIndex); 168 double next = preprocessingData.GetCell<double>(column.Key, nextIndex); 169 double interpolationStep = (next - prev) / valuesToInterpolate; 170 171 for (int i = prevIndex; i < nextIndex; ++i) { 172 double interpolated = prev + (interpolationStep * (i - prevIndex)); 173 preprocessingData.SetCell<double>(column.Key, i, interpolated); 174 } 175 } else if (preprocessingData.IsType<DateTime>(column.Key)) { 176 DateTime prev = preprocessingData.GetCell<DateTime>(column.Key, prevIndex); 177 DateTime next = preprocessingData.GetCell<DateTime>(column.Key, nextIndex); 178 double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate; 179 180 for (int i = prevIndex; i < nextIndex; ++i) { 181 DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex)); 182 preprocessingData.SetCell<DateTime>(column.Key, i, interpolated); 183 } 184 } 147 185 } 148 186 … … 232 270 } 233 271 234 public void ShuffleToIndices(IList<System.Tuple<int, int>> indices) 235 { 236 
preprocessingData.InTransaction(() => 237 { 238 for (int i = 0; i < preprocessingData.Columns; ++i) 239 { 240 if (preprocessingData.IsType<double>(i)) 241 { 272 public void ShuffleToIndices(IList<System.Tuple<int, int>> indices) { 273 preprocessingData.InTransaction(() => { 274 for (int i = 0; i < preprocessingData.Columns; ++i) { 275 if (preprocessingData.IsType<double>(i)) { 242 276 ShuffleToIndices<double>(i, indices); 243 } 244 else if (preprocessingData.IsType<string>(i)) 245 { 277 } else if (preprocessingData.IsType<string>(i)) { 246 278 ShuffleToIndices<string>(i, indices); 247 } 248 else if (preprocessingData.IsType<DateTime>(i)) 249 { 279 } else if (preprocessingData.IsType<DateTime>(i)) { 250 280 ShuffleToIndices<DateTime>(i, indices); 251 281 } … … 268 298 } 269 299 270 private void ShuffleToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) 271 { 300 private void ShuffleToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) { 272 301 // process all columns equally 273 foreach (Tuple<int, int> index in indices) 274 { 302 foreach (Tuple<int, int> index in indices) { 275 303 int originalIndex = index.Item1; 276 304 int replaceIndex = index.Item2; … … 297 325 public List<int> RowsWithMissingValuesGreater(double percent) { 298 326 299 List<int> rows= new List<int>(); 300 301 for (int i = 0; i < preprocessingData.Rows; ++i) 302 { 327 List<int> rows = new List<int>(); 328 329 for (int i = 0; i < preprocessingData.Rows; ++i) { 303 330 int missingCount = statisticsLogic.GetRowMissingValueCount(i); 304 if (100f / preprocessingData.Columns * missingCount > percent) 305 { 331 if (100f / preprocessingData.Columns * missingCount > percent) { 306 332 rows.Add(i); 307 333 } … … 328 354 List<int> columns = new List<int>(); 329 355 for (int i = 0; i < preprocessingData.Columns; ++i) { 330 if (preprocessingData.IsType<double>(i) || preprocessingData.IsType<DateTime>(i)) 331 { 356 if (preprocessingData.IsType<double>(i) || 
preprocessingData.IsType<DateTime>(i)) { 332 357 double columnVariance = statisticsLogic.GetVariance(i); 333 if (columnVariance < variance) 334 { 358 if (columnVariance < variance) { 335 359 columns.Add(i); 336 360 } … … 355 379 rows.Sort(); 356 380 rows.Reverse(); 357 preprocessingData.InTransaction(() => 358 { 359 foreach (int row in rows) 360 { 381 preprocessingData.InTransaction(() => { 382 foreach (int row in rows) { 361 383 preprocessingData.DeleteRow(row); 362 384 } … … 367 389 columns.Sort(); 368 390 columns.Reverse(); 369 preprocessingData.InTransaction(() => 370 { 371 foreach (int column in columns) 372 { 391 preprocessingData.InTransaction(() => { 392 foreach (int column in columns) { 373 393 preprocessingData.DeleteColumn(column); 374 394 } -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IManipulationLogic.cs
r10811 r10820 29 29 void ShuffleToIndices(IList<System.Tuple<int, int>> indices); 30 30 void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells, bool considerSelection = false); 31 void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells);32 31 void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection = false); 33 32 void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells, bool considerSelection = false); 34 33 void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false); 34 void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells); 35 void ReplaceIndicesBySmoothing(IDictionary<int, IList<int>> cells); 35 36 void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, string value); 36 37 void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value); … … 45 46 46 47 event DataPreprocessingChangedEventHandler Changed; 48 49 47 50 } 48 51 }
Note: See TracChangeset for help on using the changeset viewer.