Changeset 10715 for branches/DataPreprocessing
- Timestamp:
- 04/02/14 15:48:53 (11 years ago)
- Location:
- branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs
r10711 r10715 257 257 258 258 259 public List<int> RowsWithMissingValuesGreater(double percent) { 260 261 List<int> rows= new List<int>(); 262 263 for (int i = preprocessingData.Rows - 1; i >= 0; --i) 264 { 265 int missingCount = statisticsLogic.GetRowMissingValueCount(i); 266 if (100f / preprocessingData.Columns * missingCount >= percent) 267 { 268 rows.Add(i); 269 } 270 } 271 272 return rows; 273 } 274 275 public List<int> ColumnsWithMissingValuesGreater(double percent) { 276 277 List<int> columns = new List<int>(); 278 for (int i = preprocessingData.Columns - 1; i >= 0; --i) 279 { 280 int missingCount = statisticsLogic.GetMissingValueCount(i); 281 if (100f / preprocessingData.Columns * missingCount >= percent) 282 { 283 columns.Add(i); 284 } 285 } 286 287 return columns; 288 } 289 290 public List<int> ColumnsWithVarianceSmaller(double variance) { 291 292 List<int> columns = new List<int>(); 293 for (int i = preprocessingData.Columns - 1; i >= 0; --i) 294 { 295 if (preprocessingData.IsType<double>(i) || preprocessingData.IsType<DateTime>(i)) 296 { 297 double columnVariance = statisticsLogic.GetVariance(i); 298 if (columnVariance < variance) 299 { 300 columns.Add(i); 301 } 302 } 303 } 304 return columns; 305 } 306 259 307 public void DeleteRowsWithMissingValuesGreater(double percent) { 260 for (int i = 0; i < preprocessingData.Rows; ++i) { 261 int missingCount = statisticsLogic.GetRowMissingValueCount(i); 262 if (100f / preprocessingData.Columns * missingCount >= percent) { 263 preprocessingData.DeleteRow(i); 264 --i; 265 } 266 } 267 } 268 269 public void DeleteColumnsWithMissingValuesGreater(float percent) { 270 for (int i = 0; i < preprocessingData.Columns; ++i) { 271 int missingCount = statisticsLogic.GetMissingValueCount(i); 272 if (100f / preprocessingData.Columns * missingCount >= percent) { 273 preprocessingData.DeleteColumn(i); 274 --i; 275 } 276 } 308 DeleteRows(RowsWithMissingValuesGreater(percent)); 309 } 310 311 public void 
DeleteColumnsWithMissingValuesGreater(double percent) { 312 DeleteColumns(ColumnsWithMissingValuesGreater(percent)); 277 313 } 278 314 279 315 public void DeleteColumnsWithVarianceSmaller(double variance) { 280 for (int i = 0; i < preprocessingData.Columns; ++i) { 281 if (preprocessingData.IsType<double>(i) || preprocessingData.IsType<DateTime>(i)) { 282 double columnVariance = statisticsLogic.GetVariance(i); 283 if (columnVariance < variance) { 284 preprocessingData.DeleteColumn(i); 285 --i; 286 } 287 } 288 } 289 } 290 316 DeleteColumns(ColumnsWithVarianceSmaller(variance)); 317 } 318 319 private void DeleteRows(IEnumerable<int> rows) { 320 preprocessingData.InTransaction(() => 321 { 322 foreach (int row in rows) 323 { 324 preprocessingData.DeleteRow(row); 325 } 326 }); 327 } 328 329 private void DeleteColumns(IEnumerable<int> columns) { 330 preprocessingData.InTransaction(() => 331 { 332 foreach (int column in columns) 333 { 334 preprocessingData.DeleteColumn(column); 335 } 336 }); 337 } 291 338 } 292 339 } -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IManipulationLogic.cs
r10711 r10715 36 36 void ShuffleWithRanges(); 37 37 void ShuffleWithRanges(IEnumerable<HeuristicLab.Data.IntRange> ranges); 38 List<int> RowsWithMissingValuesGreater(double percent); 39 List<int> ColumnsWithMissingValuesGreater(double percent); 40 List<int> ColumnsWithVarianceSmaller(double variance); 38 41 void DeleteRowsWithMissingValuesGreater(double percent); 39 void DeleteColumnsWithMissingValuesGreater(float percent); 42 void DeleteColumnsWithMissingValuesGreater(double percent); 40 43 void DeleteColumnsWithVarianceSmaller(double variance); 41 44 }
Note: See TracChangeset
for help on using the changeset viewer.