- Timestamp: 10/25/17 12:38:12 (7 years ago)
- Location: branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/DataGridContent.cs
r15309 r15431 90 90 91 91 public void DeleteRows(IEnumerable<int> rows) { 92 PreprocessingData.DeleteRows (rows);92 PreprocessingData.DeleteRowsWithIndices(rows); 93 93 } 94 94 … … 134 134 135 135 #region Manipulations 136 private void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, Func<int, double> doubleAggregator = null, 137 Func<int, DateTime> dateTimeAggregator = null, Func<int, string> stringAggregator = null) { 138 PreprocessingData.InTransaction(() => { 139 foreach (var column in cells) { 140 if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) { 141 var value = doubleAggregator(column.Key); 142 foreach (int index in column.Value) 143 PreprocessingData.SetCell<double>(column.Key, index, value); 144 } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(column.Key)) { 145 var value = dateTimeAggregator(column.Key); 146 foreach (int index in column.Value) 147 PreprocessingData.SetCell<DateTime>(column.Key, index, value); 148 } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) { 149 var value = stringAggregator(column.Key); 150 foreach (int index in column.Value) 151 PreprocessingData.SetCell<string>(column.Key, index, value); 152 } 153 } 154 }); 155 } 156 157 private void ReplaceIndicesByValues(IDictionary<int, IList<int>> cells, Func<int, IEnumerable<double>> doubleAggregator = null, 158 Func<int, IEnumerable<DateTime>> dateTimeAggregator = null, Func<int, IEnumerable<string>> stringAggregator = null) { 159 PreprocessingData.InTransaction(() => { 160 foreach (var column in cells) { 161 if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) { 162 var values = doubleAggregator(column.Key); 163 foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value })) 164 PreprocessingData.SetCell<double>(column.Key, pair.row, pair.value); 165 } else if (dateTimeAggregator != null && 
PreprocessingData.VariableHasType<DateTime>(column.Key)) { 166 var values = dateTimeAggregator(column.Key); 167 foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value })) 168 PreprocessingData.SetCell<DateTime>(column.Key, pair.row, pair.value); 169 } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) { 170 var values = stringAggregator(column.Key); 171 foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value })) 172 PreprocessingData.SetCell<string>(column.Key, pair.row, pair.value); 173 } 174 } 175 }); 176 } 177 136 178 public void ReplaceIndicesByMean(IDictionary<int, IList<int>> cells, bool considerSelection = false) { 137 PreprocessingData.InTransaction(() => { 138 foreach (var column in cells) { 139 PreprocessingData.DataColumns[column.Key].TypeSwitch( 140 c => { 141 var mean = c.GetMean(considerSelection ? column.Value : null); 142 foreach (var index in column.Value) c[index] = mean; 143 }, 144 dateTimeAction: c => { 145 var mean = c.GetMean(considerSelection ? column.Value : null); 146 foreach (var index in column.Value) c[index] = mean; 147 }); 148 } 149 }); 179 ReplaceIndicesByValue(cells, 180 col => PreprocessingData.GetMean<double>(col, considerSelection), 181 col => PreprocessingData.GetMean<DateTime>(col, considerSelection)); 150 182 } 151 183 152 184 public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) { 153 PreprocessingData.InTransaction(() => { 154 foreach (var column in cells) { 155 PreprocessingData.DataColumns[column.Key].TypeSwitch( 156 c => { 157 var median = c.GetMedian(considerSelection ? column.Value : null); 158 foreach (var index in column.Value) c[index] = median; 159 }, 160 c => { 161 var median = c.GetMedian(considerSelection ? column.Value : null); 162 foreach (var index in column.Value) c[index] = median; 163 }, 164 c => { 165 var median = c.GetMedian(considerSelection ? 
column.Value : null); 166 foreach (var index in column.Value) c[index] = median; 167 }); 168 } 169 }); 185 ReplaceIndicesByValue(cells, 186 col => PreprocessingData.GetMedian<double>(col, considerSelection), 187 col => PreprocessingData.GetMedian<DateTime>(col, considerSelection)); 170 188 } 171 189 172 190 public void ReplaceIndicesByMode(IDictionary<int, IList<int>> cells, bool considerSelection = false) { 173 PreprocessingData.InTransaction(() => { 174 foreach (var column in cells) { 175 PreprocessingData.DataColumns[column.Key].TypeSwitch( 176 c => { 177 var mode = c.GetMode(considerSelection ? column.Value : null); 178 foreach (var index in column.Value) c[index] = mode; 179 }, 180 c => { 181 var mode = c.GetMode(considerSelection ? column.Value : null); 182 foreach (var index in column.Value) c[index] = mode; 183 }, 184 c => { 185 var mode = c.GetMode(considerSelection ? column.Value : null); 186 foreach (var index in column.Value) c[index] = mode; 187 }); 188 } 189 }); 191 ReplaceIndicesByValue(cells, 192 col => PreprocessingData.GetMode<double>(col, considerSelection), 193 col => PreprocessingData.GetMode<DateTime>(col, considerSelection), 194 col => PreprocessingData.GetMode<string>(col, considerSelection)); 190 195 } 191 196 192 197 public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) { 193 198 var rand = new FastRandom(); 194 PreprocessingData.InTransaction(() => { 195 foreach (var column in cells) { 196 PreprocessingData.DataColumns[column.Key].TypeSwitch( 197 c => { 198 double min = c.GetMin(considerSelection ? column.Value : null); 199 double max = c.GetMax(considerSelection ? column.Value : null); 200 double range = max - min; 201 foreach (var index in column.Value) c[index] = min + rand.NextDouble() * range; 202 }, 203 dateTimeAction: c => { 204 var min = c.GetMin(considerSelection ? column.Value : null); 205 var max = c.GetMax(considerSelection ? 
column.Value : null); 206 double range = (max - min).TotalSeconds; 207 foreach (var index in column.Value) c[index] = min + TimeSpan.FromSeconds(rand.NextDouble() * range); 208 }); 209 } 210 }); 199 ReplaceIndicesByValues(cells, 200 col => { 201 double min = PreprocessingData.GetMin<double>(col, considerSelection); 202 double max = PreprocessingData.GetMax<double>(col, considerSelection); 203 double range = max - min; 204 return cells[col].Select(_ => rand.NextDouble() * range + min); 205 }, 206 col => { 207 var min = PreprocessingData.GetMin<DateTime>(col, considerSelection); 208 var max = PreprocessingData.GetMax<DateTime>(col, considerSelection); 209 double range = (max - min).TotalSeconds; 210 return cells[col].Select(_ => min + TimeSpan.FromSeconds(rand.NextDouble() * range)); 211 }); 211 212 } 212 213 … … 215 216 foreach (var column in cells) { 216 217 foreach (var rowIdx in column.Value) { 217 PreprocessingData. DataColumns[column.Key].SetValue(value, rowIdx);218 PreprocessingData.SetValue(value, column.Key, rowIdx); 218 219 } 219 220 } … … 256 257 int valuesToInterpolate = nextIndex - prevIndex; 257 258 258 PreprocessingData.DataColumns[column.Key].TypeSwitch( 259 c => { 260 double prev = c[prevIndex]; 261 double next = c[nextIndex]; 262 double interpolationStep = (next - prev) / valuesToInterpolate; 263 for (int i = prevIndex; i < nextIndex; i++) c[i] = prev + (interpolationStep * (i - prevIndex)); 264 }, 265 dateTimeAction: c => { 266 var prev = c[prevIndex]; 267 var next = c[nextIndex]; 268 double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate; 269 for (int i = prevIndex; i < nextIndex; i++) c[i] = prev.AddSeconds(interpolationStep * (i - prevIndex)); 270 } 271 ); 259 if (PreprocessingData.VariableHasType<double>(column.Key)) { 260 double prev = PreprocessingData.GetCell<double>(column.Key, prevIndex); 261 double next = PreprocessingData.GetCell<double>(column.Key, nextIndex); 262 double interpolationStep = (next - prev) / 
valuesToInterpolate; 263 264 for (int i = prevIndex; i < nextIndex; ++i) { 265 double interpolated = prev + (interpolationStep * (i - prevIndex)); 266 PreprocessingData.SetCell<double>(column.Key, i, interpolated); 267 } 268 } else if (PreprocessingData.VariableHasType<DateTime>(column.Key)) { 269 DateTime prev = PreprocessingData.GetCell<DateTime>(column.Key, prevIndex); 270 DateTime next = PreprocessingData.GetCell<DateTime>(column.Key, nextIndex); 271 double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate; 272 273 for (int i = prevIndex; i < nextIndex; ++i) { 274 DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex)); 275 PreprocessingData.SetCell<DateTime>(column.Key, i, interpolated); 276 } 277 } 272 278 } 273 279 274 280 private int IndexOfPrevPresentValue(int columnIndex, int start) { 275 int index = start - 1; 276 while (index >= 0 && PreprocessingData.IsCellEmpty(columnIndex, index)) 277 index--; 278 return index; 281 int offset = start - 1; 282 while (offset >= 0 && PreprocessingData.IsCellEmpty(columnIndex, offset)) { 283 offset--; 284 } 285 286 return offset; 279 287 } 280 288 281 289 private int IndexOfNextPresentValue(int columnIndex, int start) { 282 int index = start + 1; 283 while (index < PreprocessingData.Rows && PreprocessingData.IsCellEmpty(columnIndex, index)) 284 index++; 285 return index; 290 int offset = start + 1; 291 while (offset < PreprocessingData.Rows && PreprocessingData.IsCellEmpty(columnIndex, offset)) { 292 offset++; 293 } 294 295 return offset; 286 296 } 287 297 … … 293 303 PreprocessingData.InTransaction(() => { 294 304 // process all given ranges - e.g. 
TrainingPartition, TestPartition 295 foreach ( varrange in ranges) {305 foreach (IntRange range in ranges) { 296 306 var indices = Enumerable.Range(0, PreprocessingData.Rows).ToArray(); 297 307 var shuffledIndices = Enumerable.Range(range.Start, range.Size).Shuffle(random).ToArray(); … … 314 324 public void ReOrderToIndices(int[] indices) { 315 325 PreprocessingData.InTransaction(() => { 316 for each (var column in PreprocessingData.DataColumns) {317 column.TypeSwitch(318 c => {319 if (indices.Length != c.Values.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");320 var originalData = new List<double>(c.Values);321 for (int i = 0; i < indices.Length; i++) c[i] = originalData[indices[i]];322 },323 c => {324 if (indices.Length != c.Values.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");325 var originalData = new List<string>(c.Values);326 for (int i = 0; i < indices.Length; i++) c[i] = originalData[indices[i]]; 327 },328 c => {329 if (indices.Length != c.Values.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");330 var originalData = new List<DateTime>(c.Values); 331 for (int i = 0; i < indices.Length; i++) c[i] = originalData[indices[i]];332 });333 }334 } );326 for (int i = 0; i < PreprocessingData.Columns; ++i) { 327 if (PreprocessingData.VariableHasType<double>(i)) 328 ReOrderToIndices<double>(i, indices); 329 else if (PreprocessingData.VariableHasType<string>(i)) 330 ReOrderToIndices<string>(i, indices); 331 else if (PreprocessingData.VariableHasType<DateTime>(i)) 332 ReOrderToIndices<DateTime>(i, indices); 333 } 334 }); 335 } 336 337 private void ReOrderToIndices<T>(int columnIndex, int[] indices) { 338 var originalData = new List<T>(PreprocessingData.GetValues<T>(columnIndex)); 339 if (indices.Length != originalData.Count) throw new InvalidOperationException("The number of provided indices does not 
match the values."); 340 341 for (int i = 0; i < indices.Length; i++) { 342 T newValue = originalData[indices[i]]; 343 PreprocessingData.SetCell<T>(columnIndex, i, newValue); 344 } 335 345 } 336 346 #endregion -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/ManipulationContent.cs
r15309 r15431 57 57 58 58 for (int i = 0; i < PreprocessingData.Rows; ++i) { 59 int missingCount = 0; 60 for (var col = 0; col < PreprocessingData.DataColumns.Count; col++) { 61 if (!PreprocessingData.DataColumns[col].IsValidValue(i)) 62 missingCount++; 59 int missingCount = PreprocessingData.GetRowMissingValueCount(i); 60 if (100f / PreprocessingData.Columns * missingCount > percent) { 61 rows.Add(i); 63 62 } 64 if (100f / PreprocessingData.Columns * missingCount > percent)65 rows.Add(i);66 63 } 67 64 … … 72 69 List<int> columns = new List<int>(); 73 70 for (int i = 0; i < PreprocessingData.Columns; ++i) { 74 int missingCount = PreprocessingData. DataColumns[i].GetNumberOfMissingValues();71 int missingCount = PreprocessingData.GetMissingValueCount(i); 75 72 if (100f / PreprocessingData.Rows * missingCount > percent) { 76 73 columns.Add(i); … … 83 80 public List<int> ColumnsWithVarianceSmaller(double variance) { 84 81 List<int> columns = new List<int>(); 85 86 for (int i = 0; i < PreprocessingData.Columns; i++) { 87 if (PreprocessingData.DataColumns[i].TypeSwitch<bool>( 88 c => c.GetVariance() < variance, 89 c => false, 90 c => c.GetVariance().Ticks / TimeSpan.TicksPerSecond < variance 91 )) 92 columns.Add(i); 82 for (int i = 0; i < PreprocessingData.Columns; ++i) { 83 if (PreprocessingData.VariableHasType<double>(i)) { 84 double columnVariance = PreprocessingData.GetVariance<double>(i); 85 if (columnVariance < variance) { 86 columns.Add(i); 87 } 88 } else if (PreprocessingData.VariableHasType<DateTime>(i)) { 89 double columnVariance = (double)PreprocessingData.GetVariance<DateTime>(i).Ticks / TimeSpan.TicksPerSecond; 90 if (columnVariance < variance) { 91 columns.Add(i); 92 } 93 } 93 94 } 94 95 95 return columns; 96 96 } … … 119 119 PreprocessingData.InTransaction(() => { 120 120 foreach (int column in columns.OrderByDescending(x => x)) { 121 PreprocessingData.D ataColumns.RemoveAt(column);121 PreprocessingData.DeleteColumn(column); 122 122 } 123 123 }); -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/PreprocessingChartContent.cs
r15309 r15431 82 82 83 83 public static DataRow CreateDataRow(IFilteredPreprocessingData preprocessingData, string variableName, DataRowVisualProperties.DataRowChartType chartType) { 84 var values = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableName)); 84 IList<double> values = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableName)); 85 85 DataRow row = new DataRow(variableName, "", values); 86 86 row.VisualProperties.ChartType = chartType; -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/ScatterPlotContent.cs
r15309 r15431 21 21 22 22 using System; 23 using System.Collections.Generic; 23 24 using System.Linq; 24 25 using HeuristicLab.Analysis; … … 50 51 #endregion 51 52 52 public static ScatterPlot CreateScatterPlot(IFilteredPreprocessingData preprocessingData, string variableNameX, string variableNameY, 53 string variableNameGroup = "-", LegendOrder legendOrder = LegendOrder.Alphabetically) { 53 public static ScatterPlot CreateScatterPlot(IFilteredPreprocessingData preprocessingData, string variableNameX, string variableNameY, string variableNameGroup = "-", LegendOrder legendOrder = LegendOrder.Alphabetically) { 54 54 ScatterPlot scatterPlot = new ScatterPlot(); 55 55 56 var xValues = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableNameX)); 57 var yValues = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableNameY)); 56 IList<double> xValues = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableNameX)); 57 IList<double> yValues = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableNameY)); 58 58 59 59 var points = xValues.Zip(yValues, (x, y) => new Point2D<double>(x, y)).ToList();
Note: See TracChangeset for help on using the changeset viewer.