Changeset 10367 for branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingDataManipulation.cs
- Timestamp: 01/22/14 14:13:11 (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingDataManipulation.cs
r10311 r10367 3 3 using System.Linq; 4 4 using HeuristicLab.Data; 5 using System.Collections;6 5 7 6 namespace HeuristicLab.DataPreprocessing { … … 17 16 } 18 17 19 public void ReplaceIndicesByValue<T>( string variableName, IEnumerable<int> indices, T value) {20 foreach (int index in indices) {21 preprocessingData.SetCell<T>( variableName, index, value);18 public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) { 19 foreach (int index in rowIndices) { 20 preprocessingData.SetCell<T>(columnIndex, index, value); 22 21 } 23 22 } 24 23 25 public void ReplaceIndicesByAverageValue( string variableName, IEnumerable<int> indices) {26 double average = statisticInfo.GetAverage( variableName);27 ReplaceIndicesByValue<double>( variableName, indices, average);24 public void ReplaceIndicesByAverageValue(int columnIndex, IEnumerable<int> rowIndices) { 25 double average = statisticInfo.GetAverage(columnIndex); 26 ReplaceIndicesByValue<double>(columnIndex, rowIndices, average); 28 27 } 29 28 30 public void ReplaceIndicesByMedianValue( string variableName, IEnumerable<int> indices) {31 double median = statisticInfo.GetMedian( variableName);32 ReplaceIndicesByValue<double>( variableName, indices, median);29 public void ReplaceIndicesByMedianValue(int columnIndex, IEnumerable<int> rowIndices) { 30 double median = statisticInfo.GetMedian(columnIndex); 31 ReplaceIndicesByValue<double>(columnIndex, rowIndices, median); 33 32 } 34 33 35 public void ReplaceIndicesByRandomValue( string variableName, IEnumerable<int> indices) {34 public void ReplaceIndicesByRandomValue(int columnIndex, IEnumerable<int> rowIndices) { 36 35 Random r = new Random(); 37 36 38 double max = statisticInfo.GetMax<double>( variableName);39 double min = statisticInfo.GetMin<double>( variableName);37 double max = statisticInfo.GetMax<double>(columnIndex); 38 double min = statisticInfo.GetMin<double>(columnIndex); 40 39 double randMultiplier = (max - min); 41 foreach (int index in 
indices) {40 foreach (int index in rowIndices) { 42 41 double rand = r.NextDouble() * randMultiplier + min; 43 preprocessingData.SetCell<double>( variableName, index, rand);42 preprocessingData.SetCell<double>(columnIndex, index, rand); 44 43 } 45 44 } 46 45 47 public void ReplaceIndicesByLinearInterpolationOfNeighbours( string variableName, IEnumerable<int> indices) {48 int countValues = preprocessingData.GetValues<double>( variableName).Count();49 foreach (int index in indices) {46 public void ReplaceIndicesByLinearInterpolationOfNeighbours(int columnIndex, IEnumerable<int> rowIndices) { 47 int countValues = preprocessingData.GetValues<double>(columnIndex).Count(); 48 foreach (int index in rowIndices) { 50 49 // dont replace first or last values 51 50 if (index > 0 && index < countValues) { 52 int prevIndex = indexOfPrevPresentValue( variableName, index);53 int nextIndex = indexOfNextPresentValue( variableName, index);51 int prevIndex = indexOfPrevPresentValue(columnIndex, index); 52 int nextIndex = indexOfNextPresentValue(columnIndex, index); 54 53 55 54 // no neighbours found … … 57 56 continue; 58 57 } 59 double prev = preprocessingData.GetCell<double>( variableName, prevIndex);60 double next = preprocessingData.GetCell<double>( variableName, nextIndex);58 double prev = preprocessingData.GetCell<double>(columnIndex, prevIndex); 59 double next = preprocessingData.GetCell<double>(columnIndex, nextIndex); 61 60 62 61 int valuesToInterpolate = nextIndex - prevIndex; … … 66 65 for (int i = prevIndex; i < nextIndex; ++i) { 67 66 double interpolated = prev + (interpolationStep * (i - prevIndex)); 68 preprocessingData.SetCell<double>( variableName, i, interpolated);67 preprocessingData.SetCell<double>(columnIndex, i, interpolated); 69 68 } 70 69 } … … 72 71 } 73 72 74 private int indexOfPrevPresentValue( string variableName, int start) {73 private int indexOfPrevPresentValue(int columnIndex, int start) { 75 74 int offset = start - 1; 76 while (offset >= 0 && 
searchLogic.IsMissingValue( variableName, offset)) {75 while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) { 77 76 offset--; 78 77 } … … 81 80 } 82 81 83 private int indexOfNextPresentValue( string variableName, int start) {82 private int indexOfNextPresentValue(int columnIndex, int start) { 84 83 int offset = start + 1; 85 while (offset < preprocessingData.Rows && searchLogic.IsMissingValue( variableName, offset)) {84 while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) { 86 85 offset++; 87 86 } … … 90 89 } 91 90 92 public void ReplaceIndicesByMostCommonValue( string variableName, IEnumerable<int> indices) {93 if (preprocessingData.IsType<double>( variableName)) {94 ReplaceIndicesByValue<double>( variableName, indices, statisticInfo.GetMostCommonValue<double>(variableName));95 } else if (preprocessingData.IsType<string>( variableName)) {96 ReplaceIndicesByValue<string>( variableName, indices, statisticInfo.GetMostCommonValue<string>(variableName));97 } else if (preprocessingData.IsType<DateTime>( variableName)) {98 ReplaceIndicesByValue<DateTime>( variableName, indices, statisticInfo.GetMostCommonValue<DateTime>(variableName));91 public void ReplaceIndicesByMostCommonValue(int columnIndex, IEnumerable<int> rowIndices) { 92 if (preprocessingData.IsType<double>(columnIndex)) { 93 ReplaceIndicesByValue<double>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<double>(columnIndex)); 94 } else if (preprocessingData.IsType<string>(columnIndex)) { 95 ReplaceIndicesByValue<string>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<string>(columnIndex)); 96 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 97 ReplaceIndicesByValue<DateTime>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<DateTime>(columnIndex)); 99 98 } else { 100 throw new ArgumentException("column with index: " + variableName+ " contains a non supported type.");99 throw new ArgumentException("column with index: 
" + columnIndex + " contains a non supported type."); 101 100 } 102 101 } … … 131 130 132 131 public void reOrderToIndices(IList<System.Tuple<int, int>> indices) { 133 for each (string variableName in preprocessingData.VariableNames) {134 if (preprocessingData.IsType<double>( variableName)) {135 reOrderToIndices<double>( variableName, indices);136 } else if (preprocessingData.IsType<string>( variableName)) {137 reOrderToIndices<string>( variableName, indices);138 } else if (preprocessingData.IsType<DateTime>( variableName)) {139 reOrderToIndices<DateTime>( variableName, indices);132 for (int i = 0; i < preprocessingData.Columns; ++i) { 133 if (preprocessingData.IsType<double>(i)) { 134 reOrderToIndices<double>(i, indices); 135 } else if (preprocessingData.IsType<string>(i)) { 136 reOrderToIndices<string>(i, indices); 137 } else if (preprocessingData.IsType<DateTime>(i)) { 138 reOrderToIndices<DateTime>(i, indices); 140 139 } 141 140 } 142 141 } 143 142 144 private void reOrderToIndices<T>( string variableName, IList<Tuple<int, int>> indices) {143 private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) { 145 144 146 List<T> originalData = new List<T>(preprocessingData.GetValues<T>( variableName));145 List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex)); 147 146 148 147 // process all columns equally … … 152 151 153 152 T replaceValue = originalData.ElementAt<T>(replaceIndex); 154 preprocessingData.SetCell<T>( variableName, originalIndex, replaceValue);153 preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue); 155 154 } 156 155 }
Note: See TracChangeset for help on using the changeset viewer.