- Timestamp:
- 12/04/13 16:33:18 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs
// Changeset r10191 -> r10192 (excerpt, state after the change).
// Line numbers in the "not shown" markers refer to r10192.

    private double trainingToTestRatio;
    private StatisticInfo statisticInfo; // added in r10192

    private PreprocessingData(PreprocessingData original, Cloner cloner)
    // ... lines 44-75 not shown in this changeset ...
      Columns = problemData.Dataset.Columns;
      Rows = problemData.Dataset.Rows;

      // added in r10192: statistics helper constructed over this instance
      statisticInfo = new StatisticInfo(this);
    }

    // ... lines 82-106 not shown in this changeset ...

    public IEnumerable<T> GetValues<T>(string variableName) {
      return (IEnumerable<T>)variableValues[variableName];
    }

    // ... lines 112-177 not shown in this changeset ...
        return GetValues<string>(variableName).Select((s, i) => new { i, s }).Where(t => string.IsNullOrEmpty(t.s)).Select(t => t.i);
      } else if (IsType<DateTime>(variableName)) {
        return GetValues<DateTime>(variableName).Select((s, i) => new { i, s }).Where(t => t.s.Equals(DateTime.MinValue)).Select(t => t.i);
      } else {
        throw new ArgumentException("column with variableName: " + variableName + " contains a non supported type.");
      }
    }

    /// <summary>
    /// Writes <paramref name="value"/> into every listed row of the given column
    /// by calling <c>SetCell</c> once per index.
    /// </summary>
    /// <typeparam name="T">Type of the value passed to <c>SetCell</c>.</typeparam>
    /// <param name="variableName">Name of the column to modify.</param>
    /// <param name="indices">Row indices to overwrite.</param>
    /// <param name="value">Replacement value.</param>
    public void ReplaceIndicesByValue<T>(string variableName, IEnumerable<int> indices, T value) {
      foreach (int index in indices) {
        SetCell<T>(variableName, index, value);
      }
    }

    /// <summary>
    /// Overwrites the listed rows with the value returned by
    /// <c>statisticInfo.GetAverage</c> for the column.
    /// </summary>
    /// <param name="variableName">Name of the column to modify; written as <c>double</c>.</param>
    /// <param name="indices">Row indices to overwrite.</param>
    public void ReplaceIndicesByAverageValue(string variableName, IEnumerable<int> indices) {
      double average = statisticInfo.GetAverage(variableName);
      ReplaceIndicesByValue<double>(variableName, indices, average);
    }

    /// <summary>
    /// Overwrites the listed rows with the value returned by
    /// <c>statisticInfo.GetMedian</c> for the column.
    /// </summary>
    /// <remarks>
    /// This method was originally declared as a second
    /// <c>ReplaceIndicesByAverageValue</c> with an identical signature, which
    /// cannot compile; it is named after the statistic it actually uses.
    /// </remarks>
    /// <param name="variableName">Name of the column to modify; written as <c>double</c>.</param>
    /// <param name="indices">Row indices to overwrite.</param>
    public void ReplaceIndicesByMedianValue(string variableName, IEnumerable<int> indices) {
      double median = statisticInfo.GetMedian(variableName);
      ReplaceIndicesByValue<double>(variableName, indices, median);
    }

    /// <summary>
    /// Overwrites each listed row with an independently drawn random value
    /// scaled into the span between the column's reported minimum and maximum.
    /// </summary>
    /// <param name="variableName">Name of the column to modify; written as <c>double</c>.</param>
    /// <param name="indices">Row indices to overwrite.</param>
    public void ReplaceIndicesByRandomValue(string variableName, IEnumerable<int> indices) {
      Random r = new Random();

      double max = statisticInfo.GetMax<double>(variableName);
      double min = statisticInfo.GetMin<double>(variableName);
      double randMultiplier = (max - min);
      foreach (int index in indices) {
        // NextDouble() is in [0, 1), so rand lies in [min, max) when max > min.
        double rand = r.NextDouble() * randMultiplier + min;
        SetCell<double>(variableName, index, rand);
      }
    }

    /// <summary>
    /// Overwrites each listed row with the arithmetic mean of its two direct
    /// neighbours (rows <c>index - 1</c> and <c>index + 1</c>). The first and
    /// last rows of the column have only one neighbour and are left untouched.
    /// </summary>
    /// <param name="variableName">Name of the column to modify; read and written as <c>double</c>.</param>
    /// <param name="indices">Row indices to overwrite.</param>
    public void ReplaceIndicesByLinearInterpolationOfNeighbours(string variableName, IEnumerable<int> indices) {
      int countValues = GetValues<double>(variableName).Count();
      foreach (int index in indices) {
        // Skip the first and the last row: index + 1 must still be a valid row,
        // so the upper bound is countValues - 1 (exclusive).
        if (index > 0 && index < countValues - 1) {
          // NOTE(review): neighbours are read at call time; presumably a neighbour
          // replaced earlier in this loop is seen with its new value - confirm.
          double prev = GetCell<double>(variableName, index - 1);
          double next = GetCell<double>(variableName, index + 1);

          double interpolated = (prev + next) / 2;

          SetCell<double>(variableName, index, interpolated);
        }
      }
    }

    /// <summary>
    /// Overwrites the listed rows with the value returned by
    /// <c>statisticInfo.GetMostCommonValue</c> for the column, dispatching on
    /// the column type (<c>double</c>, <c>string</c> or <c>DateTime</c>).
    /// </summary>
    /// <param name="variableName">Name of the column to modify.</param>
    /// <param name="indices">Row indices to overwrite.</param>
    /// <exception cref="ArgumentException">
    /// The column is not of type <c>double</c>, <c>string</c> or <c>DateTime</c>.
    /// </exception>
    public void ReplaceIndicesByMostCommonValue(string variableName, IEnumerable<int> indices) {
      if (IsType<double>(variableName)) {
        ReplaceIndicesByValue<double>(variableName, indices, statisticInfo.GetMostCommonValue<double>(variableName));
      } else if (IsType<string>(variableName)) {
        ReplaceIndicesByValue<string>(variableName, indices, statisticInfo.GetMostCommonValue<string>(variableName));
      } else if (IsType<DateTime>(variableName)) {
        ReplaceIndicesByValue<DateTime>(variableName, indices, statisticInfo.GetMostCommonValue<DateTime>(variableName));
      } else {
        throw new ArgumentException("column with variableName: " + variableName + " contains a non supported type.");
      }
    }
Note: See TracChangeset
for help on using the changeset viewer.