using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace HeuristicLab.DataPreprocessing.Implementations {
  /// <summary>
  /// Overwrites selected cells of a single variable (column) of an
  /// <see cref="IPreprocessingData"/> instance with replacement values
  /// (a constant, a statistic of the column, a random value or an interpolation).
  /// </summary>
  /// <remarks>
  /// NOTE(review): the generic type arguments in this class (<c>&lt;T&gt;</c>,
  /// <c>IEnumerable&lt;int&gt;</c>, <c>GetValues&lt;double&gt;</c>, <c>GetCell&lt;double&gt;</c>,
  /// <c>IsType&lt;...&gt;</c>, <c>GetMostCommonValue&lt;...&gt;</c>) were missing from the reviewed
  /// text and have been reconstructed from how the values are used. Confirm them
  /// against the declarations of IPreprocessingData and StatisticInfo.
  /// </remarks>
  class PreprocessingDataManipulation {
    private readonly IPreprocessingData preprocessingData;
    private readonly StatisticInfo statisticInfo;

    // One generator per instance: creating a new Random for every call can yield
    // identical sequences when calls happen in quick succession on runtimes that
    // seed from the system clock.
    private readonly Random random = new Random();

    /// <summary>
    /// Creates a manipulator that writes to the given data and derives its
    /// statistics from a <see cref="StatisticInfo"/> built on the same data.
    /// </summary>
    /// <param name="_prepocessingData">The data whose cells are replaced.</param>
    public PreprocessingDataManipulation(IPreprocessingData _prepocessingData) {
      preprocessingData = _prepocessingData;
      statisticInfo = new StatisticInfo(preprocessingData);
    }

    /// <summary>
    /// Sets every listed cell of the variable to <paramref name="value"/>.
    /// </summary>
    /// <typeparam name="T">Type of the value written into the cells.</typeparam>
    /// <param name="variableName">Name of the variable (column) to modify.</param>
    /// <param name="indices">Row indices of the cells to overwrite.</param>
    /// <param name="value">The replacement value.</param>
    public void ReplaceIndicesByValue<T>(string variableName, IEnumerable<int> indices, T value) {
      foreach (int index in indices) {
        preprocessingData.SetCell<T>(variableName, index, value);
      }
    }

    /// <summary>
    /// Sets every listed cell to the value of <c>statisticInfo.GetAverage</c> for the variable.
    /// </summary>
    /// <param name="variableName">Name of the variable (column) to modify.</param>
    /// <param name="indices">Row indices of the cells to overwrite.</param>
    public void ReplaceIndicesByAverageValue(string variableName, IEnumerable<int> indices) {
      double average = statisticInfo.GetAverage(variableName);
      ReplaceIndicesByValue<double>(variableName, indices, average);
    }

    /// <summary>
    /// Sets every listed cell to the value of <c>statisticInfo.GetMedian</c> for the variable.
    /// </summary>
    /// <param name="variableName">Name of the variable (column) to modify.</param>
    /// <param name="indices">Row indices of the cells to overwrite.</param>
    public void ReplaceIndicesByMedianValue(string variableName, IEnumerable<int> indices) {
      double median = statisticInfo.GetMedian(variableName);
      ReplaceIndicesByValue<double>(variableName, indices, median);
    }

    /// <summary>
    /// Sets every listed cell to its own random value drawn between the
    /// variable's minimum and maximum as reported by <c>statisticInfo</c>.
    /// </summary>
    /// <param name="variableName">Name of the variable (column) to modify.</param>
    /// <param name="indices">Row indices of the cells to overwrite.</param>
    public void ReplaceIndicesByRandomValue(string variableName, IEnumerable<int> indices) {
      double max = statisticInfo.GetMax(variableName);
      double min = statisticInfo.GetMin(variableName);
      double range = max - min;

      foreach (int index in indices) {
        // NextDouble() is in [0, 1), so the result lies in [min, max) when max > min.
        double rand = random.NextDouble() * range + min;
        preprocessingData.SetCell<double>(variableName, index, rand);
      }
    }

    /// <summary>
    /// Sets every listed cell to the arithmetic mean of its two direct neighbours
    /// (rows <c>index - 1</c> and <c>index + 1</c>). Indices that do not have both
    /// neighbours (the first row, the last row and anything out of range) are skipped.
    /// </summary>
    /// <remarks>
    /// Neighbours are read at the moment each index is processed, so a cell that was
    /// replaced earlier in the same call contributes its new value to later cells.
    /// </remarks>
    /// <param name="variableName">Name of the variable (column) to modify.</param>
    /// <param name="indices">Row indices of the cells to overwrite.</param>
    public void ReplaceIndicesByLinearInterpolationOfNeighbours(string variableName, IEnumerable<int> indices) {
      int countValues = preprocessingData.GetValues<double>(variableName).Count();
      int lastIndex = countValues - 1;

      foreach (int index in indices) {
        // Don't replace the first or last value: the last row has no successor, so
        // the upper bound must be exclusive of lastIndex (index + 1 is read below).
        if (index > 0 && index < lastIndex) {
          double prev = preprocessingData.GetCell<double>(variableName, index - 1);
          double next = preprocessingData.GetCell<double>(variableName, index + 1);
          double interpolated = (prev + next) / 2;
          preprocessingData.SetCell<double>(variableName, index, interpolated);
        }
      }
    }

    /// <summary>
    /// Sets every listed cell to the value of <c>statisticInfo.GetMostCommonValue</c>
    /// for the variable, dispatching on the variable's element type.
    /// </summary>
    /// <param name="variableName">Name of the variable (column) to modify.</param>
    /// <param name="indices">Row indices of the cells to overwrite.</param>
    /// <exception cref="ArgumentException">
    /// The variable's type matches none of the handled types.
    /// </exception>
    public void ReplaceIndicesByMostCommonValue(string variableName, IEnumerable<int> indices) {
      // NOTE(review): the three branches were textually identical in the reviewed text
      // (type arguments missing). double / string / DateTime are assumed here - confirm
      // against the column types IPreprocessingData actually supports.
      if (preprocessingData.IsType<double>(variableName)) {
        ReplaceIndicesByValue<double>(variableName, indices, statisticInfo.GetMostCommonValue<double>(variableName));
      } else if (preprocessingData.IsType<string>(variableName)) {
        ReplaceIndicesByValue<string>(variableName, indices, statisticInfo.GetMostCommonValue<string>(variableName));
      } else if (preprocessingData.IsType<DateTime>(variableName)) {
        ReplaceIndicesByValue<DateTime>(variableName, indices, statisticInfo.GetMostCommonValue<DateTime>(variableName));
      } else {
        throw new ArgumentException("column with index: " + variableName + " contains a non supported type.");
      }
    }
  }
}