#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Data;

namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Replaces selected cells of the preprocessing data with derived values
  /// (average, median, random, interpolated, most common) and reorders rows.
  /// Multi-cell operations are wrapped in a single transaction of the
  /// underlying <see cref="ITransactionalPreprocessingData"/>.
  /// </summary>
  public class ManipulationLogic : IManipulationLogic {
    private readonly ITransactionalPreprocessingData preprocessingData;
    private readonly IStatisticsLogic statisticsLogic;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates the manipulation logic on top of the given data and helper logics.
    /// </summary>
    /// <param name="_prepocessingData">Data set whose cells are read and written.</param>
    /// <param name="theSearchLogic">Used to detect missing values while interpolating.</param>
    /// <param name="theStatisticsLogic">Supplies the per-column statistics used as replacement values.</param>
    public ManipulationLogic(ITransactionalPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic) {
      preprocessingData = _prepocessingData;
      searchLogic = theSearchLogic;
      statisticsLogic = theStatisticsLogic;
    }

    /// <summary>
    /// Writes <paramref name="value"/> into every listed row of one column.
    /// </summary>
    /// <typeparam name="T">Type of the value written to the cells.</typeparam>
    /// <param name="columnIndex">Index of the column to modify.</param>
    /// <param name="rowIndices">Rows of that column to overwrite.</param>
    /// <param name="value">Replacement value.</param>
    public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
      foreach (int index in rowIndices) {
        preprocessingData.SetCell<T>(columnIndex, index, value);
      }
    }

    /// <summary>
    /// Replaces the given cells by the average of their column.
    /// Columns that are neither <c>double</c> nor <c>DateTime</c> are left untouched.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByAverageValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            double average = statisticsLogic.GetAverage(column.Key);
            ReplaceIndicesByValue<double>(column.Key, column.Value, average);
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            DateTime average = statisticsLogic.GetAverageDateTime(column.Key);
            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, average);
          }
        }
      });
    }

    /// <summary>
    /// Replaces the given cells by the median of their column.
    /// Columns that are neither <c>double</c> nor <c>DateTime</c> are left untouched.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByMedianValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            double median = statisticsLogic.GetMedian(column.Key);
            ReplaceIndicesByValue<double>(column.Key, column.Value, median);
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            DateTime median = statisticsLogic.GetMedianDateTime(column.Key);
            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, median);
          }
        }
      });
    }

    /// <summary>
    /// Replaces each given cell by an individually drawn random value between the
    /// minimum and the maximum of its column.
    /// Columns that are neither <c>double</c> nor <c>DateTime</c> are left untouched.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByRandomValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        Random r = new Random();

        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            double max = statisticsLogic.GetMax<double>(column.Key);
            double min = statisticsLogic.GetMin<double>(column.Key);
            double randMultiplier = (max - min);
            foreach (int index in column.Value) {
              double rand = r.NextDouble() * randMultiplier + min;
              preprocessingData.SetCell<double>(column.Key, index, rand);
            }
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            DateTime min = statisticsLogic.GetMin<DateTime>(column.Key);
            DateTime max = statisticsLogic.GetMax<DateTime>(column.Key);
            // DateTime values are drawn as a random offset in seconds from the minimum
            double randMultiplier = (max - min).TotalSeconds;
            foreach (int index in column.Value) {
              double rand = r.NextDouble() * randMultiplier;
              preprocessingData.SetCell<DateTime>(column.Key, index, min.AddSeconds(rand));
            }
          }
        }
      });
    }

    /// <summary>
    /// Replaces the given cells by a linear interpolation between the nearest
    /// non-missing value before and after each cell. All cells between those two
    /// neighbours are filled, not only the requested one. A cell is skipped when
    /// it is the first or last row or when a neighbour on either side is missing.
    /// Columns that are neither <c>double</c> nor <c>DateTime</c> are left untouched.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByLinearInterpolationOfNeighbours(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            interpolateMissingValues<double>(column.Key, column.Value,
              (prev, next, gapSize, offset) => prev + (((next - prev) / gapSize) * offset));
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            // DateTime values are interpolated on the distance in seconds
            interpolateMissingValues<DateTime>(column.Key, column.Value,
              (prev, next, gapSize, offset) => prev.AddSeconds(((next - prev).TotalSeconds / gapSize) * offset));
          }
        }
      });
    }

    // Fills the gap around each requested row of one column; 'interpolate' receives
    // (previous value, next value, distance between both neighbours, offset from the previous neighbour).
    private void interpolateMissingValues<T>(int columnIndex, IEnumerable<int> rowIndices, Func<T, T, int, int, T> interpolate) {
      int countValues = preprocessingData.GetValues<T>(columnIndex).Count();

      foreach (int index in rowIndices) {
        // dont replace first or last values, they have a neighbour on one side only
        if (index <= 0 || index >= countValues - 1) {
          continue;
        }

        int prevIndex = indexOfPrevPresentValue(columnIndex, index);
        int nextIndex = indexOfNextPresentValue(columnIndex, index);

        // interpolation needs BOTH neighbours; if either search ran off the column
        // the index is not a valid cell and must not be read
        if (prevIndex < 0 || nextIndex >= countValues) {
          continue;
        }

        T prev = preprocessingData.GetCell<T>(columnIndex, prevIndex);
        T next = preprocessingData.GetCell<T>(columnIndex, nextIndex);
        int valuesToInterpolate = nextIndex - prevIndex;

        // start behind the previous neighbour: that cell already holds 'prev'
        for (int i = prevIndex + 1; i < nextIndex; ++i) {
          T interpolated = interpolate(prev, next, valuesToInterpolate, i - prevIndex);
          preprocessingData.SetCell<T>(columnIndex, i, interpolated);
        }
      }
    }

    // Returns the closest row before 'start' that is not missing, or -1 if there is none.
    private int indexOfPrevPresentValue(int columnIndex, int start) {
      int offset = start - 1;
      while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) {
        offset--;
      }

      return offset;
    }

    // Returns the closest row after 'start' that is not missing, or the row count if there is none.
    private int indexOfNextPresentValue(int columnIndex, int start) {
      int offset = start + 1;
      while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) {
        offset++;
      }

      return offset;
    }

    /// <summary>
    /// Replaces the given cells by the most common value of their column.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    /// <exception cref="ArgumentException">
    /// A column is neither of type <c>double</c>, <c>string</c> nor <c>DateTime</c>.
    /// </exception>
    public void ReplaceIndicesByMostCommonValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key));
          } else if (preprocessingData.IsType<string>(column.Key)) {
            ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key));
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key));
          } else {
            throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
          }
        }
      });
    }

    /// <summary>
    /// Shuffles the rows inside each given range; rows outside the ranges keep
    /// their position. Every range is permuted independently of the others.
    /// </summary>
    /// <param name="ranges">Row ranges to shuffle, e.g. the training partition.</param>
    public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
      // init random outside loop
      Random random = new Random();

      preprocessingData.InTransaction(() => {
        foreach (IntRange range in ranges) {
          int first = range.Start;
          // NOTE(review): End is treated as an inclusive row index, exactly as the
          // previous implementation did - confirm against the IntRange contract.
          int last = range.End;
          if (last <= first) {
            continue; // nothing to permute
          }

          // Fisher-Yates shuffle over the source rows. Rows are later copied from a
          // snapshot, so the mapping has to be a true permutation; otherwise values
          // get duplicated while others are lost.
          int[] sourceRows = Enumerable.Range(first, last - first + 1).ToArray();
          for (int i = sourceRows.Length - 1; i > 0; --i) {
            int j = random.Next(i + 1);
            int swap = sourceRows[i];
            sourceRows[i] = sourceRows[j];
            sourceRows[j] = swap;
          }

          List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>(sourceRows.Length);
          for (int offset = 0; offset < sourceRows.Length; ++offset) {
            shuffledIndices.Add(new Tuple<int, int>(first + offset, sourceRows[offset]));
          }

          ReOrderToIndices(shuffledIndices);
        }
      });
    }

    /// <summary>
    /// Reorders all rows so that row <c>i</c> receives the values of the row whose
    /// index is the <c>i</c>-th element of <paramref name="indices"/>.
    /// </summary>
    /// <param name="indices">Source row index for every target row, in target order.</param>
    /// <exception cref="ArgumentNullException"><paramref name="indices"/> is null.</exception>
    public void ReOrderToIndices(IEnumerable<int> indices) {
      if (indices == null) {
        throw new ArgumentNullException("indices");
      }

      List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();

      // enumerate once; Count()/ElementAt(i) would re-walk a lazy sequence for every row
      int targetRow = 0;
      foreach (int sourceRow in indices) {
        indicesTuple.Add(new Tuple<int, int>(targetRow, sourceRow));
        targetRow++;
      }

      ReOrderToIndices(indicesTuple);
    }

    /// <summary>
    /// Applies the given row mapping to every column of type <c>double</c>,
    /// <c>string</c> or <c>DateTime</c>; columns of other types are left untouched.
    /// </summary>
    /// <param name="indices">
    /// Pairs of (target row, source row): the target row receives the value the
    /// source row held before the reordering started.
    /// </param>
    public void ReOrderToIndices(IList<Tuple<int, int>> indices) {
      preprocessingData.InTransaction(() => {
        for (int i = 0; i < preprocessingData.Columns; ++i) {
          if (preprocessingData.IsType<double>(i)) {
            reOrderToIndices<double>(i, indices);
          } else if (preprocessingData.IsType<string>(i)) {
            reOrderToIndices<string>(i, indices);
          } else if (preprocessingData.IsType<DateTime>(i)) {
            reOrderToIndices<DateTime>(i, indices);
          }
        }
      });
    }

    // Applies the (target row, source row) pairs to a single column.
    private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
      // snapshot first, so every source value is the one from before the reordering
      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));

      // process all columns equally
      foreach (Tuple<int, int> index in indices) {
        int originalIndex = index.Item1;
        int replaceIndex = index.Item2;

        T replaceValue = originalData[replaceIndex];
        preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
      }
    }
  }
}