#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Data;
namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Implements <see cref="IManipulationLogic"/>: replaces cell values and re-orders rows
/// of the wrapped <see cref="IPreprocessingData"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments in this class (e.g. <c>IsType&lt;double&gt;</c>,
/// <c>Tuple&lt;int, int&gt;</c>) were reconstructed from how the values are used; confirm them
/// against <see cref="IManipulationLogic"/>, <see cref="IPreprocessingData"/> and
/// <see cref="IStatisticsLogic"/>.
/// </remarks>
public class ManipulationLogic : IManipulationLogic {
  private readonly IPreprocessingData preprocessingData;
  private readonly IStatisticsLogic statisticInfo;
  private readonly ISearchLogic searchLogic;

  /// <summary>
  /// Creates the manipulation logic on top of the given data, search and statistics services.
  /// </summary>
  /// <param name="_prepocessingData">Data whose cells are read and overwritten.</param>
  /// <param name="theSearchLogic">Used to decide whether a cell holds a missing value.</param>
  /// <param name="theStatisticsLogic">Supplies average, median, min, max and most common value per column.</param>
  public ManipulationLogic(IPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic) {
    preprocessingData = _prepocessingData;
    searchLogic = theSearchLogic;
    statisticInfo = theStatisticsLogic;
  }

  /// <summary>
  /// Writes <paramref name="value"/> into every listed row of the given column.
  /// </summary>
  /// <typeparam name="T">Type of the value written to the cells.</typeparam>
  /// <param name="columnIndex">Column to modify.</param>
  /// <param name="rowIndices">Rows whose cell is overwritten.</param>
  /// <param name="value">Replacement value.</param>
  public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
    foreach (int index in rowIndices) {
      preprocessingData.SetCell(columnIndex, index, value);
    }
  }

  /// <summary>
  /// Overwrites the listed rows of the column with the column average reported by the statistics logic.
  /// </summary>
  /// <param name="columnIndex">Column to modify.</param>
  /// <param name="rowIndices">Rows whose cell is overwritten.</param>
  public void ReplaceIndicesByAverageValue(int columnIndex, IEnumerable<int> rowIndices) {
    double average = statisticInfo.GetAverage(columnIndex);
    ReplaceIndicesByValue<double>(columnIndex, rowIndices, average);
  }

  /// <summary>
  /// Overwrites the listed rows of the column with the column median reported by the statistics logic.
  /// </summary>
  /// <param name="columnIndex">Column to modify.</param>
  /// <param name="rowIndices">Rows whose cell is overwritten.</param>
  public void ReplaceIndicesByMedianValue(int columnIndex, IEnumerable<int> rowIndices) {
    double median = statisticInfo.GetMedian(columnIndex);
    ReplaceIndicesByValue<double>(columnIndex, rowIndices, median);
  }

  /// <summary>
  /// Overwrites each listed row with its own random value drawn between the column minimum and maximum.
  /// </summary>
  /// <param name="columnIndex">Column to modify.</param>
  /// <param name="rowIndices">Rows whose cell is overwritten.</param>
  public void ReplaceIndicesByRandomValue(int columnIndex, IEnumerable<int> rowIndices) {
    Random r = new Random();
    // NOTE(review): if GetMax/GetMin are generic on IStatisticsLogic, an explicit <double>
    // type argument is required here - confirm against the interface.
    double max = statisticInfo.GetMax(columnIndex);
    double min = statisticInfo.GetMin(columnIndex);
    double randMultiplier = (max - min);
    foreach (int index in rowIndices) {
      // NextDouble() is in [0, 1), so the result lies in [min, max)
      double rand = r.NextDouble() * randMultiplier + min;
      preprocessingData.SetCell(columnIndex, index, rand);
    }
  }

  /// <summary>
  /// For every listed row, linearly interpolates all cells lying strictly between the nearest
  /// present value before the row and the nearest present value after it.
  /// </summary>
  /// <remarks>
  /// Rows that are the first or last row of the column, or that lack a present neighbour
  /// on either side, are left untouched.
  /// </remarks>
  /// <param name="columnIndex">Column to modify; its cells are read as <see cref="double"/>.</param>
  /// <param name="rowIndices">Rows to be replaced by interpolated values.</param>
  public void ReplaceIndicesByLinearInterpolationOfNeighbours(int columnIndex, IEnumerable<int> rowIndices) {
    int countValues = preprocessingData.GetValues<double>(columnIndex).Count();
    foreach (int index in rowIndices) {
      // don't replace the first or last value: they have a neighbour on one side only
      if (index <= 0 || index >= countValues - 1) {
        continue;
      }

      int prevIndex = indexOfPrevPresentValue(columnIndex, index);
      int nextIndex = indexOfNextPresentValue(columnIndex, index);
      // both neighbours are required; reading a cell at -1 or past the end is invalid
      if (prevIndex < 0 || nextIndex >= countValues) {
        continue;
      }

      double prev = preprocessingData.GetCell<double>(columnIndex, prevIndex);
      double next = preprocessingData.GetCell<double>(columnIndex, nextIndex);
      int valuesToInterpolate = nextIndex - prevIndex;
      // slope of the straight line from prev to next
      double interpolationStep = (next - prev) / valuesToInterpolate;
      // the cells at prevIndex and nextIndex hold present values and are not rewritten
      for (int i = prevIndex + 1; i < nextIndex; ++i) {
        double interpolated = prev + (interpolationStep * (i - prevIndex));
        preprocessingData.SetCell(columnIndex, i, interpolated);
      }
    }
  }

  /// <summary>
  /// Returns the index of the closest row before <paramref name="start"/> that is not a
  /// missing value, or -1 if there is none.
  /// </summary>
  private int indexOfPrevPresentValue(int columnIndex, int start) {
    int offset = start - 1;
    while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) {
      offset--;
    }
    return offset;
  }

  /// <summary>
  /// Returns the index of the closest row after <paramref name="start"/> that is not a
  /// missing value, or the row count of the data if there is none.
  /// </summary>
  private int indexOfNextPresentValue(int columnIndex, int start) {
    int offset = start + 1;
    while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) {
      offset++;
    }
    return offset;
  }

  /// <summary>
  /// Overwrites the listed rows with the most common value of the column.
  /// </summary>
  /// <param name="columnIndex">Column to modify.</param>
  /// <param name="rowIndices">Rows whose cell is overwritten.</param>
  /// <exception cref="ArgumentException">The column has none of the handled element types.</exception>
  public void ReplaceIndicesByMostCommonValue(int columnIndex, IEnumerable<int> rowIndices) {
    // NOTE(review): the three type arguments (double, string, DateTime) are reconstructed - confirm.
    if (preprocessingData.IsType<double>(columnIndex)) {
      ReplaceIndicesByValue<double>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<double>(columnIndex));
    } else if (preprocessingData.IsType<string>(columnIndex)) {
      ReplaceIndicesByValue<string>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<string>(columnIndex));
    } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
      ReplaceIndicesByValue<DateTime>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<DateTime>(columnIndex));
    } else {
      throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type.");
    }
  }

  /// <summary>
  /// Randomly permutes the rows inside each given range; rows are never moved across ranges.
  /// </summary>
  /// <remarks>
  /// NOTE(review): each range is treated as the half-open interval [Start, End) - confirm
  /// that <c>IntRange.End</c> is exclusive.
  /// </remarks>
  /// <param name="ranges">Row ranges to shuffle independently, e.g. the training and test partitions.</param>
  public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
    // init random outside loop
    Random random = new Random();
    // process all given ranges - e.g. training partition, test partition
    foreach (IntRange range in ranges) {
      int size = range.End - range.Start;
      if (size < 2) {
        continue; // nothing to shuffle
      }

      // Fisher-Yates shuffle: yields a true permutation, so no row is duplicated or lost
      int[] permutation = Enumerable.Range(range.Start, size).ToArray();
      for (int i = size - 1; i > 0; --i) {
        int j = random.Next(i + 1);
        int tmp = permutation[i];
        permutation[i] = permutation[j];
        permutation[j] = tmp;
      }

      // (target row, source row) pairs used for shuffling every column the same way
      List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>(size);
      for (int k = 0; k < size; ++k) {
        shuffledIndices.Add(new Tuple<int, int>(range.Start + k, permutation[k]));
      }
      ReOrderToIndices(shuffledIndices);
    }
  }

  /// <summary>
  /// Re-orders all rows so that row <c>i</c> receives the values of the row whose index is the
  /// <c>i</c>-th element of <paramref name="indices"/>.
  /// </summary>
  /// <param name="indices">Source row index for each target row, in target order.</param>
  public void ReOrderToIndices(IEnumerable<int> indices) {
    List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();
    // single pass: avoids re-enumerating the sequence via Count()/ElementAt(i)
    int targetIndex = 0;
    foreach (int sourceIndex in indices) {
      indicesTuple.Add(new Tuple<int, int>(targetIndex, sourceIndex));
      targetIndex++;
    }
    ReOrderToIndices(indicesTuple);
  }

  /// <summary>
  /// Applies the given (target row, source row) pairs to every column whose element type is handled.
  /// Columns of any other type are skipped.
  /// </summary>
  /// <param name="indices">Pairs where <c>Item1</c> is the row to overwrite and <c>Item2</c> the row to copy from.</param>
  public void ReOrderToIndices(IList<Tuple<int, int>> indices) {
    // NOTE(review): the three type arguments (double, string, DateTime) are reconstructed - confirm.
    for (int i = 0; i < preprocessingData.Columns; ++i) {
      if (preprocessingData.IsType<double>(i)) {
        reOrderToIndices<double>(i, indices);
      } else if (preprocessingData.IsType<string>(i)) {
        reOrderToIndices<string>(i, indices);
      } else if (preprocessingData.IsType<DateTime>(i)) {
        reOrderToIndices<DateTime>(i, indices);
      }
    }
  }

  /// <summary>
  /// Applies the (target row, source row) pairs to a single column.
  /// </summary>
  /// <typeparam name="T">Element type of the column.</typeparam>
  private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
    // snapshot first: every pair must read the value from before any cell was overwritten
    List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));
    foreach (Tuple<int, int> index in indices) {
      int originalIndex = index.Item1;
      int replaceIndex = index.Item2;
      T replaceValue = originalData[replaceIndex];
      preprocessingData.SetCell(columnIndex, originalIndex, replaceValue);
    }
  }
}
}