#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Data;
namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Replaces, interpolates, shuffles and reorders cell values of the wrapped
/// preprocessing data. Every operation that touches more than one column is
/// executed inside a single <c>InTransaction</c> call on the preprocessing data.
/// </summary>
public class ManipulationLogic : IManipulationLogic {
  private readonly ITransactionalPreprocessingData preprocessingData;
  private readonly IStatisticsLogic statisticsLogic;
  private readonly ISearchLogic searchLogic;
  private readonly IDataGridLogic dataGridLogic;

  /// <summary>
  /// Creates the manipulation logic on top of the given data and helper logics.
  /// </summary>
  /// <param name="_prepocessingData">Data whose cells are read and written.</param>
  /// <param name="theSearchLogic">Used to decide whether a cell is a missing value.</param>
  /// <param name="theStatisticsLogic">Supplies average, median, min, max and most common value per column.</param>
  /// <param name="theDataGridLogic">Used to write string values into cells.</param>
  public ManipulationLogic(ITransactionalPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic, IDataGridLogic theDataGridLogic) {
    preprocessingData = _prepocessingData;
    searchLogic = theSearchLogic;
    statisticsLogic = theStatisticsLogic;
    dataGridLogic = theDataGridLogic;
  }

  /// <summary>
  /// Writes <paramref name="value"/> into every given row of one column.
  /// This overload does not open a transaction itself.
  /// </summary>
  /// <param name="columnIndex">Column to modify.</param>
  /// <param name="rowIndices">Rows to overwrite.</param>
  /// <param name="value">Replacement value.</param>
  public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
    foreach (int index in rowIndices) {
      preprocessingData.SetCell<T>(columnIndex, index, value);
    }
  }

  /// <summary>
  /// Replaces the given cells with the average of their column.
  /// Only double and DateTime columns are handled; other columns are left unchanged.
  /// </summary>
  /// <param name="cells">Row indices to replace, keyed by column index.</param>
  public void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells) {
    preprocessingData.InTransaction(() => {
      foreach (var column in cells) {
        if (preprocessingData.IsType<double>(column.Key)) {
          double average = statisticsLogic.GetAverage(column.Key);
          ReplaceIndicesByValue<double>(column.Key, column.Value, average);
        } else if (preprocessingData.IsType<DateTime>(column.Key)) {
          DateTime average = statisticsLogic.GetAverageDateTime(column.Key);
          ReplaceIndicesByValue<DateTime>(column.Key, column.Value, average);
        }
      }
    });
  }

  /// <summary>
  /// Replaces the given cells with the median of their column.
  /// Only double and DateTime columns are handled; other columns are left unchanged.
  /// </summary>
  /// <param name="cells">Row indices to replace, keyed by column index.</param>
  public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells) {
    preprocessingData.InTransaction(() => {
      foreach (var column in cells) {
        if (preprocessingData.IsType<double>(column.Key)) {
          double median = statisticsLogic.GetMedian(column.Key);
          ReplaceIndicesByValue<double>(column.Key, column.Value, median);
        } else if (preprocessingData.IsType<DateTime>(column.Key)) {
          DateTime median = statisticsLogic.GetMedianDateTime(column.Key);
          ReplaceIndicesByValue<DateTime>(column.Key, column.Value, median);
        }
      }
    });
  }

  /// <summary>
  /// Replaces every given cell with a random value drawn between the minimum
  /// and the maximum of its column. A separate value is drawn for each cell.
  /// Only double and DateTime columns are handled; other columns are left unchanged.
  /// </summary>
  /// <param name="cells">Row indices to replace, keyed by column index.</param>
  public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells) {
    preprocessingData.InTransaction(() => {
      Random r = new Random();
      foreach (var column in cells) {
        if (preprocessingData.IsType<double>(column.Key)) {
          double max = statisticsLogic.GetMax<double>(column.Key);
          double min = statisticsLogic.GetMin<double>(column.Key);
          double randMultiplier = (max - min);
          foreach (int index in column.Value) {
            double rand = r.NextDouble() * randMultiplier + min;
            preprocessingData.SetCell<double>(column.Key, index, rand);
          }
        } else if (preprocessingData.IsType<DateTime>(column.Key)) {
          DateTime min = statisticsLogic.GetMin<DateTime>(column.Key);
          DateTime max = statisticsLogic.GetMax<DateTime>(column.Key);
          // DateTime values are randomized with a resolution of (fractional) seconds
          double randMultiplier = (max - min).TotalSeconds;
          foreach (int index in column.Value) {
            double rand = r.NextDouble() * randMultiplier;
            preprocessingData.SetCell<DateTime>(column.Key, index, min.AddSeconds(rand));
          }
        }
      }
    });
  }

  /// <summary>
  /// Replaces the given cells by linear interpolation between the nearest
  /// non-missing value above and below them. All cells between those two
  /// neighbours are rewritten, not only the requested one.
  /// The first and the last row, and cells that lack a present neighbour on
  /// either side, are left unchanged. Only double and DateTime columns are handled.
  /// </summary>
  /// <param name="cells">Row indices to replace, keyed by column index.</param>
  public void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells) {
    preprocessingData.InTransaction(() => {
      foreach (var column in cells) {
        bool isDouble = preprocessingData.IsType<double>(column.Key);
        bool isDateTime = !isDouble && preprocessingData.IsType<DateTime>(column.Key);
        if (!isDouble && !isDateTime) {
          continue;
        }

        int countValues = isDouble
          ? preprocessingData.GetValues<double>(column.Key).Count()
          : preprocessingData.GetValues<DateTime>(column.Key).Count();

        foreach (int index in column.Value) {
          // the first and the last row cannot have a neighbour on both sides
          if (index <= 0 || index >= countValues - 1) {
            continue;
          }

          int prevIndex = indexOfPrevPresentValue(column.Key, index);
          int nextIndex = indexOfNextPresentValue(column.Key, index);
          // Both neighbours are required: with only one of them the read below
          // would address row -1 or a row past the end of the column.
          if (prevIndex < 0 || nextIndex >= countValues) {
            continue;
          }

          if (isDouble) {
            interpolateDoubleGap(column.Key, prevIndex, nextIndex);
          } else {
            interpolateDateTimeGap(column.Key, prevIndex, nextIndex);
          }
        }
      }
    });
  }

  // Fills the rows strictly between prevIndex and nextIndex of a double column
  // with evenly spaced values between the two boundary cells.
  private void interpolateDoubleGap(int columnIndex, int prevIndex, int nextIndex) {
    double prev = preprocessingData.GetCell<double>(columnIndex, prevIndex);
    double next = preprocessingData.GetCell<double>(columnIndex, nextIndex);
    double interpolationStep = (next - prev) / (nextIndex - prevIndex);
    for (int i = prevIndex + 1; i < nextIndex; ++i) {
      double interpolated = prev + (interpolationStep * (i - prevIndex));
      preprocessingData.SetCell<double>(columnIndex, i, interpolated);
    }
  }

  // Fills the rows strictly between prevIndex and nextIndex of a DateTime column
  // with evenly spaced points in time (computed in seconds) between the two boundary cells.
  private void interpolateDateTimeGap(int columnIndex, int prevIndex, int nextIndex) {
    DateTime prev = preprocessingData.GetCell<DateTime>(columnIndex, prevIndex);
    DateTime next = preprocessingData.GetCell<DateTime>(columnIndex, nextIndex);
    double interpolationStep = (next - prev).TotalSeconds / (nextIndex - prevIndex);
    for (int i = prevIndex + 1; i < nextIndex; ++i) {
      DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
      preprocessingData.SetCell<DateTime>(columnIndex, i, interpolated);
    }
  }

  // Returns the closest row before 'start' that is not a missing value, or -1 if there is none.
  private int indexOfPrevPresentValue(int columnIndex, int start) {
    int offset = start - 1;
    while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) {
      offset--;
    }
    return offset;
  }

  // Returns the closest row after 'start' that is not a missing value,
  // or a value >= preprocessingData.Rows if there is none.
  private int indexOfNextPresentValue(int columnIndex, int start) {
    int offset = start + 1;
    while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) {
      offset++;
    }
    return offset;
  }

  /// <summary>
  /// Replaces the given cells with the most common value of their column.
  /// </summary>
  /// <param name="cells">Row indices to replace, keyed by column index.</param>
  /// <exception cref="ArgumentException">
  /// A column is neither of type double, string nor DateTime.
  /// </exception>
  public void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells) {
    preprocessingData.InTransaction(() => {
      foreach (var column in cells) {
        if (preprocessingData.IsType<double>(column.Key)) {
          ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key));
        } else if (preprocessingData.IsType<string>(column.Key)) {
          ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key));
        } else if (preprocessingData.IsType<DateTime>(column.Key)) {
          ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key));
        } else {
          throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
        }
      }
    });
  }

  /// <summary>
  /// Randomly permutes the rows inside each given range. Every range is
  /// shuffled on its own; rows never move from one range into another.
  /// All columns receive the same permutation, so rows stay intact.
  /// </summary>
  /// <param name="ranges">Row ranges to shuffle.</param>
  public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
    // init random outside loop
    Random random = new Random();
    preprocessingData.InTransaction(() => {
      foreach (IntRange range in ranges) {
        int start = range.Start;
        // NOTE(review): rows Start..End are treated as inclusive because the
        // previous implementation also wrote row End - confirm against IntRange.
        int count = range.End - start + 1;
        if (count < 2) {
          continue; // nothing to permute
        }

        // Fisher-Yates shuffle of the row numbers. A real permutation is required:
        // the reorder step copies from a snapshot of the column, so a source row
        // that is picked twice would duplicate one value and drop another.
        int[] permutation = Enumerable.Range(start, count).ToArray();
        for (int i = count - 1; i > 0; --i) {
          int j = random.Next(i + 1);
          int swap = permutation[i];
          permutation[i] = permutation[j];
          permutation[j] = swap;
        }

        List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>(count);
        for (int i = 0; i < count; ++i) {
          shuffledIndices.Add(new Tuple<int, int>(start + i, permutation[i]));
        }
        ReOrderToIndices(shuffledIndices);
      }
    });
  }

  /// <summary>
  /// Reorders all rows so that row <c>i</c> receives the values of the row
  /// whose index is the <c>i</c>-th element of <paramref name="indices"/>.
  /// </summary>
  /// <param name="indices">Source row index for each target row, in target order.</param>
  public void ReOrderToIndices(IEnumerable<int> indices) {
    List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();
    // single pass: the sequence is enumerated exactly once
    int targetIndex = 0;
    foreach (int sourceIndex in indices) {
      indicesTuple.Add(new Tuple<int, int>(targetIndex, sourceIndex));
      targetIndex++;
    }
    ReOrderToIndices(indicesTuple);
  }

  /// <summary>
  /// Applies the given (target row, source row) pairs to every double, string
  /// and DateTime column. Columns of other types are left unchanged.
  /// </summary>
  /// <param name="indices">
  /// Pairs where <c>Item1</c> is the row to write and <c>Item2</c> the row to read from.
  /// </param>
  public void ReOrderToIndices(IList<Tuple<int, int>> indices) {
    preprocessingData.InTransaction(() => {
      for (int i = 0; i < preprocessingData.Columns; ++i) {
        if (preprocessingData.IsType<double>(i)) {
          reOrderToIndices<double>(i, indices);
        } else if (preprocessingData.IsType<string>(i)) {
          reOrderToIndices<string>(i, indices);
        } else if (preprocessingData.IsType<DateTime>(i)) {
          reOrderToIndices<DateTime>(i, indices);
        }
      }
    });
  }

  // Rewrites one column according to the (target row, source row) pairs.
  // Values are read from a copy taken before the first write, so a pair never
  // sees the result of an earlier pair.
  private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
    List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));
    foreach (Tuple<int, int> index in indices) {
      int originalIndex = index.Item1;
      int replaceIndex = index.Item2;
      T replaceValue = originalData[replaceIndex];
      preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
    }
  }

  /// <summary>
  /// Writes the string <paramref name="value"/> into every given cell through
  /// the data grid logic.
  /// </summary>
  /// <param name="cells">Row indices to replace, keyed by column index.</param>
  /// <param name="value">Value to set, passed to the data grid logic as a string.</param>
  public void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, string value) {
    preprocessingData.InTransaction(() => {
      foreach (var column in cells) {
        foreach (var rowIdx in column.Value) {
          dataGridLogic.SetValue(value, column.Key, rowIdx);
        }
      }
    });
  }
}
}