#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Data;
namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Bulk manipulation operations on preprocessing data: replacing cell values, shuffling and
/// re-ordering rows, and deleting rows or columns selected by missing-value share or variance.
/// </summary>
// NOTE(review): several declarations in this file read as if generic type arguments were lost
// (e.g. "IDictionary> cells", "List>", repeated identical IsType(...) checks) - confirm against
// the canonical source before relying on the exact signatures.
public class ManipulationLogic : IManipulationLogic {
// Data store that every operation reads from and writes to; edits are grouped via InTransaction.
private ITransactionalPreprocessingData preprocessingData;
// Source of the per-column statistics (average, median, min/max, variance, missing counts).
private IStatisticsLogic statisticsLogic;
// Used to decide whether a single cell is a missing value.
private ISearchLogic searchLogic;
// Used for string-based cell writes and as the backing source of the Changed event.
private IDataGridLogic dataGridLogic;
/// <summary>
/// Creates the manipulation logic on top of the given data store and helper logics.
/// The arguments are stored as-is; no validation is performed here.
/// </summary>
/// <param name="_prepocessingData">Data store that is read and modified.</param>
/// <param name="theSearchLogic">Logic used to detect missing cells.</param>
/// <param name="theStatisticsLogic">Logic used to obtain column and row statistics.</param>
/// <param name="theDataGridLogic">Logic used for string cell writes and change notification.</param>
public ManipulationLogic(ITransactionalPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic, IDataGridLogic theDataGridLogic) {
  this.dataGridLogic = theDataGridLogic;
  this.statisticsLogic = theStatisticsLogic;
  this.searchLogic = theSearchLogic;
  this.preprocessingData = _prepocessingData;
}
/// <summary>
/// Writes <paramref name="value"/> into every listed row of a single column.
/// This method does not open a transaction itself.
/// </summary>
/// <param name="columnIndex">Index of the column to modify.</param>
/// <param name="rowIndices">Row indices whose cells are overwritten.</param>
/// <param name="value">Value stored in each of those cells.</param>
// NOTE(review): T is used without a visible type-parameter list; the method is presumably
// generic and the type arguments were lost from this copy - confirm.
public void ReplaceIndicesByValue(int columnIndex, IEnumerable rowIndices, T value) {
foreach (int index in rowIndices) {
preprocessingData.SetCell(columnIndex, index, value);
}
}
/// <summary>
/// Replaces the given cells with the average of their column, inside one transaction.
/// </summary>
/// <param name="cells">Entries whose Key is a column index and whose Value lists the row indices to overwrite.</param>
/// <param name="considerSelection">Forwarded unchanged to the statistics lookups.</param>
public void ReplaceIndicesByAverageValue(IDictionary> cells, bool considerSelection) {
preprocessingData.InTransaction(() => {
foreach (var column in cells) {
// NOTE(review): both IsType checks read identically here; judging from the locals they
// presumably test for double and DateTime respectively - confirm.
if (preprocessingData.IsType(column.Key)) {
double average = statisticsLogic.GetAverage(column.Key, considerSelection);
ReplaceIndicesByValue(column.Key, column.Value, average);
} else if (preprocessingData.IsType(column.Key)) {
DateTime average = statisticsLogic.GetAverageDateTime(column.Key, considerSelection);
ReplaceIndicesByValue(column.Key, column.Value, average);
}
// A column matching neither check is left untouched (there is no else branch).
}
});
}
/// <summary>
/// Replaces the given cells with the median of their column, inside one transaction.
/// </summary>
/// <param name="cells">Entries whose Key is a column index and whose Value lists the row indices to overwrite.</param>
/// <param name="considerSelection">Forwarded unchanged to the statistics lookups.</param>
public void ReplaceIndicesByMedianValue(IDictionary> cells, bool considerSelection) {
preprocessingData.InTransaction(() => {
foreach (var column in cells) {
// NOTE(review): both IsType checks read identically here; judging from the locals they
// presumably test for double and DateTime respectively - confirm.
if (preprocessingData.IsType(column.Key)) {
double median = statisticsLogic.GetMedian(column.Key, considerSelection);
ReplaceIndicesByValue(column.Key, column.Value, median);
} else if (preprocessingData.IsType(column.Key)) {
DateTime median = statisticsLogic.GetMedianDateTime(column.Key, considerSelection);
ReplaceIndicesByValue(column.Key, column.Value, median);
}
// A column matching neither check is left untouched (there is no else branch).
}
});
}
/// <summary>
/// Replaces each given cell with a random value drawn between the minimum and maximum of its
/// column, inside one transaction. Every cell gets its own draw.
/// </summary>
/// <param name="cells">Entries whose Key is a column index and whose Value lists the row indices to overwrite.</param>
/// <param name="considerSelection">Forwarded unchanged to the min/max lookups.</param>
public void ReplaceIndicesByRandomValue(IDictionary> cells, bool considerSelection) {
preprocessingData.InTransaction(() => {
// One generator is shared by all columns and cells of this call.
Random r = new Random();
foreach (var column in cells) {
// NOTE(review): both IsType checks read identically here; judging from the locals they
// presumably test for double and DateTime respectively - confirm.
if (preprocessingData.IsType(column.Key)) {
double max = statisticsLogic.GetMax(column.Key, considerSelection);
double min = statisticsLogic.GetMin(column.Key, considerSelection);
// Width of the value span; NextDouble() is scaled by it and shifted by min.
double randMultiplier = (max - min);
foreach (int index in column.Value) {
double rand = r.NextDouble() * randMultiplier + min;
preprocessingData.SetCell(column.Key, index, rand);
}
} else if (preprocessingData.IsType(column.Key)) {
DateTime min = statisticsLogic.GetMin(column.Key, considerSelection);
DateTime max = statisticsLogic.GetMax(column.Key, considerSelection);
// Span between min and max expressed in seconds; the draw is an offset added to min.
double randMultiplier = (max - min).TotalSeconds;
foreach (int index in column.Value) {
double rand = r.NextDouble() * randMultiplier;
preprocessingData.SetCell(column.Key, index, min.AddSeconds(rand));
}
}
}
});
}
/// <summary>
/// Replaces the given cells by linear interpolation between the nearest present value above
/// and below them in the same column. All cells strictly between those two neighbours are
/// filled. The whole operation runs inside one transaction.
/// </summary>
/// <remarks>
/// A cell is skipped when it is the first or last row, or when a present neighbour is missing
/// on either side, because interpolation needs both end points. Columns that are neither
/// double nor DateTime are ignored.
/// </remarks>
/// <param name="cells">Entries whose Key is a column index and whose Value lists the row indices to replace.</param>
// NOTE(review): the generic type arguments below were reconstructed from how the values are
// used (int keys, int row indices, double/DateTime locals) - confirm against the interface.
public void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells) {
  preprocessingData.InTransaction(() => {
    foreach (var column in cells) {
      int columnIndex = column.Key;
      bool isDouble = preprocessingData.IsType<double>(columnIndex);
      bool isDateTime = !isDouble && preprocessingData.IsType<DateTime>(columnIndex);
      if (!isDouble && !isDateTime) {
        continue;
      }

      int countValues = isDouble
        ? preprocessingData.GetValues<double>(columnIndex, false).Count()
        : preprocessingData.GetValues<DateTime>(columnIndex, false).Count();

      foreach (int index in column.Value) {
        // The first and the last row can never have a neighbour on both sides.
        if (index <= 0 || index >= countValues - 1) {
          continue;
        }

        int prevIndex = indexOfPrevPresentValue(columnIndex, index);
        int nextIndex = indexOfNextPresentValue(columnIndex, index);

        // Both end points are required. Checking with "and" would let a one-sided gap through
        // and read a cell at index -1 or past the last row.
        if (prevIndex < 0 || nextIndex >= countValues) {
          continue;
        }

        int valuesToInterpolate = nextIndex - prevIndex;
        if (isDouble) {
          double prev = preprocessingData.GetCell<double>(columnIndex, prevIndex);
          double next = preprocessingData.GetCell<double>(columnIndex, nextIndex);
          double interpolationStep = (next - prev) / valuesToInterpolate;
          // Start after prevIndex: the anchor cell already holds its own value.
          for (int i = prevIndex + 1; i < nextIndex; ++i) {
            double interpolated = prev + (interpolationStep * (i - prevIndex));
            preprocessingData.SetCell(columnIndex, i, interpolated);
          }
        } else {
          DateTime prev = preprocessingData.GetCell<DateTime>(columnIndex, prevIndex);
          DateTime next = preprocessingData.GetCell<DateTime>(columnIndex, nextIndex);
          // DateTime values are interpolated on their distance in seconds.
          double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;
          for (int i = prevIndex + 1; i < nextIndex; ++i) {
            DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
            preprocessingData.SetCell(columnIndex, i, interpolated);
          }
        }
      }
    }
  });
}
/// <summary>
/// Walks upwards from the row before <paramref name="start"/> and returns the index of the
/// first cell that is not a missing value, or -1 when every earlier cell is missing.
/// </summary>
/// <param name="columnIndex">Column to search in.</param>
/// <param name="start">Row index whose predecessor is examined first.</param>
private int indexOfPrevPresentValue(int columnIndex, int start) {
  int candidate;
  for (candidate = start - 1; candidate >= 0; candidate--) {
    if (!searchLogic.IsMissingValue(columnIndex, candidate)) {
      break;
    }
  }
  return candidate;
}
/// <summary>
/// Walks downwards from the row after <paramref name="start"/> and returns the index of the
/// first cell that is not a missing value, or the row count when every later cell is missing.
/// </summary>
/// <param name="columnIndex">Column to search in.</param>
/// <param name="start">Row index whose successor is examined first.</param>
private int indexOfNextPresentValue(int columnIndex, int start) {
  int candidate;
  for (candidate = start + 1; candidate < preprocessingData.Rows; candidate++) {
    if (!searchLogic.IsMissingValue(columnIndex, candidate)) {
      break;
    }
  }
  return candidate;
}
/// <summary>
/// Replaces the given cells with the most common value of their column, inside one transaction.
/// </summary>
/// <param name="cells">Entries whose Key is a column index and whose Value lists the row indices to overwrite.</param>
/// <param name="considerSelection">Forwarded unchanged to the statistics lookup.</param>
/// <exception cref="ArgumentException">Thrown for a column that matches none of the type checks.</exception>
public void ReplaceIndicesByMostCommonValue(IDictionary> cells, bool considerSelection) {
preprocessingData.InTransaction(() => {
foreach (var column in cells) {
// NOTE(review): the three branches read identically here; presumably each one tests and
// handles a different element type whose type argument was lost - confirm.
if (preprocessingData.IsType(column.Key)) {
ReplaceIndicesByValue(column.Key, column.Value, statisticsLogic.GetMostCommonValue(column.Key, considerSelection));
} else if (preprocessingData.IsType(column.Key)) {
ReplaceIndicesByValue(column.Key, column.Value, statisticsLogic.GetMostCommonValue(column.Key, considerSelection));
} else if (preprocessingData.IsType(column.Key)) {
ReplaceIndicesByValue(column.Key, column.Value, statisticsLogic.GetMostCommonValue(column.Key, considerSelection));
} else {
// Unlike the average/median/random replacements, an unsupported column is an error here.
throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
}
}
});
}
/// <summary>
/// Shuffles the rows of the test partition and of the training partition, each within its
/// own range, by delegating to the range-based overload.
/// </summary>
public void ShuffleWithRanges() {
  var partitions = new[] { preprocessingData.TestPartition, preprocessingData.TrainingPartition };
  ShuffleWithRanges(partitions);
}
/// <summary>
/// Shuffles the rows inside each given range independently; rows never leave their range.
/// All columns are permuted identically, so rows stay intact. Runs inside one transaction.
/// </summary>
/// <param name="ranges">Ranges to shuffle; Start is the first row and End is one past the last row.</param>
// NOTE(review): the generic type arguments were reconstructed from usage (IntRange elements,
// int/int tuples) - confirm against the interface.
public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
  // A single generator is shared by all ranges.
  Random random = new Random();
  preprocessingData.InTransaction(() => {
    // process all given ranges - e.g. TrainingPartition, TestPartition
    foreach (IntRange range in ranges) {
      List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>();
      // Fisher-Yates: position i is swapped with a position drawn from [Start, i].
      // Random.Next excludes its upper bound, so i + 1 is required to allow "stay in place";
      // drawing from [Start, i) instead can only produce cyclic permutations.
      // The position i == Start has nothing left to swap with and is therefore omitted.
      for (int i = range.End - 1; i > range.Start; --i) {
        int rand = random.Next(range.Start, i + 1);
        shuffledIndices.Add(new Tuple<int, int>(i, rand));
      }
      ShuffleToIndices(shuffledIndices);
    }
  });
}
/// <summary>
/// Re-orders the rows so that the row at position i receives the values of the row whose
/// index is the i-th element of <paramref name="indices"/>.
/// </summary>
/// <param name="indices">Source row index for each target position, in target order.</param>
// NOTE(review): the generic type arguments were reconstructed from usage (int elements,
// int/int tuples) - confirm against the interface.
public void ReOrderToIndices(IEnumerable<int> indices) {
  List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();
  // Enumerate the input exactly once; Count() plus ElementAt(i) per step would walk the
  // sequence again on every iteration.
  int targetIndex = 0;
  foreach (int sourceIndex in indices) {
    indicesTuple.Add(new Tuple<int, int>(targetIndex, sourceIndex));
    targetIndex++;
  }
  ReOrderToIndices(indicesTuple);
}
/// <summary>
/// Applies the given re-ordering to every column, inside one transaction.
/// </summary>
/// <param name="indices">Pairs handed to the per-column helper, which reads Item1 as the target row and Item2 as the source row.</param>
public void ReOrderToIndices(IList> indices) {
preprocessingData.InTransaction(() => {
for (int i = 0; i < preprocessingData.Columns; ++i) {
// NOTE(review): the three branches read identically here; presumably each one dispatches
// to the helper for a different element type whose type argument was lost - confirm.
if (preprocessingData.IsType(i)) {
reOrderToIndices(i, indices);
} else if (preprocessingData.IsType(i)) {
reOrderToIndices(i, indices);
} else if (preprocessingData.IsType(i)) {
reOrderToIndices(i, indices);
}
// A column matching none of the checks is not re-ordered (there is no else branch).
}
});
}
/// <summary>
/// Applies the given sequence of row swaps to every column, inside one transaction.
/// </summary>
/// <param name="indices">Pairs of row indices handed to the per-column helper, which swaps the two cells of each pair in order.</param>
public void ShuffleToIndices(IList> indices)
{
preprocessingData.InTransaction(() =>
{
for (int i = 0; i < preprocessingData.Columns; ++i)
{
// NOTE(review): the three branches read identically here; presumably each one dispatches
// to the helper for a different element type whose type argument was lost - confirm.
if (preprocessingData.IsType(i))
{
ShuffleToIndices(i, indices);
}
else if (preprocessingData.IsType(i))
{
ShuffleToIndices(i, indices);
}
else if (preprocessingData.IsType(i))
{
ShuffleToIndices(i, indices);
}
// A column matching none of the checks is not shuffled (there is no else branch).
}
});
}
/// <summary>
/// Re-orders one column: for every pair, the cell at row Item1 receives the value that row
/// Item2 held before this call.
/// </summary>
/// <param name="columnIndex">Column to re-order.</param>
/// <param name="indices">Pairs of (target row, source row).</param>
// NOTE(review): T is used without a visible type-parameter list; the helper is presumably
// generic and the type arguments were lost from this copy - confirm.
private void reOrderToIndices(int columnIndex, IList> indices) {
// Copy the column first so that every read sees the values from before any write below.
List originalData = new List(preprocessingData.GetValues(columnIndex, false));
// every pair is applied to this column in list order
foreach (Tuple index in indices) {
int originalIndex = index.Item1;
int replaceIndex = index.Item2;
T replaceValue = originalData.ElementAt(replaceIndex);
preprocessingData.SetCell(columnIndex, originalIndex, replaceValue);
}
}
/// <summary>
/// Swaps, for every pair, the cells at rows Item1 and Item2 of one column. The swaps are
/// applied in list order on the live data, so later swaps see the result of earlier ones.
/// </summary>
/// <param name="columnIndex">Column to shuffle.</param>
/// <param name="indices">Pairs of row indices to swap.</param>
// NOTE(review): T is used without a visible type-parameter list; the helper is presumably
// generic and the type arguments were lost from this copy - confirm.
private void ShuffleToIndices(int columnIndex, IList> indices)
{
// every pair is applied to this column in list order
foreach (Tuple index in indices)
{
int originalIndex = index.Item1;
int replaceIndex = index.Item2;
// Read both cells before writing either, then store them crosswise.
T tmp = preprocessingData.GetCell(columnIndex, originalIndex);
T replaceValue = preprocessingData.GetCell(columnIndex, replaceIndex);
preprocessingData.SetCell(columnIndex, originalIndex, replaceValue);
preprocessingData.SetCell(columnIndex, replaceIndex, tmp);
}
}
/// <summary>
/// Writes the same string value into every given cell through the data grid logic, inside
/// one transaction.
/// </summary>
/// <param name="cells">Entries whose Key is a column index and whose Value lists the row indices to overwrite.</param>
/// <param name="value">Text passed to the data grid logic for each cell.</param>
public void ReplaceIndicesByValue(IDictionary> cells, string value) {
preprocessingData.InTransaction(() => {
foreach (var column in cells) {
foreach (var rowIdx in column.Value) {
// Note the argument order of SetValue: value first, then column, then row.
dataGridLogic.SetValue(value, column.Key, rowIdx);
}
}
});
}
/// <summary>
/// Returns the indices of all rows whose share of missing values, in percent of the column
/// count, is strictly greater than <paramref name="percent"/>.
/// </summary>
/// <param name="percent">Threshold in percent (0 to 100).</param>
/// <returns>Matching row indices in ascending order.</returns>
// NOTE(review): the element type int was reconstructed from usage - confirm against the interface.
public List<int> RowsWithMissingValuesGreater(double percent) {
  List<int> rows = new List<int>();
  for (int i = 0; i < preprocessingData.Rows; ++i) {
    int missingCount = statisticsLogic.GetRowMissingValueCount(i);
    // Computed in double precision (the threshold is a double) and multiplied before
    // dividing, so the ratio is not rounded to single precision before the comparison.
    double missingPercent = missingCount * 100.0 / preprocessingData.Columns;
    if (missingPercent > percent) {
      rows.Add(i);
    }
  }
  return rows;
}
/// <summary>
/// Returns the indices of all columns whose share of missing values, in percent of the row
/// count, is strictly greater than <paramref name="percent"/>.
/// </summary>
/// <param name="percent">Threshold in percent (0 to 100).</param>
/// <returns>Matching column indices in ascending order.</returns>
// NOTE(review): the element type int was reconstructed from usage - confirm against the interface.
public List<int> ColumnsWithMissingValuesGreater(double percent) {
  List<int> columns = new List<int>();
  for (int i = 0; i < preprocessingData.Columns; ++i) {
    int missingCount = statisticsLogic.GetMissingValueCount(i);
    // Computed in double precision (the threshold is a double) and multiplied before
    // dividing, so the ratio is not rounded to single precision before the comparison.
    double missingPercent = missingCount * 100.0 / preprocessingData.Rows;
    if (missingPercent > percent) {
      columns.Add(i);
    }
  }
  return columns;
}
/// <summary>
/// Returns the indices of all columns that pass the type check and whose variance is
/// strictly smaller than <paramref name="variance"/>.
/// </summary>
/// <param name="variance">Exclusive upper bound for the column variance.</param>
/// <returns>Matching column indices in ascending order.</returns>
public List ColumnsWithVarianceSmaller(double variance) {
List columns = new List();
for (int i = 0; i < preprocessingData.Columns; ++i) {
// NOTE(review): both IsType operands read identically here; presumably they test two
// different element types whose type arguments were lost - confirm.
if (preprocessingData.IsType(i) || preprocessingData.IsType(i))
{
double columnVariance = statisticsLogic.GetVariance(i);
if (columnVariance < variance)
{
columns.Add(i);
}
}
}
return columns;
}
/// <summary>
/// Deletes every row reported by RowsWithMissingValuesGreater for the given threshold.
/// </summary>
/// <param name="percent">Threshold passed on to RowsWithMissingValuesGreater.</param>
public void DeleteRowsWithMissingValuesGreater(double percent) {
  var affectedRows = RowsWithMissingValuesGreater(percent);
  DeleteRows(affectedRows);
}
/// <summary>
/// Deletes every column reported by ColumnsWithMissingValuesGreater for the given threshold.
/// </summary>
/// <param name="percent">Threshold passed on to ColumnsWithMissingValuesGreater.</param>
public void DeleteColumnsWithMissingValuesGreater(double percent) {
  var affectedColumns = ColumnsWithMissingValuesGreater(percent);
  DeleteColumns(affectedColumns);
}
/// <summary>
/// Deletes every column reported by ColumnsWithVarianceSmaller for the given bound.
/// </summary>
/// <param name="variance">Bound passed on to ColumnsWithVarianceSmaller.</param>
public void DeleteColumnsWithVarianceSmaller(double variance) {
  var affectedColumns = ColumnsWithVarianceSmaller(variance);
  DeleteColumns(affectedColumns);
}
/// <summary>
/// Deletes the given rows inside one transaction, highest index first.
/// The list passed in is sorted and reversed in place.
/// </summary>
/// <param name="rows">Indices of the rows to delete.</param>
private void DeleteRows(List rows) {
// Sort ascending, then reverse to get descending order - presumably so that deleting a
// row does not shift the indices that are still to be deleted (verify DeleteRow semantics).
rows.Sort();
rows.Reverse();
preprocessingData.InTransaction(() =>
{
foreach (int row in rows)
{
preprocessingData.DeleteRow(row);
}
});
}
/// <summary>
/// Deletes the given columns inside one transaction, highest index first.
/// The list passed in is sorted and reversed in place.
/// </summary>
/// <param name="columns">Indices of the columns to delete.</param>
private void DeleteColumns(List columns) {
// Sort ascending, then reverse to get descending order - presumably so that deleting a
// column does not shift the indices that are still to be deleted (verify DeleteColumn semantics).
columns.Sort();
columns.Reverse();
preprocessingData.InTransaction(() =>
{
foreach (int column in columns)
{
preprocessingData.DeleteColumn(column);
}
});
}
/// <summary>
/// Change notification; subscriptions are forwarded to the Changed event of the data grid
/// logic rather than stored in this class.
/// </summary>
public event DataPreprocessingChangedEventHandler Changed {
  add {
    dataGridLogic.Changed += value;
  }
  remove {
    dataGridLogic.Changed -= value;
  }
}
}
}