#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Linq.Expressions;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Random;
namespace HeuristicLab.Problems.DataAnalysis {
using ValuesType = Dictionary;
public static class DatasetUtil {
/// <summary>
/// Shuffle all the lists with the same shuffling.
/// </summary>
/// <param name="values">The value lists to be shuffled.</param>
/// <param name="random">The random number generator.</param>
/// <returns>A new list containing shuffled copies of the original value lists.</returns>
/// <exception cref="InvalidOperationException">Thrown when a value list matches none of the type checks in the loop.</exception>
public static List ShuffleLists(this List values, IRandom random) {
// The length of the first list determines how many elements are copied from every list.
int count = values.First().Count;
// A single permutation of the row indices, reused for every list so that entries stay aligned across lists.
int[] indices = Enumerable.Range(0, count).Shuffle(random).ToArray();
List shuffled = new List(values.Count);
for (int col = 0; col < values.Count; col++) {
// Create an empty target list for this column, chosen by the type of the source list.
// NOTE(review): the three checks below read identically in this text; presumably each one
// originally tested a different generic element type - confirm against the upstream file.
if (values[col] is IList)
shuffled.Add(new List());
else if (values[col] is IList)
shuffled.Add(new List());
else if (values[col] is IList)
shuffled.Add(new List());
else
throw new InvalidOperationException();
// Copy the elements of this column in the permuted order.
for (int i = 0; i < count; i++) {
shuffled[col].Add(values[col][indices[i]]);
}
}
return shuffled;
}
// Compiled delegates that write and read the field named "variableValues" of a Dataset.
// Both are created once in the static constructor below.
private static readonly Action setValues;
private static readonly Func getValues;
// Builds the setter and getter as expression trees and compiles them into delegates.
// NOTE(review): the field is looked up by name, presumably because it is not publicly
// accessible on Dataset - confirm against the Dataset class.
static DatasetUtil() {
// Lambda parameters: the dataset instance and the replacement values dictionary.
var dataset = Expression.Parameter(typeof(Dataset));
var variableValues = Expression.Parameter(typeof(ValuesType));
// Expression for: dataset.variableValues
var valuesExpression = Expression.Field(dataset, "variableValues");
// Expression for: dataset.variableValues = variableValues
var assignExpression = Expression.Assign(valuesExpression, variableValues);
var variableValuesSetExpression = Expression.Lambda>(assignExpression, dataset, variableValues);
setValues = variableValuesSetExpression.Compile();
var variableValuesGetExpression = Expression.Lambda>(valuesExpression, dataset);
getValues = variableValuesGetExpression.Compile();
}
/// <summary>
/// Makes datasets with equal variable values share a single values instance.
/// Walks the object graph of <paramref name="content"/> and, for every matching item whose
/// Dataset is a <see cref="Dataset"/>, replaces the dataset's values with the first equal
/// values instance encountered during the traversal.
/// </summary>
/// <param name="content">The root object whose object graph is searched.</param>
public static void RemoveDuplicateDatasets(IContent content) {
// Maps every values instance seen so far to the instance that represents it.
var variableValuesMapping = new Dictionary();
// NOTE(review): the type argument of OfType is missing in this text; presumably it selects
// the problem data objects - confirm against the upstream file.
foreach (var problemData in content.GetObjectGraphObjects(excludeStaticMembers: true).OfType()) {
var dataset = problemData.Dataset as Dataset;
// Items whose dataset is not a Dataset instance are skipped.
if (dataset == null) continue;
var originalValues = getValues(dataset);
ValuesType matchingValues;
// The boolean result (whether a match already existed) is not needed here.
variableValuesMapping.GetEqualValues(originalValues, out matchingValues);
setValues(dataset, matchingValues);
}
}
/// <summary>
/// Computes an interval for every variable of the dataset by passing the variable's
/// double values to Interval.GetInterval.
/// </summary>
/// <param name="dataset">The dataset whose variables are inspected.</param>
/// <param name="rows">The rows to consider; when null, all values of each variable are used.</param>
/// <returns>A dictionary that maps each variable name to the interval of its values.</returns>
public static Dictionary GetVariableRanges(IDataset dataset, IEnumerable rows = null) {
Dictionary variableRanges = new Dictionary();
// NOTE(review): GetDoubleValues is called for every name in VariableNames; presumably all
// variables are expected to be double-valued - confirm against callers.
foreach (var variable in dataset.VariableNames) {
IEnumerable values = null;
// Without a row selection the whole column is used.
if (rows == null) values = dataset.GetDoubleValues(variable);
else values = dataset.GetDoubleValues(variable, rows);
var range = Interval.GetInterval(values);
variableRanges.Add(variable, range);
}
return variableRanges;
}
/// <summary>
/// Resolves the representative values instance for <paramref name="originalValues"/> and
/// records the result in the mapping.
/// </summary>
/// <param name="variableValuesMapping">Maps every values instance seen so far to its representative; representatives map to themselves.</param>
/// <param name="originalValues">The values instance to resolve.</param>
/// <param name="matchingValues">Receives the representative; this is <paramref name="originalValues"/> itself when no equal instance was registered before.</param>
/// <returns>True if an existing entry or an equal representative was found; false if <paramref name="originalValues"/> was registered as a new representative.</returns>
private static bool GetEqualValues(this Dictionary variableValuesMapping, ValuesType originalValues, out ValuesType matchingValues) {
  // Fast path: this exact instance was resolved before. A single TryGetValue replaces the
  // former ContainsKey + indexer double lookup.
  if (variableValuesMapping.TryGetValue(originalValues, out matchingValues)) return true;

  // Only representatives (entries mapped to themselves) are compared element by element.
  // FirstOrDefault yields a default pair with a null key when nothing matches.
  matchingValues = variableValuesMapping.FirstOrDefault(kv => kv.Key == kv.Value && EqualVariableValues(originalValues, kv.Key)).Key;

  bool found = matchingValues != null;
  // No equal representative exists: this instance becomes its own representative.
  if (!found) matchingValues = originalValues;

  // Remember the outcome so the next lookup of this instance takes the fast path.
  variableValuesMapping[originalValues] = matchingValues;
  return found;
}
/// <summary>
/// Checks whether two values dictionaries contain the same variable names and, for every
/// variable, value lists of the same length with pairwise equal elements.
/// </summary>
/// <param name="values1">The first values dictionary.</param>
/// <param name="values2">The second values dictionary.</param>
/// <returns>True if names and all elements are equal; otherwise false.</returns>
private static bool EqualVariableValues(ValuesType values1, ValuesType values2) {
  // Compare variable names for equality.
  // NOTE(review): the keys are compared in enumeration order, so dictionaries holding the
  // same names in a different order count as unequal - confirm this is intended.
  if (!values1.Keys.SequenceEqual(values2.Keys)) return false;

  foreach (var key in values1.Keys) {
    var v1 = values1[key];
    var v2 = values2[key];

    // A column instance shared by both dictionaries needs no element-wise comparison.
    if (object.ReferenceEquals(v1, v2)) continue;
    if (v1.Count != v2.Count) return false;

    for (int i = 0; i < v1.Count; i++) {
      // The static object.Equals is null-safe: a null element compares equal only to another
      // null instead of raising a NullReferenceException.
      if (!object.Equals(v1[i], v2[i])) return false;
    }
  }
  return true;
}
}
}