#region License Information /* HeuristicLab * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections; using System.Collections.Generic; using System.Linq; using System.Linq.Expressions; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Random; namespace HeuristicLab.Problems.DataAnalysis { using ValuesType = Dictionary; public static class DatasetUtil { /// /// Shuffle all the lists with the same shuffling. /// /// The value lists to be shuffled. /// The random number generator /// A new list containing shuffled copies of the original value lists. public static List ShuffleLists(this List values, IRandom random) { int count = values.First().Count; int[] indices = Enumerable.Range(0, count).Shuffle(random).ToArray(); List shuffled = new List(values.Count); for (int col = 0; col < values.Count; col++) { if (values[col] is IList) shuffled.Add(new List()); else if (values[col] is IList) shuffled.Add(new List()); else if (values[col] is IList) shuffled.Add(new List()); else throw new InvalidOperationException(); for (int i = 0; i < count; i++) { shuffled[col].Add(values[col][indices[i]]); } } return shuffled; } private static readonly Action setValues; private static readonly Func getValues; static DatasetUtil() { var dataset = Expression.Parameter(typeof(Dataset)); var variableValues = Expression.Parameter(typeof(ValuesType)); var valuesExpression = Expression.Field(dataset, "variableValues"); var assignExpression = Expression.Assign(valuesExpression, variableValues); var variableValuesSetExpression = Expression.Lambda>(assignExpression, dataset, variableValues); setValues = variableValuesSetExpression.Compile(); var variableValuesGetExpression = Expression.Lambda>(valuesExpression, dataset); getValues = variableValuesGetExpression.Compile(); } public static void RemoveDuplicateDatasets(IContent content) { var variableValuesMapping = new Dictionary(); foreach (var problemData in content.GetObjectGraphObjects(excludeStaticMembers: true).OfType()) { var dataset = problemData.Dataset as Dataset; if (dataset == null) continue; var originalValues = getValues(dataset); ValuesType matchingValues; variableValuesMapping.GetEqualValues(originalValues, out matchingValues); setValues(dataset, matchingValues); } } private static bool GetEqualValues(this Dictionary variableValuesMapping, ValuesType originalValues, out ValuesType matchingValues) { if (variableValuesMapping.ContainsKey(originalValues)) { matchingValues = variableValuesMapping[originalValues]; return true; } matchingValues = variableValuesMapping.FirstOrDefault(kv => kv.Key == kv.Value && EqualVariableValues(originalValues, kv.Key)).Key; bool result = true; if (matchingValues == null) { matchingValues = originalValues; result = false; } variableValuesMapping[originalValues] = matchingValues; return result; } private static bool EqualVariableValues(ValuesType values1, ValuesType values2) { //compare variable names for equality if (!values1.Keys.SequenceEqual(values2.Keys)) return false; foreach (var key in values1.Keys) { var v1 = values1[key]; var v2 = values2[key]; if (v1.Count != v2.Count) return false; for (int i = 0; i < v1.Count; i++) { if (!v1[i].Equals(v2[i])) return false; } } return true; } } }