Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2839_HiveProjectManagement/HeuristicLab.Problems.DataAnalysis/3.4/DatasetUtil.cs @ 17021

Last change on this file since 17021 was 16057, checked in by jkarder, 6 years ago

#2839:

File size: 4.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using System.Linq.Expressions;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Random;
30
namespace HeuristicLab.Problems.DataAnalysis {
  using ValuesType = Dictionary<string, IList>;

  /// <summary>
  /// Static helpers that operate on the value lists of datasets: shuffling value lists
  /// and letting datasets with equal contents share one value dictionary.
  /// </summary>
  public static class DatasetUtil {
    /// <summary>
    /// Shuffle all the lists with the same shuffling.
    /// </summary>
    /// <remarks>
    /// The number of rows is taken from the first list. Every list is read through the same
    /// permutation of that index range, so each list must contain at least that many elements.
    /// </remarks>
    /// <param name="values">The value lists to be shuffled.</param>
    /// <param name="random">The random number generator</param>
    /// <returns>
    /// A new list containing shuffled copies of the original value lists.
    /// An empty list is returned when <paramref name="values"/> contains no lists.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// A list is neither an IList of double, DateTime nor string.
    /// </exception>
    public static List<IList> ShuffleLists(this List<IList> values, IRandom random) {
      if (values == null) throw new ArgumentNullException("values");
      // Nothing to shuffle. Without this guard the row count lookup below would fail.
      if (values.Count == 0) return new List<IList>();

      int count = values[0].Count;
      // One permutation is drawn up front and reused for every column so rows stay aligned.
      int[] indices = Enumerable.Range(0, count).Shuffle(random).ToArray();
      List<IList> shuffled = new List<IList>(values.Count);
      foreach (IList column in values) {
        IList copy = CreateEmptyListLike(column, count);
        for (int i = 0; i < count; i++) {
          copy.Add(column[indices[i]]);
        }
        shuffled.Add(copy);
      }
      return shuffled;
    }

    /// <summary>
    /// Creates an empty, presized List of double, DateTime or string matching the element type of <paramref name="column"/>.
    /// </summary>
    private static IList CreateEmptyListLike(IList column, int capacity) {
      if (column is IList<double>) return new List<double>(capacity);
      if (column is IList<DateTime>) return new List<DateTime>(capacity);
      if (column is IList<string>) return new List<string>(capacity);
      throw new InvalidOperationException(
        "Unsupported value list type: " + (column == null ? "null" : column.GetType().FullName));
    }

    // Compiled accessors for the field named "variableValues" of Dataset.
    // NOTE(review): presumably the field is not publicly accessible, which is why
    // expression trees are used instead of direct member access - confirm against Dataset.
    private static readonly Action<Dataset, ValuesType> setValues;
    private static readonly Func<Dataset, ValuesType> getValues;

    /// <summary>
    /// Builds and compiles the delegates that read and assign the "variableValues" field of a Dataset.
    /// </summary>
    static DatasetUtil() {
      var dataset = Expression.Parameter(typeof(Dataset));
      var variableValues = Expression.Parameter(typeof(ValuesType));
      var valuesExpression = Expression.Field(dataset, "variableValues");
      var assignExpression = Expression.Assign(valuesExpression, variableValues);

      // (dataset, variableValues) => dataset.variableValues = variableValues
      var variableValuesSetExpression = Expression.Lambda<Action<Dataset, ValuesType>>(assignExpression, dataset, variableValues);
      setValues = variableValuesSetExpression.Compile();

      // dataset => dataset.variableValues
      var variableValuesGetExpression = Expression.Lambda<Func<Dataset, ValuesType>>(valuesExpression, dataset);
      getValues = variableValuesGetExpression.Compile();
    }

    /// <summary>
    /// Visits every IDataAnalysisProblemData found in the object graph of <paramref name="content"/>
    /// and, when its dataset is a Dataset whose values equal those of an earlier visited dataset,
    /// makes it reference the earlier value dictionary instead of its own copy.
    /// </summary>
    /// <param name="content">The root object whose object graph is searched for problem data.</param>
    public static void RemoveDuplicateDatasets(IContent content) {
      // Maps every value dictionary seen so far to its representative (representatives map to themselves).
      var variableValuesMapping = new Dictionary<ValuesType, ValuesType>();

      foreach (var problemData in content.GetObjectGraphObjects(excludeStaticMembers: true).OfType<IDataAnalysisProblemData>()) {
        var dataset = problemData.Dataset as Dataset;
        if (dataset == null) continue;

        var originalValues = getValues(dataset);

        ValuesType matchingValues;
        variableValuesMapping.GetEqualValues(originalValues, out matchingValues);

        // Re-assigning the same instance would be a no-op, so only write when a different representative was found.
        if (!ReferenceEquals(originalValues, matchingValues))
          setValues(dataset, matchingValues);
      }
    }

    /// <summary>
    /// Looks up the representative value dictionary for <paramref name="originalValues"/> and records the result in the mapping.
    /// </summary>
    /// <param name="variableValuesMapping">Mapping from already seen dictionaries (by reference) to their representative.</param>
    /// <param name="originalValues">The dictionary to find a representative for.</param>
    /// <param name="matchingValues">
    /// The representative: a previously registered dictionary with equal contents,
    /// or <paramref name="originalValues"/> itself when there is none.
    /// </param>
    /// <returns>
    /// True if <paramref name="originalValues"/> was already registered or an equal representative exists;
    /// false if it has just been registered as its own representative.
    /// </returns>
    private static bool GetEqualValues(this Dictionary<ValuesType, ValuesType> variableValuesMapping, ValuesType originalValues, out ValuesType matchingValues) {
      // Single lookup instead of ContainsKey followed by the indexer.
      if (variableValuesMapping.TryGetValue(originalValues, out matchingValues)) {
        return true;
      }

      // Only representatives (entries mapping to themselves) need the expensive content comparison.
      matchingValues = variableValuesMapping.FirstOrDefault(kv => kv.Key == kv.Value && EqualVariableValues(originalValues, kv.Key)).Key;
      bool result = true;
      if (matchingValues == null) {
        matchingValues = originalValues;
        result = false;
      }
      variableValuesMapping[originalValues] = matchingValues;
      return result;
    }

    /// <summary>
    /// Compares two value dictionaries by content.
    /// </summary>
    /// <returns>
    /// True if both dictionaries enumerate the same variable names in the same order and the lists
    /// stored under each name have the same length and pairwise equal elements; otherwise false.
    /// </returns>
    private static bool EqualVariableValues(ValuesType values1, ValuesType values2) {
      //compare variable names for equality (this comparison is sensitive to the enumeration order of the keys)
      if (!values1.Keys.SequenceEqual(values2.Keys)) return false;
      foreach (var key in values1.Keys) {
        var v1 = values1[key];
        var v2 = values2[key];
        // Same list instance (or both missing): no need to compare element by element.
        if (ReferenceEquals(v1, v2)) continue;
        if (v1 == null || v2 == null) return false;
        if (v1.Count != v2.Count) return false;
        for (int i = 0; i < v1.Count; i++) {
          // Static object.Equals is null-safe; calling v1[i].Equals directly throws for null elements.
          if (!object.Equals(v1[i], v2[i])) return false;
        }
      }
      return true;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.