Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis/3.4/DatasetUtil.cs @ 18242

Last change on this file since 18242 was 18208, checked in by gkronber, 3 years ago

#3134: bugfixes and improvements

File size: 5.4 KB
RevLine 
[14857]1#region License Information
2/* HeuristicLab
[17180]3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[14857]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
[15427]26using System.Linq.Expressions;
27using HeuristicLab.Common;
[14857]28using HeuristicLab.Core;
29using HeuristicLab.Random;
30
31namespace HeuristicLab.Problems.DataAnalysis {
[15427]32  using ValuesType = Dictionary<string, IList>;
33
[14857]34  public static class DatasetUtil {
35    /// <summary>
36    /// Shuffle all the lists with the same shuffling.
37    /// </summary>
38    /// <param name="values">The value lists to be shuffled.</param>
39    /// <param name="random">The random number generator</param>
40    /// <returns>A new list containing shuffled copies of the original value lists.</returns>
41    public static List<IList> ShuffleLists(this List<IList> values, IRandom random) {
42      int count = values.First().Count;
43      int[] indices = Enumerable.Range(0, count).Shuffle(random).ToArray();
44      List<IList> shuffled = new List<IList>(values.Count);
45      for (int col = 0; col < values.Count; col++) {
46
[15013]47        if (values[col] is IList<double>)
[14857]48          shuffled.Add(new List<double>());
[15013]49        else if (values[col] is IList<DateTime>)
[14857]50          shuffled.Add(new List<DateTime>());
[15013]51        else if (values[col] is IList<string>)
[14857]52          shuffled.Add(new List<string>());
53        else
54          throw new InvalidOperationException();
55
56        for (int i = 0; i < count; i++) {
57          shuffled[col].Add(values[col][indices[i]]);
58        }
59      }
60      return shuffled;
[15427]61    }
[14857]62
[15427]63    private static readonly Action<Dataset, ValuesType> setValues;
64    private static readonly Func<Dataset, ValuesType> getValues;
65    static DatasetUtil() {
66      var dataset = Expression.Parameter(typeof(Dataset));
67      var variableValues = Expression.Parameter(typeof(ValuesType));
68      var valuesExpression = Expression.Field(dataset, "variableValues");
69      var assignExpression = Expression.Assign(valuesExpression, variableValues);
70
71      var variableValuesSetExpression = Expression.Lambda<Action<Dataset, ValuesType>>(assignExpression, dataset, variableValues);
72      setValues = variableValuesSetExpression.Compile();
73
74      var variableValuesGetExpression = Expression.Lambda<Func<Dataset, ValuesType>>(valuesExpression, dataset);
75      getValues = variableValuesGetExpression.Compile();
[14857]76    }
[15427]77
78    public static void RemoveDuplicateDatasets(IContent content) {
79      var variableValuesMapping = new Dictionary<ValuesType, ValuesType>();
80
81      foreach (var problemData in content.GetObjectGraphObjects(excludeStaticMembers: true).OfType<IDataAnalysisProblemData>()) {
82        var dataset = problemData.Dataset as Dataset;
83        if (dataset == null) continue;
84
85        var originalValues = getValues(dataset);
86
87        ValuesType matchingValues;
88
89        variableValuesMapping.GetEqualValues(originalValues, out matchingValues);
90
91        setValues(dataset, matchingValues);
92      }
93    }
94
[16407]95    public static Dictionary<string, Interval> GetVariableRanges(IDataset dataset, IEnumerable<int> rows = null) {
96      Dictionary<string, Interval> variableRanges = new Dictionary<string, Interval>();
97
98      foreach (var variable in dataset.VariableNames) {
[18208]99        IEnumerable<double> values;
[16407]100        if (rows == null) values = dataset.GetDoubleValues(variable);
101        else values = dataset.GetDoubleValues(variable, rows);
102
103        var range = Interval.GetInterval(values);
104        variableRanges.Add(variable, range);
105      }
106
107      return variableRanges;
108    }
109
[15427]110    private static bool GetEqualValues(this Dictionary<ValuesType, ValuesType> variableValuesMapping, ValuesType originalValues, out ValuesType matchingValues) {
111      if (variableValuesMapping.ContainsKey(originalValues)) {
112        matchingValues = variableValuesMapping[originalValues];
113        return true;
114      }
115      matchingValues = variableValuesMapping.FirstOrDefault(kv => kv.Key == kv.Value && EqualVariableValues(originalValues, kv.Key)).Key;
116      bool result = true;
117      if (matchingValues == null) {
118        matchingValues = originalValues;
119        result = false;
120      }
121      variableValuesMapping[originalValues] = matchingValues;
122      return result;
123    }
124
125    private static bool EqualVariableValues(ValuesType values1, ValuesType values2) {
126      //compare variable names for equality
127      if (!values1.Keys.SequenceEqual(values2.Keys)) return false;
128      foreach (var key in values1.Keys) {
129        var v1 = values1[key];
130        var v2 = values2[key];
131        if (v1.Count != v2.Count) return false;
132        for (int i = 0; i < v1.Count; i++) {
133          if (!v1[i].Equals(v2[i])) return false;
134        }
135      }
136      return true;
137    }
[14857]138  }
139}
Note: See TracBrowser for help on using the repository browser.