source: trunk/HeuristicLab.Problems.DataAnalysis/3.4/DatasetUtil.cs @ 16407

Last change on this file since 16407 was 16407, checked in by chaider, 23 months ago

#2966: Merged branch changes into trunk.

File size: 5.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using System.Linq.Expressions;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Random;
30
31namespace HeuristicLab.Problems.DataAnalysis {
32  using ValuesType = Dictionary<string, IList>;
33
34  public static class DatasetUtil {
35    /// <summary>
36    /// Shuffle all the lists with the same shuffling.
37    /// </summary>
38    /// <param name="values">The value lists to be shuffled.</param>
39    /// <param name="random">The random number generator</param>
40    /// <returns>A new list containing shuffled copies of the original value lists.</returns>
41    public static List<IList> ShuffleLists(this List<IList> values, IRandom random) {
42      int count = values.First().Count;
43      int[] indices = Enumerable.Range(0, count).Shuffle(random).ToArray();
44      List<IList> shuffled = new List<IList>(values.Count);
45      for (int col = 0; col < values.Count; col++) {
46
47        if (values[col] is IList<double>)
48          shuffled.Add(new List<double>());
49        else if (values[col] is IList<DateTime>)
50          shuffled.Add(new List<DateTime>());
51        else if (values[col] is IList<string>)
52          shuffled.Add(new List<string>());
53        else
54          throw new InvalidOperationException();
55
56        for (int i = 0; i < count; i++) {
57          shuffled[col].Add(values[col][indices[i]]);
58        }
59      }
60      return shuffled;
61    }
62
63    private static readonly Action<Dataset, ValuesType> setValues;
64    private static readonly Func<Dataset, ValuesType> getValues;
65    static DatasetUtil() {
66      var dataset = Expression.Parameter(typeof(Dataset));
67      var variableValues = Expression.Parameter(typeof(ValuesType));
68      var valuesExpression = Expression.Field(dataset, "variableValues");
69      var assignExpression = Expression.Assign(valuesExpression, variableValues);
70
71      var variableValuesSetExpression = Expression.Lambda<Action<Dataset, ValuesType>>(assignExpression, dataset, variableValues);
72      setValues = variableValuesSetExpression.Compile();
73
74      var variableValuesGetExpression = Expression.Lambda<Func<Dataset, ValuesType>>(valuesExpression, dataset);
75      getValues = variableValuesGetExpression.Compile();
76    }
77
78    public static void RemoveDuplicateDatasets(IContent content) {
79      var variableValuesMapping = new Dictionary<ValuesType, ValuesType>();
80
81      foreach (var problemData in content.GetObjectGraphObjects(excludeStaticMembers: true).OfType<IDataAnalysisProblemData>()) {
82        var dataset = problemData.Dataset as Dataset;
83        if (dataset == null) continue;
84
85        var originalValues = getValues(dataset);
86
87        ValuesType matchingValues;
88
89        variableValuesMapping.GetEqualValues(originalValues, out matchingValues);
90
91        setValues(dataset, matchingValues);
92      }
93    }
94
95    public static Dictionary<string, Interval> GetVariableRanges(IDataset dataset, IEnumerable<int> rows = null) {
96      Dictionary<string, Interval> variableRanges = new Dictionary<string, Interval>();
97
98      foreach (var variable in dataset.VariableNames) {
99        IEnumerable<double> values = null;
100
101        if (rows == null) values = dataset.GetDoubleValues(variable);
102        else values = dataset.GetDoubleValues(variable, rows);
103
104        var range = Interval.GetInterval(values);
105        variableRanges.Add(variable, range);
106      }
107
108      return variableRanges;
109    }
110
111    private static bool GetEqualValues(this Dictionary<ValuesType, ValuesType> variableValuesMapping, ValuesType originalValues, out ValuesType matchingValues) {
112      if (variableValuesMapping.ContainsKey(originalValues)) {
113        matchingValues = variableValuesMapping[originalValues];
114        return true;
115      }
116      matchingValues = variableValuesMapping.FirstOrDefault(kv => kv.Key == kv.Value && EqualVariableValues(originalValues, kv.Key)).Key;
117      bool result = true;
118      if (matchingValues == null) {
119        matchingValues = originalValues;
120        result = false;
121      }
122      variableValuesMapping[originalValues] = matchingValues;
123      return result;
124    }
125
126    private static bool EqualVariableValues(ValuesType values1, ValuesType values2) {
127      //compare variable names for equality
128      if (!values1.Keys.SequenceEqual(values2.Keys)) return false;
129      foreach (var key in values1.Keys) {
130        var v1 = values1[key];
131        var v2 = values2[key];
132        if (v1.Count != v2.Count) return false;
133        for (int i = 0; i < v1.Count; i++) {
134          if (!v1[i].Equals(v2[i])) return false;
135        }
136      }
137      return true;
138    }
139  }
140}
Note: See TracBrowser for help on using the repository browser.