Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2966_interval_calculation/HeuristicLab.Problems.DataAnalysis/3.4/DatasetUtil.cs @ 16364

Last change on this file since 16364 was 16364, checked in by chaider, 5 years ago

#2966

  • Changed signature of GetSymbolicExressionTreeIntervals methods
  • Changed PrepareInterpreterState (removed optional parameters; it now takes a Dictionary<string, Interval> as input instead of a dataset)
  • Added optional parameter (rows) to GetVariableBoundaries method (allows to use just parts from dataset as input e.g. training/test indices)
File size: 5.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using System.Linq.Expressions;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Random;
30
31namespace HeuristicLab.Problems.DataAnalysis {
32  using ValuesType = Dictionary<string, IList>;
33
34  public static class DatasetUtil {
35    /// <summary>
36    /// Shuffle all the lists with the same shuffling.
37    /// </summary>
38    /// <param name="values">The value lists to be shuffled.</param>
39    /// <param name="random">The random number generator</param>
40    /// <returns>A new list containing shuffled copies of the original value lists.</returns>
41    public static List<IList> ShuffleLists(this List<IList> values, IRandom random) {
42      int count = values.First().Count;
43      int[] indices = Enumerable.Range(0, count).Shuffle(random).ToArray();
44      List<IList> shuffled = new List<IList>(values.Count);
45      for (int col = 0; col < values.Count; col++) {
46
47        if (values[col] is IList<double>)
48          shuffled.Add(new List<double>());
49        else if (values[col] is IList<DateTime>)
50          shuffled.Add(new List<DateTime>());
51        else if (values[col] is IList<string>)
52          shuffled.Add(new List<string>());
53        else
54          throw new InvalidOperationException();
55
56        for (int i = 0; i < count; i++) {
57          shuffled[col].Add(values[col][indices[i]]);
58        }
59      }
60      return shuffled;
61    }
62
63    private static readonly Action<Dataset, ValuesType> setValues;
64    private static readonly Func<Dataset, ValuesType> getValues;
65    static DatasetUtil() {
66      var dataset = Expression.Parameter(typeof(Dataset));
67      var variableValues = Expression.Parameter(typeof(ValuesType));
68      var valuesExpression = Expression.Field(dataset, "variableValues");
69      var assignExpression = Expression.Assign(valuesExpression, variableValues);
70
71      var variableValuesSetExpression = Expression.Lambda<Action<Dataset, ValuesType>>(assignExpression, dataset, variableValues);
72      setValues = variableValuesSetExpression.Compile();
73
74      var variableValuesGetExpression = Expression.Lambda<Func<Dataset, ValuesType>>(valuesExpression, dataset);
75      getValues = variableValuesGetExpression.Compile();
76    }
77
78    public static void RemoveDuplicateDatasets(IContent content) {
79      var variableValuesMapping = new Dictionary<ValuesType, ValuesType>();
80
81      foreach (var problemData in content.GetObjectGraphObjects(excludeStaticMembers: true).OfType<IDataAnalysisProblemData>()) {
82        var dataset = problemData.Dataset as Dataset;
83        if (dataset == null) continue;
84
85        var originalValues = getValues(dataset);
86
87        ValuesType matchingValues;
88
89        variableValuesMapping.GetEqualValues(originalValues, out matchingValues);
90
91        setValues(dataset, matchingValues);
92      }
93    }
94
95    public static Dictionary<string, Interval> GetVariableBoundaries(IDataset dataset, IEnumerable<int> rows = null) {
96      Dictionary<string, Interval> variableBoundaries = new Dictionary<string, Interval>();
97
98      foreach (var variable in dataset.VariableNames) {
99        var min = double.MaxValue;
100        var max = double.MinValue;
101
102        if (rows != null) {
103          foreach (int row in rows) {
104            var val = dataset.GetDoubleValue(variable, row);
105            if (val < min) min = val;
106            if (val > max) max = val;
107          }
108        } else {
109          foreach (var val in dataset.GetDoubleValues(variable)) {
110            if (val < min) min = val;
111            if (val > max) max = val;
112          }
113        }
114        variableBoundaries.Add(variable, new Interval(min, max));
115      }
116
117      return variableBoundaries;
118    }
119
120    private static bool GetEqualValues(this Dictionary<ValuesType, ValuesType> variableValuesMapping, ValuesType originalValues, out ValuesType matchingValues) {
121      if (variableValuesMapping.ContainsKey(originalValues)) {
122        matchingValues = variableValuesMapping[originalValues];
123        return true;
124      }
125      matchingValues = variableValuesMapping.FirstOrDefault(kv => kv.Key == kv.Value && EqualVariableValues(originalValues, kv.Key)).Key;
126      bool result = true;
127      if (matchingValues == null) {
128        matchingValues = originalValues;
129        result = false;
130      }
131      variableValuesMapping[originalValues] = matchingValues;
132      return result;
133    }
134
135    private static bool EqualVariableValues(ValuesType values1, ValuesType values2) {
136      //compare variable names for equality
137      if (!values1.Keys.SequenceEqual(values2.Keys)) return false;
138      foreach (var key in values1.Keys) {
139        var v1 = values1[key];
140        var v2 = values2[key];
141        if (v1.Count != v2.Count) return false;
142        for (int i = 0; i < v1.Count; i++) {
143          if (!v1[i].Equals(v2[i])) return false;
144        }
145      }
146      return true;
147    }
148  }
149}
Note: See TracBrowser for help on using the repository browser.