Free cookie consent management tool by TermsFeed Policy Generator

source: branches/SAPBA/HeuristicLab.Algorithms.SAPBA/EgoUtilities.cs @ 14893

Last change on this file since 14893 was 14893, checked in by bwerth, 8 years ago

#2780 added initial files for SAPBA

File size: 8.0 KB
RevLine 
[14893]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using HeuristicLab.Algorithms.DataAnalysis;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Encodings.RealVectorEncoding;
31using HeuristicLab.Optimization;
32using HeuristicLab.Problems.DataAnalysis;
33
34namespace HeuristicLab.Algorithms.SAPBA {
35  internal static class EgoUtilities {
36    //Extension methods for convenience
37    public static int ArgMax<T>(this IEnumerable<T> values, Func<T, double> func) {
38      var max = double.MinValue;
39      var maxIdx = 0;
40      var idx = 0;
41      foreach (var v in values) {
42        var d = func.Invoke(v);
43        if (d > max) {
44          max = d;
45          maxIdx = idx;
46        }
47        idx++;
48      }
49      return maxIdx;
50    }
51    public static int ArgMin<T>(this IEnumerable<T> values, Func<T, double> func) {
52      return ArgMax(values, x => -func.Invoke(x));
53    }
54    public static double GetEstimation(this IRegressionModel model, RealVector r) {
55      var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false);
56      return model.GetEstimatedValues(dataset, new[] { 0 }).First();
57    }
58    public static double GetVariance(this IConfidenceRegressionModel model, RealVector r) {
59      var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false);
60      return model.GetEstimatedVariances(dataset, new[] { 0 }).First();
61    }
62    public static double GetDoubleValue(this IDataset dataset, int i, int j) {
63      return dataset.GetDoubleValue("input" + j, i);
64    }
65
66    //Sub-Algorithms
67    public static ResultCollection SyncRunSubAlgorithm(IAlgorithm alg, int random) {
68
69      if (alg.Parameters.ContainsKey("SetSeedRandomly") && alg.Parameters.ContainsKey("Seed")) {
70        var setSeed = alg.Parameters["SetSeedRandomly"].ActualValue as BoolValue;
71        var seed = alg.Parameters["Seed"].ActualValue as IntValue;
72        if (seed == null || setSeed == null) throw new ArgumentException("wrong SeedParametertypes");
73        setSeed.Value = false;
74        seed.Value = random;
75
76      }
77
78
79      EventWaitHandle trigger = new AutoResetEvent(false);
80      Exception ex = null;
81      EventHandler<EventArgs<Exception>> exhandler = (sender, e) => ex = e.Value;
82      EventHandler stoppedHandler = (sender, e) => trigger.Set();
83      alg.ExceptionOccurred += exhandler;
84      alg.Stopped += stoppedHandler;
85      alg.Prepare();
86      alg.Start();
87      trigger.WaitOne();
88      alg.ExceptionOccurred -= exhandler;
89      alg.Stopped -= stoppedHandler;
90      if (ex != null) throw ex;
91      return alg.Results;
92    }
93
94    public static IRegressionSolution BuildModel(CancellationToken cancellationToken, IEnumerable<Tuple<RealVector, double>> samples, IDataAnalysisAlgorithm<IRegressionProblem> regressionAlgorithm, IRandom random, bool removeDuplicates = true, IRegressionSolution oldSolution = null) {
95      var dataset = EgoUtilities.GetDataSet(samples.ToList(), removeDuplicates);
96      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output");
97      problemdata.TrainingPartition.Start = 0;
98      problemdata.TrainingPartition.End = dataset.Rows;
99      problemdata.TestPartition.Start = dataset.Rows;
100      problemdata.TestPartition.End = dataset.Rows;
101
102
103      if (regressionAlgorithm.Problem == null) regressionAlgorithm.Problem = new RegressionProblem();
104      var problem = regressionAlgorithm.Problem;
105      problem.ProblemDataParameter.Value = problemdata;
106      var i = 0;
107      IRegressionSolution solution = null;
108
109      while (solution == null && i++ < 100) {
110        var results = EgoUtilities.SyncRunSubAlgorithm(regressionAlgorithm, random.Next(int.MaxValue));
111        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
112        cancellationToken.ThrowIfCancellationRequested();
113      }
114
115      //special treatement for GaussianProcessRegression
116      var gp = regressionAlgorithm;
117      var oldGaussian = oldSolution as GaussianProcessRegressionSolution;
118      if (gp != null && oldGaussian != null) {
119        const double noise = 0.0;
120        var n = samples.First().Item1.Length;
121        var mean = (IMeanFunction)oldGaussian.Model.MeanFunction.Clone();
122        var cov = (ICovarianceFunction)oldGaussian.Model.CovarianceFunction.Clone();
123        if (mean.GetNumberOfParameters(n) != 0 || cov.GetNumberOfParameters(n) != 0) throw new ArgumentException("DEBUG: assumption about fixed paramters wrong");
124        double[] hyp = { noise };
125        try {
126          var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable, problemdata.AllowedInputVariables, problemdata.TrainingIndices, hyp, mean, cov);
127          model.FixParameters();
128          var sol = new GaussianProcessRegressionSolution(model, problemdata);
129          if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) {
130            solution = sol;
131          }
132        }
133        catch (ArgumentException) { }
134      }
135
136      if (solution == null) throw new ArgumentException("The algorithm didn't return a model");
137      regressionAlgorithm.Runs.Clear();
138      return solution;
139    }
140    //RegressionModel extensions
141    public const double DuplicateResolution = 0.0001;
142    public static Dataset GetDataSet(IReadOnlyList<Tuple<RealVector, double>> samples, bool removeDuplicates) {
143      if (removeDuplicates) samples = RemoveDuplicates(samples); //TODO duplicate removal leads to incorrect uncertainty values in models
144      var dimensions = samples[0].Item1.Length + 1;
145      var data = new double[samples.Count, dimensions];
146      var names = new string[dimensions - 1];
147      for (var i = 0; i < names.Length; i++) names[i] = "input" + i;
148      for (var j = 0; j < samples.Count; j++) {
149        for (var i = 0; i < names.Length; i++) data[j, i] = samples[j].Item1[i];
150        data[j, dimensions - 1] = samples[j].Item2;
151      }
152      return new Dataset(names.Concat(new[] { "output" }).ToArray(), data);
153    }
154    private static IReadOnlyList<Tuple<RealVector, double>> RemoveDuplicates(IReadOnlyList<Tuple<RealVector, double>> samples) {
155      var res = new List<Tuple<RealVector, double, int>>();
156      foreach (var sample in samples) {
157        if (res.Count == 0) {
158          res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1));
159          continue;
160        }
161        var index = res.ArgMin(x => Euclidian(sample.Item1, x.Item1));
162        var d = Euclidian(res[index].Item1, sample.Item1);
163        if (d > DuplicateResolution)
164          res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1));
165        else {
166          var t = res[index];
167          res.RemoveAt(index);
168          res.Add(new Tuple<RealVector, double, int>(t.Item1, t.Item2 + sample.Item2, t.Item3 + 1));
169        }
170      }
171      return res.Select(x => new Tuple<RealVector, double>(x.Item1, x.Item2 / x.Item3)).ToArray();
172    }
173    private static double Euclidian(IEnumerable<double> a, IEnumerable<double> b) {
174      return Math.Sqrt(a.Zip(b, (d, d1) => d - d1).Sum(d => d * d));
175    }
176  }
177}
Note: See TracBrowser for help on using the repository browser.