Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2780_SAPBA/HeuristicLab.Algorithms.SAPBA/SapbaUtilities.cs @ 18229

Last change on this file since 18229 was 16108, checked in by bwerth, 6 years ago

#2780 renamed branch to include ticket number

File size: 7.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using HeuristicLab.Algorithms.DataAnalysis;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Encodings.RealVectorEncoding;
31using HeuristicLab.Optimization;
32using HeuristicLab.Problems.DataAnalysis;
33
34namespace HeuristicLab.Algorithms.SAPBA {
35  internal static class SapbaUtilities {
    //Extension methods for convenience
37    public static int ArgMax<T>(this IEnumerable<T> values, Func<T, double> func) {
38      var max = double.MinValue;
39      var maxIdx = 0;
40      var idx = 0;
41      foreach (var v in values) {
42        var d = func.Invoke(v);
43        if (d > max) {
44          max = d;
45          maxIdx = idx;
46        }
47        idx++;
48      }
49      return maxIdx;
50    }
51    public static int ArgMin<T>(this IEnumerable<T> values, Func<T, double> func) {
52      return ArgMax(values, x => -func.Invoke(x));
53    }
54    public static double GetEstimation(this IRegressionModel model, RealVector r) {
55      var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false);
56      return model.GetEstimatedValues(dataset, new[] { 0 }).First();
57    }
58    public static double GetVariance(this IConfidenceRegressionModel model, RealVector r) {
59      var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false);
60      return model.GetEstimatedVariances(dataset, new[] { 0 }).First();
61    }
62    public static double GetDoubleValue(this IDataset dataset, int i, int j) {
63      return dataset.GetDoubleValue("input" + j, i);
64    }
65
66    //Sub-Algorithms
67    public static ResultCollection SyncRunSubAlgorithm(IAlgorithm alg, int random) {
68      if (alg.Parameters.ContainsKey("SetSeedRandomly") && alg.Parameters.ContainsKey("Seed")) {
69        var setSeed = alg.Parameters["SetSeedRandomly"].ActualValue as BoolValue;
70        var seed = alg.Parameters["Seed"].ActualValue as IntValue;
71        if (seed == null || setSeed == null) throw new ArgumentException("wrong SeedParametertypes");
72        setSeed.Value = false;
73        seed.Value = random;
74
75      }
76      EventWaitHandle trigger = new AutoResetEvent(false);
77      Exception ex = null;
78      EventHandler<EventArgs<Exception>> exhandler = (sender, e) => ex = e.Value;
79      EventHandler stoppedHandler = (sender, e) => trigger.Set();
80      alg.ExceptionOccurred += exhandler;
81      alg.Stopped += stoppedHandler;
82      alg.Prepare();
83      alg.Start();
84      trigger.WaitOne();
85      alg.ExceptionOccurred -= exhandler;
86      alg.Stopped -= stoppedHandler;
87      if (ex != null) throw ex;
88      return alg.Results;
89    }
90    public static IRegressionSolution BuildModel(IReadOnlyList<Tuple<RealVector, double>> samples, IDataAnalysisAlgorithm<IRegressionProblem> regressionAlgorithm, IRandom random, bool removeDuplicates = true, IRegressionSolution oldSolution = null) {
91      var dataset = GetDataSet(samples, removeDuplicates);
92      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output");
93      problemdata.TrainingPartition.Start = 0;
94      problemdata.TrainingPartition.End = dataset.Rows;
95      problemdata.TestPartition.Start = dataset.Rows;
96      problemdata.TestPartition.End = dataset.Rows;
97
98      if (regressionAlgorithm.Problem == null) regressionAlgorithm.Problem = new RegressionProblem();
99      var problem = regressionAlgorithm.Problem;
100      problem.ProblemDataParameter.Value = problemdata;
101      var i = 0;
102      IRegressionSolution solution = null;
103
104      while (solution == null && i++ < 100) {
105        var results = SyncRunSubAlgorithm(regressionAlgorithm, random.Next(int.MaxValue));
106        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
107      }
108
109      //special treatement for GaussianProcessRegression
110      var gp = regressionAlgorithm as GaussianProcessRegression;
111      var oldGaussian = oldSolution as GaussianProcessRegressionSolution;
112      if (gp != null && oldGaussian != null) {
113        const double noise = 0.0;
114        var n = samples.First().Item1.Length;
115        var mean = (IMeanFunction)oldGaussian.Model.MeanFunction.Clone();
116        var cov = (ICovarianceFunction)oldGaussian.Model.CovarianceFunction.Clone();
117        if (mean.GetNumberOfParameters(n) != 0 || cov.GetNumberOfParameters(n) != 0) throw new ArgumentException("DEBUG: assumption about fixed paramters wrong");
118        double[] hyp = { noise };
119        try {
120          var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable, problemdata.AllowedInputVariables, problemdata.TrainingIndices, hyp, mean, cov);
121          model.FixParameters();
122          var sol = new GaussianProcessRegressionSolution(model, problemdata);
123          if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) solution = sol;
124        }
125        catch (ArgumentException) { }
126      }
127      if (solution == null) throw new ArgumentException("The algorithm didn't return a model");
128      regressionAlgorithm.Runs.Clear();
129      return solution;
130    }
131
132    //RegressionModel extensions
133    public const double DuplicateResolution = 0.000001;
134    public static Dataset GetDataSet(IReadOnlyList<Tuple<RealVector, double>> samples, bool removeDuplicates) {
135      if (removeDuplicates) samples = RemoveDuplicates(samples); //TODO duplicate removal leads to incorrect uncertainty values in models
136      var dimensions = samples[0].Item1.Length + 1;
137      var data = new double[samples.Count, dimensions];
138      var names = new string[dimensions - 1];
139      for (var i = 0; i < names.Length; i++) names[i] = "input" + i;
140      for (var j = 0; j < samples.Count; j++) {
141        for (var i = 0; i < names.Length; i++) data[j, i] = samples[j].Item1[i];
142        data[j, dimensions - 1] = samples[j].Item2;
143      }
144      return new Dataset(names.Concat(new[] { "output" }).ToArray(), data);
145    }
146    private static IReadOnlyList<Tuple<RealVector, double>> RemoveDuplicates(IReadOnlyList<Tuple<RealVector, double>> samples) {
147      var res = new List<Tuple<RealVector, double, int>>();
148      foreach (var sample in samples) {
149        if (res.Count == 0) {
150          res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1));
151          continue;
152        }
153        var index = res.ArgMin(x => Euclidian(sample.Item1, x.Item1));
154        var d = Euclidian(res[index].Item1, sample.Item1);
155        if (d > DuplicateResolution) res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1));
156        else {
157          var t = res[index];
158          res.RemoveAt(index);
159          res.Add(new Tuple<RealVector, double, int>(t.Item1, t.Item2 + sample.Item2, t.Item3 + 1));
160        }
161      }
162      return res.Select(x => new Tuple<RealVector, double>(x.Item1, x.Item2 / x.Item3)).ToArray();
163    }
164    private static double Euclidian(IEnumerable<double> a, IEnumerable<double> b) {
165      return Math.Sqrt(a.Zip(b, (d, d1) => d - d1).Sum(d => d * d));
166    }
167  }
168}
Note: See TracBrowser for help on using the repository browser.