Free cookie consent management tool by TermsFeed Policy Generator

source: branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/Operators/ModelBuilder.cs @ 16099

Last change on this file since 16099 was 15343, checked in by bwerth, 7 years ago

#2745 added discretized EGO-version for use with IntegerVectors

File size: 8.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using System.Threading;
25using HeuristicLab.Algorithms.DataAnalysis;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
34
namespace HeuristicLab.Algorithms.EGO {
  /// <summary>
  /// Operator that trains a regression model on the looked-up dataset using the looked-up
  /// regression algorithm and stores the resulting solution in the "Model" parameter.
  /// If the dataset holds more rows than "Maximal Model Size" allows, only the rows with
  /// the smallest "output" values are used and the "InfillBounds" parameter is updated to
  /// the value range of those rows.
  /// </summary>
  [Item("ModelBuilder", "Builds a model from a dataset and a given RegressionAlgorithm")]
  [StorableClass]
  public class ModelBuilder : InstrumentedOperator, IStochasticOperator, ICancellableOperator {
    // Name of the target column; every other column is treated as a model input.
    private const string OutputVariableName = "output";
    // Upper limit on how often the regression algorithm is re-run while it yields no solution.
    private const int MaxTrainingAttempts = 100;

    public override bool CanChangeName => true;
    public CancellationToken Cancellation { get; set; }

    #region Parameter properties
    public ILookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter => (ILookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>>)Parameters["RegressionAlgorithm"];
    public ILookupParameter<IRegressionSolution> ModelParameter => (ILookupParameter<IRegressionSolution>)Parameters["Model"];
    public ILookupParameter<ModifiableDataset> DatasetParameter => (ILookupParameter<ModifiableDataset>)Parameters["Dataset"];
    public ILookupParameter<IRandom> RandomParameter => (ILookupParameter<IRandom>)Parameters["Random"];
    public ILookupParameter<IntValue> MaxModelSizeParameter => (ILookupParameter<IntValue>)Parameters["Maximal Model Size"];
    public ILookupParameter<DoubleMatrix> InfillBoundsParameter => (ILookupParameter<DoubleMatrix>)Parameters["InfillBounds"];
    #endregion

    [StorableConstructor]
    protected ModelBuilder(bool deserializing) : base(deserializing) { }
    protected ModelBuilder(ModelBuilder original, Cloner cloner) : base(original, cloner) { }
    public ModelBuilder() {
      Parameters.Add(new LookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>>("RegressionAlgorithm", "The algorithm used to build a model") { Hidden = true });
      Parameters.Add(new LookupParameter<IRegressionSolution>("Model", "The resulting model") { Hidden = true });
      Parameters.Add(new LookupParameter<ModifiableDataset>("Dataset", "The Dataset from which the model is created") { Hidden = true });
      Parameters.Add(new LookupParameter<IRandom>("Random", "A random number generator") { Hidden = true });
      Parameters.Add(new LookupParameter<IntValue>("Maximal Model Size", "The maximum number of sample points used to build the model (Set -1 for infinite size") { Hidden = true });
      Parameters.Add(new LookupParameter<DoubleMatrix>("InfillBounds", "The bounds applied for infill solving") { Hidden = true });
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ModelBuilder(this, cloner);
    }

    /// <summary>
    /// Reads algorithm, dataset, random generator, previous model and size limit from the
    /// parameters, optionally truncates the dataset, builds the model and writes it back
    /// to the "Model" parameter.
    /// </summary>
    public override IOperation InstrumentedApply() {
      var regressionAlg = RegressionAlgorithmParameter.ActualValue;
      IDataset data = DatasetParameter.ActualValue;
      var random = RandomParameter.ActualValue;
      var oldModel = ModelParameter.ActualValue;
      var max = MaxModelSizeParameter.ActualValue.Value;

      // A non-positive limit disables truncation; the infill bounds are only touched when
      // the dataset was actually reduced.
      if (data.Rows > max && max > 0) {
        data = SelectBestSamples(data, max);
        InfillBoundsParameter.ActualValue = GetBounds(data);
      }
      ModelParameter.ActualValue = BuildModel(random, regressionAlg, data, oldModel);
      return base.InstrumentedApply();
    }

    /// <summary>
    /// Computes the per-variable minimum (column 0) and maximum (column 1) over all rows
    /// of <paramref name="data"/> for every double variable except the "output" column.
    /// </summary>
    /// <remarks>
    /// The input variables are identified by name, matching how <see cref="BuildModel"/>
    /// chooses its inputs, instead of assuming that "output" is the last column.
    /// </remarks>
    private DoubleMatrix GetBounds(IDataset data) {
      var inputNames = data.DoubleVariables.Where(name => !name.Equals(OutputVariableName)).ToArray();
      var bounds = new DoubleMatrix(inputNames.Length, 2);
      for (var i = 0; i < inputNames.Length; i++) {
        // Materialize once so that the column is not enumerated twice.
        var values = data.GetDoubleValues(inputNames[i]).ToArray();
        bounds[i, 0] = values.Min();
        bounds[i, 1] = values.Max();
      }
      return bounds;
    }

    /// <summary>
    /// Creates a new dataset containing only the <paramref name="max"/> rows of
    /// <paramref name="data"/> with the smallest "output" values, ordered ascending by
    /// that value.
    /// </summary>
    // NOTE(review): "best" means lowest output here, i.e. this presumably assumes a
    // minimization problem - confirm against the callers.
    private static Dataset SelectBestSamples(IDataset data, int max) {
      var bestSampleIndices = data.GetDoubleValues(OutputVariableName)
        .Select((value, row) => Tuple.Create(value, row))
        .OrderBy(x => x.Item1)
        .Take(max)
        .Select(x => x.Item2)
        .ToArray();
      return new Dataset(data.VariableNames, data.VariableNames.Select(v => data.GetDoubleValues(v, bestSampleIndices).ToList()));
    }

    /// <summary>
    /// Trains <paramref name="regressionAlgorithm"/> on <paramref name="dataset"/> (all rows
    /// are training rows, the test partition is empty) and returns the regression solution
    /// found in its results.
    /// </summary>
    /// <returns>
    /// The trained solution, or <c>null</c> if no run out of
    /// <see cref="MaxTrainingAttempts"/> attempts produced a regression solution.
    /// </returns>
    private IRegressionSolution BuildModel(IRandom random, IDataAnalysisAlgorithm<IRegressionProblem> regressionAlgorithm, IDataset dataset, IRegressionSolution oldSolution) {
      //var dataset = EgoUtilities.GetDataSet(dataSamples, RemoveDuplicates);
      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals(OutputVariableName)), OutputVariableName);
      problemdata.TrainingPartition.Start = 0;
      problemdata.TrainingPartition.End = dataset.Rows;
      problemdata.TestPartition.Start = dataset.Rows;
      problemdata.TestPartition.End = dataset.Rows;

      //train
      var problem = (RegressionProblem)regressionAlgorithm.Problem;
      problem.ProblemDataParameter.Value = problemdata;

      // Re-run with a fresh seed until a solution shows up or the attempt budget is used up.
      IRegressionSolution solution = null;
      for (var attempt = 0; solution == null && attempt < MaxTrainingAttempts; attempt++) {
        var results = EgoUtilities.SyncRunSubAlgorithm(regressionAlgorithm, random.Next(int.MaxValue), Cancellation);
        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
      }

      // Only sanitize when both the previous and the new solution really are Gaussian
      // process solutions; otherwise (no previous model, failed training, different
      // solution type) the freshly trained result is returned unchanged.
      var oldGaussianProcess = oldSolution as GaussianProcessRegressionSolution;
      var newGaussianProcess = solution as GaussianProcessRegressionSolution;
      if (regressionAlgorithm is GaussianProcessRegression && oldGaussianProcess != null && newGaussianProcess != null) {
        solution = SanitizeGaussianProcess(oldGaussianProcess, newGaussianProcess, Cancellation);
      }

      //if (regressionAlgorithm is M5RegressionTree && oldSolution != null) solution = SanitizeM5Regression(oldSolution.Model as M5Model, solution, random, Cancellation);

      regressionAlgorithm.Runs.Clear();
      return solution;
    }

    //private static IRegressionSolution SanitizeM5Regression(M5Model oldmodel, IRegressionSolution newSolution, IRandom random, CancellationToken cancellation) {
    //  var problemdata = newSolution.ProblemData;
    //  oldmodel.UpdateLeafModels(problemdata, problemdata.AllIndices, random, cancellation);
    //  var oldSolution = oldmodel.CreateRegressionSolution(problemdata);
    //  var magicDecision = newSolution.TrainingRSquared < oldSolution.TrainingRSquared - 0.05;
    //  return magicDecision ? newSolution : oldmodel.CreateRegressionSolution(problemdata);
    //}

    /// <summary>
    /// Tries to create a model with the mean and covariance functions of the old solution
    /// on the new solution's problem data, and returns whichever of the two solutions has
    /// the lower training mean squared error.
    /// </summary>
    /// <returns>
    /// <paramref name="newSolution"/> unless the rebuilt model has a strictly lower training
    /// mean squared error; also <paramref name="newSolution"/> when either argument is
    /// <c>null</c> or the rebuilt model cannot be constructed.
    /// </returns>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    private static IRegressionSolution SanitizeGaussianProcess(GaussianProcessRegressionSolution oldmodel, GaussianProcessRegressionSolution newSolution, CancellationToken cancellation) {
      // Nothing to compare against: keep whatever the training produced.
      if (oldmodel == null || newSolution == null) {
        return newSolution;
      }

      var problemdata = newSolution.ProblemData;
      var mean = (IMeanFunction)oldmodel.Model.MeanFunction.Clone();
      var cov = (ICovarianceFunction)oldmodel.Model.CovarianceFunction.Clone();
      try {
        // NOTE(review): the hyperparameter vector holds a single 0.0 - presumably the only
        // free parameter left once mean and covariance are fixed; confirm.
        var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable, problemdata.AllowedInputVariables, problemdata.TrainingIndices, new[] { 0.0 }, mean, cov);
        cancellation.ThrowIfCancellationRequested();
        model.FixParameters();
        var sol = new GaussianProcessRegressionSolution(model, problemdata);
        if (newSolution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) {
          newSolution = sol;
        }
      }
      catch (ArgumentException) {
        // Deliberate best effort: if the old functions cannot be applied to the new data,
        // the freshly trained solution is kept.
      }
      return newSolution;
    }

  }
}
Note: See TracBrowser for help on using the repository browser.