source: branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/Operators/ModelBuilder.cs @ 15338

Last change on this file since 15338 was 15338, checked in by bwerth, 5 years ago

#2745 fixed bug concerning new Start and StartAsync methods; passed CancellationToken to sub algorithms

File size: 8.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using System.Threading;
25using HeuristicLab.Algorithms.DataAnalysis;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
34using HeuristicLab.Problems.SurrogateProblem;
35
namespace HeuristicLab.Algorithms.EGO {
  /// <summary>
  /// Builds a surrogate regression model from the EGO sample dataset using a configurable
  /// regression algorithm. Optionally restricts model building to the best (lowest-output)
  /// samples and publishes the bounds of that subset for infill optimization.
  /// </summary>
  [Item("ModelBuilder", "Builds a model from a dataset and a given RegressionAlgorithm")]
  [StorableClass]
  public class ModelBuilder : InstrumentedOperator, IStochasticOperator, ICancellableOperator {
    public override bool CanChangeName => true;

    // Cancellation token forwarded to the synchronously-run sub-algorithm and to model
    // sanitation, so long-running model builds can be aborted (see #2745).
    public CancellationToken Cancellation { get; set; }

    #region Parameter properties
    public ILookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter => (ILookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>>)Parameters["RegressionAlgorithm"];
    public ILookupParameter<IRegressionSolution> ModelParameter => (ILookupParameter<IRegressionSolution>)Parameters["Model"];
    public ILookupParameter<ModifiableDataset> DatasetParameter => (ILookupParameter<ModifiableDataset>)Parameters["Dataset"];
    public ILookupParameter<IRandom> RandomParameter => (ILookupParameter<IRandom>)Parameters["Random"];
    public ILookupParameter<IntValue> MaxModelSizeParameter => (ILookupParameter<IntValue>)Parameters["Maximal Model Size"];
    public ILookupParameter<DoubleMatrix> InfillBoundsParameter => (ILookupParameter<DoubleMatrix>)Parameters["InfillBounds"];
    #endregion

    [StorableConstructor]
    protected ModelBuilder(bool deserializing) : base(deserializing) { }
    protected ModelBuilder(ModelBuilder original, Cloner cloner) : base(original, cloner) { }
    public ModelBuilder() {
      Parameters.Add(new LookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>>("RegressionAlgorithm", "The algorithm used to build a model") { Hidden = true });
      Parameters.Add(new LookupParameter<IRegressionSolution>("Model", "The resulting model") { Hidden = true });
      Parameters.Add(new LookupParameter<ModifiableDataset>("Dataset", "The Dataset from which the model is created") { Hidden = true });
      Parameters.Add(new LookupParameter<IRandom>("Random", "A random number generator") { Hidden = true });
      // FIX: description previously had an unbalanced parenthesis ("(Set -1 for infinite size").
      Parameters.Add(new LookupParameter<IntValue>("Maximal Model Size", "The maximum number of sample points used to build the model (Set -1 for infinite size)") { Hidden = true });
      Parameters.Add(new LookupParameter<DoubleMatrix>("InfillBounds", "The bounds applied for infill solving") { Hidden = true });
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ModelBuilder(this, cloner);
    }

    /// <summary>
    /// Retrieves the dataset, optionally subsamples it to the best <c>max</c> rows
    /// (publishing the subset's bounds for the infill step), builds the model and
    /// stores it in the "Model" parameter.
    /// </summary>
    public override IOperation InstrumentedApply() {
      var regressionAlg = RegressionAlgorithmParameter.ActualValue;
      IDataset data = DatasetParameter.ActualValue;
      var random = RandomParameter.ActualValue;
      var oldModel = ModelParameter.ActualValue;
      var max = MaxModelSizeParameter.ActualValue.Value;
      // A non-positive max means "use all samples"; otherwise keep only the best max rows.
      if (data.Rows > max && max > 0) {
        data = SelectBestSamples(data, max);
        InfillBoundsParameter.ActualValue = GetBounds(data);
      }
      ModelParameter.ActualValue = BuildModel(random, regressionAlg, data, oldModel);
      return base.InstrumentedApply();
    }

    /// <summary>
    /// Computes a [min, max] row per input dimension of <paramref name="data"/>.
    /// The last double variable (assumed to be "output" — matches SelectBestSamples) is excluded.
    /// NOTE(review): the matrix is sized data.Columns - 1 while the loop runs over
    /// DoubleVariables - 1; these only agree when every column is a double variable — confirm.
    /// </summary>
    private static DoubleMatrix GetBounds(IDataset data) {
      var res = new DoubleMatrix(data.Columns - 1, 2);
      var names = data.DoubleVariables.ToArray();
      for (var i = 0; i < names.Length - 1; i++) {
        // Materialize once instead of enumerating the column twice for Min and Max.
        var values = data.GetDoubleValues(names[i]).ToArray();
        res[i, 0] = values.Min();
        res[i, 1] = values.Max();
      }
      return res;
    }

    /// <summary>
    /// Returns a new dataset containing the <paramref name="max"/> rows with the
    /// smallest "output" values (EGO minimizes, so smallest = best).
    /// </summary>
    private static Dataset SelectBestSamples(IDataset data, int max) {
      var bestSampleIndices = data.GetDoubleValues("output").Select((d, i) => Tuple.Create(d, i)).OrderBy(x => x.Item1).Take(max).Select(x => x.Item2).ToArray();
      return new Dataset(data.VariableNames, data.VariableNames.Select(v => data.GetDoubleValues(v, bestSampleIndices).ToList()));
    }

    /// <summary>
    /// Trains <paramref name="regressionAlgorithm"/> on <paramref name="dataset"/>
    /// (all rows as training partition, empty test partition) and returns the resulting
    /// solution; retries up to 100 times if the run yields no regression solution.
    /// For GP and M5 algorithms the new solution is compared against a refit of the
    /// old one and the better of the two is kept.
    /// </summary>
    private IRegressionSolution BuildModel(IRandom random, IDataAnalysisAlgorithm<IRegressionProblem> regressionAlgorithm, IDataset dataset, IRegressionSolution oldSolution) {
      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output");
      problemdata.TrainingPartition.Start = 0;
      problemdata.TrainingPartition.End = dataset.Rows;
      problemdata.TestPartition.Start = dataset.Rows;
      problemdata.TestPartition.End = dataset.Rows;

      // Train: run the sub-algorithm synchronously with a fresh seed until it produces a solution.
      var problem = (RegressionProblem)regressionAlgorithm.Problem;
      problem.ProblemDataParameter.Value = problemdata;
      var i = 0;
      IRegressionSolution solution = null;

      while (solution == null && i++ < 100) {
        var results = EgoUtilities.SyncRunSubAlgorithm(regressionAlgorithm, random.Next(int.MaxValue), Cancellation);
        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
      }

      if (regressionAlgorithm is GaussianProcessRegression && oldSolution != null)
        solution = SanitizeGaussianProcess(oldSolution as GaussianProcessRegressionSolution, solution as GaussianProcessRegressionSolution, Cancellation);

      if (regressionAlgorithm is M5RegressionTree && oldSolution != null)
        solution = SanitizeM5Regression(oldSolution.Model as M5Model, solution, random, Cancellation);

      // Discard sub-algorithm runs so repeated invocations do not accumulate results.
      regressionAlgorithm.Runs.Clear();
      return solution;
    }

    /// <summary>
    /// Refits the old M5 tree's leaf models on the new data and keeps whichever solution
    /// has the better training R² (with a 0.05 margin so marginal differences favor the
    /// new solution).
    /// </summary>
    private static IRegressionSolution SanitizeM5Regression(M5Model oldmodel, IRegressionSolution newSolution, IRandom random, CancellationToken cancellation) {
      var problemdata = newSolution.ProblemData;
      oldmodel.UpdateLeafModels(problemdata, problemdata.AllIndices, random, cancellation);
      var oldSolution = oldmodel.CreateRegressionSolution(problemdata);
      // BUGFIX: the ternary was inverted — when the new solution was clearly worse
      // (its training R² more than 0.05 below the old one's) the WORSE solution was
      // returned. Keep the old solution in that case, consistent with
      // SanitizeGaussianProcess, which also keeps the better-trained model.
      var newIsClearlyWorse = newSolution.TrainingRSquared < oldSolution.TrainingRSquared - 0.05;
      return newIsClearlyWorse ? oldSolution : newSolution;
    }

    /// <summary>
    /// Tries to build a GP model on the new data reusing the old model's (cloned and fixed)
    /// mean and covariance hyperparameters; returns it instead of <paramref name="newSolution"/>
    /// if it achieves a lower training MSE. An ArgumentException from model construction
    /// is deliberately swallowed (best effort — fall back to the new solution).
    /// </summary>
    private static IRegressionSolution SanitizeGaussianProcess(GaussianProcessRegressionSolution oldmodel, GaussianProcessRegressionSolution newSolution, CancellationToken cancellation) {
      var problemdata = newSolution.ProblemData;
      var mean = (IMeanFunction)oldmodel.Model.MeanFunction.Clone();
      var cov = (ICovarianceFunction)oldmodel.Model.CovarianceFunction.Clone();
      try {
        var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable, problemdata.AllowedInputVariables, problemdata.TrainingIndices, new[] { 0.0 }, mean, cov);
        cancellation.ThrowIfCancellationRequested();
        model.FixParameters();
        var sol = new GaussianProcessRegressionSolution(model, problemdata);
        if (newSolution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) {
          newSolution = sol;
        }
      } catch (ArgumentException) { }
      return newSolution;
    }

  }
}
Note: See TracBrowser for help on using the repository browser.