source: branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/Operators/ModelBuilder.cs @ 15064

Last change on this file since 15064 was 15064, checked in by bwerth, 5 years ago

#2745 implemented EGO as EngineAlgorithm + some simplifications in the IInfillCriterion interface

File size: 6.9 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Linq;
24using HeuristicLab.Algorithms.DataAnalysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.Problems.DataAnalysis;
33
namespace HeuristicLab.Algorithms.EGO {
  /// <summary>
  /// An operator that trains a regression model on the current sample dataset using a given regression algorithm.
  /// </summary>
  /// <remarks>
  /// When the dataset holds more rows than "Maximal Model Size" (and that size is positive), only the rows with
  /// the smallest "output" values are used for training, and the infill bounds are replaced by the per-variable
  /// minimum/maximum of those rows.
  /// </remarks>
  [Item("ModelBuilder", "Builds a model from a dataset and a given RegressionAlgorithm")]
  [StorableClass]
  public class ModelBuilder : InstrumentedOperator, IStochasticOperator {
    // Name of the dataset column that is used as regression target; every other column is treated as an input.
    private const string TargetVariableName = "output";
    // Upper limit on how often the regression algorithm is re-run while it fails to deliver a solution.
    private const int MaxTrainingAttempts = 100;

    public override bool CanChangeName => true;

    public ILookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter => (ILookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>>)Parameters["RegressionAlgorithm"];
    public ILookupParameter<IRegressionSolution> ModelParameter => (ILookupParameter<IRegressionSolution>)Parameters["Model"];
    public ILookupParameter<ModifiableDataset> DatasetParameter => (ILookupParameter<ModifiableDataset>)Parameters["Dataset"];
    public ILookupParameter<IRandom> RandomParameter => (ILookupParameter<IRandom>)Parameters["Random"];
    public ILookupParameter<IntValue> MaxModelSizeParameter => (ILookupParameter<IntValue>)Parameters["Maximal Model Size"];
    public ILookupParameter<DoubleMatrix> InfillBoundsParameter => (ILookupParameter<DoubleMatrix>)Parameters["InfillBounds"];

    [StorableConstructor]
    protected ModelBuilder(bool deserializing) : base(deserializing) { }
    protected ModelBuilder(ModelBuilder original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates the operator and registers all of its (hidden) lookup parameters.
    /// </summary>
    public ModelBuilder() {
      Parameters.Add(new LookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>>("RegressionAlgorithm", "The algorithm used to build a model") { Hidden = true });
      Parameters.Add(new LookupParameter<IRegressionSolution>("Model", "The resulting model") { Hidden = true });
      Parameters.Add(new LookupParameter<ModifiableDataset>("Dataset", "The Dataset from which the model is created") { Hidden = true });
      Parameters.Add(new LookupParameter<IRandom>("Random", "A random number generator") { Hidden = true });
      Parameters.Add(new LookupParameter<IntValue>("Maximal Model Size", "The maximum number of sample points used to build the model (Set -1 for infinite size)") { Hidden = true });
      Parameters.Add(new LookupParameter<DoubleMatrix>("InfillBounds", "The bounds applied for infill solving") { Hidden = true });
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ModelBuilder(this, cloner);
    }

    /// <summary>
    /// Builds a new model from the looked-up dataset and stores it in the "Model" parameter.
    /// </summary>
    /// <remarks>
    /// If the dataset has more rows than the positive "Maximal Model Size", the dataset is reduced first and
    /// "InfillBounds" is overwritten with the bounds of the reduced dataset. Otherwise "InfillBounds" is left untouched.
    /// </remarks>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation InstrumentedApply() {
      var regressionAlg = RegressionAlgorithmParameter.ActualValue;
      IDataset data = DatasetParameter.ActualValue;
      var random = RandomParameter.ActualValue;
      var oldModel = ModelParameter.ActualValue;
      var max = MaxModelSizeParameter.ActualValue.Value;

      // A non-positive maximum disables the size limit.
      if (data.Rows > max && max > 0) {
        data = SelectBestSamples(data, max);
        InfillBoundsParameter.ActualValue = GetBounds(data);
      }

      ModelParameter.ActualValue = BuildModel(random, regressionAlg, data, oldModel);
      return base.InstrumentedApply();
    }

    /// <summary>
    /// Computes the minimum and maximum of every input variable of <paramref name="data"/>.
    /// </summary>
    /// <param name="data">The dataset whose double-valued columns (except the target column) are inspected.</param>
    /// <returns>A matrix with one row per input variable; column 0 holds the minimum, column 1 the maximum.</returns>
    private static DoubleMatrix GetBounds(IDataset data) {
      // The target is excluded by name (not by position) so that this stays consistent with BuildModel,
      // which also treats every variable other than the target as an input.
      var inputNames = data.DoubleVariables.Where(name => name != TargetVariableName).ToArray();
      var bounds = new DoubleMatrix(inputNames.Length, 2);
      for (var i = 0; i < inputNames.Length; i++) {
        // Materialize once; the column is needed for both the minimum and the maximum.
        var values = data.GetDoubleValues(inputNames[i]).ToArray();
        bounds[i, 0] = values.Min();
        bounds[i, 1] = values.Max();
      }
      return bounds;
    }

    /// <summary>
    /// Creates a new dataset that contains only the <paramref name="max"/> rows with the smallest target values.
    /// </summary>
    /// <remarks>
    /// Rows are ranked by ascending target value, i.e. smaller values are treated as better.
    /// NOTE(review): this presumably assumes a minimization problem - confirm before using it for maximization.
    /// </remarks>
    /// <param name="data">The dataset to select rows from; all of its variables are read as double values.</param>
    /// <param name="max">The number of rows to keep.</param>
    /// <returns>A dataset with the same variables as <paramref name="data"/>, rows ordered by ascending target value.</returns>
    private static Dataset SelectBestSamples(IDataset data, int max) {
      var bestSampleIndices = data.GetDoubleValues(TargetVariableName)
        .Select((value, row) => Tuple.Create(value, row))
        .OrderBy(sample => sample.Item1)
        .Take(max)
        .Select(sample => sample.Item2)
        .ToArray();
      return new Dataset(data.VariableNames, data.VariableNames.Select(v => data.GetDoubleValues(v, bestSampleIndices).ToList()));
    }

    /// <summary>
    /// Trains <paramref name="regressionAlgorithm"/> on <paramref name="dataset"/> and returns the resulting solution.
    /// </summary>
    /// <remarks>
    /// The algorithm's problem data is replaced, the algorithm is re-run with fresh seeds until it yields a solution
    /// (at most <see cref="MaxTrainingAttempts"/> times), and its run collection is cleared afterwards. For Gaussian
    /// process regression with a previous Gaussian process solution, a model reusing the old mean and covariance
    /// functions is built as well and preferred if its training error is lower.
    /// </remarks>
    /// <param name="random">Source of the seeds passed to the sub-algorithm runs.</param>
    /// <param name="regressionAlgorithm">The algorithm to run; its problem must be a <c>RegressionProblem</c>.</param>
    /// <param name="dataset">The training data; all rows are used for training.</param>
    /// <param name="oldSolution">The previously built solution, may be null.</param>
    /// <returns>The best solution found, or null if no solution could be created.</returns>
    private static IRegressionSolution BuildModel(IRandom random, IDataAnalysisAlgorithm<IRegressionProblem> regressionAlgorithm, IDataset dataset, IRegressionSolution oldSolution) {
      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => x != TargetVariableName), TargetVariableName);
      // Use every row for training and leave the test partition empty.
      problemdata.TrainingPartition.Start = 0;
      problemdata.TrainingPartition.End = dataset.Rows;
      problemdata.TestPartition.Start = dataset.Rows;
      problemdata.TestPartition.End = dataset.Rows;

      //train
      var problem = (RegressionProblem)regressionAlgorithm.Problem;
      problem.ProblemDataParameter.Value = problemdata;

      IRegressionSolution solution = null;
      for (var attempt = 0; solution == null && attempt < MaxTrainingAttempts; attempt++) {
        var results = EgoUtilities.SyncRunSubAlgorithm(regressionAlgorithm, random.Next(int.MaxValue));
        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
      }

      //try creating a model with old hyperparameters and new dataset;
      var oldGpSolution = oldSolution as GaussianProcessRegressionSolution;
      if (regressionAlgorithm is GaussianProcessRegression && oldGpSolution != null) {
        solution = PreferModelWithOldHyperparameters(problemdata, oldGpSolution, solution);
      }

      regressionAlgorithm.Runs.Clear();
      return solution;
    }

    /// <summary>
    /// Builds a Gaussian process solution from the mean and covariance functions of <paramref name="oldSolution"/>
    /// and returns it if it beats <paramref name="trained"/> on training mean squared error.
    /// </summary>
    /// <param name="problemdata">The problem data the new model is created for.</param>
    /// <param name="oldSolution">The previous solution whose mean and covariance functions are cloned.</param>
    /// <param name="trained">The freshly trained solution, may be null.</param>
    /// <returns>The new solution if <paramref name="trained"/> is null or worse; otherwise <paramref name="trained"/>.</returns>
    private static IRegressionSolution PreferModelWithOldHyperparameters(RegressionProblemData problemdata, GaussianProcessRegressionSolution oldSolution, IRegressionSolution trained) {
      var mean = (IMeanFunction)oldSolution.Model.MeanFunction.Clone();
      var cov = (ICovarianceFunction)oldSolution.Model.CovarianceFunction.Clone();
      try {
        var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable,
          problemdata.AllowedInputVariables, problemdata.TrainingIndices, new[] { 0.0 }, mean, cov);
        model.FixParameters();
        var candidate = new GaussianProcessRegressionSolution(model, problemdata);
        if (trained == null || trained.TrainingMeanSquaredError > candidate.TrainingMeanSquaredError) {
          return candidate;
        }
      }
      catch (ArgumentException) {
        // Best effort only: if the old functions cannot be applied to the new data, keep the trained solution.
      }
      return trained;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.