#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Linq;
using HeuristicLab.Algorithms.DataAnalysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Algorithms.EGO {
  /// <summary>
  /// An operator that builds a regression model from a dataset using a given regression algorithm.
  /// </summary>
  [Item("ModelBuilder", "Builds a model from a dataset and a given RegressionAlgorithm")]
  [StorableClass]
  public class ModelBuilder : InstrumentedOperator, IStochasticOperator {
    public override bool CanChangeName => true;

    public ILookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter => (ILookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>>)Parameters["RegressionAlgorithm"];
    public ILookupParameter<IRegressionSolution> ModelParameter => (ILookupParameter<IRegressionSolution>)Parameters["Model"];
    public ILookupParameter<IDataset> DatasetParameter => (ILookupParameter<IDataset>)Parameters["Dataset"];
    public ILookupParameter<IRandom> RandomParameter => (ILookupParameter<IRandom>)Parameters["Random"];
    public ILookupParameter<IntValue> MaxModelSizeParameter => (ILookupParameter<IntValue>)Parameters["Maximal Model Size"];
    public ILookupParameter<DoubleMatrix> InfillBoundsParameter => (ILookupParameter<DoubleMatrix>)Parameters["InfillBounds"];

    [StorableConstructor]
    protected ModelBuilder(bool deserializing) : base(deserializing) { }
    protected ModelBuilder(ModelBuilder original, Cloner cloner) : base(original, cloner) { }
    public ModelBuilder() {
      Parameters.Add(new LookupParameter<IDataAnalysisAlgorithm<IRegressionProblem>>("RegressionAlgorithm", "The algorithm used to build a model") { Hidden = true });
      Parameters.Add(new LookupParameter<IRegressionSolution>("Model", "The resulting model") { Hidden = true });
      Parameters.Add(new LookupParameter<IDataset>("Dataset", "The Dataset from which the model is created") { Hidden = true });
      Parameters.Add(new LookupParameter<IRandom>("Random", "A random number generator") { Hidden = true });
      Parameters.Add(new LookupParameter<IntValue>("Maximal Model Size", "The maximum number of sample points used to build the model (set -1 for unlimited size)") { Hidden = true });
      Parameters.Add(new LookupParameter<DoubleMatrix>("InfillBounds", "The bounds applied for infill solving") { Hidden = true });
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new ModelBuilder(this, cloner);
    }

    public override IOperation InstrumentedApply() {
      var regressionAlg = RegressionAlgorithmParameter.ActualValue;
      IDataset data = DatasetParameter.ActualValue;
      var random = RandomParameter.ActualValue;
      var oldModel = ModelParameter.ActualValue;
      var max = MaxModelSizeParameter.ActualValue.Value;

      // If the dataset exceeds the maximal model size, keep only the best samples
      // and restrict the infill bounds to the region they span.
      if (data.Rows > max && max > 0) {
        data = SelectBestSamples(data, max);
        InfillBoundsParameter.ActualValue = GetBounds(data);
      }
      ModelParameter.ActualValue = BuildModel(random, regressionAlg, data, oldModel);
      return base.InstrumentedApply();
    }

    // Determines the minimum and maximum of every double variable except the last one
    // (the "output" variable), yielding one [min, max] row per input dimension.
    private DoubleMatrix GetBounds(IDataset data) {
      var res = new DoubleMatrix(data.Columns - 1, 2);
      var names = data.DoubleVariables.ToArray();
      for (var i = 0; i < names.Length - 1; i++) {
        res[i, 0] = data.GetDoubleValues(names[i]).Min();
        res[i, 1] = data.GetDoubleValues(names[i]).Max();
      }
      return res;
    }

    // Keeps the "max" rows with the smallest "output" values.
    private static Dataset SelectBestSamples(IDataset data, int max) {
      var bestSampleIndices = data.GetDoubleValues("output").Select((d, i) => Tuple.Create(d, i)).OrderBy(x => x.Item1).Take(max).Select(x => x.Item2).ToArray();
      return new Dataset(data.VariableNames, data.VariableNames.Select(v => data.GetDoubleValues(v, bestSampleIndices).ToList()));
    }

    private static IRegressionSolution BuildModel(IRandom random, IDataAnalysisAlgorithm<IRegressionProblem> regressionAlgorithm, IDataset dataset, IRegressionSolution oldSolution) {
      //var dataset = EgoUtilities.GetDataSet(dataSamples, RemoveDuplicates);
      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output");
      // Use all rows for training and leave the test partition empty.
      problemdata.TrainingPartition.Start = 0;
      problemdata.TrainingPartition.End = dataset.Rows;
      problemdata.TestPartition.Start = dataset.Rows;
      problemdata.TestPartition.End = dataset.Rows;

      // Train the regression algorithm on the new problem data; the sub-algorithm may
      // fail to produce a solution, so retry with different seeds (at most 100 times).
      var problem = (RegressionProblem)regressionAlgorithm.Problem;
      problem.ProblemDataParameter.Value = problemdata;
      var i = 0;
      IRegressionSolution solution = null;
      while (solution == null && i++ < 100) {
        var results = EgoUtilities.SyncRunSubAlgorithm(regressionAlgorithm, random.Next(int.MaxValue));
        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
      }

      // For Gaussian process regression, additionally try a model that reuses the previous
      // solution's mean and covariance functions on the new dataset and keep it if it
      // achieves a lower training mean squared error.
      var gp = regressionAlgorithm as GaussianProcessRegression;
      var oldmodel = oldSolution as GaussianProcessRegressionSolution;
      if (gp != null && oldmodel != null) {
        var mean = (IMeanFunction)oldmodel.Model.MeanFunction.Clone();
        var cov = (ICovarianceFunction)oldmodel.Model.CovarianceFunction.Clone();
        try {
          var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable, problemdata.AllowedInputVariables, problemdata.TrainingIndices, new[] { 0.0 }, mean, cov);
          model.FixParameters();
          var sol = new GaussianProcessRegressionSolution(model, problemdata);
          if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) {
            solution = sol;
          }
        }
        catch (ArgumentException) {
          // Building the reused model can fail for degenerate datasets; keep the freshly trained solution.
        }
      }
      // Discard the sub-algorithm's runs before the next iteration.
      regressionAlgorithm.Runs.Clear();
      return solution;
    }
  }
}