source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/GaussianProcessLeaf.cs @ 15830

Last change on this file since 15830 was 15830, checked in by bwerth, 16 months ago

#2847 adapted project to new rep structure; major changes to interfaces; restructures splitting and pruning

File size: 4.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using System.Threading;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis;
31
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Leaf type that uses a Gaussian process regression model as the leaf model.
  /// The configured <see cref="GaussianProcessRegression"/> algorithm is run
  /// <see cref="Tries"/> times and the solution with the lowest
  /// "Negative log pseudo-likelihood (LOO-CV)" result is kept.
  /// </summary>
  [StorableClass]
  [Item("GaussianProcessLeaf", "A leaf type that uses gaussian process models as leaf models.")]
  public class GaussianProcessLeaf : LeafBase {
    #region ParameterNames
    public const string TriesParameterName = "Tries";
    public const string RegressionParameterName = "Regression";
    #endregion

    // Name of the entry in the sub-algorithm's result collection that is used to rank the runs.
    private const string LooCvResultName = "Negative log pseudo-likelihood (LOO-CV)";

    #region ParameterProperties
    /// <summary>Parameter holding the number of times the regression algorithm is repeated.</summary>
    public IFixedValueParameter<IntValue> TriesParameter {
      // Direct cast: a wrongly typed parameter fails here instead of surfacing later as a NullReferenceException.
      get { return (IFixedValueParameter<IntValue>)Parameters[TriesParameterName]; }
    }
    /// <summary>Parameter holding the Gaussian process regression algorithm that creates the leaf models.</summary>
    public IFixedValueParameter<GaussianProcessRegression> RegressionParameter {
      get { return (IFixedValueParameter<GaussianProcessRegression>)Parameters[RegressionParameterName]; }
    }
    #endregion

    #region Properties
    /// <summary>Number of repetitions of the regression algorithm per call to <see cref="Build"/>.</summary>
    public int Tries {
      get { return TriesParameter.Value.Value; }
    }
    /// <summary>The Gaussian process regression algorithm used to create leaf models.</summary>
    public GaussianProcessRegression Regression {
      get { return RegressionParameter.Value; }
    }
    #endregion

    #region Constructors & Cloning
    [StorableConstructor]
    protected GaussianProcessLeaf(bool deserializing) : base(deserializing) { }
    protected GaussianProcessLeaf(GaussianProcessLeaf original, Cloner cloner) : base(original, cloner) { }
    /// <summary>
    /// Creates a leaf type with 10 tries and a Gaussian process regression that uses a
    /// <see cref="CovarianceRationalQuadraticIso"/> covariance function and a
    /// <see cref="MeanLinear"/> mean function.
    /// </summary>
    public GaussianProcessLeaf() {
      var gp = new GaussianProcessRegression();
      gp.CovarianceFunctionParameter.Value = new CovarianceRationalQuadraticIso();
      gp.MeanFunctionParameter.Value = new MeanLinear();

      Parameters.Add(new FixedValueParameter<IntValue>(TriesParameterName, "Number of repetitions", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<GaussianProcessRegression>(RegressionParameterName, "The algorithm creating GPmodels", gp));
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new GaussianProcessLeaf(this, cloner);
    }
    #endregion

    #region IModelType
    /// <summary>Always <c>true</c> for this leaf type.</summary>
    public override bool ProvidesConfidence {
      get { return true; }
    }

    /// <summary>
    /// Runs the configured regression algorithm <see cref="Tries"/> times on <paramref name="pd"/>
    /// and returns the model of the run with the lowest LOO-CV score.
    /// </summary>
    /// <param name="pd">The problem data the leaf model is created for.</param>
    /// <param name="random">Random number generator; one value is drawn per try and passed to the sub-algorithm run.</param>
    /// <param name="cancellationToken">Token forwarded to every sub-algorithm run.</param>
    /// <param name="noParameters">
    /// Set to the number of dataset rows + 1 + the parameter counts reported by the
    /// covariance function and the mean function for the allowed input variables.
    /// </param>
    /// <returns>The model of the best solution found.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown if the dataset has fewer rows than <see cref="MinLeafSize"/> or if no try produced a usable solution.
    /// </exception>
    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
      if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a gaussian process model");
      Regression.Problem = new RegressionProblem {ProblemData = pd};
      var cvscore = double.MaxValue;
      GaussianProcessRegressionSolution sol = null;

      try {
        for (var i = 0; i < Tries; i++) {
          var res = RegressionTreeUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken);
          var t = res.Select(x => x.Value).OfType<GaussianProcessRegressionSolution>().FirstOrDefault();
          var score = ((DoubleValue)res[LooCvResultName].Value).Value;
          // A NaN score must be rejected explicitly: every comparison with NaN is false, so
          // "score >= cvscore" alone would accept it and afterwards accept every later run as well.
          if (t == null || double.IsNaN(score) || score >= cvscore || double.IsNaN(t.TrainingRSquared)) continue;
          cvscore = score;
          sol = t;
        }
      }
      finally {
        // Clean up the shared algorithm instance even if a run throws (e.g. on cancellation).
        Regression.Runs.Clear();
      }
      if (sol == null) throw new ArgumentException("Could not create Gaussian Process model");

      var inputCount = pd.AllowedInputVariables.Count();
      noParameters = pd.Dataset.Rows + 1
                     + Regression.CovarianceFunction.GetNumberOfParameters(inputCount)
                     + Regression.MeanFunction.GetNumberOfParameters(inputCount);
      return sol.Model;
    }

    /// <summary>Minimal number of rows required by <see cref="Build"/>; constant 3 regardless of <paramref name="pd"/>.</summary>
    public override int MinLeafSize(IRegressionProblemData pd) {
      return 3;
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.