Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModelSurrogate.cs

Last change on this file was 17494, checked in by mkommend, 4 years ago

#3030: Merged r17272 and r17278 into stable.

File size: 5.7 KB
Rev Line
[12868]1#region License Information
2/* HeuristicLab
[17181]3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[12868]4 * and the BEACON Center for the Study of Evolution in Action.
5 *
6 * This file is part of HeuristicLab.
7 *
8 * HeuristicLab is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * HeuristicLab is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
20 */
21#endregion
22
[14327]23using System;
[12868]24using System.Collections.Generic;
[14027]25using System.Linq;
[17494]26using HEAL.Attic;
[12868]27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Problems.DataAnalysis;
30
31namespace HeuristicLab.Algorithms.DataAnalysis {
/// <summary>
/// Surrogate that is persisted in place of an actual gradient boosted trees (GBT) model.
/// Since the actual GBT model would be very large when persisted, only the information that is
/// necessary to recalculate it is stored; the actual model is rebuilt on demand.
/// </summary>
[StorableType("1BF7BEFB-6739-48AA-89BC-B632E72D148C")]
[Item("Gradient boosted tree model", "")]
public sealed class GradientBoostedTreesModelSurrogate : RegressionModel, IGradientBoostedTreesModel {
  // The actual model is intentionally NOT marked [Storable];
  // it is only (re)calculated when one of the forwarding members needs it.
  private IGradientBoostedTreesModel fullModel;
  private readonly Lazy<IGradientBoostedTreesModel> actualModel;

  /// <summary>The actual model, obtained through the lazy initializer.</summary>
  private IGradientBoostedTreesModel ActualModel {
    get {
      return actualModel.Value;
    }
  }

  // Everything below is what gets persisted: the inputs of the training run.
  [Storable]
  private readonly IRegressionProblemData trainingProblemData;
  [Storable]
  private readonly uint seed;
  [Storable]
  private readonly ILossFunction lossFunction;
  [Storable]
  private readonly double r;
  [Storable]
  private readonly double m;
  [Storable]
  private readonly double nu;
  [Storable]
  private readonly int iterations;
  [Storable]
  private readonly int maxSize;

  /// <summary>
  /// Distinct names of the variables used by the models of the actual model, in ascending order.
  /// Accessing this property requires the actual model.
  /// </summary>
  public override IEnumerable<string> VariablesUsedForPrediction {
    get {
      var usedByAnyModel = ActualModel.Models.SelectMany(model => model.VariablesUsedForPrediction);
      return usedByAnyModel.Distinct().OrderBy(name => name);
    }
  }

  /// <summary>Constructor used by the persistence framework.</summary>
  [StorableConstructor]
  private GradientBoostedTreesModelSurrogate(StorableConstructorFlag _)
    : base(_) {
    actualModel = CreateLazyInitFunc();
  }

  /// <summary>Cloning constructor.</summary>
  /// <param name="original">The surrogate to copy from.</param>
  /// <param name="cloner">The cloner used for the reference-typed members.</param>
  private GradientBoostedTreesModelSurrogate(GradientBoostedTreesModelSurrogate original, Cloner cloner)
    : base(original, cloner) {
    // plain values that are necessary to rebuild the model
    this.seed = original.seed;
    this.r = original.r;
    this.m = original.m;
    this.nu = original.nu;
    this.iterations = original.iterations;
    this.maxSize = original.maxSize;

    // objects that are necessary to rebuild the model
    this.trainingProblemData = cloner.Clone(original.trainingProblemData);
    this.lossFunction = cloner.Clone(original.lossFunction);

    // the full model is cloned only if the original has already created it
    if (original.fullModel != null) {
      this.fullModel = cloner.Clone(original.fullModel);
    }

    actualModel = CreateLazyInitFunc();
  }

  /// <summary>
  /// Builds the lazy initializer behind <see cref="ActualModel"/>: it hands out
  /// <c>fullModel</c> if that is already set, otherwise it recalculates the model and keeps the result.
  /// </summary>
  private Lazy<IGradientBoostedTreesModel> CreateLazyInitFunc() {
    Func<IGradientBoostedTreesModel> provideModel = () => fullModel ?? (fullModel = RecalculateModel());
    return new Lazy<IGradientBoostedTreesModel>(provideModel);
  }

  /// <summary>Creates only the surrogate, without an actual model.</summary>
  private GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
    ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu)
    : base(trainingProblemData.TargetVariable, "Gradient boosted tree model", string.Empty) {
    this.trainingProblemData = trainingProblemData;
    this.lossFunction = lossFunction;
    this.seed = seed;
    this.r = r;
    this.m = m;
    this.nu = nu;
    this.iterations = iterations;
    this.maxSize = maxSize;

    actualModel = CreateLazyInitFunc();
  }

  /// <summary>Wraps an actual model in a surrogate.</summary>
  /// <param name="model">The actual model; it is kept as the full model of this surrogate.</param>
  /// <param name="trainingProblemData">Problem data passed to training when the model is recalculated.</param>
  /// <param name="seed">Seed passed to training.</param>
  /// <param name="lossFunction">Loss function passed to training.</param>
  /// <param name="iterations">Iterations parameter passed to training.</param>
  /// <param name="maxSize">Maximum size parameter passed to training.</param>
  /// <param name="r">Parameter r passed to training.</param>
  /// <param name="m">Parameter m passed to training.</param>
  /// <param name="nu">Parameter nu passed to training.</param>
  public GradientBoostedTreesModelSurrogate(IGradientBoostedTreesModel model, IRegressionProblemData trainingProblemData, uint seed,
    ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu)
    : this(trainingProblemData, seed, lossFunction, iterations, maxSize, r, m, nu) {
    fullModel = model;
  }

  /// <summary>Creates a copy of this surrogate via the cloning constructor.</summary>
  public override IDeepCloneable Clone(Cloner cloner) {
    return new GradientBoostedTreesModelSurrogate(this, cloner);
  }

  /// <summary>Forwards the call to the actual model (which is recalculated first if necessary).</summary>
  public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    return ActualModel.GetEstimatedValues(dataset, rows);
  }

  /// <summary>Creates a regression solution for this surrogate and a clone of <paramref name="problemData"/>.</summary>
  public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    var problemDataCopy = (IRegressionProblemData)problemData.Clone();
    return new RegressionSolution(this, problemDataCopy);
  }

  /// <summary>Trains a GBT model from the stored training inputs and returns the resulting model.</summary>
  private IGradientBoostedTreesModel RecalculateModel() {
    var trained = GradientBoostedTreesAlgorithmStatic.TrainGbm(
      trainingProblemData, lossFunction, maxSize, nu, r, m, iterations, seed);
    return trained.Model;
  }

  /// <summary>The models of the actual model.</summary>
  public IEnumerable<IRegressionModel> Models {
    get {
      return ActualModel.Models;
    }
  }

  /// <summary>The weights of the actual model.</summary>
  public IEnumerable<double> Weights {
    get {
      return ActualModel.Weights;
    }
  }
}
[17156]147}
Note: See TracBrowser for help on using the repository browser.