source: stable/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModelSurrogate.cs @ 17097

Last change on this file since 17097 was 17097, checked in by mkommend, 5 months ago

#2520: Merged 16565 - 16579 into stable.

File size: 5.7 KB
RevLine 
[12868]1#region License Information
2/* HeuristicLab
[17097]3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[12868]4 * and the BEACON Center for the Study of Evolution in Action.
5 *
6 * This file is part of HeuristicLab.
7 *
8 * HeuristicLab is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * HeuristicLab is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
20 */
21#endregion
22
[14327]23using System;
[12868]24using System.Collections.Generic;
[14027]25using System.Linq;
[12868]26using HeuristicLab.Common;
27using HeuristicLab.Core;
[17097]28using HEAL.Attic;
[12868]29using HeuristicLab.Problems.DataAnalysis;
30
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Surrogate used for persistence of an actual gradient boosted trees (GBT) model.
  /// Since the actual GBT model would be very large when persisted, only the information
  /// necessary to recalculate the actual GBT model on demand is stored.
  /// </summary>
  [StorableType("1BF7BEFB-6739-48AA-89BC-B632E72D148C")]
  [Item("Gradient boosted tree model", "")]
  public sealed class GradientBoostedTreesModelSurrogate : RegressionModel, IGradientBoostedTreesModel {
    // don't store the actual model!
    // the actual model is only recalculated when necessary (deliberately NOT marked [Storable])
    private readonly Lazy<IGradientBoostedTreesModel> actualModel;
    private IGradientBoostedTreesModel ActualModel {
      get { return actualModel.Value; }
    }

    // The stored fields below are exactly the arguments handed to
    // GradientBoostedTreesAlgorithmStatic.TrainGbm in RecalculateModel().
    [Storable]
    private readonly IRegressionProblemData trainingProblemData;
    [Storable]
    private readonly uint seed;
    [Storable]
    private ILossFunction lossFunction;
    [Storable]
    private double r;
    [Storable]
    private double m;
    [Storable]
    private double nu;
    [Storable]
    private int iterations;
    [Storable]
    private int maxSize;


    /// <summary>
    /// The distinct variables used by any of the models of the actual GBT model, in ascending order.
    /// Accessing this property recalculates the actual model first if necessary.
    /// </summary>
    public override IEnumerable<string> VariablesUsedForPrediction {
      get {
        return ActualModel.Models.SelectMany(x => x.VariablesUsedForPrediction).Distinct().OrderBy(x => x);
      }
    }

    [StorableConstructor]
    private GradientBoostedTreesModelSurrogate(StorableConstructorFlag _) : base(_) {
      // the actual model is not persisted, so it has to be recalculated on first use
      actualModel = new Lazy<IGradientBoostedTreesModel>(() => RecalculateModel());
    }

    private GradientBoostedTreesModelSurrogate(GradientBoostedTreesModelSurrogate original, Cloner cloner)
      : base(original, cloner) {
      // Only clone the actual model if the original has already materialized it.
      // Reading original.ActualModel unconditionally would force the original to be
      // retrained just for cloning; an un-materialized original yields an un-materialized
      // clone that recalculates on demand from the cloned parameters below.
      IGradientBoostedTreesModel clonedModel = null;
      if (original.actualModel.IsValueCreated && original.ActualModel != null) {
        clonedModel = cloner.Clone(original.ActualModel);
      }
      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(clonedModel)); // only capture clonedModel in the closure

      this.trainingProblemData = cloner.Clone(original.trainingProblemData);
      this.lossFunction = cloner.Clone(original.lossFunction);
      this.seed = original.seed;
      this.iterations = original.iterations;
      this.maxSize = original.maxSize;
      this.r = original.r;
      this.m = original.m;
      this.nu = original.nu;
    }

    // Builds the factory for the lazy actual model: returns the given model if there is one,
    // otherwise recalculates the model from the stored parameters.
    private Func<IGradientBoostedTreesModel> CreateLazyInitFunc(IGradientBoostedTreesModel model) {
      return () => {
        return model == null ? RecalculateModel() : model;
      };
    }

    /// <summary>
    /// Creates only the surrogate model without an actual model.
    /// The actual model is recalculated from the given parameters when it is first needed.
    /// </summary>
    public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
      ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu)
      : base(trainingProblemData.TargetVariable, "Gradient boosted tree model", string.Empty) {
      this.trainingProblemData = trainingProblemData;
      this.seed = seed;
      this.lossFunction = lossFunction;
      this.iterations = iterations;
      this.maxSize = maxSize;
      this.r = r;
      this.m = m;
      this.nu = nu;

      // Without this the readonly lazy stays null and every access to ActualModel throws.
      actualModel = new Lazy<IGradientBoostedTreesModel>(() => RecalculateModel());
    }

    /// <summary>
    /// Wraps an actual model in a surrogate.
    /// If <paramref name="model"/> is null the actual model is recalculated on demand instead.
    /// </summary>
    public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
      ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu,
      IGradientBoostedTreesModel model)
      : this(trainingProblemData, seed, lossFunction, iterations, maxSize, r, m, nu) {
      // replaces the recalculating lazy set by the chained constructor
      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(model));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new GradientBoostedTreesModelSurrogate(this, cloner);
    }

    // forward message to actual model (recalculate model first if necessary)
    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
      return ActualModel.GetEstimatedValues(dataset, rows);
    }

    /// <summary>
    /// Creates a regression solution for this surrogate using a clone of the given problem data.
    /// </summary>
    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
    }

    // Retrains the GBT model from the stored problem data, parameters and seed.
    private IGradientBoostedTreesModel RecalculateModel() {
      return GradientBoostedTreesAlgorithmStatic.TrainGbm(trainingProblemData, lossFunction, maxSize, nu, r, m, iterations, seed).Model;
    }

    /// <summary>
    /// The models of the actual GBT model (recalculated first if necessary).
    /// </summary>
    public IEnumerable<IRegressionModel> Models {
      get {
        return ActualModel.Models;
      }
    }

    /// <summary>
    /// The weights of the actual GBT model (recalculated first if necessary).
    /// </summary>
    public IEnumerable<double> Weights {
      get {
        return ActualModel.Weights;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.