Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModelSurrogate.cs @ 15679

Last change on this file since 15679 was 15679, checked in by fholzing, 6 years ago

#2883: Removed backwards compatibility and moved the decision whether to use a surrogate one level up, into the algorithm

File size: 5.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 * and the BEACON Center for the Study of Evolution in Action.
5 *
6 * This file is part of HeuristicLab.
7 *
8 * HeuristicLab is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * HeuristicLab is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
20 */
21#endregion
22
23using System;
24using System.Collections.Generic;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
29using HeuristicLab.Problems.DataAnalysis;
30
namespace HeuristicLab.Algorithms.DataAnalysis {
  // This class is used as a surrogate for persistence of an actual GBT model.
  // Since the actual GBT model would be very large when persisted, we only store the information
  // that is necessary to recalculate the actual GBT model on demand (training data, seed, loss
  // function and the training parameters).
  [StorableClass]
  [Item("Gradient boosted tree model", "")]
  public sealed class GradientBoostedTreesModelSurrogate : RegressionModel, IGradientBoostedTreesModel {
    // Don't store the actual model!
    // The actual model is only (re-)calculated when it is accessed for the first time.
    // Every constructor must assign this field, otherwise all forwarding members below fail.
    private readonly Lazy<IGradientBoostedTreesModel> actualModel;
    private IGradientBoostedTreesModel ActualModel {
      get { return actualModel.Value; }
    }

    // Persisted state: everything that is passed to TrainGbm in RecalculateModel().
    [Storable]
    private readonly IRegressionProblemData trainingProblemData;
    [Storable]
    private readonly uint seed;
    [Storable]
    private ILossFunction lossFunction;
    [Storable]
    private double r;
    [Storable]
    private double m;
    [Storable]
    private double nu;
    [Storable]
    private int iterations;
    [Storable]
    private int maxSize;

    // Distinct, sorted union of the variables used by the individual models of the ensemble.
    // Accessing this property forces the actual model to be available.
    public override IEnumerable<string> VariablesUsedForPrediction {
      get {
        return ActualModel.Models.SelectMany(x => x.VariablesUsedForPrediction).Distinct().OrderBy(x => x);
      }
    }

    // Used by the persistence framework. The storable fields are not yet set when this
    // constructor runs, which is fine because the lazy factory only reads them on first access.
    [StorableConstructor]
    private GradientBoostedTreesModelSurrogate(bool deserializing)
      : base(deserializing) {
      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(null));
    }

    // Cloning constructor.
    private GradientBoostedTreesModelSurrogate(GradientBoostedTreesModelSurrogate original, Cloner cloner)
      : base(original, cloner) {
      // Only clone the actual model if the original has already calculated it.
      // Reading original.ActualModel unconditionally would force an expensive recalculation
      // just for cloning; the clone can recalculate the model itself if it is ever needed.
      IGradientBoostedTreesModel clonedModel = null;
      if (original.actualModel.IsValueCreated) clonedModel = cloner.Clone(original.ActualModel);
      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(clonedModel));

      this.trainingProblemData = cloner.Clone(original.trainingProblemData);
      this.lossFunction = cloner.Clone(original.lossFunction);
      this.seed = original.seed;
      this.iterations = original.iterations;
      this.maxSize = original.maxSize;
      this.r = original.r;
      this.m = original.m;
      this.nu = original.nu;
    }

    // Creates the factory for the lazy model: returns the given model if there is one,
    // otherwise recalculates the model from the stored training parameters.
    private Func<IGradientBoostedTreesModel> CreateLazyInitFunc(IGradientBoostedTreesModel existingModel) {
      return () => {
        return existingModel == null ? RecalculateModel() : existingModel;
      };
    }

    // Create only the surrogate model without an actual model.
    // The actual model is calculated on first access.
    public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
      ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu)
      : base(trainingProblemData.TargetVariable, "Gradient boosted tree model", string.Empty) {
      this.trainingProblemData = trainingProblemData;
      this.seed = seed;
      this.lossFunction = lossFunction;
      this.iterations = iterations;
      this.maxSize = maxSize;
      this.r = r;
      this.m = m;
      this.nu = nu;

      // Without this assignment the lazy field stays null and every access to the
      // actual model throws a NullReferenceException.
      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(null));
    }

    // Wrap an actual model in a surrogate.
    // If model is null the surrogate falls back to recalculating the model on first access.
    public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
      ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu,
      IGradientBoostedTreesModel model)
      : this(trainingProblemData, seed, lossFunction, iterations, maxSize, r, m, nu) {
      // Replaces the recalculating factory set by the chained constructor.
      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(model));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new GradientBoostedTreesModelSurrogate(this, cloner);
    }

    // Forward message to actual model (recalculate model first if necessary).
    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
      return ActualModel.GetEstimatedValues(dataset, rows);
    }

    // Creates a regression solution for this surrogate on a clone of the given problem data.
    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
    }

    // Trains the GBT model again with the stored problem data, loss function, parameters and seed.
    // NOTE(review): this presumably reproduces the original model exactly because the same seed
    // is used - confirm against GradientBoostedTreesAlgorithmStatic.TrainGbm.
    private IGradientBoostedTreesModel RecalculateModel() {
      return GradientBoostedTreesAlgorithmStatic.TrainGbm(trainingProblemData, lossFunction, maxSize, nu, r, m, iterations, seed).Model;
    }

    // Forwarded to the actual model (recalculated first if necessary).
    public IEnumerable<IRegressionModel> Models {
      get { return ActualModel.Models; }
    }

    // Forwarded to the actual model (recalculated first if necessary).
    public IEnumerable<double> Weights {
      get { return ActualModel.Weights; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.