Free cookie consent management tool by TermsFeed Policy Generator

source: branches/PersistenceReintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModelSurrogate.cs @ 15792

Last change on this file since 15792 was 15018, checked in by gkronber, 8 years ago

#2520 introduced StorableConstructorFlag type for StorableConstructors

File size: 5.7 KB
RevLine 
[12868]1#region License Information
2/* HeuristicLab
[14185]3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[12868]4 * and the BEACON Center for the Study of Evolution in Action.
5 *
6 * This file is part of HeuristicLab.
7 *
8 * HeuristicLab is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * HeuristicLab is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
20 */
21#endregion
22
[14315]23using System;
[12868]24using System.Collections.Generic;
[14315]25using System.Diagnostics.Eventing.Reader;
[13921]26using System.Linq;
[12868]27using HeuristicLab.Common;
28using HeuristicLab.Core;
[14927]29using HeuristicLab.Persistence;
[12868]30using HeuristicLab.Problems.DataAnalysis;
31
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Surrogate used for persistence of an actual gradient boosted trees (GBT) model.
  /// Since the actual GBT model would be very large when persisted, only the information
  /// that is necessary to recalculate the actual model on demand is stored.
  /// </summary>
  [StorableType("4d807e5e-3504-4e49-a3e7-03833144bbc6")]
  [Item("Gradient boosted tree model", "")]
  public sealed class GradientBoostedTreesModelSurrogate : RegressionModel, IGradientBoostedTreesModel {
    // Don't store the actual model!
    // The actual model is only (re)calculated when it is accessed for the first time.
    // Every constructor must assign this field, otherwise ActualModel throws a NullReferenceException.
    private readonly Lazy<IGradientBoostedTreesModel> actualModel;

    /// <summary>
    /// Gets the wrapped model; the first access may trigger <see cref="RecalculateModel"/>.
    /// </summary>
    private IGradientBoostedTreesModel ActualModel {
      get { return actualModel.Value; }
    }

    // The stored fields below are exactly the arguments handed to
    // GradientBoostedTreesAlgorithmStatic.TrainGbm in RecalculateModel.
    [Storable]
    private readonly IRegressionProblemData trainingProblemData;
    [Storable]
    private readonly uint seed;
    [Storable]
    private ILossFunction lossFunction;
    [Storable]
    private double r;
    [Storable]
    private double m;
    [Storable]
    private double nu;
    [Storable]
    private int iterations;
    [Storable]
    private int maxSize;

    /// <summary>
    /// Gets the distinct variable names used by the models of the actual GBT model, in ascending order.
    /// Accessing this property materializes the actual model if necessary.
    /// </summary>
    public override IEnumerable<string> VariablesUsedForPrediction {
      get {
        return ActualModel.Models.SelectMany(x => x.VariablesUsedForPrediction).Distinct().OrderBy(x => x);
      }
    }

    /// <summary>
    /// Storable constructor; the actual model is recalculated lazily from the restored fields.
    /// </summary>
    [StorableConstructor]
    private GradientBoostedTreesModelSurrogate(StorableConstructorFlag deserializing)
      : base(deserializing) {
      actualModel = new Lazy<IGradientBoostedTreesModel>(() => RecalculateModel());
    }

    /// <summary>
    /// Cloning constructor. The wrapped model is cloned only if the original has already
    /// materialized it; otherwise the clone recalculates its own model on first use.
    /// </summary>
    private GradientBoostedTreesModelSurrogate(GradientBoostedTreesModelSurrogate original, Cloner cloner)
      : base(original, cloner) {
      // Check IsValueCreated instead of reading original.ActualModel unconditionally:
      // reading the value would force training of the original model just to clone it.
      IGradientBoostedTreesModel clonedModel = null;
      if (original.actualModel.IsValueCreated && original.ActualModel != null) {
        clonedModel = cloner.Clone(original.ActualModel);
      }
      // pass only clonedModel to the factory so that the closure does not capture 'original'
      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(clonedModel));

      this.trainingProblemData = cloner.Clone(original.trainingProblemData);
      this.lossFunction = cloner.Clone(original.lossFunction);
      this.seed = original.seed;
      this.iterations = original.iterations;
      this.maxSize = original.maxSize;
      this.r = original.r;
      this.m = original.m;
      this.nu = original.nu;
    }

    /// <summary>
    /// Creates the initializer for the lazy model: it yields <paramref name="clonedModel"/> when
    /// one is given and falls back to <see cref="RecalculateModel"/> when it is null.
    /// </summary>
    private Func<IGradientBoostedTreesModel> CreateLazyInitFunc(IGradientBoostedTreesModel clonedModel) {
      return () => {
        return clonedModel == null ? RecalculateModel() : clonedModel;
      };
    }

    /// <summary>
    /// Creates only the surrogate without an actual model; the actual model is calculated
    /// from the given training settings when it is first needed.
    /// </summary>
    public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
      ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu)
      : base(trainingProblemData.TargetVariable, "Gradient boosted tree model", string.Empty) {
      this.trainingProblemData = trainingProblemData;
      this.seed = seed;
      this.lossFunction = lossFunction;
      this.iterations = iterations;
      this.maxSize = maxSize;
      this.r = r;
      this.m = m;
      this.nu = nu;

      // Without this assignment the readonly lazy stays null and every member that reads
      // ActualModel fails with a NullReferenceException.
      actualModel = new Lazy<IGradientBoostedTreesModel>(() => RecalculateModel());
    }

    /// <summary>
    /// Wraps an already calculated model in a surrogate. If <paramref name="model"/> is null
    /// the surrogate recalculates the model on demand instead.
    /// </summary>
    public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
      ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu,
      IGradientBoostedTreesModel model)
      : this(trainingProblemData, seed, lossFunction, iterations, maxSize, r, m, nu) {
      // replaces the recalculating lazy that was set by the chained constructor
      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(model));
    }

    /// <summary>
    /// Creates a deep clone of this surrogate using the given cloner.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new GradientBoostedTreesModelSurrogate(this, cloner);
    }

    /// <summary>
    /// Forwards the call to the actual model (the model is recalculated first if necessary).
    /// </summary>
    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
      return ActualModel.GetEstimatedValues(dataset, rows);
    }

    /// <summary>
    /// Creates a regression solution for this surrogate and a clone of the given problem data.
    /// </summary>
    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
    }

    /// <summary>
    /// Trains a GBT model with the stored training data and settings and returns the resulting model.
    /// </summary>
    private IGradientBoostedTreesModel RecalculateModel() {
      return GradientBoostedTreesAlgorithmStatic.TrainGbm(trainingProblemData, lossFunction, maxSize, nu, r, m, iterations, seed).Model;
    }

    /// <summary>
    /// Gets the models of the actual GBT model (recalculated first if necessary).
    /// </summary>
    public IEnumerable<IRegressionModel> Models {
      get {
        return ActualModel.Models;
      }
    }

    /// <summary>
    /// Gets the weights of the actual GBT model (recalculated first if necessary).
    /// </summary>
    public IEnumerable<double> Weights {
      get {
        return ActualModel.Weights;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.