Changeset 17156


Ignore:
Timestamp:
07/23/19 20:20:35 (3 months ago)
Author:
gkronber
Message:

#2883: merged r17030, r17032, r17033, r17043, r17044, r17137 from trunk to stable

Location:
stable
Files:
10 edited
1 copied

Legend:

Unmodified
Added
Removed
  • stable

  • stable/HeuristicLab.Algorithms.DataAnalysis

  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4

  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs

    r17097 r17156  
    2424using System.Linq;
    2525using System.Threading;
     26using HeuristicLab.Algorithms.DataAnalysis.GradientBoostedTrees;
    2627using HeuristicLab.Analysis;
    2728using HeuristicLab.Common;
     
    4950    private const string LossFunctionParameterName = "LossFunction";
    5051    private const string UpdateIntervalParameterName = "UpdateInterval";
    51     private const string CreateSolutionParameterName = "CreateSolution";
     52    private const string ModelCreationParameterName = "ModelCreation";
    5253    #endregion
    5354
     
    8081      get { return (IFixedValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    8182    }
    82     public IFixedValueParameter<BoolValue> CreateSolutionParameter {
    83       get { return (IFixedValueParameter<BoolValue>)Parameters[CreateSolutionParameterName]; }
     83    private IFixedValueParameter<EnumValue<ModelCreation>> ModelCreationParameter {
     84      get { return (IFixedValueParameter<EnumValue<ModelCreation>>)Parameters[ModelCreationParameterName]; }
    8485    }
    8586    #endregion
     
    114115      set { MParameter.Value.Value = value; }
    115116    }
    116     public bool CreateSolution {
    117       get { return CreateSolutionParameter.Value.Value; }
    118       set { CreateSolutionParameter.Value.Value = value; }
     117    public ModelCreation ModelCreation {
     118      get { return ModelCreationParameter.Value.Value; }
     119      set { ModelCreationParameter.Value.Value = value; }
    119120    }
    120121    #endregion
     
    147148      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
    148149      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
    149       Parameters.Add(new FixedValueParameter<IntValue>(MaxSizeParameterName, "Maximal size of the tree learned in each step (prefer smaller sizes if possible)", new IntValue(10)));
     150      Parameters.Add(new FixedValueParameter<IntValue>(MaxSizeParameterName, "Maximal size of the tree learned in each step (prefer smaller sizes (3 to 10) if possible)", new IntValue(10)));
    150151      Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "Ratio of training rows selected randomly in each step (0 < R <= 1)", new DoubleValue(0.5)));
    151152      Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName, "Ratio of variables selected randomly in each step (0 < M <= 1)", new DoubleValue(0.5)));
     
    153154      Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "", new IntValue(100)));
    154155      Parameters[UpdateIntervalParameterName].Hidden = true;
    155       Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName, "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)));
    156       Parameters[CreateSolutionParameterName].Hidden = true;
     156      Parameters.Add(new FixedValueParameter<EnumValue<ModelCreation>>(ModelCreationParameterName, "Defines the results produced at the end of the run (Surrogate => Less disk space, lazy recalculation of model)", new EnumValue<ModelCreation>(ModelCreation.Model)));
     157      Parameters[ModelCreationParameterName].Hidden = true;
    157158
    158159      var lossFunctions = ApplicationManager.Manager.GetInstances<ILossFunction>();
     
    165166      // BackwardsCompatibility3.4
    166167      #region Backwards compatible code, remove with 3.5
     168
     169      #region LossFunction
    167170      // parameter type has been changed
    168171      var lossFunctionParam = Parameters[LossFunctionParameterName] as ConstrainedValueParameter<StringValue>;
     
    183186      }
    184187      #endregion
     188
     189      #region CreateSolution
     190      // parameter type has been changed
     191      if (Parameters.ContainsKey("CreateSolution")) {
     192        var createSolutionParam = Parameters["CreateSolution"] as FixedValueParameter<BoolValue>;
     193        Parameters.Remove(createSolutionParam);
     194
     195        ModelCreation value = createSolutionParam.Value.Value ? ModelCreation.Model : ModelCreation.QualityOnly;
     196        Parameters.Add(new FixedValueParameter<EnumValue<ModelCreation>>(ModelCreationParameterName, "Defines the results produced at the end of the run (Surrogate => Less disk space, lazy recalculation of model)", new EnumValue<ModelCreation>(value)));
     197        Parameters[ModelCreationParameterName].Hidden = true;
     198      }
     199      #endregion
     200      #endregion
    185201    }
    186202
     
    249265
    250266      // produce solution
    251       if (CreateSolution) {
    252         var model = state.GetModel();
     267      if (ModelCreation == ModelCreation.SurrogateModel || ModelCreation == ModelCreation.Model) {
     268        IRegressionModel model = state.GetModel();
     269
     270        if (ModelCreation == ModelCreation.SurrogateModel) {
     271          model = new GradientBoostedTreesModelSurrogate((GradientBoostedTreesModel)model, problemData, (uint)Seed, lossFunction, Iterations, MaxSize, R, M, Nu);
     272        }
    253273
    254274        // for logistic regression we produce a classification solution
     
    272292          Results.Add(new Result("Solution", new GradientBoostedTreesSolution(model, problemData)));
    273293        }
     294      } else if (ModelCreation == ModelCreation.QualityOnly) {
     295        //Do nothing
     296      } else {
     297        throw new NotImplementedException("Selected parameter for CreateSolution isn't implemented yet");
    274298      }
    275299    }
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithmStatic.cs

    r17097 r17156  
    101101
    102102      public IRegressionModel GetModel() {
    103 #pragma warning disable 618
    104         var model = new GradientBoostedTreesModel(models, weights);
    105 #pragma warning restore 618
    106         // we don't know the number of iterations here but the number of weights is equal
    107         // to the number of iterations + 1 (for the constant model)
    108         // wrap the actual model in a surrogate that enables persistence and lazy recalculation of the model if necessary
    109         return new GradientBoostedTreesModelSurrogate(problemData, randSeed, lossFunction, weights.Count - 1, maxSize, r, m, nu, model);
     103        return new GradientBoostedTreesModel(models, weights);
    110104      }
    111105      public IEnumerable<KeyValuePair<string, double>> GetVariableRelevance() {
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModel.cs

    r17097 r17156  
    3434  // this is essentially a collection of weighted regression models
    3535  public sealed class GradientBoostedTreesModel : RegressionModel, IGradientBoostedTreesModel {
    36     // BackwardsCompatibility3.4 for allowing deserialization & serialization of old models
    37     #region Backwards compatible code, remove with 3.5
    38     private bool isCompatibilityLoaded = false; // only set to true if the model is deserialized from the old format, needed to make sure that information is serialized again if it was loaded from the old format
    39 
    4036    [Storable(Name = "models")]
    4137    private IList<IRegressionModel> __persistedModels {
    4238      set {
    43         this.isCompatibilityLoaded = true;
    4439        this.models.Clear();
    4540        foreach (var m in value) this.models.Add(m);
    4641      }
    47       get { if (this.isCompatibilityLoaded) return models; else return null; }
     42      get { return models; }
    4843    }
    4944    [Storable(Name = "weights")]
    5045    private IList<double> __persistedWeights {
    5146      set {
    52         this.isCompatibilityLoaded = true;
    5347        this.weights.Clear();
    5448        foreach (var w in value) this.weights.Add(w);
    5549      }
    56       get { if (this.isCompatibilityLoaded) return weights; else return null; }
     50      get { return weights; }
    5751    }
    58     #endregion
    5952
    6053    public override IEnumerable<string> VariablesUsedForPrediction {
     
    7770      this.weights = new List<double>(original.weights);
    7871      this.models = new List<IRegressionModel>(original.models.Select(m => cloner.Clone(m)));
    79       this.isCompatibilityLoaded = original.isCompatibilityLoaded;
    8072    }
    81     [Obsolete("The constructor of GBTModel should not be used directly anymore (use GBTModelSurrogate instead)")]
     73
    8274    internal GradientBoostedTreesModel(IEnumerable<IRegressionModel> models, IEnumerable<double> weights)
    8375      : base(string.Empty, "Gradient boosted tree model", string.Empty) {
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModelSurrogate.cs

    r17097 r17156  
    7575      : base(original, cloner) {
    7676      IGradientBoostedTreesModel clonedModel = null;
    77       if (original.ActualModel != null) clonedModel = cloner.Clone(original.ActualModel);
     77      if (original.actualModel.IsValueCreated) clonedModel = cloner.Clone(original.ActualModel);
    7878      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(clonedModel)); // only capture clonedModel in the closure
    7979
     
    9090    private Func<IGradientBoostedTreesModel> CreateLazyInitFunc(IGradientBoostedTreesModel clonedModel) {
    9191      return () => {
    92         return clonedModel == null ? RecalculateModel() : clonedModel;
     92        return clonedModel ?? RecalculateModel();
    9393      };
    9494    }
    9595
    9696    // create only the surrogate model without an actual model
    97     public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
     97    private GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
    9898      ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu)
    9999      : base(trainingProblemData.TargetVariable, "Gradient boosted tree model", string.Empty) {
     
    106106      this.m = m;
    107107      this.nu = nu;
     108
     109      actualModel = new Lazy<IGradientBoostedTreesModel>(() => RecalculateModel());
    108110    }
    109111
    110112    // wrap an actual model in a surrogate
    111     public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,
    112       ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu,
    113       IGradientBoostedTreesModel model)
     113    public GradientBoostedTreesModelSurrogate(IGradientBoostedTreesModel model, IRegressionProblemData trainingProblemData, uint seed,
     114      ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu)
    114115      : this(trainingProblemData, seed, lossFunction, iterations, maxSize, r, m, nu) {
    115116      actualModel = new Lazy<IGradientBoostedTreesModel>(() => model);
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/ModelCreation.cs

    r17030 r17156  
    2727  /// <summary>
    2828  /// Defines what part of the Model should be stored.
    29   /// Quality - only the resulting Quality will be stored.
    30   /// Parameter - only the parameters will be stored, the models can be calculated afterwards
    31   /// Complete - the complete model will be stored (consider the amount of memory needed)
     29  /// QualityOnly - no solution will be created.
     30  /// SurrogateModel - only the parameters will be stored, the model is recalculated lazily when it is needed after deserialization
     31  /// Model - the complete model will be stored (consider the amount of memory needed)
    3232  /// </summary>
    3333  [StorableType("EE55C357-C4B3-4662-B40B-D1D06A851809")]
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r17105 r17156  
    217217    <Compile Include="GradientBoostedTrees\LossFunctions\SquaredErrorLoss.cs" />
    218218    <Compile Include="GradientBoostedTrees\GradientBoostedTreesSolution.cs" />
     219    <Compile Include="GradientBoostedTrees\ModelCreation.cs" />
    219220    <Compile Include="GradientBoostedTrees\RegressionTreeBuilder.cs" />
    220221    <Compile Include="GradientBoostedTrees\RegressionTreeModel.cs" />
  • stable/HeuristicLab.Tests

  • stable/HeuristicLab.Tests/HeuristicLab.Algorithms.DataAnalysis-3.4/GradientBoostingTest.cs

    r15292 r17156  
    237237      gbt.Iterations = 5000;
    238238      gbt.MaxSize = 20;
    239       gbt.CreateSolution = false;
     239      gbt.ModelCreation = GradientBoostedTrees.ModelCreation.QualityOnly;
    240240      #endregion
    241241
     
    265265      gbt.Nu = 0.02;
    266266      gbt.LossFunctionParameter.Value = gbt.LossFunctionParameter.ValidValues.First(l => l.ToString().Contains("Absolute"));
    267       gbt.CreateSolution = false;
     267      gbt.ModelCreation = GradientBoostedTrees.ModelCreation.QualityOnly;
    268268      #endregion
    269269
     
    293293      gbt.Nu = 0.005;
    294294      gbt.LossFunctionParameter.Value = gbt.LossFunctionParameter.ValidValues.First(l => l.ToString().Contains("Relative"));
    295       gbt.CreateSolution = false;
     295      gbt.ModelCreation = GradientBoostedTrees.ModelCreation.QualityOnly;
    296296      #endregion
    297297
Note: See TracChangeset for help on using the changeset viewer.