source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LeafBase.cs @ 15967

Last change on this file since 15967 was 15967, checked in by bwerth, 12 months ago

#2847 added logistic dampening and some minor changes

File size: 7.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Problems.DataAnalysis;
32
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Base class for leaf model types. It collects the leaves of a regression tree together with the
  /// training rows that reach them, builds one regression model per leaf via the abstract
  /// <see cref="Build(IRegressionProblemData, IRandom, CancellationToken, out int)"/> and, if enabled,
  /// passes each model through <c>DampenedModel.DampenModel</c>.
  /// </summary>
  [StorableClass]
  public abstract class LeafBase : ParameterizedNamedItem, ILeafModel {
    public const string LeafBuildingStateVariableName = "LeafBuildingState";
    public const string UseDampeningParameterName = "UseDampening";
    // NOTE: the misspelling is the key the parameter is registered and looked up under; it is kept
    // unchanged on purpose (presumably it is also part of persisted files - verify before renaming).
    private const string DampeningParameterName = "DampeningStrenght";

    /// <summary>Parameter holding the dampening strength.</summary>
    public IFixedValueParameter<DoubleValue> DampeningParameter {
      // Direct cast (same as UseDampeningParameter): a wrongly typed entry fails right here with an
      // InvalidCastException instead of surfacing later as a NullReferenceException.
      get { return (IFixedValueParameter<DoubleValue>)Parameters[DampeningParameterName]; }
    }
    /// <summary>Parameter that switches dampening on or off.</summary>
    public IFixedValueParameter<BoolValue> UseDampeningParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UseDampeningParameterName]; }
    }

    /// <summary>Current value of <see cref="UseDampeningParameter"/>.</summary>
    public bool UseDampening {
      get { return UseDampeningParameter.Value.Value; }
    }
    /// <summary>Current value of <see cref="DampeningParameter"/>.</summary>
    public double Dampening {
      get { return DampeningParameter.Value.Value; }
    }

    #region Constructors & Cloning
    [StorableConstructor]
    protected LeafBase(bool deserializing) : base(deserializing) { }
    protected LeafBase(LeafBase original, Cloner cloner) : base(original, cloner) { }
    /// <summary>Registers the two dampening parameters (dampening off, strength 1.5).</summary>
    protected LeafBase() {
      Parameters.Add(new FixedValueParameter<BoolValue>(UseDampeningParameterName, "Whether logistic dampening should be used to prevent extreme extrapolation", new BoolValue(false)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(DampeningParameterName, "Determines the strenght of the logistic dampening. Must be > 0.0. Larger numbers make more conservative predictions.", new DoubleValue(1.5)));
    }
    #endregion

    #region IModelType
    public abstract bool ProvidesConfidence { get; }
    public abstract int MinLeafSize(IRegressionProblemData pd);

    /// <summary>Adds a fresh <see cref="LeafBuildingState"/> variable to the given scope.</summary>
    /// <param name="states">Scope that receives the state variable.</param>
    public void Initialize(IScope states) {
      states.Variables.Add(new Variable(LeafBuildingStateVariableName, new LeafBuildingState()));
    }

    /// <summary>
    /// Builds and assigns a leaf model for every leaf of <paramref name="tree"/>. Progress is kept in
    /// the <see cref="LeafBuildingState"/> stored in <paramref name="stateScope"/>, so a later call
    /// with the same scope continues with the leaves that are still queued.
    /// </summary>
    /// <param name="tree">Tree whose leaves receive models.</param>
    /// <param name="trainingRows">Rows that are distributed over the leaves.</param>
    /// <param name="stateScope">Scope holding the regression tree parameters and the building state.</param>
    /// <param name="cancellationToken">Token forwarded to the model building.</param>
    public void Build(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IScope stateScope, CancellationToken cancellationToken) {
      var parameters = (RegressionTreeParameters)stateScope.Variables[M5Regression.RegressionTreeParameterVariableName].Value;
      var state = (LeafBuildingState)stateScope.Variables[LeafBuildingStateVariableName].Value;

      // Code 0: leaves have not been collected yet; Code 1: models are being built.
      if (state.Code == 0) {
        state.FillLeafs(tree, trainingRows, parameters.Data);
        state.Code = 1;
      }
      while (state.nodeQueue.Count != 0) {
        // Peek first and dequeue only after the model has been set: if BuildModel throws
        // (e.g. on cancellation) the leaf and its rows stay queued.
        var n = state.nodeQueue.Peek();
        var t = state.trainingRowsQueue.Peek();
        int numP; // parameter count is not needed here
        n.SetLeafModel(BuildModel(t, parameters, cancellationToken, out numP));
        state.nodeQueue.Dequeue();
        state.trainingRowsQueue.Dequeue();
      }
    }

    /// <summary>
    /// Builds the model for a single leaf from the given rows. The rows are extracted into a reduced
    /// dataset whose training partition covers all of its rows and whose test partition is empty.
    /// </summary>
    /// <param name="rows">Training rows of the leaf.</param>
    /// <param name="parameters">Regression tree parameters (data, allowed inputs, target, random).</param>
    /// <param name="cancellation">Token forwarded to the concrete build; checked again before returning.</param>
    /// <param name="numParams">Number of parameters as reported by the concrete build.</param>
    /// <returns>The leaf model, dampened if dampening is enabled and the strength is positive.</returns>
    public IRegressionModel BuildModel(IReadOnlyList<int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int numParams) {
      var reducedData = RegressionTreeUtilities.ReduceDataset(parameters.Data, rows, parameters.AllowedInputVariables.ToArray(), parameters.TargetVariable);
      var pd = new RegressionProblemData(reducedData, parameters.AllowedInputVariables.ToArray(), parameters.TargetVariable);
      pd.TrainingPartition.Start = 0;
      pd.TrainingPartition.End = pd.TestPartition.Start = pd.TestPartition.End = reducedData.Rows;

      var model = Build(pd, parameters.Random, cancellation, out numParams);
      // A non-positive strength is treated like disabled dampening.
      if (UseDampening && Dampening > 0.0) {
        model = DampenedModel.DampenModel(model, pd, Dampening);
      }

      cancellation.ThrowIfCancellationRequested();
      return model;
    }

    /// <summary>Builds the concrete leaf model for the given problem data.</summary>
    public abstract IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters);
    #endregion

    /// <summary>
    /// Storable progress of the leaf building: the leaves that still need a model and, in the same
    /// order, the training rows belonging to each of them.
    /// </summary>
    [StorableClass]
    public class LeafBuildingState : Item {
      // nodeQueue and trainingRowsQueue are filled and consumed in lockstep (entry i of one belongs
      // to entry i of the other).
      [Storable]
      public Queue<RegressionNodeModel> nodeQueue = new Queue<RegressionNodeModel>();
      [Storable]
      public Queue<IReadOnlyList<int>> trainingRowsQueue = new Queue<IReadOnlyList<int>>();

      //State.Code values denote the current action (for pausing)
      //0...nothing has been done;
      //1...building Models;
      [Storable]
      public int Code = 0;

      #region HLConstructors & Cloning
      [StorableConstructor]
      protected LeafBuildingState(bool deserializing) : base(deserializing) { }
      protected LeafBuildingState(LeafBuildingState original, Cloner cloner) : base(original, cloner) {
        // nodes are cloned through the cloner, row lists are copied into new arrays
        nodeQueue = new Queue<RegressionNodeModel>(original.nodeQueue.Select(cloner.Clone));
        trainingRowsQueue = new Queue<IReadOnlyList<int>>(original.trainingRowsQueue.Select(x => (IReadOnlyList<int>)x.ToArray()));
        Code = original.Code;
      }
      public LeafBuildingState() { }
      public override IDeepCloneable Clone(Cloner cloner) {
        return new LeafBuildingState(this, cloner);
      }
      #endregion

      /// <summary>
      /// Clears both queues and refills them with all leaves of <paramref name="tree"/> and the rows
      /// reaching each leaf. The tree is traversed breadth-first from the root; at every inner node
      /// the rows are split with <c>RegressionTreeUtilities.SplitRows</c> on the node's split
      /// attribute and value.
      /// </summary>
      /// <param name="tree">Tree whose leaves are collected.</param>
      /// <param name="trainingRows">Rows that enter the tree at its root.</param>
      /// <param name="data">Dataset used to split the rows.</param>
      public void FillLeafs(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IDataset data) {
        var helperQueue = new Queue<RegressionNodeModel>();
        var trainingHelperQueue = new Queue<IReadOnlyList<int>>();
        nodeQueue.Clear();
        trainingRowsQueue.Clear();

        helperQueue.Enqueue(tree.Root);
        trainingHelperQueue.Enqueue(trainingRows);

        while (helperQueue.Count != 0) {
          var n = helperQueue.Dequeue();
          var t = trainingHelperQueue.Dequeue();
          if (n.IsLeaf) {
            nodeQueue.Enqueue(n);
            trainingRowsQueue.Enqueue(t);
            continue;
          }

          IReadOnlyList<int> leftTraining, rightTraining;
          RegressionTreeUtilities.SplitRows(t, data, n.SplitAttribute, n.SplitValue, out leftTraining, out rightTraining);

          // children and their rows are enqueued in the same order (left, then right)
          helperQueue.Enqueue(n.Left);
          helperQueue.Enqueue(n.Right);
          trainingHelperQueue.Enqueue(leftTraining);
          trainingHelperQueue.Enqueue(rightTraining);
        }
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.