source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/RegressionNodeModel.cs @ 16069

Last change on this file since 16069 was 16069, checked in by bwerth, 13 months ago

#2847 fixed serialization bug

File size: 7.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
27using HeuristicLab.Problems.DataAnalysis;
28
29namespace HeuristicLab.Algorithms.DataAnalysis {
30  [StorableClass]
31  public class RegressionNodeModel : RegressionModel {
32    #region Properties
33    public double PruningStrength = double.NaN;
34    private IReadOnlyList<string> Variables {
35      get {
36        if (IsLeaf && Model == null) return new List<string>();
37        if (IsLeaf) return Model.VariablesUsedForPrediction.ToList();
38        var set = new HashSet<string> {SplitAttribute};
39        var vl = Left.Variables;
40        var vr = Right.Variables;
41        for (var i = 0; i < vl.Count; i++) set.Add(vl[i]);
42        for (var i = 0; i < vr.Count; i++) set.Add(vr[i]);
43        return set.ToList();
44      }
45    }
46    [Storable]
47    internal int NumSamples { get; private set; }
48    [Storable]
49    internal bool IsLeaf { get; private set; }
50    [Storable]
51    private IRegressionModel Model { get; set; }
52
53    [Storable]
54    public string SplitAttribute { get; private set; }
55    [Storable]
56    public double SplitValue { get; private set; }
57    [Storable]
58    public RegressionNodeModel Left { get; private set; }
59    [Storable]
60    public RegressionNodeModel Right { get; private set; }
61    [Storable]
62    public RegressionNodeModel Parent { get; private set; }
63    #endregion
64
65    #region HLConstructors
66    [StorableConstructor]
67    protected RegressionNodeModel(bool deserializing) : base(deserializing) { }
68    protected RegressionNodeModel(RegressionNodeModel original, Cloner cloner) : base(original, cloner) {
69      IsLeaf = original.IsLeaf;
70      Model = cloner.Clone(original.Model);
71      SplitValue = original.SplitValue;
72      SplitAttribute = original.SplitAttribute;
73      Left = cloner.Clone(original.Left);
74      Right = cloner.Clone(original.Right);
75      Parent = cloner.Clone(original.Parent);
76      NumSamples = original.NumSamples;
77    }
78    private RegressionNodeModel(string targetAttr) : base(targetAttr) {
79      IsLeaf = true;
80    }
81    private RegressionNodeModel(RegressionNodeModel parent) : this(parent.TargetVariable) {
82      Parent = parent;
83      IsLeaf = true;
84    }
85    public override IDeepCloneable Clone(Cloner cloner) {
86      return new RegressionNodeModel(this, cloner);
87    }
88    public static RegressionNodeModel CreateNode(string targetAttr, RegressionTreeParameters regressionTreeParams) {
89      return regressionTreeParams.LeafModel.ProvidesConfidence ? new ConfidenceRegressionNodeModel(targetAttr) : new RegressionNodeModel(targetAttr);
90    }
91    private static RegressionNodeModel CreateNode(RegressionNodeModel parent, RegressionTreeParameters regressionTreeParams) {
92      return regressionTreeParams.LeafModel.ProvidesConfidence ? new ConfidenceRegressionNodeModel(parent) : new RegressionNodeModel(parent);
93    }
94    #endregion
95
96    #region RegressionModel
97    public override IEnumerable<string> VariablesUsedForPrediction {
98      get { return Variables; }
99    }
100    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
101      if (!IsLeaf) return rows.Select(row => GetEstimatedValue(dataset, row));
102      if (Model == null) throw new NotSupportedException("The model has not been built correctly");
103      return Model.GetEstimatedValues(dataset, rows);
104    }
105    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
106      return new RegressionSolution(this, problemData);
107    }
108    #endregion
109
110    internal void Split(RegressionTreeParameters regressionTreeParams, string splitAttribute, double splitValue, int numSamples) {
111      NumSamples = numSamples;
112      SplitAttribute = splitAttribute;
113      SplitValue = splitValue;
114      Left = CreateNode(this, regressionTreeParams);
115      Right = CreateNode(this, regressionTreeParams);
116      IsLeaf = false;
117    }
118
119    internal void ToLeaf() {
120      IsLeaf = true;
121      Right = null;
122      Left = null;
123    }
124
125    internal void SetLeafModel(IRegressionModel model) {
126      Model = model;
127    }
128
129    internal IEnumerable<RegressionNodeModel> EnumerateNodes() {
130      var queue = new Queue<RegressionNodeModel>();
131      queue.Enqueue(this);
132      while (queue.Count != 0) {
133        var cur = queue.Dequeue();
134        yield return cur;
135        if (cur.Left == null && cur.Right == null) continue;
136        if (cur.Left != null) queue.Enqueue(cur.Left);
137        if (cur.Right != null) queue.Enqueue(cur.Right);
138      }
139    }
140
141    #region Helpers
142    private double GetEstimatedValue(IDataset dataset, int row) {
143      if (!IsLeaf) return (dataset.GetDoubleValue(SplitAttribute, row) <= SplitValue ? Left : Right).GetEstimatedValue(dataset, row);
144      if (Model == null) throw new NotSupportedException("The model has not been built correctly");
145      return Model.GetEstimatedValues(dataset, new[] {row}).First();
146    }
147    #endregion
148
149    [StorableClass]
150    private sealed class ConfidenceRegressionNodeModel : RegressionNodeModel, IConfidenceRegressionModel {
151      #region HLConstructors
152      [StorableConstructor]
153      private ConfidenceRegressionNodeModel(bool deserializing) : base(deserializing) { }
154      private ConfidenceRegressionNodeModel(ConfidenceRegressionNodeModel original, Cloner cloner) : base(original, cloner) { }
155      public ConfidenceRegressionNodeModel(string targetAttr) : base(targetAttr) { }
156      public ConfidenceRegressionNodeModel(RegressionNodeModel parent) : base(parent) { }
157      public override IDeepCloneable Clone(Cloner cloner) {
158        return new ConfidenceRegressionNodeModel(this, cloner);
159      }
160      #endregion
161
162      public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
163        return IsLeaf ? ((IConfidenceRegressionModel)Model).GetEstimatedVariances(dataset, rows) : rows.Select(row => GetEstimatedVariance(dataset, row));
164      }
165
166      private double GetEstimatedVariance(IDataset dataset, int row) {
167        return !IsLeaf ? ((IConfidenceRegressionModel)(dataset.GetDoubleValue(SplitAttribute, row) <= SplitValue ? Left : Right)).GetEstimatedVariances(dataset, row.ToEnumerable()).Single() : ((IConfidenceRegressionModel)Model).GetEstimatedVariances(dataset, new[] {row}).First();
168      }
169
170      public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
171        return new ConfidenceRegressionSolution(this, problemData);
172      }
173    }
174  }
175}
Note: See TracBrowser for help on using the repository browser.