Free cookie consent management tool by TermsFeed Policy Generator

source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/PruningBase.cs @ 15470

Last change on this file since 15470 was 15470, checked in by bwerth, 5 years ago

#2847 worked on M5Regression

File size: 3.7 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Parameters;
28using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
29using HeuristicLab.Problems.DataAnalysis;
30
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Abstract base for pruning strategies. It owns the "PruningStrength" parameter and
  /// provides a default <see cref="Prune"/> decision that compares the penalised test error
  /// of a node's own model against the penalised test error of the subtree rooted at that node.
  /// </summary>
  public abstract class PruningBase : ParameterizedNamedItem, IPruningType {
    private const string PruningStrengthParameterName = "PruningStrength";

    // Factor returned by PruningFactor when there are not more instances than parameters,
    // i.e. when the regular formula would divide by zero or by a negative number.
    private const double SaturatedPruningFactor = 10.0;

    // Fraction of the global standard deviation below which the adjusted node-model error
    // is accepted regardless of how the subtree performs.
    private const double NegligibleErrorFraction = 0.0001;

    /// <summary>
    /// The parameter object holding the pruning strength.
    /// </summary>
    public IFixedValueParameter<DoubleValue> PruningStrengthParameter {
      // Direct cast instead of 'as': a missing or mistyped parameter fails right here
      // rather than as a NullReferenceException in PruningStrength later on.
      get { return (IFixedValueParameter<DoubleValue>)Parameters[PruningStrengthParameterName]; }
    }

    /// <summary>
    /// The current pruning strength (the value of <see cref="PruningStrengthParameter"/>).
    /// </summary>
    public double PruningStrength {
      get { return PruningStrengthParameter.Value.Value; }
    }

    #region Constructors & Cloning
    [StorableConstructor]
    protected PruningBase(bool deserializing) : base(deserializing) { }
    protected PruningBase(PruningBase original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates the pruning type and registers the pruning strength parameter with a default of 2.0.
    /// </summary>
    protected PruningBase() {
      Parameters.Add(new FixedValueParameter<DoubleValue>(PruningStrengthParameterName, "The strength of the pruning. Higher values force the algorithm to create simpler models", new DoubleValue(2.0)));
    }
    #endregion

    #region IPruningType
    public abstract ILeafType<IRegressionModel> ModelType(ILeafType<IRegressionModel> leafType);
    public abstract void GenerateHoldOutSet(IReadOnlyList<int> allrows, IRandom random, out IReadOnlyList<int> training, out IReadOnlyList<int> holdout);

    /// <summary>
    /// Compares the node's own model with the subtree below the node on the given test rows.
    /// </summary>
    /// <param name="node">The node to examine. Its Left and Right children are dereferenced,
    /// so this presumably must be an inner node - confirm against callers.</param>
    /// <param name="m5CreationParams">Supplies the data, the allowed input variables and the target variable.</param>
    /// <param name="testRows">Row indices of the data on which both candidates are evaluated.</param>
    /// <param name="globalStdDev">Reference standard deviation used for the "error is negligible" shortcut.</param>
    /// <returns>
    /// <c>true</c> if there are no test rows, if the penalised error of the node model is not larger
    /// than the penalised error of the subtree, or if the penalised node-model error is negligible
    /// relative to <paramref name="globalStdDev"/>; otherwise <c>false</c>.
    /// NOTE(review): <c>true</c> presumably means "replace the subtree by the node model" - confirm against callers.
    /// </returns>
    internal virtual bool Prune(M5NodeModel node, M5CreationParameters m5CreationParams, IReadOnlyList<int> testRows, double globalStdDev) {
      if (testRows.Count == 0) return true;

      // Build a dataset that contains only the test rows of the relevant variables.
      var vars = m5CreationParams.AllowedInputVariables.Concat(new[] {m5CreationParams.TargetVariable}).ToArray();
      var reducedData = new Dataset(vars, vars.Select(x => m5CreationParams.Data.GetDoubleValues(x, testRows).ToList()));
      var pd = new RegressionProblemData(reducedData, m5CreationParams.AllowedInputVariables, m5CreationParams.TargetVariable);

      // Empty training partition; the test partition spans every row of the reduced dataset.
      // The chained assignment is kept as-is so the partition setters run in the original order.
      pd.TrainingPartition.Start = pd.TrainingPartition.End = pd.TestPartition.Start = 0;
      pd.TestPartition.End = reducedData.Rows;

      var rmsModel = node.NodeModel.CreateRegressionSolution(pd).TestRootMeanSquaredError;
      var rmsSubTree = node.CreateRegressionSolution(pd).TestRootMeanSquaredError;

      // Penalise both errors by their parameter counts; the subtree is charged for the
      // parameters of both children plus one.
      var adjustedRmsModel = rmsModel * PruningFactor(pd.Dataset.Rows, node.NodeModelParams);
      var adjustedRmsTree = rmsSubTree * PruningFactor(pd.Dataset.Rows, node.Left.NumParam + node.Right.NumParam + 1);
      return adjustedRmsModel <= adjustedRmsTree || adjustedRmsModel < globalStdDev * NegligibleErrorFraction;
    }
    #endregion

    /// <summary>
    /// Multiplier applied to a test error to penalise model complexity:
    /// (noInstances + PruningStrength * noParams) / (noInstances - noParams).
    /// </summary>
    /// <param name="noInstances">Number of instances the error was measured on.</param>
    /// <param name="noParams">Number of parameters of the model being penalised.</param>
    /// <returns>The penalty factor, or a fixed factor of 10.0 when <paramref name="noInstances"/>
    /// does not exceed <paramref name="noParams"/>.</returns>
    private double PruningFactor(int noInstances, int noParams) {
      return noInstances <= noParams ? SaturatedPruningFactor : (noInstances + PruningStrength * noParams) / (noInstances - noParams);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.