source: branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComponentReductionLinearLeaf.cs @ 16847

Last change on this file since 16847 was 16847, checked in by gkronber, 2 months ago

#2847: made some minor changes while reviewing

File size: 3.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Parameters;
30using HeuristicLab.Problems.DataAnalysis;
31using HEAL.Attic;
32
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Leaf type that projects the allowed input variables onto principal components and fits
  /// linear models on the leading components, keeping the candidate with the lowest error
  /// reported by the linear model builder.
  /// </summary>
  [StorableType("5730B54C-7A8B-4CA7-8F37-7FF3F9848CD2")]
  [Item("ComponentReductionLinearLeaf", "A leaf type that uses principle component analysis to create smaller linear models as leaf models")]
  public class ComponentReductionLinearLeaf : LeafBase {
    /// <summary>Key under which the component-count parameter is stored in <c>Parameters</c>.</summary>
    public const string NumberOfComponentsParameterName = "NoComponents";

    /// <summary>
    /// Parameter holding the maximum number of principal components to try.
    /// </summary>
    /// <remarks>
    /// The property name is misspelled ("Compontents"); it is kept unchanged because it is part
    /// of the public interface of this class.
    /// </remarks>
    public IFixedValueParameter<IntValue> NumberOfCompontentsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfComponentsParameterName]; }
    }

    /// <summary>
    /// Convenience accessor for the integer value stored in <see cref="NumberOfCompontentsParameter"/>.
    /// </summary>
    public int NumberOfComponents {
      get { return NumberOfCompontentsParameter.Value.Value; }
      set { NumberOfCompontentsParameter.Value.Value = value; }
    }

    #region Constructors & Cloning
    [StorableConstructor]
    protected ComponentReductionLinearLeaf(StorableConstructorFlag _) : base(_) { }
    protected ComponentReductionLinearLeaf(ComponentReductionLinearLeaf original, Cloner cloner) : base(original, cloner) { }

    /// <summary>Creates a leaf type whose component-count parameter is initialized to 10.</summary>
    public ComponentReductionLinearLeaf() {
      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfComponentsParameterName, "The maximum number of principle components used (default=10)", new IntValue(10)));
    }

    /// <summary>Creates a copy of this instance via the cloning constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new ComponentReductionLinearLeaf(this, cloner);
    }
    #endregion

    #region IModelType
    /// <summary>Always <c>false</c> for this leaf type.</summary>
    public override bool ProvidesConfidence {
      get { return false; }
    }

    /// <summary>
    /// Builds one linear model for each prefix of the principal components (first 1, first 2, ...)
    /// and returns the candidate with the lowest error reported by
    /// <c>PreconstructedLinearModel.CreateLinearModel</c>.
    /// </summary>
    /// <param name="pd">Problem data; its dataset, training indices and allowed input variables define the projection.</param>
    /// <param name="random">Not used by this leaf type.</param>
    /// <param name="cancellationToken">Checked before each candidate model is fitted.</param>
    /// <param name="numberOfParameters">
    /// Set to the number of components of the returned model plus one; stays 1 if no candidate is accepted.
    /// </param>
    /// <returns>
    /// The best candidate model, or <c>null</c> if no candidate was accepted (for example when
    /// <see cref="NumberOfComponents"/> is smaller than 1 or there are no allowed input variables).
    /// </returns>
    /// <exception cref="OperationCanceledException">Cancellation was requested while candidates were being built.</exception>
    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random,
      CancellationToken cancellationToken, out int numberOfParameters) {
      var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, normalize: true);
      var pcdata = pca.TransformProblemData(pd);
      ComponentReducedLinearModel bestModel = null;
      var bestCvrmse = double.MaxValue;
      numberOfParameters = 1;

      // Loop-invariant upper bound: never try more components than there are input variables.
      var maxComponents = Math.Min(NumberOfComponents, pd.AllowedInputVariables.Count());
      for (var i = 1; i <= maxComponents; i++) {
        // Every iteration fits a complete linear model, so honor cancellation between candidates.
        cancellationToken.ThrowIfCancellationRequested();

        // Work on a clone because the checked state of the input variables is modified below.
        var pd2 = (IRegressionProblemData)pcdata.Clone();
        var inputs = new HashSet<string>(pca.ComponentNames.Take(i));
        // Uncheck every currently checked input that is not among the first i components.
        foreach (var v in pd2.InputVariables.CheckedItems.ToArray())
          pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value));

        double rmse;
        var model = PreconstructedLinearModel.CreateLinearModel(pd2, out rmse);

        // Comparisons with NaN are always false, so a NaN error must be handled explicitly:
        // it is accepted only as a last resort while no model exists, and any later candidate
        // with a real error replaces it. Ties still go to the candidate with more components.
        var isWorse = double.IsNaN(rmse) ? bestModel != null : rmse > bestCvrmse;
        if (isWorse) continue;

        bestModel = new ComponentReducedLinearModel(pd2.TargetVariable, model, pca);
        numberOfParameters = i + 1; // presumably one coefficient per component plus the constant term
        bestCvrmse = rmse;
      }
      return bestModel;
    }

    /// <summary>
    /// Returns <see cref="NumberOfComponents"/> + 2; the problem data argument is not inspected.
    /// </summary>
    public override int MinLeafSize(IRegressionProblemData pd) {
      return NumberOfComponents + 2;
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.