Free cookie consent management tool by TermsFeed Policy Generator

source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComponentReductionLinearLeaf.cs @ 15430

Last change on this file since 15430 was 15430, checked in by bwerth, 6 years ago

#2847 first implementation of M5'-regression

File size: 3.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Problems.DataAnalysis;
32
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Leaf type that projects the problem data with <c>PrincipleComponentAnalysisStatic</c>, fits one linear
  /// model per candidate number of leading components and keeps the candidate with the lowest value
  /// reported through the <c>cvRmse</c> out parameter of
  /// <c>PreconstructedLinearModel.CreateConfidenceLinearModel</c> (presumably a cross-validation RMSE).
  /// </summary>
  [StorableClass]
  [Item("ComponentReductionLinearLeaf", "A leaf type that uses principle component analysis to create smaller linear models as leaf models")]
  public class ComponentReductionLinearLeaf : ParameterizedNamedItem, ILeafType<IConfidenceRegressionModel> {
    /// <summary>Key under which the component-count parameter is stored in <c>Parameters</c>.</summary>
    public const string NoComponentsParameterName = "NoComponents";

    /// <summary>The parameter holding the maximum number of components to try.</summary>
    public IFixedValueParameter<IntValue> NoComponentsParameter {
      // Direct cast: a type mismatch fails here with an InvalidCastException instead of a
      // NullReferenceException at the first dereference of the result.
      get { return (IFixedValueParameter<IntValue>)Parameters[NoComponentsParameterName]; }
    }

    /// <summary>Current value of <see cref="NoComponentsParameter"/>.</summary>
    public int NoComponents {
      get { return NoComponentsParameter.Value.Value; }
    }

    #region Constructors & Cloning
    [StorableConstructor]
    private ComponentReductionLinearLeaf(bool deserializing) : base(deserializing) { }
    private ComponentReductionLinearLeaf(ComponentReductionLinearLeaf original, Cloner cloner) : base(original, cloner) { }

    /// <summary>Creates a leaf type whose component-count parameter is initialised to 10.</summary>
    public ComponentReductionLinearLeaf() {
      Parameters.Add(new FixedValueParameter<IntValue>(NoComponentsParameterName, "The maximum number of principle components used", new IntValue(10)));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ComponentReductionLinearLeaf(this, cloner);
    }
    #endregion

    #region IModelType
    /// <summary>
    /// Builds the leaf model: for every i from 1 up to min(<see cref="NoComponents"/>, number of allowed
    /// input variables) a linear model is fitted on the first i entries of <c>pca.Names</c>, and the
    /// candidate with the lowest <c>cvRmse</c> is returned (on ties the larger i wins).
    /// </summary>
    /// <param name="pd">Problem data; its dataset, training indices and allowed inputs feed the PCA.</param>
    /// <param name="random">Not used by this leaf type.</param>
    /// <param name="cancellation">Checked once per candidate; a requested cancellation aborts the build.</param>
    /// <param name="noParameters">
    /// Set to i + 1 for the selected candidate; stays 1 when no candidate was accepted.
    /// </param>
    /// <returns>
    /// The selected model, or <c>null</c> when no candidate was accepted (for example when
    /// <see cref="NoComponents"/> is smaller than 1 or there are no allowed input variables).
    /// </returns>
    /// <exception cref="OperationCanceledException">Cancellation was requested on <paramref name="cancellation"/>.</exception>
    public IConfidenceRegressionModel BuildModel(IRegressionProblemData pd, IRandom random,
      CancellationToken cancellation, out int noParameters) {
      // NOTE(review): the meaning of the trailing 'true' argument is not visible here - confirm against
      // PrincipleComponentAnalysisStatic.Create.
      var pca = PrincipleComponentAnalysisStatic.Create(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, true);
      var pcdata = pca.ProjectProblem(pd);

      // pd is not modified inside the loop (only clones of pcdata are), so the bound is computed once
      // instead of re-enumerating AllowedInputVariables on every iteration.
      var maxComponents = Math.Min(NoComponents, pd.AllowedInputVariables.Count());

      ComponentReducedLinearModel bestModel = null;
      var bestCvrmse = double.MaxValue;
      noParameters = 1;
      for (var i = 1; i <= maxComponents; i++) {
        // Each iteration fits a full linear model; honor cancellation between candidates.
        cancellation.ThrowIfCancellationRequested();

        var pd2 = (IRegressionProblemData)pcdata.Clone();
        // Check exactly the first i component names as inputs and uncheck everything else.
        var inputs = new HashSet<string>(pca.Names.Take(i));
        foreach (var v in pd2.InputVariables.CheckedItems.ToArray())
          pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value));

        double cvRmse;
        double rmse;
        var model = PreconstructedLinearModel.CreateConfidenceLinearModel(pd2, out rmse, out cvRmse);
        if (cvRmse > bestCvrmse) continue;

        bestModel = new ComponentReducedLinearModel(pd2.TargetVariable, model, pca);
        noParameters = i + 1;
        bestCvrmse = cvRmse;
      }
      return bestModel;
    }

    /// <summary>
    /// Returns <see cref="NoComponents"/> + 2. <paramref name="pd"/> is not consulted.
    /// </summary>
    public int MinLeafSize(IRegressionProblemData pd) {
      return NoComponents + 2;
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.