#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Leaf type that projects the input variables onto principal components and fits
  /// linear models on a growing prefix of those components, keeping the model with
  /// the lowest cross-validation RMSE.
  /// </summary>
  [StorableClass]
  [Item("ComponentReductionLinearLeaf", "A leaf type that uses principle component analysis to create smaller linear models as leaf models")]
  public class ComponentReductionLinearLeaf : ParameterizedNamedItem, ILeafType {
    /// <summary>Key under which the component-count parameter is stored in <c>Parameters</c>.</summary>
    public const string NoComponentsParameterName = "NoComponents";

    /// <summary>
    /// The parameter holding the maximum number of components to try.
    /// </summary>
    public IFixedValueParameter<IntValue> NoComponentsParameter {
      // Direct cast (instead of `as`) so that a wrongly typed parameter fails right here
      // rather than surfacing later as a NullReferenceException.
      get { return (IFixedValueParameter<IntValue>)Parameters[NoComponentsParameterName]; }
    }

    /// <summary>
    /// Convenience accessor for the integer value of <see cref="NoComponentsParameter"/>.
    /// </summary>
    public int NoComponents {
      get { return NoComponentsParameter.Value.Value; }
    }

    #region Constructors & Cloning
    /// <summary>Constructor used by the persistence framework.</summary>
    [StorableConstructor]
    private ComponentReductionLinearLeaf(bool deserializing) : base(deserializing) { }

    /// <summary>Copy constructor used by <see cref="Clone(Cloner)"/>.</summary>
    private ComponentReductionLinearLeaf(ComponentReductionLinearLeaf original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates a new leaf type and registers the component-count parameter with a default of 10.
    /// </summary>
    public ComponentReductionLinearLeaf() {
      Parameters.Add(new FixedValueParameter<IntValue>(NoComponentsParameterName, "The maximum number of principle components used", new IntValue(10)));
    }

    /// <summary>Creates a copy of this instance via the copy constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new ComponentReductionLinearLeaf(this, cloner);
    }
    #endregion

    #region IModelType
    /// <summary>
    /// Builds the leaf model: for i = 1 .. min(<see cref="NoComponents"/>, number of allowed
    /// input variables) a linear model is fitted on the first i component names of the
    /// projection, and the model with the lowest cross-validation RMSE is kept.
    /// </summary>
    /// <param name="pd">Problem data the projection and the linear models are created from.</param>
    /// <param name="random">Not used by this implementation.</param>
    /// <param name="cancellation">Not observed by this implementation.</param>
    /// <param name="noParameters">
    /// Set to (number of components of the selected model) + 1; stays 1 when no model is selected.
    /// </param>
    /// <returns>
    /// The selected model, or <c>null</c> when no candidate is accepted (for example when the
    /// loop bound is below 1 and the loop body never runs).
    /// </returns>
    public IConfidenceRegressionModel BuildModel(IRegressionProblemData pd, IRandom random, CancellationToken cancellation, out int noParameters) {
      // NOTE(review): the trailing `true` presumably enables normalization in CreateProjection - confirm.
      var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, true);
      var pcdata = pca.TransformProblemData(pd);

      ComponentReducedLinearModel bestModel = null;
      var bestCvrmse = double.MaxValue;
      noParameters = 1;

      // Loop-invariant upper bound; hoisted so the allowed inputs are not re-counted on every iteration.
      var maxComponents = Math.Min(NoComponents, pd.AllowedInputVariables.Count());

      for (var i = 1; i <= maxComponents; i++) {
        var pd2 = (IRegressionProblemData)pcdata.Clone();

        // Keep only the first i component names checked as inputs. ToArray() takes a snapshot
        // because the checked state is modified while iterating.
        var inputs = new HashSet<string>(pca.ComponentNames.Take(i));
        foreach (var v in pd2.InputVariables.CheckedItems.ToArray())
          pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value));

        double cvRmse;
        double rmse; // required by the callee's signature; only cvRmse drives the selection
        var model = PreconstructedLinearModel.CreateConfidenceLinearModel(pd2, out rmse, out cvRmse);

        // Strictly worse candidates are skipped; on a tie the later (larger) model replaces the earlier one.
        if (cvRmse > bestCvrmse) continue;

        bestModel = new ComponentReducedLinearModel(pd2.TargetVariable, model, pca);
        noParameters = i + 1;
        bestCvrmse = cvRmse;
      }

      return bestModel;
    }

    /// <summary>
    /// Returns the minimum leaf size for this leaf type: <see cref="NoComponents"/> + 2.
    /// </summary>
    /// <param name="pd">Not used by this implementation.</param>
    public int MinLeafSize(IRegressionProblemData pd) {
      return NoComponents + 2;
    }
    #endregion
  }
}