#region License Information
/* HeuristicLab
* Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Parameters;
using HeuristicLab.Problems.DataAnalysis;
using HEAL.Attic;
namespace HeuristicLab.Algorithms.DataAnalysis {
/// <summary>
/// Leaf type that uses a principal component projection of the input variables
/// to create smaller linear models as leaf models.
/// </summary>
[StorableType("5730B54C-7A8B-4CA7-8F37-7FF3F9848CD2")]
[Item("ComponentReductionLinearLeaf", "A leaf type that uses principle component analysis to create smaller linear models as leaf models")]
public class ComponentReductionLinearLeaf : LeafBase {
  /// <summary>Key under which the component-count parameter is registered.</summary>
  public const string NumberOfComponentsParameterName = "NoComponents";

  /// <summary>
  /// Parameter holding the maximum number of components to use.
  /// The property name (including its spelling) is kept unchanged for compatibility with existing callers.
  /// </summary>
  public IFixedValueParameter<IntValue> NumberOfCompontentsParameter {
    get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfComponentsParameterName]; }
  }

  /// <summary>Maximum number of components considered when building a leaf model.</summary>
  public int NumberOfComponents {
    get { return NumberOfCompontentsParameter.Value.Value; }
    set { NumberOfCompontentsParameter.Value.Value = value; }
  }

  #region Constructors & Cloning
  [StorableConstructor]
  protected ComponentReductionLinearLeaf(StorableConstructorFlag _) : base(_) { }

  protected ComponentReductionLinearLeaf(ComponentReductionLinearLeaf original, Cloner cloner) : base(original, cloner) { }

  /// <summary>Creates a leaf type with the component-count parameter preset to 10.</summary>
  public ComponentReductionLinearLeaf() {
    Parameters.Add(new FixedValueParameter<IntValue>(NumberOfComponentsParameterName, "The maximum number of principle components used (default=10)", new IntValue(10)));
  }

  public override IDeepCloneable Clone(Cloner cloner) {
    return new ComponentReductionLinearLeaf(this, cloner);
  }
  #endregion

  #region IModelType
  /// <summary>Always <c>false</c> for this leaf type.</summary>
  public override bool ProvidesConfidence {
    get { return false; }
  }

  /// <summary>
  /// Builds a linear model on a component projection of the allowed input variables.
  /// Candidate models using the first 1, 2, ... components are fitted, up to the smaller of
  /// <see cref="NumberOfComponents"/> and the number of allowed input variables, and the
  /// candidate with the lowest reported error is kept.
  /// </summary>
  /// <param name="pd">Problem data; its training indices and allowed input variables define the projection.</param>
  /// <param name="random">Not used by this leaf type.</param>
  /// <param name="cancellationToken">Checked before each candidate model is fitted.</param>
  /// <param name="numberOfParameters">
  /// Number of components used by the returned model plus one; 1 if no candidate was evaluated.
  /// </param>
  /// <returns>
  /// The selected model, or <c>null</c> if no candidate was evaluated
  /// (e.g. <see cref="NumberOfComponents"/> is smaller than 1).
  /// </returns>
  /// <exception cref="OperationCanceledException">Cancellation was requested via <paramref name="cancellationToken"/>.</exception>
  public override IRegressionModel Build(IRegressionProblemData pd, IRandom random,
    CancellationToken cancellationToken, out int numberOfParameters) {
    var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, normalize: true);
    var pcdata = pca.TransformProblemData(pd);
    ComponentReducedLinearModel bestModel = null;
    // NOTE(review): the name suggests a cross-validated RMSE; it is whatever CreateLinearModel reports - confirm.
    var bestCvrmse = double.MaxValue;
    numberOfParameters = 1;

    // The upper bound does not change while building, so compute it once.
    var maxComponents = Math.Min(NumberOfComponents, pd.AllowedInputVariables.Count());
    for (var i = 1; i <= maxComponents; i++) {
      // Each iteration fits a full model, so this is the natural place to honor cancellation.
      cancellationToken.ThrowIfCancellationRequested();

      // Work on a copy so that changing the checked inputs does not affect the transformed data.
      var pd2 = (IRegressionProblemData)pcdata.Clone();
      var inputs = new HashSet<string>(pca.ComponentNames.Take(i));
      // Snapshot the checked items because their checked state is modified inside the loop.
      foreach (var v in pd2.InputVariables.CheckedItems.ToArray()) {
        pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value));
      }

      double rmse;
      var model = PreconstructedLinearModel.CreateLinearModel(pd2, out rmse);
      // Only strictly worse candidates are skipped, so on a tie the model with more components wins.
      if (rmse > bestCvrmse) continue;

      bestModel = new ComponentReducedLinearModel(pd2.TargetVariable, model, pca);
      numberOfParameters = i + 1;
      bestCvrmse = rmse;
    }
    return bestModel;
  }

  /// <summary>Returns <see cref="NumberOfComponents"/> plus two; <paramref name="pd"/> is not inspected.</summary>
  public override int MinLeafSize(IRegressionProblemData pd) {
    return NumberOfComponents + 2;
  }
  #endregion
}
}