#region License Information /* HeuristicLab * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using System.Text; using HeuristicLab.Core; using System.Xml; using System.Diagnostics; using HeuristicLab.DataAnalysis; using HeuristicLab.Data; using HeuristicLab.Operators; using HeuristicLab.GP.StructureIdentification; using HeuristicLab.Modeling; using HeuristicLab.GP; using HeuristicLab.Random; namespace HeuristicLab.LinearRegression { public class LinearRegression : ItemBase, IEditable, IAlgorithm { public string Name { get { return "LinearRegression"; } } public string Description { get { return "TODO"; } } private SequentialEngine.SequentialEngine engine; public IEngine Engine { get { return engine; } } public Dataset Dataset { get { return ProblemInjector.GetVariableValue("Dataset", null, false); } set { ProblemInjector.GetVariable("Dataset").Value = value; } } public int TargetVariable { get { return ProblemInjector.GetVariableValue("TargetVariable", null, false).Data; } set { ProblemInjector.GetVariableValue("TargetVariable", null, false).Data = value; } } public IOperator ProblemInjector { get { IOperator main = GetMainOperator(); return main.SubOperators[1]; } set { IOperator main = GetMainOperator(); main.RemoveSubOperator(1); main.AddSubOperator(value, 1); } } public IModel Model { get { if (!engine.Terminated) throw new InvalidOperationException("The algorithm is still running. Wait until the algorithm is terminated to retrieve the result."); IScope bestModelScope = engine.GlobalScope; return CreateLRModel(bestModelScope); } } public LinearRegression() { engine = new SequentialEngine.SequentialEngine(); CombinedOperator algo = CreateAlgorithm(); engine.OperatorGraph.AddOperator(algo); engine.OperatorGraph.InitialOperator = algo; } private CombinedOperator CreateAlgorithm() { CombinedOperator algo = new CombinedOperator(); SequentialProcessor seq = new SequentialProcessor(); algo.Name = "LinearRegression"; seq.Name = "LinearRegression"; var randomInjector = new RandomInjector(); randomInjector.Name = "Random Injector"; IOperator globalInjector = CreateGlobalInjector(); ProblemInjector problemInjector = new ProblemInjector(); problemInjector.GetVariableInfo("MaxNumberOfTrainingSamples").Local = true; problemInjector.AddVariable(new HeuristicLab.Core.Variable("MaxNumberOfTrainingSamples", new IntData(5000))); IOperator shuffler = new DatasetShuffler(); shuffler.GetVariableInfo("ShuffleStart").ActualName = "TrainingSamplesStart"; shuffler.GetVariableInfo("ShuffleEnd").ActualName = "TrainingSamplesEnd"; LinearRegressionOperator lrOperator = new LinearRegressionOperator(); lrOperator.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; lrOperator.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; seq.AddSubOperator(randomInjector); seq.AddSubOperator(problemInjector); seq.AddSubOperator(globalInjector); seq.AddSubOperator(shuffler); seq.AddSubOperator(lrOperator); seq.AddSubOperator(CreateModelAnalyser()); algo.OperatorGraph.InitialOperator = seq; algo.OperatorGraph.AddOperator(seq); return algo; } private IOperator CreateGlobalInjector() { VariableInjector injector = new VariableInjector(); injector.AddVariable(new HeuristicLab.Core.Variable("PunishmentFactor", new DoubleData(10))); injector.AddVariable(new HeuristicLab.Core.Variable("TotalEvaluatedNodes", new DoubleData(0))); injector.AddVariable(new HeuristicLab.Core.Variable("TreeEvaluator", new HL2TreeEvaluator())); injector.AddVariable(new HeuristicLab.Core.Variable("UseEstimatedTargetValue", new BoolData(false))); return injector; } private IOperator CreateModelAnalyser() { CombinedOperator modelAnalyser = new CombinedOperator(); modelAnalyser.Name = "Model Analyzer"; SequentialProcessor seqProc = new SequentialProcessor(); #region MSE MeanSquaredErrorEvaluator trainingMSE = new MeanSquaredErrorEvaluator(); trainingMSE.Name = "TrainingMseEvaluator"; trainingMSE.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; trainingMSE.GetVariableInfo("MSE").ActualName = "TrainingQuality"; trainingMSE.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; trainingMSE.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; MeanSquaredErrorEvaluator validationMSE = new MeanSquaredErrorEvaluator(); validationMSE.Name = "ValidationMseEvaluator"; validationMSE.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; validationMSE.GetVariableInfo("MSE").ActualName = "ValidationQuality"; validationMSE.GetVariableInfo("SamplesStart").ActualName = "ValidationSamplesStart"; validationMSE.GetVariableInfo("SamplesEnd").ActualName = "ValidationSamplesEnd"; MeanSquaredErrorEvaluator testMSE = new MeanSquaredErrorEvaluator(); testMSE.Name = "TestMseEvaluator"; testMSE.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; testMSE.GetVariableInfo("MSE").ActualName = "TestQuality"; testMSE.GetVariableInfo("SamplesStart").ActualName = "TestSamplesStart"; testMSE.GetVariableInfo("SamplesEnd").ActualName = "TestSamplesEnd"; #endregion #region R2 CoefficientOfDeterminationEvaluator trainingR2 = new CoefficientOfDeterminationEvaluator(); trainingR2.Name = "TrainingR2Evaluator"; trainingR2.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; trainingR2.GetVariableInfo("R2").ActualName = "TrainingR2"; trainingR2.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; trainingR2.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; CoefficientOfDeterminationEvaluator validationR2 = new CoefficientOfDeterminationEvaluator(); validationR2.Name = "ValidationR2Evaluator"; validationR2.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; validationR2.GetVariableInfo("R2").ActualName = "ValidationR2"; validationR2.GetVariableInfo("SamplesStart").ActualName = "ValidationSamplesStart"; validationR2.GetVariableInfo("SamplesEnd").ActualName = "ValidationSamplesEnd"; CoefficientOfDeterminationEvaluator testR2 = new CoefficientOfDeterminationEvaluator(); testR2.Name = "TestR2Evaluator"; testR2.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; testR2.GetVariableInfo("R2").ActualName = "TestR2"; testR2.GetVariableInfo("SamplesStart").ActualName = "TestSamplesStart"; testR2.GetVariableInfo("SamplesEnd").ActualName = "TestSamplesEnd"; #endregion #region MAPE MeanAbsolutePercentageErrorEvaluator trainingMAPE = new MeanAbsolutePercentageErrorEvaluator(); trainingMAPE.Name = "TrainingMapeEvaluator"; trainingMAPE.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; trainingMAPE.GetVariableInfo("MAPE").ActualName = "TrainingMAPE"; trainingMAPE.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; trainingMAPE.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; MeanAbsolutePercentageErrorEvaluator validationMAPE = new MeanAbsolutePercentageErrorEvaluator(); validationMAPE.Name = "ValidationMapeEvaluator"; validationMAPE.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; validationMAPE.GetVariableInfo("MAPE").ActualName = "ValidationMAPE"; validationMAPE.GetVariableInfo("SamplesStart").ActualName = "ValidationSamplesStart"; validationMAPE.GetVariableInfo("SamplesEnd").ActualName = "ValidationSamplesEnd"; MeanAbsolutePercentageErrorEvaluator testMAPE = new MeanAbsolutePercentageErrorEvaluator(); testMAPE.Name = "TestMapeEvaluator"; testMAPE.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; testMAPE.GetVariableInfo("MAPE").ActualName = "TestMAPE"; testMAPE.GetVariableInfo("SamplesStart").ActualName = "TestSamplesStart"; testMAPE.GetVariableInfo("SamplesEnd").ActualName = "TestSamplesEnd"; #endregion #region MAPRE MeanAbsolutePercentageOfRangeErrorEvaluator trainingMAPRE = new MeanAbsolutePercentageOfRangeErrorEvaluator(); trainingMAPRE.Name = "TrainingMapreEvaluator"; trainingMAPRE.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; trainingMAPRE.GetVariableInfo("MAPRE").ActualName = "TrainingMAPRE"; trainingMAPRE.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; trainingMAPRE.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; MeanAbsolutePercentageOfRangeErrorEvaluator validationMAPRE = new MeanAbsolutePercentageOfRangeErrorEvaluator(); validationMAPRE.Name = "ValidationMapreEvaluator"; validationMAPRE.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; validationMAPRE.GetVariableInfo("MAPRE").ActualName = "ValidationMAPRE"; validationMAPRE.GetVariableInfo("SamplesStart").ActualName = "ValidationSamplesStart"; validationMAPRE.GetVariableInfo("SamplesEnd").ActualName = "ValidationSamplesEnd"; MeanAbsolutePercentageOfRangeErrorEvaluator testMAPRE = new MeanAbsolutePercentageOfRangeErrorEvaluator(); testMAPRE.Name = "TestMapreEvaluator"; testMAPRE.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; testMAPRE.GetVariableInfo("MAPRE").ActualName = "TestMAPRE"; testMAPRE.GetVariableInfo("SamplesStart").ActualName = "TestSamplesStart"; testMAPRE.GetVariableInfo("SamplesEnd").ActualName = "TestSamplesEnd"; #endregion #region VAF VarianceAccountedForEvaluator trainingVAF = new VarianceAccountedForEvaluator(); trainingVAF.Name = "TrainingVafEvaluator"; trainingVAF.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; trainingVAF.GetVariableInfo("VAF").ActualName = "TrainingVAF"; trainingVAF.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart"; trainingVAF.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd"; VarianceAccountedForEvaluator validationVAF = new VarianceAccountedForEvaluator(); validationVAF.Name = "ValidationVafEvaluator"; validationVAF.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; validationVAF.GetVariableInfo("VAF").ActualName = "ValidationVAF"; validationVAF.GetVariableInfo("SamplesStart").ActualName = "ValidationSamplesStart"; validationVAF.GetVariableInfo("SamplesEnd").ActualName = "ValidationSamplesEnd"; VarianceAccountedForEvaluator testVAF = new VarianceAccountedForEvaluator(); testVAF.Name = "TestVafEvaluator"; testVAF.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; testVAF.GetVariableInfo("VAF").ActualName = "TestVAF"; testVAF.GetVariableInfo("SamplesStart").ActualName = "TestSamplesStart"; testVAF.GetVariableInfo("SamplesEnd").ActualName = "TestSamplesEnd"; #endregion HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator evalImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator(); evalImpactCalc.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; HeuristicLab.Modeling.VariableQualityImpactCalculator qualImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableQualityImpactCalculator(); qualImpactCalc.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel"; seqProc.AddSubOperator(trainingMSE); seqProc.AddSubOperator(validationMSE); seqProc.AddSubOperator(testMSE); seqProc.AddSubOperator(trainingR2); seqProc.AddSubOperator(validationR2); seqProc.AddSubOperator(testR2); seqProc.AddSubOperator(trainingMAPE); seqProc.AddSubOperator(validationMAPE); seqProc.AddSubOperator(testMAPE); seqProc.AddSubOperator(trainingMAPRE); seqProc.AddSubOperator(validationMAPRE); seqProc.AddSubOperator(testMAPRE); seqProc.AddSubOperator(trainingVAF); seqProc.AddSubOperator(validationVAF); seqProc.AddSubOperator(testVAF); seqProc.AddSubOperator(qualImpactCalc); seqProc.AddSubOperator(evalImpactCalc); modelAnalyser.OperatorGraph.InitialOperator = seqProc; modelAnalyser.OperatorGraph.AddOperator(seqProc); return modelAnalyser; } protected internal virtual Model CreateLRModel(IScope bestModelScope) { Model model = new Model(); model.TrainingMeanSquaredError = bestModelScope.GetVariableValue("TrainingQuality", false).Data; model.ValidationMeanSquaredError = bestModelScope.GetVariableValue("ValidationQuality", false).Data; model.TestMeanSquaredError = bestModelScope.GetVariableValue("TestQuality", false).Data; model.TrainingCoefficientOfDetermination = bestModelScope.GetVariableValue("TrainingR2", false).Data; model.ValidationCoefficientOfDetermination = bestModelScope.GetVariableValue("ValidationR2", false).Data; model.TestCoefficientOfDetermination = bestModelScope.GetVariableValue("TestR2", false).Data; model.TrainingMeanAbsolutePercentageError = bestModelScope.GetVariableValue("TrainingMAPE", false).Data; model.ValidationMeanAbsolutePercentageError = bestModelScope.GetVariableValue("ValidationMAPE", false).Data; model.TestMeanAbsolutePercentageError = bestModelScope.GetVariableValue("TestMAPE", false).Data; model.TrainingMeanAbsolutePercentageOfRangeError = bestModelScope.GetVariableValue("TrainingMAPRE", false).Data; model.ValidationMeanAbsolutePercentageOfRangeError = bestModelScope.GetVariableValue("ValidationMAPRE", false).Data; model.TestMeanAbsolutePercentageOfRangeError = bestModelScope.GetVariableValue("TestMAPRE", false).Data; model.TrainingVarianceAccountedFor = bestModelScope.GetVariableValue("TrainingVAF", false).Data; model.ValidationVarianceAccountedFor = bestModelScope.GetVariableValue("ValidationVAF", false).Data; model.TestVarianceAccountedFor = bestModelScope.GetVariableValue("TestVAF", false).Data; model.Data = bestModelScope.GetVariableValue("LinearRegressionModel", false); HeuristicLab.DataAnalysis.Dataset ds = bestModelScope.GetVariableValue("Dataset", true); model.Dataset = ds; model.TargetVariable = ds.GetVariableName(bestModelScope.GetVariableValue("TargetVariable", true).Data); ItemList evaluationImpacts = bestModelScope.GetVariableValue("VariableEvaluationImpacts", false); ItemList qualityImpacts = bestModelScope.GetVariableValue("VariableQualityImpacts", false); foreach (ItemList row in evaluationImpacts) { string variableName = ((StringData)row[0]).Data; double impact = ((DoubleData)row[1]).Data; model.SetVariableEvaluationImpact(variableName, impact); } foreach (ItemList row in qualityImpacts) { string variableName = ((StringData)row[0]).Data; double impact = ((DoubleData)row[1]).Data; model.SetVariableQualityImpact(variableName, impact); } return model; } private IOperator GetMainOperator() { CombinedOperator lr = (CombinedOperator)Engine.OperatorGraph.InitialOperator; return lr.OperatorGraph.InitialOperator; } public override IView CreateView() { return engine.CreateView(); } #region IEditable Members public IEditor CreateEditor() { return engine.CreateEditor(); } #endregion } }