#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HeuristicLab.Core;
using System.Xml;
using System.Diagnostics;
using HeuristicLab.DataAnalysis;
using HeuristicLab.Data;
using HeuristicLab.Operators;
using HeuristicLab.GP.StructureIdentification;
using HeuristicLab.Modeling;
using HeuristicLab.GP;
using HeuristicLab.Random;
using HeuristicLab.GP.Interfaces;

namespace HeuristicLab.LinearRegression {
  /// <summary>
  /// Algorithm wrapper that builds an operator graph for linear regression and runs it
  /// on a sequential engine.
  /// </summary>
  /// <remarks>
  /// Most properties are thin accessors for variables stored in the problem injector
  /// operator. The problem injector and the global variable injector are located by
  /// position inside the sequence built in <see cref="CreateAlgorithm"/> (indices 2 and 0),
  /// so subclasses that override <see cref="CreateAlgorithm"/> must keep that order.
  /// </remarks>
  public class LinearRegression : ItemBase, IEditable, IAlgorithm {

    /// <summary>Gets the display name of the algorithm.</summary>
    public virtual string Name { get { return "LinearRegression"; } }

    /// <summary>Gets the description of the algorithm (currently a placeholder).</summary>
    public virtual string Description { get { return "TODO"; } }

    private IEngine engine;

    /// <summary>Gets the engine that executes the operator graph of this algorithm.</summary>
    public virtual IEngine Engine {
      get { return engine; }
    }

    /// <summary>Gets or sets the "Dataset" variable of the problem injector.</summary>
    public virtual Dataset Dataset {
      get { return ProblemInjector.GetVariableValue<Dataset>("Dataset", null, false); }
      set { ProblemInjector.GetVariable("Dataset").Value = value; }
    }

    /// <summary>Gets or sets the "TargetVariable" value stored in the problem injector.</summary>
    public virtual string TargetVariable {
      get { return ProblemInjector.GetVariableValue<StringData>("TargetVariable", null, false).Data; }
      set { ProblemInjector.GetVariableValue<StringData>("TargetVariable", null, false).Data = value; }
    }

    /// <summary>
    /// Gets or sets the problem injector operator.
    /// </summary>
    /// <remarks>
    /// The operator is looked up positionally: sub-operator 2 of the main sequence must be
    /// a <c>CombinedOperator</c>, and the problem injector is sub-operator 0 of that
    /// combined operator's initial operator. The setter replaces the operator at that
    /// same position.
    /// </remarks>
    public virtual IOperator ProblemInjector {
      get {
        IOperator main = GetMainOperator();
        CombinedOperator probInjector = (CombinedOperator)main.SubOperators[2];
        return probInjector.OperatorGraph.InitialOperator.SubOperators[0];
      }
      set {
        IOperator main = GetMainOperator();
        CombinedOperator probInjector = (CombinedOperator)main.SubOperators[2];
        probInjector.OperatorGraph.InitialOperator.RemoveSubOperator(0);
        probInjector.OperatorGraph.InitialOperator.AddSubOperator(value, 0);
      }
    }

    /// <summary>
    /// Gets or sets the names stored in the "AllowedFeatures" list of the problem injector.
    /// </summary>
    /// <remarks>
    /// The getter returns a lazily evaluated projection over the underlying list.
    /// The setter replaces the current content of the list with the given names.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public IEnumerable<string> AllowedVariables {
      get {
        ItemList<StringData> allowedVariables = ProblemInjector.GetVariableValue<ItemList<StringData>>("AllowedFeatures", null, false);
        return allowedVariables.Select(x => x.Data);
      }
      set {
        if (value == null) throw new ArgumentNullException("value");
        ItemList<StringData> allowedVariables = ProblemInjector.GetVariableValue<ItemList<StringData>>("AllowedFeatures", null, false);
        // Snapshot first: value may be a lazy sequence backed by this very list
        // (e.g. the result of the getter), which must not be enumerated while it is mutated.
        List<string> variableNames = value.ToList();
        // A setter replaces the previous content; without Clear() repeated assignments accumulate.
        allowedVariables.Clear();
        foreach (string variableName in variableNames) allowedVariables.Add(new StringData(variableName));
      }
    }

    /// <summary>Gets or sets the "TrainingSamplesStart" value stored in the problem injector.</summary>
    public int TrainingSamplesStart {
      get { return ProblemInjector.GetVariableValue<IntData>("TrainingSamplesStart", null, false).Data; }
      set { ProblemInjector.GetVariableValue<IntData>("TrainingSamplesStart", null, false).Data = value; }
    }

    /// <summary>Gets or sets the "TrainingSamplesEnd" value stored in the problem injector.</summary>
    public int TrainingSamplesEnd {
      get { return ProblemInjector.GetVariableValue<IntData>("TrainingSamplesEnd", null, false).Data; }
      set { ProblemInjector.GetVariableValue<IntData>("TrainingSamplesEnd", null, false).Data = value; }
    }

    /// <summary>Gets or sets the "ValidationSamplesStart" value stored in the problem injector.</summary>
    public int ValidationSamplesStart {
      get { return ProblemInjector.GetVariableValue<IntData>("ValidationSamplesStart", null, false).Data; }
      set { ProblemInjector.GetVariableValue<IntData>("ValidationSamplesStart", null, false).Data = value; }
    }

    /// <summary>Gets or sets the "ValidationSamplesEnd" value stored in the problem injector.</summary>
    public int ValidationSamplesEnd {
      get { return ProblemInjector.GetVariableValue<IntData>("ValidationSamplesEnd", null, false).Data; }
      set { ProblemInjector.GetVariableValue<IntData>("ValidationSamplesEnd", null, false).Data = value; }
    }

    /// <summary>Gets or sets the "TestSamplesStart" value stored in the problem injector.</summary>
    public int TestSamplesStart {
      get { return ProblemInjector.GetVariableValue<IntData>("TestSamplesStart", null, false).Data; }
      set { ProblemInjector.GetVariableValue<IntData>("TestSamplesStart", null, false).Data = value; }
    }

    /// <summary>Gets or sets the "TestSamplesEnd" value stored in the problem injector.</summary>
    public int TestSamplesEnd {
      get { return ProblemInjector.GetVariableValue<IntData>("TestSamplesEnd", null, false).Data; }
      set { ProblemInjector.GetVariableValue<IntData>("TestSamplesEnd", null, false).Data = value; }
    }

    /// <summary>
    /// Gets the analyzer model built from the global scope of the engine.
    /// </summary>
    /// <exception cref="InvalidOperationException">The engine has not terminated yet.</exception>
    public virtual IAnalyzerModel Model {
      get {
        if (!engine.Terminated) throw new InvalidOperationException("The algorithm is still running. Wait until the algorithm is terminated to retrieve the result.");
        IScope bestModelScope = engine.GlobalScope;
        return CreateLRModel(bestModelScope);
      }
    }

    /// <summary>
    /// Creates a sequential engine and installs the operator graph returned by
    /// <see cref="CreateAlgorithm"/> as its initial operator.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <see cref="CreateAlgorithm"/> is virtual and is invoked from this
    /// constructor, so overrides run before the subclass constructor body.
    /// </remarks>
    public LinearRegression() {
      engine = new SequentialEngine.SequentialEngine();
      CombinedOperator algo = CreateAlgorithm();
      engine.OperatorGraph.AddOperator(algo);
      engine.OperatorGraph.InitialOperator = algo;
    }

    /// <summary>
    /// Builds the combined operator that makes up the whole algorithm.
    /// </summary>
    /// <returns>
    /// A combined operator whose initial operator is a sequence of: global injector,
    /// random injector, problem injector, tree evaluator injector, linear regression
    /// operator and post-processing operator (in this order).
    /// </returns>
    protected virtual CombinedOperator CreateAlgorithm() {
      CombinedOperator algo = new CombinedOperator();
      SequentialProcessor seq = new SequentialProcessor();
      algo.Name = Name;
      seq.Name = Name;

      IOperator globalInjector = CreateGlobalInjector();

      HL3TreeEvaluatorInjector treeEvaluatorInjector = new HL3TreeEvaluatorInjector();

      LinearRegressionOperator lrOperator = new LinearRegressionOperator();
      lrOperator.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart";
      lrOperator.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd";

      // The order matters: GetVariableInjector() reads index 0 and ProblemInjector reads index 2.
      seq.AddSubOperator(globalInjector);
      seq.AddSubOperator(new RandomInjector());
      seq.AddSubOperator(CreateProblemInjector());
      seq.AddSubOperator(treeEvaluatorInjector);
      seq.AddSubOperator(lrOperator);
      seq.AddSubOperator(CreatePostProcessingOperator());

      algo.OperatorGraph.InitialOperator = seq;
      algo.OperatorGraph.AddOperator(seq);

      return algo;
    }

    /// <summary>
    /// Creates the operator that is placed at index 2 of the main sequence.
    /// </summary>
    /// <remarks>
    /// The <see cref="ProblemInjector"/> accessor casts this operator to
    /// <c>CombinedOperator</c>, so overrides have to return a compatible operator.
    /// </remarks>
    protected virtual IOperator CreateProblemInjector() {
      return DefaultRegressionOperators.CreateProblemInjector();
    }

    /// <summary>
    /// Creates the variable injector holding the global variables "PunishmentFactor",
    /// "TotalEvaluatedNodes" and "MaxNumberOfTrainingSamples".
    /// </summary>
    protected virtual VariableInjector CreateGlobalInjector() {
      VariableInjector injector = new VariableInjector();
      injector.AddVariable(new HeuristicLab.Core.Variable("PunishmentFactor", new DoubleData(1000)));
      injector.AddVariable(new HeuristicLab.Core.Variable("TotalEvaluatedNodes", new DoubleData(0)));
      injector.AddVariable(new HeuristicLab.Core.Variable("MaxNumberOfTrainingSamples", new IntData(4000)));
      return injector;
    }

    /// <summary>
    /// Builds the "Model Analyzer" operator: evaluates the variable "LinearRegressionModel"
    /// on the training, validation and test partitions, extracts the input variable names,
    /// builds a predictor, calculates variable quality impacts on the training partition and
    /// finally runs the operator returned by <see cref="CreateModelAnalyzerOperator"/>.
    /// </summary>
    protected virtual IOperator CreatePostProcessingOperator() {
      CombinedOperator op = new CombinedOperator();
      op.Name = "Model Analyzer";
      SequentialProcessor seq = new SequentialProcessor();

      // A dedicated tree evaluator is injected under its own name for the analysis steps.
      HL3TreeEvaluatorInjector evaluatorInjector = new HL3TreeEvaluatorInjector();
      evaluatorInjector.AddVariable(new HeuristicLab.Core.Variable("PunishmentFactor", new DoubleData(1000.0)));
      evaluatorInjector.GetVariableInfo("TreeEvaluator").ActualName = "ModelAnalysisTreeEvaluator";

      #region simple evaluators
      SimpleEvaluator trainingEvaluator = new SimpleEvaluator();
      trainingEvaluator.Name = "TrainingEvaluator";
      trainingEvaluator.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
      trainingEvaluator.GetVariableInfo("SamplesStart").ActualName = "TrainingSamplesStart";
      trainingEvaluator.GetVariableInfo("SamplesEnd").ActualName = "TrainingSamplesEnd";
      trainingEvaluator.GetVariableInfo("Values").ActualName = "TrainingValues";
      trainingEvaluator.GetVariableInfo("TreeEvaluator").ActualName = "ModelAnalysisTreeEvaluator";

      SimpleEvaluator validationEvaluator = new SimpleEvaluator();
      validationEvaluator.Name = "ValidationEvaluator";
      validationEvaluator.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
      validationEvaluator.GetVariableInfo("SamplesStart").ActualName = "ValidationSamplesStart";
      validationEvaluator.GetVariableInfo("SamplesEnd").ActualName = "ValidationSamplesEnd";
      validationEvaluator.GetVariableInfo("Values").ActualName = "ValidationValues";
      validationEvaluator.GetVariableInfo("TreeEvaluator").ActualName = "ModelAnalysisTreeEvaluator";

      SimpleEvaluator testEvaluator = new SimpleEvaluator();
      testEvaluator.Name = "TestEvaluator";
      testEvaluator.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
      testEvaluator.GetVariableInfo("SamplesStart").ActualName = "TestSamplesStart";
      testEvaluator.GetVariableInfo("SamplesEnd").ActualName = "TestSamplesEnd";
      testEvaluator.GetVariableInfo("Values").ActualName = "TestValues";
      testEvaluator.GetVariableInfo("TreeEvaluator").ActualName = "ModelAnalysisTreeEvaluator";

      seq.AddSubOperator(evaluatorInjector);
      seq.AddSubOperator(trainingEvaluator);
      seq.AddSubOperator(validationEvaluator);
      seq.AddSubOperator(testEvaluator);
      #endregion

      #region variable impacts
      // calculate and set variable impacts
      VariableNamesExtractor namesExtractor = new VariableNamesExtractor();
      namesExtractor.GetVariableInfo("VariableNames").ActualName = "InputVariableNames";
      namesExtractor.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";

      PredictorBuilder predictorBuilder = new PredictorBuilder();
      predictorBuilder.GetVariableInfo("TreeEvaluator").ActualName = "ModelAnalysisTreeEvaluator";
      predictorBuilder.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";

      seq.AddSubOperator(namesExtractor);
      seq.AddSubOperator(predictorBuilder);

      VariableQualityImpactCalculator qualityImpactCalculator = new VariableQualityImpactCalculator();
      qualityImpactCalculator.GetVariableInfo("SamplesStart").ActualName = "TrainingSamplesStart";
      qualityImpactCalculator.GetVariableInfo("SamplesEnd").ActualName = "TrainingSamplesEnd";

      seq.AddSubOperator(qualityImpactCalculator);
      #endregion

      seq.AddSubOperator(CreateModelAnalyzerOperator());

      op.OperatorGraph.AddOperator(seq);
      op.OperatorGraph.InitialOperator = seq;
      return op;
    }

    /// <summary>
    /// Creates the final operator of the post-processing sequence.
    /// </summary>
    protected virtual IOperator CreateModelAnalyzerOperator() {
      return DefaultRegressionOperators.CreatePostProcessingOperator();
    }

    /// <summary>
    /// Creates an analyzer model from the results stored in the given scope.
    /// </summary>
    /// <param name="bestModelScope">
    /// Scope that contains the variable "LinearRegressionModel" and the variable quality impacts.
    /// </param>
    /// <returns>
    /// The model with tree meta data, the results added by
    /// <see cref="CreateSpecificLRModel"/> and one quality impact per input variable.
    /// </returns>
    protected virtual IAnalyzerModel CreateLRModel(IScope bestModelScope) {
      var model = new AnalyzerModel();
      IGeneticProgrammingModel gpModel = bestModelScope.GetVariableValue<IGeneticProgrammingModel>("LinearRegressionModel", false);
      model.SetMetaData("TreeSize", gpModel.Size);
      model.SetMetaData("TreeHeight", gpModel.Height);
      double treeComplexity = TreeComplexityEvaluator.Calculate(gpModel.FunctionTree);
      model.SetMetaData("TreeComplexity", treeComplexity);
      model.SetMetaData("AverageNodeComplexity", treeComplexity / gpModel.Size);

      CreateSpecificLRModel(bestModelScope, model);

      #region variable impacts
      ItemList qualityImpacts = bestModelScope.GetVariableValue<ItemList>(ModelingResult.VariableQualityImpact.ToString(), false);
      // Each row is a list holding the variable name at index 0 and its impact at index 1.
      foreach (ItemList row in qualityImpacts) {
        string variableName = ((StringData)row[0]).Data;
        double impact = ((DoubleData)row[1]).Data;
        model.SetVariableResult(ModelingResult.VariableQualityImpact, variableName, impact);
        model.AddInputVariable(variableName);
      }
      #endregion

      return model;
    }

    /// <summary>
    /// Hook for adding further results to the model; the default implementation delegates
    /// to <c>DefaultRegressionOperators.PopulateAnalyzerModel</c>.
    /// </summary>
    /// <param name="bestModelScope">Scope that holds the results of the run.</param>
    /// <param name="model">Model that receives the results.</param>
    protected virtual void CreateSpecificLRModel(IScope bestModelScope, IAnalyzerModel model) {
      DefaultRegressionOperators.PopulateAnalyzerModel(bestModelScope, model);
    }

    /// <summary>
    /// Gets the initial operator of the combined operator that is the initial operator
    /// of the engine (the main sequence built by <see cref="CreateAlgorithm"/>).
    /// </summary>
    protected virtual IOperator GetMainOperator() {
      CombinedOperator lr = (CombinedOperator)Engine.OperatorGraph.InitialOperator;
      return lr.OperatorGraph.InitialOperator;
    }

    /// <summary>
    /// Gets sub-operator 0 of the main operator, which is the global variable injector
    /// in the graph built by <see cref="CreateAlgorithm"/>.
    /// </summary>
    protected virtual IOperator GetVariableInjector() {
      return GetMainOperator().SubOperators[0];
    }

    /// <summary>Creates the view of the underlying engine.</summary>
    public override IView CreateView() {
      return engine.CreateView();
    }

    #region IEditable Members

    /// <summary>
    /// Creates the editor of the underlying engine.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the engine is cast to <c>SequentialEngine</c>; this presumably fails
    /// if <see cref="Populate"/> restored an engine of a different type - confirm.
    /// </remarks>
    public virtual IEditor CreateEditor() {
      return ((SequentialEngine.SequentialEngine)engine).CreateEditor();
    }

    #endregion

    #region persistence
    /// <summary>Clones this algorithm including its engine.</summary>
    /// <param name="clonedObjects">Dictionary of already cloned objects, passed on to the base class and the engine clone.</param>
    public override object Clone(IDictionary<Guid, object> clonedObjects) {
      LinearRegression clone = (LinearRegression)base.Clone(clonedObjects);
      clone.engine = (IEngine)Auxiliary.Clone(Engine, clonedObjects);
      return clone;
    }

    /// <summary>Saves this algorithm; the engine is persisted as child node "Engine".</summary>
    /// <param name="name">Name passed on to the base implementation.</param>
    /// <param name="document">Document in which the node is created.</param>
    /// <param name="persistedObjects">Dictionary of already persisted objects.</param>
    public override XmlNode GetXmlNode(string name, XmlDocument document, IDictionary<Guid, IStorable> persistedObjects) {
      XmlNode node = base.GetXmlNode(name, document, persistedObjects);
      node.AppendChild(PersistenceManager.Persist("Engine", engine, document, persistedObjects));
      return node;
    }

    /// <summary>Restores this algorithm; the engine is read from child node "Engine".</summary>
    /// <param name="node">Node that was created by <see cref="GetXmlNode"/>.</param>
    /// <param name="restoredObjects">Dictionary of already restored objects.</param>
    public override void Populate(XmlNode node, IDictionary<Guid, IStorable> restoredObjects) {
      base.Populate(node, restoredObjects);
      engine = (IEngine)PersistenceManager.Restore(node.SelectSingleNode("Engine"), restoredObjects);
    }
    #endregion
  }
}