#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Drawing;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Problems.DataAnalysis.Regression;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ArchitectureManipulators;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Manipulators;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Crossovers;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;

namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Single-objective symbolic regression problem. Holds the solution creator, evaluator,
  /// grammar, interpreter and size limits as parameters, and keeps the creator, evaluator,
  /// analyzers and tree operators parameterized consistently whenever one of them changes.
  /// </summary>
  [Item("Symbolic Regression Problem", "Represents a symbolic regression problem.")]
  [Creatable("Problems")]
  [StorableClass]
  public class SymbolicRegressionProblem : DataAnalysisProblem, ISingleObjectiveProblem {

    #region Parameter Properties
    // Typed accessors for the entries of the Parameters collection. The string keys
    // must match the names used when the parameters are added in the constructor.
    public ValueParameter<BoolValue> MaximizationParameter {
      get { return (ValueParameter<BoolValue>)Parameters["Maximization"]; }
    }
    IParameter ISingleObjectiveProblem.MaximizationParameter {
      get { return MaximizationParameter; }
    }
    public override ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
      get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
    }
    IParameter IProblem.SolutionCreatorParameter {
      get { return SolutionCreatorParameter; }
    }
    public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
    }
    public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
    }
    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
    }
    public override ValueParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
      get { return (ValueParameter<ISymbolicRegressionEvaluator>)Parameters["Evaluator"]; }
    }
    IParameter IProblem.EvaluatorParameter {
      get { return EvaluatorParameter; }
    }
    public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
      get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
    }
    public ValueParameter<IntValue> MaxExpressionLengthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
    }
    public ValueParameter<IntValue> MaxExpressionDepthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
    }
    public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
    }
    public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
    }
    public OptionalValueParameter<DoubleValue> BestKnownQualityParameter {
      get { return (OptionalValueParameter<DoubleValue>)Parameters["BestKnownQuality"]; }
    }
    IParameter ISingleObjectiveProblem.BestKnownQualityParameter {
      get { return BestKnownQualityParameter; }
    }
    #endregion

    #region Properties
    public IntValue MaxExpressionLength {
      get { return MaxExpressionLengthParameter.Value; }
      set { MaxExpressionLengthParameter.Value = value; }
    }
    public IntValue MaxExpressionDepth {
      get { return MaxExpressionDepthParameter.Value; }
      set { MaxExpressionDepthParameter.Value = value; }
    }
    public IntValue MaxFunctionDefiningBranches {
      get { return MaxFunctionDefiningBranchesParameter.Value; }
      set { MaxFunctionDefiningBranchesParameter.Value = value; }
    }
    public IntValue MaxFunctionArguments {
      get { return MaxFunctionArgumentsParameter.Value; }
      set { MaxFunctionArgumentsParameter.Value = value; }
    }
    public override SymbolicExpressionTreeCreator SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
      set { SolutionCreatorParameter.Value = value; }
    }
    ISolutionCreator IProblem.SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.Value; }
      set { LowerEstimationLimitParameter.Value = value; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.Value; }
      set { UpperEstimationLimitParameter.Value = value; }
    }
    public override ISymbolicRegressionEvaluator Evaluator {
      get { return EvaluatorParameter.Value; }
      set { EvaluatorParameter.Value = value; }
    }
    ISingleObjectiveEvaluator ISingleObjectiveProblem.Evaluator {
      get { return EvaluatorParameter.Value; }
    }
    IEvaluator IProblem.Evaluator {
      get { return EvaluatorParameter.Value; }
    }
    public ISymbolicExpressionGrammar FunctionTreeGrammar {
      get { return (ISymbolicExpressionGrammar)FunctionTreeGrammarParameter.Value; }
    }
    public DoubleValue BestKnownQuality {
      get { return BestKnownQualityParameter.Value; }
    }

    // Rebuilt by InitializeOperators(); not marked storable.
    // NOTE(review): element type IOperator was reconstructed (the generic argument was
    // missing from the file as received) - confirm against the base class declaration.
    private List<IOperator> operators;
    public override IEnumerable<IOperator> Operators {
      get { return operators; }
    }

    /// <summary>
    /// The subset of <see cref="Operators"/> that are symbolic regression analyzers.
    /// </summary>
    // NOTE(review): ISymbolicRegressionAnalyzer was reconstructed (generic argument was
    // missing); it must expose SymbolicExpressionTreeParameter, see ParameterizeAnalyzers().
    public IEnumerable<ISymbolicRegressionAnalyzer> Analyzers {
      get { return operators.OfType<ISymbolicRegressionAnalyzer>(); }
    }

    /// <summary>
    /// Multiplier applied to the target value range when deriving the estimation limits
    /// in UpdateEstimationLimits(). Always a fresh instance with value 10.0.
    /// </summary>
    public DoubleValue PunishmentFactor {
      get { return new DoubleValue(10.0); }
    }

    // The training partition of the problem data is split in half: the first half is
    // reported as the training range, the second half as the validation range.
    // The training/validation getters below return a NEW IntValue on every access.
    public IntValue TrainingSamplesStart {
      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesStart.Value); }
    }
    public IntValue TrainingSamplesEnd {
      get {
        return new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value +
          DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
      }
    }
    public IntValue ValidationSamplesStart {
      get { return TrainingSamplesEnd; }
    }
    public IntValue ValidationSamplesEnd {
      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesEnd.Value); }
    }
    // The test range is passed through unchanged (same instances as in the problem data).
    public IntValue TestSamplesStart {
      get { return DataAnalysisProblemData.TestSamplesStart; }
    }
    public IntValue TestSamplesEnd {
      get { return DataAnalysisProblemData.TestSamplesEnd; }
    }
    #endregion

    /// <summary>
    /// Creates the problem with its default creator, evaluator, grammar and interpreter,
    /// registers all parameters and wires up operators and event handlers.
    /// </summary>
    public SymbolicRegressionProblem()
      : base() {
      SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
      var evaluator = new SymbolicRegressionScaledMeanSquaredErrorEvaluator();
      var grammar = new FullFunctionalExpressionGrammar();
      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
      var interpreter = new SimpleArithmeticExpressionInterpreter();

      Parameters.Add(new ValueParameter<BoolValue>("Maximization", "Set to false as the error of the regression model should be minimized.", (BoolValue)new BoolValue(false).AsReadOnly()));
      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
      Parameters.Add(new ValueParameter<ISymbolicRegressionEvaluator>("Evaluator", "The operator which should be used to evaluate symbolic regression solutions.", evaluator));
      Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
      Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
      Parameters.Add(new OptionalValueParameter<DoubleValue>("BestKnownQuality", "The minimal error value that reached by symbolic regression solutions for the problem."));
      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression.", new IntValue(10)));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));

      creator.SymbolicExpressionTreeParameter.ActualName = "SymbolicRegressionModel";
      evaluator.QualityParameter.ActualName = "TrainingMeanSquaredError";

      ParameterizeSolutionCreator();
      ParameterizeEvaluator();

      UpdateGrammar();
      UpdateEstimationLimits();
      Initialize();
    }

    [StorableConstructor]
    private SymbolicRegressionProblem(bool deserializing) : base() { }

    // Operators and event subscriptions are rebuilt here after deserialization.
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserializationHook() {
      Initialize();
    }

    /// <summary>
    /// Clones the problem via the base implementation and then re-initializes the
    /// clone's operator list and event subscriptions.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      SymbolicRegressionProblem clone = (SymbolicRegressionProblem)base.Clone(cloner);
      clone.Initialize();
      return clone;
    }

    /// <summary>
    /// Subscribes to the ValueChanged events of the parameters themselves, i.e. to the
    /// replacement of a parameter's value object.
    /// </summary>
    private void RegisterParameterValueEvents() {
      MaxFunctionArgumentsParameter.ValueChanged += ArchitectureParameter_ValueChanged;
      MaxFunctionDefiningBranchesParameter.ValueChanged += ArchitectureParameter_ValueChanged;
      SolutionCreatorParameter.ValueChanged += SolutionCreatorParameter_ValueChanged;
      EvaluatorParameter.ValueChanged += EvaluatorParameter_ValueChanged;
    }

    /// <summary>
    /// Subscribes to events raised by the objects currently stored in the parameters:
    /// the architecture limit values, the creator's tree parameter and the evaluator's
    /// quality parameter.
    /// </summary>
    private void RegisterParameterEvents() {
      RegisterArchitectureParameterValueEvents();
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
      Evaluator.QualityParameter.ActualNameChanged += Evaluator_QualityParameter_ActualNameChanged;
    }

    /// <summary>
    /// Attaches ArchitectureParameterValue_ValueChanged to both architecture limit values.
    /// Detaching first makes the call idempotent, so a value object that is still in
    /// place when the other parameter is replaced does not accumulate duplicate handlers.
    /// </summary>
    private void RegisterArchitectureParameterValueEvents() {
      MaxFunctionArgumentsParameter.Value.ValueChanged -= ArchitectureParameterValue_ValueChanged;
      MaxFunctionArgumentsParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged -= ArchitectureParameterValue_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
    }

    #region event handling
    protected override void OnDataAnalysisProblemChanged(EventArgs e) {
      base.OnDataAnalysisProblemChanged(e);
      BestKnownQualityParameter.Value = null;
      // partitions could be changed
      ParameterizeEvaluator();
      ParameterizeAnalyzers();
      // input variables could have been changed
      UpdateGrammar();
      // estimation limits have to be recalculated
      UpdateEstimationLimits();
    }
    protected virtual void OnArchitectureParameterChanged(EventArgs e) {
      UpdateGrammar();
    }
    protected virtual void OnGrammarChanged(EventArgs e) { }
    protected virtual void OnOperatorsChanged(EventArgs e) { RaiseOperatorsChanged(e); }
    protected virtual void OnSolutionCreatorChanged(EventArgs e) {
      // Detach-then-attach keeps the subscription single even if the same creator
      // instance is seen again. The previously assigned creator is not reachable from
      // here, so its subscription cannot be removed at this point.
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged -= SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
      ParameterizeSolutionCreator();
      OnSolutionParameterNameChanged(e);
      RaiseSolutionCreatorChanged(e);
    }
    protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
      ParameterizeEvaluator();
      ParameterizeAnalyzers();
      ParameterizeOperators();
    }
    protected virtual void OnEvaluatorChanged(EventArgs e) {
      // Same idempotent subscription as in OnSolutionCreatorChanged.
      Evaluator.QualityParameter.ActualNameChanged -= Evaluator_QualityParameter_ActualNameChanged;
      Evaluator.QualityParameter.ActualNameChanged += Evaluator_QualityParameter_ActualNameChanged;
      ParameterizeEvaluator();
      ParameterizeAnalyzers();
      RaiseEvaluatorChanged(e);
    }
    protected virtual void OnQualityParameterNameChanged(EventArgs e) {
      ParameterizeAnalyzers();
    }
    #endregion

    #region event handlers
    private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
      OnSolutionCreatorChanged(e);
    }
    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
      OnSolutionParameterNameChanged(e);
    }
    private void EvaluatorParameter_ValueChanged(object sender, EventArgs e) {
      OnEvaluatorChanged(e);
    }
    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
      // One of the two architecture parameters received a new value object; make sure
      // both current value objects are observed exactly once.
      RegisterArchitectureParameterValueEvents();
      OnArchitectureParameterChanged(e);
    }
    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
      OnArchitectureParameterChanged(e);
    }
    private void Evaluator_QualityParameter_ActualNameChanged(object sender, EventArgs e) {
      OnQualityParameterNameChanged(e);
    }
    #endregion

    #region Helpers
    /// <summary>
    /// Builds the operator list and registers all event handlers. Called from the
    /// constructor, after deserialization and on a freshly created clone.
    /// </summary>
    private void Initialize() {
      InitializeOperators();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    /// <summary>
    /// Pushes the currently checked input variables into the grammar's variable symbols
    /// and, for a <see cref="GlobalSymbolicExpressionGrammar"/>, the architecture limits.
    /// </summary>
    private void UpdateGrammar() {
      // NOTE(review): the symbol type was reconstructed (generic argument was missing);
      // it is fully qualified on the assumption that a bare "Variable" would clash with
      // another type brought in by the usings - confirm.
      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
      }
      var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
      if (globalGrammar != null) {
        globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
        globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
      }
    }

    /// <summary>
    /// Recomputes the estimation limits as mean +/- PunishmentFactor * range of the target
    /// variable over the training range. Does nothing when the training range is empty or
    /// the target variable is not contained in the dataset.
    /// </summary>
    private void UpdateEstimationLimits() {
      // Read each range bound once; the property getters allocate a new IntValue per call.
      int start = TrainingSamplesStart.Value;
      int end = TrainingSamplesEnd.Value;
      if (start >= end) return;

      var targetVariable = DataAnalysisProblemData.TargetVariable.Value;
      if (!DataAnalysisProblemData.Dataset.VariableNames.Contains(targetVariable)) return;

      var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(targetVariable, start, end);
      var mean = targetValues.Average();
      var range = targetValues.Max() - targetValues.Min();
      double margin = PunishmentFactor.Value * range;
      UpperEstimationLimit = new DoubleValue(mean + margin);
      LowerEstimationLimit = new DoubleValue(mean - margin);
    }

    /// <summary>
    /// Rebuilds the operator list from all discovered tree operators plus the fixed set
    /// of pruning and analyzer operators, then parameterizes them.
    /// </summary>
    private void InitializeOperators() {
      operators = new List<IOperator>();
      // NOTE(review): both type arguments were reconstructed (missing in the file as
      // received); ISymbolicExpressionTreeOperator is what ParameterizeOperators() expects.
      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
      operators.Add(new SymbolicRegressionTournamentPruning());
      operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
      operators.Add(new FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer());
      operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
      ParameterizeOperators();
      ParameterizeAnalyzers();
    }

    /// <summary>
    /// Points the solution creator's lookup parameters at this problem's parameters.
    /// </summary>
    private void ParameterizeSolutionCreator() {
      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
    }

    /// <summary>
    /// Configures the evaluator to read the creator's tree and this problem's data, and
    /// to evaluate on the training range.
    /// </summary>
    private void ParameterizeEvaluator() {
      Evaluator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      Evaluator.RegressionProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
      Evaluator.SamplesStartParameter.Value = TrainingSamplesStart;
      Evaluator.SamplesEndParameter.Value = TrainingSamplesEnd;
    }

    /// <summary>
    /// Configures every analyzer with the current tree parameter name and applies the
    /// type-specific settings for the validation analyzer, the variable frequency
    /// analyzer and the tournament pruning operator.
    /// </summary>
    private void ParameterizeAnalyzers() {
      foreach (var analyzer in Analyzers) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;

        var fixedBestValidationSolutionAnalyzer = analyzer as FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer;
        if (fixedBestValidationSolutionAnalyzer != null) {
          fixedBestValidationSolutionAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
          fixedBestValidationSolutionAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
          fixedBestValidationSolutionAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
          fixedBestValidationSolutionAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
          fixedBestValidationSolutionAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
          fixedBestValidationSolutionAnalyzer.ValidationSamplesStartParameter.Value = ValidationSamplesStart;
          fixedBestValidationSolutionAnalyzer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd;
          fixedBestValidationSolutionAnalyzer.BestKnownQualityParameter.ActualName = BestKnownQualityParameter.Name;
          fixedBestValidationSolutionAnalyzer.QualityParameter.ActualName = Evaluator.QualityParameter.ActualName;
        }

        var varFreqAnalyzer = analyzer as SymbolicRegressionVariableFrequencyAnalyzer;
        if (varFreqAnalyzer != null) {
          varFreqAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
        }

        var pruningOperator = analyzer as SymbolicRegressionTournamentPruning;
        if (pruningOperator != null) {
          pruningOperator.SamplesStartParameter.Value = TrainingSamplesStart;
          pruningOperator.SamplesEndParameter.Value = TrainingSamplesEnd;
          pruningOperator.DataAnalysisProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
          pruningOperator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
          pruningOperator.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
          pruningOperator.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
          pruningOperator.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
        }
      }
      // Generic tree analyzers are not part of Analyzers and are handled separately.
      foreach (ISymbolicExpressionTreeAnalyzer analyzer in Operators.OfType<ISymbolicExpressionTreeAnalyzer>()) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
    }

    /// <summary>
    /// Points the lookup parameters of all tree operators (general, crossover,
    /// manipulator, architecture manipulator) at this problem's parameters.
    /// </summary>
    private void ParameterizeOperators() {
      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
        op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
        op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
        op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      }
      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
        op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
        op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
        op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
        op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
      }
    }
    #endregion
  }
}