#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers;
using HeuristicLab.Problems.DataAnalysis.Symbolic;

namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Abstract base class for symbolic regression problems. It owns the parameters
  /// describing the expression grammar, the tree interpreter, the size limits and
  /// the estimation limits, and keeps the solution creator, operators and analyzers
  /// wired to those parameters.
  /// </summary>
  [StorableClass]
  public abstract class SymbolicRegressionProblemBase : DataAnalysisProblem, IProblem {

    #region Parameter Properties
    /// <summary>Parameter holding the creator of new symbolic expression trees.</summary>
    public new ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
      get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
    }
    IParameter IProblem.SolutionCreatorParameter {
      get { return SolutionCreatorParameter; }
    }
    /// <summary>Parameter holding the lower limit for estimated values.</summary>
    public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
    }
    /// <summary>Parameter holding the upper limit for estimated values.</summary>
    public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
    }
    /// <summary>Parameter holding the interpreter used to evaluate expression trees.</summary>
    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
    }
    /// <summary>Parameter holding the grammar used for symbolic regression models.</summary>
    public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
      get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
    }
    /// <summary>Parameter holding the maximal length of an expression.</summary>
    public ValueParameter<IntValue> MaxExpressionLengthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
    }
    /// <summary>Parameter holding the maximal depth of an expression.</summary>
    public ValueParameter<IntValue> MaxExpressionDepthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
    }
    /// <summary>Parameter holding the maximal number of automatically defined functions.</summary>
    public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
    }
    /// <summary>Parameter holding the maximal number of arguments of automatically defined functions.</summary>
    public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
    }
    #endregion

    #region Properties
    public IntValue MaxExpressionLength {
      get { return MaxExpressionLengthParameter.Value; }
      set { MaxExpressionLengthParameter.Value = value; }
    }
    public IntValue MaxExpressionDepth {
      get { return MaxExpressionDepthParameter.Value; }
      set { MaxExpressionDepthParameter.Value = value; }
    }
    public IntValue MaxFunctionDefiningBranches {
      get { return MaxFunctionDefiningBranchesParameter.Value; }
      set { MaxFunctionDefiningBranchesParameter.Value = value; }
    }
    public IntValue MaxFunctionArguments {
      get { return MaxFunctionArgumentsParameter.Value; }
      set { MaxFunctionArgumentsParameter.Value = value; }
    }
    public new SymbolicExpressionTreeCreator SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
      set { SolutionCreatorParameter.Value = value; }
    }
    ISolutionCreator IProblem.SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.Value; }
      set { LowerEstimationLimitParameter.Value = value; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.Value; }
      set { UpperEstimationLimitParameter.Value = value; }
    }
    public ISymbolicExpressionGrammar FunctionTreeGrammar {
      get { return (ISymbolicExpressionGrammar)FunctionTreeGrammarParameter.Value; }
      private set { FunctionTreeGrammarParameter.Value = value; }
    }
    /// <summary>All operators registered with this problem.</summary>
    public override IEnumerable<IOperator> Operators {
      get { return operators; }
    }
    /// <summary>The subset of the registered operators that are symbolic expression tree analyzers.</summary>
    public IEnumerable<ISymbolicExpressionTreeAnalyzer> Analyzers {
      get { return operators.OfType<ISymbolicExpressionTreeAnalyzer>(); }
    }
    /// <summary>
    /// Factor applied to the target value range when the estimation limits are calculated.
    /// A new <see cref="DoubleValue"/> (10.0) is returned on every access.
    /// </summary>
    public DoubleValue PunishmentFactor {
      get { return new DoubleValue(10.0); }
    }
    /// <summary>The first training index, wrapped in a newly created <see cref="IntValue"/>.</summary>
    public IntValue TrainingSamplesStart {
      get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.First()); }
    }
    /// <summary>
    /// The training index at position count * (1 - validation percentage) - 1
    /// (truncated to an integer, negative positions are replaced by 0),
    /// wrapped in a newly created <see cref="IntValue"/>.
    /// </summary>
    public IntValue TrainingSamplesEnd {
      get {
        int endIndex = (int)(DataAnalysisProblemData.TrainingIndizes.Count() * (1.0 - DataAnalysisProblemData.ValidationPercentage.Value) - 1);
        if (endIndex < 0) endIndex = 0;
        return new IntValue(DataAnalysisProblemData.TrainingIndizes.ElementAt(endIndex));
      }
    }
    /// <summary>The validation partition starts where the training partition ends.</summary>
    public IntValue ValidationSamplesStart {
      get { return TrainingSamplesEnd; }
    }
    /// <summary>One past the last training index, wrapped in a newly created <see cref="IntValue"/>.</summary>
    public IntValue ValidationSamplesEnd {
      get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.Last() + 1); }
    }
    public IntValue TestSamplesStart {
      get { return DataAnalysisProblemData.TestSamplesStart; }
    }
    public IntValue TestSamplesEnd {
      get { return DataAnalysisProblemData.TestSamplesEnd; }
    }
    #endregion

    [Storable]
    private List<IOperator> operators;

    /// <summary>Constructor used by the persistence framework; event registration happens in the after-deserialization hook.</summary>
    [StorableConstructor]
    protected SymbolicRegressionProblemBase(bool deserializing) : base(deserializing) { }

    /// <summary>Cloning constructor: clones every operator of <paramref name="original"/> and re-registers the event handlers.</summary>
    protected SymbolicRegressionProblemBase(SymbolicRegressionProblemBase original, Cloner cloner)
      : base(original, cloner) {
      operators = original.operators.Select(x => (IOperator)cloner.Clone(x)).ToList();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    /// <summary>
    /// Creates the problem with its default parameters, then parameterizes the solution creator,
    /// updates grammar and estimation limits, initializes the operators and registers the event handlers.
    /// </summary>
    public SymbolicRegressionProblemBase()
      : base() {
      SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
      var grammar = new FullFunctionalExpressionGrammar();
      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
      var interpreter = new SimpleArithmeticExpressionInterpreter();

      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
      Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
      Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression.", new IntValue(10)));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));

      creator.SymbolicExpressionTreeParameter.ActualName = "SymbolicRegressionModel";

      ParameterizeSolutionCreator();
      UpdateGrammar();
      UpdateEstimationLimits();
      InitializeOperators();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    // NOTE: despite its name, this method subscribes to the ValueChanged events of the
    // parameters themselves (raised when a parameter receives a new value object).
    private void RegisterParameterValueEvents() {
      MaxFunctionArgumentsParameter.ValueChanged += ArchitectureParameter_ValueChanged;
      MaxFunctionDefiningBranchesParameter.ValueChanged += ArchitectureParameter_ValueChanged;
      SolutionCreatorParameter.ValueChanged += SolutionCreatorParameter_ValueChanged;
      FunctionTreeGrammarParameter.ValueChanged += FunctionTreeGrammarParameter_ValueChanged;
    }

    // NOTE: despite its name, this method subscribes to events of the current parameter
    // values (the IntValue objects and the solution creator's tree parameter).
    private void RegisterParameterEvents() {
      MaxFunctionArgumentsParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
    }

    #region event handling
    protected override void OnDataAnalysisProblemChanged(EventArgs e) {
      base.OnDataAnalysisProblemChanged(e);
      // partitions could be changed
      ParameterizeAnalyzers();
      // input variables could have been changed
      UpdateGrammar();
      // estimation limits have to be recalculated
      UpdateEstimationLimits();
    }
    protected virtual void OnArchitectureParameterChanged(EventArgs e) {
      UpdateGrammar();
    }
    protected virtual void OnGrammarChanged() {
      UpdateGrammar();
    }
    protected virtual void OnOperatorsChanged(EventArgs e) {
      RaiseOperatorsChanged(e);
    }
    protected virtual void OnSolutionCreatorChanged(EventArgs e) {
      // NOTE(review): the handler is attached to the new creator only; it is not
      // detached from the previous creator because that instance is not known here.
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
      ParameterizeSolutionCreator();
      OnSolutionParameterNameChanged(e);
      RaiseSolutionCreatorChanged(e);
    }
    protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
      ParameterizeAnalyzers();
      ParameterizeOperators();
    }
    protected virtual void OnEvaluatorChanged(EventArgs e) {
      RaiseEvaluatorChanged(e);
    }
    #endregion

    #region event handlers
    private void FunctionTreeGrammarParameter_ValueChanged(object sender, EventArgs e) {
      // Wrap any other grammar in a GlobalSymbolicExpressionGrammar. The assignment changes
      // the parameter value again, so this handler is expected to run a second time with
      // the wrapped grammar.
      if (!(FunctionTreeGrammar is GlobalSymbolicExpressionGrammar))
        FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(FunctionTreeGrammar);
      OnGrammarChanged();
    }
    private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
      OnSolutionCreatorChanged(e);
    }
    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
      OnSolutionParameterNameChanged(e);
    }
    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
      // A parameter received a new value object: subscribe to the current values of both
      // architecture parameters.
      // NOTE(review): a value that was not replaced gets the handler attached a second
      // time, and replaced values keep their handler.
      MaxFunctionArgumentsParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
      OnArchitectureParameterChanged(e);
    }
    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
      OnArchitectureParameterChanged(e);
    }
    #endregion

    #region Helpers
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserializationHook() {
      // BackwardsCompatibility3.3
      #region Backwards compatible code (remove with 3.4)
      if (operators == null) InitializeOperators();
      #endregion
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    /// <summary>Appends <paramref name="op"/> to the list of operators of this problem.</summary>
    protected void AddOperator(IOperator op) {
      operators.Add(op);
    }

    // Sets the checked input variables as allowed variable names on every variable symbol of
    // the grammar and copies the architecture limits to a global grammar.
    private void UpdateGrammar() {
      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
      }
      var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
      if (globalGrammar != null) {
        globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
        globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
      }
    }

    // Recalculates the estimation limits as mean -/+ PunishmentFactor * range of the target
    // values in the training partition. The limits are left untouched if the partition is
    // empty or the target variable is not part of the dataset.
    private void UpdateEstimationLimits() {
      // TrainingSamplesStart/TrainingSamplesEnd enumerate the training indices and allocate
      // a new IntValue on every access, so they are read only once.
      int samplesStart = TrainingSamplesStart.Value;
      int samplesEnd = TrainingSamplesEnd.Value;
      if (samplesStart >= samplesEnd) return;

      string targetVariable = DataAnalysisProblemData.TargetVariable.Value;
      if (!DataAnalysisProblemData.Dataset.VariableNames.Contains(targetVariable)) return;

      var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(targetVariable, samplesStart, samplesEnd);
      var mean = targetValues.Average();
      var range = targetValues.Max() - targetValues.Min();
      double punishmentFactor = PunishmentFactor.Value;
      UpperEstimationLimit = new DoubleValue(mean + punishmentFactor * range);
      LowerEstimationLimit = new DoubleValue(mean - punishmentFactor * range);
    }

    // Creates the operator list from the discovered symbolic expression tree operators plus
    // the default analyzers, then parameterizes all of them.
    private void InitializeOperators() {
      operators = new List<IOperator>();
      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
      operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
      operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
      operators.Add(new SymbolicRegressionModelQualityAnalyzer());
      ParameterizeOperators();
      ParameterizeAnalyzers();
    }

    // Points the lookup parameters of the solution creator to the parameters of this problem.
    private void ParameterizeSolutionCreator() {
      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
    }

    // Wires every analyzer to the tree produced by the solution creator and, depending on the
    // concrete analyzer type, to the problem data, interpreter and estimation limit parameters.
    private void ParameterizeAnalyzers() {
      foreach (var analyzer in Analyzers) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;

        var symbolicRegressionModelQualityAnalyzer = analyzer as SymbolicRegressionModelQualityAnalyzer;
        if (symbolicRegressionModelQualityAnalyzer != null) {
          symbolicRegressionModelQualityAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
          symbolicRegressionModelQualityAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
          symbolicRegressionModelQualityAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
          symbolicRegressionModelQualityAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
          symbolicRegressionModelQualityAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
        }

        var varFreqAnalyzer = analyzer as SymbolicRegressionVariableFrequencyAnalyzer;
        if (varFreqAnalyzer != null) {
          varFreqAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
        }

        var pruningOperator = analyzer as SymbolicRegressionTournamentPruning;
        if (pruningOperator != null) {
          pruningOperator.SamplesStartParameter.Value = TrainingSamplesStart;
          pruningOperator.SamplesEndParameter.Value = TrainingSamplesEnd;
          pruningOperator.DataAnalysisProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
          pruningOperator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
          pruningOperator.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
          pruningOperator.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
          pruningOperator.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
        }
      }
      // Operators is virtual, so it may yield analyzers that are not part of the private
      // operator list; those are wired to the tree parameter as well.
      foreach (ISymbolicExpressionTreeAnalyzer analyzer in Operators.OfType<ISymbolicExpressionTreeAnalyzer>()) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
    }

    // Points the lookup parameters of all tree operators, crossovers, manipulators and
    // architecture manipulators to the parameters of this problem.
    private void ParameterizeOperators() {
      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
        op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
        op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
        op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      }
      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
        op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
        op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
        op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
        op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
      }
    }
    #endregion
  }
}