#region License Information /* HeuristicLab * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.PluginInfrastructure; using HeuristicLab.Problems.DataAnalysis.Classification.Symbolic.Analyzers; using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers; using HeuristicLab.Problems.DataAnalysis.Symbolic; using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols; namespace HeuristicLab.Problems.DataAnalysis.Classification { [Item("Classification Problem", "Represents a classfication problem.")] [StorableClass] [Creatable("Problems")] public sealed class SymbolicClassificationProblem : SingleObjectiveClassificationProblem, IStorableContent { private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter"; private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar"; private const string MaxExpressionLengthParameterName = "MaxExpressionLength"; private const string MaxExpressionDepthParameterName = "MaxExpressionDepth"; private const string UpperEstimationLimitParameterName = "UpperEstimationLimit"; private const string LowerEstimationLimitParameterName = "LowerEstimationLimit"; private const string MaxFunctionDefiningBranchensParameterName = "MaxFunctionDefiningBranches"; private const string MaxFunctionArgumentsParameterName = "MaxFunctionArguments"; #region properties public string Filename { get; set; } public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter { get { return SymbolicExpressionTreeInterpreterParameter.Value; } private set { SymbolicExpressionTreeInterpreterParameter.Value = value; } } public IValueParameter SymbolicExpressionTreeInterpreterParameter { get { return (IValueParameter)Parameters[SymbolicExpressionTreeInterpreterParameterName]; } } public ISymbolicExpressionGrammar FunctionTreeGrammar { get { return (ISymbolicExpressionGrammar)FunctionTreeGrammarParameter.Value; } private set { FunctionTreeGrammarParameter.Value = value; } } public IValueParameter FunctionTreeGrammarParameter { get { return (IValueParameter)Parameters[FunctionTreeGrammarParameterName]; } } public IntValue MaxExpressionLength { get { return MaxExpressionLengthParameter.Value; } private set { MaxExpressionLengthParameter.Value = value; } } public IValueParameter MaxExpressionLengthParameter { get { return (IValueParameter)Parameters[MaxExpressionLengthParameterName]; } } public IntValue MaxExpressionDepth { get { return MaxExpressionDepthParameter.Value; } private set { MaxExpressionDepthParameter.Value = value; } } public ValueParameter MaxExpressionDepthParameter { get { return (ValueParameter)Parameters[MaxExpressionDepthParameterName]; } } public DoubleValue UpperEstimationLimit { get { return UpperEstimationLimitParameter.Value; } private set { UpperEstimationLimitParameter.Value = value; } } public IValueParameter UpperEstimationLimitParameter { get { return (IValueParameter)Parameters[UpperEstimationLimitParameterName]; } } public DoubleValue LowerEstimationLimit { get { return LowerEstimationLimitParameter.Value; } private set { LowerEstimationLimitParameter.Value = value; } } public IValueParameter LowerEstimationLimitParameter { get { return (IValueParameter)Parameters[LowerEstimationLimitParameterName]; } } public IntValue MaxFunctionDefiningBranches { get { return MaxFunctionDefiningBranchesParameter.Value; } private set { MaxFunctionDefiningBranchesParameter.Value = value; } } public IValueParameter MaxFunctionDefiningBranchesParameter { get { return (IValueParameter)Parameters[MaxFunctionDefiningBranchensParameterName]; } } public IntValue MaxFunctionArguments { get { return MaxFunctionArgumentsParameter.Value; } private set { MaxFunctionArgumentsParameter.Value = value; } } public IValueParameter MaxFunctionArgumentsParameter { get { return (IValueParameter)Parameters[MaxFunctionArgumentsParameterName]; } } public DoubleValue PunishmentFactor { get { return new DoubleValue(10.0); } } public IntValue TrainingSamplesStart { get { return new IntValue(ClassificationProblemData.TrainingIndizes.First()); } } public IntValue TrainingSamplesEnd { get { int endIndex = (int)(ClassificationProblemData.TrainingIndizes.Count() * (1.0 - ClassificationProblemData.ValidationPercentage.Value) - 1); if (endIndex < 0) endIndex = 0; return new IntValue(ClassificationProblemData.TrainingIndizes.ElementAt(endIndex)); } } public IntValue ValidationSamplesStart { get { return TrainingSamplesEnd; } } public IntValue ValidationSamplesEnd { get { return new IntValue(ClassificationProblemData.TrainingIndizes.Last() + 1); } } public IntValue TestSamplesStart { get { return ClassificationProblemData.TestSamplesStart; } } public IntValue TestSamplesEnd { get { return ClassificationProblemData.TestSamplesEnd; } } #endregion [StorableConstructor] private SymbolicClassificationProblem(bool deserializing) : base(deserializing) { } private SymbolicClassificationProblem(SymbolicClassificationProblem original, Cloner cloner) : base(original, cloner) { RegisterParameterEvents(); } public SymbolicClassificationProblem() : base() { Parameters.Add(new ValueParameter(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to evaluate the symbolic expression tree.")); Parameters.Add(new ValueParameter(FunctionTreeGrammarParameterName, "The grammar that should be used for symbolic regression models.")); Parameters.Add(new ValueParameter(MaxExpressionLengthParameterName, "Maximal length of the symbolic expression.")); Parameters.Add(new ValueParameter(MaxExpressionDepthParameterName, "Maximal depth of the symbolic expression.")); Parameters.Add(new ValueParameter(UpperEstimationLimitParameterName, "The upper limit for the estimated value that can be returned by the symbolic regression model.")); Parameters.Add(new ValueParameter(LowerEstimationLimitParameterName, "The lower limit for the estimated value that can be returned by the symbolic regression model.")); Parameters.Add(new ValueParameter(MaxFunctionDefiningBranchensParameterName, "Maximal number of automatically defined functions.")); Parameters.Add(new ValueParameter(MaxFunctionArgumentsParameterName, "Maximal number of arguments of automatically defined functions.")); SolutionCreator = new ProbabilisticTreeCreator(); Evaluator = new SymbolicClassifacitionMeanSquaredErrorEvaluator(); ParameterizeSolutionCreator(); Maximization = new BoolValue(false); FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(new FullFunctionalExpressionGrammar()); SymbolicExpressionTreeInterpreter = new SimpleArithmeticExpressionInterpreter(); MaxExpressionLength = new IntValue(100); MaxExpressionDepth = new IntValue(10); MaxFunctionDefiningBranches = new IntValue(0); MaxFunctionArguments = new IntValue(0); InitializeOperators(); RegisterParameterEvents(); UpdateEstimationLimits(); ParameterizeEvaluator(); ParameterizeSolutionCreator(); ParameterizeGrammar(); ParameterizeOperators(); ParameterizeAnalyzers(); } public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicClassificationProblem(this, cloner); } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { RegisterParameterEvents(); } private void RegisterParameterEvents() { SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged); FunctionTreeGrammarParameter.ValueChanged += new EventHandler(FunctionTreeGrammarParameter_ValueChanged); MaxFunctionArgumentsParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged); MaxFunctionDefiningBranchesParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged); MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged); MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged); } protected override void OnEvaluatorChanged() { ParameterizeEvaluator(); ParameterizeAnalyzers(); ParameterizeProblem(); base.OnEvaluatorChanged(); } protected override void OnSolutionCreatorChanged() { ParameterizeSolutionCreator(); SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged); base.OnSolutionCreatorChanged(); } private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, System.EventArgs e) { ParameterizeEvaluator(); ParameterizeOperators(); ParameterizeAnalyzers(); } protected override void OnClassificationProblemDataChanged() { ParameterizeAnalyzers(); ParameterizeGrammar(); ParameterizeEvaluator(); UpdateEstimationLimits(); base.OnClassificationProblemDataChanged(); } private void FunctionTreeGrammarParameter_ValueChanged(object sender, System.EventArgs e) { if (!(FunctionTreeGrammar is GlobalSymbolicExpressionGrammar)) { FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(FunctionTreeGrammar); } OnGrammarChanged(); } private void OnGrammarChanged() { ParameterizeGrammar(); } private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) { MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged); MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged); OnArchitectureParameterChanged(); } private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) { OnArchitectureParameterChanged(); } private void OnArchitectureParameterChanged() { ParameterizeGrammar(); } private void InitializeOperators() { Operators.AddRange(ApplicationManager.Manager.GetInstances().OfType()); Operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer()); Operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer()); Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer()); Operators.Add(new ValidationBestSymbolicClassificationSolutionAnalyzer()); Operators.Add(new TrainingBestSymbolicClassificationSolutionAnalyzer()); } #region operator parameterization private void UpdateEstimationLimits() { if (TrainingSamplesStart.Value < TrainingSamplesEnd.Value && ClassificationProblemData.Dataset.VariableNames.Contains(ClassificationProblemData.TargetVariable.Value)) { var targetValues = ClassificationProblemData.Dataset.GetVariableValues(ClassificationProblemData.TargetVariable.Value, TrainingSamplesStart.Value, TrainingSamplesEnd.Value); var mean = targetValues.Average(); var range = targetValues.Max() - targetValues.Min(); UpperEstimationLimit = new DoubleValue(mean + PunishmentFactor.Value * range); LowerEstimationLimit = new DoubleValue(mean - PunishmentFactor.Value * range); } } private void ParameterizeEvaluator() { if (Evaluator != null) { Evaluator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; Evaluator.RegressionProblemDataParameter.ActualName = ClassificationProblemDataParameter.Name; Evaluator.SamplesStartParameter.Value = TrainingSamplesStart; Evaluator.SamplesEndParameter.Value = TrainingSamplesEnd; } } private void ParameterizeGrammar() { List laggedSymbols = FunctionTreeGrammar.Symbols.OfType().ToList(); foreach (Symbol symbol in laggedSymbols) FunctionTreeGrammar.RemoveSymbol(symbol); foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType()) { varSymbol.VariableNames = ClassificationProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value); } var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar; if (globalGrammar != null) { globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value; globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value; } } private void ParameterizeSolutionCreator() { SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name; SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name; SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name; SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name; SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name; } private void ParameterizeOperators() { foreach (ISymbolicExpressionTreeOperator op in Operators.OfType()) { op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name; op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name; op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name; } foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType()) { op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; } foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType()) { op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; } foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType()) { op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name; op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name; } } private void ParameterizeAnalyzers() { foreach (ISymbolicRegressionAnalyzer analyzer in Operators.OfType()) { analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; var bestValidationSolutionAnalyzer = analyzer as ValidationBestSymbolicClassificationSolutionAnalyzer; if (bestValidationSolutionAnalyzer != null) { bestValidationSolutionAnalyzer.ClassificationProblemDataParameter.ActualName = ClassificationProblemDataParameter.Name; bestValidationSolutionAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name; bestValidationSolutionAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name; bestValidationSolutionAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name; bestValidationSolutionAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; bestValidationSolutionAnalyzer.ValidationSamplesStartParameter.Value = ValidationSamplesStart; bestValidationSolutionAnalyzer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd; } var bestTrainingSolutionAnalyzer = analyzer as TrainingBestSymbolicClassificationSolutionAnalyzer; if (bestTrainingSolutionAnalyzer != null) { bestTrainingSolutionAnalyzer.ProblemDataParameter.ActualName = ClassificationProblemDataParameter.Name; bestTrainingSolutionAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name; bestTrainingSolutionAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name; bestTrainingSolutionAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name; bestTrainingSolutionAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; } var varFreqAnalyzer = analyzer as SymbolicRegressionVariableFrequencyAnalyzer; if (varFreqAnalyzer != null) { varFreqAnalyzer.ProblemDataParameter.ActualName = ClassificationProblemDataParameter.Name; } } } private void ParameterizeProblem() { if (Maximization != null) { Maximization.Value = Evaluator.Maximization; } else { Maximization = new BoolValue(Evaluator.Maximization); } } #endregion } }