// -----------------------------------------------------------------------------------------------
// SymbolicDataAnalysisProblem: abstract, [StorableClass] problem class for symbolic data analysis.
//
// NOTE(review): this copy of the file looks damaged by extraction and is not expected to compile
// as-is. The code below is left exactly as found; restore it from the upstream file before building.
//   * Generic type-argument lists appear to be missing: the class declares `where T/U/V`
//     constraints but no type-parameter list, and calls such as OfType(), GetInstances(),
//     VariableHasType(x) and the parameter casts name no type argument.
//   * Line breaks were collapsed, so every `//` comment in the code now runs to the end of its
//     physical line and hides whatever follows it, and one string literal (the description of
//     MaximumSymbolicExpressionTreeDepth) is split across two physical lines.
//   * The URL at the end of the license notice is missing ("see .").
//
// What the visible code does
//
// Parameters. The (problemData, evaluator, solutionCreator) constructor registers, in this order:
//   ProblemData, SymbolicExpressionTreeGrammar, SymbolicExpressionTreeInterpreter,
//   MaximumSymbolicExpressionTreeDepth, MaximumSymbolicExpressionTreeLength,
//   MaximumFunctionDefinitions, MaximumFunctionArguments, FitnessCalculationPartition,
//   ValidationPartition, RelativeNumberOfEvaluatedSamples (created with PercentValue(1)) and
//   ApplyLinearScaling (created with BoolValue(false)).
//   The interpreter, MaximumFunctionArguments, MaximumFunctionDefinitions and ApplyLinearScaling
//   parameters are marked Hidden. Every parameter has a `...Parameter` accessor that looks it up in
//   Parameters by name and a value property; only ProblemData, SymbolicExpressionTreeGrammar and
//   SymbolicExpressionTreeInterpreter have setters.
//   The description constant for ProblemData is the empty string. The private constant
//   SymoblicExpressionTreeInterpreterParameterDescription is misspelled ("Symoblic") both where it
//   is declared and where it is used.
//
// Construction.
//   * Storable constructor (bool deserializing): forwards to the base class only.
//   * AfterDeserialization hook: if the ApplyLinearScaling parameter is missing it is added
//     (hidden, false) and then set to true when the runtime type name contains
//     "SymbolicRegression"; finally RegisterEventHandlers() is called.
//   * Cloning constructor (original, cloner): forwards to the base class, then
//     RegisterEventHandlers().
//   * Main constructor: after registering the parameters it assigns a new
//     TypeCoherentExpressionGrammar and a new SymbolicDataAnalysisExpressionTreeLinearInterpreter,
//     copies ProblemData.TrainingPartition Start/End into FitnessCalculationPartition, and calls
//     InitializeOperators(), UpdateGrammar() and RegisterEventHandlers().
//     NOTE(review): UpdateGrammar() and ParameterizeOperators() (reached via InitializeOperators())
//     are virtual and are invoked from this constructor - confirm that overrides tolerate running
//     before the derived constructor body.
//
// UpdateGrammar(). Copies MaximumFunctionArguments and MaximumFunctionDefinitions into the grammar.
//   Three loops over grammar.Symbols.OfType() then update every symbol whose Fixed flag is false:
//   AllVariableNames comes from problemData.InputVariables and VariableNames from
//   problemData.AllowedInputVariables, both filtered with ds.VariableHasType(x). The second loop
//   additionally sets VariableValues to a dictionary from variable name to the distinct string
//   values of that variable (as a list); the third loop maps each variable name to a dictionary
//   from distinct string value to its position in that distinct sequence.
//
// InitializeOperators(). Collects operators in a HashSet built with a TypeEqualityComparer:
//   a SubtreeCrossover, a MultiSymbolicExpressionTreeManipulator, everything returned by two
//   ApplicationManager.Manager.GetInstances calls, four analyzers, a
//   SymbolicExpressionTreeBottomUpSimilarityCalculator and a
//   SymbolicDataAnalysisBottomUpDiversityAnalyzer constructed from operators.OfType().First().
//   The set is added to Operators, a SymbolicDataAnalysisGenealogyAnalyzer is added afterwards,
//   and ParameterizeOperators() is called.
//
// Events.
//   * RegisterEventHandlers() subscribes to ProblemDataParameter.ValueChanged, to Changed of the
//     current ProblemData value, to SymbolicExpressionTreeGrammarParameter.ValueChanged and to
//     ValueChanged of MaximumFunctionArguments, MaximumFunctionDefinitions and
//     MaximumSymbolicExpressionTreeDepth.
//   * ProblemDataParameter_ValueChanged resets ValidationPartition to [0, 0], subscribes a new
//     lambda to the new value's Changed event and calls OnProblemDataChanged().
//     NOTE(review): the Changed handlers are lambdas and nothing here unsubscribes them from the
//     previous ProblemData value - check whether replaced problem data can still trigger
//     OnProblemDataChanged().
//   * Grammar and architecture (function arguments/definitions) changes call UpdateGrammar().
//   * MaximumSymbolicExpressionTreeDepth_ValueChanged raises any value below 3 back to 3.
//   * OnSolutionCreatorChanged() calls the base implementation, subscribes to ActualNameChanged of
//     the solution creator's SymbolicExpressionTreeParameter and calls ParameterizeOperators();
//     that handler and OnEvaluatorChanged() (after its base call) also call
//     ParameterizeOperators().
//   * OnProblemDataChanged() copies the training partition into FitnessCalculationPartition, calls
//     UpdateGrammar() and ParameterizeOperators(), raises ProblemDataChanged if it has subscribers
//     and finally calls OnReset().
//
// ParameterizeOperators(). Gathers the operator values found in Parameters together with Operators
//   and, per operator kind selected by OfType(), points the operator's parameter ActualName at the
//   matching parameter of this problem (grammar, depth/length limits, function arguments and
//   definitions, problem data, tree name taken from SolutionCreator.SymbolicExpressionTreeParameter,
//   fitness/validation partition, relative number of evaluated samples, linear scaling,
//   interpreter, evaluator).
//   NOTE(review): one loop assigns op.EvaluationPartitionParameter.ActualName twice with the same
//   value; the second assignment looks redundant.
//   The last loop ("add tracking analyzer") installs new before/after crossover and manipulator
//   operators on each matching operator and wires their parameter names from the first crossover,
//   manipulator and creator found.
//   NOTE(review): as the braces stand, the manipulator and creator wiring is nested inside the
//   `crossover != null` check, so it is skipped when no crossover is found - confirm this is
//   intended.
//
// Import & Export. Load(data) copies Name and Description from data and assigns it to ProblemData;
//   Export() returns ProblemData.
// -----------------------------------------------------------------------------------------------
#region License Information /* HeuristicLab * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Drawing; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Common.Resources; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.PluginInfrastructure; using HeuristicLab.Problems.Instances; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { [StorableClass] public abstract class SymbolicDataAnalysisProblem : HeuristicOptimizationProblem, IDataAnalysisProblem, ISymbolicDataAnalysisProblem, IStorableContent, IProblemInstanceConsumer, IProblemInstanceExporter where T : class, IDataAnalysisProblemData where U : class, ISymbolicDataAnalysisEvaluator where V : class, ISymbolicDataAnalysisSolutionCreator { #region parameter names & descriptions private const string ProblemDataParameterName = "ProblemData"; private const string SymbolicExpressionTreeGrammarParameterName = "SymbolicExpressionTreeGrammar"; private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter"; private 
const string MaximumSymbolicExpressionTreeDepthParameterName = "MaximumSymbolicExpressionTreeDepth"; private const string MaximumSymbolicExpressionTreeLengthParameterName = "MaximumSymbolicExpressionTreeLength"; private const string MaximumFunctionDefinitionsParameterName = "MaximumFunctionDefinitions"; private const string MaximumFunctionArgumentsParameterName = "MaximumFunctionArguments"; private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples"; private const string FitnessCalculationPartitionParameterName = "FitnessCalculationPartition"; private const string ValidationPartitionParameterName = "ValidationPartition"; private const string ApplyLinearScalingParameterName = "ApplyLinearScaling"; private const string ProblemDataParameterDescription = ""; private const string SymbolicExpressionTreeGrammarParameterDescription = "The grammar that should be used for symbolic expression tree."; private const string SymoblicExpressionTreeInterpreterParameterDescription = "The interpreter that should be used to evaluate the symbolic expression tree."; private const string MaximumSymbolicExpressionTreeDepthParameterDescription = "Maximal depth of the symbolic expression. 
The minimum depth needed for the algorithm is 3 because two levels are reserved for the ProgramRoot and the Start symbol."; private const string MaximumSymbolicExpressionTreeLengthParameterDescription = "Maximal length of the symbolic expression."; private const string MaximumFunctionDefinitionsParameterDescription = "Maximal number of automatically defined functions"; private const string MaximumFunctionArgumentsParameterDescription = "Maximal number of arguments of automatically defined functions."; private const string RelativeNumberOfEvaluatedSamplesParameterDescription = "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation."; private const string FitnessCalculationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to calculate the fitness of an individual."; private const string ValidationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to select the best model from (optional)."; private const string ApplyLinearScalingParameterDescription = "Flag that indicates if the individual should be linearly scaled before evaluating."; #endregion #region parameter properties IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } } public IValueParameter ProblemDataParameter { get { return (IValueParameter)Parameters[ProblemDataParameterName]; } } public IValueParameter SymbolicExpressionTreeGrammarParameter { get { return (IValueParameter)Parameters[SymbolicExpressionTreeGrammarParameterName]; } } public IValueParameter SymbolicExpressionTreeInterpreterParameter { get { return (IValueParameter)Parameters[SymbolicExpressionTreeInterpreterParameterName]; } } public IFixedValueParameter MaximumSymbolicExpressionTreeDepthParameter { get { return (IFixedValueParameter)Parameters[MaximumSymbolicExpressionTreeDepthParameterName]; } } public IFixedValueParameter 
MaximumSymbolicExpressionTreeLengthParameter { get { return (IFixedValueParameter)Parameters[MaximumSymbolicExpressionTreeLengthParameterName]; } } public IFixedValueParameter MaximumFunctionDefinitionsParameter { get { return (IFixedValueParameter)Parameters[MaximumFunctionDefinitionsParameterName]; } } public IFixedValueParameter MaximumFunctionArgumentsParameter { get { return (IFixedValueParameter)Parameters[MaximumFunctionArgumentsParameterName]; } } public IFixedValueParameter RelativeNumberOfEvaluatedSamplesParameter { get { return (IFixedValueParameter)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; } } public IFixedValueParameter FitnessCalculationPartitionParameter { get { return (IFixedValueParameter)Parameters[FitnessCalculationPartitionParameterName]; } } public IFixedValueParameter ValidationPartitionParameter { get { return (IFixedValueParameter)Parameters[ValidationPartitionParameterName]; } } public IFixedValueParameter ApplyLinearScalingParameter { get { return (IFixedValueParameter)Parameters[ApplyLinearScalingParameterName]; } } #endregion #region properties public string Filename { get; set; } public static new Image StaticItemImage { get { return VSImageLibrary.Type; } } IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } } public T ProblemData { get { return ProblemDataParameter.Value; } set { ProblemDataParameter.Value = value; } } public ISymbolicDataAnalysisGrammar SymbolicExpressionTreeGrammar { get { return SymbolicExpressionTreeGrammarParameter.Value; } set { SymbolicExpressionTreeGrammarParameter.Value = value; } } public ISymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionTreeInterpreter { get { return SymbolicExpressionTreeInterpreterParameter.Value; } set { SymbolicExpressionTreeInterpreterParameter.Value = value; } } public IntValue MaximumSymbolicExpressionTreeDepth { get { return MaximumSymbolicExpressionTreeDepthParameter.Value; } } public IntValue 
MaximumSymbolicExpressionTreeLength { get { return MaximumSymbolicExpressionTreeLengthParameter.Value; } } public IntValue MaximumFunctionDefinitions { get { return MaximumFunctionDefinitionsParameter.Value; } } public IntValue MaximumFunctionArguments { get { return MaximumFunctionArgumentsParameter.Value; } } public PercentValue RelativeNumberOfEvaluatedSamples { get { return RelativeNumberOfEvaluatedSamplesParameter.Value; } } public IntRange FitnessCalculationPartition { get { return FitnessCalculationPartitionParameter.Value; } } public IntRange ValidationPartition { get { return ValidationPartitionParameter.Value; } } public BoolValue ApplyLinearScaling { get { return ApplyLinearScalingParameter.Value; } } #endregion [StorableConstructor] protected SymbolicDataAnalysisProblem(bool deserializing) : base(deserializing) { } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) { Parameters.Add(new FixedValueParameter(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false))); ApplyLinearScalingParameter.Hidden = true; //it is assumed that for all symbolic regression algorithms linear scaling was set to true //there is no possibility to determine the previous value of the parameter as it was stored in the evaluator if (GetType().Name.Contains("SymbolicRegression")) ApplyLinearScaling.Value = true; } RegisterEventHandlers(); } protected SymbolicDataAnalysisProblem(SymbolicDataAnalysisProblem original, Cloner cloner) : base(original, cloner) { RegisterEventHandlers(); } protected SymbolicDataAnalysisProblem(T problemData, U evaluator, V solutionCreator) : base(evaluator, solutionCreator) { Parameters.Add(new ValueParameter(ProblemDataParameterName, ProblemDataParameterDescription, problemData)); Parameters.Add(new ValueParameter(SymbolicExpressionTreeGrammarParameterName, SymbolicExpressionTreeGrammarParameterDescription)); 
Parameters.Add(new ValueParameter(SymbolicExpressionTreeInterpreterParameterName, SymoblicExpressionTreeInterpreterParameterDescription)); Parameters.Add(new FixedValueParameter(MaximumSymbolicExpressionTreeDepthParameterName, MaximumSymbolicExpressionTreeDepthParameterDescription)); Parameters.Add(new FixedValueParameter(MaximumSymbolicExpressionTreeLengthParameterName, MaximumSymbolicExpressionTreeLengthParameterDescription)); Parameters.Add(new FixedValueParameter(MaximumFunctionDefinitionsParameterName, MaximumFunctionDefinitionsParameterDescription)); Parameters.Add(new FixedValueParameter(MaximumFunctionArgumentsParameterName, MaximumFunctionArgumentsParameterDescription)); Parameters.Add(new FixedValueParameter(FitnessCalculationPartitionParameterName, FitnessCalculationPartitionParameterDescription)); Parameters.Add(new FixedValueParameter(ValidationPartitionParameterName, ValidationPartitionParameterDescription)); Parameters.Add(new FixedValueParameter(RelativeNumberOfEvaluatedSamplesParameterName, RelativeNumberOfEvaluatedSamplesParameterDescription, new PercentValue(1))); Parameters.Add(new FixedValueParameter(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false))); SymbolicExpressionTreeInterpreterParameter.Hidden = true; MaximumFunctionArgumentsParameter.Hidden = true; MaximumFunctionDefinitionsParameter.Hidden = true; ApplyLinearScalingParameter.Hidden = true; SymbolicExpressionTreeGrammar = new TypeCoherentExpressionGrammar(); SymbolicExpressionTreeInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter(); FitnessCalculationPartition.Start = ProblemData.TrainingPartition.Start; FitnessCalculationPartition.End = ProblemData.TrainingPartition.End; InitializeOperators(); UpdateGrammar(); RegisterEventHandlers(); } protected virtual void UpdateGrammar() { var problemData = ProblemData; var ds = problemData.Dataset; var grammar = SymbolicExpressionTreeGrammar; grammar.MaximumFunctionArguments = 
MaximumFunctionArguments.Value; grammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value; foreach (var varSymbol in grammar.Symbols.OfType()) { if (!varSymbol.Fixed) { varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType(x)); varSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType(x)); } } foreach (var factorSymbol in grammar.Symbols.OfType()) { if (!factorSymbol.Fixed) { factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType(x)); factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType(x)); factorSymbol.VariableValues = factorSymbol.VariableNames .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList()); } } foreach (var factorSymbol in grammar.Symbols.OfType()) { if (!factorSymbol.Fixed) { factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType(x)); factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType(x)); factorSymbol.VariableValues = factorSymbol.VariableNames .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct() .Select((n, i) => Tuple.Create(n, i)) .ToDictionary(tup => tup.Item1, tup => tup.Item2)); } } } private void InitializeOperators() { var operators = new HashSet(new TypeEqualityComparer()); operators.Add(new SubtreeCrossover()); operators.Add(new MultiSymbolicExpressionTreeManipulator()); foreach (var op in ApplicationManager.Manager.GetInstances()) operators.Add(op); foreach (var op in ApplicationManager.Manager.GetInstances>()) operators.Add(op); operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer()); operators.Add(new SymbolicDataAnalysisVariableFrequencyAnalyzer()); operators.Add(new MinAverageMaxSymbolicExpressionTreeLengthAnalyzer()); operators.Add(new SymbolicExpressionTreeLengthAnalyzer()); 
operators.Add(new SymbolicExpressionTreeBottomUpSimilarityCalculator()); operators.Add(new SymbolicDataAnalysisBottomUpDiversityAnalyzer(operators.OfType().First())); Operators.AddRange(operators); Operators.Add(new SymbolicDataAnalysisGenealogyAnalyzer()); ParameterizeOperators(); } #region events private void RegisterEventHandlers() { ProblemDataParameter.ValueChanged += new EventHandler(ProblemDataParameter_ValueChanged); ProblemDataParameter.Value.Changed += (object sender, EventArgs e) => OnProblemDataChanged(); SymbolicExpressionTreeGrammarParameter.ValueChanged += new EventHandler(SymbolicExpressionTreeGrammarParameter_ValueChanged); MaximumFunctionArguments.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged); MaximumFunctionDefinitions.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged); MaximumSymbolicExpressionTreeDepth.ValueChanged += new EventHandler(MaximumSymbolicExpressionTreeDepth_ValueChanged); } private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) { ValidationPartition.Start = 0; ValidationPartition.End = 0; ProblemDataParameter.Value.Changed += (object s, EventArgs args) => OnProblemDataChanged(); OnProblemDataChanged(); } private void SymbolicExpressionTreeGrammarParameter_ValueChanged(object sender, EventArgs e) { UpdateGrammar(); } private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) { UpdateGrammar(); } private void MaximumSymbolicExpressionTreeDepth_ValueChanged(object sender, EventArgs e) { if (MaximumSymbolicExpressionTreeDepth != null && MaximumSymbolicExpressionTreeDepth.Value < 3) MaximumSymbolicExpressionTreeDepth.Value = 3; } protected override void OnSolutionCreatorChanged() { base.OnSolutionCreatorChanged(); SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged); ParameterizeOperators(); } private void 
SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) { ParameterizeOperators(); } protected override void OnEvaluatorChanged() { base.OnEvaluatorChanged(); ParameterizeOperators(); } public event EventHandler ProblemDataChanged; protected virtual void OnProblemDataChanged() { FitnessCalculationPartition.Start = ProblemData.TrainingPartition.Start; FitnessCalculationPartition.End = ProblemData.TrainingPartition.End; UpdateGrammar(); ParameterizeOperators(); var handler = ProblemDataChanged; if (handler != null) handler(this, EventArgs.Empty); OnReset(); } #endregion protected virtual void ParameterizeOperators() { var operators = Parameters.OfType().Select(p => p.Value).OfType().Union(Operators).ToList(); foreach (var op in operators.OfType()) { op.SymbolicExpressionTreeGrammarParameter.ActualName = SymbolicExpressionTreeGrammarParameter.Name; } foreach (var op in operators.OfType()) { op.MaximumSymbolicExpressionTreeDepthParameter.ActualName = MaximumSymbolicExpressionTreeDepthParameter.Name; op.MaximumSymbolicExpressionTreeLengthParameter.ActualName = MaximumSymbolicExpressionTreeLengthParameter.Name; } foreach (var op in operators.OfType()) { op.MaximumFunctionArgumentsParameter.ActualName = MaximumFunctionArgumentsParameter.Name; op.MaximumFunctionDefinitionsParameter.ActualName = MaximumFunctionDefinitionsParameter.Name; } foreach (var op in operators.OfType>()) { op.ProblemDataParameter.ActualName = ProblemDataParameterName; op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name; op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name; op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name; } foreach (var op in operators.OfType()) { op.ParentsParameter.ActualName = 
SolutionCreator.SymbolicExpressionTreeParameter.ActualName; op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; } foreach (var op in operators.OfType()) { op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; } foreach (var op in operators.OfType()) { op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; } foreach (var op in operators.OfType()) { op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name; } foreach (var op in operators.OfType()) { op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name; } foreach (var op in operators.OfType()) { op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName; } foreach (var op in operators.OfType>()) { op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name; op.ValidationPartitionParameter.ActualName = ValidationPartitionParameter.Name; } foreach (var op in operators.OfType()) { op.SymbolicDataAnalysisTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name; } foreach (var op in operators.OfType>()) { op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name; op.ProblemDataParameter.ActualName = ProblemDataParameter.Name; op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name; op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name; op.EvaluatorParameter.ActualName = EvaluatorParameter.Name; } // add tracking analyzer foreach (var op in operators.OfType()) { op.BeforeCrossoverOperatorParameter.ActualValue = new SymbolicDataAnalysisExpressionBeforeCrossoverOperator(); op.AfterCrossoverOperatorParameter.ActualValue = new SymbolicDataAnalysisExpressionAfterCrossoverOperator(); 
op.BeforeManipulatorOperatorParameter.ActualValue = new SymbolicDataAnalysisExpressionBeforeManipulatorOperator(); op.AfterManipulatorOperatorParameter.ActualValue = new SymbolicDataAnalysisExpressionAfterManipulatorOperator(); // get crossover parameter names var crossover = operators.OfType().FirstOrDefault(); if (crossover != null) { op.BeforeCrossoverOperator.ParentsParameter.ActualName = crossover.ParentsParameter.Name; op.AfterCrossoverOperator.ParentsParameter.ActualName = crossover.ParentsParameter.Name; op.BeforeCrossoverOperator.ChildParameter.ActualName = crossover.SymbolicExpressionTreeParameter.Name; op.AfterCrossoverOperator.ChildParameter.ActualName = crossover.SymbolicExpressionTreeParameter.Name; // get manipulator parameter names var manipulator = operators.OfType().FirstOrDefault(); if (manipulator != null) { op.BeforeManipulatorOperator.ChildParameter.ActualName = manipulator.SymbolicExpressionTreeParameter.Name; op.AfterManipulatorOperator.ChildParameter.ActualName = manipulator.SymbolicExpressionTreeParameter.Name; } var creator = operators.OfType().FirstOrDefault(); if (creator != null) { op.PopulationParameter.ActualName = creator.SymbolicExpressionTreeParameter.ActualName; } } } } #region Import & Export public virtual void Load(T data) { Name = data.Name; Description = data.Description; ProblemData = data; } public virtual T Export() { return ProblemData; } #endregion } }