#region License Information
/* HeuristicLab
* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Drawing;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Common.Resources;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Problems.Instances;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[StorableType("dec1cb4b-d759-4927-a176-dfbe3a71d474")]
public abstract class SymbolicDataAnalysisProblem : HeuristicOptimizationProblem, IDataAnalysisProblem, ISymbolicDataAnalysisProblem, IStorableContent,
IProblemInstanceConsumer, IProblemInstanceExporter
where T : class, IDataAnalysisProblemData
where U : class, ISymbolicDataAnalysisEvaluator
where V : class, ISymbolicDataAnalysisSolutionCreator {
#region parameter names & descriptions
// Keys under which this problem's parameters are stored in (and looked up from) the Parameters collection.
private const string ProblemDataParameterName = "ProblemData";
private const string SymbolicExpressionTreeGrammarParameterName = "SymbolicExpressionTreeGrammar";
private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
private const string MaximumSymbolicExpressionTreeDepthParameterName = "MaximumSymbolicExpressionTreeDepth";
private const string MaximumSymbolicExpressionTreeLengthParameterName = "MaximumSymbolicExpressionTreeLength";
private const string MaximumFunctionDefinitionsParameterName = "MaximumFunctionDefinitions";
private const string MaximumFunctionArgumentsParameterName = "MaximumFunctionArguments";
private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
private const string FitnessCalculationPartitionParameterName = "FitnessCalculationPartition";
private const string ValidationPartitionParameterName = "ValidationPartition";
private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
// Description texts handed to the parameter constructors together with the keys above.
// NOTE(review): the problem data description is an empty string.
private const string ProblemDataParameterDescription = "";
private const string SymbolicExpressionTreeGrammarParameterDescription = "The grammar that should be used for symbolic expression tree.";
// NOTE(review): the identifier below is misspelled ("Symoblic" instead of "Symbolic"); the constructor that
// creates the interpreter parameter references it under this spelling, so a rename must touch both places.
private const string SymoblicExpressionTreeInterpreterParameterDescription = "The interpreter that should be used to evaluate the symbolic expression tree.";
private const string MaximumSymbolicExpressionTreeDepthParameterDescription = "Maximal depth of the symbolic expression. The minimum depth needed for the algorithm is 3 because two levels are reserved for the ProgramRoot and the Start symbol.";
private const string MaximumSymbolicExpressionTreeLengthParameterDescription = "Maximal length of the symbolic expression.";
private const string MaximumFunctionDefinitionsParameterDescription = "Maximal number of automatically defined functions";
private const string MaximumFunctionArgumentsParameterDescription = "Maximal number of arguments of automatically defined functions.";
private const string RelativeNumberOfEvaluatedSamplesParameterDescription = "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation.";
private const string FitnessCalculationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to calculate the fitness of an individual.";
private const string ValidationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to select the best model from (optional).";
private const string ApplyLinearScalingParameterDescription = "Flag that indicates if the individual should be linearly scaled before evaluating.";
#endregion
#region parameter properties
/// <summary>Explicit interface view of <see cref="ProblemDataParameter"/>.</summary>
IParameter IDataAnalysisProblem.ProblemDataParameter {
  get {
    return ProblemDataParameter;
  }
}

/// <summary>Parameter stored under the "ProblemData" key.</summary>
public IValueParameter ProblemDataParameter {
  get {
    var parameter = Parameters[ProblemDataParameterName];
    return (IValueParameter)parameter;
  }
}

/// <summary>Parameter holding the grammar used for the symbolic expression tree.</summary>
public IValueParameter SymbolicExpressionTreeGrammarParameter {
  get {
    var parameter = Parameters[SymbolicExpressionTreeGrammarParameterName];
    return (IValueParameter)parameter;
  }
}

/// <summary>Parameter holding the interpreter used to evaluate the symbolic expression tree.</summary>
public IValueParameter SymbolicExpressionTreeInterpreterParameter {
  get {
    var parameter = Parameters[SymbolicExpressionTreeInterpreterParameterName];
    return (IValueParameter)parameter;
  }
}

/// <summary>Parameter holding the maximal depth of the symbolic expression.</summary>
public IFixedValueParameter MaximumSymbolicExpressionTreeDepthParameter {
  get {
    var parameter = Parameters[MaximumSymbolicExpressionTreeDepthParameterName];
    return (IFixedValueParameter)parameter;
  }
}

/// <summary>Parameter holding the maximal length of the symbolic expression.</summary>
public IFixedValueParameter MaximumSymbolicExpressionTreeLengthParameter {
  get {
    var parameter = Parameters[MaximumSymbolicExpressionTreeLengthParameterName];
    return (IFixedValueParameter)parameter;
  }
}

/// <summary>Parameter holding the maximal number of automatically defined functions.</summary>
public IFixedValueParameter MaximumFunctionDefinitionsParameter {
  get {
    var parameter = Parameters[MaximumFunctionDefinitionsParameterName];
    return (IFixedValueParameter)parameter;
  }
}

/// <summary>Parameter holding the maximal number of arguments of automatically defined functions.</summary>
public IFixedValueParameter MaximumFunctionArgumentsParameter {
  get {
    var parameter = Parameters[MaximumFunctionArgumentsParameterName];
    return (IFixedValueParameter)parameter;
  }
}

/// <summary>Parameter holding the relative number of samples that are evaluated.</summary>
public IFixedValueParameter RelativeNumberOfEvaluatedSamplesParameter {
  get {
    var parameter = Parameters[RelativeNumberOfEvaluatedSamplesParameterName];
    return (IFixedValueParameter)parameter;
  }
}

/// <summary>Parameter holding the partition used to calculate the fitness of an individual.</summary>
public IFixedValueParameter FitnessCalculationPartitionParameter {
  get {
    var parameter = Parameters[FitnessCalculationPartitionParameterName];
    return (IFixedValueParameter)parameter;
  }
}

/// <summary>Parameter holding the (optional) partition used to select the best model.</summary>
public IFixedValueParameter ValidationPartitionParameter {
  get {
    var parameter = Parameters[ValidationPartitionParameterName];
    return (IFixedValueParameter)parameter;
  }
}

/// <summary>Parameter holding the flag that indicates whether individuals are linearly scaled before evaluation.</summary>
public IFixedValueParameter ApplyLinearScalingParameter {
  get {
    var parameter = Parameters[ApplyLinearScalingParameterName];
    return (IFixedValueParameter)parameter;
  }
}
#endregion
#region properties
/// <summary>Gets or sets the file name.</summary>
public string Filename { get; set; }

/// <summary>Image returned for this item type; hides the inherited static member of the same name.</summary>
public static new Image StaticItemImage {
  get {
    return VSImageLibrary.Type;
  }
}

/// <summary>Explicit interface view of <see cref="ProblemData"/>.</summary>
IDataAnalysisProblemData IDataAnalysisProblem.ProblemData {
  get {
    return ProblemData;
  }
}

/// <summary>Gets or sets the value of <see cref="ProblemDataParameter"/>.</summary>
public T ProblemData {
  get {
    var parameter = ProblemDataParameter;
    return parameter.Value;
  }
  set {
    var parameter = ProblemDataParameter;
    parameter.Value = value;
  }
}

/// <summary>Gets or sets the value of <see cref="SymbolicExpressionTreeGrammarParameter"/>.</summary>
public ISymbolicDataAnalysisGrammar SymbolicExpressionTreeGrammar {
  get {
    var parameter = SymbolicExpressionTreeGrammarParameter;
    return parameter.Value;
  }
  set {
    var parameter = SymbolicExpressionTreeGrammarParameter;
    parameter.Value = value;
  }
}

/// <summary>Gets or sets the value of <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</summary>
public ISymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
  get {
    var parameter = SymbolicExpressionTreeInterpreterParameter;
    return parameter.Value;
  }
  set {
    var parameter = SymbolicExpressionTreeInterpreterParameter;
    parameter.Value = value;
  }
}

/// <summary>Value object of <see cref="MaximumSymbolicExpressionTreeDepthParameter"/>.</summary>
public IntValue MaximumSymbolicExpressionTreeDepth {
  get {
    var parameter = MaximumSymbolicExpressionTreeDepthParameter;
    return parameter.Value;
  }
}

/// <summary>Value object of <see cref="MaximumSymbolicExpressionTreeLengthParameter"/>.</summary>
public IntValue MaximumSymbolicExpressionTreeLength {
  get {
    var parameter = MaximumSymbolicExpressionTreeLengthParameter;
    return parameter.Value;
  }
}

/// <summary>Value object of <see cref="MaximumFunctionDefinitionsParameter"/>.</summary>
public IntValue MaximumFunctionDefinitions {
  get {
    var parameter = MaximumFunctionDefinitionsParameter;
    return parameter.Value;
  }
}

/// <summary>Value object of <see cref="MaximumFunctionArgumentsParameter"/>.</summary>
public IntValue MaximumFunctionArguments {
  get {
    var parameter = MaximumFunctionArgumentsParameter;
    return parameter.Value;
  }
}

/// <summary>Value object of <see cref="RelativeNumberOfEvaluatedSamplesParameter"/>.</summary>
public PercentValue RelativeNumberOfEvaluatedSamples {
  get {
    var parameter = RelativeNumberOfEvaluatedSamplesParameter;
    return parameter.Value;
  }
}

/// <summary>Value object of <see cref="FitnessCalculationPartitionParameter"/>.</summary>
public IntRange FitnessCalculationPartition {
  get {
    var parameter = FitnessCalculationPartitionParameter;
    return parameter.Value;
  }
}

/// <summary>Value object of <see cref="ValidationPartitionParameter"/>.</summary>
public IntRange ValidationPartition {
  get {
    var parameter = ValidationPartitionParameter;
    return parameter.Value;
  }
}

/// <summary>Value object of <see cref="ApplyLinearScalingParameter"/>.</summary>
public BoolValue ApplyLinearScaling {
  get {
    var parameter = ApplyLinearScalingParameter;
    return parameter.Value;
  }
}
#endregion
/// <summary>
/// Constructor marked with <c>[StorableConstructor]</c>; it only forwards the
/// <paramref name="deserializing"/> flag to the base class.
/// </summary>
[StorableConstructor]
protected SymbolicDataAnalysisProblem(bool deserializing)
  : base(deserializing) {
}
/// <summary>
/// After-deserialization hook: adds the ApplyLinearScaling parameter when the restored parameter
/// collection does not contain it, then registers the event handlers.
/// </summary>
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
  bool scalingParameterMissing = !Parameters.ContainsKey(ApplyLinearScalingParameterName);
  if (scalingParameterMissing) {
    var initialValue = new BoolValue(false);
    Parameters.Add(new FixedValueParameter(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, initialValue));
    ApplyLinearScalingParameter.Hidden = true;

    // It is assumed that linear scaling was switched on for all symbolic regression algorithms.
    // The previous value cannot be recovered because it used to be stored in the evaluator.
    bool isSymbolicRegression = GetType().Name.Contains("SymbolicRegression");
    if (isSymbolicRegression) {
      ApplyLinearScaling.Value = true;
    }
  }

  RegisterEventHandlers();
}
/// <summary>
/// Cloning constructor: lets the base class copy <paramref name="original"/> using
/// <paramref name="cloner"/> and then registers this instance's event handlers.
/// </summary>
protected SymbolicDataAnalysisProblem(SymbolicDataAnalysisProblem original, Cloner cloner) : base(original, cloner) {
  RegisterEventHandlers();
}
// Creates a problem for the given problem data, evaluator and solution creator.
// In order: adds all parameters, marks four of them as hidden, installs a default grammar and interpreter,
// copies the training partition bounds into the fitness calculation partition, and finally initializes the
// operators, updates the grammar and registers the event handlers.
protected SymbolicDataAnalysisProblem(T problemData, U evaluator, V solutionCreator)
: base(evaluator, solutionCreator) {
// Only the problem data, the relative number of evaluated samples (1) and the linear scaling flag (false)
// receive an explicit initial value here; the remaining parameters are created from name and description only.
Parameters.Add(new ValueParameter(ProblemDataParameterName, ProblemDataParameterDescription, problemData));
Parameters.Add(new ValueParameter(SymbolicExpressionTreeGrammarParameterName, SymbolicExpressionTreeGrammarParameterDescription));
Parameters.Add(new ValueParameter(SymbolicExpressionTreeInterpreterParameterName, SymoblicExpressionTreeInterpreterParameterDescription));
Parameters.Add(new FixedValueParameter(MaximumSymbolicExpressionTreeDepthParameterName, MaximumSymbolicExpressionTreeDepthParameterDescription));
Parameters.Add(new FixedValueParameter(MaximumSymbolicExpressionTreeLengthParameterName, MaximumSymbolicExpressionTreeLengthParameterDescription));
Parameters.Add(new FixedValueParameter(MaximumFunctionDefinitionsParameterName, MaximumFunctionDefinitionsParameterDescription));
Parameters.Add(new FixedValueParameter(MaximumFunctionArgumentsParameterName, MaximumFunctionArgumentsParameterDescription));
Parameters.Add(new FixedValueParameter(FitnessCalculationPartitionParameterName, FitnessCalculationPartitionParameterDescription));
Parameters.Add(new FixedValueParameter(ValidationPartitionParameterName, ValidationPartitionParameterDescription));
Parameters.Add(new FixedValueParameter(RelativeNumberOfEvaluatedSamplesParameterName, RelativeNumberOfEvaluatedSamplesParameterDescription, new PercentValue(1)));
Parameters.Add(new FixedValueParameter(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false)));
// Interpreter, function argument/definition limits and the linear scaling flag are flagged as hidden.
SymbolicExpressionTreeInterpreterParameter.Hidden = true;
MaximumFunctionArgumentsParameter.Hidden = true;
MaximumFunctionDefinitionsParameter.Hidden = true;
ApplyLinearScalingParameter.Hidden = true;
// Default grammar and interpreter; both can be replaced later through the public setters.
SymbolicExpressionTreeGrammar = new TypeCoherentExpressionGrammar();
SymbolicExpressionTreeInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
// Fitness is initially calculated on the whole training partition of the problem data.
FitnessCalculationPartition.Start = ProblemData.TrainingPartition.Start;
FitnessCalculationPartition.End = ProblemData.TrainingPartition.End;
InitializeOperators();
// NOTE(review): UpdateGrammar is virtual and is invoked from this constructor, so an override in a
// derived class runs before that class's own constructor body has executed.
UpdateGrammar();
// Handlers are attached last, so the assignments above are made before this class subscribes to anything.
RegisterEventHandlers();
}
// Synchronizes the grammar with the current problem settings: copies the function argument/definition
// limits into the grammar and refreshes the variable names (and, for the last two symbol groups, the
// variable values) of every grammar symbol that is not marked as Fixed.
protected virtual void UpdateGrammar() {
var problemData = ProblemData;
var ds = problemData.Dataset;
var grammar = SymbolicExpressionTreeGrammar;
grammar.MaximumFunctionArguments = MaximumFunctionArguments.Value;
grammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value;
// First symbol group: AllVariableNames comes from all input variables, VariableNames from the allowed
// input variables; both are filtered through ds.VariableHasType.
foreach (var varSymbol in grammar.Symbols.OfType()) {
if (!varSymbol.Fixed) {
varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType(x));
varSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType(x));
}
}
// Second symbol group: same name handling as above; additionally VariableValues maps every variable
// name to the list of distinct string values the dataset holds for that variable.
foreach (var factorSymbol in grammar.Symbols.OfType()) {
if (!factorSymbol.Fixed) {
factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType(x));
factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType(x));
factorSymbol.VariableValues = factorSymbol.VariableNames
.ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList());
}
}
// Third symbol group: VariableValues maps every variable name to a dictionary from each distinct string
// value to its zero-based position in the sequence of distinct values.
foreach (var factorSymbol in grammar.Symbols.OfType()) {
if (!factorSymbol.Fixed) {
factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType(x));
factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType(x));
factorSymbol.VariableValues = factorSymbol.VariableNames
.ToDictionary(varName => varName,
varName => ds.GetStringValues(varName).Distinct()
.Select((n, i) => Tuple.Create(n, i))
.ToDictionary(tup => tup.Item1, tup => tup.Item2));
}
}
}
// Fills the Operators collection: first with instances obtained from the application manager, then with
// a fixed set of analyzers and a similarity calculator, and finally wires all of them to this problem's
// parameters via ParameterizeOperators.
private void InitializeOperators() {
Operators.AddRange(ApplicationManager.Manager.GetInstances());
Operators.AddRange(ApplicationManager.Manager.GetInstances>());
Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer());
Operators.Add(new SymbolicDataAnalysisVariableFrequencyAnalyzer());
Operators.Add(new MinAverageMaxSymbolicExpressionTreeLengthAnalyzer());
Operators.Add(new SymbolicExpressionTreeLengthAnalyzer());
Operators.Add(new SymbolicExpressionTreeBottomUpSimilarityCalculator());
// The diversity analyzer is constructed with the first operator the OfType filter yields from the
// operators added so far; First() throws if the filter yields nothing.
Operators.Add(new SymbolicDataAnalysisBottomUpDiversityAnalyzer(Operators.OfType().First()));
ParameterizeOperators();
}
#region events
// Problem data instance whose Changed event this problem is currently subscribed to.
// It is maintained exclusively by AttachProblemDataHandler, which RegisterEventHandlers calls, so it
// does not need to be persisted or cloned.
private T observedProblemData;

/// <summary>
/// Subscribes this problem to the change notifications it depends on: replacement and modification of the
/// problem data, replacement of the grammar, changes of the function argument/definition limits and
/// changes of the maximum tree depth.
/// </summary>
private void RegisterEventHandlers() {
  ProblemDataParameter.ValueChanged += new EventHandler(ProblemDataParameter_ValueChanged);
  AttachProblemDataHandler();
  SymbolicExpressionTreeGrammarParameter.ValueChanged += new EventHandler(SymbolicExpressionTreeGrammarParameter_ValueChanged);
  MaximumFunctionArguments.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
  MaximumFunctionDefinitions.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
  MaximumSymbolicExpressionTreeDepth.ValueChanged += new EventHandler(MaximumSymbolicExpressionTreeDepth_ValueChanged);
}

/// <summary>
/// Moves the Changed subscription from the previously observed problem data (if any) to the current one.
/// A named handler is used instead of an anonymous lambda so that the subscription can be removed again;
/// otherwise every replaced problem data object would keep calling back into this problem.
/// </summary>
private void AttachProblemDataHandler() {
  if (observedProblemData != null) {
    observedProblemData.Changed -= ProblemData_Changed;
  }
  observedProblemData = ProblemData;
  if (observedProblemData != null) {
    observedProblemData.Changed += ProblemData_Changed;
  }
}

/// <summary>Forwards a modification of the observed problem data to <c>OnProblemDataChanged</c>.</summary>
private void ProblemData_Changed(object sender, EventArgs e) {
  OnProblemDataChanged();
}

/// <summary>
/// Handles replacement of the problem data object: resets the validation partition to [0, 0], re-targets
/// the Changed subscription to the new object and then runs the regular problem-data-changed processing.
/// </summary>
private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
  ValidationPartition.Start = 0;
  ValidationPartition.End = 0;
  AttachProblemDataHandler();
  OnProblemDataChanged();
}
/// <summary>A new grammar object was assigned; bring it in line with the current problem settings.</summary>
private void SymbolicExpressionTreeGrammarParameter_ValueChanged(object sender, EventArgs e) {
  UpdateGrammar();
}

/// <summary>One of the function argument/definition limits changed; push the new limits into the grammar.</summary>
private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
  UpdateGrammar();
}

/// <summary>
/// Raises the maximum tree depth back to 3 whenever it is set below that value. According to the
/// parameter description, two levels are reserved for the ProgramRoot and the Start symbol.
/// </summary>
private void MaximumSymbolicExpressionTreeDepth_ValueChanged(object sender, EventArgs e) {
  var depth = MaximumSymbolicExpressionTreeDepth;
  if (depth == null) {
    return;
  }
  if (depth.Value >= 3) {
    return;
  }
  depth.Value = 3;
}
/// <summary>
/// Runs the base handling, subscribes to name changes of the solution creator's tree parameter and
/// re-parameterizes all operators.
/// </summary>
protected override void OnSolutionCreatorChanged() {
  base.OnSolutionCreatorChanged();

  var treeParameter = SolutionCreator.SymbolicExpressionTreeParameter;
  treeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;

  ParameterizeOperators();
}

/// <summary>The actual name of the solution creator's tree parameter changed; propagate it to all operators.</summary>
private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
  ParameterizeOperators();
}

/// <summary>Runs the base handling and re-parameterizes all operators.</summary>
protected override void OnEvaluatorChanged() {
  base.OnEvaluatorChanged();

  ParameterizeOperators();
}
/// <summary>Raised by <see cref="OnProblemDataChanged"/> after grammar and operators have been updated.</summary>
public event EventHandler ProblemDataChanged;

/// <summary>
/// Reacts to new or modified problem data: copies the training partition bounds into the fitness
/// calculation partition, updates the grammar, re-parameterizes the operators, raises
/// <see cref="ProblemDataChanged"/> and finally calls <c>OnReset</c>.
/// </summary>
protected virtual void OnProblemDataChanged() {
  var fitnessPartition = FitnessCalculationPartition;
  var trainingPartition = ProblemData.TrainingPartition;
  fitnessPartition.Start = trainingPartition.Start;
  fitnessPartition.End = trainingPartition.End;

  UpdateGrammar();
  ParameterizeOperators();

  // Copy the delegate first so the null check and the invocation operate on the same subscriber list.
  EventHandler subscribers = ProblemDataChanged;
  if (subscribers != null) {
    subscribers(this, EventArgs.Empty);
  }

  OnReset();
}
#endregion
// Wires operators to this problem: for every operator found among the parameter values and in the
// Operators collection, the ActualName of its lookup parameters is set to the name of the corresponding
// parameter of this problem (or to the actual name of the solution creator's tree parameter).
// Each loop below handles the operators selected by one OfType filter.
protected virtual void ParameterizeOperators() {
// Candidate operators: values of this problem's parameters combined (set union) with Operators.
var operators = Parameters.OfType().Select(p => p.Value).OfType().Union(Operators).ToList();
// Grammar lookup.
foreach (var op in operators.OfType()) {
op.SymbolicExpressionTreeGrammarParameter.ActualName = SymbolicExpressionTreeGrammarParameter.Name;
}
// Tree size limits (depth and length).
foreach (var op in operators.OfType()) {
op.MaximumSymbolicExpressionTreeDepthParameter.ActualName = MaximumSymbolicExpressionTreeDepthParameter.Name;
op.MaximumSymbolicExpressionTreeLengthParameter.ActualName = MaximumSymbolicExpressionTreeLengthParameter.Name;
}
// Limits for automatically defined functions.
foreach (var op in operators.OfType()) {
op.MaximumFunctionArgumentsParameter.ActualName = MaximumFunctionArgumentsParameter.Name;
op.MaximumFunctionDefinitionsParameter.ActualName = MaximumFunctionDefinitionsParameter.Name;
}
// Problem data, tree, evaluation partition, sample ratio and linear scaling flag.
// This loop uses the ProblemDataParameterName constant, whereas the last loop uses ProblemDataParameter.Name.
foreach (var op in operators.OfType>()) {
op.ProblemDataParameter.ActualName = ProblemDataParameterName;
op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name;
op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name;
op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name;
}
// Parents and resulting tree both refer to the solution creator's tree parameter name.
foreach (var op in operators.OfType()) {
op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
}
// The next two loops set only the tree parameter name.
foreach (var op in operators.OfType()) {
op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
}
foreach (var op in operators.OfType()) {
op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
}
// The next two loops set only the linear scaling flag name.
foreach (var op in operators.OfType()) {
op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name;
}
foreach (var op in operators.OfType()) {
op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name;
}
// Tree parameter name only.
foreach (var op in operators.OfType()) {
op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
}
// Sample ratio and validation partition.
foreach (var op in operators.OfType>()) {
op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name;
op.ValidationPartitionParameter.ActualName = ValidationPartitionParameter.Name;
}
// Interpreter lookup.
foreach (var op in operators.OfType()) {
op.SymbolicDataAnalysisTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
}
// Evaluation partition, problem data, sample ratio and evaluator.
foreach (var op in operators.OfType>()) {
op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name;
op.ProblemDataParameter.ActualName = ProblemDataParameter.Name;
// NOTE(review): the following assignment repeats the one two lines above with the same value.
op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name;
op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name;
op.EvaluatorParameter.ActualName = EvaluatorParameter.Name;
}
}
#region Import & Export
/// <summary>
/// Takes over name, description and problem data from the given instance, in that order.
/// </summary>
/// <param name="data">The problem data to load; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public virtual void Load(T data) {
  // Fail with a descriptive exception before any state of this problem is touched.
  if (data == null) {
    throw new ArgumentNullException("data");
  }

  Name = data.Name;
  Description = data.Description;
  ProblemData = data;
}

/// <summary>Returns the problem data object currently held by this problem (the same instance, not a copy).</summary>
public virtual T Export() {
  return ProblemData;
}
#endregion
}
}