#region License Information
/* HeuristicLab
* Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
/// <summary>
/// Base class for symbolic regression problems. It declares the parameters of the problem
/// (solution creator, tree interpreter, grammar, size limits, estimation limits), keeps the
/// list of operators offered by the problem, and re-parameterizes the solution creator,
/// operators, analyzers and grammar whenever the relevant parameters or the problem data change.
/// </summary>
[StorableClass]
public abstract class SymbolicRegressionProblemBase : DataAnalysisProblem, IProblem {
  #region Parameter Properties
  /// <summary>Parameter stored under the name "SolutionCreator".</summary>
  public new ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
    get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
  }
  IParameter IProblem.SolutionCreatorParameter {
    get { return SolutionCreatorParameter; }
  }
  /// <summary>Parameter stored under the name "LowerEstimationLimit".</summary>
  public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
    get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
  }
  /// <summary>Parameter stored under the name "UpperEstimationLimit".</summary>
  public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
    get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
  }
  /// <summary>Parameter stored under the name "SymbolicExpressionTreeInterpreter".</summary>
  public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
    get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
  }
  /// <summary>Parameter stored under the name "FunctionTreeGrammar".</summary>
  // NOTE(review): the type argument ISymbolicExpressionGrammar is assumed from the FunctionTreeGrammar
  // property below; it must match the type used when the parameter is created in the constructor - confirm.
  public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
    get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
  }
  /// <summary>Parameter stored under the name "MaxExpressionLength".</summary>
  public ValueParameter<IntValue> MaxExpressionLengthParameter {
    get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
  }
  /// <summary>Parameter stored under the name "MaxExpressionDepth".</summary>
  public ValueParameter<IntValue> MaxExpressionDepthParameter {
    get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
  }
  /// <summary>Parameter stored under the name "MaxFunctionDefiningBranches".</summary>
  public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
    get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
  }
  /// <summary>Parameter stored under the name "MaxFunctionArguments".</summary>
  public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
    get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
  }
  #endregion

  #region Properties
  /// <summary>Value of <see cref="MaxExpressionLengthParameter"/>.</summary>
  public IntValue MaxExpressionLength {
    get { return MaxExpressionLengthParameter.Value; }
    set { MaxExpressionLengthParameter.Value = value; }
  }
  /// <summary>Value of <see cref="MaxExpressionDepthParameter"/>.</summary>
  public IntValue MaxExpressionDepth {
    get { return MaxExpressionDepthParameter.Value; }
    set { MaxExpressionDepthParameter.Value = value; }
  }
  /// <summary>Value of <see cref="MaxFunctionDefiningBranchesParameter"/>.</summary>
  public IntValue MaxFunctionDefiningBranches {
    get { return MaxFunctionDefiningBranchesParameter.Value; }
    set { MaxFunctionDefiningBranchesParameter.Value = value; }
  }
  /// <summary>Value of <see cref="MaxFunctionArgumentsParameter"/>.</summary>
  public IntValue MaxFunctionArguments {
    get { return MaxFunctionArgumentsParameter.Value; }
    set { MaxFunctionArgumentsParameter.Value = value; }
  }
  /// <summary>Value of <see cref="SolutionCreatorParameter"/>.</summary>
  public new SymbolicExpressionTreeCreator SolutionCreator {
    get { return SolutionCreatorParameter.Value; }
    set { SolutionCreatorParameter.Value = value; }
  }
  ISolutionCreator IProblem.SolutionCreator {
    get { return SolutionCreatorParameter.Value; }
  }
  /// <summary>Value of <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</summary>
  public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
    get { return SymbolicExpressionTreeInterpreterParameter.Value; }
    set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
  }
  /// <summary>Value of <see cref="LowerEstimationLimitParameter"/>.</summary>
  public DoubleValue LowerEstimationLimit {
    get { return LowerEstimationLimitParameter.Value; }
    set { LowerEstimationLimitParameter.Value = value; }
  }
  /// <summary>Value of <see cref="UpperEstimationLimitParameter"/>.</summary>
  public DoubleValue UpperEstimationLimit {
    get { return UpperEstimationLimitParameter.Value; }
    set { UpperEstimationLimitParameter.Value = value; }
  }
  /// <summary>Value of <see cref="FunctionTreeGrammarParameter"/> (read-only).</summary>
  public ISymbolicExpressionGrammar FunctionTreeGrammar {
    get { return (ISymbolicExpressionGrammar)FunctionTreeGrammarParameter.Value; }
  }
  /// <summary>All operators currently held by this problem.</summary>
  public override IEnumerable<IOperator> Operators {
    get { return operators; }
  }
  /// <summary>The subset of the held operators that are symbolic regression analyzers.</summary>
  // NOTE(review): the element type ISymbolicRegressionAnalyzer is assumed (ParameterizeAnalyzers
  // needs a SymbolicExpressionTreeParameter on every element) - confirm against the analyzer interfaces.
  public IEnumerable<ISymbolicRegressionAnalyzer> Analyzers {
    get { return operators.OfType<ISymbolicRegressionAnalyzer>(); }
  }
  /// <summary>
  /// Factor applied to the target value range when the estimation limits are calculated.
  /// Every access returns a new instance with the value 10.0.
  /// </summary>
  public DoubleValue PunishmentFactor {
    get { return new DoubleValue(10.0); }
  }
  /// <summary>A new copy of the start index of the training partition of the problem data.</summary>
  public IntValue TrainingSamplesStart {
    get { return new IntValue(DataAnalysisProblemData.TrainingSamplesStart.Value); }
  }
  /// <summary>
  /// Midpoint (integer division) between start and end of the training partition of the problem data.
  /// Together with the validation properties below this splits that partition into two halves:
  /// the first is used for training, the second for validation. A new instance is returned on every access.
  /// </summary>
  public IntValue TrainingSamplesEnd {
    get {
      return new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value +
                           DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
    }
  }
  /// <summary>Same value as <see cref="TrainingSamplesEnd"/>.</summary>
  public IntValue ValidationSamplesStart {
    get { return TrainingSamplesEnd; }
  }
  /// <summary>A new copy of the end index of the training partition of the problem data.</summary>
  public IntValue ValidationSamplesEnd {
    get { return new IntValue(DataAnalysisProblemData.TrainingSamplesEnd.Value); }
  }
  /// <summary>The test partition start of the problem data (the original object, not a copy).</summary>
  public IntValue TestSamplesStart {
    get { return DataAnalysisProblemData.TestSamplesStart; }
  }
  /// <summary>The test partition end of the problem data (the original object, not a copy).</summary>
  public IntValue TestSamplesEnd {
    get { return DataAnalysisProblemData.TestSamplesEnd; }
  }
  #endregion

  // Backing list for Operators/Analyzers; may be null after loading old files (see AfterDeserializationHook).
  [Storable]
  private List<IOperator> operators;

  [StorableConstructor]
  protected SymbolicRegressionProblemBase(bool deserializing) : base(deserializing) { }

  /// <summary>
  /// Creates the problem with its default parameter values, parameterizes the solution creator,
  /// grammar and operators, calculates the estimation limits and registers all event handlers.
  /// </summary>
  public SymbolicRegressionProblemBase()
    : base() {
    SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
    var grammar = new BasicExpressionGrammar();
    var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
    var interpreter = new SimpleArithmeticExpressionInterpreter();
    Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
    Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
    Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
    Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
    Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
    Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
    Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression.", new IntValue(10)));
    Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
    Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));

    creator.SymbolicExpressionTreeParameter.ActualName = "SymbolicRegressionModel";

    ParameterizeSolutionCreator();
    UpdateGrammar();
    UpdateEstimationLimits();
    InitializeOperators();
    RegisterParameterEvents();
    RegisterParameterValueEvents();
  }

  /// <summary>
  /// Clones the problem, clones every held operator through <paramref name="cloner"/> and
  /// registers the event handlers on the clone.
  /// </summary>
  public override IDeepCloneable Clone(Cloner cloner) {
    SymbolicRegressionProblemBase clone = (SymbolicRegressionProblemBase)base.Clone(cloner);
    clone.operators = operators.Select(x => (IOperator)cloner.Clone(x)).ToList();
    clone.RegisterParameterEvents();
    clone.RegisterParameterValueEvents();
    return clone;
  }

  /// <summary>
  /// Subscribes to the ValueChanged events of the parameters themselves, i.e. to the replacement of
  /// a parameter's value object (despite the name, not to changes inside the value objects).
  /// </summary>
  private void RegisterParameterValueEvents() {
    MaxFunctionArgumentsParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
    MaxFunctionDefiningBranchesParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
    SolutionCreatorParameter.ValueChanged += new EventHandler(SolutionCreatorParameter_ValueChanged);
  }

  /// <summary>
  /// Subscribes to events of the objects currently held by the parameters: changes of the two
  /// architecture values and renaming of the solution creator's tree parameter.
  /// </summary>
  private void RegisterParameterEvents() {
    RegisterArchitectureParameterValueEvents();
    RegisterSolutionCreatorEvents();
  }

  /// <summary>
  /// Makes sure the current value object of each architecture parameter carries exactly one
  /// subscription of <see cref="ArchitectureParameterValue_ValueChanged"/>.
  /// </summary>
  private void RegisterArchitectureParameterValueEvents() {
    // Detach first: removing a handler that is not attached is a no-op, while attaching an already
    // attached handler again would make it run once per subscription.
    MaxFunctionArgumentsParameter.Value.ValueChanged -= new EventHandler(ArchitectureParameterValue_ValueChanged);
    MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
    MaxFunctionDefiningBranchesParameter.Value.ValueChanged -= new EventHandler(ArchitectureParameterValue_ValueChanged);
    MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
  }

  /// <summary>
  /// Makes sure the current solution creator's tree parameter carries exactly one subscription of
  /// <see cref="SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged"/>.
  /// </summary>
  private void RegisterSolutionCreatorEvents() {
    SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged -= new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
    SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
  }

  #region event handling
  /// <summary>Re-synchronizes analyzers, grammar and estimation limits after the problem data changed.</summary>
  protected override void OnDataAnalysisProblemChanged(EventArgs e) {
    base.OnDataAnalysisProblemChanged(e);
    // partitions could have been changed
    ParameterizeAnalyzers();
    // input variables could have been changed
    UpdateGrammar();
    // estimation limits have to be recalculated
    UpdateEstimationLimits();
  }
  /// <summary>Updates the grammar after one of the architecture parameters (or its value) changed.</summary>
  protected virtual void OnArchitectureParameterChanged(EventArgs e) {
    UpdateGrammar();
  }
  /// <summary>Extension point for derived classes; empty here and not invoked by this class.</summary>
  protected virtual void OnGrammarChanged(EventArgs e) { }
  /// <summary>Forwards to RaiseOperatorsChanged; not invoked by this class itself.</summary>
  protected virtual void OnOperatorsChanged(EventArgs e) { RaiseOperatorsChanged(e); }
  /// <summary>
  /// Wires up a newly assigned solution creator: listens for renaming of its tree parameter,
  /// parameterizes it, propagates its tree name to operators and analyzers and finally forwards to
  /// RaiseSolutionCreatorChanged.
  /// </summary>
  protected virtual void OnSolutionCreatorChanged(EventArgs e) {
    RegisterSolutionCreatorEvents();
    ParameterizeSolutionCreator();
    OnSolutionParameterNameChanged(e);
    RaiseSolutionCreatorChanged(e);
  }
  /// <summary>Propagates the solution creator's tree parameter name to analyzers and operators.</summary>
  protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
    ParameterizeAnalyzers();
    ParameterizeOperators();
  }
  /// <summary>Forwards to RaiseEvaluatorChanged; not invoked by this class itself.</summary>
  protected virtual void OnEvaluatorChanged(EventArgs e) {
    RaiseEvaluatorChanged(e);
  }
  #endregion

  #region event handlers
  private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
    OnSolutionCreatorChanged(e);
  }
  private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
    OnSolutionParameterNameChanged(e);
  }
  private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
    // This handler serves both architecture parameters, but only the one that raised the event holds
    // a new value object. Re-attaching idempotently avoids stacking a duplicate subscription on the
    // value of the other, unchanged parameter. The replaced value object is not available here, so
    // its subscription cannot be removed.
    RegisterArchitectureParameterValueEvents();
    OnArchitectureParameterChanged(e);
  }
  private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
    OnArchitectureParameterChanged(e);
  }
  #endregion

  #region Helpers
  /// <summary>Restores the non-persisted state (operator list if missing, event handlers) after loading.</summary>
  [StorableHook(HookType.AfterDeserialization)]
  private void AfterDeserializationHook() {
    // BackwardsCompatibility3.3
    #region Backwards compatible code (remove with 3.4)
    if (operators == null) InitializeOperators();
    #endregion
    RegisterParameterEvents();
    RegisterParameterValueEvents();
  }

  /// <summary>
  /// Appends <paramref name="op"/> to the operator list. The operator is not parameterized here and
  /// no change notification is raised.
  /// </summary>
  protected void AddOperator(IOperator op) {
    operators.Add(op);
  }

  /// <summary>
  /// Pushes the checked input variables of the problem data into the variable symbols of the grammar
  /// and, if the grammar is a <see cref="GlobalSymbolicExpressionGrammar"/>, the architecture limits as well.
  /// </summary>
  private void UpdateGrammar() {
    // NOTE(review): the symbol type is assumed to be the data analysis Variable symbol (the only
    // requirement visible here is a settable VariableNames member) - confirm.
    foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
      varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
    }
    var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
    if (globalGrammar != null) {
      globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
      globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
    }
  }

  /// <summary>
  /// Recalculates the estimation limits as mean +/- PunishmentFactor * range of the target values in
  /// the training samples. The limits are left untouched when the training range is empty or the
  /// target variable is not contained in the dataset.
  /// </summary>
  private void UpdateEstimationLimits() {
    // The sample properties allocate a new value object on every access, so read them only once.
    var start = TrainingSamplesStart.Value;
    var end = TrainingSamplesEnd.Value;
    if (start >= end) return;

    var targetVariable = DataAnalysisProblemData.TargetVariable.Value;
    if (!DataAnalysisProblemData.Dataset.VariableNames.Contains(targetVariable)) return;

    var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(targetVariable, start, end);
    var mean = targetValues.Average();
    var range = targetValues.Max() - targetValues.Min();
    var margin = PunishmentFactor.Value * range;
    UpperEstimationLimit = new DoubleValue(mean + margin);
    LowerEstimationLimit = new DoubleValue(mean - margin);
  }

  /// <summary>
  /// Rebuilds the operator list from the operators discovered through the application manager plus
  /// the fixed set of pruning/analyzer operators, then parameterizes all of them.
  /// </summary>
  private void InitializeOperators() {
    operators = new List<IOperator>();
    // NOTE(review): the discovered type is assumed to be ISymbolicExpressionTreeOperator (the most
    // general operator interface handled in ParameterizeOperators) - confirm.
    operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
    operators.Add(new SymbolicRegressionTournamentPruning());
    operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
    operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
    operators.Add(new SymbolicRegressionModelQualityAnalyzer());
    ParameterizeOperators();
    ParameterizeAnalyzers();
  }

  /// <summary>Points the lookup parameters of the solution creator to the parameters of this problem.</summary>
  private void ParameterizeSolutionCreator() {
    SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
    SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
    SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
    SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
    SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
  }

  /// <summary>
  /// Points the lookup parameters of all analyzers to the parameters of this problem and assigns the
  /// training sample range to the pruning operator.
  /// </summary>
  private void ParameterizeAnalyzers() {
    foreach (var analyzer in Analyzers) {
      analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;

      var symbolicRegressionModelQualityAnalyzer = analyzer as SymbolicRegressionModelQualityAnalyzer;
      if (symbolicRegressionModelQualityAnalyzer != null) {
        symbolicRegressionModelQualityAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
        symbolicRegressionModelQualityAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
        symbolicRegressionModelQualityAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
        symbolicRegressionModelQualityAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
        symbolicRegressionModelQualityAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }

      var varFreqAnalyzer = analyzer as SymbolicRegressionVariableFrequencyAnalyzer;
      if (varFreqAnalyzer != null) {
        varFreqAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
      }

      var pruningOperator = analyzer as SymbolicRegressionTournamentPruning;
      if (pruningOperator != null) {
        pruningOperator.SamplesStartParameter.Value = TrainingSamplesStart;
        pruningOperator.SamplesEndParameter.Value = TrainingSamplesEnd;
        pruningOperator.DataAnalysisProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
        pruningOperator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
        pruningOperator.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
        pruningOperator.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
        pruningOperator.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
      }
    }
    // Generic tree analyzers only need to know the name of the tree variable.
    foreach (ISymbolicExpressionTreeAnalyzer analyzer in Operators.OfType<ISymbolicExpressionTreeAnalyzer>()) {
      analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    }
  }

  /// <summary>
  /// Points the lookup parameters of all tree operators (general operators, crossovers, manipulators,
  /// architecture manipulators) to the parameters of this problem.
  /// </summary>
  private void ParameterizeOperators() {
    foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
      op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
      op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
      op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
    }
    foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
      op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    }
    foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
      op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    }
    foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
      op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
      op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
    }
  }
  #endregion
}
}