#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HEAL.Attic;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.RealVectorEncoding;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Problems.Instances;
using HeuristicLab.Problems.Instances.DataAnalysis;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
[StorableType("7464E84B-65CC-440A-91F0-9FA920D730F9")]
[Item(Name = "Structured Symbolic Regression Problem (single-objective)", Description = "A problem with a structural definition and variable subfunctions.")]
[Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 150)]
public class StructuredSymbolicRegressionSingleObjectiveProblem : SingleObjectiveBasicProblem, IRegressionProblem, IProblemInstanceConsumer {
#region Constants
private const string ProblemDataParameterName = "ProblemData";
private const string StructureTemplateParameterName = "Structure Template";
private const string InterpreterParameterName = "Interpreter";
private const string EstimationLimitsParameterName = "EstimationLimits";
private const string BestTrainingSolutionParameterName = "Best Training Solution";
private const string ApplyLinearScalingParameterName = "Apply Linear Scaling";
private const string OptimizeParametersParameterName = "Optimize Parameters";
private const string SymbolicExpressionTreeName = "SymbolicExpressionTree";
private const string NumericParametersEncoding = "Numeric Parameters";
private const string StructureTemplateDescriptionText =
"Enter your expression as string in infix format into the empty input field.\n" +
"By checking the \"Apply Linear Scaling\" checkbox you can add the relevant scaling terms to your expression.\n" +
"After entering the expression click parse to build the tree.\n" +
"To edit the defined sub-functions, click on the corresponding-colored node in the tree view.\n" +
"Check the info box besides the input field for more information.";
#endregion
#region Parameters
public IValueParameter ProblemDataParameter => (IValueParameter)Parameters[ProblemDataParameterName];
public IFixedValueParameter StructureTemplateParameter => (IFixedValueParameter)Parameters[StructureTemplateParameterName];
public IValueParameter InterpreterParameter => (IValueParameter)Parameters[InterpreterParameterName];
public IFixedValueParameter EstimationLimitsParameter => (IFixedValueParameter)Parameters[EstimationLimitsParameterName];
public IResultParameter BestTrainingSolutionParameter => (IResultParameter)Parameters[BestTrainingSolutionParameterName];
public IFixedValueParameter ApplyLinearScalingParameter => (IFixedValueParameter)Parameters[ApplyLinearScalingParameterName];
public IFixedValueParameter OptimizeParametersParameter => (IFixedValueParameter)Parameters[OptimizeParametersParameterName];
#endregion
#region Properties
public IRegressionProblemData ProblemData {
get => ProblemDataParameter.Value;
set {
ProblemDataParameter.Value = value;
ProblemDataChanged?.Invoke(this, EventArgs.Empty);
}
}
public StructureTemplate StructureTemplate => StructureTemplateParameter.Value;
public ISymbolicDataAnalysisExpressionTreeInterpreter Interpreter => InterpreterParameter.Value;
IParameter IDataAnalysisProblem.ProblemDataParameter => ProblemDataParameter;
IDataAnalysisProblemData IDataAnalysisProblem.ProblemData => ProblemData;
public DoubleLimit EstimationLimits => EstimationLimitsParameter.Value;
public bool ApplyLinearScaling {
get => ApplyLinearScalingParameter.Value.Value;
set => ApplyLinearScalingParameter.Value.Value = value;
}
public bool OptimizeParameters {
get => OptimizeParametersParameter.Value.Value;
set => OptimizeParametersParameter.Value.Value = value;
}
public override bool Maximization => false;
#endregion
#region EventHandlers
public event EventHandler ProblemDataChanged;
#endregion
#region Constructors & Cloning
public StructuredSymbolicRegressionSingleObjectiveProblem() {
var provider = new PhysicsInstanceProvider();
var descriptor = new SheetBendingProcess();
var problemData = provider.LoadData(descriptor);
var shapeConstraintProblemData = new ShapeConstrainedRegressionProblemData(problemData);
var structureTemplate = new StructureTemplate();
Parameters.Add(new ValueParameter(
ProblemDataParameterName,
shapeConstraintProblemData));
Parameters.Add(new FixedValueParameter(
StructureTemplateParameterName,
StructureTemplateDescriptionText,
structureTemplate));
Parameters.Add(new FixedValueParameter(
ApplyLinearScalingParameterName, new BoolValue(true)
));
Parameters.Add(new FixedValueParameter(
OptimizeParametersParameterName, new BoolValue(true)
));
Parameters.Add(new ValueParameter(
InterpreterParameterName,
new SymbolicDataAnalysisExpressionTreeBatchInterpreter()) { Hidden = true });
Parameters.Add(new FixedValueParameter(
EstimationLimitsParameterName,
new DoubleLimit(double.NegativeInfinity, double.PositiveInfinity)) { Hidden = true });
Parameters.Add(new ResultParameter(BestTrainingSolutionParameterName, "") { Hidden = true });
this.EvaluatorParameter.Hidden = true;
Operators.Add(new SymbolicDataAnalysisVariableFrequencyAnalyzer());
Operators.Add(new MinAverageMaxSymbolicExpressionTreeLengthAnalyzer());
Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer());
RegisterEventHandlers();
StructureTemplate.ApplyLinearScaling = ApplyLinearScaling;
StructureTemplate.Template =
"(" +
"(210000 / (210000 + h)) * ((sigma_y * t * t) / (wR * Rt * t)) + " +
"PlasticHardening(_) - Elasticity(_)" +
")" +
" * C(_)";
}
public StructuredSymbolicRegressionSingleObjectiveProblem(StructuredSymbolicRegressionSingleObjectiveProblem original, Cloner cloner) : base(original, cloner) {
RegisterEventHandlers();
}
public override IDeepCloneable Clone(Cloner cloner) =>
new StructuredSymbolicRegressionSingleObjectiveProblem(this, cloner);
[StorableConstructor]
protected StructuredSymbolicRegressionSingleObjectiveProblem(StorableConstructorFlag _) : base(_) { }
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
Parameters.Add(new FixedValueParameter(ApplyLinearScalingParameterName, new BoolValue(StructureTemplate.ApplyLinearScaling)));
}
if (!Parameters.ContainsKey(OptimizeParametersParameterName)) {
Parameters.Add(new FixedValueParameter(OptimizeParametersParameterName, new BoolValue(false)));
}
RegisterEventHandlers();
}
#endregion
private void RegisterEventHandlers() {
if (StructureTemplate != null) {
StructureTemplate.Changed += OnTemplateChanged;
}
ProblemDataParameter.ValueChanged += ProblemDataParameterValueChanged;
ApplyLinearScalingParameter.Value.ValueChanged += (o, e) => StructureTemplate.ApplyLinearScaling = ApplyLinearScaling;
}
private void ProblemDataParameterValueChanged(object sender, EventArgs e) {
StructureTemplate.Reset();
// InfoBox for Reset?
}
private void OnTemplateChanged(object sender, EventArgs args) {
ApplyLinearScaling = StructureTemplate.ApplyLinearScaling;
SetupEncoding();
}
private void SetupEncoding() {
foreach (var e in Encoding.Encodings.ToArray())
Encoding.Remove(e);
var templateNumberTreeNodes = StructureTemplate.Tree.IterateNodesPrefix().OfType();
if (templateNumberTreeNodes.Any()) {
var templateParameterValues = templateNumberTreeNodes.Select(n => n.Value).ToArray();
var encoding = new RealVectorEncoding(NumericParametersEncoding, templateParameterValues.Length);
var creator = encoding.Operators.OfType().First();
creator.MeanParameter.Value = new RealVector(templateParameterValues);
creator.SigmaParameter.Value = new DoubleArray(templateParameterValues.Length);
encoding.SolutionCreator = creator;
Encoding.Add(encoding);
}
foreach (var subFunction in StructureTemplate.SubFunctions) {
subFunction.SetupVariables(ProblemData.AllowedInputVariables);
// prevent the same encoding twice
if (Encoding.Encodings.Any(x => x.Name == subFunction.Name)) continue;
var encoding = new SymbolicExpressionTreeEncoding(
subFunction.Name,
subFunction.Grammar,
subFunction.MaximumSymbolicExpressionTreeLength,
subFunction.MaximumSymbolicExpressionTreeDepth);
Encoding.Add(encoding);
}
//set single point || copy crossover for numeric parameters
var multiCrossover = (IParameterizedItem)Encoding.Operators.OfType().First();
foreach (var param in multiCrossover.Parameters.OfType>()) {
if (!param.Name.Contains(NumericParametersEncoding)) continue;
var singlePointCrossover = param.ValidValues.OfType().First();
var copyCrossover = param.ValidValues.OfType().First();
var realvectorEncoding = (RealVectorEncoding)Encoding.Encodings.Where(e => e.Name == NumericParametersEncoding).First();
if (realvectorEncoding.Length == 1) { //single-point crossover throws if encoding length == 1
param.Value = copyCrossover;
} else
param.Value = singlePointCrossover;
}
//adapt crossover probability for subtree crossover
foreach (var param in multiCrossover.Parameters.OfType>()) {
var subtreeCrossover = param.ValidValues.OfType().FirstOrDefault();
if (subtreeCrossover != null) {
subtreeCrossover.CrossoverProbability = 1.0 / Encoding.Encodings.OfType().Count();
param.Value = subtreeCrossover;
}
}
//set multi manipulator as default manipulator for all symbolic expression tree encoding parts
var manipulator = (IParameterizedItem)Encoding.Operators.OfType().First();
foreach (var param in manipulator.Parameters.OfType>()) {
var m = param.ValidValues.OfType().FirstOrDefault();
param.Value = m ?? param.ValidValues.First();
}
}
public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
base.Analyze(individuals, qualities, results, random);
var best = GetBestIndividual(individuals, qualities).Item1;
if (!results.ContainsKey(BestTrainingSolutionParameter.ActualName)) {
results.Add(new Result(BestTrainingSolutionParameter.ActualName, typeof(SymbolicRegressionSolution)));
}
var tree = (ISymbolicExpressionTree)best[SymbolicExpressionTreeName];
var model = new SymbolicRegressionModel(ProblemData.TargetVariable, tree, Interpreter);
var solution = model.CreateRegressionSolution(ProblemData);
results[BestTrainingSolutionParameter.ActualName].Value = solution;
}
public override double Evaluate(Individual individual, IRandom random) {
var templateTree = StructureTemplate.Tree;
if (templateTree == null)
throw new ArgumentException("No structure template defined!");
var tree = BuildTreeFromIndividual(templateTree, individual, containsNumericParameters: StructureTemplate.ContainsNumericParameters);
individual[SymbolicExpressionTreeName] = tree;
if (OptimizeParameters) {
var excludeNodes = GetTemplateTreeNodes(tree.Root).OfType();
ParameterOptimization.OptimizeTreeParameters(ProblemData, tree, interpreter: Interpreter, excludeNodes: excludeNodes);
} else if (ApplyLinearScaling) {
LinearScaling.AdjustLinearScalingParams(ProblemData, tree, Interpreter);
}
UpdateIndividualFromTree(tree, individual, containsNumericParameters: StructureTemplate.ContainsNumericParameters);
//calculate NMSE
var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(tree, ProblemData.Dataset, ProblemData.TrainingIndices);
var boundedEstimatedValues = estimatedValues.LimitToRange(EstimationLimits.Lower, EstimationLimits.Upper);
var targetValues = ProblemData.TargetVariableTrainingValues;
var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetValues, boundedEstimatedValues, out var errorState);
if (errorState != OnlineCalculatorError.None)
nmse = 1.0;
//evaluate constraints
var constraints = Enumerable.Empty();
if (ProblemData is ShapeConstrainedRegressionProblemData scProbData)
constraints = scProbData.ShapeConstraints.EnabledConstraints;
if (constraints.Any()) {
var boundsEstimator = new IntervalArithBoundsEstimator();
var constraintViolations = IntervalUtil.GetConstraintViolations(constraints, boundsEstimator, ProblemData.VariableRanges, tree);
// infinite/NaN constraints
if (constraintViolations.Any(x => double.IsNaN(x) || double.IsInfinity(x)))
nmse = 1.0;
if (constraintViolations.Any(x => x > 0.0))
nmse = 1.0;
}
return nmse;
}
private static IEnumerable GetTemplateTreeNodes(ISymbolicExpressionTreeNode rootNode) {
yield return rootNode;
foreach (var node in rootNode.Subtrees) {
if (node is SubFunctionTreeNode) {
yield return node;
continue;
}
foreach (var subNode in GetTemplateTreeNodes(node))
yield return subNode;
}
}
private static ISymbolicExpressionTree BuildTreeFromIndividual(ISymbolicExpressionTree template, Individual individual, bool containsNumericParameters) {
var resolvedTree = (ISymbolicExpressionTree)template.Clone();
//set numeric parameter values
if (containsNumericParameters) {
var realVector = individual.RealVector(NumericParametersEncoding);
var numberTreeNodes = resolvedTree.IterateNodesPrefix().OfType().ToArray();
if (realVector.Length != numberTreeNodes.Length)
throw new InvalidOperationException("The number of numeric parameters in the tree does not match the provided numerical values.");
for (int i = 0; i < numberTreeNodes.Length; i++)
numberTreeNodes[i].Value = realVector[i];
}
// build main tree
foreach (var subFunctionTreeNode in resolvedTree.IterateNodesPrefix().OfType()) {
var subFunctionTree = individual.SymbolicExpressionTree(subFunctionTreeNode.Name);
// extract function tree
var subTree = subFunctionTree.Root.GetSubtree(0) // StartSymbol
.GetSubtree(0); // First Symbol
subTree = (ISymbolicExpressionTreeNode)subTree.Clone();
subFunctionTreeNode.AddSubtree(subTree);
}
return resolvedTree;
}
private static void UpdateIndividualFromTree(ISymbolicExpressionTree tree, Individual individual, bool containsNumericParameters) {
var clonedTree = (ISymbolicExpressionTree)tree.Clone();
foreach (var subFunctionTreeNode in clonedTree.IterateNodesPrefix().OfType()) {
var grammar = ((ISymbolicExpressionTree)individual[subFunctionTreeNode.Name]).Root.Grammar;
var functionTreeNode = subFunctionTreeNode.GetSubtree(0);
//remove function code to make numeric parameters extraction easier
subFunctionTreeNode.RemoveSubtree(0);
var rootNode = (SymbolicExpressionTreeTopLevelNode)new ProgramRootSymbol().CreateTreeNode();
rootNode.SetGrammar(grammar);
var startNode = (SymbolicExpressionTreeTopLevelNode)new StartSymbol().CreateTreeNode();
startNode.SetGrammar(grammar);
rootNode.AddSubtree(startNode);
startNode.AddSubtree(functionTreeNode);
var functionTree = new SymbolicExpressionTree(rootNode);
individual[subFunctionTreeNode.Name] = functionTree;
}
//set numeric parameter values
if (containsNumericParameters) {
var realVector = individual.RealVector(NumericParametersEncoding);
var numberTreeNodes = clonedTree.IterateNodesPrefix().OfType().ToArray();
if (realVector.Length != numberTreeNodes.Length)
throw new InvalidOperationException("The number of numeric parameters in the tree does not match the provided numerical values.");
for (int i = 0; i < numberTreeNodes.Length; i++)
realVector[i] = numberTreeNodes[i].Value;
}
}
public void Load(IRegressionProblemData data) {
ProblemData = data;
}
}
}