using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Core;
using HeuristicLab.Optimization;
using HEAL.Attic;
using HeuristicLab.Common;
using HeuristicLab.Problems.Instances;
using HeuristicLab.Parameters;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.PluginInfrastructure;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
  /// <summary>
  /// Single-objective symbolic regression problem in which the user supplies a structure template
  /// containing sub-functions. Every sub-function of the template gets its own symbolic expression
  /// tree encoding; an individual is evaluated by inserting its sub-function trees into a clone of
  /// the template tree and computing the Pearson R² of the resulting tree on the training partition.
  /// </summary>
  [StorableType("7464E84B-65CC-440A-91F0-9FA920D730F9")]
  [Item(Name = "Structured Symbolic Regression Single Objective Problem (single-objective)", Description = "A problem with a structural definition and unfixed subfunctions.")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 150)]
  public class StructuredSymbolicRegressionSingleObjectiveProblem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData> {
    #region Constants
    private const string ProblemDataParameterName = "ProblemData";
    private const string StructureTemplateParameterName = "Structure Template";
    private const string InterpreterParameterName = "Interpreter";
    private const string EstimationLimitsParameterName = "EstimationLimits";
    private const string BestTrainingSolutionParameterName = "Best Training Solution";

    // Key under which Evaluate stores the assembled tree in the individual and Analyze reads it back.
    private const string SymbolicExpressionTreeName = "SymbolicExpressionTree";

    private const string StructureTemplateDescriptionText =
      "Enter your expression as string in infix format into the empty input field.\n" +
      "By checking the \"Apply Linear Scaling\" checkbox you can add the relevant scaling terms to your expression.\n" +
      "After entering the expression click parse to build the tree.\n" +
      "To edit the defined sub-functions, click on the coressponding colored node in the tree view.";
    #endregion

    #region Parameters
    public IValueParameter<IRegressionProblemData> ProblemDataParameter => (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName];
    public IFixedValueParameter<StructureTemplate> StructureTemplateParameter => (IFixedValueParameter<StructureTemplate>)Parameters[StructureTemplateParameterName];
    public IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter => (IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName];
    public IFixedValueParameter<DoubleLimit> EstimationLimitsParameter => (IFixedValueParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName];
    public IResultParameter<ISymbolicRegressionSolution> BestTrainingSolutionParameter => (IResultParameter<ISymbolicRegressionSolution>)Parameters[BestTrainingSolutionParameterName];
    #endregion

    #region Properties
    /// <summary>
    /// The regression problem data. Setting it replaces the parameter value and then raises
    /// <see cref="ProblemDataChanged"/>.
    /// </summary>
    public IRegressionProblemData ProblemData {
      get => ProblemDataParameter.Value;
      set {
        ProblemDataParameter.Value = value;
        ProblemDataChanged?.Invoke(this, EventArgs.Empty);
      }
    }

    public StructureTemplate StructureTemplate => StructureTemplateParameter.Value;

    public ISymbolicDataAnalysisExpressionTreeInterpreter Interpreter => InterpreterParameter.Value;

    IParameter IDataAnalysisProblem.ProblemDataParameter => ProblemDataParameter;

    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData => ProblemData;

    public DoubleLimit EstimationLimits => EstimationLimitsParameter.Value;

    /// <summary>Qualities are maximized (Evaluate returns a Pearson R² value).</summary>
    public override bool Maximization => true;
    #endregion

    #region EventHandlers
    public event EventHandler ProblemDataChanged;
    #endregion

    #region Constructors & Cloning
    /// <summary>
    /// Creates the problem with default problem data, an empty structure template, a hidden
    /// interpreter parameter and estimation limits derived from the target variable's range.
    /// </summary>
    public StructuredSymbolicRegressionSingleObjectiveProblem() {
      var problemData = new ShapeConstrainedRegressionProblemData();

      // Estimation limits: the target interval widened by ten times its width on both sides.
      var targetInterval = problemData.VariableRanges.GetInterval(problemData.TargetVariable);
      var estimationWidth = targetInterval.Width * 10;

      Parameters.Add(new ValueParameter<IRegressionProblemData>(
        ProblemDataParameterName,
        problemData));
      Parameters.Add(new FixedValueParameter<StructureTemplate>(
        StructureTemplateParameterName,
        StructureTemplateDescriptionText,
        new StructureTemplate()));
      Parameters.Add(new ValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(
        InterpreterParameterName,
        new SymbolicDataAnalysisExpressionTreeInterpreter()) { Hidden = true });
      Parameters.Add(new FixedValueParameter<DoubleLimit>(
        EstimationLimitsParameterName,
        new DoubleLimit(targetInterval.LowerBound - estimationWidth, targetInterval.UpperBound + estimationWidth)));
      Parameters.Add(new ResultParameter<ISymbolicRegressionSolution>(BestTrainingSolutionParameterName, ""));

      RegisterEventHandlers();

      Operators.Add(new SymbolicDataAnalysisVariableFrequencyAnalyzer());
      Operators.Add(new MinAverageMaxSymbolicExpressionTreeLengthAnalyzer());
      // TODO: change to value lookup, then re-enable:
      // Operators.Add(new SymbolicExpressionTreeLengthAnalyzer());
      Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer());
    }

    /// <summary>Cloning constructor; re-attaches this instance's handlers to the cloned parameters.</summary>
    public StructuredSymbolicRegressionSingleObjectiveProblem(StructuredSymbolicRegressionSingleObjectiveProblem original, Cloner cloner)
      : base(original, cloner) {
      // The handlers wired in the default constructor belong to the original instance and its
      // parameter objects; the clone has to subscribe to its own parameters.
      RegisterEventHandlers();
    }

    [StorableConstructor]
    protected StructuredSymbolicRegressionSingleObjectiveProblem(StorableConstructorFlag _) : base(_) { }

    /// <summary>Re-attaches the event handlers once a persisted instance has been restored.</summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      RegisterEventHandlers();
    }

    /// <summary>
    /// Subscribes to template changes (to rebuild the encodings) and to problem-data replacement
    /// (to reset the template). Must be called exactly once per instance.
    /// </summary>
    private void RegisterEventHandlers() {
      StructureTemplate.Changed += OnTemplateChanged;
      ProblemDataParameter.ValueChanged += ProblemDataParameterValueChanged;
    }
    #endregion

    #region Cloning
    public override IDeepCloneable Clone(Cloner cloner) =>
      new StructuredSymbolicRegressionSingleObjectiveProblem(this, cloner);
    #endregion

    /// <summary>Resets the structure template whenever the problem data parameter gets a new value.</summary>
    private void ProblemDataParameterValueChanged(object sender, EventArgs e) {
      StructureTemplate.Reset();
      // InfoBox for Reset?
    }

    /// <summary>Rebuilds the encodings whenever the structure template reports a change.</summary>
    private void OnTemplateChanged(object sender, EventArgs args) {
      SetupStructureTemplate();
    }

    /// <summary>
    /// Removes all current encodings and adds one symbolic expression tree encoding per
    /// sub-function of the template, after configuring that sub-function's variable symbol.
    /// </summary>
    private void SetupStructureTemplate() {
      // Snapshot first: Remove changes the collection that is being iterated.
      foreach (var encoding in Encoding.Encodings.ToArray()) {
        Encoding.Remove(encoding);
      }

      foreach (var subFunction in StructureTemplate.SubFunctions.Values) {
        SetupVariables(subFunction);

        // Prevent adding the same encoding twice.
        if (Encoding.Encodings.Any(x => x.Name == subFunction.Name)) {
          continue;
        }

        Encoding.Add(new SymbolicExpressionTreeEncoding(
          subFunction.Name,
          subFunction.Grammar,
          subFunction.MaximumSymbolicExpressionTreeLength,
          subFunction.MaximumSymbolicExpressionTreeDepth));
      }
    }

    /// <summary>
    /// Runs the base analysis and then publishes a regression solution built from the best
    /// individual's assembled tree under the best-training-solution result name.
    /// </summary>
    /// <param name="individuals">Individuals of the current population.</param>
    /// <param name="qualities">Qualities, matched to <paramref name="individuals"/> by position.</param>
    /// <param name="results">Result collection that receives the best training solution.</param>
    /// <param name="random">Random number generator (only forwarded to the base implementation).</param>
    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      // Only positions present in both arrays are considered.
      var count = Math.Min(individuals.Length, qualities.Length);
      if (count == 0) {
        return; // nothing to report for an empty population
      }

      // Single pass instead of sorting the whole population. Tie handling mirrors a stable
      // ascending sort: the last of several equal maxima wins, the first of several equal minima.
      var bestIndex = 0;
      for (var i = 1; i < count; i++) {
        var comparison = qualities[i].CompareTo(qualities[bestIndex]);
        if (Maximization ? comparison >= 0 : comparison < 0) {
          bestIndex = i;
        }
      }
      var best = individuals[bestIndex];

      var resultName = BestTrainingSolutionParameter.ActualName;
      if (!results.ContainsKey(resultName)) {
        results.Add(new Result(resultName, typeof(SymbolicRegressionSolution)));
      }

      // The tree was stored in the individual by Evaluate.
      var tree = (ISymbolicExpressionTree)best[SymbolicExpressionTreeName];
      var model = new SymbolicRegressionModel(ProblemData.TargetVariable, tree, Interpreter);
      results[resultName].Value = model.CreateRegressionSolution(ProblemData);
    }

    /// <summary>
    /// Assembles the full tree for <paramref name="individual"/>, optionally adjusts its linear
    /// scaling constants, stores the tree in the individual and returns its Pearson R² on the
    /// training indices.
    /// </summary>
    /// <param name="individual">Individual holding one tree per sub-function.</param>
    /// <param name="random">Random number generator (unused).</param>
    /// <returns>The quality computed by the Pearson R² evaluator.</returns>
    /// <exception cref="ArgumentException">No structure template tree has been defined.</exception>
    public override double Evaluate(Individual individual, IRandom random) {
      var tree = BuildTree(individual);

      if (StructureTemplate.ApplyLinearScaling) {
        AdjustLinearScalingParams(ProblemData, tree, Interpreter);
      }

      // Keep the assembled tree so that Analyze can build a solution from it.
      individual[SymbolicExpressionTreeName] = tree;

      return SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
        Interpreter, tree,
        EstimationLimits.Lower, EstimationLimits.Upper,
        ProblemData, ProblemData.TrainingIndices, false);
    }

    /// <summary>
    /// Computes linear scaling parameters from the tree's estimated values and the target values
    /// on the training indices and, if the calculation reports no error, writes them into the
    /// offset and scaling constant nodes of <paramref name="tree"/>.
    /// </summary>
    private static void AdjustLinearScalingParams(IRegressionProblemData problemData, ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter) {
      // Expected layout: root -> first subtree -> offset node, whose children are a constant and
      // the scaling node; the scaling node in turn has a constant child.
      var offsetNode = tree.Root.GetSubtree(0).GetSubtree(0);
      var scalingNode = offsetNode.Subtrees.First(x => !(x is ConstantTreeNode));

      var offsetConstantNode = (ConstantTreeNode)offsetNode.Subtrees.First(x => x is ConstantTreeNode);
      var scalingConstantNode = (ConstantTreeNode)scalingNode.Subtrees.First(x => x is ConstantTreeNode);

      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices);
      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);

      OnlineLinearScalingParameterCalculator.Calculate(estimatedValues, targetValues, out double a, out double b, out OnlineCalculatorError error);
      if (error == OnlineCalculatorError.None) {
        offsetConstantNode.Value = a;
        scalingConstantNode.Value = b;
      }
    }

    /// <summary>
    /// Clones the template tree and appends, to every sub-function node in it, a clone of the
    /// corresponding tree taken from <paramref name="individual"/>.
    /// </summary>
    /// <exception cref="ArgumentException">No structure template tree has been defined.</exception>
    private ISymbolicExpressionTree BuildTree(Individual individual) {
      if (StructureTemplate.Tree == null) {
        throw new ArgumentException("No structure template defined!");
      }

      var templateTree = (ISymbolicExpressionTree)StructureTemplate.Tree.Clone();

      // build main tree
      foreach (var subFunctionTreeNode in templateTree.IterateNodesPrefix().OfType<SubFunctionTreeNode>()) {
        var subFunctionTree = individual.SymbolicExpressionTree(subFunctionTreeNode.Name);

        // add new tree
        var subTree = subFunctionTree.Root
          .GetSubtree(0)  // Start
          .GetSubtree(0); // Offset
        subTree = (ISymbolicExpressionTreeNode)subTree.Clone();
        subFunctionTreeNode.AddSubtree(subTree);
      }

      return templateTree;
    }

    /// <summary>
    /// Ensures the sub-function's grammar has a "Variable" symbol and restricts its allowed
    /// variables to the sub-function's arguments; the argument "_" allows every input variable
    /// other than the target.
    /// </summary>
    /// <exception cref="AggregateException">
    /// One or more arguments are not input variables; contains one
    /// <see cref="ArgumentException"/> per invalid argument.
    /// </exception>
    private void SetupVariables(SubFunction subFunction) {
      var varSym = (Variable)subFunction.Grammar.GetSymbol("Variable");
      if (varSym == null) {
        varSym = new Variable();
        subFunction.Grammar.AddSymbol(varSym);
      }

      // Materialize once; the names are enumerated several times below.
      var allVariables = ProblemData.InputVariables.Select(x => x.Value).ToArray();
      var allInputs = allVariables.Where(x => x != ProblemData.TargetVariable).ToArray();

      // set all variables
      varSym.AllVariableNames = allVariables;

      // set all allowed variables
      if (subFunction.Arguments.Contains("_")) {
        varSym.VariableNames = allInputs;
      } else {
        var knownInputs = new HashSet<string>(allInputs);
        var vars = new List<string>();
        var exceptions = new List<Exception>();

        foreach (var arg in subFunction.Arguments) {
          if (knownInputs.Contains(arg)) {
            vars.Add(arg);
          } else {
            exceptions.Add(new ArgumentException($"The argument '{arg}' for sub-function '{subFunction.Name}' is not a valid variable."));
          }
        }

        // Report all invalid arguments together instead of stopping at the first one.
        if (exceptions.Any()) {
          throw new AggregateException(exceptions);
        }

        varSym.VariableNames = vars;
      }

      varSym.Enabled = true;
    }

    /// <summary>Loads a problem instance by assigning it to <see cref="ProblemData"/>.</summary>
    public void Load(IRegressionProblemData data) => ProblemData = data;
  }
}