Index: /branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.csproj
===================================================================
--- /branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.csproj (revision 18191)
+++ /branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.csproj (revision 18192)
@@ -47,4 +47,5 @@
AllRules.ruleset
false
+ default
@@ -57,4 +58,5 @@
AllRules.ruleset
false
+ default
@@ -67,4 +69,5 @@
AllRules.ruleset
false
+ default
@@ -77,4 +80,5 @@
AllRules.ruleset
false
+ default
@@ -87,4 +91,5 @@
AllRules.ruleset
false
+ default
@@ -97,4 +102,5 @@
AllRules.ruleset
false
+ default
@@ -127,4 +133,5 @@
+
Index: /branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/ParameterOptimization.cs
===================================================================
--- /branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/ParameterOptimization.cs (revision 18192)
+++ /branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/ParameterOptimization.cs (revision 18192)
@@ -0,0 +1,178 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
+ */
+#endregion
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
+
+namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
+ public static class ParameterOptimization {
+ public static double OptimizeTreeParameters(IRegressionProblemData problemData, ISymbolicExpressionTree tree,
+ int maxIterations = 10, bool updateParametersInTree = true, bool updateVariableWeights = true,
+ double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
+ IEnumerable<int> rows = null, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter = null,
+ Action<double[], double, object> iterationCallback = null) {
+
+ if (rows == null) rows = problemData.TrainingIndices;
+ if (interpreter == null) interpreter = new SymbolicDataAnalysisExpressionTreeBatchInterpreter();
+
+ // Numeric parameters in the tree become variables for parameter optimization.
+ // Variables in the tree become parameters (fixed values) for parameter optimization.
+ // For each parameter (variable in the original tree) we store the
+ // variable name, variable value (for factor vars) and lag as a DataForVariable object.
+ // A dictionary is used to find parameters
+ double[] initialParameters;
+ var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();
+
+ TreeToAutoDiffTermConverter.ParametricFunction func;
+ TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
+ if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, addLinearScalingTerms: false, out parameters, out initialParameters, out func, out func_grad))
+ throw new NotSupportedException("Could not optimize parameters of symbolic expression tree due to not supported symbols used in the tree.");
+ var parameterEntries = parameters.ToArray(); // order of entries must be the same for x
+
+ // extract initial parameters
+ double[] c = (double[])initialParameters.Clone();
+ alglib.minlmreport rep;
+
+ double originalQuality = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(
+ tree, problemData, rows,
+ interpreter, applyLinearScaling: false,
+ lowerEstimationLimit, upperEstimationLimit);
+
+
+ IDataset ds = problemData.Dataset;
+ int n = rows.Count();
+ int k = parameters.Count;
+
+ double[,] x = new double[n, k];
+ int row = 0;
+ foreach (var r in rows) {
+ int col = 0;
+ foreach (var info in parameterEntries) {
+ if (ds.VariableHasType<double>(info.variableName)) {
+ x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag);
+ } else if (ds.VariableHasType<string>(info.variableName)) {
+ x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0;
+ } else throw new InvalidProgramException("found a variable of unknown type");
+ col++;
+ }
+ row++;
+ }
+ double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
+
+ alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj);
+
+ try {
+ alglib.minlmcreatevj(y.Length, c, out var lmstate);
+ alglib.minlmsetcond(lmstate, 0.0, maxIterations);
+ alglib.minlmsetxrep(lmstate, iterationCallback != null);
+ // alglib.minlmoptguardgradient(lmstate, 1e-5); // for debugging gradient calculation
+ alglib.minlmoptimize(lmstate, CreateFunc(func, x, y), CreateJac(func_grad, x, y), xrep, null);
+ alglib.minlmresults(lmstate, out c, out rep);
+ // alglib.minlmoptguardresults(lmstate, out var optGuardReport);
+ } catch (ArithmeticException) {
+ return originalQuality;
+ } catch (alglib.alglibexception) {
+ return originalQuality;
+ }
+
+
+ // * TerminationType, completion code:
+ // * -8 optimizer detected NAN/INF values either in the function itself,
+ // or in its Jacobian
+ // * -5 inappropriate solver was used:
+ // * solver created with minlmcreatefgh() used on problem with
+ // general linear constraints (set with minlmsetlc() call).
+ // * -3 constraints are inconsistent
+ // * 2 relative step is no more than EpsX.
+ // * 5 MaxIts steps was taken
+ // * 7 stopping conditions are too stringent,
+ // further improvement is impossible
+ // * 8 terminated by user who called MinLMRequestTermination().
+ // X contains point which was "current accepted" when termination
+ // request was submitted.
+ if (rep.terminationtype > 0) {
+ UpdateParameters(tree, c, updateVariableWeights);
+ }
+ var quality = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(
+ tree, problemData, rows,
+ interpreter, applyLinearScaling: false,
+ lowerEstimationLimit, upperEstimationLimit);
+
+ if (!updateParametersInTree) UpdateParameters(tree, initialParameters, updateVariableWeights);
+
+ if (originalQuality < quality || double.IsNaN(quality)) {
+ UpdateParameters(tree, initialParameters, updateVariableWeights);
+ return originalQuality;
+ }
+ return quality;
+ }
+
+ private static void UpdateParameters(ISymbolicExpressionTree tree, double[] parameters, bool updateVariableWeights) {
+ int i = 0;
+ foreach (var node in tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) {
+ NumberTreeNode numberTreeNode = node as NumberTreeNode;
+ VariableTreeNodeBase variableTreeNodeBase = node as VariableTreeNodeBase;
+ FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode;
+ if (numberTreeNode != null) {
+ if (numberTreeNode.Parent.Symbol is Power
+ && numberTreeNode.Parent.GetSubtree(1) == numberTreeNode) continue; // exponents in powers are not optimized (see TreeToAutoDiffTermConverter)
+ numberTreeNode.Value = parameters[i++];
+ } else if (updateVariableWeights && variableTreeNodeBase != null)
+ variableTreeNodeBase.Weight = parameters[i++];
+ else if (factorVarTreeNode != null) {
+ for (int j = 0; j < factorVarTreeNode.Weights.Length; j++)
+ factorVarTreeNode.Weights[j] = parameters[i++];
+ }
+ }
+ }
+
+ private static alglib.ndimensional_fvec CreateFunc(TreeToAutoDiffTermConverter.ParametricFunction func, double[,] x, double[] y) {
+ int d = x.GetLength(1);
+ // row buffer
+ var xi = new double[d];
+ // function must return residuals, alglib optimizes resid²
+ return (double[] c, double[] resid, object o) => {
+ for (int i = 0; i < y.Length; i++) {
+ Buffer.BlockCopy(x, i * d * sizeof(double), xi, 0, d * sizeof(double)); // copy row. We are using BlockCopy instead of Array.Copy because x has rank 2
+ resid[i] = func(c, xi) - y[i];
+ }
+ };
+ }
+
+ private static alglib.ndimensional_jac CreateJac(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad, double[,] x, double[] y) {
+ int numParams = x.GetLength(1);
+ // row buffer
+ var xi = new double[numParams];
+ return (double[] c, double[] resid, double[,] jac, object o) => {
+ int numVars = c.Length;
+ for (int i = 0; i < y.Length; i++) {
+ Buffer.BlockCopy(x, i * numParams * sizeof(double), xi, 0, numParams * sizeof(double)); // copy row
+ var tuple = func_grad(c, xi);
+ resid[i] = tuple.Item2 - y[i];
+ Buffer.BlockCopy(tuple.Item1, 0, jac, i * numVars * sizeof(double), numVars * sizeof(double)); // copy the gradient to jac. BlockCopy because jac has rank 2.
+ }
+ };
+ }
+
+ }
+}
Index: /branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/StructuredSymbolicRegressionSingleObjectiveProblem.cs
===================================================================
--- /branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/StructuredSymbolicRegressionSingleObjectiveProblem.cs (revision 18191)
+++ /branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/StructuredSymbolicRegressionSingleObjectiveProblem.cs (revision 18192)
@@ -29,5 +29,4 @@
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
-using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Problems.Instances;
using HeuristicLab.Problems.Instances.DataAnalysis;
@@ -40,5 +39,4 @@
#region Constants
- private const string TreeEvaluatorParameterName = "TreeEvaluator";
private const string ProblemDataParameterName = "ProblemData";
private const string StructureTemplateParameterName = "Structure Template";
@@ -60,5 +58,4 @@
#region Parameters
- public IConstrainedValueParameter<SymbolicRegressionSingleObjectiveEvaluator> TreeEvaluatorParameter => (IConstrainedValueParameter<SymbolicRegressionSingleObjectiveEvaluator>)Parameters[TreeEvaluatorParameterName];
public IValueParameter<IRegressionProblemData> ProblemDataParameter => (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName];
public IFixedValueParameter<StructureTemplate> StructureTemplateParameter => (IFixedValueParameter<StructureTemplate>)Parameters[StructureTemplateParameterName];
@@ -80,6 +77,4 @@
}
}
-
- public SymbolicRegressionSingleObjectiveEvaluator TreeEvaluator => TreeEvaluatorParameter.Value;
public StructureTemplate StructureTemplate => StructureTemplateParameter.Value;
@@ -117,13 +112,4 @@
var structureTemplate = new StructureTemplate();
-
- var evaluators = new ItemSet<SymbolicRegressionSingleObjectiveEvaluator>(
- ApplicationManager.Manager.GetInstances<SymbolicRegressionSingleObjectiveEvaluator>()
- .Where(x => x.Maximization == Maximization));
-
- Parameters.Add(new ConstrainedValueParameter<SymbolicRegressionSingleObjectiveEvaluator>(
- TreeEvaluatorParameterName,
- evaluators,
- evaluators.First()));
Parameters.Add(new ValueParameter<IRegressionProblemData>(
@@ -202,7 +188,7 @@
ProblemDataParameter.ValueChanged += ProblemDataParameterValueChanged;
ApplyLinearScalingParameter.Value.ValueChanged += (o, e) => StructureTemplate.ApplyLinearScaling = ApplyLinearScaling;
- OptimizeParametersParameter.Value.ValueChanged += (o, e) => {
- if (OptimizeParameters) ApplyLinearScaling = true;
- };
+ //OptimizeParametersParameter.Value.ValueChanged += (o, e) => {
+ // if (OptimizeParameters) ApplyLinearScaling = true;
+ //};
}
@@ -267,20 +253,39 @@
throw new ArgumentException("No structure template defined!");
+ //create tree where all functions have been resolved (integrated)
var tree = BuildTree(templateTree, individual);
-
- // NMSEConstraintsEvaluator sets linear scaling terms itself
- if (ApplyLinearScaling && !(TreeEvaluator is NMSESingleObjectiveConstraintsEvaluator)) {
+ individual[SymbolicExpressionTreeName] = tree;
+
+ if (OptimizeParameters) {
+ ParameterOptimization.OptimizeTreeParameters(ProblemData, tree, interpreter: Interpreter);
+ } else if (ApplyLinearScaling) {
LinearScaling.AdjustLinearScalingParams(ProblemData, tree, Interpreter);
}
- individual[SymbolicExpressionTreeName] = tree;
-
- return TreeEvaluator.Evaluate(
- tree, ProblemData,
- ProblemData.TrainingIndices,
- Interpreter,
- StructureTemplate.ApplyLinearScaling,
- EstimationLimits.Lower,
- EstimationLimits.Upper);
+ //calculate NMSE
+ var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(tree, ProblemData.Dataset, ProblemData.TrainingIndices);
+ var boundedEstimatedValues = estimatedValues.LimitToRange(EstimationLimits.Lower, EstimationLimits.Upper);
+ var targetValues = ProblemData.TargetVariableTrainingValues;
+ var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetValues, boundedEstimatedValues, out var errorState);
+ if (errorState != OnlineCalculatorError.None)
+ nmse = 1.0;
+
+ //evaluate constraints
+ var constraints = Enumerable.Empty<ShapeConstraint>();
+ if (ProblemData is ShapeConstrainedRegressionProblemData scProbData)
+ constraints = scProbData.ShapeConstraints.EnabledConstraints;
+ if (constraints.Any()) {
+ var boundsEstimator = new IntervalArithBoundsEstimator();
+ var constraintViolations = IntervalUtil.GetConstraintViolations(constraints, boundsEstimator, ProblemData.VariableRanges, tree);
+
+ // infinite/NaN constraints
+ if (constraintViolations.Any(x => double.IsNaN(x) || double.IsInfinity(x)))
+ nmse = 1.0;
+
+ if (constraintViolations.Any(x => x > 0.0))
+ nmse = 1.0;
+ }
+
+ return nmse;
}