#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HEAL.Attic;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
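  // Evaluator that optimizes the numeric parameters (constants and variable weights) of a
  // symbolic regression model with ALGLIB's nonlinearly constrained solvers (minns/minnlc)
  // while enforcing the interval constraints defined in the problem data. The computed
  // quality is a squared-error measure; failed or constraint-violating runs fall back to
  // the variance of the target (the error of a constant model).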
  [Item("Constant Optimization Evaluator (with constraints)", "")]
  [StorableType("A8958E06-C54A-4193-862E-8315C86EB5C1")]
  public class ConstrainedConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
    private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations";
    private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement";
    private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability";
    private const string ConstantOptimizationRowsPercentageParameterName = "ConstantOptimizationRowsPercentage";
    private const string UpdateConstantsInTreeParameterName = "UpdateConstantsInSymbolicExpressionTree";
    private const string UpdateVariableWeightsParameterName = "Update Variable Weights";

    private const string FunctionEvaluationsResultParameterName = "Constants Optimization Function Evaluations";
    private const string GradientEvaluationsResultParameterName = "Constants Optimization Gradient Evaluations";
    private const string CountEvaluationsParameterName = "Count Function and Gradient Evaluations";

    public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; }
    }
    public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName]; }
    }
    public IFixedValueParameter<PercentValue> ConstantOptimizationProbabilityParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationProbabilityParameterName]; }
    }
    public IFixedValueParameter<PercentValue> ConstantOptimizationRowsPercentageParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationRowsPercentageParameterName]; }
    }
    public IFixedValueParameter<BoolValue> UpdateConstantsInTreeParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateConstantsInTreeParameterName]; }
    }
    public IFixedValueParameter<BoolValue> UpdateVariableWeightsParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateVariableWeightsParameterName]; }
    }

    public IResultParameter<IntValue> FunctionEvaluationsResultParameter {
      get { return (IResultParameter<IntValue>)Parameters[FunctionEvaluationsResultParameterName]; }
    }
    public IResultParameter<IntValue> GradientEvaluationsResultParameter {
      get { return (IResultParameter<IntValue>)Parameters[GradientEvaluationsResultParameterName]; }
    }
    public IFixedValueParameter<BoolValue> CountEvaluationsParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[CountEvaluationsParameterName]; }
    }
    public IConstrainedValueParameter<StringValue> SolverParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters["Solver"]; }
    }

    public IntValue ConstantOptimizationIterations {
      get { return ConstantOptimizationIterationsParameter.Value; }
    }
    public DoubleValue ConstantOptimizationImprovement {
      get { return ConstantOptimizationImprovementParameter.Value; }
    }
    public PercentValue ConstantOptimizationProbability {
      get { return ConstantOptimizationProbabilityParameter.Value; }
    }
    public PercentValue ConstantOptimizationRowsPercentage {
      get { return ConstantOptimizationRowsPercentageParameter.Value; }
    }
    public bool UpdateConstantsInTree {
      get { return UpdateConstantsInTreeParameter.Value.Value; }
      set { UpdateConstantsInTreeParameter.Value.Value = value; }
    }

    public bool UpdateVariableWeights {
      get { return UpdateVariableWeightsParameter.Value.Value; }
      set { UpdateVariableWeightsParameter.Value.Value = value; }
    }

    public bool CountEvaluations {
      get { return CountEvaluationsParameter.Value.Value; }
      set { CountEvaluationsParameter.Value.Value = value; }
    }

    public string Solver {
      get { return SolverParameter.Value.Value; }
    }
    public override bool Maximization {
      get { return false; }
    }

    [StorableConstructor]
    protected ConstrainedConstantOptimizationEvaluator(StorableConstructorFlag _) : base(_) { }
    protected ConstrainedConstantOptimizationEvaluator(ConstrainedConstantOptimizationEvaluator original, Cloner cloner)
      : base(original, cloner) {
    }
    public ConstrainedConstantOptimizationEvaluator()
      : base() {
      Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constants of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0)) { Hidden = true });
      Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized.", new PercentValue(1)));
      Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationRowsPercentageParameterName, "Determines the percentage of the rows which should be used for constant optimization.", new PercentValue(1)));
      Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true)) { Hidden = true });
      Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be optimized.", new BoolValue(true)) { Hidden = true });

      Parameters.Add(new FixedValueParameter<BoolValue>(CountEvaluationsParameterName, "Determines if function and gradient evaluation should be counted.", new BoolValue(false)));
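      // the two supported solver back-ends: ALGLIB minns (nonsmooth optimizer) and
      // ALGLIB minnlc run in SLP mode; OptimizeConstants dispatches on these names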
      var validSolvers = new ItemSet<StringValue>(new[] { "nonsmooth (minns)", "sequential linear programming (minnlc)" }.Select(s => new StringValue(s).AsReadOnly()));
      Parameters.Add(new ConstrainedValueParameter<StringValue>("Solver", "The solver algorithm", validSolvers, validSolvers.First()));
      Parameters.Add(new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
      Parameters.Add(new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ConstrainedConstantOptimizationEvaluator(this, cloner);
    }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() { }

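    // guards the aggregated function/gradient evaluation counts, which may be
    // updated from concurrently executing evaluators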
    private static readonly object locker = new object();

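    // optimizes the tree's parameters and reports the achieved quality; note that,
    // unlike the unconstrained constant optimization evaluator, this implementation
    // supports neither optimizing on a row subset nor skipping optimization
    // probabilistically (both cases throw NotSupportedException below)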
    public override IOperation InstrumentedApply() {
      var solution = SymbolicExpressionTreeParameter.ActualValue;
      double quality;
      if (RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) {
        IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
        var counter = new EvaluationsCounter();
        quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue,
          constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, Solver, ConstantOptimizationIterations.Value, updateVariableWeights: UpdateVariableWeights, lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower, upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper, updateConstantsInTree: UpdateConstantsInTree, counter: counter);

        if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) {
          throw new NotSupportedException();
        }

        if (CountEvaluations) {
          lock (locker) {
            FunctionEvaluationsResultParameter.ActualValue.Value += counter.FunctionEvaluations;
            GradientEvaluationsResultParameter.ActualValue.Value += counter.GradientEvaluations;
          }
        }

      } else {
        throw new NotSupportedException();
      }
      QualityParameter.ActualValue = new DoubleValue(quality);

      return base.InstrumentedApply();
    }

    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
      EstimationLimitsParameter.ExecutionContext = context;
      ApplyLinearScalingParameter.ExecutionContext = context;
      FunctionEvaluationsResultParameter.ExecutionContext = context;
      GradientEvaluationsResultParameter.ExecutionContext = context;

      // MSE evaluator is used on purpose instead of the constopt evaluator,
      // because Evaluate() is used to get the quality of evolved models on
      // different partitions of the dataset (e.g., best validation model)
      double mse = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, double.MinValue, double.MaxValue, problemData, rows, applyLinearScaling: false);

      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
      EstimationLimitsParameter.ExecutionContext = null;
      ApplyLinearScalingParameter.ExecutionContext = null;
      FunctionEvaluationsResultParameter.ExecutionContext = null;
      GradientEvaluationsResultParameter.ExecutionContext = null;

      return mse;
    }

    public class EvaluationsCounter {
      public int FunctionEvaluations = 0;
      public int GradientEvaluations = 0;
    }

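    // collects the tunable parameter nodes of a tree (variable weights and constants)
    // in prefix order, together with their current values, which serve as the
    // starting point theta for the optimization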
    private static void GetParameterNodes(ISymbolicExpressionTree tree, out List<ISymbolicExpressionTreeNode> thetaNodes, out List<double> thetaValues) {
      thetaNodes = new List<ISymbolicExpressionTreeNode>();
      thetaValues = new List<double>();

      var nodes = tree.IterateNodesPrefix().ToArray();
      for (int i = 0; i < nodes.Length; ++i) {
        var node = nodes[i];
        if (node is VariableTreeNode variableTreeNode) {
          thetaValues.Add(variableTreeNode.Weight);
          thetaNodes.Add(node);
        } else if (node is ConstantTreeNode constantTreeNode) {
          thetaNodes.Add(node);
          thetaValues.Add(constantTreeNode.Value);
        }
      }
    }

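    // Optimizes the parameters of the tree subject to the interval constraints of the
    // problem data. Overall procedure:
    //   1. linearly scale a copy of the model so that mean/stddev match the target,
    //   2. replace every constant and variable weight by a pseudo-variable theta_i,
    //   3. build constraint trees of the form c(theta) <= 0 from the interval constraints,
    //   4. minimize the SSE subject to those constraints with the selected ALGLIB solver,
    //   5. write the optimized parameters back into the original tree and return the MSE
    //      (or the target variance as a penalty if optimization fails or constraints are violated)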
    public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
      ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling,
      string solver,
      int maxIterations, bool updateVariableWeights = true,
      double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
      bool updateConstantsInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) {

      if (!updateVariableWeights) throw new NotSupportedException("not updating variable weights is not supported");
      if (!updateConstantsInTree) throw new NotSupportedException("not updating tree parameters is not supported");
      if (!applyLinearScaling) throw new NotSupportedException("application without linear scaling is not supported");

      // we always update constants, so we don't need to calculate initial quality
      // double originalQuality = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling: false);

      if (counter == null) counter = new EvaluationsCounter();
      var rowEvaluationsCounter = new EvaluationsCounter();

      var intervalConstraints = problemData.IntervalConstraints;
      var dataIntervals = problemData.VariableRanges.GetIntervals();

      // buffers
      var target = problemData.TargetVariableTrainingValues.ToArray();
      var targetStDev = target.StandardDeviationPop();
      var targetVariance = targetStDev * targetStDev;
      var targetMean = target.Average();
      var pred = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray();
      if (pred.Any(pi => double.IsInfinity(pi) || double.IsNaN(pi))) return targetVariance;

      var predStDev = pred.StandardDeviationPop();
      if (predStDev == 0) return targetVariance; // constant expression
      var predMean = pred.Average();

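      // linear scaling: shift and scale the model output so that its mean and standard
      // deviation match those of the target, i.e. scaled = offset + scalingFactor * pred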
      var scalingFactor = targetStDev / predStDev;
      var offset = targetMean - predMean * scalingFactor;

      ISymbolicExpressionTree scaledTree = null;
      if (applyLinearScaling) scaledTree = CopyAndScaleTree(tree, scalingFactor, offset);

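      // the parameters are tracked by name while the derivatives are built; afterwards
      // each theta is turned back into a ConstantTreeNode that is shared by the objective
      // tree and all constraint trees, so a single update of theta_i is visible to every
      // tree that references it (see ReplaceVarWithConst)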
      // convert constants to variables named theta...
      var treeForDerivation = ReplaceConstWithVar(scaledTree, out List<string> thetaNames, out List<double> thetaValues); // copies the tree

      // create trees for relevant derivatives
      Dictionary<string, ISymbolicExpressionTree> derivatives = new Dictionary<string, ISymbolicExpressionTree>();
      var allThetaNodes = thetaNames.Select(_ => new List<ConstantTreeNode>()).ToArray();
      var constraintTrees = new List<ISymbolicExpressionTree>();
      foreach (var constraint in intervalConstraints.Constraints) {
        if (constraint.IsDerivation) {
          if (!problemData.AllowedInputVariables.Contains(constraint.Variable))
            throw new ArgumentException($"Invalid constraint: the variable {constraint.Variable} does not exist in the dataset.");
          var df = DerivativeCalculator.Derive(treeForDerivation, constraint.Variable);

          // alglib requires constraint expressions of the form c(x) <= 0
          // -> we make two expressions, one for the lower bound and one for the upper bound

          if (constraint.Interval.UpperBound < double.PositiveInfinity) {
            var df_smaller_upper = Subtract((ISymbolicExpressionTree)df.Clone(), CreateConstant(constraint.Interval.UpperBound));
            // convert variables named theta back to constants
            var df_prepared = ReplaceVarWithConst(df_smaller_upper, thetaNames, thetaValues, allThetaNodes);
            constraintTrees.Add(df_prepared);
          }
          if (constraint.Interval.LowerBound > double.NegativeInfinity) {
            var df_larger_lower = Subtract(CreateConstant(constraint.Interval.LowerBound), (ISymbolicExpressionTree)df.Clone());
            // convert variables named theta back to constants
            var df_prepared = ReplaceVarWithConst(df_larger_lower, thetaNames, thetaValues, allThetaNodes);
            constraintTrees.Add(df_prepared);
          }
        } else {
          if (constraint.Interval.UpperBound < double.PositiveInfinity) {
            var f_smaller_upper = Subtract((ISymbolicExpressionTree)treeForDerivation.Clone(), CreateConstant(constraint.Interval.UpperBound));
            // convert variables named theta back to constants
            var df_prepared = ReplaceVarWithConst(f_smaller_upper, thetaNames, thetaValues, allThetaNodes);
            constraintTrees.Add(df_prepared);
          }
          if (constraint.Interval.LowerBound > double.NegativeInfinity) {
            var f_larger_lower = Subtract(CreateConstant(constraint.Interval.LowerBound), (ISymbolicExpressionTree)treeForDerivation.Clone());
            // convert variables named theta back to constants
            var df_prepared = ReplaceVarWithConst(f_larger_lower, thetaNames, thetaValues, allThetaNodes);
            constraintTrees.Add(df_prepared);
          }
        }
      }

      var preparedTree = ReplaceVarWithConst(treeForDerivation, thetaNames, thetaValues, allThetaNodes);

      // local function
      void UpdateThetaValues(double[] theta) {
        for (int i = 0; i < theta.Length; ++i) {
          foreach (var constNode in allThetaNodes[i]) constNode.Value = theta[i];
        }
      }

      var fi_eval = new double[target.Length];
      var jac_eval = new double[target.Length, thetaValues.Count];

      // define the callback used by the alglib optimizer
      // the x argument for this callback represents our theta
      // local function
      void calculate_jacobian(double[] x, double[] fi, double[,] jac, object obj) {
        UpdateThetaValues(x);

        var autoDiffEval = new VectorAutoDiffEvaluator();
        autoDiffEval.Evaluate(preparedTree, problemData.Dataset, problemData.TrainingIndices.ToArray(),
          GetParameterNodes(preparedTree, allThetaNodes), fi_eval, jac_eval);

        // calc sum of squared errors and gradient
        var sse = 0.0;
        var g = new double[x.Length];
        for (int i = 0; i < target.Length; i++) {
          var res = target[i] - fi_eval[i];
          sse += 0.5 * res * res;
          for (int j = 0; j < g.Length; j++) {
            g[j] -= res * jac_eval[i, j];
          }
        }

        fi[0] = sse / target.Length;
        for (int j = 0; j < x.Length; j++) { jac[0, j] = g[j] / target.Length; }

        var intervalEvaluator = new IntervalEvaluator();
        for (int i = 0; i < constraintTrees.Count; i++) {
          var interval = intervalEvaluator.Evaluate(constraintTrees[i], dataIntervals, GetParameterNodes(constraintTrees[i], allThetaNodes),
            out double[] lowerGradient, out double[] upperGradient);

          // we transformed this to a constraint c(x) <= 0, so only the upper bound is relevant for us
          fi[i + 1] = interval.UpperBound;
          for (int j = 0; j < x.Length; j++) {
            jac[i + 1, j] = upperGradient[j];
          }
        }
      }

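      // dispatch on the selected solver; both variants use box constraints
      // [-10000, 10000] for all parameters and signal constraint violations via
      // rep.nlcerr, in which case the penalty quality (target variance) is returned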
      if (solver.Contains("minns")) {
        alglib.minnsstate state;
        alglib.minnsreport rep;
        try {
          alglib.minnscreate(thetaValues.Count, thetaValues.ToArray(), out state);
          alglib.minnssetbc(state, thetaValues.Select(_ => -10000.0).ToArray(), thetaValues.Select(_ => +10000.0).ToArray());
          alglib.minnssetcond(state, 0, maxIterations);
          var s = Enumerable.Repeat(1d, thetaValues.Count).ToArray(); // scale is set to unit scale
          alglib.minnssetscale(state, s);

          // set nonlinear constraints: 0 equality constraints, constraintTrees.Count inequality constraints
          alglib.minnssetnlc(state, 0, constraintTrees.Count);

          alglib.minnsoptimize(state, calculate_jacobian, null, null);
          alglib.minnsresults(state, out double[] xOpt, out rep);

          // counter.FunctionEvaluations += rep.nfev; TODO
          counter.GradientEvaluations += rep.nfev;

          if (rep.terminationtype > 0) {
            // update parameters in tree
            var pIdx = 0;
            // here we lose the two last parameters (for linear scaling)
            foreach (var node in tree.IterateNodesPostfix()) {
              if (node is ConstantTreeNode constTreeNode) {
                constTreeNode.Value = xOpt[pIdx++];
              } else if (node is VariableTreeNode varTreeNode) {
                varTreeNode.Weight = xOpt[pIdx++];
              }
            }
            // note: we keep the optimized constants even when the tree is worse.
            // assert that we lose the last two parameters
            if (pIdx != xOpt.Length - 2) throw new InvalidProgramException();
          }
          if (Math.Abs(rep.nlcerr) > 0.01) return targetVariance; // constraints are violated
        } catch (ArithmeticException) {
          return targetVariance;
        } catch (alglib.alglibexception) {
          // eval MSE of original tree
          return targetVariance;
        }
      } else if (solver.Contains("minnlc")) {
        alglib.minnlcstate state;
        alglib.minnlcreport rep;
        alglib.optguardreport optGuardRep;
        try {
          alglib.minnlccreate(thetaValues.Count, thetaValues.ToArray(), out state);
          alglib.minnlcsetalgoslp(state); // SLP is more robust but slower
          alglib.minnlcsetbc(state, thetaValues.Select(_ => -10000.0).ToArray(), thetaValues.Select(_ => +10000.0).ToArray());
          alglib.minnlcsetcond(state, 0, maxIterations);
          var s = Enumerable.Repeat(1d, thetaValues.Count).ToArray(); // scale is set to unit scale
          alglib.minnlcsetscale(state, s);

          // set nonlinear constraints: 0 equality constraints, constraintTrees.Count inequality constraints
          alglib.minnlcsetnlc(state, 0, constraintTrees.Count);
          alglib.minnlcoptguardsmoothness(state, 1);

          alglib.minnlcoptimize(state, calculate_jacobian, null, null);
          alglib.minnlcresults(state, out double[] xOpt, out rep);
          alglib.minnlcoptguardresults(state, out optGuardRep);
          if (optGuardRep.nonc0suspected) throw new InvalidProgramException("optGuardRep.nonc0suspected");
          if (optGuardRep.nonc1suspected) {
            alglib.minnlcoptguardnonc1test1results(state, out alglib.optguardnonc1test1report strrep, out alglib.optguardnonc1test1report lngrep);
            throw new InvalidProgramException("optGuardRep.nonc1suspected");
          }

          // counter.FunctionEvaluations += rep.nfev; TODO
          counter.GradientEvaluations += rep.nfev;

          if (rep.terminationtype != -8) {
            // update parameters in tree
            var pIdx = 0;
            foreach (var node in tree.IterateNodesPostfix()) {
              if (node is ConstantTreeNode constTreeNode) {
                constTreeNode.Value = xOpt[pIdx++];
              } else if (node is VariableTreeNode varTreeNode) {
                varTreeNode.Weight = xOpt[pIdx++];
              }
            }
            // note: we keep the optimized constants even when the tree is worse.
            // assert that we lose the last two parameters
            if (pIdx != xOpt.Length - 2) throw new InvalidProgramException();

          }
          if (Math.Abs(rep.nlcerr) > 0.01) return targetVariance; // constraints are violated

        } catch (ArithmeticException) {
          return targetVariance;
        } catch (alglib.alglibexception) {
          return targetVariance;
        }
      } else {
        throw new ArgumentException($"Unknown solver {solver}");
      }

      // evaluate tree with updated constants
      var residualVariance = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling: true);
      return Math.Min(residualVariance, targetVariance);
    }

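    // wraps the model output f(x) into scalingFactor * f(x) + offset by splicing an
    // addition and a multiplication node above the original root subtree of the clone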
    private static ISymbolicExpressionTree CopyAndScaleTree(ISymbolicExpressionTree tree, double scalingFactor, double offset) {
      var m = (ISymbolicExpressionTree)tree.Clone();

      var add = MakeNode<Addition>(MakeNode<Multiplication>(m.Root.GetSubtree(0).GetSubtree(0), CreateConstant(scalingFactor)), CreateConstant(offset));
      m.Root.GetSubtree(0).RemoveSubtree(0);
      m.Root.GetSubtree(0).AddSubtree(add);
      return m;
    }

    #region helper
    private static ISymbolicExpressionTreeNode[] GetParameterNodes(ISymbolicExpressionTree tree, List<ConstantTreeNode>[] allNodes) {
      // TODO better solution necessary
      var treeConstNodes = tree.IterateNodesPostfix().OfType<ConstantTreeNode>().ToArray();
      var paramNodes = new ISymbolicExpressionTreeNode[allNodes.Length];
      for (int i = 0; i < paramNodes.Length; i++) {
        paramNodes[i] = allNodes[i].SingleOrDefault(n => treeConstNodes.Contains(n));
      }
      return paramNodes;
    }

    private static ISymbolicExpressionTree ReplaceVarWithConst(ISymbolicExpressionTree tree, List<string> thetaNames, List<double> thetaValues, List<ConstantTreeNode>[] thetaNodes) {
      var copy = (ISymbolicExpressionTree)tree.Clone();
      var nodes = copy.IterateNodesPostfix().ToList();
      for (int i = 0; i < nodes.Count; i++) {
        var n = nodes[i] as VariableTreeNode;
        if (n != null) {
          var thetaIdx = thetaNames.IndexOf(n.VariableName);
          if (thetaIdx >= 0) {
            var parent = n.Parent;
            if (thetaNodes[thetaIdx].Any()) {
              // HACK: REUSE CONSTANT TREE NODE IN SEVERAL TREES
              // we use this trick to allow autodiff over thetas when thetas occur multiple times in the tree (e.g. in derived trees)
              var constNode = thetaNodes[thetaIdx].First();
              var childIdx = parent.IndexOfSubtree(n);
              parent.RemoveSubtree(childIdx);
              parent.InsertSubtree(childIdx, constNode);
            } else {
              var constNode = (ConstantTreeNode)CreateConstant(thetaValues[thetaIdx]);
              var childIdx = parent.IndexOfSubtree(n);
              parent.RemoveSubtree(childIdx);
              parent.InsertSubtree(childIdx, constNode);
              thetaNodes[thetaIdx].Add(constNode);
            }
          }
        }
      }
      return copy;
    }

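    // replaces every constant by a fresh variable theta_i and rewrites every weighted
    // variable w * x as an explicit product (x * theta_i), so that all tunable
    // parameters appear as named variables for the derivative calculator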
    private static ISymbolicExpressionTree ReplaceConstWithVar(ISymbolicExpressionTree tree, out List<string> thetaNames, out List<double> thetaValues) {
      thetaNames = new List<string>();
      thetaValues = new List<double>();
      var copy = (ISymbolicExpressionTree)tree.Clone();
      var nodes = copy.IterateNodesPostfix().ToList();

      int n = 1;
      for (int i = 0; i < nodes.Count; ++i) {
        var node = nodes[i];
        if (node is ConstantTreeNode constantTreeNode) {
          var thetaVar = (VariableTreeNode)new Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
          thetaVar.Weight = 1;
          thetaVar.VariableName = $"θ{n++}";

          thetaNames.Add(thetaVar.VariableName);
          thetaValues.Add(constantTreeNode.Value);

          var parent = constantTreeNode.Parent;
          if (parent != null) {
            var index = constantTreeNode.Parent.IndexOfSubtree(constantTreeNode);
            parent.RemoveSubtree(index);
            parent.InsertSubtree(index, thetaVar);
          }
        }
        if (node is VariableTreeNode varTreeNode) {
          var thetaVar = (VariableTreeNode)new Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
          thetaVar.Weight = 1;
          thetaVar.VariableName = $"θ{n++}";

          thetaNames.Add(thetaVar.VariableName);
          thetaValues.Add(varTreeNode.Weight);

          var parent = varTreeNode.Parent;
          if (parent != null) {
            var index = varTreeNode.Parent.IndexOfSubtree(varTreeNode);
            parent.RemoveSubtree(index);
            var prodNode = MakeNode<Multiplication>();
            varTreeNode.Weight = 1.0;
            prodNode.AddSubtree(varTreeNode);
            prodNode.AddSubtree(thetaVar);
            parent.InsertSubtree(index, prodNode);
          }
        }
      }
      return copy;
    }

    private static ISymbolicExpressionTreeNode CreateConstant(double value) {
      var constantNode = (ConstantTreeNode)new Constant().CreateTreeNode();
      constantNode.Value = value;
      return constantNode;
    }

    private static ISymbolicExpressionTree Subtract(ISymbolicExpressionTree t, ISymbolicExpressionTreeNode b) {
      var sub = MakeNode<Subtraction>(t.Root.GetSubtree(0).GetSubtree(0), b);
      t.Root.GetSubtree(0).RemoveSubtree(0);
      t.Root.GetSubtree(0).InsertSubtree(0, sub);
      return t;
    }
    private static ISymbolicExpressionTree Subtract(ISymbolicExpressionTreeNode b, ISymbolicExpressionTree t) {
      var sub = MakeNode<Subtraction>(b, t.Root.GetSubtree(0).GetSubtree(0));
      t.Root.GetSubtree(0).RemoveSubtree(0);
      t.Root.GetSubtree(0).InsertSubtree(0, sub);
      return t;
    }

    private static ISymbolicExpressionTreeNode MakeNode<T>(params ISymbolicExpressionTreeNode[] fs) where T : ISymbol, new() {
      var node = new T().CreateTreeNode();
      foreach (var f in fs) node.AddSubtree(f);
      return node;
    }
    #endregion

    private static void UpdateConstants(ISymbolicExpressionTreeNode[] nodes, double[] constants) {
      if (nodes.Length != constants.Length) throw new InvalidOperationException();
      for (int i = 0; i < nodes.Length; i++) {
        if (nodes[i] is VariableTreeNode varNode) varNode.Weight = constants[i];
        else if (nodes[i] is ConstantTreeNode constNode) constNode.Value = constants[i];
      }
    }

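    // factory methods for plain (unconstrained) alglib residual and jacobian callbacks
    // over a fixed set of parameter nodes; OptimizeConstants above does not use them,
    // it builds its own constrained callback (calculate_jacobian) instead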
    private static alglib.ndimensional_fvec CreateFunc(ISymbolicExpressionTree tree, VectorEvaluator eval, ISymbolicExpressionTreeNode[] parameterNodes, IDataset ds, string targetVar, int[] rows) {
      var y = ds.GetDoubleValues(targetVar, rows).ToArray();
      return (double[] c, double[] fi, object o) => {
        UpdateConstants(parameterNodes, c);
        var pred = eval.Evaluate(tree, ds, rows);
        for (int i = 0; i < fi.Length; i++)
          fi[i] = pred[i] - y[i];

        var counter = (EvaluationsCounter)o;
        counter.FunctionEvaluations++;
      };
    }

    private static alglib.ndimensional_jac CreateJac(ISymbolicExpressionTree tree, VectorAutoDiffEvaluator eval, ISymbolicExpressionTreeNode[] parameterNodes, IDataset ds, string targetVar, int[] rows) {
      var y = ds.GetDoubleValues(targetVar, rows).ToArray();
      return (double[] c, double[] fi, double[,] jac, object o) => {
        UpdateConstants(parameterNodes, c);
        eval.Evaluate(tree, ds, rows, parameterNodes, fi, jac);

        // eval fills fi with the model predictions; subtract the targets to get residuals
        for (int i = 0; i < fi.Length; i++)
          fi[i] -= y[i];

        var counter = (EvaluationsCounter)o;
        counter.GradientEvaluations++;
      };
    }
    public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {
      return TreeToAutoDiffTermConverter.IsCompatible(tree);
    }
  }
}