source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs @ 17051

Last change on this file since 17051 was 17051, checked in by gkronber, 2 years ago

#2925: produce line chart for training episodes even when there is no test set

File size: 107.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Globalization;
26using System.Linq;
27using HeuristicLab.Analysis;
28using HeuristicLab.Collections;
29using HeuristicLab.Common;
30using HeuristicLab.Core;
31using HeuristicLab.Data;
32using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
33using HeuristicLab.Optimization;
34using HeuristicLab.Parameters;
35using HeuristicLab.Problems.DataAnalysis;
36using HeuristicLab.Problems.DataAnalysis.Symbolic;
37using HeuristicLab.Problems.Instances;
38using Variable = HeuristicLab.Problems.DataAnalysis.Symbolic.Variable;
39using HEAL.Attic;
40using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
41using System.Runtime.InteropServices;
42
43namespace HeuristicLab.Problems.DynamicalSystemsModelling {
44  [Item("Dynamical Systems Modelling Problem", "TODO")]
45  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
46  [StorableType("065C6A61-773A-42C9-9DE5-61A5D1D823EB")]
47  public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<Problem> {
48    #region parameter names
    // Centralized keys for the Parameters collection; the property getters below,
    // the constructor registration, and AfterDeserialization all use these constants
    // (a few newer parameters are still looked up by raw string, see below).
49    private const string ProblemDataParameterName = "Data";
50    private const string TargetVariablesParameterName = "Target variables";
51    private const string FunctionSetParameterName = "Function set";
52    private const string MaximumLengthParameterName = "Size limit";
53    private const string MaximumPretuningParameterOptimizationIterationsParameterName = "Max. pre-tuning parameter optimization iterations";
54    private const string MaximumOdeParameterOptimizationIterationsParameterName = "Max. ODE parameter optimization iterations";
55    private const string NumberOfLatentVariablesParameterName = "Number of latent variables";
56    private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
57    private const string TrainingEpisodesParameterName = "Training episodes";
58    private const string TestEpisodesParameterName = "Test episodes";
59    private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes";
60    private const string OdeSolverParameterName = "ODE Solver";
61    #endregion
62
63    #region Parameter Properties
    // Typed accessors into the Parameters collection; each getter casts the stored
    // IParameter to its concrete parameter type. NOTE: the last three use raw string
    // keys instead of the constants above.
64    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }
65
66    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
67      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
68    }
69    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> TargetVariablesParameter {
70      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[TargetVariablesParameterName]; }
71    }
72    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> FunctionSetParameter {
73      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[FunctionSetParameterName]; }
74    }
75    public IFixedValueParameter<IntValue> MaximumLengthParameter {
76      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumLengthParameterName]; }
77    }
78
79    public IFixedValueParameter<IntValue> MaximumPretuningParameterOptimizationIterationsParameter {
80      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumPretuningParameterOptimizationIterationsParameterName]; }
81    }
82    public IFixedValueParameter<IntValue> MaximumOdeParameterOptimizationIterationsParameter {
83      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumOdeParameterOptimizationIterationsParameterName]; }
84    }
85    public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter {
86      get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; }
87    }
88    public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter {
89      get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; }
90    }
91    public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter {
92      get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; }
93    }
94    public IValueParameter<ItemList<IntRange>> TestEpisodesParameter {
95      get { return (IValueParameter<ItemList<IntRange>>)Parameters[TestEpisodesParameterName]; }
96    }
97    public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter {
98      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; }
99    }
100    public IConstrainedValueParameter<StringValue> OdeSolverParameter {
101      get { return (IConstrainedValueParameter<StringValue>)Parameters[OdeSolverParameterName]; }
102    }
    // NOTE(review): the following three parameters are looked up by raw string key;
    // the keys must stay in sync with the constructor and AfterDeserialization.
103    public IFixedValueParameter<DoubleValue> PretuningErrorWeight {
104      get { return (IFixedValueParameter<DoubleValue>)Parameters["Pretuning NMSE weight"]; }
105    }
106    public IFixedValueParameter<DoubleValue> OdeErrorWeight {
107      get { return (IFixedValueParameter<DoubleValue>)Parameters["ODE NMSE weight"]; }
108    }
109    public IFixedValueParameter<DoubleValue> NumericDifferencesSmoothingParameter {
110      get { return (IFixedValueParameter<DoubleValue>)Parameters["Numeric differences smoothing"]; }
111    }
112    #endregion
113
114    #region Properties
115    public IRegressionProblemData ProblemData {
      // Convenience accessors that unwrap the parameter objects to their plain values.
116      get { return ProblemDataParameter.Value; }
117      set { ProblemDataParameter.Value = value; }
118    }
119    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }
120
121    public ReadOnlyCheckedItemList<StringValue> TargetVariables {
122      get { return TargetVariablesParameter.Value; }
123    }
124
125    public ReadOnlyCheckedItemList<StringValue> FunctionSet {
126      get { return FunctionSetParameter.Value; }
127    }
128
129    public int MaximumLength {
130      get { return MaximumLengthParameter.Value.Value; }
131    }
132    public int MaximumPretuningParameterOptimizationIterations {
133      get { return MaximumPretuningParameterOptimizationIterationsParameter.Value.Value; }
134    }
135    public int MaximumOdeParameterOptimizationIterations {
136      get { return MaximumOdeParameterOptimizationIterationsParameter.Value.Value; }
137    }
138    public int NumberOfLatentVariables {
139      get { return NumberOfLatentVariablesParameter.Value.Value; }
140    }
141    public int NumericIntegrationSteps {
142      get { return NumericIntegrationStepsParameter.Value.Value; }
143    }
    // Episode lists are returned as live references to the parameter values;
    // mutating them mutates the parameter.
144    public IList<IntRange> TrainingEpisodes {
145      get { return TrainingEpisodesParameter.Value; }
146    }
147    public IList<IntRange> TestEpisodes {
148      get { return TestEpisodesParameter.Value; }
149    }
150    public bool OptimizeParametersForEpisodes {
151      get { return OptimizeParametersForEpisodesParameter.Value.Value; }
152    }
153    public double NumericDifferencesSmoothing {
154      get { return NumericDifferencesSmoothingParameter.Value.Value; }
155    }
156
157
158    public string OdeSolver {
159      get { return OdeSolverParameter.Value.Value; }
160      set {
161        var matchingValue = OdeSolverParameter.ValidValues.FirstOrDefault(v => v.Value == value);
162        if (matchingValue == null) throw new ArgumentOutOfRangeException();
163        else OdeSolverParameter.Value = matchingValue;
164      }
165    }
166
167    #endregion
168
169    public event EventHandler ProblemDataChanged;
170
171    public override bool Maximization {
172      get { return false; } // we minimize NMSE
173    }
174
175    #region item cloning and persistence
176    // persistence
177    [StorableConstructor]
178    private Problem(StorableConstructorFlag _) : base(_) { }
179    [StorableHook(HookType.AfterDeserialization)]
180    private void AfterDeserialization() {
181      if (!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) {
182        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
183      }
184      int iters = 100;
185      if (Parameters.ContainsKey("Max. parameter optimization iterations")) {
186        iters = ((IFixedValueParameter<IntValue>)Parameters["Max. parameter optimization iterations"]).Value.Value;
187      }
188      if (!Parameters.ContainsKey(MaximumPretuningParameterOptimizationIterationsParameterName)) {
189        Parameters.Add(new FixedValueParameter<IntValue>(MaximumPretuningParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters of individual equations for numerical derivatives (using L-BFGS). More iterations makes the algorithm slower, fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(iters)));
190      }
191      if (!Parameters.ContainsKey(MaximumOdeParameterOptimizationIterationsParameterName)) {
192        Parameters.Add(new FixedValueParameter<IntValue>(MaximumOdeParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of the full ODE parameters (using L-BFGS). More iterations makes the algorithm slower, fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(iters)));
193      }
194
195      if (!Parameters.ContainsKey("Pretuning NMSE weight"))
196        Parameters.Add(new FixedValueParameter<DoubleValue>("Pretuning NMSE weight", "For fitness weighting", new DoubleValue(0.5)));
197      if (!Parameters.ContainsKey("ODE NMSE weight"))
198        Parameters.Add(new FixedValueParameter<DoubleValue>("ODE NMSE weight", "For fitness weighting", new DoubleValue(0.5)));
199
200
201      RegisterEventHandlers();
202    }
203
204    // cloning
205    private Problem(Problem original, Cloner cloner)
206      : base(original, cloner) {
207      RegisterEventHandlers();
208    }
209    public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
210    #endregion
211
212    public Problem()
213      : base() {
      // Registers all problem parameters with their defaults; RegisterEventHandlers and
      // InitAllParameters are called afterwards to wire change notifications and derive
      // dependent state (grammar/encoding) from the initial values.
214      var targetVariables = new CheckedItemList<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem
215      var functions = CreateFunctionSet();
216      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. Use CSV import functionality to import data.", new RegressionProblemData()));
217      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(TargetVariablesParameterName, "Target variables (overrides setting in ProblemData)", targetVariables));
218      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions));
219      Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximally allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10)));
220      Parameters.Add(new FixedValueParameter<IntValue>(MaximumPretuningParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters of individual equations for numerical derivatives (using L-BFGS). More iterations makes the algorithm slower, fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100)));
221      Parameters.Add(new FixedValueParameter<IntValue>(MaximumOdeParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of the full ODE parameters (using L-BFGS). More iterations makes the algorithm slower, fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100)));
222      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated up and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference to target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 .. 5) as necessary (default = 0)", new IntValue(0)));
223      Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps makes the algorithm slower, less steps worsens the accuracy of the numeric integration scheme.", new IntValue(10)));
224      Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training, each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>()));
225      Parameters.Add(new ValueParameter<ItemList<IntRange>>(TestEpisodesParameterName, "A list of ranges that should be used for validation, each range represents an independent episode. This overrides the TestSet parameter in ProblemData.", new ItemList<IntRange>()));
226      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
      // NOTE(review): the following three parameters use raw string keys which must stay
      // in sync with the property getters and AfterDeserialization.
227      Parameters.Add(new FixedValueParameter<DoubleValue>("Pretuning NMSE weight", "For fitness weighting", new DoubleValue(0.5)));
228      Parameters.Add(new FixedValueParameter<DoubleValue>("ODE NMSE weight", "For fitness weighting", new DoubleValue(0.5)));
229      Parameters.Add(new FixedValueParameter<DoubleValue>("Numeric differences smoothing", "Determines the amount of smoothing for the numeric differences which are calculated for pre-tuning. Values from -8 to 8 are reasonable. Use very low value if the data contains no noise. Default: 2.", new DoubleValue(2.0)));
230
231      var solversStr = new string[] { "HeuristicLab", "CVODES", "CVODES (full)" };
232      var solvers = new ItemSet<StringValue>(
233        solversStr.Select(s => new StringValue(s).AsReadOnly())
234        );
235      Parameters.Add(new ConstrainedValueParameter<StringValue>(OdeSolverParameterName, "The solver to use for solving the initial value ODE problems", solvers, solvers.First()));
236
237      RegisterEventHandlers();
238      InitAllParameters();
239
240      // TODO: alglib gradient check fails for integration autodiff
241      // TODO: implement CVODES solver and use gradient check in alglib to verify
242      // TODO: optimization of starting values for latent variables in CVODES solver
243      // TODO: allow to specify the name for the time variable in the dataset and allow variable step-sizes
244    }
245
246    public override double Evaluate(Individual individual, IRandom random) {
      // Fitness function: extracts the per-target-variable trees from the individual and
      // returns the (minimized) NMSE from pre-tuning + ODE parameter optimization.
247      var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
248
249      var problemData = ProblemData;
250      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
251      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
252      if (latentVariables.Any()) throw new NotSupportedException("latent variables are not supported"); // not sure if everything still works in combination with latent variables
253      if (OptimizeParametersForEpisodes) {
254        throw new NotImplementedException();
        // NOTE(review): everything below in this branch is unreachable (dead code kept
        // as a sketch of the intended per-episode optimization).
255        int eIdx = 0;
256        double totalNMSE = 0.0;
257        int totalSize = 0;
258        foreach (var episode in TrainingEpisodes) {
259          // double[] optTheta;
260          double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, new[] { episode }, MaximumPretuningParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver, MaximumOdeParameterOptimizationIterations);
261          // individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
262          eIdx++;
263          totalNMSE += nmse * episode.Size;
264          totalSize += episode.Size;
265        }
266        return totalNMSE / totalSize;
267      } else {
268        // when no training episodes are specified then we implicitly use the training partition from the problemData
269        var trainingEpisodes = TrainingEpisodes;
270        if (!trainingEpisodes.Any()) {
271          trainingEpisodes = new List<IntRange>();
272          trainingEpisodes.Add((IntRange)ProblemData.TrainingPartition.Clone());
273        }
274        double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, trainingEpisodes, MaximumPretuningParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver, MaximumOdeParameterOptimizationIterations,
275          PretuningErrorWeight.Value.Value, OdeErrorWeight.Value.Value, NumericDifferencesSmoothing);
276        return nmse;
277      }
278    }
279
280    public static double OptimizeForEpisodes(
281      ISymbolicExpressionTree[] trees,
282      IRegressionProblemData problemData,
283      string[] targetVars,
284      string[] latentVariables,
285      IRandom random,
286      IEnumerable<IntRange> episodes,
287      int maxPretuningParameterOptIterations,
288      int numericIntegrationSteps,
289      string odeSolver,
290      int maxOdeParameterOptIterations,
291      double pretuningErrorWeight = 0.5,
292      double odeErrorWeight = 0.5,
293      double numericDifferencesSmoothing = 2
294      ) {
      // Two-stage parameter fitting:
      //  1) pre-tune each target tree against numeric differences dy/dt,
      //  2) refine all parameters jointly by integrating the full ODE.
      // Returns the weighted NMSE and writes optimized values back into the tree leaves.
295
296
297
298      // extract constants from trees (without trees for latent variables)
      // convention: target-variable trees come first, latent-variable trees follow
299      var targetVariableTrees = trees.Take(targetVars.Length).ToArray();
300      var latentVariableTrees = trees.Skip(targetVars.Length).ToArray();
301      // var constantNodes = targetVariableTrees.Select(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>().ToArray()).ToArray();
302      // var initialTheta = constantNodes.Select(nodes => nodes.Select(n => n.Value).ToArray()).ToArray();
303      var constantNodes = targetVariableTrees.Select(
304        t => t.IterateNodesPrefix()
305        .Where(n => n.SubtreeCount == 0) // select leaves
306        .ToArray()).ToArray();
      // initial parameter vector per tree: variable weights and constant values in prefix order
307      var initialTheta = constantNodes.Select(
308        a => a.Select(
309          n => {
310            if (n is VariableTreeNode varTreeNode) {
311              return varTreeNode.Weight;
312            } else if (n is ConstantTreeNode constTreeNode) {
313              return constTreeNode.Value;
314            } else throw new InvalidProgramException();
315          }).ToArray()).ToArray();
316
317      // optimize parameters by fitting f(x,y) to calculated differences dy/dt(t)
318      double[] pretunedParameters = initialTheta.SelectMany(v => v).ToArray();
319      double nmse = 0;
      // NOTE(review): '||' means pre-tuning also runs when the weight is 0 but iterations
      // are enabled (its side effect of updating pretunedParameters is still wanted) —
      // confirm this is intentional rather than '&&'.
320      if (pretuningErrorWeight > 0 || maxPretuningParameterOptIterations > -1) {
321        nmse += pretuningErrorWeight * PreTuneParameters(trees, problemData, targetVars, latentVariables, random, episodes,
322          maxPretuningParameterOptIterations, numericDifferencesSmoothing,
323          initialTheta, out pretunedParameters);
324      }
325
326      // extend parameter vector to include parameters for latent variable trees
327      pretunedParameters = pretunedParameters
328        .Concat(latentVariableTrees
329        .SelectMany(t => t.IterateNodesPrefix()
330        .Where(n => n.SubtreeCount == 0)
331        .Select(n => {
332          if (n is VariableTreeNode varTreeNode) {
333            return varTreeNode.Weight;
334          } else if (n is ConstantTreeNode constTreeNode) {
335            return constTreeNode.Value;
336          } else throw new InvalidProgramException();
337        })))
338        .ToArray();
339
340      double[] optTheta = pretunedParameters;
341      if (odeErrorWeight > 0 || maxOdeParameterOptIterations > -1) {
342        // optimize parameters using integration of f(x,y) to calculate y(t)
343        nmse += odeErrorWeight * OptimizeParameters(trees, problemData, targetVars, latentVariables, episodes, maxOdeParameterOptIterations, pretunedParameters, numericIntegrationSteps, odeSolver,
344          out optTheta);
345      }
346      // var optTheta = pretunedParameters;
347
      // cap degenerate fitness values (NaN/Inf/huge) to a finite worst-case value
348      if (double.IsNaN(nmse) ||
349        double.IsInfinity(nmse) ||
350        nmse > 100 * trees.Length * episodes.Sum(ep => ep.Size))
351        return 100 * trees.Length * episodes.Sum(ep => ep.Size);
352
353      // update tree nodes with optimized values
354      var paramIdx = 0;
355      for (var treeIdx = 0; treeIdx < constantNodes.Length; treeIdx++) {
356        for (int i = 0; i < constantNodes[treeIdx].Length; i++) {
357          if (constantNodes[treeIdx][i] is VariableTreeNode varTreeNode) {
358            varTreeNode.Weight = optTheta[paramIdx++];
359          } else if (constantNodes[treeIdx][i] is ConstantTreeNode constTreeNode) {
360            constTreeNode.Value = optTheta[paramIdx++];
361          }
362        }
363      }
364      return nmse;
365    }
366
367    private static double PreTuneParameters(
368      ISymbolicExpressionTree[] trees,
369      IRegressionProblemData problemData,
370      string[] targetVars,
371      string[] latentVariables,
372      IRandom random,
373      IEnumerable<IntRange> episodes,
374      int maxParameterOptIterations,
375      double numericDifferencesSmoothing, // for smoothing of numeric differences
376      double[][] initialTheta,
377      out double[] optTheta) {
378      var thetas = new List<double>();
379      double nmse = 0.0;
380      var maxTreeNmse = 100 * episodes.Sum(ep => ep.Size);
381
382      var targetTrees = trees.Take(targetVars.Length).ToArray();
383      var latentTrees = trees.Take(latentVariables.Length).ToArray();
384
385      // first calculate values of latent variables by integration
386      if (latentVariables.Length > 0) {
387        var inputVariables = targetVars.Concat(latentTrees.SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))).Except(latentVariables).Distinct();
388        var myState = new OptimizationData(latentTrees, targetVars, inputVariables.ToArray(), problemData, null, episodes.ToArray(), 10, latentVariables, "NONE");
389
390        var fi = new double[myState.rows.Length * targetVars.Length];
391        var jac = new double[myState.rows.Length * targetVars.Length, myState.nodeValueLookup.ParameterCount];
392        var latentValues = new double[myState.rows.Length, latentVariables.Length];
393        Integrate(myState, fi, jac, latentValues);
394
395        // add integrated latent variables to dataset
396        var modifiedDataset = ((Dataset)problemData.Dataset).ToModifiable();
397        foreach (var variable in latentVariables) {
398          modifiedDataset.AddVariable(variable, Enumerable.Repeat(0.0, modifiedDataset.Rows).ToList()); // empty column
399        }
400        int predIdx = 0;
401        foreach (var ep in episodes) {
402          for (int r = ep.Start; r < ep.End; r++) {
403            for (int latVarIdx = 0; latVarIdx < latentVariables.Length; latVarIdx++) {
404              modifiedDataset.SetVariableValue(latentValues[predIdx, latVarIdx], latentVariables[latVarIdx], r);
405            }
406            predIdx++;
407          }
408        }
409
410        problemData = new RegressionProblemData(modifiedDataset, problemData.AllowedInputVariables, problemData.TargetVariable);
411      }
412      // NOTE: the order of values in parameter matches prefix order of constant nodes in trees
413      for (int treeIdx = 0; treeIdx < targetTrees.Length; treeIdx++) {
414        var t = targetTrees[treeIdx];
415
416        // check if we need to change the problem data
417        var targetValuesDiff = new List<double>();
418
419        // TODO: smooth only once
420        foreach (var ep in episodes) {
421          var episodeRows = Enumerable.Range(ep.Start, ep.Size);
422          var targetValues = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], episodeRows).ToArray();
423          targetValuesDiff.AddRange(CalculateDifferences(targetValues, numericDifferencesSmoothing));
424        }
425        var adjustedEpisodes = episodes.Select(ep => new IntRange(ep.Start, ep.End));
426
427        // data for input variables is assumed to be known
428        // input variables in pretuning are all target variables and all variable names that occur in the tree
429        var inputVariables = targetVars.Concat(t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName)).Distinct();
430
431        var myState = new OptimizationData(new[] { t },
432          targetVars,
433          inputVariables.ToArray(),
434          problemData, new[] { targetValuesDiff.ToArray() }, adjustedEpisodes.ToArray(), -99, latentVariables, string.Empty); // TODO
435        var paramCount = myState.nodeValueLookup.ParameterCount;
436
437        optTheta = initialTheta[treeIdx];
438        if (initialTheta[treeIdx].Length > 0 && maxParameterOptIterations > -1) {
439          try {
440            alglib.minlmstate state;
441            alglib.minlmreport report;
442            var p = new double[initialTheta[treeIdx].Length];
443            var lowerBounds = Enumerable.Repeat(-1000.0, p.Length).ToArray();
444            var upperBounds = Enumerable.Repeat(1000.0, p.Length).ToArray();
445            Array.Copy(initialTheta[treeIdx], p, p.Length);
446            alglib.minlmcreatevj(targetValuesDiff.Count, p, out state);
447            alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
448            alglib.minlmsetbc(state, lowerBounds, upperBounds);
449#if DEBUG
450            alglib.minlmsetgradientcheck(state, 1.0e-7 );
451#endif
452            alglib.minlmoptimize(state, EvaluateObjectiveVector, EvaluateObjectiveVectorAndJacobian, null, myState);
453
454            alglib.minlmresults(state, out optTheta, out report);
455            if (report.terminationtype < 0) {
456#if DEBUG
457              if (report.terminationtype == -7) throw new InvalidProgramException("gradient calculation fail!");
458#endif
459              optTheta = initialTheta[treeIdx];
460            }
461          } catch (alglib.alglibexception) {
462            optTheta = initialTheta[treeIdx];
463          }
464        }
465        var tree_nmse = EvaluateMSE(optTheta, myState);
466        if (double.IsNaN(tree_nmse) || double.IsInfinity(tree_nmse) || tree_nmse > maxTreeNmse) {
467          nmse += maxTreeNmse;
468          thetas.AddRange(initialTheta[treeIdx]);
469        } else {
470          nmse += tree_nmse;
471          thetas.AddRange(optTheta);
472        }
473      } // foreach tree
474      optTheta = thetas.ToArray();
475
476      return nmse;
477    }
478
479
480
481    // similar to above but this time we integrate and optimize all parameters for all targets concurrently
482    private static double OptimizeParameters(ISymbolicExpressionTree[] trees, IRegressionProblemData problemData, string[] targetVars, string[] latentVariables,
483      IEnumerable<IntRange> episodes, int maxParameterOptIterations, double[] initialTheta, int numericIntegrationSteps, string odeSolver, out double[] optTheta) {
      // Optimizes ALL parameters of ALL trees jointly by numerically integrating the full
      // ODE system and minimizing the residuals against the observed target values
      // (bounded Levenberg-Marquardt via alglib). Returns the capped NMSE.
484      var rowsForDataExtraction = episodes.SelectMany(e => Enumerable.Range(e.Start, e.Size)).ToArray();
485      var targetValues = new double[targetVars.Length][];
486      for (int treeIdx = 0; treeIdx < targetVars.Length; treeIdx++) {
        // NOTE(review): 't' is assigned but never used in this loop
487        var t = trees[treeIdx];
488
489        targetValues[treeIdx] = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], rowsForDataExtraction).ToArray();
490      }
491
492      // data for input variables is assumed to be known
493      // input variables are all variable names that occur in the trees except for target variables (we assume that trees have been generated correctly)
494      var inputVariables = trees.SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))
495        .Except(targetVars)
496        .Except(latentVariables)
497        .Distinct();
498
499      var myState = new OptimizationData(trees, targetVars, inputVariables.ToArray(), problemData, targetValues, episodes.ToArray(), numericIntegrationSteps, latentVariables, odeSolver);
500      optTheta = initialTheta;
501
502      if (initialTheta.Length > 0 && maxParameterOptIterations > -1) {
        // box constraints keep parameters in a numerically safe range during optimization
503        var lowerBounds = Enumerable.Repeat(-1000.0, initialTheta.Length).ToArray();
504        var upperBounds = Enumerable.Repeat(1000.0, initialTheta.Length).ToArray();
505        try {
506          alglib.minlmstate state;
507          alglib.minlmreport report;
508          alglib.minlmcreatevj(rowsForDataExtraction.Length * trees.Length, initialTheta, out state);
509          alglib.minlmsetbc(state, lowerBounds, upperBounds);
510          alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
511#if DEBUG         
512          //alglib.minlmsetgradientcheck(state, 1.0e-7);
513#endif
514          alglib.minlmoptimize(state, IntegrateAndEvaluateObjectiveVector, IntegrateAndEvaluateObjectiveVectorAndJacobian, null, myState);
515
516          alglib.minlmresults(state, out optTheta, out report);
517
518          if (report.terminationtype < 0) {
519#if DEBUG
520            if (report.terminationtype == -7) throw new InvalidProgramException("gradient calculation fail!");
521#endif            // there was a problem: reset theta and evaluate for inital values
522            optTheta = initialTheta;
523          }
524        } catch (alglib.alglibexception) {
525          optTheta = initialTheta;
526        }
527      }
      // evaluate with final parameters; cap NaN/Inf/huge results to a finite worst case
528      var nmse = EvaluateIntegratedMSE(optTheta, myState);
529      var maxNmse = 100 * targetValues.Length * rowsForDataExtraction.Length;
530      if (double.IsNaN(nmse) || double.IsInfinity(nmse) || nmse > maxNmse) nmse = maxNmse;
531      return nmse;
532    }
533
534
535    // helper
536    public static double EvaluateMSE(double[] x, OptimizationData optimizationData) {
537      var fi = new double[optimizationData.rows.Count()];
538      EvaluateObjectiveVector(x, fi, optimizationData);
539      return fi.Sum(fii => fii * fii) / fi.Length;
540    }
541    public static void EvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { EvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
542    public static void EvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
543      var rows = optimizationData.rows;
544      var problemData = optimizationData.problemData;
545      var nodeValueLookup = optimizationData.nodeValueLookup;
546      var ds = problemData.Dataset;
547      var variables = optimizationData.variables;
548
549      nodeValueLookup.UpdateParamValues(x);
550
551      int outputIdx = 0;
552      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
553        // update variable values
554        foreach (var variable in variables) {
555          // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
556          // if (double.TryParse(variable, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
557          //   nodeValueLookup.SetVariableValue(variable, value); // TODO: Perf we don't need to set this for each index
558          // } else {
559          nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx])); // TODO: perf
560          // }
561        }
562        // interpret all trees
563        for (int treeIdx = 0; treeIdx < optimizationData.trees.Length; treeIdx++) {
564          var tree = optimizationData.trees[treeIdx];
565          var pred = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup);
566          var y = optimizationData.targetValues[treeIdx][trainIdx];
567          fi[outputIdx++] = (y - pred) * optimizationData.inverseStandardDeviation[treeIdx];
568        }
569      }
570    }
571
572    public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { EvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
573    public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
574      // extract variable values from dataset
575      var variableValues = new Dictionary<string, Tuple<double, Vector>>();
576      var problemData = optimizationData.problemData;
577      var ds = problemData.Dataset;
578      var rows = optimizationData.rows;
579      var variables = optimizationData.variables;
580
581      var nodeValueLookup = optimizationData.nodeValueLookup;
582      nodeValueLookup.UpdateParamValues(x);
583
584      int termIdx = 0;
585
586      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
587        // update variable values
588        foreach (var variable in variables) {
589          // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
590          // if (double.TryParse(variable, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
591          //   nodeValueLookup.SetVariableValue(variable, value); // TODO: Perf we don't need to set this for each index
592          // } else {
593          nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx])); // TODO: perf
594          // }
595        }
596
597        var calculatedVariables = optimizationData.targetVariables;
598
599        var trees = optimizationData.trees;
600        for (int i = 0; i < trees.Length; i++) {
601          var tree = trees[i];
602          var targetVarName = calculatedVariables[i];
603
604          double f; Vector g;
605          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup, out f, out g);
606
607          var y = optimizationData.targetValues[i][trainIdx];
608          fi[termIdx] = (y - f) * optimizationData.inverseStandardDeviation[i]; // scale of NMSE
609          if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[termIdx, j] = -g[j] * optimizationData.inverseStandardDeviation[i];
610
611          termIdx++;
612        }
613      }
614
615    }
616
617    // helper
618    public static double EvaluateIntegratedMSE(double[] x, OptimizationData optimizationData) {
619      var fi = new double[optimizationData.rows.Count() * optimizationData.targetVariables.Length];
620      IntegrateAndEvaluateObjectiveVector(x, fi, optimizationData);
621      return fi.Sum(fii => fii * fii) / fi.Length;
622    }
623    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { IntegrateAndEvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
624    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
625      IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, null, optimizationData);
626    }
627
628    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
629    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
630      var rows = optimizationData.rows.ToArray();
631      var problemData = optimizationData.problemData;
632      var nodeValueLookup = optimizationData.nodeValueLookup;
633      var ds = problemData.Dataset;
634      int outputIdx = 0;
635
636      nodeValueLookup.UpdateParamValues(x);
637
638      Integrate(optimizationData, fi, jac, null);
639      var trees = optimizationData.trees;
640
641      // update result with error
642      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
643        for (int i = 0; i < optimizationData.targetVariables.Length; i++) {
644          var tree = trees[i];
645          var y = optimizationData.targetValues[i][trainIdx];
646          fi[outputIdx] = (y - fi[outputIdx]) * optimizationData.inverseStandardDeviation[i];  // scale for normalized squared error
647          if (jac != null) for (int j = 0; j < x.Length; j++) jac[outputIdx, j] = -jac[outputIdx, j] * optimizationData.inverseStandardDeviation[i];
648          outputIdx++;
649        }
650      }
651    }
652
    /// <summary>
    /// Analyzes the current population: stores the quality of the best individual,
    /// produces line charts of actual vs. predicted values for the training episodes
    /// (and for the test partition, if one exists), stores original and simplified
    /// models, and produces classical regression solutions for visualization (PDP).
    /// </summary>
    /// <param name="individuals">Current population.</param>
    /// <param name="qualities">Qualities corresponding to <paramref name="individuals"/>.</param>
    /// <param name="results">Result collection that is created/updated in place.</param>
    /// <param name="random">Random number generator (passed through to base).</param>
    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      // create result entries on the first call; values are filled in below
      if (!results.ContainsKey("Prediction (training)")) {
        results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Prediction (test)")) {
        results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Models")) {
        results.Add(new Result("Models", typeof(VariableCollection)));
      }
      if (!results.ContainsKey("SNMSE")) {
        results.Add(new Result("SNMSE", typeof(DoubleValue)));
      }
      if (!results.ContainsKey("SNMSE values")) {
        var dt = new DataTable("SNMSE values");
        dt.Rows.Add(new DataRow("ODE SNMSE"));
        dt.Rows.Add(new DataRow("Fitness"));
        results.Add(new Result("SNMSE values", dt));
      }
      if (!results.ContainsKey("Solution")) {
        results.Add(new Result("Solution", typeof(Solution)));
      }


      // when no training episodes are specified then we implicitly use the training partition from the problemData
      var trainingEpisodes = TrainingEpisodes;
      if (!trainingEpisodes.Any()) {
        trainingEpisodes = new List<IntRange>();
        trainingEpisodes.Add((IntRange)ProblemData.TrainingPartition.Clone());
      }

      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value)
        .OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual

      results["SNMSE"].Value = new DoubleValue(bestIndividualAndQuality.Item2);

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees

      var trainingList = new ItemList<DataTable>();

      if (OptimizeParametersForEpisodes) {
        throw new NotSupportedException();
        // NOTE(review): everything below in this branch is unreachable dead code (CS0162)
        // and is kept only as a reference for a possible re-implementation
        var eIdx = 0;
        var trainingPredictions = new List<Tuple<double, Vector>[][]>();
        foreach (var episode in TrainingEpisodes) {
          var episodes = new[] { episode };
          var optimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, episodes, NumericIntegrationSteps, latentVariables, OdeSolver);
          var trainingPrediction = Integrate(optimizationData).ToArray();
          trainingPredictions.Add(trainingPrediction);
          eIdx++;
        }

        // only for target values
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray());
          trainingDataTable.Rows.Add(actualValuesRow);
          trainingDataTable.Rows.Add(predictedValuesRow);
          trainingList.Add(trainingDataTable);
        }
        results["Prediction (training)"].Value = trainingList.AsReadOnly();


        var models = new VariableCollection();

        foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
          var targetVarName = tup.Item1;
          var tree = tup.Item2;

          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
        }
        results["Models"].Value = models;
      } else {
        // data for input variables is assumed to be known
        // input variables are all variable names that occur in the trees except for target variables (we assume that trees have been generated correctly)
        var inputVariables = trees
          .SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))
          .Except(targetVars)
          .Except(latentVariables)
          .Distinct();

        var optimizationData = new OptimizationData(trees, targetVars, inputVariables.ToArray(), problemData, null, trainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver);
        var numParams = optimizationData.nodeValueLookup.ParameterCount;

        // integrate once to obtain predictions, parameter sensitivities and latent trajectories
        var fi = new double[optimizationData.rows.Length * targetVars.Length];
        var jac = new double[optimizationData.rows.Length * targetVars.Length, numParams];
        var latentValues = new double[optimizationData.rows.Length, latentVariables.Length];
        Integrate(optimizationData, fi, jac, latentValues);


        // for target values and latent variables
        var trainingRows = optimizationData.rows;
        double trainingSNMSE = 0.0;
        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
          // is target variable
          if (colIdx < targetVars.Length) {
            var targetVar = targetVars[colIdx];
            var trainingDataTable = new DataTable(targetVar + " prediction (training)");
            var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
            // fi is laid out row-major: one entry per (row, target) pair
            var idx = Enumerable.Range(0, trainingRows.Length).Select(i => i * targetVars.Length + colIdx);
            var pred = idx.Select(i => fi[i]);
            var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, pred.ToArray());
            trainingDataTable.Rows.Add(actualValuesRow);
            trainingDataTable.Rows.Add(predictedValuesRow);

            // again calculate the integrated error (regardless how fitness is determined)
            trainingSNMSE += actualValuesRow.Values.Zip(predictedValuesRow.Values, (a, p) => Math.Pow(a - p, 2)).Average() / actualValuesRow.Values.Variance() / targetVars.Length;

            // one sensitivity curve per parameter, plotted on the secondary axis
            for (int paramIdx = 0; paramIdx < numParams; paramIdx++) {
              var paramSensitivityRow = new DataRow($"∂{targetVar}/∂θ{paramIdx}", $"Sensitivities of parameter {paramIdx}", idx.Select(i => jac[i, paramIdx]).ToArray());
              paramSensitivityRow.VisualProperties.SecondYAxis = true;
              trainingDataTable.Rows.Add(paramSensitivityRow);
            }
            trainingList.Add(trainingDataTable);
          } else {
            // latent variable: no observed values exist, so only the prediction is charted
            var latentVar = latentVariables[colIdx - targetVars.Length];
            var trainingDataTable = new DataTable(latentVar + " prediction (training)");
            var idx = Enumerable.Range(0, trainingRows.Length);
            var pred = idx.Select(i => latentValues[i, colIdx - targetVars.Length]);
            var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, pred.ToArray());
            var emptyRow = new DataRow(latentVar);
            trainingDataTable.Rows.Add(emptyRow);
            trainingDataTable.Rows.Add(predictedValuesRow);
            trainingList.Add(trainingDataTable);
          }
        }

        results.AddOrUpdateResult("ODE SNMSE", new DoubleValue(trainingSNMSE));
        var odeSNMSETable = (DataTable)results["SNMSE values"].Value;
        odeSNMSETable.Rows["ODE SNMSE"].Values.Add(trainingSNMSE);
        odeSNMSETable.Rows["Fitness"].Values.Add(bestIndividualAndQuality.Item2);
        results["Prediction (training)"].Value = trainingList.AsReadOnly();

        // var errorTable = new DataTable("Squared error and gradient");
        // var seRow = new DataRow("Squared error");
        // var gradientRows = Enumerable.Range(0, numParams).Select(i => new DataRow($"∂SE/∂θ{i}")).ToArray();
        // errorTable.Rows.Add(seRow);
        // foreach (var gRow in gradientRows) {
        //   gRow.VisualProperties.SecondYAxis = true;
        //   errorTable.Rows.Add(gRow);
        // }
        // var targetValues = targetVars.Select(v => problemData.Dataset.GetDoubleValues(v, trainingRows).ToArray()).ToArray();
        // int r = 0;

        // foreach (var y_pred in fi) {
        //   // calculate objective function gradient
        //   double f_i = 0.0;
        //   Vector g_i = Vector.CreateNew(new double[numParams]);
        //   for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
        //     var y_pred_f = y_pred[colIdx].Item1;
        //     var y = targetValues[colIdx][r];
        //
        //     var res = (y - y_pred_f) * optimizationData.inverseStandardDeviation[colIdx];
        //     var ressq = res * res;
        //     f_i += ressq;
        //     g_i.Add(y_pred[colIdx].Item2.Scale(-2.0 * res));
        //   }
        //   seRow.Values.Add(f_i);
        //   for (int j = 0; j < g_i.Length; j++) gradientRows[j].Values.Add(g_i[j]);
        //   r++;
        // }
        // results["Squared error and gradient"].Value = errorTable;

        // only if there is a non-empty test partition
        if (ProblemData.TestIndices.Any()) {
          // TODO: DRY for training and test

          var testList = new ItemList<DataTable>();
          var testRows = ProblemData.TestIndices.ToArray();
          var testOptimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, new IntRange[] { ProblemData.TestPartition }, NumericIntegrationSteps, latentVariables, OdeSolver);
          var testPrediction = Integrate(testOptimizationData).ToArray();

          for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
            // is target variable
            if (colIdx < targetVars.Length) {
              var targetVar = targetVars[colIdx];
              var testDataTable = new DataTable(targetVar + " prediction (test)");
              var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
              var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
              testDataTable.Rows.Add(actualValuesRow);
              testDataTable.Rows.Add(predictedValuesRow);
              testList.Add(testDataTable);

            } else {
              // latent-variable test charts are currently disabled
              // var latentVar = latentVariables[colIdx - targetVars.Length];
              // var testDataTable = new DataTable(latentVar + " prediction (test)");
              // var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
              // var emptyRow = new DataRow(latentVar);
              // testDataTable.Rows.Add(emptyRow);
              // testDataTable.Rows.Add(predictedValuesRow);
              // testList.Add(testDataTable);
            }
          }

          results["Prediction (test)"].Value = testList.AsReadOnly();

        }

        #region simplification of models
        // TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
        var models = new VariableCollection();    // to store target var names and original version of tree

        var clonedTrees = new List<ISymbolicExpressionTree>();
        for (int idx = 0; idx < trees.Length; idx++) {
          clonedTrees.Add((ISymbolicExpressionTree)trees[idx].Clone());
        }
        var ds = problemData.Dataset;
        var newProblemData = new RegressionProblemData((IDataset)ds.Clone(), problemData.AllowedInputVariables, problemData.TargetVariable);
        results["Solution"].Value = new Solution(clonedTrees.ToArray(),
                   // optTheta,
                   newProblemData,
                   targetVars,
                   latentVariables,
                   trainingEpisodes,
                   OdeSolver,
                   NumericIntegrationSteps);


        for (int idx = 0; idx < trees.Length; idx++) {
          // trees are ordered: target variables first, then latent variables
          var varName = string.Empty;
          if (idx < targetVars.Length) {
            varName = targetVars[idx];
          } else {
            varName = latentVariables[idx - targetVars.Length];
          }
          var tree = trees[idx];

          var origTreeVar = new HeuristicLab.Core.Variable(varName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
          var simplifiedTreeVar = new HeuristicLab.Core.Variable(varName + "(simplified)");
          simplifiedTreeVar.Value = TreeSimplifier.Simplify(tree);
          models.Add(simplifiedTreeVar);
        }

        results["Models"].Value = models;
        #endregion

        #region produce classical solutions to allow visualization with PDP
        // each target variable gets a regression solution for its numeric derivative
        for (int treeIdx = 0; treeIdx < targetVars.Length; treeIdx++) {
          var t = (ISymbolicExpressionTree)trees[treeIdx].Clone();
          var name = targetVars.Concat(latentVariables).ElementAt(treeIdx); // whatever
          var model = new SymbolicRegressionModel(name + "_diff", t, new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
          var solutionDataset = ((Dataset)problemData.Dataset).ToModifiable();
          solutionDataset.Name = ((Dataset)problemData.Dataset).Name;
          solutionDataset.Description = ((Dataset)problemData.Dataset).Description;

          var absValues = solutionDataset.GetDoubleValues(name).ToArray();

          // numeric differences per episode are the regression targets
          var diffValues = new double[absValues.Length];
          foreach (var ep in TrainingEpisodes.Concat(TestEpisodes)) {
            var y = solutionDataset.GetDoubleValues(name, Enumerable.Range(ep.Start, ep.End - ep.Start)).ToArray();
            var yd = CalculateDifferences(y, NumericDifferencesSmoothing).ToArray();
            for (int r = ep.Start; r < ep.End; r++) {
              diffValues[r] = yd[r - ep.Start];
            }
          }

          solutionDataset.AddVariable(name + "_diff", diffValues);
          var solutionProblemData = new RegressionProblemData(solutionDataset, problemData.AllowedInputVariables, name + "_diff");
          solutionProblemData.Name = problemData.Name;
          solutionProblemData.Description = problemData.Description;

          solutionProblemData.TrainingPartition.Start = TrainingEpisodes.Select(ep => ep.Start).Min();
          solutionProblemData.TrainingPartition.End = TrainingEpisodes.Select(ep => ep.End).Max(); // assumes training episodes are sequential without gaps
          if (TestEpisodes.Any()) {
            solutionProblemData.TestPartition.Start = TestEpisodes.Select(ep => ep.Start).Min();
            solutionProblemData.TestPartition.End = TestEpisodes.Select(ep => ep.End).Max();
          } else {
            solutionProblemData.TestPartition.Start = problemData.TestPartition.Start;
            solutionProblemData.TestPartition.End = problemData.TestPartition.End;
          }
          var solution = model.CreateRegressionSolution(solutionProblemData);
          results.AddOrUpdateResult("Solution " + name, solution);
        }
        #endregion
      }
    }
941
942    #region interpretation
943
944    // the following uses auto-diff to calculate the gradient w.r.t. the parameters forward in time.
945    // this is basically the method described in Gronwall T. Note on the derivatives with respect to a parameter of the solutions of a system of differential equations. Ann. Math. 1919;20:292–296.
946
947    // a comparison of three potential calculation methods for the gradient is given in:
948    // Sengupta, B., Friston, K. J., & Penny, W. D. (2014). Efficient gradient computation for dynamical models. Neuroimage, 98(100), 521–527. http://doi.org/10.1016/j.neuroimage.2014.04.040
949    // "Our comparison establishes that the adjoint method is computationally more efficient for numerical estimation of parametric gradients
950    // for state-space models — both linear and non-linear, as in the case of a dynamical causal model (DCM)"
951
952    // for a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes
953
954    public static IEnumerable<Tuple<double, Vector>[]> Integrate(OptimizationData optimizationData) {
955      var nTargets = optimizationData.targetVariables.Length;
956      var n = optimizationData.rows.Length * optimizationData.targetVariables.Length;
957      var d = optimizationData.nodeValueLookup.ParameterCount;
958      double[] fi = new double[n];
959      double[,] jac = new double[n, d];
960      Integrate(optimizationData, fi, jac, null);
961      for (int i = 0; i < optimizationData.rows.Length; i++) {
962        var res = new Tuple<double, Vector>[nTargets];
963        for (int j = 0; j < nTargets; j++) {
964          res[j] = Tuple.Create(fi[i * nTargets + j], Vector.CreateFromMatrixRow(jac, i * nTargets + j));
965        }
966        yield return res;
967      }
968    }
969
    /// <summary>
    /// Integrates the ODE system over all episodes.
    /// fi receives one prediction per (row, target variable) pair (row-major),
    /// jac (optional) receives the gradient of each prediction w.r.t. the parameters, and
    /// latentValues (optional) receives the trajectories of the latent variables.
    /// The first row of each episode contains the initial values taken from the dataset
    /// (with zero gradient).
    /// </summary>
    public static void Integrate(OptimizationData optimizationData, double[] fi, double[,] jac, double[,] latentValues) {
      var trees = optimizationData.trees;
      var dataset = optimizationData.problemData.Dataset;
      var inputVariables = optimizationData.variables;
      var targetVariables = optimizationData.targetVariables;
      var latentVariables = optimizationData.latentVariables;
      var episodes = optimizationData.episodes;
      var odeSolver = optimizationData.odeSolver;
      var numericIntegrationSteps = optimizationData.numericIntegrationSteps;
      var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must coincide with the order of trees in the encoding



      var nodeValues = optimizationData.nodeValueLookup;

      // TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver
      var outputRowIdx = 0;
      var episodeIdx = 0; // NOTE(review): incremented but never read (only referenced by commented-out code below)
      foreach (var episode in optimizationData.episodes) {
        var rows = Enumerable.Range(episode.Start, episode.End - episode.Start).ToArray();

        var t0 = rows.First();

        // initialize values for inputs and targets from dataset
        foreach (var varName in inputVariables) {
          // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
          // if (double.TryParse(varName, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
          //   nodeValues.SetVariableValue(varName, value, Vector.Zero);
          // } else {
          var y0 = dataset.GetDoubleValue(varName, t0);
          nodeValues.SetVariableValue(varName, y0, Vector.Zero);
          //}
        }
        foreach (var varName in targetVariables) {
          var y0 = dataset.GetDoubleValue(varName, t0);
          nodeValues.SetVariableValue(varName, y0, Vector.Zero);

          // output starting value
          fi[outputRowIdx] = y0;
          // NOTE(review): assumes Vector.Zero.CopyTo tolerates jac == null (Integrate is
          // called with jac == null from the objective-vector-only path) — confirm
          Vector.Zero.CopyTo(jac, outputRowIdx);

          outputRowIdx++;
        }

        var latentValueRowIdx = 0;
        var latentValueColIdx = 0;
        // NOTE(review): latentValueRowIdx restarts at 0 for every episode, so with multiple
        // episodes the latent trajectories of later episodes overwrite earlier rows of
        // latentValues — confirm this is intended
        foreach (var varName in latentVariables) {
          var y0 = 0.0; // assume we start at zero
          nodeValues.SetVariableValue(varName, y0, Vector.Zero);

          if (latentValues != null) {
            latentValues[latentValueRowIdx, latentValueColIdx++] = y0;
          }
        }
        latentValueColIdx = 0; latentValueRowIdx++;

        { // CODE BELOW DOESN'T WORK ANYMORE
          // if (latentVariables.Length > 0) throw new NotImplementedException();
          //
          // // add value entries for latent variables which are also integrated
          // // initial values are at the end of the parameter vector
          // // separate initial values for each episode
          // var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
          // foreach (var latentVar in latentVariables) {
          //   var arr = new double[parameterValues.Length]; // backing array
          //   arr[initialValueIdx] = 1.0;
          //   var g = new Vector(arr);
          //   nodeValues.SetVariableValue(latentVar, parameterValues[initialValueIdx], g); // we don't have observations for latent variables therefore we optimize the initial value for each episode
          //   initialValueIdx++;
          // }
        }

        var prevT = t0; // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements.
        if (odeSolver == "CVODES (full)") {
          // CVODES integrates the whole episode in one call and fills fi/jac itself
          IntegrateCVODES(trees, calculatedVariables, nodeValues, rows, fi, jac);
        } else {

          // step-wise integration: one step per dataset row (skipping the initial row)
          foreach (var t in rows.Skip(1)) {
            if (odeSolver == "HeuristicLab")
              IntegrateHL(trees, calculatedVariables, nodeValues, numericIntegrationSteps); // integrator updates nodeValues
            else if (odeSolver == "CVODES")
              IntegrateCVODES(trees, calculatedVariables, nodeValues);
            else throw new InvalidOperationException("Unknown ODE solver " + odeSolver);
            prevT = t; // NOTE(review): prevT is assigned but never read afterwards

            // update output for target variables (TODO: if we want to visualize the latent variables then we need to provide a separate output)
            for (int i = 0; i < targetVariables.Length; i++) {
              var targetVar = targetVariables[i];
              var yt = nodeValues.GetVariableValue(targetVar);

              // fill up remaining rows with last valid value if there are invalid values
              // (the integration has diverged; copy the previous row forward and stop)
              if (double.IsNaN(yt.Item1) || double.IsInfinity(yt.Item1)) {
                for (; outputRowIdx < fi.Length; outputRowIdx++) {
                  var prevIdx = outputRowIdx - targetVariables.Length;
                  fi[outputRowIdx] = fi[prevIdx]; // current <- prev
                  if (jac != null) for (int j = 0; j < jac.GetLength(1); j++) jac[outputRowIdx, j] = jac[prevIdx, j];
                }
                return;
              };

              fi[outputRowIdx] = yt.Item1;
              var g = yt.Item2;
              g.CopyTo(jac, outputRowIdx);
              outputRowIdx++;
            }
            if (latentValues != null) {
              foreach (var latentVariable in latentVariables) {
                var lt = nodeValues.GetVariableValue(latentVariable).Item1;
                latentValues[latentValueRowIdx, latentValueColIdx++] = lt;
              }
              latentValueRowIdx++; latentValueColIdx = 0;
            }

            // update for next time step (only the inputs)
            foreach (var varName in inputVariables) {
              // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
              // if (double.TryParse(varName, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
              //   // value is unchanged
              // } else {
              nodeValues.SetVariableValue(varName, dataset.GetDoubleValue(varName, t), Vector.Zero);
              // }
            }
          }
        }
        episodeIdx++;
      }
    }
1097
1098    #region CVODES
1099
1100
1101    /// <summary>
1102    ///  Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization
1103    /// </summary>
1104    /// <param name="trees">Each equation in the ODE represented as a tree</param>
1105    /// <param name="calculatedVariables">The names of the calculated variables</param>
1106    /// <param name="t">The time t up to which we need to integrate.</param>
1107    private static void IntegrateCVODES(
1108      ISymbolicExpressionTree[] trees, // f(y,p) in tree representation
1109      string[] calculatedVariables, // names of elements of y
1110      NodeValueLookup nodeValues
1111      ) {
1112
1113      // the RHS of the ODE
1114      // dy/dt = f(y_t,x_t,p)
1115      CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, nodeValues);
1116      // the Jacobian ∂f/∂y
1117      CVODES.CVDlsJacFunc jac = CreateJac(trees, calculatedVariables, nodeValues);
1118
1119      // the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i
1120      CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, nodeValues);
1121
1122      // setup solver
1123      int numberOfEquations = trees.Length;
1124      IntPtr y = IntPtr.Zero;
1125      IntPtr cvode_mem = IntPtr.Zero;
1126      IntPtr A = IntPtr.Zero;
1127      IntPtr yS0 = IntPtr.Zero;
1128      IntPtr linearSolver = IntPtr.Zero;
1129      var ns = nodeValues.ParameterCount; // number of parameters
1130
1131      try {
1132        y = CVODES.N_VNew_Serial(numberOfEquations);
1133        // init y to current values of variables
1134        // y must be initialized before calling CVodeInit
1135        for (int i = 0; i < calculatedVariables.Length; i++) {
1136          CVODES.NV_Set_Ith_S(y, i, nodeValues.GetVariableValue(calculatedVariables[i]).Item1);
1137        }
1138
1139        cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL);
1140
1141        var flag = CVODES.CVodeInit(cvode_mem, f, 0.0, y);
1142        Assert(CVODES.CV_SUCCESS == flag);
1143
1144        flag = CVODES.CVodeSetErrHandlerFn(cvode_mem, errorFunction, IntPtr.Zero);
1145        Assert(CVODES.CV_SUCCESS == flag);
1146
1147
1148        double relTol = 1.0e-2;
1149        double absTol = 1.0;
1150        flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol);  // TODO: probably need to adjust absTol per variable
1151        Assert(CVODES.CV_SUCCESS == flag);
1152
1153        A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations);
1154        Assert(A != IntPtr.Zero);
1155
1156        linearSolver = CVODES.SUNDenseLinearSolver(y, A);
1157        Assert(linearSolver != IntPtr.Zero);
1158
1159        flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A);
1160        Assert(CVODES.CV_SUCCESS == flag);
1161
1162        flag = CVODES.CVDlsSetJacFn(cvode_mem, jac);
1163        Assert(CVODES.CV_SUCCESS == flag);
1164
1165        yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter
1166        unsafe {
1167          // set to initial sensitivities supplied by caller
1168          for (int pIdx = 0; pIdx < ns; pIdx++) {
1169            var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx);
1170            for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
1171              CVODES.NV_Set_Ith_S(yS0_i, varIdx, nodeValues.GetVariableValue(calculatedVariables[varIdx]).Item2[pIdx]); // TODO: perf
1172            }
1173          }
1174        }
1175
1176        flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0);
1177        Assert(CVODES.CV_SUCCESS == flag);
1178
1179        flag = CVODES.CVodeSensEEtolerances(cvode_mem);
1180        Assert(CVODES.CV_SUCCESS == flag);
1181
1182        // make one forward integration step
1183        double tout = 0.0; // first output time
1184        flag = CVODES.CVode(cvode_mem, 1.0, y, ref tout, CVODES.CV_NORMAL);
1185        if (flag == CVODES.CV_SUCCESS) {
1186          Assert(1.0 == tout);
1187
1188          // get sensitivities
1189          flag = CVODES.CVodeGetSens(cvode_mem, ref tout, yS0);
1190          Assert(CVODES.CV_SUCCESS == flag);
1191
1192          // update variableValues based on integration results
1193          for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
1194            var yi = CVODES.NV_Get_Ith_S(y, varIdx);
1195            var gArr = new double[ns];
1196            for (var pIdx = 0; pIdx < ns; pIdx++) {
1197              unsafe {
1198                var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx);
1199                gArr[pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx);
1200              }
1201            }
1202            nodeValues.SetVariableValue(calculatedVariables[varIdx], yi, new Vector(gArr));
1203          }
1204        } else {
1205          throw new InvalidOperationException();
1206        }
1207
1208        // cleanup all allocated objects
1209      } finally {
1210        if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y);
1211        if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem);
1212        if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver);
1213        if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A);
1214        if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns);
1215      }
1216    }
1217
1218    /// <summary>
1219    ///  Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization
1220    /// </summary>
1221    /// <param name="trees">Each equation in the ODE represented as a tree</param>
1222    /// <param name="calculatedVariables">The names of the calculated variables</param>
1223    /// <param name="t">The time t up to which we need to integrate.</param>
1224    private static void IntegrateCVODES(
1225      ISymbolicExpressionTree[] trees, // f(y,p) in tree representation
1226      string[] calculatedVariables, // names of elements of y
1227      NodeValueLookup nodeValues,
1228      IEnumerable<int> rows,
1229      double[] fi,
1230      double[,] jac
1231      ) {
1232
1233      // the RHS of the ODE
1234      // dy/dt = f(y_t,x_t,p)
1235      CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, nodeValues);
1236
1237      var calcSens = jac != null;
1238      // the Jacobian ∂f/∂y
1239      CVODES.CVDlsJacFunc jacF = CreateJac(trees, calculatedVariables, nodeValues);
1240
1241      // the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i
1242      CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, nodeValues);
1243
1244      // setup solver
1245      int numberOfEquations = trees.Length;
1246      IntPtr y = IntPtr.Zero;
1247      IntPtr cvode_mem = IntPtr.Zero;
1248      IntPtr A = IntPtr.Zero;
1249      IntPtr yS0 = IntPtr.Zero;
1250      IntPtr linearSolver = IntPtr.Zero;
1251      var ns = nodeValues.ParameterCount; // number of parameters
1252
1253      try {
1254        y = CVODES.N_VNew_Serial(numberOfEquations);
1255        // init y to current values of variables
1256        // y must be initialized before calling CVodeInit
1257        for (int i = 0; i < calculatedVariables.Length; i++) {
1258          CVODES.NV_Set_Ith_S(y, i, nodeValues.GetVariableValue(calculatedVariables[i]).Item1);
1259        }
1260
1261        cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL);
1262
1263        var flag = CVODES.CVodeInit(cvode_mem, f, rows.First(), y);
1264        Assert(CVODES.CV_SUCCESS == flag);
1265
1266        flag = CVODES.CVodeSetErrHandlerFn(cvode_mem, errorFunction, IntPtr.Zero);
1267        Assert(CVODES.CV_SUCCESS == flag);
1268        double relTol = 1.0e-2;
1269        double absTol = 1.0;
1270        flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol);  // TODO: probably need to adjust absTol per variable
1271        Assert(CVODES.CV_SUCCESS == flag);
1272
1273        A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations);
1274        Assert(A != IntPtr.Zero);
1275
1276        linearSolver = CVODES.SUNDenseLinearSolver(y, A);
1277        Assert(linearSolver != IntPtr.Zero);
1278
1279        flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A);
1280        Assert(CVODES.CV_SUCCESS == flag);
1281
1282        flag = CVODES.CVDlsSetJacFn(cvode_mem, jacF);
1283        Assert(CVODES.CV_SUCCESS == flag);
1284
1285        if (calcSens) {
1286
1287          yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter
1288          unsafe {
1289            // set to initial sensitivities supplied by caller
1290            for (int pIdx = 0; pIdx < ns; pIdx++) {
1291              var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx);
1292              for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
1293                CVODES.NV_Set_Ith_S(yS0_i, varIdx, nodeValues.GetVariableValue(calculatedVariables[varIdx]).Item2[pIdx]); // TODO: perf
1294              }
1295            }
1296          }
1297
1298          flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0);
1299          Assert(CVODES.CV_SUCCESS == flag);
1300
1301          flag = CVODES.CVodeSensEEtolerances(cvode_mem);
1302          Assert(CVODES.CV_SUCCESS == flag);
1303        }
1304        // integrate
1305        int outputIdx = calculatedVariables.Length; // values at t0 do not need to be set.
1306        foreach (var tout in rows.Skip(1)) {
1307          double tret = 0;
1308          flag = CVODES.CVode(cvode_mem, tout, y, ref tret, CVODES.CV_NORMAL);
1309          if (flag == CVODES.CV_SUCCESS) {
1310            // Assert(1.0 == tout);
1311            if (calcSens) {
1312              // get sensitivities
1313              flag = CVODES.CVodeGetSens(cvode_mem, ref tret, yS0);
1314              Assert(CVODES.CV_SUCCESS == flag);
1315            }
1316            // update variableValues based on integration results
1317            for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
1318              var yi = CVODES.NV_Get_Ith_S(y, varIdx);
1319              fi[outputIdx] = yi;
1320              if (calcSens) {
1321                // var gArr = new double[ns];
1322                for (var pIdx = 0; pIdx < ns; pIdx++) {
1323                  unsafe {
1324                    var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx);
1325                    jac[outputIdx, pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx);
1326                  }
1327                }
1328              }
1329              outputIdx++;
1330            }
1331
1332          } else {
1333            // fill up remaining values
1334            while (outputIdx < fi.Length) {
1335              fi[outputIdx] = fi[outputIdx - calculatedVariables.Length];
1336              if (calcSens) {
1337                for (var pIdx = 0; pIdx < ns; pIdx++) {
1338                  jac[outputIdx, pIdx] = jac[outputIdx - calculatedVariables.Length, pIdx];
1339                }
1340              }
1341              outputIdx++;
1342            }
1343            return;
1344          }
1345        }
1346
1347        // cleanup all allocated objects
1348      } finally {
1349        if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y);
1350        if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem);
1351        if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver);
1352        if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A);
1353        if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns);
1354      }
1355    }
1356
1357    private static void errorFunction(int errorCode, IntPtr module, IntPtr function, IntPtr msg, IntPtr ehdata) {
1358      var moduleStr = Marshal.PtrToStringAnsi(module);
1359      var functionStr = Marshal.PtrToStringAnsi(function);
1360      var msgStr = Marshal.PtrToStringAnsi(msg);
1361      string type = errorCode == 0 ? "Warning" : "Error";
1362      // throw new InvalidProgramException($"{type}: {msgStr} Module: {moduleStr} Function: {functionStr}");
1363    }
1364
1365    private static CVODES.CVRhsFunc CreateOdeRhs(
1366      ISymbolicExpressionTree[] trees,
1367      string[] calculatedVariables,
1368      NodeValueLookup nodeValues) {
1369      // we don't need to calculate a gradient here
1370      return (double t,
1371              IntPtr y, // N_Vector, current value of y (input)
1372              IntPtr ydot, // N_Vector (calculated value of y' (output)
1373              IntPtr user_data // optional user data, (unused here)
1374              ) => {
1375                for (int i = 0; i < calculatedVariables.Length; i++) {
1376                  var y_i = CVODES.NV_Get_Ith_S(y, (long)i);
1377                  nodeValues.SetVariableValue(calculatedVariables[i], y_i);
1378                }
1379                for (int i = 0; i < trees.Length; i++) {
1380                  var tree = trees[i];
1381                  var res_i = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
1382                  CVODES.NV_Set_Ith_S(ydot, i, res_i);
1383                }
1384                return 0;
1385              };
1386    }
1387
    /// <summary>
    /// Creates the CVODES callback that fills the dense Jacobian ∂f/∂y of the ODE
    /// right-hand side (entry (i,j) = ∂f_i/∂y_j).
    /// NOTE(review): the gradients produced via nodeValues may be taken w.r.t. the
    /// parameters rather than the state variables (cf. the separate tmpNodeValues
    /// with variableGradient:true in CreateSensitivityRhs) — see the TODO below; verify.
    /// </summary>
    private static CVODES.CVDlsJacFunc CreateJac(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables,
      NodeValueLookup nodeValues) {

      return (
        double t, // current time (input)
        IntPtr y, // N_Vector, current value of y (input)
        IntPtr fy, // N_Vector, current value of f (input)
        IntPtr Jac, // SUNMatrix ∂f/∂y (output, rows i contains are ∂f_i/∂y vector)
        IntPtr user_data, // optional (unused here)
        IntPtr tmp1, // N_Vector, optional (unused here)
        IntPtr tmp2, // N_Vector, optional (unused here)
        IntPtr tmp3 // N_Vector, optional (unused here)
      ) => {
        // copy the current state y into the lookup so the trees are evaluated at y
        for (int i = 0; i < calculatedVariables.Length; i++) {
          var y_i = CVODES.NV_Get_Ith_S(y, (long)i);
          nodeValues.SetVariableValue(calculatedVariables[i], y_i);
        }
        // evaluate each equation with gradients and write row i of the Jacobian
        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues, out double z, out Vector dz);
          for (int j = 0; j < calculatedVariables.Length; j++) {
            CVODES.SUNDenseMatrix_Set(Jac, i, j, dz[j]);  //TODO: must set as in SensitivityRhs!
          }
        }
        return 0; // on success
      };
    }
1437
1438
1439    // to calculate sensitivities RHS for all equations at once
1440    // must compute (∂f/∂y)s_i(t) + ∂f/∂p_i and store in ySdot.
1441    // Index i refers to parameters, dimensionality of matrix and vectors is number of equations
1442    private static CVODES.CVSensRhsFn CreateSensitivityRhs(ISymbolicExpressionTree[] trees, string[] calculatedVariables, NodeValueLookup nodeValues) {
1443      return (
1444              int Ns, // number of parameters
1445              double t, // current time
1446              IntPtr y, // N_Vector y(t) (input)
1447              IntPtr ydot, // N_Vector dy/dt(t) (input)
1448              IntPtr yS, // N_Vector*, one vector for each parameter (input)
1449              IntPtr ySdot, // N_Vector*, one vector for each parameter (output)
1450              IntPtr user_data, // optional (unused here)
1451              IntPtr tmp1, // N_Vector, optional (unused here)
1452              IntPtr tmp2 // N_Vector, optional (unused here)
1453        ) => {
1454
1455          var tmpNodeValues = new NodeValueLookup(trees, variableGradient: true); // for df / dy calculation
1456
1457          // update variableValues based on integration results
1458          for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
1459            var yi = CVODES.NV_Get_Ith_S(y, varIdx);
1460            var gArr = new double[Ns];
1461            for (var pIdx = 0; pIdx < Ns; pIdx++) {
1462              unsafe {
1463                var yS_pi = *((IntPtr*)yS.ToPointer() + pIdx);
1464                gArr[pIdx] = CVODES.NV_Get_Ith_S(yS_pi, varIdx);
1465              }
1466            }
1467            nodeValues.SetVariableValue(calculatedVariables[varIdx], yi, new Vector(gArr));
1468            tmpNodeValues.SetVariableValue(calculatedVariables[varIdx], yi, Vector.CreateIndicator(calculatedVariables.Length, varIdx));
1469          }
1470
1471          for (int pIdx = 0; pIdx < Ns; pIdx++) {
1472            unsafe {
1473              var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
1474              CVODES.N_VConst_Serial(0.0, sDot_pi);
1475            }
1476          }
1477
1478
1479          for (int i = 0; i < trees.Length; i++) {
1480            var tree = trees[i];
1481
1482            // update ySdot = (∂f/∂y)s_i(t) + ∂f/∂p_i
1483
1484            // 1. interpret tree to calculate (∂f/∂y)
1485            // we need a different nodeValue object for (∂f/∂y)
1486            InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), tmpNodeValues, out double z1, out Vector df_dy);
1487
1488            // 2. interpret tree to calculate ∂f/∂p_i
1489            InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues, out double z, out Vector df_dp);
1490
1491            for (int pIdx = 0; pIdx < Ns; pIdx++) {
1492              unsafe {
1493                var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
1494                var s_pi = *((IntPtr*)yS.ToPointer() + pIdx);
1495
1496                var v = CVODES.NV_Get_Ith_S(sDot_pi, i);
1497                // (∂f/∂y)s_i(t)
1498                var p = 0.0;
1499                for (int yIdx = 0; yIdx < calculatedVariables.Length; yIdx++) {
1500                  p += df_dy[yIdx] * CVODES.NV_Get_Ith_S(s_pi, yIdx);
1501                }
1502                // + ∂f/∂p_i
1503                CVODES.NV_Set_Ith_S(sDot_pi, i, p + df_dp[pIdx]);
1504              }
1505            }
1506
1507          }
1508          return 0; // on success
1509        };
1510    }
1511
1512    #endregion
1513
1514    private static void IntegrateHL(
1515      ISymbolicExpressionTree[] trees,
1516      string[] calculatedVariables, // names of integrated variables
1517      NodeValueLookup nodeValues,
1518      int numericIntegrationSteps) {
1519
1520
1521      double[] deltaF = new double[calculatedVariables.Length];
1522      Vector[] deltaG = new Vector[calculatedVariables.Length];
1523
1524      double h = 1.0 / numericIntegrationSteps;
1525      for (int step = 0; step < numericIntegrationSteps; step++) {
1526
1527        // evaluate all trees
1528        for (int i = 0; i < trees.Length; i++) {
1529          var tree = trees[i];
1530
1531          // Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol
1532          double f; Vector g;
1533          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues, out f, out g);
1534          deltaF[i] = f;
1535          deltaG[i] = g;
1536        }
1537
1538        // update variableValues for next step, trapezoid integration
1539        for (int i = 0; i < trees.Length; i++) {
1540          var varName = calculatedVariables[i];
1541          var oldVal = nodeValues.GetVariableValue(varName);
1542          nodeValues.SetVariableValue(varName, oldVal.Item1 + h * deltaF[i], oldVal.Item2.Add(deltaG[i].Scale(h)));
1543        }
1544      }
1545    }
1546
1547    // TODO: use an existing interpreter implementation instead
1548    private static double InterpretRec(ISymbolicExpressionTreeNode node, NodeValueLookup nodeValues) {
1549      if (node is ConstantTreeNode constTreeNode) {
1550        return nodeValues.ConstantNodeValue(constTreeNode);
1551      } else if (node is VariableTreeNode varTreeNode) {
1552        return nodeValues.VariableNodeValue(varTreeNode);
1553      } else if (node.Symbol is Addition) {
1554        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1555        for (int i = 1; i < node.SubtreeCount; i++) {
1556          f += InterpretRec(node.GetSubtree(i), nodeValues);
1557        }
1558        return f;
1559      } else if (node.Symbol is Multiplication) {
1560        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1561        for (int i = 1; i < node.SubtreeCount; i++) {
1562          f *= InterpretRec(node.GetSubtree(i), nodeValues);
1563        }
1564        return f;
1565      } else if (node.Symbol is Subtraction) {
1566        if (node.SubtreeCount == 1) {
1567          return -InterpretRec(node.GetSubtree(0), nodeValues);
1568        } else {
1569          var f = InterpretRec(node.GetSubtree(0), nodeValues);
1570          for (int i = 1; i < node.SubtreeCount; i++) {
1571            f -= InterpretRec(node.GetSubtree(i), nodeValues);
1572          }
1573          return f;
1574        }
1575      } else if (node.Symbol is Division) {
1576        if (node.SubtreeCount == 1) {
1577          var f = InterpretRec(node.GetSubtree(0), nodeValues);
1578          // protected division
1579          if (f.IsAlmost(0.0)) {
1580            return 0;
1581          } else {
1582            return 1.0 / f;
1583          }
1584        } else {
1585          var f = InterpretRec(node.GetSubtree(0), nodeValues);
1586          for (int i = 1; i < node.SubtreeCount; i++) {
1587            var g = InterpretRec(node.GetSubtree(i), nodeValues);
1588            // protected division
1589            if (g.IsAlmost(0.0)) {
1590              return 0;
1591            } else {
1592              f /= g;
1593            }
1594          }
1595          return f;
1596        }
1597      } else if (node.Symbol is Sine) {
1598        Assert(node.SubtreeCount == 1);
1599
1600        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1601        return Math.Sin(f);
1602      } else if (node.Symbol is Cosine) {
1603        Assert(node.SubtreeCount == 1);
1604
1605        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1606        return Math.Cos(f);
1607      } else if (node.Symbol is Square) {
1608        Assert(node.SubtreeCount == 1);
1609
1610        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1611        return f * f;
1612      } else if (node.Symbol is Exponential) {
1613        Assert(node.SubtreeCount == 1);
1614
1615        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1616        return Math.Exp(f);
1617      } else if (node.Symbol is Logarithm) {
1618        Assert(node.SubtreeCount == 1);
1619
1620        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1621        return Math.Log(f);
1622      } else if (node.Symbol is HyperbolicTangent) {
1623        Assert(node.SubtreeCount == 1);
1624
1625        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1626        return Math.Tanh(f);
1627      } else if (node.Symbol is AnalyticQuotient) {
1628        Assert(node.SubtreeCount == 2);
1629
1630        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1631        var g = InterpretRec(node.GetSubtree(1), nodeValues);
1632        return f / Math.Sqrt(1 + g * g);
1633      } else throw new NotSupportedException("unsupported symbol");
1634    }
1635
1636    private static void Assert(bool cond) {
1637#if DEBUG
1638      if (!cond) throw new InvalidOperationException("Assertion failed");
1639#endif
1640    }
1641
1642    private static void InterpretRec(
1643      ISymbolicExpressionTreeNode node,
1644       NodeValueLookup nodeValues,      // contains value and gradient vector for a node (variables and constants only)
1645      out double z,
1646      out Vector dz
1647      ) {
1648      double f, g;
1649      Vector df, dg;
1650      if (node is ConstantTreeNode constTreeNode) {
1651        var val = nodeValues.ConstantNodeValueAndGradient(constTreeNode);
1652        z = val.Item1;
1653        dz = val.Item2;
1654      } else if (node is VariableTreeNode varTreeNode) {
1655        var val = nodeValues.VariableNodeValueAndGradient(varTreeNode);
1656        z = val.Item1;
1657        dz = val.Item2;
1658      } else if (node.Symbol is Addition) {
1659        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1660        for (int i = 1; i < node.SubtreeCount; i++) {
1661          InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1662          f = f + g;
1663          df = df.Add(dg);
1664        }
1665        z = f;
1666        dz = df;
1667      } else if (node.Symbol is Multiplication) {
1668        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1669        for (int i = 1; i < node.SubtreeCount; i++) {
1670          InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1671          var newF = f * g;
1672          df = df.Scale(g).Add(dg.Scale(f));  // f'*g + f*g'
1673
1674          f = newF;
1675        }
1676        z = f;
1677        dz = df;
1678      } else if (node.Symbol is Subtraction) {
1679        if (node.SubtreeCount == 1) {
1680          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1681          z = -f;
1682          dz = df.Scale(-1.0);
1683        } else {
1684          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1685          for (int i = 1; i < node.SubtreeCount; i++) {
1686            InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1687            f = f - g;
1688            df = df.Subtract(dg);
1689          }
1690          z = f;
1691          dz = df;
1692        }
1693      } else if (node.Symbol is Division) {
1694        if (node.SubtreeCount == 1) {
1695          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1696          // protected division
1697          if (f.IsAlmost(0.0)) {
1698            z = 0;
1699            dz = Vector.Zero;
1700          } else {
1701            z = 1.0 / f;
1702            dz = df.Scale(-1 * z * z);
1703          }
1704        } else {
1705          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1706          for (int i = 1; i < node.SubtreeCount; i++) {
1707            InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1708            // protected division
1709            if (g.IsAlmost(0.0)) {
1710              z = 0;
1711              dz = Vector.Zero;
1712              return;
1713            } else {
1714              var inv_g = 1.0 / g;
1715              f = f * inv_g;
1716              df = dg.Scale(-f * inv_g * inv_g).Add(df.Scale(inv_g));
1717            }
1718          }
1719          z = f;
1720          dz = df;
1721        }
1722      } else if (node.Symbol is Sine) {
1723        Assert(node.SubtreeCount == 1);
1724        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1725        z = Math.Sin(f);
1726        dz = df.Scale(Math.Cos(f));
1727      } else if (node.Symbol is Cosine) {
1728        Assert(node.SubtreeCount == 1);
1729        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1730        z = Math.Cos(f);
1731        dz = df.Scale(-Math.Sin(f));
1732      } else if (node.Symbol is Square) {
1733        Assert(node.SubtreeCount == 1);
1734        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1735        z = f * f;
1736        dz = df.Scale(2.0 * f);
1737      } else if (node.Symbol is Exponential) {
1738        Assert(node.SubtreeCount == 1);
1739        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1740        z = Math.Exp(f);
1741        dz = df.Scale(z);
1742      } else if (node.Symbol is Logarithm) {
1743        Assert(node.SubtreeCount == 1);
1744        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1745        z = Math.Log(f);
1746        dz = df.Scale(1.0 / f);
1747      } else if (node.Symbol is HyperbolicTangent) {
1748        Assert(node.SubtreeCount == 1);
1749        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1750        z = Math.Tanh(f);
1751        dz = df.Scale(1 - z * z); // tanh(f(x))' = f(x)'sech²(f(x)) = f(x)'(1 - tanh²(f(x)))
1752      } else if (node.Symbol is AnalyticQuotient) {
1753        Assert(node.SubtreeCount == 2);
1754        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1755        InterpretRec(node.GetSubtree(1), nodeValues, out g, out dg);
1756        z = f / Math.Sqrt(1 + g * g);
1757        var denom = 1.0 / Math.Pow(1 + g * g, 1.5);
1758        dz = df.Scale(1 + g * g).Subtract(dg.Scale(f * g)).Scale(denom);
1759      } else {
1760        throw new NotSupportedException("unsupported symbol");
1761      }
1762    }
1763
1764    #endregion
1765
1766    #region events
1767    /*
1768     * Dependencies between parameters:
1769     *
1770     * ProblemData
1771     *    |                                                                         
1772     *    V                                                                         
1773     * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables     
1774     *               |   |                 |                   |                     
1775     *               V   V                 |                   |                     
1776     *             Grammar <---------------+-------------------                     
1777     *                |                                                             
1778     *                V                                                             
1779     *            Encoding                                                           
1780     */
1781    private void RegisterEventHandlers() {
1782      ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged;
1783      if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed;
1784
1785      TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged;
1786      if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
1787
1788      FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged;
1789      if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
1790
1791      MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged;
1792
1793      NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged;
1794    }
1795
1796    private void NumLatentVariablesChanged(object sender, EventArgs e) {
1797      UpdateGrammarAndEncoding();
1798    }
1799
1800    private void MaximumLengthChanged(object sender, EventArgs e) {
1801      UpdateGrammarAndEncoding();
1802    }
1803
1804    private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) {
1805      FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
1806    }
1807
1808    private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
1809      UpdateGrammarAndEncoding();
1810    }
1811
    // Called when the TargetVariables parameter is assigned a new value object:
    // subscribe to the new checked-items collection, then rebuild grammar/encoding.
    // NOTE(review): the handler on the previously assigned value is never removed — verify.
    private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
      TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
      UpdateGrammarAndEncoding();
    }
1816
1817    private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
1818      UpdateGrammarAndEncoding();
1819    }
1820
    // Called when the ProblemData parameter is assigned a new value object:
    // subscribe to content changes of the new problem data, then propagate the change
    // to dependent parameters and reset cached results.
    // NOTE(review): the handler on the previously assigned value is never removed — verify.
    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      ProblemDataParameter.Value.Changed += ProblemData_Changed;
      OnProblemDataChanged();
      OnReset();
    }
1826
    // Forwards content changes of the problem data (e.g. dataset replaced) to dependent
    // parameters and resets cached results.
    private void ProblemData_Changed(object sender, EventArgs e) {
      OnProblemDataChanged();
      OnReset();
    }
1831
1832    private void OnProblemDataChanged() {
1833      UpdateTargetVariables();        // implicitly updates other dependent parameters
1834      var handler = ProblemDataChanged;
1835      if (handler != null) handler(this, EventArgs.Empty);
1836    }
1837
1838    #endregion
1839
1840    #region  helper
1841
1842    private static double[] CalculateDifferences(double[] targetValues, double numericDifferencesSmoothing) {
1843      return CalculateDifferencesSavitykzGolay(targetValues);
1844    }
1845
    // Numeric differentiation via a penalized smoothing spline (alglib).
    // Currently not called by CalculateDifferences (the Savitzky-Golay variant is used instead).
    // Throws ArgumentException when the spline fit fails for the given smoothing parameter.
    private static double[] CalculateDifferencesPenalizedSplines(double[] targetValues, double numericDifferencesSmoothing) {
      // fit a penalized spline over the sample index (0, 1, ..., n-1) as abscissa
      var x = Enumerable.Range(0, targetValues.Length).Select(i => (double)i).ToArray();
      alglib.spline1dfitpenalized(x, targetValues, targetValues.Length / 2, numericDifferencesSmoothing,
        out int info, out alglib.spline1dinterpolant s, out alglib.spline1dfitreport rep);
      if (info <= 0) throw new ArgumentException("There was a problem while smoothing numeric differences. Try to use a different smoothing parameter value.");

      // evaluate the first derivative of the fitted spline at every sample position
      double[] dy = new double[x.Length];
      for (int i = 0; i < x.Length; i++) {
        double xi = x[i];
        alglib.spline1ddiff(s, xi, out double y, out double dyi, out double d2y);
        dy[i] = dyi;
      }
      return dy;
    }
1860
    // Savitzky-Golay convolution weights for the first derivative of a cubic fit:
    // a symmetric 7-point window for interior samples and one-sided 4-point windows for the borders.
    private static readonly double[] sgCoeffMiddle = SavitzkyGolayCoefficients(3, 3, 1, 3);
    private static readonly double[] sgCoeffStart = SavitzkyGolayCoefficients(0, 3, 1, 3);
    private static readonly double[] sgCoeffEnd = SavitzkyGolayCoefficients(3, 0, 1, 3);
    // Numeric differentiation of y via Savitzky-Golay filtering (convolution with the weights above).
    // NOTE(review): the border handling assumes y.Length >= 6; shorter arrays would read out of
    // bounds in the start/end loops -- confirm that callers guarantee this.
    private static double[] CalculateDifferencesSavitykzGolay(double[] y) {
      double[] dy = new double[y.Length];
      // interior samples: symmetric window y[i-3] .. y[i+3]
      for (int i = 3; i < y.Length - 3; i++) {
        for (int j = -3; j <= 3; j++) {
          dy[i] += y[i + j] * sgCoeffMiddle[j + 3];
        }
      }

      // start: first three samples use the one-sided window y[i] .. y[i+3]
      for (int i = 0; i < 3; i++) {
        for (int j = 0; j <= 3; j++) {
          dy[i] += y[i + j] * sgCoeffStart[j];
        }
      }

      // end: last three samples use the one-sided window y[i-3] .. y[i]
      for (int i = y.Length - 3; i < y.Length; i++) {
        for (int j = -3; j <= 0; j++) {
          dy[i] += y[i + j] * sgCoeffEnd[j + 3];
        }
      }

      return dy;
    }
1888
    /// <summary>
    /// Calculates coefficients for Savitzky-Golay filtering. (Numerical Recipes, page 769), one important change is that the coefficients are returned in normal order instead of wraparound order
    /// </summary>
    /// <param name="nl">number of samples to the left</param>
    /// <param name="nr">number of samples to the right</param>
    /// <param name="ld">order of derivative (smoothing=0)</param>
    /// <param name="order">order of the polynomial to fit</param>
    /// <returns>resulting coefficients for convolution, in correct order (t-nl, ... t-1, t+0, t+1, ... t+nr)</returns>
    private static double[] SavitzkyGolayCoefficients(int nl, int nr, int ld, int order) {
      int np = nl + nr + 1; // total number of points in the window

      int j, k, imj, ipj, kk, mm;
      double fac = 0;
      double sum = 0;
      // the window must cover the polynomial order and the derivative order must not exceed it
      if (nl < 0 || nr < 0 || ld > order || nl + nr < order) throw new ArgumentException();

      double[,] a = new double[order + 1, order + 1]; // normal-equations matrix of the least-squares fit
      double[] b = new double[order + 1];
      var c = new double[np];

      // set up the normal equations: a[i,j] = sum over the window of k^(i+j)
      for (ipj = 0; ipj <= (order << 1); ipj++) {
        sum = (ipj > 0 ? 0.0 : 1.0);
        for (k = 1; k <= nr; k++) sum += Math.Pow((double)k, (double)ipj);
        for (k = 1; k <= nl; k++) sum += Math.Pow((double)-k, (double)ipj);
        mm = Math.Min(ipj, 2 * order - ipj);
        for (imj = -mm; imj <= mm; imj += 2)
          a[(ipj + imj) / 2, (ipj - imj) / 2] = sum;
      }
      // right-hand side: unit vector selecting the ld-th derivative
      for (j = 0; j < order + 1; j++) b[j] = 0;
      b[ld] = 1.0;
      alglib.densesolverreport rep;
      int info;
      double[] x = new double[b.Length];
      alglib.rmatrixsolve(a, b.Length, b, out info, out rep, out x);

      // evaluate the fitted polynomial at each offset of the window to obtain the convolution weights
      for (kk = 0; kk < np; kk++) c[kk] = 0.0;
      for (k = -nl; k <= nr; k++) {
        sum = x[0];
        fac = 1.0;
        for (mm = 1; mm <= order; mm++) sum += x[mm] * (fac *= k);
        kk = k + nl; // normal order instead of NR's wraparound order
        c[kk] = sum;
      }
      return c;
    }
1934
1935
1936    private void InitAllParameters() {
1937      UpdateTargetVariables(); // implicitly updates the grammar and the encoding
1938    }
1939
1940    private ReadOnlyCheckedItemList<StringValue> CreateFunctionSet() {
1941      var l = new CheckedItemList<StringValue>();
1942      l.Add(new StringValue("Addition").AsReadOnly());
1943      l.Add(new StringValue("Multiplication").AsReadOnly());
1944      l.Add(new StringValue("Division").AsReadOnly());
1945      l.Add(new StringValue("Subtraction").AsReadOnly());
1946      l.Add(new StringValue("Sine").AsReadOnly());
1947      l.Add(new StringValue("Cosine").AsReadOnly());
1948      l.Add(new StringValue("Square").AsReadOnly());
1949      l.Add(new StringValue("Logarithm").AsReadOnly());
1950      l.Add(new StringValue("Exponential").AsReadOnly());
1951      l.Add(new StringValue("HyperbolicTangent").AsReadOnly());
1952      l.Add(new StringValue("AnalyticQuotient").AsReadOnly());
1953      return l.AsReadOnly();
1954    }
1955
1956    // private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
1957    //   return n.Symbol.Name[0] == 'λ';
1958    // }
1959
1960    private void UpdateTargetVariables() {
1961      var currentlySelectedVariables = TargetVariables.CheckedItems
1962        .OrderBy(i => i.Index)
1963        .Select(i => i.Value.Value)
1964        .ToArray();
1965
1966      var newVariablesList = new CheckedItemList<StringValue>(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly();
1967      var matchingItems = newVariablesList.Where(item => currentlySelectedVariables.Contains(item.Value)).ToArray();
1968      foreach (var item in newVariablesList) {
1969        if (currentlySelectedVariables.Contains(item.Value)) {
1970          newVariablesList.SetItemCheckedState(item, true);
1971        } else {
1972          newVariablesList.SetItemCheckedState(item, false);
1973        }
1974      }
1975      TargetVariablesParameter.Value = newVariablesList;
1976    }
1977
1978    private void UpdateGrammarAndEncoding() {
1979      var encoding = new MultiEncoding();
1980      var g = CreateGrammar();
1981      foreach (var targetVar in TargetVariables.CheckedItems) {
1982        var e = new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength);
1983        var multiManipulator = e.Operators.Where(op => op is MultiSymbolicExpressionTreeManipulator).First();
1984        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
1985        // make sure our multi-manipulator is the only manipulator
1986        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
1987
1988        // set the crossover probability to reduce likelihood that multiple trees are crossed at the same time
1989        var subtreeCrossovers = e.Operators.OfType<SubtreeCrossover>();
1990        foreach (var xover in subtreeCrossovers) {
1991          xover.CrossoverProbability.Value = 0.3;
1992        }
1993
1994        encoding = encoding.Add(e); // only limit by length
1995      }
1996      for (int i = 1; i <= NumberOfLatentVariables; i++) {
1997        var e = new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength);
1998        var multiManipulator = e.Operators.Where(op => op is MultiSymbolicExpressionTreeManipulator).First();
1999        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
2000        // make sure our multi-manipulator is the only manipulator
2001        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
2002
2003        // set the crossover probability to reduce likelihood that multiple trees are crossed at the same time
2004        var subtreeCrossovers = e.Operators.OfType<SubtreeCrossover>();
2005        foreach (var xover in subtreeCrossovers) {
2006          xover.CrossoverProbability.Value = 0.3;
2007        }
2008
2009        encoding = encoding.Add(e);
2010      }
2011      Encoding = encoding;
2012    }
2013
    // Builds the symbolic expression grammar for the ODE right-hand sides:
    // configures variable/factor symbols from the dataset, enables the checked function
    // symbols, and tunes the initialization/manipulation distributions of constants and
    // variable weights (small initial values, larger manipulation steps).
    private ISymbolicExpressionGrammar CreateGrammar() {
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.StartGrammarManipulation();

      var problemData = ProblemData;
      var ds = problemData.Dataset;
      // automatically defined functions are not used
      grammar.MaximumFunctionArguments = 0;
      grammar.MaximumFunctionDefinitions = 0;
      // besides the allowed inputs, the target variables themselves may appear in the
      // right-hand sides (coupled system of equations)
      var allowedVariables = problemData.AllowedInputVariables.Concat(TargetVariables.CheckedItems.Select(chk => chk.Value.Value));
      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableBase>()) {
        if (!varSymbol.Fixed) {
          varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
          varSymbol.VariableNames = allowedVariables.Where(x => ds.VariableHasType<double>(x));
        }
      }
      // binary factor symbols represent string (categorical) variables with 0/1 encoding
      foreach (var factorSymbol in grammar.Symbols.OfType<BinaryFactorVariable>()) {
        if (!factorSymbol.Fixed) {
          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
          factorSymbol.VariableValues = factorSymbol.VariableNames
            .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList());
        }
      }
      // factor symbols additionally map each distinct string value to an index
      foreach (var factorSymbol in grammar.Symbols.OfType<FactorVariable>()) {
        if (!factorSymbol.Fixed) {
          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
          factorSymbol.VariableValues = factorSymbol.VariableNames
            .ToDictionary(varName => varName,
            varName => ds.GetStringValues(varName).Distinct()
            .Select((n, i) => Tuple.Create(n, i))
            .ToDictionary(tup => tup.Item1, tup => tup.Item2));
        }
      }

      grammar.ConfigureAsDefaultRegressionGrammar();

      // configure initialization of constants
      var constSy = (Constant)grammar.GetSymbol("Constant");
      // max and min are only relevant for initialization
      constSy.MaxValue = +1.0e-1; // small initial values for constant opt
      constSy.MinValue = -1.0e-1;
      constSy.MultiplicativeManipulatorSigma = 1.0; // allow large jumps for manipulation
      constSy.ManipulatorMu = 0.0;
      constSy.ManipulatorSigma = 1.0; // allow large jumps

      // configure initialization of variables
      var varSy = (Variable)grammar.GetSymbol("Variable");
      // init variables to a small value and allow manipulation
      varSy.WeightMu = 0.0;
      varSy.WeightSigma = 1e-1;
      varSy.WeightManipulatorMu = 0.0;
      varSy.WeightManipulatorSigma = 1.0;
      varSy.MultiplicativeWeightManipulatorSigma = 1.0;

      // enable exactly the function symbols that are checked in the function set parameter
      foreach (var f in FunctionSet) {
        grammar.GetSymbol(f.Value).Enabled = FunctionSet.ItemChecked(f);
      }

      grammar.FinishedGrammarManipulation();
      return grammar;

    }
2077    #endregion
2078
2079
2080    #region Import
    // Imports the settings of another problem instance into this one.
    // NOTE(review): ProblemData is assigned first; presumably its setter refreshes dependent
    // parameters (cf. OnProblemDataChanged) which are then overwritten by the explicit
    // assignments below -- keep this order.
    public void Load(Problem problem) {
      // transfer parameter values from problem parameter
      this.ProblemData = problem.ProblemData;
      this.TrainingEpisodesParameter.Value = problem.TrainingEpisodesParameter.Value;
      this.TargetVariablesParameter.Value = problem.TargetVariablesParameter.Value;
      this.Name = problem.Name;
      this.Description = problem.Description;
    }
2089    #endregion
2090
2091
2092    // TODO: for integration we only need a part of the data that we need for optimization
2093
    // Bundles everything needed to evaluate a candidate system of equations:
    // the trees, the target data, the training episodes, solver settings and a
    // lookup for the current node/parameter values.
    public class OptimizationData {
      public readonly ISymbolicExpressionTree[] trees;
      public readonly string[] targetVariables;
      public readonly IRegressionProblemData problemData;
      public readonly double[][] targetValues;
      // residual scaling factors, one per target: 1 / (average per-episode standard deviation)
      public readonly double[] inverseStandardDeviation;
      public readonly IntRange[] episodes;
      public readonly int numericIntegrationSteps;
      public readonly string[] latentVariables;
      public readonly string odeSolver;
      public readonly NodeValueLookup nodeValueLookup;
      // all row indices covered by the episodes, flattened in episode order
      public readonly int[] rows;
      internal readonly string[] variables;

      public OptimizationData(ISymbolicExpressionTree[] trees, string[] targetVars, string[] inputVariables,
        IRegressionProblemData problemData,
        double[][] targetValues,
        IntRange[] episodes,
        int numericIntegrationSteps, string[] latentVariables, string odeSolver) {
        this.trees = trees;
        this.targetVariables = targetVars;
        this.problemData = problemData;
        this.targetValues = targetValues;
        this.variables = inputVariables;
        if (targetValues != null) {
          this.inverseStandardDeviation = new double[targetValues.Length];
          for (int i = 0; i < targetValues.Length; i++) {
            // calculate variance for each episode separately and calc the average
            // (targetValues[i] is assumed to contain the episodes' values concatenated
            // in episode order -- TODO confirm against callers)
            var epStartIdx = 0;
            var stdevs = new List<double>();
            foreach (var ep in episodes) {
              var epValues = targetValues[i].Skip(epStartIdx).Take(ep.Size);
              stdevs.Add(epValues.StandardDeviation());
              epStartIdx += ep.Size;
            }
            // NOTE(review): if every episode is constant the average stdev is 0 and this
            // weight becomes infinite -- confirm callers never pass constant targets
            inverseStandardDeviation[i] = 1.0 / stdevs.Average();
          }
        } else
          // no target values given (e.g. plain integration): use weight 1.0 for all trees
          this.inverseStandardDeviation = Enumerable.Repeat(1.0, trees.Length).ToArray();
        this.episodes = episodes;
        this.numericIntegrationSteps = numericIntegrationSteps;
        this.latentVariables = latentVariables;
        this.odeSolver = odeSolver;
        this.nodeValueLookup = new NodeValueLookup(trees);
        this.rows = episodes.SelectMany(ep => Enumerable.Range(ep.Start, ep.Size)).ToArray();
      }
    }
2141
2142    public class NodeValueLookup {
2143      private readonly Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> node2val = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
2144      private readonly ISymbolicExpressionTreeNode[] leafNodes;
2145      private readonly Vector[] constantGradientVectors;
2146      private readonly Dictionary<string, Tuple<double, Vector>> variableValues = new Dictionary<string, Tuple<double, Vector>>();
2147
2148      // accessors for current values of constant and variable nodes. For variable nodes we also need to account for the variable weight
2149      public double ConstantNodeValue(ConstantTreeNode node) => node2val[node].Item1;
2150      public Tuple<double, Vector> ConstantNodeValueAndGradient(ConstantTreeNode node) { var v = node2val[node]; return Tuple.Create(v.Item1, Vector.CreateNew(v.Item2)); }
2151      public double VariableNodeValue(VariableTreeNode node) => variableValues[node.VariableName].Item1 * node2val[node].Item1;
2152      public Tuple<double, Vector> VariableNodeValueAndGradient(VariableTreeNode node) {
2153        // (f*g)' = (f'*g)+(g'*f)       
2154        var w = node2val[node];
2155        var x = variableValues[node.VariableName];
2156
2157        return Tuple.Create(
2158          w.Item1 * x.Item1,
2159          x.Item2 == Vector.Zero ? Vector.CreateNew(w.Item2).Scale(x.Item1) :
2160          Vector.CreateNew(w.Item2).Scale(x.Item1).Add(Vector.CreateNew(x.Item2).Scale(w.Item1)));
2161      }
2162
2163      public NodeValueLookup(ISymbolicExpressionTree[] trees, bool variableGradient = false) {
2164        this.leafNodes = trees.SelectMany(t => t.IterateNodesPrefix().Where(n => n.SubtreeCount==0)).ToArray();
2165        if (!variableGradient) {
2166          constantGradientVectors = new Vector[leafNodes.Length];
2167          for (int paramIdx = 0; paramIdx < leafNodes.Length; paramIdx++) {
2168            constantGradientVectors[paramIdx] = Vector.CreateIndicator(length: leafNodes.Length, idx: paramIdx);
2169
2170            var node = leafNodes[paramIdx];
2171            if (node is ConstantTreeNode constTreeNode) {
2172              node2val[node] = Tuple.Create(constTreeNode.Value, constantGradientVectors[paramIdx]);
2173            } else if (node is VariableTreeNode varTreeNode) {
2174              node2val[node] = Tuple.Create(varTreeNode.Weight, constantGradientVectors[paramIdx]);
2175            } else throw new InvalidProgramException();
2176          }
2177        } else {
2178          throw new NotImplementedException();
2179          // variable gradient means we want to calculate the gradient over the target variables instead of parameters
2180          for (int paramIdx = 0; paramIdx < leafNodes.Length; paramIdx++) {
2181            var node = leafNodes[paramIdx];
2182            if (node is ConstantTreeNode constTreeNode) {
2183              node2val[node] = Tuple.Create(constTreeNode.Value, Vector.Zero);
2184            } else if (node is VariableTreeNode varTreeNode) {
2185              node2val[node] = Tuple.Create(varTreeNode.Weight, Vector.Zero);
2186            } else throw new InvalidProgramException();
2187          }
2188        }
2189      }
2190
2191      public int ParameterCount => leafNodes.Length;
2192
2193      public void SetVariableValue(string variableName, double val) {
2194        SetVariableValue(variableName, val, Vector.Zero);
2195      }
2196      /// <summary>
2197      /// returns the current value for variable variableName
2198      /// </summary>
2199      /// <param name="variableName"></param>
2200      /// <returns></returns>
2201      public Tuple<double, Vector> GetVariableValue(string variableName) {
2202        return variableValues[variableName];
2203      }
2204
2205      /// <summary>
2206      /// sets the current value for variable variableName
2207      /// </summary>
2208      /// <param name="variableName"></param>
2209      /// <param name="val"></param>
2210      /// <param name="dVal"></param>
2211      public void SetVariableValue(string variableName, double val, Vector dVal) {
2212        variableValues[variableName] = Tuple.Create(val, dVal);
2213        // if (name2nodes.TryGetValue(variableName, out List<ISymbolicExpressionTreeNode> nodes)) {
2214        //   nodes.ForEach(n => node2val[n] = Tuple.Create(val, dVal));
2215        // } else {
2216        //   var fakeNode = new VariableTreeNode(new Variable());
2217        //   fakeNode.Weight = 1.0;
2218        //   fakeNode.VariableName = variableName;
2219        //   var newNodeList = new List<ISymbolicExpressionTreeNode>();
2220        //   newNodeList.Add(fakeNode);
2221        //   name2nodes.Add(variableName, newNodeList);
2222        //   node2val[fakeNode] = Tuple.Create(val, dVal);
2223        // }
2224      }
2225
2226      internal void UpdateParamValues(double[] x) {
2227        for (int i = 0; i < x.Length; i++) {
2228          node2val[leafNodes[i]] = Tuple.Create(x[i], constantGradientVectors[i]);
2229        }
2230      }
2231    }
2232  }
2233}
// Note: See TracBrowser for help on using the repository browser.