source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs @ 16660

Last change on this file since 16660 was 16660, checked in by gkronber, 2 years ago

#2925: re-introduced partial support for latent variables

#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.Instances;
using Variable = HeuristicLab.Problems.DataAnalysis.Symbolic.Variable;

namespace HeuristicLab.Problems.DynamicalSystemsModelling {
  [Item("Dynamical Systems Modelling Problem", "TODO")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
  [StorableClass]
  public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
    #region parameter names
    private const string ProblemDataParameterName = "Data";
    private const string TargetVariablesParameterName = "Target variables";
    private const string FunctionSetParameterName = "Function set";
    private const string MaximumLengthParameterName = "Size limit";
    private const string MaximumParameterOptimizationIterationsParameterName = "Max. parameter optimization iterations";
    private const string NumberOfLatentVariablesParameterName = "Number of latent variables";
    private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
    private const string TrainingEpisodesParameterName = "Training episodes";
    private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes";
    private const string OdeSolverParameterName = "ODE Solver";
    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> TargetVariablesParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[TargetVariablesParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> FunctionSetParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[FunctionSetParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumLengthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumLengthParameterName]; }
    }

    public IFixedValueParameter<IntValue> MaximumParameterOptimizationIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumParameterOptimizationIterationsParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; }
    }
    public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter {
      get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; }
    }
    public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; }
    }
    public IConstrainedValueParameter<StringValue> OdeSolverParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters[OdeSolverParameterName]; }
    }
    #endregion

    #region Properties
    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }

    public ReadOnlyCheckedItemList<StringValue> TargetVariables {
      get { return TargetVariablesParameter.Value; }
    }

    public ReadOnlyCheckedItemList<StringValue> FunctionSet {
      get { return FunctionSetParameter.Value; }
    }

    public int MaximumLength {
      get { return MaximumLengthParameter.Value.Value; }
    }
    public int MaximumParameterOptimizationIterations {
      get { return MaximumParameterOptimizationIterationsParameter.Value.Value; }
    }
    public int NumberOfLatentVariables {
      get { return NumberOfLatentVariablesParameter.Value.Value; }
    }
    public int NumericIntegrationSteps {
      get { return NumericIntegrationStepsParameter.Value.Value; }
    }
    public IEnumerable<IntRange> TrainingEpisodes {
      get { return TrainingEpisodesParameter.Value; }
    }
    public bool OptimizeParametersForEpisodes {
      get { return OptimizeParametersForEpisodesParameter.Value.Value; }
    }

    public string OdeSolver {
      get { return OdeSolverParameter.Value.Value; }
      set {
        var matchingValue = OdeSolverParameter.ValidValues.FirstOrDefault(v => v.Value == value);
        if (matchingValue == null) throw new ArgumentOutOfRangeException();
        else OdeSolverParameter.Value = matchingValue;
      }
    }

    #endregion

    public event EventHandler ProblemDataChanged;

    public override bool Maximization {
      get { return false; } // we minimize NMSE
    }

    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(bool deserializing) : base(deserializing) { }
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) {
        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
      }
      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers();
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
    #endregion

    public Problem()
      : base() {
      var targetVariables = new CheckedItemList<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem
      var functions = CreateFunctionSet();
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. Use the CSV import functionality to import data.", new RegressionProblemData()));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(TargetVariablesParameterName, "Target variables (overrides the setting in ProblemData)", targetVariables));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximally allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for the optimization of parameters (using Levenberg-Marquardt). More iterations make the algorithm slower, fewer iterations might prevent convergence of the optimization scheme. Default = 100", new IntValue(100)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference from target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 .. 5) as necessary (default = 0)", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps make the algorithm slower, fewer steps worsen the accuracy of the numeric integration scheme.", new IntValue(10)));
      Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training; each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>()));
      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));

      var solversStr = new string[] { "HeuristicLab" /* , "CVODES" */ };
      var solvers = new ItemSet<StringValue>(
        solversStr.Select(s => new StringValue(s).AsReadOnly())
        );
      Parameters.Add(new ConstrainedValueParameter<StringValue>(OdeSolverParameterName, "The solver to use for solving the initial value ODE problems", solvers, solvers.First()));

      RegisterEventHandlers();
      InitAllParameters();

      // TODO: use the training range as the default training episode
      // TODO: optimization of starting values for latent variables in the CVODES solver
      // TODO: allow to specify the name of the time variable in the dataset and allow variable step sizes
    }
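
    // A minimal programmatic setup sketch (the variable name `data` is hypothetical; the
    // dataset is assumed to have been imported already, e.g. via the CSV import mentioned
    // in the ProblemData parameter description):
    //   var problem = new Problem();
    //   problem.ProblemData = data;          // IRegressionProblemData with the measured trajectories
    //   problem.OdeSolver = "HeuristicLab";  // currently the only enabled solver (see solversStr)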

    public override double Evaluate(Individual individual, IRandom random) {
      var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from the individual

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and for which we actually have trees
      if (OptimizeParametersForEpisodes) {
        throw new NotImplementedException();
        int eIdx = 0;
        double totalNMSE = 0.0;
        int totalSize = 0;
        foreach (var episode in TrainingEpisodes) {
          // double[] optTheta;
          double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, new[] { episode }, MaximumParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver);
          // individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analyze method
          eIdx++;
          totalNMSE += nmse * episode.Size;
          totalSize += episode.Size;
        }
        return totalNMSE / totalSize;
      } else {
        // double[] optTheta;
        double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, TrainingEpisodes, MaximumParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver);
        // individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analyze method
        return nmse;
      }
    }

    public static double OptimizeForEpisodes(
      ISymbolicExpressionTree[] trees,
      IRegressionProblemData problemData,
      string[] targetVars,
      string[] latentVariables,
      IRandom random,
      IEnumerable<IntRange> episodes,
      int maxParameterOptIterations,
      int numericIntegrationSteps,
      string odeSolver) {

      // extract constants from the trees (excluding the trees for latent variables)
      var targetVariableTrees = trees.Take(targetVars.Length).ToArray();
      var latentVariableTrees = trees.Skip(targetVars.Length).ToArray();
      var constantNodes = targetVariableTrees.Select(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>().ToArray()).ToArray();
      var initialTheta = constantNodes.Select(nodes => nodes.Select(n => n.Value).ToArray()).ToArray();

      // optimize parameters by fitting f(x,y) to the calculated differences dy/dt(t)
      double nmse = PreTuneParameters(trees, problemData, targetVars, latentVariables, random, episodes, maxParameterOptIterations,
        initialTheta, out double[] pretunedParameters);

      // extend the parameter vector to include the parameters of the latent variable trees
      pretunedParameters = pretunedParameters
        .Concat(latentVariableTrees
        .SelectMany(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>().Select(n => n.Value)))
        .ToArray();

      // optimize parameters using integration of f(x,y) to calculate y(t)
      nmse = OptimizeParameters(trees, problemData, targetVars, latentVariables, episodes, maxParameterOptIterations, pretunedParameters, numericIntegrationSteps, odeSolver,
        out double[] optTheta);
      // var optTheta = pretunedParameters;

      if (double.IsNaN(nmse) ||
        double.IsInfinity(nmse) ||
        nmse > 100 * trees.Length * episodes.Sum(ep => ep.Size))
        return 100 * trees.Length * episodes.Sum(ep => ep.Size);

      // update the tree nodes with the optimized values
      var paramIdx = 0;
      for (var treeIdx = 0; treeIdx < constantNodes.Length; treeIdx++) {
        for (int i = 0; i < constantNodes[treeIdx].Length; i++)
          constantNodes[treeIdx][i].Value = optTheta[paramIdx++];
      }
      return nmse;
    }
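
    // Sketch of the two-stage scheme used above: PreTuneParameters first fits each tree
    // f_i separately against finite differences of its target variable,
    //   dy_i/dt(t) ≈ y_i(t+1) - y_i(t)    (a unit time step is assumed),
    // which is cheap because no ODE needs to be solved; OptimizeParameters then refines
    // all parameters jointly by integrating the coupled system and comparing the
    // simulated trajectories y(t) with the data.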

    private static double PreTuneParameters(
      ISymbolicExpressionTree[] trees,
      IRegressionProblemData problemData,
      string[] targetVars,
      string[] latentVariables,
      IRandom random,
      IEnumerable<IntRange> episodes,
      int maxParameterOptIterations,
      double[][] initialTheta,
      out double[] optTheta) {
      var thetas = new List<double>();
      double nmse = 0.0;
      var maxTreeNmse = 100 * episodes.Sum(ep => ep.Size);

      var targetTrees = trees.Take(targetVars.Length).ToArray();
      var latentTrees = trees.Skip(targetVars.Length).ToArray();

      {
        // first calculate the values of the latent variables by integration
        var inputVariables = targetVars.Concat(latentTrees.SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))).Except(latentVariables).Distinct();
        var myState = new OptimizationData(latentTrees, targetVars, inputVariables.ToArray(), problemData, null, episodes.ToArray(), 10, latentVariables, "HeuristicLab");

        var fi = new double[myState.rows.Length * targetVars.Length];
        var jac = new double[myState.rows.Length * targetVars.Length, myState.nodeValueLookup.ParameterCount];
        var latentValues = new double[myState.rows.Length, latentVariables.Length];
        Integrate(myState, fi, jac, latentValues);

        // add the integrated latent variables to the dataset
        var modifiedDataset = ((Dataset)problemData.Dataset).ToModifiable();
        foreach (var variable in latentVariables) {
          modifiedDataset.AddVariable(variable, Enumerable.Repeat(0.0, modifiedDataset.Rows).ToList()); // empty column
        }
        int predIdx = 0;
        foreach (var ep in episodes) {
          for (int r = ep.Start; r < ep.End; r++) {
            for (int latVarIdx = 0; latVarIdx < latentVariables.Length; latVarIdx++) {
              modifiedDataset.SetVariableValue(latentValues[predIdx, latVarIdx], latentVariables[latVarIdx], r);
            }
            predIdx++;
          }
        }

        problemData = new RegressionProblemData(modifiedDataset, problemData.AllowedInputVariables, problemData.TargetVariable);
      }
      // NOTE: the order of values in the parameter vector matches the prefix order of the constant nodes in the trees
      for (int treeIdx = 0; treeIdx < targetTrees.Length; treeIdx++) {
        var t = targetTrees[treeIdx];

        var targetValuesDiff = new List<double>();
        foreach (var ep in episodes) {
          var episodeRows = Enumerable.Range(ep.Start, ep.Size);
          var targetValues = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], episodeRows).ToArray();
          targetValuesDiff.AddRange(targetValues.Skip(1).Zip(targetValues, (t1, t0) => t1 - t0)); // TODO: smoothing or multi-point differences
        }
        var adjustedEpisodes = episodes.Select(ep => new IntRange(ep.Start, ep.End - 1)); // because we lose the last row in the differencing step

        // data for the input variables is assumed to be known
        // input variables in pretuning are all target variables and all variable names that occur in the tree
        var inputVariables = targetVars.Concat(t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName)).Distinct();

        var myState = new OptimizationData(new[] { t },
          targetVars,
          inputVariables.ToArray(),
          problemData, new[] { targetValuesDiff.ToArray() }, adjustedEpisodes.ToArray(), -99, latentVariables, string.Empty); // TODO
        var paramCount = myState.nodeValueLookup.ParameterCount;

        optTheta = new double[0];
        if (initialTheta[treeIdx].Length > 0) {
          try {
            alglib.minlmstate state;
            alglib.minlmreport report;
            var p = new double[initialTheta[treeIdx].Length];
            var lowerBounds = Enumerable.Repeat(-1000.0, p.Length).ToArray();
            var upperBounds = Enumerable.Repeat(1000.0, p.Length).ToArray();
            Array.Copy(initialTheta[treeIdx], p, p.Length);
            alglib.minlmcreatevj(targetValuesDiff.Count, p, out state);
            alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
            alglib.minlmsetbc(state, lowerBounds, upperBounds);
#if DEBUG
            //alglib.minlmsetgradientcheck(state, 1.0e-7);
#endif
            alglib.minlmoptimize(state, EvaluateObjectiveVector, EvaluateObjectiveVectorAndJacobian, null, myState);

            alglib.minlmresults(state, out optTheta, out report);
            if (report.terminationtype < 0) {
#if DEBUG
              if (report.terminationtype == -7) throw new InvalidProgramException("gradient calculation failed!");
#endif
              optTheta = initialTheta[treeIdx];
            }
          } catch (alglib.alglibexception) {
            optTheta = initialTheta[treeIdx];
          }
        }
        var tree_nmse = EvaluateMSE(optTheta, myState);
        if (double.IsNaN(tree_nmse) || double.IsInfinity(tree_nmse) || tree_nmse > maxTreeNmse) {
          nmse += maxTreeNmse;
          thetas.AddRange(initialTheta[treeIdx]);
        } else {
          nmse += tree_nmse;
          thetas.AddRange(optTheta);
        }
      } // foreach tree
      optTheta = thetas.ToArray();

      return nmse;
    }


    // similar to the above, but this time we integrate and optimize all parameters for all targets concurrently
    private static double OptimizeParameters(ISymbolicExpressionTree[] trees, IRegressionProblemData problemData, string[] targetVars, string[] latentVariables,
      IEnumerable<IntRange> episodes, int maxParameterOptIterations, double[] initialTheta, int numericIntegrationSteps, string odeSolver, out double[] optTheta) {
      var rowsForDataExtraction = episodes.SelectMany(e => Enumerable.Range(e.Start, e.Size)).ToArray();
      var targetValues = new double[targetVars.Length][];
      for (int treeIdx = 0; treeIdx < targetVars.Length; treeIdx++) {
        targetValues[treeIdx] = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], rowsForDataExtraction).ToArray();
      }

      // data for the input variables is assumed to be known
      // input variables are all variable names that occur in the trees except for target variables (we assume that the trees have been generated correctly)
      var inputVariables = trees.SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))
        .Except(targetVars)
        .Except(latentVariables)
        .Distinct();

      var myState = new OptimizationData(trees, targetVars, inputVariables.ToArray(), problemData, targetValues, episodes.ToArray(), numericIntegrationSteps, latentVariables, odeSolver);
      optTheta = initialTheta;

      if (initialTheta.Length > 0) {
        var lowerBounds = Enumerable.Repeat(-1000.0, initialTheta.Length).ToArray();
        var upperBounds = Enumerable.Repeat(1000.0, initialTheta.Length).ToArray();
        try {
          alglib.minlmstate state;
          alglib.minlmreport report;
          alglib.minlmcreatevj(rowsForDataExtraction.Length * trees.Length, initialTheta, out state);
          alglib.minlmsetbc(state, lowerBounds, upperBounds);
          alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
#if DEBUG
          //alglib.minlmsetgradientcheck(state, 1.0e-7);
#endif
          alglib.minlmoptimize(state, IntegrateAndEvaluateObjectiveVector, IntegrateAndEvaluateObjectiveVectorAndJacobian, null, myState);

          alglib.minlmresults(state, out optTheta, out report);

          if (report.terminationtype < 0) {
#if DEBUG
            if (report.terminationtype == -7) throw new InvalidProgramException("gradient calculation failed!");
#endif
            // there was a problem: reset theta and evaluate for the initial values
            optTheta = initialTheta;
          }
        } catch (alglib.alglibexception) {
          optTheta = initialTheta;
        }
      }
      var nmse = EvaluateIntegratedMSE(optTheta, myState);
      var maxNmse = 100 * targetValues.Length * rowsForDataExtraction.Length;
      if (double.IsNaN(nmse) || double.IsInfinity(nmse) || nmse > maxNmse) nmse = maxNmse;
      return nmse;
    }


    // helper
    public static double EvaluateMSE(double[] x, OptimizationData optimizationData) {
      var fi = new double[optimizationData.rows.Count()];
      EvaluateObjectiveVector(x, fi, optimizationData);
      return fi.Sum(fii => fii * fii) / fi.Length;
    }
    public static void EvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { EvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
    public static void EvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
      var rows = optimizationData.rows;
      var problemData = optimizationData.problemData;
      var nodeValueLookup = optimizationData.nodeValueLookup;
      var ds = problemData.Dataset;
      var variables = optimizationData.variables;

      nodeValueLookup.UpdateParamValues(x);

      int outputIdx = 0;
      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
        // update variable values
        foreach (var variable in variables) {
          // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
          if (double.TryParse(variable, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
            nodeValueLookup.SetVariableValue(variable, value); // TODO: perf - we don't need to set this for each index
          } else {
            nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx])); // TODO: perf
          }
        }
        // interpret all trees
        for (int treeIdx = 0; treeIdx < optimizationData.trees.Length; treeIdx++) {
          var tree = optimizationData.trees[treeIdx];
          var pred = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup);
          var y = optimizationData.targetValues[treeIdx][trainIdx];
          fi[outputIdx++] = (y - pred) * optimizationData.inverseStandardDeviation[treeIdx];
        }
      }
    }
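
    // Scaling note: the objective vector holds standardized residuals r = (y - f(x,θ)) / σ_y,
    // so the sum of squares Σ r² minimized by alglib corresponds to a normalized squared
    // error; EvaluateMSE above accordingly returns Σ r² / n.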

    public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { EvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
    public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
      var problemData = optimizationData.problemData;
      var ds = problemData.Dataset;
      var rows = optimizationData.rows;
      var variables = optimizationData.variables;

      var nodeValueLookup = optimizationData.nodeValueLookup;
      nodeValueLookup.UpdateParamValues(x);

      int termIdx = 0;

      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
        // update variable values from the dataset
        foreach (var variable in variables) {
          // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
          if (double.TryParse(variable, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
            nodeValueLookup.SetVariableValue(variable, value); // TODO: perf - we don't need to set this for each index
          } else {
            nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx])); // TODO: perf
          }
        }

        var calculatedVariables = optimizationData.targetVariables;

        var trees = optimizationData.trees;
        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var targetVarName = calculatedVariables[i];

          double f; Vector g;
          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup, out f, out g);

          var y = optimizationData.targetValues[i][trainIdx];
          fi[termIdx] = (y - f) * optimizationData.inverseStandardDeviation[i]; // scaling for NMSE
          if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[termIdx, j] = -g[j] * optimizationData.inverseStandardDeviation[i];

          termIdx++;
        }
      }
    }
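
    // The Jacobian entries above follow directly from the residual definition r = (y - f) / σ_y:
    //   ∂r/∂θ_j = -(∂f/∂θ_j) / σ_y,
    // hence the negated, scaled gradient components written into jac.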

    // helper
    public static double EvaluateIntegratedMSE(double[] x, OptimizationData optimizationData) {
      var fi = new double[optimizationData.rows.Count() * optimizationData.targetVariables.Length];
      IntegrateAndEvaluateObjectiveVector(x, fi, optimizationData);
      return fi.Sum(fii => fii * fii) / fi.Length;
    }
    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { IntegrateAndEvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
      IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, null, optimizationData);
    }

    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
      var rows = optimizationData.rows.ToArray();
      var nodeValueLookup = optimizationData.nodeValueLookup;
      int outputIdx = 0;

      nodeValueLookup.UpdateParamValues(x);

      Integrate(optimizationData, fi, jac, null);
      var trees = optimizationData.trees;

      // update the result with the error
      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
        for (int i = 0; i < optimizationData.targetVariables.Length; i++) {
          var tree = trees[i];
          var y = optimizationData.targetValues[i][trainIdx];
          fi[outputIdx] = (y - fi[outputIdx]) * optimizationData.inverseStandardDeviation[i]; // scale for normalized squared error
          if (jac != null) for (int j = 0; j < x.Length; j++) jac[outputIdx, j] = -jac[outputIdx, j] * optimizationData.inverseStandardDeviation[i];
          outputIdx++;
        }
      }
    }
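
    // Note: Integrate first fills fi with the predicted trajectories y(t) (and jac with
    // ∂y/∂θ); the loop above then overwrites both in place with the standardized residuals
    // (y - ŷ) / σ_y and the corresponding negated, scaled derivatives, matching the
    // convention of the non-integrating objective functions above.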

    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      if (!results.ContainsKey("Prediction (training)")) {
        results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Prediction (test)")) {
        results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Models")) {
        results.Add(new Result("Models", typeof(VariableCollection)));
      }
      if (!results.ContainsKey("SNMSE")) {
        results.Add(new Result("SNMSE", typeof(DoubleValue)));
      }
      if (!results.ContainsKey("Solution")) {
        results.Add(new Result("Solution", typeof(Solution)));
      }
      if (!results.ContainsKey("Squared error and gradient")) {
        results.Add(new Result("Squared error and gradient", typeof(DataTable)));
      }

      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from the best individual

      results["SNMSE"].Value = new DoubleValue(bestIndividualAndQuality.Item2);

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and for which we actually have trees

      var trainingList = new ItemList<DataTable>();

      if (OptimizeParametersForEpisodes) {
        throw new NotSupportedException();
        var eIdx = 0;
        var trainingPredictions = new List<Tuple<double, Vector>[][]>();
        foreach (var episode in TrainingEpisodes) {
          var episodes = new[] { episode };
          var optimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, episodes, NumericIntegrationSteps, latentVariables, OdeSolver);
          var trainingPrediction = Integrate(optimizationData).ToArray();
          trainingPredictions.Add(trainingPrediction);
          eIdx++;
        }

        // only for target values
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray());
          trainingDataTable.Rows.Add(actualValuesRow);
          trainingDataTable.Rows.Add(predictedValuesRow);
          trainingList.Add(trainingDataTable);
        }
        results["Prediction (training)"].Value = trainingList.AsReadOnly();

        var models = new VariableCollection();

        foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
          var targetVarName = tup.Item1;
          var tree = tup.Item2;

          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
        }
        results["Models"].Value = models;
      } else {
        // data for the input variables is assumed to be known
        // input variables are all variable names that occur in the trees except for target variables (we assume that the trees have been generated correctly)
        var inputVariables = trees
          .SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))
          .Except(targetVars)
          .Except(latentVariables)
          .Distinct();

        var optimizationData = new OptimizationData(trees, targetVars, inputVariables.ToArray(), problemData, null, TrainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver);
        var numParams = optimizationData.nodeValueLookup.ParameterCount;

        var fi = new double[optimizationData.rows.Length * targetVars.Length];
        var jac = new double[optimizationData.rows.Length * targetVars.Length, numParams];
        var latentValues = new double[optimizationData.rows.Length, latentVariables.Length];
        Integrate(optimizationData, fi, jac, latentValues);

        // for target values and latent variables
        var trainingRows = optimizationData.rows;
        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
          // target variable columns come first, followed by latent variable columns
          if (colIdx < targetVars.Length) {
            var targetVar = targetVars[colIdx];
            var trainingDataTable = new DataTable(targetVar + " prediction (training)");
            var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
            var idx = Enumerable.Range(0, trainingRows.Length).Select(i => i * targetVars.Length + colIdx);
            var pred = idx.Select(i => fi[i]);
            var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, pred.ToArray());
            trainingDataTable.Rows.Add(actualValuesRow);
            trainingDataTable.Rows.Add(predictedValuesRow);

            for (int paramIdx = 0; paramIdx < numParams; paramIdx++) {
              var paramSensitivityRow = new DataRow($"∂{targetVar}/∂θ{paramIdx}", $"Sensitivities of parameter {paramIdx}", idx.Select(i => jac[i, paramIdx]).ToArray());
              paramSensitivityRow.VisualProperties.SecondYAxis = true;
              trainingDataTable.Rows.Add(paramSensitivityRow);
            }
            trainingList.Add(trainingDataTable);
          } else {
            var latentVar = latentVariables[colIdx - targetVars.Length];
            var trainingDataTable = new DataTable(latentVar + " prediction (training)");
            var idx = Enumerable.Range(0, trainingRows.Length);
            var pred = idx.Select(i => latentValues[i, colIdx - targetVars.Length]);
            var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, pred.ToArray());
            var emptyRow = new DataRow(latentVar);
            trainingDataTable.Rows.Add(emptyRow);
            trainingDataTable.Rows.Add(predictedValuesRow);
            trainingList.Add(trainingDataTable);
          }
        }

        var errorTable = new DataTable("Squared error and gradient");
        var seRow = new DataRow("Squared error");
        var gradientRows = Enumerable.Range(0, numParams).Select(i => new DataRow($"∂SE/∂θ{i}")).ToArray();
        errorTable.Rows.Add(seRow);
        foreach (var gRow in gradientRows) {
          gRow.VisualProperties.SecondYAxis = true;
          errorTable.Rows.Add(gRow);
        }
        var targetValues = targetVars.Select(v => problemData.Dataset.GetDoubleValues(v, trainingRows).ToArray()).ToArray();
        int r = 0;

        // foreach (var y_pred in trainingPrediction) {
        //   // calculate the objective function gradient
        //   double f_i = 0.0;
        //   Vector g_i = Vector.CreateNew(new double[numParams]);
        //   for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
        //     var y_pred_f = y_pred[colIdx].Item1;
        //     var y = targetValues[colIdx][r];
        //
        //     var res = (y - y_pred_f) * optimizationData.inverseStandardDeviation[colIdx];
        //     var ressq = res * res;
        //     f_i += ressq;
        //     g_i.Add(y_pred[colIdx].Item2.Scale(-2.0 * res));
        //   }
        //   seRow.Values.Add(f_i);
        //   for (int j = 0; j < g_i.Length; j++) gradientRows[j].Values.Add(g_i[j]);
        //   r++;
        // }
        // results["Squared error and gradient"].Value = errorTable;

        // TODO: DRY for training and test
        var testList = new ItemList<DataTable>();
        var testRows = ProblemData.TestIndices.ToArray();
        var testOptimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, new IntRange[] { ProblemData.TestPartition }, NumericIntegrationSteps, latentVariables, OdeSolver);
        var testPrediction = Integrate(testOptimizationData).ToArray();

        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
          // target variable columns come first, followed by latent variable columns
          if (colIdx < targetVars.Length) {
            var targetVar = targetVars[colIdx];
            var testDataTable = new DataTable(targetVar + " prediction (test)");
            var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
            var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            testDataTable.Rows.Add(actualValuesRow);
            testDataTable.Rows.Add(predictedValuesRow);
            testList.Add(testDataTable);
          } else {
            // var latentVar = latentVariables[colIdx - targetVars.Length];
            // var testDataTable = new DataTable(latentVar + " prediction (test)");
            // var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            // var emptyRow = new DataRow(latentVar);
            // testDataTable.Rows.Add(emptyRow);
            // testDataTable.Rows.Add(predictedValuesRow);
            // testList.Add(testDataTable);
          }
        }

        results["Prediction (training)"].Value = trainingList.AsReadOnly();
        results["Prediction (test)"].Value = testList.AsReadOnly();

        #region simplification of models
        // TODO: the dependency on HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
        var models = new VariableCollection(); // to store target variable names and the original version of each tree

        var clonedTrees = new List<ISymbolicExpressionTree>();
        for (int idx = 0; idx < trees.Length; idx++) {
          clonedTrees.Add((ISymbolicExpressionTree)trees[idx].Clone());
        }
        var ds = problemData.Dataset;
        var newProblemData = new RegressionProblemData((IDataset)ds.Clone(), problemData.AllowedInputVariables, problemData.TargetVariable);
        results["Solution"].Value = new Solution(clonedTrees.ToArray(),
                   // optTheta,
                   newProblemData,
                   targetVars,
                   latentVariables,
                   TrainingEpisodes,
                   OdeSolver,
                   NumericIntegrationSteps);

        for (int idx = 0; idx < trees.Length; idx++) {
          var varName = string.Empty;
          if (idx < targetVars.Length) {
            varName = targetVars[idx];
          } else {
            varName = latentVariables[idx - targetVars.Length];
          }
          var tree = trees[idx];

          var origTreeVar = new HeuristicLab.Core.Variable(varName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
          var simplifiedTreeVar = new HeuristicLab.Core.Variable(varName + "(simplified)");
          simplifiedTreeVar.Value = TreeSimplifier.Simplify(tree);
          models.Add(simplifiedTreeVar);
        }

        results["Models"].Value = models;
        #endregion
      }
    }

    #region interpretation

    // The following uses auto-diff to calculate the gradient w.r.t. the parameters forward in time.
    // This is basically the method described in: Gronwall, T. Note on the derivatives with respect to a parameter of the solutions of a system of differential equations. Ann. Math. 1919;20:292–296.

    // A comparison of three potential calculation methods for the gradient is given in:
    // Sengupta, B., Friston, K. J., & Penny, W. D. (2014). Efficient gradient computation for dynamical models. Neuroimage, 98(100), 521–527. http://doi.org/10.1016/j.neuroimage.2014.04.040
    // "Our comparison establishes that the adjoint method is computationally more efficient for numerical estimation of parametric gradients
    // for state-space models — both linear and non-linear, as in the case of a dynamical causal model (DCM)"

    // For a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes

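    // Forward sensitivity propagation in a nutshell: for s_j(t) = ∂y(t)/∂θ_j the
    // sensitivities satisfy the auxiliary ODE
    //   ds_j/dt = (∂f/∂y) s_j + ∂f/∂θ_j,    s_j(t0) = ∂y(t0)/∂θ_j
    // (cf. the sensitivity RHS in the commented-out CVODES code below). Here this system
    // is integrated along with y(t) by carrying a gradient Vector for every value through
    // InterpretRec at each integration step.
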
    public static IEnumerable<Tuple<double, Vector>[]> Integrate(OptimizationData optimizationData) {
      var nTargets = optimizationData.targetVariables.Length;
      var n = optimizationData.rows.Length * optimizationData.targetVariables.Length;
      var d = optimizationData.nodeValueLookup.ParameterCount;
      double[] fi = new double[n];
      double[,] jac = new double[n, d];
      Integrate(optimizationData, fi, jac, null);
      for (int i = 0; i < optimizationData.rows.Length; i++) {
        var res = new Tuple<double, Vector>[nTargets];
        for (int j = 0; j < nTargets; j++) {
          res[j] = Tuple.Create(fi[i * nTargets + j], Vector.CreateFromMatrixRow(jac, i * nTargets + j));
        }
        yield return res;
      }
    }
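
    // Layout note: fi and jac are packed row-major over time steps and targets, i.e.
    // the prediction for target j at time step i is stored at index i * nTargets + j
    // (the same convention is used by Integrate below and by the objective functions above).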

    public static void Integrate(OptimizationData optimizationData, double[] fi, double[,] jac, double[,] latentValues) {
      var trees = optimizationData.trees;
      var dataset = optimizationData.problemData.Dataset;
      var inputVariables = optimizationData.variables;
      var targetVariables = optimizationData.targetVariables;
      var latentVariables = optimizationData.latentVariables;
      var episodes = optimizationData.episodes;
      var odeSolver = optimizationData.odeSolver;
      var numericIntegrationSteps = optimizationData.numericIntegrationSteps;
      var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must coincide with the order of trees in the encoding

      var nodeValues = optimizationData.nodeValueLookup;

      // TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver
      var outputRowIdx = 0;
      var episodeIdx = 0;
      foreach (var episode in optimizationData.episodes) {
        var rows = Enumerable.Range(episode.Start, episode.End - episode.Start).ToArray();

        var t0 = rows.First();

        // initialize values for inputs and targets from the dataset
        foreach (var varName in inputVariables) {
          // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
          if (double.TryParse(varName, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
            nodeValues.SetVariableValue(varName, value, Vector.Zero);
          } else {
            var y0 = dataset.GetDoubleValue(varName, t0);
            nodeValues.SetVariableValue(varName, y0, Vector.Zero);
          }
        }
        foreach (var varName in targetVariables) {
          var y0 = dataset.GetDoubleValue(varName, t0);
          nodeValues.SetVariableValue(varName, y0, Vector.Zero);

          // output the starting value
          fi[outputRowIdx] = y0;
          Vector.Zero.CopyTo(jac, outputRowIdx);

          outputRowIdx++;
        }

        var latentValueRowIdx = 0;
        var latentValueColIdx = 0;
        foreach (var varName in latentVariables) {
          var y0 = 0.0; // assume we start at zero
          nodeValues.SetVariableValue(varName, y0, Vector.Zero);

          if (latentValues != null) {
            latentValues[latentValueRowIdx, latentValueColIdx++] = y0;
          }
        }
        latentValueColIdx = 0; latentValueRowIdx++;

        { // CODE BELOW DOESN'T WORK ANYMORE
          // if (latentVariables.Length > 0) throw new NotImplementedException();
          //
          // // add value entries for latent variables which are also integrated
          // // initial values are at the end of the parameter vector
          // // separate initial values for each episode
          // var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
          // foreach (var latentVar in latentVariables) {
          //   var arr = new double[parameterValues.Length]; // backing array
          //   arr[initialValueIdx] = 1.0;
          //   var g = new Vector(arr);
          //   nodeValues.SetVariableValue(latentVar, parameterValues[initialValueIdx], g); // we don't have observations for latent variables, therefore we optimize the initial value for each episode
          //   initialValueIdx++;
          // }
        }

        var prevT = t0; // TODO: here we should use a variable for t if one is available. Right now we assume equidistant measurements.
        foreach (var t in rows.Skip(1)) {
          if (odeSolver == "HeuristicLab")
            IntegrateHL(trees, calculatedVariables, nodeValues, numericIntegrationSteps); // the integrator updates nodeValues
          else if (odeSolver == "CVODES")
            throw new NotImplementedException();
          // IntegrateCVODES(trees, calculatedVariables, variableValues, parameterValues, t - prevT);
          else throw new InvalidOperationException("Unknown ODE solver " + odeSolver);
          prevT = t;

          // update the output for the target variables (TODO: if we want to visualize the latent variables then we need to provide a separate output)
          for (int i = 0; i < targetVariables.Length; i++) {
            var targetVar = targetVariables[i];
            var yt = nodeValues.GetVariableValue(targetVar);

            // fill up the remaining rows with the last valid value if there are invalid values
            if (double.IsNaN(yt.Item1) || double.IsInfinity(yt.Item1)) {
              for (; outputRowIdx < fi.Length; outputRowIdx++) {
                var prevIdx = outputRowIdx - targetVariables.Length;
                fi[outputRowIdx] = fi[prevIdx]; // current <- prev
                if (jac != null) for (int j = 0; j < jac.GetLength(1); j++) jac[outputRowIdx, j] = jac[prevIdx, j];
              }
              return;
            }

            fi[outputRowIdx] = yt.Item1;
            var g = yt.Item2;
            g.CopyTo(jac, outputRowIdx);
            outputRowIdx++;
          }
          if (latentValues != null) {
            foreach (var latentVariable in latentVariables) {
              var lt = nodeValues.GetVariableValue(latentVariable).Item1;
              latentValues[latentValueRowIdx, latentValueColIdx++] = lt;
            }
            latentValueRowIdx++; latentValueColIdx = 0;
          }

          // update values for the next time step (only the inputs)
          foreach (var varName in inputVariables) {
            // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
            if (double.TryParse(varName, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
              // the value is unchanged
            } else {
              nodeValues.SetVariableValue(varName, dataset.GetDoubleValue(varName, t), Vector.Zero);
            }
          }
        }
        episodeIdx++;
      }
    }
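
    // Note on the stepping scheme (HeuristicLab solver): IntegrateHL, defined later in
    // this file, advances the state from one dataset row to the next; according to the
    // parameter description in the constructor, numericIntegrationSteps determines into
    // how many sub-steps each row-to-row interval is subdivided. Time is implicit here:
    // measurements are assumed to be equidistant with a unit step (see the TODO above).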
926
927    #region CVODES
928
929    /*
930    /// <summary>
931    ///  Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization
932    /// </summary>
933    /// <param name="trees">Each equation in the ODE represented as a tree</param>
934    /// <param name="calculatedVariables">The names of the calculated variables</param>
935    /// <param name="variableValues">The start values of the calculated variables as well as their sensitivites over parameters</param>
936    /// <param name="parameterValues">The current parameter values</param>
937    /// <param name="t">The time t up to which we need to integrate.</param>
938    private static void IntegrateCVODES(
939      ISymbolicExpressionTree[] trees, // f(y,p) in tree representation
940      string[] calculatedVariables, // names of elements of y
941      Dictionary<string, Tuple<double, Vector>> variableValues,  //  y (intput and output) input: y(t0), output: y(t0+t)
942      double[] parameterValues, // p
943      double t // duration t for which we want to integrate
944      ) {
945
946      // the RHS of the ODE
947      // dy/dt = f(y_t,x_t,p)
948      CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, parameterValues);
949      // the Jacobian ∂f/∂y
950      CVODES.CVDlsJacFunc jac = CreateJac(trees, calculatedVariables, parameterValues);
951
952      // the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i
953      CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, parameterValues);
954
955      // setup solver
956      int numberOfEquations = trees.Length;
957      IntPtr y = IntPtr.Zero;
958      IntPtr cvode_mem = IntPtr.Zero;
959      IntPtr A = IntPtr.Zero;
960      IntPtr yS0 = IntPtr.Zero;
961      IntPtr linearSolver = IntPtr.Zero;
962      var ns = parameterValues.Length; // number of parameters
963
964      try {
965        y = CVODES.N_VNew_Serial(numberOfEquations);
966        // init y to current values of variables
967        // y must be initialized before calling CVodeInit
968        for (int i = 0; i < calculatedVariables.Length; i++) {
969          CVODES.NV_Set_Ith_S(y, i, variableValues[calculatedVariables[i]].Item1);
970        }
971
972        cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL);
973
974        var flag = CVODES.CVodeInit(cvode_mem, f, 0.0, y);
975        Assert(CVODES.CV_SUCCESS == flag);
976
977        double relTol = 1.0e-2;
978        double absTol = 1.0;
979        flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol);  // TODO: probably need to adjust absTol per variable
980        Assert(CVODES.CV_SUCCESS == flag);
981
982        A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations);
983        Assert(A != IntPtr.Zero);
984
985        linearSolver = CVODES.SUNDenseLinearSolver(y, A);
986        Assert(linearSolver != IntPtr.Zero);
987
988        flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A);
989        Assert(CVODES.CV_SUCCESS == flag);
990
991        flag = CVODES.CVDlsSetJacFn(cvode_mem, jac);
992        Assert(CVODES.CV_SUCCESS == flag);
993
994        yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter
995        unsafe {
996          // set to initial sensitivities supplied by caller
997          for (int pIdx = 0; pIdx < ns; pIdx++) {
998            var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx);
999            for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
1000              CVODES.NV_Set_Ith_S(yS0_i, varIdx, variableValues[calculatedVariables[varIdx]].Item2[pIdx]);
1001            }
1002          }
1003        }
1004
1005        flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0);
1006        Assert(CVODES.CV_SUCCESS == flag);
1007
1008        flag = CVODES.CVodeSensEEtolerances(cvode_mem);
1009        Assert(CVODES.CV_SUCCESS == flag);
1010
1011        // make one forward integration step
1012        double tout = 0.0; // first output time
1013        flag = CVODES.CVode(cvode_mem, t, y, ref tout, CVODES.CV_NORMAL);
1014        if (flag == CVODES.CV_SUCCESS) {
1015          Assert(t == tout);
1016
1017          // get sensitivities
1018          flag = CVODES.CVodeGetSens(cvode_mem, ref tout, yS0);
1019          Assert(CVODES.CV_SUCCESS == flag);
1020
1021          // update variableValues based on integration results
1022          for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
1023            var yi = CVODES.NV_Get_Ith_S(y, varIdx);
1024            var gArr = new double[parameterValues.Length];
1025            for (var pIdx = 0; pIdx < parameterValues.Length; pIdx++) {
1026              unsafe {
1027                var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx);
1028                gArr[pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx);
1029              }
1030            }
1031            variableValues[calculatedVariables[varIdx]] = Tuple.Create(yi, new Vector(gArr));
1032          }
1033        } else {
1034          variableValues.Clear();   // indicate problems by not returning new values
1035        }
1036
1037        // cleanup all allocated objects
1038      } finally {
1039        if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y);
1040        if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem);
1041        if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver);
1042        if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A);
1043        if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns);
1044      }
1045    }
1046
1047
1048    private static CVODES.CVRhsFunc CreateOdeRhs(
1049      ISymbolicExpressionTree[] trees,
1050      string[] calculatedVariables,
1051      double[] parameterValues) {
1052      // we don't need to calculate a gradient here
1053      return (double t,
1054              IntPtr y, // N_Vector, current value of y (input)
              IntPtr ydot, // N_Vector, calculated value of y' (output)
1056              IntPtr user_data // optional user data, (unused here)
1057              ) => {
1058                // TODO: perf
1059                var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
1060
1061                int pIdx = 0;
1062                foreach (var tree in trees) {
1063                  foreach (var n in tree.IterateNodesPrefix()) {
1064                    if (IsConstantNode(n)) {
1065                      nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we do not need a gradient
1066                      pIdx++;
1067                    } else if (n.SubtreeCount == 0) {
1068                      // for variables and latent variables get the value from variableValues
1069                      var varName = n.Symbol.Name;
1070                      var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
1071                      if (varIdx < 0) throw new InvalidProgramException();
1072                      var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
1073                      nodeValues.Add(n, Tuple.Create(y_i, Vector.Zero)); // no gradient needed
1074                    }
1075                  }
1076                }
1077                for (int i = 0; i < trees.Length; i++) {
1078                  var tree = trees[i];
1079                  var res_i = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
1080                  CVODES.NV_Set_Ith_S(ydot, i, res_i.Item1);
1081                }
1082                return 0;
1083              };
1084    }
1085
1086    private static CVODES.CVDlsJacFunc CreateJac(
1087      ISymbolicExpressionTree[] trees,
1088      string[] calculatedVariables,
1089      double[] parameterValues) {
1090
1091      return (
1092        double t, // current time (input)
1093        IntPtr y, // N_Vector, current value of y (input)
1094        IntPtr fy, // N_Vector, current value of f (input)
        IntPtr Jac, // SUNMatrix ∂f/∂y (output; row i contains the ∂f_i/∂y vector)
1096        IntPtr user_data, // optional (unused here)
1097        IntPtr tmp1, // N_Vector, optional (unused here)
1098        IntPtr tmp2, // N_Vector, optional (unused here)
1099        IntPtr tmp3 // N_Vector, optional (unused here)
1100      ) => {
1101        // here we need to calculate partial derivatives for the calculated variables y
1102        var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
1103        int pIdx = 0;
1104        foreach (var tree in trees) {
1105          foreach (var n in tree.IterateNodesPrefix()) {
1106            if (IsConstantNode(n)) {
1107              nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we need a gradient over y which is zero for parameters
1108              pIdx++;
1109            } else if (n.SubtreeCount == 0) {
1110              // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
1111              var varName = n.Symbol.Name;
1112              var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
1113              if (varIdx < 0) throw new InvalidProgramException();
1114
1115              var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
1116              var gArr = new double[CVODES.NV_LENGTH_S(y)]; // backing array
1117              gArr[varIdx] = 1.0;
1118              var g = new Vector(gArr);
1119              nodeValues.Add(n, Tuple.Create(y_i, g));
1120            }
1121          }
1122        }
1123
1124        for (int i = 0; i < trees.Length; i++) {
1125          var tree = trees[i];
1126          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
1127          var g = res.Item2;
1128          for (int j = 0; j < calculatedVariables.Length; j++) {
1129            CVODES.SUNDenseMatrix_Set(Jac, i, j, g[j]);
1130          }
1131        }
1132        return 0; // on success
1133      };
1134    }
1135
1136
    // calculates the sensitivity RHS for all equations at once:
    // must compute (∂f/∂y)s_i(t) + ∂f/∂p_i and store the result in ySdot.
    // Index i refers to parameters; the dimensionality of the matrix and the vectors is the number of equations.
1140    private static CVODES.CVSensRhsFn CreateSensitivityRhs(ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) {
1141      return (
1142              int Ns, // number of parameters
1143              double t, // current time
1144              IntPtr y, // N_Vector y(t) (input)
1145              IntPtr ydot, // N_Vector dy/dt(t) (input)
1146              IntPtr yS, // N_Vector*, one vector for each parameter (input)
1147              IntPtr ySdot, // N_Vector*, one vector for each parameter (output)
1148              IntPtr user_data, // optional (unused here)
1149              IntPtr tmp1, // N_Vector, optional (unused here)
1150              IntPtr tmp2 // N_Vector, optional (unused here)
1151        ) => {
1152          // here we need to calculate partial derivatives for the calculated variables y as well as for the parameters
1153          var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
1154          var d = calculatedVariables.Length + parameterValues.Length; // dimensionality of gradient
1155          // first collect variable values
1156          foreach (var tree in trees) {
1157            foreach (var n in tree.IterateNodesPrefix()) {
1158              if (IsVariableNode(n)) {
1159                // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
1160                var varName = n.Symbol.Name;
1161                var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
1162                if (varIdx < 0) throw new InvalidProgramException();
1163
1164                var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
1165                var gArr = new double[d]; // backing array
1166                gArr[varIdx] = 1.0;
1167                var g = new Vector(gArr);
1168                nodeValues.Add(n, Tuple.Create(y_i, g));
1169              }
1170            }
1171          }
1172          // then collect constants
1173          int pIdx = 0;
1174          foreach (var tree in trees) {
1175            foreach (var n in tree.IterateNodesPrefix()) {
1176              if (IsConstantNode(n)) {
1177                var gArr = new double[d];
1178                gArr[calculatedVariables.Length + pIdx] = 1.0;
1179                var g = new Vector(gArr);
1180                nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], g));
1181                pIdx++;
1182              }
1183            }
1184          }
          // gradient vector is [∂f/∂y_1, ∂f/∂y_2, ..., ∂f/∂y_N, ∂f/∂p_1, ..., ∂f/∂p_K]
1186
1187
1188          for (pIdx = 0; pIdx < Ns; pIdx++) {
1189            unsafe {
1190              var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
1191              CVODES.N_VConst_Serial(0.0, sDot_pi);
1192            }
1193          }
1194
1195          for (int i = 0; i < trees.Length; i++) {
1196            var tree = trees[i];
1197            var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
1198            var g = res.Item2;
1199
1200
1201            // update ySdot = (∂f/∂y)s_i(t) + ∂f/∂p_i
1202
1203            for (pIdx = 0; pIdx < Ns; pIdx++) {
1204              unsafe {
1205                var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
1206                var s_pi = *((IntPtr*)yS.ToPointer() + pIdx);
1207
1208                var v = CVODES.NV_Get_Ith_S(sDot_pi, i);
1209                // (∂f/∂y)s_i(t)
1210                var p = 0.0;
1211                for (int yIdx = 0; yIdx < calculatedVariables.Length; yIdx++) {
1212                  p += g[yIdx] * CVODES.NV_Get_Ith_S(s_pi, yIdx);
1213                }
1214                // + ∂f/∂p_i
1215                CVODES.NV_Set_Ith_S(sDot_pi, i, v + p + g[calculatedVariables.Length + pIdx]);
1216              }
1217            }
1218
1219          }
1220          return 0; // on success
1221        };
1222    }
1223    */
1224    #endregion
1225
1226    private static void IntegrateHL(
1227      ISymbolicExpressionTree[] trees,
1228      string[] calculatedVariables, // names of integrated variables
1229      NodeValueLookup nodeValues,
1230      int numericIntegrationSteps) {
1231
1232
1233      double[] deltaF = new double[calculatedVariables.Length];
1234      Vector[] deltaG = new Vector[calculatedVariables.Length];
1235
1236      double h = 1.0 / numericIntegrationSteps;
1237      for (int step = 0; step < numericIntegrationSteps; step++) {
1238
1239        // evaluate all trees
1240        for (int i = 0; i < trees.Length; i++) {
1241          var tree = trees[i];
1242
1243          // Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol
1244          double f; Vector g;
1245          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues, out f, out g);
1246          deltaF[i] = f;
1247          deltaG[i] = g;
1248        }
1249
        // update variableValues for the next step (explicit Euler integration)
1251        for (int i = 0; i < trees.Length; i++) {
1252          var varName = calculatedVariables[i];
1253          var oldVal = nodeValues.GetVariableValue(varName);
1254          nodeValues.SetVariableValue(varName, oldVal.Item1 + h * deltaF[i], oldVal.Item2.Add(deltaG[i].Scale(h)));
1255        }
1256      }
1257    }
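
    // Illustrative sketch (not part of the original class; all names here are ours):
    // IntegrateHL above advances all variables with the explicit Euler scheme
    // y_{k+1} = y_k + h * f(y_k) using h = 1/numericIntegrationSteps. The toy
    // method below applies the same scheme to dy/dt = -y; with a growing number
    // of steps the result approaches y0 * exp(-1).
    private static double ExplicitEulerDemo(double y0, int numericIntegrationSteps) {
      double y = y0;
      double h = 1.0 / numericIntegrationSteps; // same step width as in IntegrateHL
      for (int step = 0; step < numericIntegrationSteps; step++) {
        double f = -y;   // right-hand side f(y) = -y
        y = y + h * f;   // one explicit Euler step
      }
      return y;
    }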
1258
1259    // TODO: use an existing interpreter implementation instead
1260    private static double InterpretRec(ISymbolicExpressionTreeNode node, NodeValueLookup nodeValues) {
1261      if (node is ConstantTreeNode) {
1262        return ((ConstantTreeNode)node).Value;
1263      } else if (node is VariableTreeNode) {
1264        return nodeValues.NodeValue(node);
1265      } else if (node.Symbol is Addition) {
1266        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1267        for (int i = 1; i < node.SubtreeCount; i++) {
1268          f += InterpretRec(node.GetSubtree(i), nodeValues);
1269        }
1270        return f;
1271      } else if (node.Symbol is Multiplication) {
1272        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1273        for (int i = 1; i < node.SubtreeCount; i++) {
1274          f *= InterpretRec(node.GetSubtree(i), nodeValues);
1275        }
1276        return f;
1277      } else if (node.Symbol is Subtraction) {
1278        if (node.SubtreeCount == 1) {
1279          return -InterpretRec(node.GetSubtree(0), nodeValues);
1280        } else {
1281          var f = InterpretRec(node.GetSubtree(0), nodeValues);
1282          for (int i = 1; i < node.SubtreeCount; i++) {
1283            f -= InterpretRec(node.GetSubtree(i), nodeValues);
1284          }
1285          return f;
1286        }
1287      } else if (node.Symbol is Division) {
1288        if (node.SubtreeCount == 1) {
1289          var f = InterpretRec(node.GetSubtree(0), nodeValues);
1290          // protected division
1291          if (f.IsAlmost(0.0)) {
1292            return 0;
1293          } else {
1294            return 1.0 / f;
1295          }
1296        } else {
1297          var f = InterpretRec(node.GetSubtree(0), nodeValues);
1298          for (int i = 1; i < node.SubtreeCount; i++) {
1299            var g = InterpretRec(node.GetSubtree(i), nodeValues);
1300            // protected division
1301            if (g.IsAlmost(0.0)) {
1302              return 0;
1303            } else {
1304              f /= g;
1305            }
1306          }
1307          return f;
1308        }
1309      } else if (node.Symbol is Sine) {
1310        Assert(node.SubtreeCount == 1);
1311
1312        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1313        return Math.Sin(f);
1314      } else if (node.Symbol is Cosine) {
1315        Assert(node.SubtreeCount == 1);
1316
1317        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1318        return Math.Cos(f);
1319      } else if (node.Symbol is Square) {
1320        Assert(node.SubtreeCount == 1);
1321
1322        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1323        return f * f;
1324      } else if (node.Symbol is Exponential) {
1325        Assert(node.SubtreeCount == 1);
1326
1327        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1328        return Math.Exp(f);
1329      } else if (node.Symbol is Logarithm) {
1330        Assert(node.SubtreeCount == 1);
1331
1332        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1333        return Math.Log(f);
1334      } else throw new NotSupportedException("unsupported symbol");
1335    }
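
    // Illustrative sketch (helper is not part of the original code): the
    // protected-division convention used by InterpretRec maps any division by
    // an (almost) zero denominator to zero instead of producing ±infinity.
    private static double ProtectedDivision(double numerator, double denominator) {
      // IsAlmost is the HeuristicLab.Common extension method also used above
      return denominator.IsAlmost(0.0) ? 0.0 : numerator / denominator;
    }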
1336
1337    private static void Assert(bool cond) {
1338#if DEBUG
1339      if (!cond) throw new InvalidOperationException("Assertion failed");
1340#endif
1341    }
1342
1343    private static void InterpretRec(
1344      ISymbolicExpressionTreeNode node,
      NodeValueLookup nodeValues,      // contains value and gradient vector for a node (variables and constants only)
1346      out double z,
1347      out Vector dz
1348      ) {
1349      double f, g;
1350      Vector df, dg;
1351      if (node.Symbol is Constant || node.Symbol is Variable) {
1352        z = nodeValues.NodeValue(node);
1353        dz = Vector.CreateNew(nodeValues.NodeGradient(node)); // original gradient vectors are never changed by evaluation
1354      } else if (node.Symbol is Addition) {
1355        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1356        for (int i = 1; i < node.SubtreeCount; i++) {
1357          InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1358          f = f + g;
1359          df = df.Add(dg);
1360        }
1361        z = f;
1362        dz = df;
1363      } else if (node.Symbol is Multiplication) {
1364        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1365        for (int i = 1; i < node.SubtreeCount; i++) {
1366          InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
          df = df.Scale(g).Add(dg.Scale(f));  // d(f*g) = f'*g + f*g' (must use f before the update)
          f = f * g;
1369        }
1370        z = f;
1371        dz = df;
1372      } else if (node.Symbol is Subtraction) {
1373        if (node.SubtreeCount == 1) {
1374          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1375          z = -f;
1376          dz = df.Scale(-1.0);
1377        } else {
1378          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1379          for (int i = 1; i < node.SubtreeCount; i++) {
1380            InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1381            f = f - g;
1382            df = df.Subtract(dg);
1383          }
1384          z = f;
1385          dz = df;
1386        }
1387      } else if (node.Symbol is Division) {
1388        if (node.SubtreeCount == 1) {
1389          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1390          // protected division
1391          if (f.IsAlmost(0.0)) {
1392            z = 0;
1393            dz = Vector.Zero;
1394          } else {
1395            z = 1.0 / f;
1396            dz = df.Scale(-1 * z * z);
1397          }
1398        } else {
1399          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1400          for (int i = 1; i < node.SubtreeCount; i++) {
1401            InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1402            // protected division
1403            if (g.IsAlmost(0.0)) {
1404              z = 0;
1405              dz = Vector.Zero;
1406              return;
1407            } else {
              var inv_g = 1.0 / g;
              df = dg.Scale(-f * inv_g * inv_g).Add(df.Scale(inv_g));  // d(f/g) = f'/g - f*g'/g² (must use f before the update)
              f = f * inv_g;
1411            }
1412          }
1413          z = f;
1414          dz = df;
1415        }
1416      } else if (node.Symbol is Sine) {
1417        Assert(node.SubtreeCount == 1);
1418        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1419        z = Math.Sin(f);
1420        dz = df.Scale(Math.Cos(f));
1421      } else if (node.Symbol is Cosine) {
1422        Assert(node.SubtreeCount == 1);
1423        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1424        z = Math.Cos(f);
1425        dz = df.Scale(-Math.Sin(f));
1426      } else if (node.Symbol is Square) {
1427        Assert(node.SubtreeCount == 1);
1428        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1429        z = f * f;
1430        dz = df.Scale(2.0 * f);
1431      } else if (node.Symbol is Exponential) {
1432        Assert(node.SubtreeCount == 1);
1433        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1434        z = Math.Exp(f);
1435        dz = df.Scale(Math.Exp(f));
1436      } else if (node.Symbol is Logarithm) {
1437        Assert(node.SubtreeCount == 1);
1438        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1439        z = Math.Log(f);
1440        dz = df.Scale(1.0 / f);
1441      } else {
1442        throw new NotSupportedException("unsupported symbol");
1443      }
1444    }
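
    // Illustrative sketch (helper is not part of the original code): the
    // forward-mode product rule for a single factor pair z = f*g as used in
    // the Multiplication branch above. The gradient must be formed from the
    // values of f and g *before* f is overwritten with the product.
    private static void MulWithGradient(double f, Vector df, double g, Vector dg,
                                        out double z, out Vector dz) {
      // copy both gradients because Scale/Add modify the vectors in place
      dz = Vector.CreateNew(df).Scale(g).Add(Vector.CreateNew(dg).Scale(f)); // f'*g + f*g'
      z = f * g;
    }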
1445
1446    #endregion
1447
1448    #region events
1449    /*
1450     * Dependencies between parameters:
1451     *
1452     * ProblemData
1453     *    |
1454     *    V
1455     * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables
1456     *               |   |                 |                   |
1457     *               V   V                 |                   |
     *             Grammar <---------------+-------------------+
1459     *                |
1460     *                V
1461     *            Encoding
1462     */
1463    private void RegisterEventHandlers() {
1464      ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged;
1465      if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed;
1466
1467      TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged;
1468      if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
1469
1470      FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged;
1471      if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
1472
1473      MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged;
1474
1475      NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged;
1476    }
1477
1478    private void NumLatentVariablesChanged(object sender, EventArgs e) {
1479      UpdateGrammarAndEncoding();
1480    }
1481
1482    private void MaximumLengthChanged(object sender, EventArgs e) {
1483      UpdateGrammarAndEncoding();
1484    }
1485
    private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) {
      FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
      UpdateGrammarAndEncoding();
    }
1489
1490    private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
1491      UpdateGrammarAndEncoding();
1492    }
1493
    private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
      TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
      UpdateGrammarAndEncoding();
    }
1497
1498    private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
1499      UpdateGrammarAndEncoding();
1500    }
1501
1502    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
1503      ProblemDataParameter.Value.Changed += ProblemData_Changed;
1504      OnProblemDataChanged();
1505      OnReset();
1506    }
1507
1508    private void ProblemData_Changed(object sender, EventArgs e) {
1509      OnProblemDataChanged();
1510      OnReset();
1511    }
1512
1513    private void OnProblemDataChanged() {
1514      UpdateTargetVariables();        // implicitly updates other dependent parameters
1515      var handler = ProblemDataChanged;
1516      if (handler != null) handler(this, EventArgs.Empty);
1517    }
1518
1519    #endregion
1520
    #region helper
1522
    // note: advances step+1 positions per returned element, i.e. it skips the
    // first 'step' elements and then yields every (step+1)-th element of xs
    private static IEnumerable<T> EveryNth<T>(IEnumerable<T> xs, int step) {
1524      var e = xs.GetEnumerator();
1525      while (e.MoveNext()) {
1526        for (int i = 0; i < step; i++) {
1527          if (!e.MoveNext()) yield break;
1528        }
1529        yield return e.Current;
1530      }
1531    }
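
    // Hedged alternative (not used by the original code): an index-based
    // variant that yields xs[0], xs[step], xs[2*step], ..., which is what the
    // name EveryNth suggests, in contrast to the enumerator variant above.
    private static IEnumerable<T> EveryNthByIndex<T>(IEnumerable<T> xs, int step) {
      int i = 0;
      foreach (var x in xs) {
        if (i % step == 0) yield return x;
        i++;
      }
    }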
1532
1533    private void InitAllParameters() {
1534      UpdateTargetVariables(); // implicitly updates the grammar and the encoding
1535    }
1536
1537    private ReadOnlyCheckedItemList<StringValue> CreateFunctionSet() {
1538      var l = new CheckedItemList<StringValue>();
1539      l.Add(new StringValue("Addition").AsReadOnly());
1540      l.Add(new StringValue("Multiplication").AsReadOnly());
1541      l.Add(new StringValue("Division").AsReadOnly());
1542      l.Add(new StringValue("Subtraction").AsReadOnly());
1543      l.Add(new StringValue("Sine").AsReadOnly());
1544      l.Add(new StringValue("Cosine").AsReadOnly());
1545      l.Add(new StringValue("Square").AsReadOnly());
1546      return l.AsReadOnly();
1547    }
1548
1549    private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
1550      // return n.Symbol.Name[0] == 'θ';
1551      return n is ConstantTreeNode;
1552    }
1553    private static double GetConstantValue(ISymbolicExpressionTreeNode n) {
1554      return ((ConstantTreeNode)n).Value;
1555    }
1556    private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
1557      return n.Symbol.Name[0] == 'λ';
1558    }
1559    private static bool IsVariableNode(ISymbolicExpressionTreeNode n) {
1560      return (n.SubtreeCount == 0) && !IsConstantNode(n) && !IsLatentVariableNode(n);
1561    }
1562    private static string GetVariableName(ISymbolicExpressionTreeNode n) {
1563      return ((VariableTreeNode)n).VariableName;
1564    }
1565
1566    private void UpdateTargetVariables() {
1567      var currentlySelectedVariables = TargetVariables.CheckedItems
1568        .OrderBy(i => i.Index)
1569        .Select(i => i.Value.Value)
1570        .ToArray();
1571
1572      var newVariablesList = new CheckedItemList<StringValue>(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly();
      // restore the check state of previously selected variables
      foreach (var item in newVariablesList) {
        newVariablesList.SetItemCheckedState(item, currentlySelectedVariables.Contains(item.Value));
      }
1581      TargetVariablesParameter.Value = newVariablesList;
1582    }
1583
1584    private void UpdateGrammarAndEncoding() {
1585      var encoding = new MultiEncoding();
1586      var g = CreateGrammar();
1587      foreach (var targetVar in TargetVariables.CheckedItems) {
        var e = new SymbolicExpressionTreeEncoding(targetVar.Value.Value + "_tree", g, MaximumLength, MaximumLength);
1589        var multiManipulator = e.Operators.Where(op => op is MultiSymbolicExpressionTreeManipulator).First();
1590        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
1591        // make sure our multi-manipulator is the only manipulator
1592        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
1593
1594        // set the crossover probability to reduce likelihood that multiple trees are crossed at the same time
1595        var subtreeCrossovers = e.Operators.OfType<SubtreeCrossover>();
1596        foreach (var xover in subtreeCrossovers) {
1597          xover.CrossoverProbability.Value = 0.3;
1598        }
1599
1600        encoding = encoding.Add(e); // only limit by length
1601      }
1602      for (int i = 1; i <= NumberOfLatentVariables; i++) {
1603        var e = new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength);
1604        var multiManipulator = e.Operators.Where(op => op is MultiSymbolicExpressionTreeManipulator).First();
1605        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
1606        // make sure our multi-manipulator is the only manipulator
1607        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
1608
1609        // set the crossover probability to reduce likelihood that multiple trees are crossed at the same time
1610        var subtreeCrossovers = e.Operators.OfType<SubtreeCrossover>();
1611        foreach (var xover in subtreeCrossovers) {
1612          xover.CrossoverProbability.Value = 0.3;
1613        }
1614
1615        encoding = encoding.Add(e);
1616      }
1617      Encoding = encoding;
1618    }
1619
1620    private ISymbolicExpressionGrammar CreateGrammar() {
1621      var grammar = new TypeCoherentExpressionGrammar();
1622      grammar.StartGrammarManipulation();
1623
1624      var problemData = ProblemData;
1625      var ds = problemData.Dataset;
1626      grammar.MaximumFunctionArguments = 0;
1627      grammar.MaximumFunctionDefinitions = 0;
1628      var allowedVariables = problemData.AllowedInputVariables.Concat(TargetVariables.CheckedItems.Select(chk => chk.Value.Value));
1629      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableBase>()) {
1630        if (!varSymbol.Fixed) {
1631          varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
1632          varSymbol.VariableNames = allowedVariables.Where(x => ds.VariableHasType<double>(x));
1633        }
1634      }
1635      foreach (var factorSymbol in grammar.Symbols.OfType<BinaryFactorVariable>()) {
1636        if (!factorSymbol.Fixed) {
1637          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
1638          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
1639          factorSymbol.VariableValues = factorSymbol.VariableNames
1640            .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList());
1641        }
1642      }
1643      foreach (var factorSymbol in grammar.Symbols.OfType<FactorVariable>()) {
1644        if (!factorSymbol.Fixed) {
1645          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
1646          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
1647          factorSymbol.VariableValues = factorSymbol.VariableNames
1648            .ToDictionary(varName => varName,
1649            varName => ds.GetStringValues(varName).Distinct()
1650            .Select((n, i) => Tuple.Create(n, i))
1651            .ToDictionary(tup => tup.Item1, tup => tup.Item2));
1652        }
1653      }
1654
1655      grammar.ConfigureAsDefaultRegressionGrammar();
1656      grammar.GetSymbol("Logarithm").Enabled = false; // not supported yet
1657      grammar.GetSymbol("Exponential").Enabled = false; // not supported yet
1658
1659      // configure initialization of constants
1660      var constSy = (Constant)grammar.GetSymbol("Constant");
1661      // max and min are only relevant for initialization
1662      constSy.MaxValue = +1.0e-1; // small initial values for constant opt
1663      constSy.MinValue = -1.0e-1;
1664      constSy.MultiplicativeManipulatorSigma = 1.0; // allow large jumps for manipulation
1665      constSy.ManipulatorMu = 0.0;
1666      constSy.ManipulatorSigma = 1.0; // allow large jumps
1667
1668      // configure initialization of variables
1669      var varSy = (Variable)grammar.GetSymbol("Variable");
1670      // fix variable weights to 1.0
1671      varSy.WeightMu = 1.0;
1672      varSy.WeightSigma = 0.0;
1673      varSy.WeightManipulatorMu = 0.0;
1674      varSy.WeightManipulatorSigma = 0.0;
1675      varSy.MultiplicativeWeightManipulatorSigma = 0.0;
1676
1677      foreach (var f in FunctionSet) {
1678        grammar.GetSymbol(f.Value).Enabled = FunctionSet.ItemChecked(f);
1679      }
1680
1681      grammar.FinishedGrammarManipulation();
1682      return grammar;
1683      // // whenever ProblemData is changed we create a new grammar with the necessary symbols
1684      // var g = new SimpleSymbolicExpressionGrammar();
1685      // var unaryFunc = new string[] { "sin", "cos", "sqr" };
1686      // var binaryFunc = new string[] { "+", "-", "*", "%" };
1687      // foreach (var func in unaryFunc) {
1688      //   if (FunctionSet.CheckedItems.Any(ci => ci.Value.Value == func)) g.AddSymbol(func, 1, 1);
1689      // }
1690      // foreach (var func in binaryFunc) {
1691      //   if (FunctionSet.CheckedItems.Any(ci => ci.Value.Value == func)) g.AddSymbol(func, 2, 2);
1692      // }
1693      //
1694      // foreach (var variableName in ProblemData.AllowedInputVariables.Union(TargetVariables.CheckedItems.Select(i => i.Value.Value)))
1695      //   g.AddTerminalSymbol(variableName);
1696      //
1697      // // generate symbols for numeric parameters for which the value is optimized using AutoDiff
1698      // // we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees
1699      // var numericConstantsFactor = 2.0;
1700      // for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + TargetVariables.CheckedItems.Count()); i++) {
1701      //   g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
1702      // }
1703      //
1704      // // generate symbols for latent variables
1705      // for (int i = 1; i <= NumberOfLatentVariables; i++) {
1706      //   g.AddTerminalSymbol("λ" + i); // numeric parameter for which the value is optimized using AutoDiff
1707      // }
1708      //
1709      // return g;
1710    }
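
    // Illustrative sketch (helper is not part of the original code): the
    // grammar created above mirrors the checked state of the FunctionSet
    // parameter into the Enabled flag of the corresponding grammar symbol.
    private bool IsSymbolEnabledDemo(string symbolName) {
      var g = CreateGrammar();
      return g.Symbols.First(s => s.Name == symbolName).Enabled;
    }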
1711    #endregion
1712
1713
1714    #region Import & Export
1715    public void Load(IRegressionProblemData data) {
1716      Name = data.Name;
1717      Description = data.Description;
1718      ProblemData = data;
1719    }
1720
1721    public IRegressionProblemData Export() {
1722      return ProblemData;
1723    }
1724    #endregion
1725
1726
1727    // TODO: for integration we only need a part of the data that we need for optimization
1728
1729    public class OptimizationData {
1730      public readonly ISymbolicExpressionTree[] trees;
1731      public readonly string[] targetVariables;
1732      public readonly IRegressionProblemData problemData;
1733      public readonly double[][] targetValues;
1734      public readonly double[] inverseStandardDeviation;
1735      public readonly IntRange[] episodes;
1736      public readonly int numericIntegrationSteps;
1737      public readonly string[] latentVariables;
1738      public readonly string odeSolver;
1739      public readonly NodeValueLookup nodeValueLookup;
1740      public readonly int[] rows;
1741      internal readonly string[] variables;
1742
1743      public OptimizationData(ISymbolicExpressionTree[] trees, string[] targetVars, string[] inputVariables,
1744        IRegressionProblemData problemData,
1745        double[][] targetValues,
1746        IntRange[] episodes,
1747        int numericIntegrationSteps, string[] latentVariables, string odeSolver) {
1748        this.trees = trees;
1749        this.targetVariables = targetVars;
1750        this.problemData = problemData;
1751        this.targetValues = targetValues;
1752        this.variables = inputVariables;
1753        if (targetValues != null) {
1754          this.inverseStandardDeviation = new double[targetValues.Length];
1755          for (int i = 0; i < targetValues.Length; i++) {
            // calculate the standard deviation for each episode separately and use the average
1757            var epStartIdx = 0;
1758            var stdevs = new List<double>();
1759            foreach (var ep in episodes) {
1760              var epValues = targetValues[i].Skip(epStartIdx).Take(ep.Size);
1761              stdevs.Add(epValues.StandardDeviation());
1762              epStartIdx += ep.Size;
1763            }
1764            inverseStandardDeviation[i] = 1.0 / stdevs.Average();
1765          }
1766        } else
1767          this.inverseStandardDeviation = Enumerable.Repeat(1.0, trees.Length).ToArray();
1768        this.episodes = episodes;
1769        this.numericIntegrationSteps = numericIntegrationSteps;
1770        this.latentVariables = latentVariables;
1771        this.odeSolver = odeSolver;
1772        this.nodeValueLookup = new NodeValueLookup(trees);
1773        this.rows = episodes.SelectMany(ep => Enumerable.Range(ep.Start, ep.Size)).ToArray();
1774      }
1775    }
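
    // Illustrative sketch (helper is not part of the original code): the
    // OptimizationData constructor weights each target by the inverse of the
    // average per-episode standard deviation; the same computation extracted:
    private static double InverseAverageEpisodeStdDev(double[] targetValues, IntRange[] episodes) {
      var stdevs = new List<double>();
      int epStartIdx = 0;
      foreach (var ep in episodes) {
        stdevs.Add(targetValues.Skip(epStartIdx).Take(ep.Size).StandardDeviation());
        epStartIdx += ep.Size;
      }
      return 1.0 / stdevs.Average();
    }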
1776
1777    public class NodeValueLookup {
1778      private readonly Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> node2val = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
1779      private readonly Dictionary<string, List<ISymbolicExpressionTreeNode>> name2nodes = new Dictionary<string, List<ISymbolicExpressionTreeNode>>();
1780      private readonly ConstantTreeNode[] constantNodes;
1781      private readonly Vector[] constantGradientVectors;
1782
1783      // private readonly Dictionary<int, ISymbolicExpressionTreeNode> paramIdx2node = new Dictionary<int, ISymbolicExpressionTreeNode>();
1784
1785      public double NodeValue(ISymbolicExpressionTreeNode node) => node2val[node].Item1;
1786      public Vector NodeGradient(ISymbolicExpressionTreeNode node) => node2val[node].Item2;
1787
1788      public NodeValueLookup(ISymbolicExpressionTree[] trees) {
1789
1790        this.constantNodes = trees.SelectMany(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>()).ToArray();
1791        constantGradientVectors = new Vector[constantNodes.Length];
1792        for (int paramIdx = 0; paramIdx < constantNodes.Length; paramIdx++) {
1793          constantGradientVectors[paramIdx] = Vector.CreateIndicator(length: constantNodes.Length, idx: paramIdx);
1794
1795          var node = constantNodes[paramIdx];
1796          node2val[node] = Tuple.Create(node.Value, constantGradientVectors[paramIdx]);
1797        }
1798
1799        foreach (var tree in trees) {
1800          foreach (var node in tree.IterateNodesPrefix().Where(IsVariableNode)) {
1801            var varName = GetVariableName(node);
1802            if (!name2nodes.TryGetValue(varName, out List<ISymbolicExpressionTreeNode> nodes)) {
1803              nodes = new List<ISymbolicExpressionTreeNode>();
1804              name2nodes.Add(varName, nodes);
1805            }
1806            nodes.Add(node);
1807            SetVariableValue(varName, 0.0);  // this value is updated in the prediction loop
1808          }
1809        }
1810      }
1811
1812      public int ParameterCount => constantNodes.Length;
1813
1814      public void SetVariableValue(string variableName, double val) {
1815        SetVariableValue(variableName, val, Vector.Zero);
1816      }
1817      public Tuple<double, Vector> GetVariableValue(string variableName) {
1818        return node2val[name2nodes[variableName].First()];
1819      }
1820      public void SetVariableValue(string variableName, double val, Vector dVal) {
1821        if (name2nodes.TryGetValue(variableName, out List<ISymbolicExpressionTreeNode> nodes)) {
1822          nodes.ForEach(n => node2val[n] = Tuple.Create(val, dVal));
1823        } else {
1824          var fakeNode = new VariableTreeNode(new Variable());
1825          fakeNode.Weight = 1.0;
1826          fakeNode.VariableName = variableName;
1827          var newNodeList = new List<ISymbolicExpressionTreeNode>();
1828          newNodeList.Add(fakeNode);
1829          name2nodes.Add(variableName, newNodeList);
1830          node2val[fakeNode] = Tuple.Create(val, dVal);
1831        }
1832      }
1833
1834      internal void UpdateParamValues(double[] x) {
1835        for (int i = 0; i < x.Length; i++) {
1836          constantNodes[i].Value = x[i];
1837          node2val[constantNodes[i]] = Tuple.Create(x[i], constantGradientVectors[i]);
1838        }
1839      }
1840    }
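
    // Usage sketch (example only; the variable name "x" is hypothetical and
    // the trees would come from the encoding):
    private static void NodeValueLookupDemo(ISymbolicExpressionTree[] trees) {
      var lookup = new NodeValueLookup(trees);
      lookup.SetVariableValue("x", 1.5);                 // broadcasts the value to all "x" nodes
      var valAndGradient = lookup.GetVariableValue("x"); // value and gradient vector
      lookup.UpdateParamValues(new double[lookup.ParameterCount]); // set all constants to 0.0
    }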
1841  }
1842}