
source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs @ 16610

Last change on this file since 16610 was 16610, checked in by gkronber, 5 years ago

#2925: added support for multiple training episodes (and log, exp functions)

#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.Instances;
using Variable = HeuristicLab.Problems.DataAnalysis.Symbolic.Variable;

namespace HeuristicLab.Problems.DynamicalSystemsModelling {
  [Item("Dynamical Systems Modelling Problem", "TODO")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
  [StorableClass]
  public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
    #region parameter names
    private const string ProblemDataParameterName = "Data";
    private const string TargetVariablesParameterName = "Target variables";
    private const string FunctionSetParameterName = "Function set";
    private const string MaximumLengthParameterName = "Size limit";
    private const string MaximumParameterOptimizationIterationsParameterName = "Max. parameter optimization iterations";
    private const string NumberOfLatentVariablesParameterName = "Number of latent variables";
    private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
    private const string TrainingEpisodesParameterName = "Training episodes";
    private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes";
    private const string OdeSolverParameterName = "ODE Solver";
    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> TargetVariablesParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[TargetVariablesParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> FunctionSetParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[FunctionSetParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumLengthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumLengthParameterName]; }
    }

    public IFixedValueParameter<IntValue> MaximumParameterOptimizationIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumParameterOptimizationIterationsParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; }
    }
    public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter {
      get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; }
    }
    public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; }
    }
    public IConstrainedValueParameter<StringValue> OdeSolverParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters[OdeSolverParameterName]; }
    }
    #endregion

    #region Properties
    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }

    public ReadOnlyCheckedItemList<StringValue> TargetVariables {
      get { return TargetVariablesParameter.Value; }
    }

    public ReadOnlyCheckedItemList<StringValue> FunctionSet {
      get { return FunctionSetParameter.Value; }
    }

    public int MaximumLength {
      get { return MaximumLengthParameter.Value.Value; }
    }
    public int MaximumParameterOptimizationIterations {
      get { return MaximumParameterOptimizationIterationsParameter.Value.Value; }
    }
    public int NumberOfLatentVariables {
      get { return NumberOfLatentVariablesParameter.Value.Value; }
    }
    public int NumericIntegrationSteps {
      get { return NumericIntegrationStepsParameter.Value.Value; }
    }
    public IEnumerable<IntRange> TrainingEpisodes {
      get { return TrainingEpisodesParameter.Value; }
    }
    public bool OptimizeParametersForEpisodes {
      get { return OptimizeParametersForEpisodesParameter.Value.Value; }
    }

    public string OdeSolver {
      get { return OdeSolverParameter.Value.Value; }
      set {
        var matchingValue = OdeSolverParameter.ValidValues.FirstOrDefault(v => v.Value == value);
        if (matchingValue == null) throw new ArgumentOutOfRangeException();
        else OdeSolverParameter.Value = matchingValue;
      }
    }

    #endregion

    public event EventHandler ProblemDataChanged;

    public override bool Maximization {
      get { return false; } // we minimize NMSE
    }

    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(bool deserializing) : base(deserializing) { }
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) {
        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
      }
      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers();
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
    #endregion

    public Problem()
      : base() {
      var targetVariables = new CheckedItemList<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem
      var functions = CreateFunctionSet();
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. Use CSV import functionality to import data.", new RegressionProblemData()));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(TargetVariablesParameterName, "Target variables (overrides setting in ProblemData)", targetVariables));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximally allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters (using Levenberg-Marquardt). More iterations make the algorithm slower; fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated up and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference from target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 .. 5) as necessary (default = 0)", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps make the algorithm slower; fewer steps worsen the accuracy of the numeric integration scheme.", new IntValue(10)));
      Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training; each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>()));
      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));

      var solversStr = new string[] { "HeuristicLab" /* , "CVODES" */};
      var solvers = new ItemSet<StringValue>(
        solversStr.Select(s => new StringValue(s).AsReadOnly())
        );
      Parameters.Add(new ConstrainedValueParameter<StringValue>(OdeSolverParameterName, "The solver to use for solving the initial value ODE problems", solvers, solvers.First()));

      RegisterEventHandlers();
      InitAllParameters();

      // TODO: UI hangs when selecting / deselecting input variables because the encoding is updated on each item
      // TODO: use training range as default training episode
      // TODO: write back optimized parameters to solution?
      // TODO: optimization of starting values for latent variables in CVODES solver
      // TODO: allow to specify the name for the time variable in the dataset and allow variable step-sizes
      // TODO: check grammars (input variables) after cloning

    }

    public override double Evaluate(Individual individual, IRandom random) {
      var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
      if (OptimizeParametersForEpisodes) {
        throw new NotImplementedException();
        int eIdx = 0;
        double totalNMSE = 0.0;
        int totalSize = 0;
        foreach (var episode in TrainingEpisodes) {
          // double[] optTheta;
          double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, new[] { episode }, MaximumParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver);
          // individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
          eIdx++;
          totalNMSE += nmse * episode.Size;
          totalSize += episode.Size;
        }
        return totalNMSE / totalSize;
      } else {
        // double[] optTheta;
        double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, TrainingEpisodes, MaximumParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver);
        // individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
        return nmse;
      }
    }

    public static double OptimizeForEpisodes(
      ISymbolicExpressionTree[] trees,
      IRegressionProblemData problemData,
      string[] targetVars,
      string[] latentVariables,
      IRandom random,
      IEnumerable<IntRange> episodes,
      int maxParameterOptIterations,
      int numericIntegrationSteps,
      string odeSolver) {

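      // Two-stage fitting scheme (a summary of the calls below): constants are first
      // pre-tuned per tree against finite differences of the corresponding target
      // (PreTuneParameters, no ODE integration), then refined jointly by integrating
      // the full system (OptimizeParameters). Invalid or excessive errors are capped
      // by a penalty of 100 per tree and training row.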
      // extract constants from tree
      var constantNodes = trees.Select(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>().ToArray()).ToArray();
      var initialTheta = constantNodes.Select(nodes => nodes.Select(n => n.Value).ToArray()).ToArray();

      // optimize parameters by fitting f(x,y) to calculated differences dy/dt(t)
      double nmse = PreTuneParameters(trees, problemData, targetVars, latentVariables, random, episodes, maxParameterOptIterations,
        initialTheta, out double[] pretunedParameters);

      // optimize parameters using integration of f(x,y) to calculate y(t)
      nmse = OptimizeParameters(trees, problemData, targetVars, latentVariables, episodes, maxParameterOptIterations, pretunedParameters, numericIntegrationSteps, odeSolver,
        out double[] optTheta);

      if (double.IsNaN(nmse) ||
        double.IsInfinity(nmse) ||
        nmse > 100 * trees.Length * episodes.Sum(ep => ep.Size))
        return 100 * trees.Length * episodes.Sum(ep => ep.Size);

      // update tree nodes with optimized values
      var paramIdx = 0;
      for (var treeIdx = 0; treeIdx < constantNodes.Length; treeIdx++) {
        for (int i = 0; i < constantNodes[treeIdx].Length; i++)
          constantNodes[treeIdx][i].Value = optTheta[paramIdx++];
      }
      return nmse;
    }

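    // Stage 1: fit each tree separately so that f(x,y) matches the first differences
    // Δy_t = y_(t+1) - y_t of its target variable, a finite-difference approximation
    // of dy/dt that assumes equidistant rows (unit time step). Each episode loses its
    // last row in the differencing step.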
    private static double PreTuneParameters(
      ISymbolicExpressionTree[] trees,
      IRegressionProblemData problemData,
      string[] targetVars,
      string[] latentVariables,
      IRandom random,
      IEnumerable<IntRange> episodes,
      int maxParameterOptIterations,
      double[][] initialTheta,
      out double[] optTheta) {
      var thetas = new List<double>();
      double nmse = 0.0;
      var maxTreeNmse = 100 * episodes.Sum(ep => ep.Size);

      // NOTE: the order of values in parameter matches prefix order of constant nodes in trees
      for (int treeIdx = 0; treeIdx < trees.Length; treeIdx++) {
        var t = trees[treeIdx];

        var targetValuesDiff = new List<double>();
        foreach (var ep in episodes) {
          var episodeRows = Enumerable.Range(ep.Start, ep.Size);
          var targetValues = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], episodeRows).ToArray();
          targetValuesDiff.AddRange(targetValues.Skip(1).Zip(targetValues, (t1, t0) => t1 - t0)); // TODO: smoothing or multi-pole
        }
        var adjustedEpisodes = episodes.Select(ep => new IntRange(ep.Start, ep.End - 1)); // because we lose the last row in the differencing step
        var myState = new OptimizationData(new[] { t },
          targetVars,
          problemData.AllowedInputVariables.Concat(targetVars).ToArray(),
          problemData, new[] { targetValuesDiff.ToArray() }, adjustedEpisodes.ToArray(), -99, latentVariables, string.Empty); // TODO
        var paramCount = myState.nodeValueLookup.ParameterCount;

        optTheta = new double[0];
        if (initialTheta[treeIdx].Length > 0) {
          try {
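            // Levenberg-Marquardt (alglib, "vj" variant: objective vector with analytic
            // Jacobian); constants are box-constrained to [-100, 100] and the iteration
            // count is limited via minlmsetcond.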
            alglib.minlmstate state;
            alglib.minlmreport report;
            var p = new double[initialTheta[treeIdx].Length];
            var lowerBounds = Enumerable.Repeat(-100.0, p.Length).ToArray();
            var upperBounds = Enumerable.Repeat(100.0, p.Length).ToArray();
            Array.Copy(initialTheta[treeIdx], p, p.Length);
            alglib.minlmcreatevj(targetValuesDiff.Count, p, out state);
            alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
            alglib.minlmsetbc(state, lowerBounds, upperBounds);
            // alglib.minlmsetgradientcheck(state, 1.0e-3);
            alglib.minlmoptimize(state, EvaluateObjectiveVector, EvaluateObjectiveVectorAndJacobian, null, myState);

            alglib.minlmresults(state, out optTheta, out report);
            if (report.terminationtype < 0) { optTheta = initialTheta[treeIdx]; }
          } catch (alglib.alglibexception) {
            optTheta = initialTheta[treeIdx];
          }
        }
        var tree_nmse = EvaluateMSE(optTheta, myState);
        if (double.IsNaN(tree_nmse) || double.IsInfinity(tree_nmse) || tree_nmse > maxTreeNmse) {
          nmse += maxTreeNmse;
          thetas.AddRange(initialTheta[treeIdx]);
        } else {
          nmse += tree_nmse;
          thetas.AddRange(optTheta);
        }
      } // foreach tree
      optTheta = thetas.ToArray();

      return nmse;
    }


    // similar to above but this time we integrate and optimize all parameters for all targets concurrently
    private static double OptimizeParameters(ISymbolicExpressionTree[] trees, IRegressionProblemData problemData, string[] targetVars, string[] latentVariables,
      IEnumerable<IntRange> episodes, int maxParameterOptIterations, double[] initialTheta, int numericIntegrationSteps, string odeSolver, out double[] optTheta) {
      var rowsForDataExtraction = episodes.SelectMany(e => Enumerable.Range(e.Start, e.Size)).ToArray();
      var targetValues = new double[trees.Length][];
      for (int treeIdx = 0; treeIdx < trees.Length; treeIdx++) {
        var t = trees[treeIdx];

        targetValues[treeIdx] = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], rowsForDataExtraction).ToArray();
      }

      var myState = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, targetValues, episodes.ToArray(), numericIntegrationSteps, latentVariables, odeSolver);
      optTheta = initialTheta;

      if (initialTheta.Length > 0) {
        var lowerBounds = Enumerable.Repeat(-100.0, initialTheta.Length).ToArray();
        var upperBounds = Enumerable.Repeat(100.0, initialTheta.Length).ToArray();
        try {
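          // same Levenberg-Marquardt setup as in PreTuneParameters, but here the
          // objective vector is computed by integrating the full ODE system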
          alglib.minlmstate state;
          alglib.minlmreport report;
          alglib.minlmcreatevj(rowsForDataExtraction.Length * trees.Length, initialTheta, out state);
          alglib.minlmsetbc(state, lowerBounds, upperBounds);
          alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
          // alglib.minlmsetgradientcheck(state, 1.0e-3);
          alglib.minlmoptimize(state, IntegrateAndEvaluateObjectiveVector, IntegrateAndEvaluateObjectiveVectorAndJacobian, null, myState);

          alglib.minlmresults(state, out optTheta, out report);

          if (report.terminationtype < 0) {
            // there was a problem: reset theta and evaluate for initial values
            optTheta = initialTheta;
          }
        } catch (alglib.alglibexception) {
          optTheta = initialTheta;
        }
      }
      var nmse = EvaluateIntegratedMSE(optTheta, myState);
      var maxNmse = 100 * targetValues.Length * rowsForDataExtraction.Length;
      if (double.IsNaN(nmse) || double.IsInfinity(nmse) || nmse > maxNmse) nmse = maxNmse;
      return nmse;
    }


    // helper
    public static double EvaluateMSE(double[] x, OptimizationData optimizationData) {
      var fi = new double[optimizationData.rows.Count()];
      EvaluateObjectiveVector(x, fi, optimizationData);
      return fi.Sum(fii => fii * fii) / fi.Length;
    }
    public static void EvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { EvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
    public static void EvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
      var rows = optimizationData.rows;
      var problemData = optimizationData.problemData;
      var nodeValueLookup = optimizationData.nodeValueLookup;
      var ds = problemData.Dataset;
      var variables = optimizationData.variables;

      nodeValueLookup.UpdateParamValues(x);

      int outputIdx = 0;
      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
        // update variable values
        foreach (var variable in variables) {
          nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx])); // TODO: perf
        }
        // interpret all trees
        for (int treeIdx = 0; treeIdx < optimizationData.trees.Length; treeIdx++) {
          var tree = optimizationData.trees[treeIdx];
          var pred = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup);
          var y = optimizationData.targetValues[treeIdx][trainIdx];
          fi[outputIdx++] = y - pred;
        }
      }
    }

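    // In this variant the residuals (and Jacobian rows) are scaled by the inverse
    // standard deviation of the respective target, so the squared error corresponds
    // to a normalized (NMSE-style) objective; note that the value-only
    // EvaluateObjectiveVector above leaves the residuals unscaled.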
    public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { EvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
    public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
      // extract variable values from dataset
      var variableValues = new Dictionary<string, Tuple<double, Vector>>();
      var problemData = optimizationData.problemData;
      var ds = problemData.Dataset;
      var rows = optimizationData.rows;
      var variables = optimizationData.variables;

      var nodeValueLookup = optimizationData.nodeValueLookup;
      nodeValueLookup.UpdateParamValues(x);

      int termIdx = 0;

      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
        // update variable values
        foreach (var variable in variables) {
          nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx])); // TODO: perf
        }

        var calculatedVariables = optimizationData.targetVariables;

        var trees = optimizationData.trees;
        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var targetVarName = calculatedVariables[i];

          double f; Vector g;
          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup, out f, out g);

          var y = optimizationData.targetValues[i][trainIdx];
          fi[termIdx] = (y - f) * optimizationData.inverseStandardDeviation[i]; // scale of NMSE
          if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[termIdx, j] = -g[j] * optimizationData.inverseStandardDeviation[i];

          termIdx++;
        }
      }

    }

    // helper
    public static double EvaluateIntegratedMSE(double[] x, OptimizationData optimizationData) {
      var fi = new double[optimizationData.rows.Count() * optimizationData.targetVariables.Length];
      IntegrateAndEvaluateObjectiveVector(x, fi, optimizationData);
      return fi.Sum(fii => fii * fii) / fi.Length;
    }
    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { IntegrateAndEvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
      IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, null, optimizationData);
    }

    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
      var rows = optimizationData.rows.ToArray();
      var problemData = optimizationData.problemData;
      var nodeValueLookup = optimizationData.nodeValueLookup;
      var ds = problemData.Dataset;
      int outputIdx = 0;

      nodeValueLookup.UpdateParamValues(x);

      Integrate(optimizationData, fi, jac);
      var trees = optimizationData.trees;

      // update result with error
      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var y = optimizationData.targetValues[i][trainIdx];
          fi[outputIdx] = (y - fi[outputIdx]) * optimizationData.inverseStandardDeviation[i];  // scale for normalized squared error
          if (jac != null) for (int j = 0; j < x.Length; j++) jac[outputIdx, j] = -jac[outputIdx, j] * optimizationData.inverseStandardDeviation[i];
          outputIdx++;
        }
      }
    }

    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      if (!results.ContainsKey("Prediction (training)")) {
        results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Prediction (test)")) {
        results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Models")) {
        results.Add(new Result("Models", typeof(VariableCollection)));
      }
      if (!results.ContainsKey("SNMSE")) {
        results.Add(new Result("SNMSE", typeof(DoubleValue)));
      }
      if (!results.ContainsKey("Solution")) {
        results.Add(new Result("Solution", typeof(Solution)));
      }
      if (!results.ContainsKey("Squared error and gradient")) {
        results.Add(new Result("Squared error and gradient", typeof(DataTable)));
      }

      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual

      results["SNMSE"].Value = new DoubleValue(bestIndividualAndQuality.Item2);

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees

      var trainingList = new ItemList<DataTable>();

      if (OptimizeParametersForEpisodes) {
        throw new NotSupportedException();
        var eIdx = 0;
        var trainingPredictions = new List<Tuple<double, Vector>[][]>();
        foreach (var episode in TrainingEpisodes) {
          var episodes = new[] { episode };
          var optimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, episodes, NumericIntegrationSteps, latentVariables, OdeSolver);
          var trainingPrediction = Integrate(optimizationData).ToArray();
          trainingPredictions.Add(trainingPrediction);
          eIdx++;
        }

        // only for target values
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray());
          trainingDataTable.Rows.Add(actualValuesRow);
          trainingDataTable.Rows.Add(predictedValuesRow);
          trainingList.Add(trainingDataTable);
        }
        results["Prediction (training)"].Value = trainingList.AsReadOnly();


        var models = new VariableCollection();

        foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
          var targetVarName = tup.Item1;
          var tree = tup.Item2;

          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
        }
        results["Models"].Value = models;
      } else {
        var optimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, TrainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver);
        var trainingPrediction = Integrate(optimizationData).ToArray();


        var numParams = optimizationData.nodeValueLookup.ParameterCount;
        // for target values and latent variables
        var trainingRows = optimizationData.rows;
        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
          // is target variable
          if (colIdx < targetVars.Length) {
            var targetVar = targetVars[colIdx];
            var trainingDataTable = new DataTable(targetVar + " prediction (training)");
            var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
            var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            trainingDataTable.Rows.Add(actualValuesRow);
            trainingDataTable.Rows.Add(predictedValuesRow);

            for (int paramIdx = 0; paramIdx < numParams; paramIdx++) {
              var paramSensitivityRow = new DataRow($"∂{targetVar}/∂θ{paramIdx}", $"Sensitivities of parameter {paramIdx}", trainingPrediction.Select(arr => arr[colIdx].Item2[paramIdx]).ToArray());
              paramSensitivityRow.VisualProperties.SecondYAxis = true;
              trainingDataTable.Rows.Add(paramSensitivityRow);
            }
            trainingList.Add(trainingDataTable);
          } else {
            var latentVar = latentVariables[colIdx - targetVars.Length];
            var trainingDataTable = new DataTable(latentVar + " prediction (training)");
            var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            var emptyRow = new DataRow(latentVar);
            trainingDataTable.Rows.Add(emptyRow);
            trainingDataTable.Rows.Add(predictedValuesRow);
            trainingList.Add(trainingDataTable);
          }
        }

        var errorTable = new DataTable("Squared error and gradient");
        var seRow = new DataRow("Squared error");
        var gradientRows = Enumerable.Range(0, numParams).Select(i => new DataRow($"∂SE/∂θ{i}")).ToArray();
        errorTable.Rows.Add(seRow);
        foreach (var gRow in gradientRows) {
          gRow.VisualProperties.SecondYAxis = true;
          errorTable.Rows.Add(gRow);
        }
        var targetValues = targetVars.Select(v => problemData.Dataset.GetDoubleValues(v, trainingRows).ToArray()).ToArray();
        int r = 0;

        foreach (var y_pred in trainingPrediction) {
          // calculate objective function gradient
          double f_i = 0.0;
          Vector g_i = Vector.CreateNew(new double[numParams]);
          for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
            var y_pred_f = y_pred[colIdx].Item1;
            var y = targetValues[colIdx][r];

            var res = (y - y_pred_f);
            var ressq = res * res;
            f_i += ressq * optimizationData.inverseStandardDeviation[colIdx];
            g_i.Add(y_pred[colIdx].Item2.Scale(-2.0 * res * optimizationData.inverseStandardDeviation[colIdx]));
          }
          seRow.Values.Add(f_i);
          for (int j = 0; j < g_i.Length; j++) gradientRows[j].Values.Add(g_i[j]);
          r++;
        }
        results["Squared error and gradient"].Value = errorTable;

        // TODO: DRY for training and test
        var testList = new ItemList<DataTable>();
        var testRows = ProblemData.TestIndices.ToArray();
        var testOptimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, new IntRange[] { ProblemData.TestPartition }, NumericIntegrationSteps, latentVariables, OdeSolver);
        var testPrediction = Integrate(testOptimizationData).ToArray();

        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
          // is target variable
          if (colIdx < targetVars.Length) {
            var targetVar = targetVars[colIdx];
            var testDataTable = new DataTable(targetVar + " prediction (test)");
            var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
            var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            testDataTable.Rows.Add(actualValuesRow);
            testDataTable.Rows.Add(predictedValuesRow);
            testList.Add(testDataTable);

          } else {
            var latentVar = latentVariables[colIdx - targetVars.Length];
            var testDataTable = new DataTable(latentVar + " prediction (test)");
            var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            var emptyRow = new DataRow(latentVar);
            testDataTable.Rows.Add(emptyRow);
            testDataTable.Rows.Add(predictedValuesRow);
            testList.Add(testDataTable);
          }
        }

        results["Prediction (training)"].Value = trainingList.AsReadOnly();
        results["Prediction (test)"].Value = testList.AsReadOnly();


        #region simplification of models
        // TODO: the dependency on HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
        var models = new VariableCollection();    // to store target var names and original version of tree

        var clonedTrees = new List<ISymbolicExpressionTree>();
        for (int idx = 0; idx < trees.Length; idx++) {
          clonedTrees.Add((ISymbolicExpressionTree)trees[idx].Clone());
        }
        var ds = problemData.Dataset;
        var newProblemData = new RegressionProblemData((IDataset)ds.Clone(), problemData.AllowedInputVariables, problemData.TargetVariable);
        results["Solution"].Value = new Solution(clonedTrees.ToArray(),
                   // optTheta,
                   newProblemData,
                   targetVars,
                   latentVariables,
                   TrainingEpisodes,
                   OdeSolver,
                   NumericIntegrationSteps);


        for (int idx = 0; idx < trees.Length; idx++) {
          var varName = string.Empty;
          if (idx < targetVars.Length) {
            varName = targetVars[idx];
          } else {
            varName = latentVariables[idx - targetVars.Length];
          }
          var tree = trees[idx];

          var origTreeVar = new HeuristicLab.Core.Variable(varName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
          var simplifiedTreeVar = new HeuristicLab.Core.Variable(varName + "(simplified)");
          simplifiedTreeVar.Value = TreeSimplifier.Simplify(tree);
          models.Add(simplifiedTreeVar);

        }

        results["Models"].Value = models;
        #endregion
      }
    }

    #region interpretation

    // the following uses auto-diff to calculate the gradient w.r.t. the parameters forward in time.
    // this is basically the method described in Gronwall T. Note on the derivatives with respect to a parameter of the solutions of a system of differential equations. Ann. Math. 1919;20:292–296.

    // a comparison of three potential calculation methods for the gradient is given in:
    // Sengupta, B., Friston, K. J., & Penny, W. D. (2014). Efficient gradient computation for dynamical models. Neuroimage, 98(100), 521–527. http://doi.org/10.1016/j.neuroimage.2014.04.040
    // "Our comparison establishes that the adjoint method is computationally more efficient for numerical estimation of parametric gradients
    // for state-space models — both linear and non-linear, as in the case of a dynamical causal model (DCM)"

    // for a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes

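    // In this notation: for the system y' = f(y, θ) the sensitivity s_j := ∂y/∂θ_j
    // of the solution w.r.t. parameter θ_j satisfies the linear ODE
    //   s_j' = (∂f/∂y)·s_j + ∂f/∂θ_j,  s_j(t0) = 0,
    // and is integrated forward in time alongside y itself.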
    public static IEnumerable<Tuple<double, Vector>[]> Integrate(OptimizationData optimizationData) {
      var nTargets = optimizationData.targetVariables.Length;
      var n = optimizationData.rows.Length * optimizationData.targetVariables.Length;
      var d = optimizationData.nodeValueLookup.ParameterCount;
      double[] fi = new double[n];
      double[,] jac = new double[n, d];
      Integrate(optimizationData, fi, jac);
      for (int i = 0; i < optimizationData.rows.Length; i++) {
        var res = new Tuple<double, Vector>[nTargets];
        for (int j = 0; j < nTargets; j++) {
          res[j] = Tuple.Create(fi[i * nTargets + j], Vector.CreateFromMatrixRow(jac, i * nTargets + j));
        }
        yield return res;
      }
    }

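    // Output layout: fi is row-major over (row, calculated variable). For each episode
    // the first row reports the observed starting values (with zero gradient); the
    // following rows contain the integrated predictions. If integration produces NaN
    // or infinity, all remaining rows are filled with the last valid values.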
    public static void Integrate(OptimizationData optimizationData, double[] fi, double[,] jac) {
      var trees = optimizationData.trees;
      var dataset = optimizationData.problemData.Dataset;
      var inputVariables = optimizationData.problemData.AllowedInputVariables.ToArray();
      var targetVariables = optimizationData.targetVariables;
      var latentVariables = optimizationData.latentVariables;
      var episodes = optimizationData.episodes;
      var odeSolver = optimizationData.odeSolver;
      var numericIntegrationSteps = optimizationData.numericIntegrationSteps;
      var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must coincide with the order of trees in the encoding



      var nodeValues = optimizationData.nodeValueLookup;

      // TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver
      var outputRowIdx = 0;
      var episodeIdx = 0;
      foreach (var episode in optimizationData.episodes) {
        var rows = Enumerable.Range(episode.Start, episode.End - episode.Start).ToArray();

        var t0 = rows.First();

        // initialize values for inputs and targets from dataset
        foreach (var varName in inputVariables) {
          var y0 = dataset.GetDoubleValue(varName, t0);
          nodeValues.SetVariableValue(varName, y0, Vector.Zero);
        }
        foreach (var varName in targetVariables) {
          var y0 = dataset.GetDoubleValue(varName, t0);
          nodeValues.SetVariableValue(varName, y0, Vector.Zero);

          // output starting value
          fi[outputRowIdx] = y0;
          Vector.Zero.CopyTo(jac, outputRowIdx);

          outputRowIdx++;
        }

        { // CODE BELOW DOESN'T WORK ANYMORE
          // if (latentVariables.Length > 0) throw new NotImplementedException();
          //
          // // add value entries for latent variables which are also integrated
          // // initial values are at the end of the parameter vector
          // // separate initial values for each episode
          // var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
          // foreach (var latentVar in latentVariables) {
          //   var arr = new double[parameterValues.Length]; // backing array
          //   arr[initialValueIdx] = 1.0;
          //   var g = new Vector(arr);
          //   nodeValues.SetVariableValue(latentVar, parameterValues[initialValueIdx], g); // we don't have observations for latent variables therefore we optimize the initial value for each episode
          //   initialValueIdx++;
          // }
        }

        var prevT = t0; // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements.
        foreach (var t in rows.Skip(1)) {
          if (odeSolver == "HeuristicLab")
            IntegrateHL(trees, calculatedVariables, nodeValues, numericIntegrationSteps); // integrator updates nodeValues
          else if (odeSolver == "CVODES")
            throw new NotImplementedException();
          // IntegrateCVODES(trees, calculatedVariables, variableValues, parameterValues, t - prevT);
          else throw new InvalidOperationException("Unknown ODE solver " + odeSolver);
          prevT = t;

          for (int i = 0; i < calculatedVariables.Length; i++) {
            var targetVar = calculatedVariables[i];
            var yt = nodeValues.GetVariableValue(targetVar);

            // fill up remaining rows with last valid value if there are invalid values
            if (double.IsNaN(yt.Item1) || double.IsInfinity(yt.Item1)) {
              for (; outputRowIdx < fi.Length; outputRowIdx++) {
                var prevIdx = outputRowIdx - calculatedVariables.Length;
                fi[outputRowIdx] = fi[prevIdx]; // current <- prev
                if (jac != null) for (int j = 0; j < jac.GetLength(1); j++) jac[outputRowIdx, j] = jac[prevIdx, j];
              }
              return;
            }

            fi[outputRowIdx] = yt.Item1;
            var g = yt.Item2;
            g.CopyTo(jac, outputRowIdx);
            outputRowIdx++;
          }

          // update for next time step (only the inputs)
          foreach (var varName in inputVariables) {
            nodeValues.SetVariableValue(varName, dataset.GetDoubleValue(varName, t), Vector.Zero);
          }
        }
        episodeIdx++;
      }
    }

    #region CVODES

    /*
    /// <summary>
    ///  Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization
    /// </summary>
    /// <param name="trees">Each equation in the ODE represented as a tree</param>
    /// <param name="calculatedVariables">The names of the calculated variables</param>
    /// <param name="variableValues">The start values of the calculated variables as well as their sensitivities over parameters</param>
    /// <param name="parameterValues">The current parameter values</param>
    /// <param name="t">The time t up to which we need to integrate.</param>
    private static void IntegrateCVODES(
      ISymbolicExpressionTree[] trees, // f(y,p) in tree representation
      string[] calculatedVariables, // names of elements of y
      Dictionary<string, Tuple<double, Vector>> variableValues,  // y (input and output); input: y(t0), output: y(t0+t)
      double[] parameterValues, // p
      double t // duration t for which we want to integrate
      ) {

      // the RHS of the ODE
      // dy/dt = f(y_t,x_t,p)
      CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, parameterValues);
      // the Jacobian ∂f/∂y
      CVODES.CVDlsJacFunc jac = CreateJac(trees, calculatedVariables, parameterValues);

      // the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i
      CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, parameterValues);

      // setup solver
      int numberOfEquations = trees.Length;
      IntPtr y = IntPtr.Zero;
      IntPtr cvode_mem = IntPtr.Zero;
      IntPtr A = IntPtr.Zero;
      IntPtr yS0 = IntPtr.Zero;
      IntPtr linearSolver = IntPtr.Zero;
      var ns = parameterValues.Length; // number of parameters

      try {
        y = CVODES.N_VNew_Serial(numberOfEquations);
        // init y to current values of variables
        // y must be initialized before calling CVodeInit
        for (int i = 0; i < calculatedVariables.Length; i++) {
          CVODES.NV_Set_Ith_S(y, i, variableValues[calculatedVariables[i]].Item1);
        }

        cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL);

        var flag = CVODES.CVodeInit(cvode_mem, f, 0.0, y);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        double relTol = 1.0e-2;
        double absTol = 1.0;
        flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol);  // TODO: probably need to adjust absTol per variable
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations);
        Debug.Assert(A != IntPtr.Zero);

        linearSolver = CVODES.SUNDenseLinearSolver(y, A);
        Debug.Assert(linearSolver != IntPtr.Zero);

        flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        flag = CVODES.CVDlsSetJacFn(cvode_mem, jac);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter
        unsafe {
          // set to initial sensitivities supplied by caller
          for (int pIdx = 0; pIdx < ns; pIdx++) {
            var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx);
            for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
              CVODES.NV_Set_Ith_S(yS0_i, varIdx, variableValues[calculatedVariables[varIdx]].Item2[pIdx]);
            }
          }
        }

        flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        flag = CVODES.CVodeSensEEtolerances(cvode_mem);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        // make one forward integration step
        double tout = 0.0; // first output time
        flag = CVODES.CVode(cvode_mem, t, y, ref tout, CVODES.CV_NORMAL);
        if (flag == CVODES.CV_SUCCESS) {
          Debug.Assert(t == tout);

          // get sensitivities
          flag = CVODES.CVodeGetSens(cvode_mem, ref tout, yS0);
          Debug.Assert(CVODES.CV_SUCCESS == flag);

          // update variableValues based on integration results
          for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
            var yi = CVODES.NV_Get_Ith_S(y, varIdx);
            var gArr = new double[parameterValues.Length];
            for (var pIdx = 0; pIdx < parameterValues.Length; pIdx++) {
              unsafe {
                var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx);
                gArr[pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx);
              }
            }
            variableValues[calculatedVariables[varIdx]] = Tuple.Create(yi, new Vector(gArr));
          }
        } else {
          variableValues.Clear();   // indicate problems by not returning new values
        }

        // cleanup all allocated objects
      } finally {
        if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y);
        if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem);
        if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver);
        if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A);
        if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns);
      }
    }


    private static CVODES.CVRhsFunc CreateOdeRhs(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables,
      double[] parameterValues) {
      // we don't need to calculate a gradient here
      return (double t,
              IntPtr y, // N_Vector, current value of y (input)
              IntPtr ydot, // N_Vector, calculated value of y' (output)
              IntPtr user_data // optional user data, (unused here)
              ) => {
                // TODO: perf
                var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();

                int pIdx = 0;
                foreach (var tree in trees) {
                  foreach (var n in tree.IterateNodesPrefix()) {
                    if (IsConstantNode(n)) {
                      nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we do not need a gradient
                      pIdx++;
                    } else if (n.SubtreeCount == 0) {
                      // for variables and latent variables get the value from variableValues
                      var varName = n.Symbol.Name;
                      var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
                      if (varIdx < 0) throw new InvalidProgramException();
                      var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
                      nodeValues.Add(n, Tuple.Create(y_i, Vector.Zero)); // no gradient needed
                    }
                  }
                }
                for (int i = 0; i < trees.Length; i++) {
                  var tree = trees[i];
                  var res_i = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
                  CVODES.NV_Set_Ith_S(ydot, i, res_i.Item1);
                }
                return 0;
              };
    }

    private static CVODES.CVDlsJacFunc CreateJac(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables,
      double[] parameterValues) {

      return (
        double t, // current time (input)
        IntPtr y, // N_Vector, current value of y (input)
        IntPtr fy, // N_Vector, current value of f (input)
        IntPtr Jac, // SUNMatrix ∂f/∂y (output; row i contains the ∂f_i/∂y vector)
        IntPtr user_data, // optional (unused here)
        IntPtr tmp1, // N_Vector, optional (unused here)
        IntPtr tmp2, // N_Vector, optional (unused here)
        IntPtr tmp3 // N_Vector, optional (unused here)
      ) => {
        // here we need to calculate partial derivatives for the calculated variables y
        var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
        int pIdx = 0;
        foreach (var tree in trees) {
          foreach (var n in tree.IterateNodesPrefix()) {
            if (IsConstantNode(n)) {
              nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we need a gradient over y which is zero for parameters
              pIdx++;
            } else if (n.SubtreeCount == 0) {
              // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
              var varName = n.Symbol.Name;
              var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
              if (varIdx < 0) throw new InvalidProgramException();

              var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
              var gArr = new double[CVODES.NV_LENGTH_S(y)]; // backing array
              gArr[varIdx] = 1.0;
              var g = new Vector(gArr);
              nodeValues.Add(n, Tuple.Create(y_i, g));
            }
          }
        }

        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
          var g = res.Item2;
          for (int j = 0; j < calculatedVariables.Length; j++) {
            CVODES.SUNDenseMatrix_Set(Jac, i, j, g[j]);
          }
        }
        return 0; // on success
      };
    }


    // to calculate sensitivities RHS for all equations at once
    // must compute (∂f/∂y)s_i(t) + ∂f/∂p_i and store in ySdot.
    // Index i refers to parameters, dimensionality of matrix and vectors is number of equations
    private static CVODES.CVSensRhsFn CreateSensitivityRhs(ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) {
      return (
              int Ns, // number of parameters
              double t, // current time
              IntPtr y, // N_Vector y(t) (input)
              IntPtr ydot, // N_Vector dy/dt(t) (input)
              IntPtr yS, // N_Vector*, one vector for each parameter (input)
              IntPtr ySdot, // N_Vector*, one vector for each parameter (output)
              IntPtr user_data, // optional (unused here)
              IntPtr tmp1, // N_Vector, optional (unused here)
              IntPtr tmp2 // N_Vector, optional (unused here)
        ) => {
          // here we need to calculate partial derivatives for the calculated variables y as well as for the parameters
          var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
          var d = calculatedVariables.Length + parameterValues.Length; // dimensionality of gradient
          // first collect variable values
          foreach (var tree in trees) {
            foreach (var n in tree.IterateNodesPrefix()) {
              if (IsVariableNode(n)) {
                // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
                var varName = n.Symbol.Name;
                var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
                if (varIdx < 0) throw new InvalidProgramException();

                var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
                var gArr = new double[d]; // backing array
                gArr[varIdx] = 1.0;
                var g = new Vector(gArr);
                nodeValues.Add(n, Tuple.Create(y_i, g));
              }
            }
          }
          // then collect constants
          int pIdx = 0;
          foreach (var tree in trees) {
            foreach (var n in tree.IterateNodesPrefix()) {
              if (IsConstantNode(n)) {
                var gArr = new double[d];
                gArr[calculatedVariables.Length + pIdx] = 1.0;
                var g = new Vector(gArr);
                nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], g));
                pIdx++;
              }
            }
          }
          // gradient vector is [∂f/∂y_1, ∂f/∂y_2, ... ∂f/∂yN, ∂f/∂p_1 ... ∂f/∂p_K]


          for (pIdx = 0; pIdx < Ns; pIdx++) {
            unsafe {
              var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
              CVODES.N_VConst_Serial(0.0, sDot_pi);
            }
          }

          for (int i = 0; i < trees.Length; i++) {
            var tree = trees[i];
            var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
            var g = res.Item2;


            // update ySdot = (∂f/∂y)s_i(t) + ∂f/∂p_i

            for (pIdx = 0; pIdx < Ns; pIdx++) {
              unsafe {
                var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
                var s_pi = *((IntPtr*)yS.ToPointer() + pIdx);

                var v = CVODES.NV_Get_Ith_S(sDot_pi, i);
                // (∂f/∂y)s_i(t)
                var p = 0.0;
                for (int yIdx = 0; yIdx < calculatedVariables.Length; yIdx++) {
                  p += g[yIdx] * CVODES.NV_Get_Ith_S(s_pi, yIdx);
                }
                // + ∂f/∂p_i
                CVODES.NV_Set_Ith_S(sDot_pi, i, v + p + g[calculatedVariables.Length + pIdx]);
              }
            }

          }
          return 0; // on success
        };
    }
    */
    #endregion

    private static void IntegrateHL(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables, // names of integrated variables
      NodeValueLookup nodeValues,
      int numericIntegrationSteps) {

      double[] deltaF = new double[calculatedVariables.Length];
      Vector[] deltaG = new Vector[calculatedVariables.Length];

      double h = 1.0 / numericIntegrationSteps;
      for (int step = 0; step < numericIntegrationSteps; step++) {

        // evaluate all trees
        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];

          // Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol
          double f; Vector g;
          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues, out f, out g);
          deltaF[i] = f;
          deltaG[i] = g;
        }

        // update variableValues for the next step (explicit forward Euler step for values and gradients)
        for (int i = 0; i < trees.Length; i++) {
          var varName = calculatedVariables[i];
          var oldVal = nodeValues.GetVariableValue(varName);
          nodeValues.SetVariableValue(varName, oldVal.Item1 + h * deltaF[i], oldVal.Item2.Add(deltaG[i].Scale(h)));
        }
      }
    }
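    // IntegrateHL advances all calculated variables with forward Euler steps of size h = 1/numericIntegrationSteps:
    //   y(t+h)  = y(t)  + h * f(y(t))
    //   ∇y(t+h) = ∇y(t) + h * ∇f(y(t))    (gradients w.r.t. the tree parameters, propagated alongside the values)
    // A minimal scalar sketch of the same scheme (illustrative only; f stands in for the interpreted tree RHS):
    //   double y = y0;
    //   double h = 1.0 / numericIntegrationSteps;
    //   for (int step = 0; step < numericIntegrationSteps; step++) y += h * f(y);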

    // TODO: use an existing interpreter implementation instead
    private static double InterpretRec(ISymbolicExpressionTreeNode node, NodeValueLookup nodeValues) {
      if (node is ConstantTreeNode) {
        return ((ConstantTreeNode)node).Value;
      } else if (node is VariableTreeNode) {
        return nodeValues.NodeValue(node);
      } else if (node.Symbol is Addition) {
        Debug.Assert(node.SubtreeCount == 2);
        var f = InterpretRec(node.GetSubtree(0), nodeValues);
        var g = InterpretRec(node.GetSubtree(1), nodeValues);
        return f + g;
      } else if (node.Symbol is Multiplication) {
        Debug.Assert(node.SubtreeCount == 2);
        var f = InterpretRec(node.GetSubtree(0), nodeValues);
        var g = InterpretRec(node.GetSubtree(1), nodeValues);
        return f * g;
      } else if (node.Symbol is Subtraction) {
        Debug.Assert(node.SubtreeCount <= 2);
        if (node.SubtreeCount == 1) {
          var f = InterpretRec(node.GetSubtree(0), nodeValues);
          return -f;
        } else {
          var f = InterpretRec(node.GetSubtree(0), nodeValues);
          var g = InterpretRec(node.GetSubtree(1), nodeValues);
          return f - g;
        }
      } else if (node.Symbol is Division) {
        Debug.Assert(node.SubtreeCount <= 2);
        if (node.SubtreeCount == 1) {
          var f = InterpretRec(node.GetSubtree(0), nodeValues);
          // protected division
          if (f.IsAlmost(0.0)) {
            return 0;
          } else {
            return 1.0 / f;
          }
        } else {
          var f = InterpretRec(node.GetSubtree(0), nodeValues);
          var g = InterpretRec(node.GetSubtree(1), nodeValues);
          // protected division
          if (g.IsAlmost(0.0)) {
            return 0;
          } else {
            return f / g;
          }
        }
      } else if (node.Symbol is Sine) {
        Debug.Assert(node.SubtreeCount == 1);
        var f = InterpretRec(node.GetSubtree(0), nodeValues);
        return Math.Sin(f);
      } else if (node.Symbol is Cosine) {
        Debug.Assert(node.SubtreeCount == 1);
        var f = InterpretRec(node.GetSubtree(0), nodeValues);
        return Math.Cos(f);
      } else if (node.Symbol is Square) {
        Debug.Assert(node.SubtreeCount == 1);
        var f = InterpretRec(node.GetSubtree(0), nodeValues);
        return f * f;
      } else if (node.Symbol is Exponential) {
        Debug.Assert(node.SubtreeCount == 1);
        var f = InterpretRec(node.GetSubtree(0), nodeValues);
        return Math.Exp(f);
      } else if (node.Symbol is Logarithm) {
        Debug.Assert(node.SubtreeCount == 1);
        var f = InterpretRec(node.GetSubtree(0), nodeValues);
        return Math.Log(f);
      } else throw new NotSupportedException("unsupported symbol");
    }
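    // Note on protected division above (illustrative): a division whose denominator is almost zero evaluates
    // to 0 instead of ±Infinity, so e.g. the tree for "1 / (x - x)" yields 0 for every input x.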

    private static void InterpretRec(
      ISymbolicExpressionTreeNode node,
      NodeValueLookup nodeValues,      // contains value and gradient vector for a node (variables and constants only)
      out double z,
      out Vector dz
      ) {
      double f, g;
      Vector df, dg;
      if (node.Symbol is Constant || node.Symbol is Variable) {
        z = nodeValues.NodeValue(node);
        dz = Vector.CreateNew(nodeValues.NodeGradient(node)); // original gradient vectors are never changed by evaluation
      } else if (node.Symbol is Addition) {
        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
        InterpretRec(node.GetSubtree(1), nodeValues, out g, out dg);
        z = f + g;
        dz = df.Add(dg);
      } else if (node.Symbol is Multiplication) {
        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
        InterpretRec(node.GetSubtree(1), nodeValues, out g, out dg);
        z = f * g;
        dz = df.Scale(g).Add(dg.Scale(f));  // f'*g + f*g'
      } else if (node.Symbol is Subtraction) {
        if (node.SubtreeCount == 1) {
          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
          z = -f;
          dz = df.Scale(-1.0);
        } else {
          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
          InterpretRec(node.GetSubtree(1), nodeValues, out g, out dg);
          z = f - g;
          dz = df.Subtract(dg);
        }
      } else if (node.Symbol is Division) {
        if (node.SubtreeCount == 1) {
          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
          // protected division
          if (f.IsAlmost(0.0)) {
            z = 0;
            dz = Vector.Zero;
          } else {
            z = 1.0 / f;
            dz = df.Scale(-z * z);  // d(1/f) = -f'/f²
          }
        } else {
          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
          InterpretRec(node.GetSubtree(1), nodeValues, out g, out dg);
          // protected division
          if (g.IsAlmost(0.0)) {
            z = 0;
            dz = Vector.Zero;
          } else {
            var inv_g = 1.0 / g;
            z = f * inv_g;
            dz = dg.Scale(-f * inv_g * inv_g).Add(df.Scale(inv_g));  // d(f/g) = f'/g - f*g'/g²
          }
        }
      } else if (node.Symbol is Sine) {
        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
        z = Math.Sin(f);
        dz = df.Scale(Math.Cos(f));
      } else if (node.Symbol is Cosine) {
        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
        z = Math.Cos(f);
        dz = df.Scale(-Math.Sin(f));
      } else if (node.Symbol is Square) {
        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
        z = f * f;
        dz = df.Scale(2.0 * f);
      } else if (node.Symbol is Exponential) {
        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
        z = Math.Exp(f);
        dz = df.Scale(Math.Exp(f));
      } else if (node.Symbol is Logarithm) {
        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
        z = Math.Log(f);
        dz = df.Scale(1.0 / f);
      } else {
        throw new NotSupportedException("unsupported symbol");
      }
    }
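    // This overload propagates (value, gradient) pairs in forward mode, analogous to dual-number arithmetic.
    // Illustrative example: for z = f*g with f = x (gradient e_x) and g = θ (gradient e_θ), the product rule
    // above yields dz = g*e_x + f*e_θ, i.e. ∂z/∂x = θ and ∂z/∂θ = x.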

    #endregion

    #region events
    /*
     * Dependencies between parameters:
     *
     * ProblemData
     *    |
     *    V
     * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables
     *               |   |                 |                   |
     *               V   V                 |                   |
     *             Grammar <---------------+-------------------
     *                |
     *                V
     *            Encoding
     */
    private void RegisterEventHandlers() {
      ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged;
      if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed;

      TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged;
      if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;

      FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged;
      if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;

      MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged;

      NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged;
    }

    private void NumLatentVariablesChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    private void MaximumLengthChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) {
      FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
    }

    private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      UpdateGrammarAndEncoding();
    }

    private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
      TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
    }

    private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      UpdateGrammarAndEncoding();
    }

    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      ProblemDataParameter.Value.Changed += ProblemData_Changed;
      OnProblemDataChanged();
      OnReset();
    }

    private void ProblemData_Changed(object sender, EventArgs e) {
      OnProblemDataChanged();
      OnReset();
    }

    private void OnProblemDataChanged() {
      UpdateTargetVariables();        // implicitly updates other dependent parameters
      var handler = ProblemDataChanged;
      if (handler != null) handler(this, EventArgs.Empty);
    }

    #endregion

    #region helper

    private void InitAllParameters() {
      UpdateTargetVariables(); // implicitly updates the grammar and the encoding
    }

    private ReadOnlyCheckedItemList<StringValue> CreateFunctionSet() {
      var l = new CheckedItemList<StringValue>();
      l.Add(new StringValue("Addition").AsReadOnly());
      l.Add(new StringValue("Multiplication").AsReadOnly());
      l.Add(new StringValue("Division").AsReadOnly());
      l.Add(new StringValue("Subtraction").AsReadOnly());
      l.Add(new StringValue("Sine").AsReadOnly());
      l.Add(new StringValue("Cosine").AsReadOnly());
      l.Add(new StringValue("Square").AsReadOnly());
      return l.AsReadOnly();
    }

    private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
      // return n.Symbol.Name[0] == 'θ';
      return n is ConstantTreeNode;
    }
    private static double GetConstantValue(ISymbolicExpressionTreeNode n) {
      return ((ConstantTreeNode)n).Value;
    }
    private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name[0] == 'λ';
    }
    private static bool IsVariableNode(ISymbolicExpressionTreeNode n) {
      return (n.SubtreeCount == 0) && !IsConstantNode(n) && !IsLatentVariableNode(n);
    }
    private static string GetVariableName(ISymbolicExpressionTreeNode n) {
      return ((VariableTreeNode)n).VariableName;
    }

    private void UpdateTargetVariables() {
      var currentlySelectedVariables = TargetVariables.CheckedItems
        .OrderBy(i => i.Index)
        .Select(i => i.Value.Value)
        .ToArray();

      var newVariablesList = new CheckedItemList<StringValue>(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly();
      // restore the checked state of previously selected variables
      foreach (var item in newVariablesList) {
        newVariablesList.SetItemCheckedState(item, currentlySelectedVariables.Contains(item.Value));
      }
      TargetVariablesParameter.Value = newVariablesList;
    }

    private void UpdateGrammarAndEncoding() {
      var encoding = new MultiEncoding();
      var g = CreateGrammar();
      foreach (var targetVar in TargetVariables.CheckedItems) {
        var e = new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength);
        var multiManipulator = e.Operators.First(op => op is MultiSymbolicExpressionTreeManipulator);
        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
        // make sure our multi-manipulator is the only manipulator
        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
        encoding = encoding.Add(e); // only limit by length
      }
      for (int i = 1; i <= NumberOfLatentVariables; i++) {
        var e = new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength);
        var multiManipulator = e.Operators.First(op => op is MultiSymbolicExpressionTreeManipulator);
        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
        // make sure our multi-manipulator is the only manipulator
        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
        encoding = encoding.Add(e);
      }
      Encoding = encoding;
    }

    private ISymbolicExpressionGrammar CreateGrammar() {
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.StartGrammarManipulation();

      var problemData = ProblemData;
      var ds = problemData.Dataset;
      grammar.MaximumFunctionArguments = 0;
      grammar.MaximumFunctionDefinitions = 0;
      var allowedVariables = problemData.AllowedInputVariables.Concat(TargetVariables.CheckedItems.Select(chk => chk.Value.Value));
      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableBase>()) {
        if (!varSymbol.Fixed) {
          varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
          varSymbol.VariableNames = allowedVariables.Where(x => ds.VariableHasType<double>(x));
        }
      }
      foreach (var factorSymbol in grammar.Symbols.OfType<BinaryFactorVariable>()) {
        if (!factorSymbol.Fixed) {
          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
          factorSymbol.VariableValues = factorSymbol.VariableNames
            .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList());
        }
      }
      foreach (var factorSymbol in grammar.Symbols.OfType<FactorVariable>()) {
        if (!factorSymbol.Fixed) {
          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
          factorSymbol.VariableValues = factorSymbol.VariableNames
            .ToDictionary(varName => varName,
            varName => ds.GetStringValues(varName).Distinct()
            .Select((n, i) => Tuple.Create(n, i))
            .ToDictionary(tup => tup.Item1, tup => tup.Item2));
        }
      }

      grammar.ConfigureAsDefaultRegressionGrammar();
      grammar.GetSymbol("Logarithm").Enabled = false; // not supported yet
      grammar.GetSymbol("Exponential").Enabled = false; // not supported yet

      // configure initialization of constants
      var constSy = (Constant)grammar.GetSymbol("Constant");
      // max and min are only relevant for initialization
      constSy.MaxValue = +1.0e-1; // small initial values for constant opt
      constSy.MinValue = -1.0e-1;
      constSy.MultiplicativeManipulatorSigma = 1.0; // allow large jumps for manipulation
      constSy.ManipulatorMu = 0.0;
      constSy.ManipulatorSigma = 1.0; // allow large jumps

      // configure initialization of variables
      var varSy = (Variable)grammar.GetSymbol("Variable");
      // fix variable weights to 1.0
      varSy.WeightMu = 1.0;
      varSy.WeightSigma = 0.0;
      varSy.WeightManipulatorMu = 0.0;
      varSy.WeightManipulatorSigma = 0.0;
      varSy.MultiplicativeWeightManipulatorSigma = 0.0;

      foreach (var f in FunctionSet) {
        grammar.GetSymbol(f.Value).Enabled = FunctionSet.ItemChecked(f);
      }

      grammar.FinishedGrammarManipulation();
      return grammar;
      // // whenever ProblemData is changed we create a new grammar with the necessary symbols
      // var g = new SimpleSymbolicExpressionGrammar();
      // var unaryFunc = new string[] { "sin", "cos", "sqr" };
      // var binaryFunc = new string[] { "+", "-", "*", "%" };
      // foreach (var func in unaryFunc) {
      //   if (FunctionSet.CheckedItems.Any(ci => ci.Value.Value == func)) g.AddSymbol(func, 1, 1);
      // }
      // foreach (var func in binaryFunc) {
      //   if (FunctionSet.CheckedItems.Any(ci => ci.Value.Value == func)) g.AddSymbol(func, 2, 2);
      // }
      //
      // foreach (var variableName in ProblemData.AllowedInputVariables.Union(TargetVariables.CheckedItems.Select(i => i.Value.Value)))
      //   g.AddTerminalSymbol(variableName);
      //
      // // generate symbols for numeric parameters for which the value is optimized using AutoDiff
      // // we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees
      // var numericConstantsFactor = 2.0;
      // for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + TargetVariables.CheckedItems.Count()); i++) {
      //   g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
      // }
      //
      // // generate symbols for latent variables
      // for (int i = 1; i <= NumberOfLatentVariables; i++) {
      //   g.AddTerminalSymbol("λ" + i); // numeric parameter for which the value is optimized using AutoDiff
      // }
      //
      // return g;
    }
    #endregion

    #region Import & Export
    public void Load(IRegressionProblemData data) {
      Name = data.Name;
      Description = data.Description;
      ProblemData = data;
    }

    public IRegressionProblemData Export() {
      return ProblemData;
    }
    #endregion


    // TODO: for integration we only need a subset of the data that is required for optimization

    public class OptimizationData {
      public readonly ISymbolicExpressionTree[] trees;
      public readonly string[] targetVariables;
      public readonly IRegressionProblemData problemData;
      public readonly double[][] targetValues;
      public readonly double[] inverseStandardDeviation;
      public readonly IntRange[] episodes;
      public readonly int numericIntegrationSteps;
      public readonly string[] latentVariables;
      public readonly string odeSolver;
      public readonly NodeValueLookup nodeValueLookup;
      public readonly int[] rows;
      internal readonly string[] variables;

      public OptimizationData(ISymbolicExpressionTree[] trees, string[] targetVars, string[] inputVariables,
        IRegressionProblemData problemData,
        double[][] targetValues,
        IntRange[] episodes,
        int numericIntegrationSteps, string[] latentVariables, string odeSolver) {
        this.trees = trees;
        this.targetVariables = targetVars;
        this.problemData = problemData;
        this.targetValues = targetValues;
        this.variables = inputVariables;
        // if (targetValues != null) {
        //   this.inverseStandardDeviation = new double[targetValues.Length];
        //   for (int i = 0; i < targetValues.Length; i++) {
        //     // calculate variance for each episode separately and calc the average
        //     var epStartIdx = 0;
        //     var stdevs = new List<double>();
        //     foreach (var ep in episodes) {
        //       var epValues = targetValues[i].Skip(epStartIdx).Take(ep.Size);
        //       stdevs.Add(epValues.StandardDeviation());
        //       epStartIdx += ep.Size;
        //     }
        //     inverseStandardDeviation[i] = 1.0 / stdevs.Average();
        //   }
        // } else
        this.inverseStandardDeviation = Enumerable.Repeat(1.0, trees.Length).ToArray();
        this.episodes = episodes;
        this.numericIntegrationSteps = numericIntegrationSteps;
        this.latentVariables = latentVariables;
        this.odeSolver = odeSolver;
        this.nodeValueLookup = new NodeValueLookup(trees);
        this.rows = episodes.SelectMany(ep => Enumerable.Range(ep.Start, ep.Size)).ToArray();
      }
    }
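    // A construction sketch (hypothetical variable names and argument values, for illustration only):
    //   var optData = new OptimizationData(trees, new[] { "y1", "y2" }, inputVars, problemData, targetValues,
    //     episodes: new[] { new IntRange(0, 100) }, numericIntegrationSteps: 10,
    //     latentVariables: new string[0], odeSolver: "HeuristicLab");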

    public class NodeValueLookup {
      private readonly Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> node2val = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
      private readonly Dictionary<string, List<ISymbolicExpressionTreeNode>> name2nodes = new Dictionary<string, List<ISymbolicExpressionTreeNode>>();
      private readonly ConstantTreeNode[] constantNodes;
      private readonly Vector[] constantGradientVectors;

      // private readonly Dictionary<int, ISymbolicExpressionTreeNode> paramIdx2node = new Dictionary<int, ISymbolicExpressionTreeNode>();

      public double NodeValue(ISymbolicExpressionTreeNode node) => node2val[node].Item1;
      public Vector NodeGradient(ISymbolicExpressionTreeNode node) => node2val[node].Item2;

      public NodeValueLookup(ISymbolicExpressionTree[] trees) {
        this.constantNodes = trees.SelectMany(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>()).ToArray();
        constantGradientVectors = new Vector[constantNodes.Length];
        for (int paramIdx = 0; paramIdx < constantNodes.Length; paramIdx++) {
          constantGradientVectors[paramIdx] = Vector.CreateIndicator(length: constantNodes.Length, idx: paramIdx);

          var node = constantNodes[paramIdx];
          node2val[node] = Tuple.Create(node.Value, constantGradientVectors[paramIdx]);
        }

        foreach (var tree in trees) {
          foreach (var node in tree.IterateNodesPrefix().Where(IsVariableNode)) {
            var varName = GetVariableName(node);
            if (!name2nodes.TryGetValue(varName, out List<ISymbolicExpressionTreeNode> nodes)) {
              nodes = new List<ISymbolicExpressionTreeNode>();
              name2nodes.Add(varName, nodes);
            }
            nodes.Add(node);
            SetVariableValue(varName, 0.0);  // this value is updated in the prediction loop
          }
        }
      }

      public int ParameterCount => constantNodes.Length;

      public void SetVariableValue(string variableName, double val) {
        SetVariableValue(variableName, val, Vector.Zero);
      }
      public Tuple<double, Vector> GetVariableValue(string variableName) {
        return node2val[name2nodes[variableName].First()];
      }
      public void SetVariableValue(string variableName, double val, Vector dVal) {
        if (name2nodes.TryGetValue(variableName, out List<ISymbolicExpressionTreeNode> nodes)) {
          nodes.ForEach(n => node2val[n] = Tuple.Create(val, dVal));
        } else {
          var fakeNode = new VariableTreeNode(new Variable());
          fakeNode.Weight = 1.0;
          fakeNode.VariableName = variableName;
          var newNodeList = new List<ISymbolicExpressionTreeNode>();
          newNodeList.Add(fakeNode);
          name2nodes.Add(variableName, newNodeList);
          node2val[fakeNode] = Tuple.Create(val, dVal);
        }
      }

      internal void UpdateParamValues(double[] x) {
        for (int i = 0; i < x.Length; i++) {
          constantNodes[i].Value = x[i];
          node2val[constantNodes[i]] = Tuple.Create(x[i], constantGradientVectors[i]);
        }
      }
    }
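    // Typical interplay (illustrative): the integration loop writes current variable values into the lookup
    // before interpreting the trees, and the optimizer pushes new constants between gradient evaluations:
    //   nodeValueLookup.SetVariableValue("x", 3.14);  // updates all nodes referring to "x" in all trees
    //   nodeValueLookup.UpdateParamValues(newThetas); // writes optimizer-proposed values back into the constant nodes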
  }
}