source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs @ 16268

Last change on this file since 16268 was 16268, checked in by gkronber, 5 years ago

#2925 created a separate algorithm (similar to NLR but for parameter identification of ODE systems) for debugging of the parameter optimization and fixed a problem with the order of items in the checkedItemCollection for TargetVariables

File size: 58.2 KB
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.Instances;
using Variable = HeuristicLab.Problems.DataAnalysis.Symbolic.Variable;

namespace HeuristicLab.Problems.DynamicalSystemsModelling {

  // Another possibility is spline smoothing of the data (over time) to obtain, for each target variable,
  // a denoised value and the derivative dy/dt for all observations.
  // The regression can then be applied directly to dy/dt (with the denoised y values as inputs).
  [Item("Dynamical Systems Modelling Problem", "TODO")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
  [StorableClass]
  public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
    #region parameter names
    private const string ProblemDataParameterName = "Data";
    private const string TargetVariablesParameterName = "Target variables";
    private const string FunctionSetParameterName = "Function set";
    private const string MaximumLengthParameterName = "Size limit";
    private const string MaximumParameterOptimizationIterationsParameterName = "Max. parameter optimization iterations";
    private const string NumberOfLatentVariablesParameterName = "Number of latent variables";
    private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
    private const string TrainingEpisodesParameterName = "Training episodes";
    private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes";
    private const string OdeSolverParameterName = "ODE Solver";
    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> TargetVariablesParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[TargetVariablesParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> FunctionSetParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[FunctionSetParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumLengthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumLengthParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumParameterOptimizationIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumParameterOptimizationIterationsParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; }
    }
    public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter {
      get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; }
    }
    public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; }
    }
    public IConstrainedValueParameter<StringValue> OdeSolverParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters[OdeSolverParameterName]; }
    }
    #endregion

    #region Properties
    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }

    public ReadOnlyCheckedItemList<StringValue> TargetVariables {
      get { return TargetVariablesParameter.Value; }
    }

    public ReadOnlyCheckedItemList<StringValue> FunctionSet {
      get { return FunctionSetParameter.Value; }
    }

    public int MaximumLength {
      get { return MaximumLengthParameter.Value.Value; }
    }
    public int MaximumParameterOptimizationIterations {
      get { return MaximumParameterOptimizationIterationsParameter.Value.Value; }
    }
    public int NumberOfLatentVariables {
      get { return NumberOfLatentVariablesParameter.Value.Value; }
    }
    public int NumericIntegrationSteps {
      get { return NumericIntegrationStepsParameter.Value.Value; }
    }
    public IEnumerable<IntRange> TrainingEpisodes {
      get { return TrainingEpisodesParameter.Value; }
    }
    public bool OptimizeParametersForEpisodes {
      get { return OptimizeParametersForEpisodesParameter.Value.Value; }
    }

    public string OdeSolver {
      get { return OdeSolverParameter.Value.Value; }
      set {
        var matchingValue = OdeSolverParameter.ValidValues.FirstOrDefault(v => v.Value == value);
        if (matchingValue == null) throw new ArgumentOutOfRangeException();
        else OdeSolverParameter.Value = matchingValue;
      }
    }

    #endregion

    public event EventHandler ProblemDataChanged;

    public override bool Maximization {
      get { return false; } // we minimize NMSE
    }

    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(bool deserializing) : base(deserializing) { }
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) {
        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
      }
      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers();
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
    #endregion

    public Problem()
      : base() {
      var targetVariables = new CheckedItemList<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem
      var functions = CreateFunctionSet();
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. Use CSV import functionality to import data.", new RegressionProblemData()));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(TargetVariablesParameterName, "Target variables (overrides setting in ProblemData)", targetVariables));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximally allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters (using L-BFGS). More iterations make the algorithm slower; fewer iterations might prevent convergence of the optimization scheme. Default = 100", new IntValue(100)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference from target variables is that there are no data against which the calculated values of latent variables are compared. Set to a small value (0 .. 5) as necessary (default = 0)", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps make the algorithm slower; fewer steps worsen the accuracy of the numeric integration scheme.", new IntValue(10)));
      Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training, each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>()));
      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));

      var solversStr = new string[] { "HeuristicLab", "CVODES" };
      var solvers = new ItemSet<StringValue>(
        solversStr.Select(s => new StringValue(s).AsReadOnly())
        );
      Parameters.Add(new ConstrainedValueParameter<StringValue>(OdeSolverParameterName, "The solver to use for solving the initial value ODE problems", solvers, solvers.First()));

      RegisterEventHandlers();
      InitAllParameters();

      // TODO: do not clear selection of target variables when the input variables are changed (keep selected target variables)
      // TODO: UI hangs when selecting / deselecting input variables because the encoding is updated on each item
      // TODO: use training range as default training episode
      // TODO: write back optimized parameters to solution?

    }

    public override double Evaluate(Individual individual, IRandom random) {
      var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
      // write back optimized parameters to tree nodes instead of the separate OptTheta variable
      // retrieve optimized parameters from nodes?

      if (OptimizeParametersForEpisodes) {
        int eIdx = 0;
        double totalNMSE = 0.0;
        int totalSize = 0;
        foreach (var episode in TrainingEpisodes) {
          double[] optTheta;
          double nmse;
          OptimizeForEpisodes(trees, random, new[] { episode }, out optTheta, out nmse);
          individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
          eIdx++;
          totalNMSE += nmse * episode.Size;
          totalSize += episode.Size;
        }
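        // size-weighted mean NMSE over all episodes: totalNMSE = Σ_e nmse_e · |e|, totalSize = Σ_e |e|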
        return totalNMSE / totalSize;
      } else {
        double[] optTheta;
        double nmse;
        OptimizeForEpisodes(trees, random, TrainingEpisodes, out optTheta, out nmse);
        individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
        return nmse;
      }
    }

    private void OptimizeForEpisodes(
      ISymbolicExpressionTree[] trees,
      IRandom random,
      IEnumerable<IntRange> episodes,
      out double[] optTheta,
      out double nmse) {
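      // episodes are half-open ranges [Start, End); Enumerable.Range(Start, End - Start) enumerates Start .. End-1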
      var rows = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)).ToArray();
      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
      var targetValues = new double[rows.Length, targetVars.Length];

      // collect values of all target variables
      var colIdx = 0;
      foreach (var targetVar in targetVars) {
        int rowIdx = 0;
        foreach (var value in problemData.Dataset.GetDoubleValues(targetVar, rows)) {
          targetValues[rowIdx, colIdx] = value;
          rowIdx++;
        }
        colIdx++;
      }

      // NOTE: the order of values in the parameter vector matches the prefix order of constant nodes in the trees
      var paramNodes = new List<ISymbolicExpressionTreeNode>();
      foreach (var t in trees) {
        foreach (var n in t.IterateNodesPrefix()) {
          if (IsConstantNode(n))
            paramNodes.Add(n);
        }
      }
      // init params randomly from Unif(-1e-5, 1e-5)
      var theta = paramNodes.Select(_ => random.NextDouble() * 2.0e-5 - 1.0e-5).ToArray();

      optTheta = new double[0];
      if (theta.Length > 0) {
        alglib.minlbfgsstate state;
        alglib.minlbfgsreport report;
        alglib.minlbfgscreate(Math.Min(theta.Length, 5), theta, out state);
        alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, MaximumParameterOptimizationIterations);
        alglib.minlbfgssetgradientcheck(state, 1e-6);
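        // NOTE: the gradient check verifies the analytic gradient against a numeric approximation (step 1e-6);
        // a failed verification is reported via termination type -7 (see the report codes below)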
        alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null,
          new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver }); //TODO: create a type

        alglib.minlbfgsresults(state, out optTheta, out report);

        /*
         *
         *         L-BFGS algorithm results

          INPUT PARAMETERS:
              State   -   algorithm state

          OUTPUT PARAMETERS:
              X       -   array[0..N-1], solution
              Rep     -   optimization report:
                          * Rep.TerminationType completion code:
                              * -7    gradient verification failed.
                                      See MinLBFGSSetGradientCheck() for more information.
                              * -2    rounding errors prevent further improvement.
                                      X contains best point found.
                              * -1    incorrect parameters were specified
                              *  1    relative function improvement is no more than
                                      EpsF.
                              *  2    relative step is no more than EpsX.
                              *  4    gradient norm is no more than EpsG
                              *  5    MaxIts steps were taken
                              *  7    stopping conditions are too stringent,
                                      further improvement is impossible
                          * Rep.IterationsCount contains iterations count
                          * NFEV contains number of function calculations
         */
        if (report.terminationtype < 0) { nmse = 10E6; return; }
      }

      // perform evaluation for optimal theta to get quality value
      double[] grad = new double[optTheta.Length];
      nmse = double.NaN;
      EvaluateObjectiveAndGradient(optTheta, ref nmse, grad,
        new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver });
      if (double.IsNaN(nmse) || double.IsInfinity(nmse)) { nmse = 10E6; return; } // return a large value (TODO: be consistent by using NMSE)
    }

    private static void EvaluateObjectiveAndGradient(double[] x, ref double f, double[] grad, object obj) {
      var trees = (ISymbolicExpressionTree[])((object[])obj)[0];
      var targetVariables = (string[])((object[])obj)[1];
      var problemData = (IRegressionProblemData)((object[])obj)[2];
      var targetValues = (double[,])((object[])obj)[3];
      var episodes = (IntRange[])((object[])obj)[4];
      var numericIntegrationSteps = (int)((object[])obj)[5];
      var latentVariables = (string[])((object[])obj)[6];
      var odeSolver = (string)((object[])obj)[7];

      Tuple<double, Vector>[][] predicted = null; // one array of predictions for each episode
      // TODO: stop integration early for diverging solutions
      predicted = Integrate(
          trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
          problemData.Dataset,
          problemData.AllowedInputVariables.ToArray(),
          targetVariables,
          latentVariables,
          episodes,
          x,
          odeSolver,
          numericIntegrationSteps).ToArray();

      if (predicted.Length != targetValues.GetLength(0)) {
        f = double.MaxValue;
        Array.Clear(grad, 0, grad.Length);
        return;
      }

      // for normalized MSE = 1/variance(t) * MSE(t, pred)
      // TODO: Perf. (by standardization of target variables before evaluation of all trees)
      var invVar = Enumerable.Range(0, targetVariables.Length)
        .Select(c => Enumerable.Range(0, targetValues.GetLength(0)).Select(row => targetValues[row, c])) // column vectors
        .Select(vec => vec.Variance())
        .Select(v => 1.0 / v)
        .ToArray();
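      // objective: f = (1/n) Σ_r Σ_c (y_rc - ŷ_rc)² / Var(y_c)
      // gradient:  ∂f/∂θ = -(2/n) Σ_r Σ_c (y_rc - ŷ_rc) · ∂ŷ_rc/∂θ / Var(y_c), accumulated in g below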

      // objective function is NMSE
      f = 0.0;
      int n = predicted.Length;
      double invN = 1.0 / n;
      var g = Vector.Zero;
      int r = 0;
      foreach (var y_pred in predicted) {
        for (int c = 0; c < y_pred.Length; c++) {

          var y_pred_f = y_pred[c].Item1;
          var y = targetValues[r, c];

          var res = (y - y_pred_f);
          var ressq = res * res;
          f += ressq * invN * invVar[c];
          g += -2.0 * res * y_pred[c].Item2 * invN * invVar[c];
        }
        r++;
      }

      g.CopyTo(grad);
    }

    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      if (!results.ContainsKey("Prediction (training)")) {
        results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Prediction (test)")) {
        results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Models")) {
        results.Add(new Result("Models", typeof(VariableCollection)));
      }

      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees

      var trainingList = new ItemList<DataTable>();

      if (OptimizeParametersForEpisodes) {
        var eIdx = 0;
        var trainingPredictions = new List<Tuple<double, Vector>[][]>();
        foreach (var episode in TrainingEpisodes) {
          var episodes = new[] { episode };
          var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta_" + eIdx]).ToArray(); // see Evaluate
          var trainingPrediction = Integrate(
                                   trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
                                   problemData.Dataset,
                                   problemData.AllowedInputVariables.ToArray(),
                                   targetVars,
                                   latentVariables,
                                   episodes,
                                   optTheta,
                                   OdeSolver,
                                   NumericIntegrationSteps).ToArray();
          trainingPredictions.Add(trainingPrediction);
          eIdx++;
        }

        // only for actual target values
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray());
          trainingDataTable.Rows.Add(actualValuesRow);
          trainingDataTable.Rows.Add(predictedValuesRow);
          trainingList.Add(trainingDataTable);
        }
        results["Prediction (training)"].Value = trainingList.AsReadOnly();

        var models = new VariableCollection();

        foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
          var targetVarName = tup.Item1;
          var tree = tup.Item2;

          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
        }
        results["Models"].Value = models;
      } else {
        var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see Evaluate
        var trainingPrediction = Integrate(
                                   trees,  // we assume trees contain expressions for the change of each target variable over time dy/dt
                                   problemData.Dataset,
                                   problemData.AllowedInputVariables.ToArray(),
                                   targetVars,
                                   latentVariables,
                                   TrainingEpisodes,
                                   optTheta,
                                   OdeSolver,
                                   NumericIntegrationSteps).ToArray();
        // only for actual target values
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
          trainingDataTable.Rows.Add(actualValuesRow);
          trainingDataTable.Rows.Add(predictedValuesRow);
          trainingList.Add(trainingDataTable);
        }
        // TODO: DRY for training and test
        var testList = new ItemList<DataTable>();
        var testRows = ProblemData.TestIndices.ToArray();
        var testPrediction = Integrate(
         trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
         problemData.Dataset,
         problemData.AllowedInputVariables.ToArray(),
         targetVars,
         latentVariables,
         new IntRange[] { ProblemData.TestPartition },
         optTheta,
         OdeSolver,
         NumericIntegrationSteps).ToArray();

        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var testDataTable = new DataTable(targetVar + " prediction (test)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
          testDataTable.Rows.Add(actualValuesRow);
          testDataTable.Rows.Add(predictedValuesRow);
          testList.Add(testDataTable);
        }

        results["Prediction (training)"].Value = trainingList.AsReadOnly();
        results["Prediction (test)"].Value = testList.AsReadOnly();
        #region simplification of models
        // TODO: the dependency on HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
        var models = new VariableCollection();    // to store target var names and original version of tree

        foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
          var targetVarName = tup.Item1;
          var tree = tup.Item2;

          // when we reference HeuristicLab.Problems.DataAnalysis.Symbolic we can translate symbols
          int nextParIdx = 0;
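          // TranslateTreeNode consumes optTheta in prefix order via nextParIdx, matching the prefix order
          // in which the constant nodes were collected for optimization (see the NOTE in OptimizeForEpisodes)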
          var shownTree = new SymbolicExpressionTree(TranslateTreeNode(tree.Root, optTheta, ref nextParIdx));

          // var shownTree = (SymbolicExpressionTree)tree.Clone();
          // var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode);
          // var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode);
          //
          // foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) {
          //   double constantsVal = optTheta[nodeIdx[n.original]];
          //
          //   ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal };
          //
          //   var parentNode = n.shown.Parent;
          //   int replacementIndex = parentNode.IndexOfSubtree(n.shown);
          //   parentNode.RemoveSubtree(replacementIndex);
          //   parentNode.InsertSubtree(replacementIndex, replacementNode);
          // }

          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
          var simplifiedTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(simplified)");
          simplifiedTreeVar.Value = TreeSimplifier.Simplify(shownTree);
          models.Add(simplifiedTreeVar);

        }
        results["Models"].Value = models;
        #endregion
      }
    }

    #region interpretation

    // the following uses auto-diff to calculate the gradient w.r.t. the parameters forward in time.
    // this is basically the method described in Gronwall T. Note on the derivatives with respect to a parameter of the solutions of a system of differential equations. Ann. Math. 1919;20:292–296.

    // a comparison of three potential calculation methods for the gradient is given in:
    // Sengupta, B., Friston, K. J., & Penny, W. D. (2014). Efficient gradient computation for dynamical models. Neuroimage, 98(100), 521–527. http://doi.org/10.1016/j.neuroimage.2014.04.040
    // "Our comparison establishes that the adjoint method is computationally more efficient for numerical estimation of parametric gradients
    // for state-space models — both linear and non-linear, as in the case of a dynamical causal model (DCM)"

    // for a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes

    private static IEnumerable<Tuple<double, Vector>[]> Integrate(
      ISymbolicExpressionTree[] trees, IDataset dataset, string[] inputVariables, string[] targetVariables, string[] latentVariables, IEnumerable<IntRange> episodes,
      double[] parameterValues,
      string odeSolver, int numericIntegrationSteps = 100) {

      // TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver

      foreach (var episode in episodes) {
        var rows = Enumerable.Range(episode.Start, episode.End - episode.Start);
        // return first value as stored in the dataset
        yield return targetVariables
          .Select(targetVar => Tuple.Create(dataset.GetDoubleValue(targetVar, rows.First()), Vector.Zero))
          .ToArray();

        // integrate forward starting with known values for the target in t0

        var variableValues = new Dictionary<string, Tuple<double, Vector>>();
        var t0 = rows.First();
        foreach (var varName in inputVariables) {
          variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
        }
        foreach (var varName in targetVariables) {
          variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
        }
        // add value entries for latent variables which are also integrated
        foreach (var latentVar in latentVariables) {
          variableValues.Add(latentVar, Tuple.Create(0.0, Vector.Zero)); // we don't have observations for latent variables -> assume zero as starting value TODO
        }
        var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must coincide with the order of trees in the encoding

        var prevT = rows.First(); // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements.
        foreach (var t in rows.Skip(1)) {
          if (odeSolver == "HeuristicLab")
            IntegrateHL(trees, calculatedVariables, variableValues, parameterValues, numericIntegrationSteps);
          else if (odeSolver == "CVODES")
            IntegrateCVODES(trees, calculatedVariables, variableValues, parameterValues, t - prevT);
          else throw new InvalidOperationException("Unknown ODE solver " + odeSolver);
          prevT = t;

          if (variableValues.Count == targetVariables.Length) {
            // only return the target variables for calculation of errors
            var res = targetVariables
              .Select(targetVar => variableValues[targetVar])
              .ToArray();
            if (res.Any(ri => double.IsNaN(ri.Item1) || double.IsInfinity(ri.Item1))) yield break;
            yield return res;
          } else {
            yield break; // stop early on problems
          }

          // update for next time step
          foreach (var varName in inputVariables) {
            variableValues[varName] = Tuple.Create(dataset.GetDoubleValue(varName, t), Vector.Zero);
          }
        }
      }
    }

    /// <summary>
    ///  Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization
    /// </summary>
    /// <param name="trees">Each equation in the ODE represented as a tree</param>
    /// <param name="calculatedVariables">The names of the calculated variables</param>
    /// <param name="variableValues">The start values of the calculated variables as well as their sensitivities over parameters</param>
    /// <param name="parameterValues">The current parameter values</param>
    /// <param name="t">The time t up to which we need to integrate.</param>
    private static void IntegrateCVODES(
      ISymbolicExpressionTree[] trees, // f(y,p) in tree representation
      string[] calculatedVariables, // names of elements of y
      Dictionary<string, Tuple<double, Vector>> variableValues,  //  y (input and output); input: y(t0), output: y(t0+t)
      double[] parameterValues, // p
      double t // duration t for which we want to integrate
      ) {

      // the RHS of the ODE
      // dy/dt = f(y_t,x_t,p)
      CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, parameterValues);
      // the Jacobian ∂f/∂y
      CVODES.CVDlsJacFunc jac = CreateJac(trees, calculatedVariables, parameterValues);

      // the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i
      CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, parameterValues);

      // setup solver
      int numberOfEquations = trees.Length;
      IntPtr y = IntPtr.Zero;
      IntPtr cvode_mem = IntPtr.Zero;
      IntPtr A = IntPtr.Zero;
      IntPtr yS0 = IntPtr.Zero;
      IntPtr linearSolver = IntPtr.Zero;
      var ns = parameterValues.Length; // number of parameters

      try {
        y = CVODES.N_VNew_Serial(numberOfEquations);
        // init y to current values of variables
        // y must be initialized before calling CVodeInit
        for (int i = 0; i < calculatedVariables.Length; i++) {
          CVODES.NV_Set_Ith_S(y, i, variableValues[calculatedVariables[i]].Item1);
        }

        cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL);

        var flag = CVODES.CVodeInit(cvode_mem, f, 0.0, y);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        double relTol = 1.0e-2;
        double absTol = 1.0;
        flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol);  // TODO: probably need to adjust absTol per variable
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations);
        Debug.Assert(A != IntPtr.Zero);

        linearSolver = CVODES.SUNDenseLinearSolver(y, A);
        Debug.Assert(linearSolver != IntPtr.Zero);

        flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        flag = CVODES.CVDlsSetJacFn(cvode_mem, jac);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter
        unsafe {
          // set to initial sensitivities supplied by caller
          for (int pIdx = 0; pIdx < ns; pIdx++) {
            var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx);
            for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
              CVODES.NV_Set_Ith_S(yS0_i, varIdx, variableValues[calculatedVariables[varIdx]].Item2[pIdx]);
            }
          }
        }

        flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        flag = CVODES.CVodeSensEEtolerances(cvode_mem);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        // make one forward integration step
        double tout = 0.0; // first output time
        flag = CVODES.CVode(cvode_mem, t, y, ref tout, CVODES.CV_NORMAL);
        if (flag == CVODES.CV_SUCCESS) {
          Debug.Assert(t == tout);

          // get sensitivities
          flag = CVODES.CVodeGetSens(cvode_mem, ref tout, yS0);
          Debug.Assert(CVODES.CV_SUCCESS == flag);

          // update variableValues based on integration results
          for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
            var yi = CVODES.NV_Get_Ith_S(y, varIdx);
            var gArr = new double[parameterValues.Length];
            for (var pIdx = 0; pIdx < parameterValues.Length; pIdx++) {
              unsafe {
                var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx);
                gArr[pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx);
              }
            }
            variableValues[calculatedVariables[varIdx]] = Tuple.Create(yi, new Vector(gArr));
          }
        } else {
          variableValues.Clear();   // indicate problems by not returning new values
        }

        // cleanup all allocated objects
      } finally {
        if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y);
        if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem);
        if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver);
        if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A);
        if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns);
      }
    }

    private static CVODES.CVRhsFunc CreateOdeRhs(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables,
      double[] parameterValues) {
      // we don't need to calculate a gradient here
      // -> no nodes are selected to be included in the gradient calculation
      var nodeIdx = new Dictionary<ISymbolicExpressionTreeNode, int>();
      return (double t,
              IntPtr y, // N_Vector, current value of y (input)
              IntPtr ydot, // N_Vector, calculated value of y' (output)
              IntPtr user_data // optional user data, (unused here)
              ) => {
                // TODO: perf
                var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();

                int pIdx = 0;
                foreach (var tree in trees) {
                  foreach (var n in tree.IterateNodesPrefix()) {
                    if (IsConstantNode(n)) {
                      nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we do not need a gradient
                      pIdx++;
                    } else if (n.SubtreeCount == 0) {
                      // for variables and latent variables get the value from variableValues
                      var varName = n.Symbol.Name;
                      var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
                      if (varIdx < 0) throw new InvalidProgramException();
                      var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
                      nodeValues.Add(n, Tuple.Create(y_i, Vector.Zero)); // no gradient needed
                    }
                  }
                }
                for (int i = 0; i < trees.Length; i++) {
                  var tree = trees[i];
                  var res_i = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
                  CVODES.NV_Set_Ith_S(ydot, i, res_i.Item1);
                }
                return 0;
              };
    }

    private static CVODES.CVDlsJacFunc CreateJac(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables,
      double[] parameterValues) {

      return (
        double t, // current time (input)
        IntPtr y, // N_Vector, current value of y (input)
        IntPtr fy, // N_Vector, current value of f (input)
        IntPtr Jac, // SUNMatrix ∂f/∂y (output; row i contains the ∂f_i/∂y vector)
        IntPtr user_data, // optional (unused here)
        IntPtr tmp1, // N_Vector, optional (unused here)
        IntPtr tmp2, // N_Vector, optional (unused here)
        IntPtr tmp3 // N_Vector, optional (unused here)
      ) => {
        // here we need to calculate partial derivatives for the calculated variables y
        var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
        int pIdx = 0;
        foreach (var tree in trees) {
          foreach (var n in tree.IterateNodesPrefix()) {
            if (IsConstantNode(n)) {
              nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we need a gradient over y which is zero for parameters
              pIdx++;
            } else if (n.SubtreeCount == 0) {
              // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
              var varName = n.Symbol.Name;
              var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
              if (varIdx < 0) throw new InvalidProgramException();

              var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
              var gArr = new double[CVODES.NV_LENGTH_S(y)]; // backing array
              gArr[varIdx] = 1.0;
              var g = new Vector(gArr);
              nodeValues.Add(n, Tuple.Create(y_i, g));
            }
          }
        }

        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
          var g = res.Item2;
          for (int j = 0; j < calculatedVariables.Length; j++) {
            CVODES.SUNDenseMatrix_Set(Jac, i, j, g[j]);
          }
        }
        return 0; // on success
      };
    }

    // to calculate sensitivities RHS for all equations at once
    // must compute (∂f/∂y)s_i(t) + ∂f/∂p_i and store in ySdot.
    // Index i refers to parameters, dimensionality of matrix and vectors is number of equations
    private static CVODES.CVSensRhsFn CreateSensitivityRhs(ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) {
      return (
              int Ns, // number of parameters
              double t, // current time
              IntPtr y, // N_Vector y(t) (input)
              IntPtr ydot, // N_Vector dy/dt(t) (input)
              IntPtr yS, // N_Vector*, one vector for each parameter (input)
              IntPtr ySdot, // N_Vector*, one vector for each parameter (output)
              IntPtr user_data, // optional (unused here)
              IntPtr tmp1, // N_Vector, optional (unused here)
              IntPtr tmp2 // N_Vector, optional (unused here)
        ) => {
          // here we need to calculate partial derivatives for the calculated variables y as well as for the parameters
          var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
          var d = calculatedVariables.Length + parameterValues.Length; // dimensionality of gradient
          // first collect variable values
          foreach (var tree in trees) {
            foreach (var n in tree.IterateNodesPrefix()) {
              if (IsVariableNode(n)) {
                // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
                var varName = n.Symbol.Name;
                var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
                if (varIdx < 0) throw new InvalidProgramException();

                var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
                var gArr = new double[d]; // backing array
                gArr[varIdx] = 1.0;
                var g = new Vector(gArr);
                nodeValues.Add(n, Tuple.Create(y_i, g));
              }
            }
          }
          // then collect constants
          int pIdx = 0;
          foreach (var tree in trees) {
            foreach (var n in tree.IterateNodesPrefix()) {
              if (IsConstantNode(n)) {
                var gArr = new double[d];
                gArr[calculatedVariables.Length + pIdx] = 1.0;
                var g = new Vector(gArr);
                nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], g));
                pIdx++;
              }
            }
          }
          // gradient vector is [∂f/∂y_1, ∂f/∂y_2, ... ∂f/∂y_N, ∂f/∂p_1 ... ∂f/∂p_K]

          for (pIdx = 0; pIdx < Ns; pIdx++) {
            unsafe {
              var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
              CVODES.N_VConst_Serial(0.0, sDot_pi);
            }
          }

          for (int i = 0; i < trees.Length; i++) {
            var tree = trees[i];
            var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
            var g = res.Item2;

            // update ySdot = (∂f/∂y)s_i(t) + ∂f/∂p_i
            for (pIdx = 0; pIdx < Ns; pIdx++) {
              unsafe {
                var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
                var s_pi = *((IntPtr*)yS.ToPointer() + pIdx);

                var v = CVODES.NV_Get_Ith_S(sDot_pi, i);
                // (∂f/∂y)s_i(t)
                var p = 0.0;
                for (int yIdx = 0; yIdx < calculatedVariables.Length; yIdx++) {
                  p += g[yIdx] * CVODES.NV_Get_Ith_S(s_pi, yIdx);
                }
                // + ∂f/∂p_i
                CVODES.NV_Set_Ith_S(sDot_pi, i, v + p + g[calculatedVariables.Length + pIdx]);
              }
            }
          }
          return 0; // on success
        };
    }

    private static void IntegrateHL(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables, // names of integrated variables
      Dictionary<string, Tuple<double, Vector>> variableValues, // y (input and output); input: y(t0), output: y(t0+1)
      double[] parameterValues,
      int numericIntegrationSteps) {

      var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();

      // the nodeValues for parameters are constant over time
      // TODO: this needs to be done only once for each iteration in gradient descent (whenever parameter values change)
      // NOTE: the order must be the same as above (prefix order for nodes)
      int pIdx = 0;
      foreach (var tree in trees) {
        foreach (var node in tree.Root.IterateNodesPrefix()) {
          if (IsConstantNode(node)) {
            var gArr = new double[parameterValues.Length]; // backing array
            gArr[pIdx] = 1.0;
            var g = new Vector(gArr);
            nodeValues.Add(node, new Tuple<double, Vector>(parameterValues[pIdx], g));
            pIdx++;
          } else if (node.SubtreeCount == 0) {
            // for (latent) variables get the values from variableValues
            var varName = node.Symbol.Name;
            nodeValues.Add(node, variableValues[varName]);
          }
        }
      }

      double h = 1.0 / numericIntegrationSteps;
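      // each step from one dataset row to the next is assumed to span Δt = 1 (equidistant measurements, see
      // the TODO in Integrate) and is subdivided into numericIntegrationSteps explicit Euler steps of size h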
      for (int step = 0; step < numericIntegrationSteps; step++) {
        var deltaValues = new Dictionary<string, Tuple<double, Vector>>();
        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var targetVarName = calculatedVariables[i];

          // Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol
          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
          deltaValues.Add(targetVarName, res);
        }

        // update variableValues for next step (explicit Euler step: y ← y + h·f(y), applied to values and gradients)
        foreach (var kvp in deltaValues) {
          var oldVal = variableValues[kvp.Key];
          var newVal = Tuple.Create(
            oldVal.Item1 + h * kvp.Value.Item1,
            oldVal.Item2 + h * kvp.Value.Item2
          );
          variableValues[kvp.Key] = newVal;
        }
        // update nodeValues from variableValues
        // TODO: perf using dictionary with list of nodes for each variable
        foreach (var tree in trees) {
          foreach (var node in tree.Root.IterateNodesPrefix().Where(n => IsVariableNode(n))) {
            var varName = node.Symbol.Name;
            nodeValues[node] = variableValues[varName];
          }
        }
      }
    }

    private static Tuple<double, Vector> InterpretRec(
      ISymbolicExpressionTreeNode node,
      Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> nodeValues      // contains value and gradient vector for a node (variables and constants only)
        ) {

      switch (node.Symbol.Name) {
        case "+": {
            var l = InterpretRec(node.GetSubtree(0), nodeValues);
            var r = InterpretRec(node.GetSubtree(1), nodeValues);

            return Tuple.Create(l.Item1 + r.Item1, l.Item2 + r.Item2);
          }
        case "*": {
            var l = InterpretRec(node.GetSubtree(0), nodeValues);
            var r = InterpretRec(node.GetSubtree(1), nodeValues);

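            // product rule: (l·r)' = l'·r + l·r'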
            return Tuple.Create(l.Item1 * r.Item1, l.Item2 * r.Item1 + l.Item1 * r.Item2);
          }

        case "-": {
            var l = InterpretRec(node.GetSubtree(0), nodeValues);
            var r = InterpretRec(node.GetSubtree(1), nodeValues);

            return Tuple.Create(l.Item1 - r.Item1, l.Item2 - r.Item2);
          }
        case "%": {
            var l = InterpretRec(node.GetSubtree(0), nodeValues);
            var r = InterpretRec(node.GetSubtree(1), nodeValues);

            // protected division
            if (r.Item1.IsAlmost(0.0)) {
              return Tuple.Create(0.0, Vector.Zero);
            } else {
              return Tuple.Create(
                l.Item1 / r.Item1,
                l.Item1 * -1.0 / (r.Item1 * r.Item1) * r.Item2 + 1.0 / r.Item1 * l.Item2 // (f/g)' = f * (1/g)' + 1/g * f' = f * -1/g² * g' + 1/g * f'
                );
            }
          }
        case "sin": {
            var x = InterpretRec(node.GetSubtree(0), nodeValues);
            return Tuple.Create(
              Math.Sin(x.Item1),
              Math.Cos(x.Item1) * x.Item2 // chain rule: sin(u)' = cos(u) · u' (cos of the value, scaling the gradient vector)
            );
          }
        case "cos": {
            var x = InterpretRec(node.GetSubtree(0), nodeValues);
            return Tuple.Create(
              Math.Cos(x.Item1),
              -Math.Sin(x.Item1) * x.Item2 // chain rule: cos(u)' = -sin(u) · u'
            );
          }
        default: {
            return nodeValues[node];  // value and gradient for constants and variables must be set by the caller
          }
      }
    }
    #endregion

    #region events
    /*
     * Dependencies between parameters:
     *
     * ProblemData
     *    |
     *    V
     * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables
     *               |   |                 |                   |
     *               V   V                 |                   |
     *             Grammar <---------------+-------------------
     *                |
     *                V
     *            Encoding
     */
    private void RegisterEventHandlers() {
      ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged;
      if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed;

      TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged;
      if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;

      FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged;
      if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;

      MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged;

      NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged;
    }

    private void NumLatentVariablesChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    private void MaximumLengthChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) {
      FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
    }

    private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      UpdateGrammarAndEncoding();
    }

    private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
      TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
    }

    private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      UpdateGrammarAndEncoding();
    }

    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      ProblemDataParameter.Value.Changed += ProblemData_Changed;
      OnProblemDataChanged();
      OnReset();
    }

    private void ProblemData_Changed(object sender, EventArgs e) {
      OnProblemDataChanged();
      OnReset();
    }

    private void OnProblemDataChanged() {
      UpdateTargetVariables();        // implicitly updates other dependent parameters
      var handler = ProblemDataChanged;
      if (handler != null) handler(this, EventArgs.Empty);
    }

    #endregion

    #region helper

    private void InitAllParameters() {
      UpdateTargetVariables(); // implicitly updates the grammar and the encoding
    }

    private ReadOnlyCheckedItemList<StringValue> CreateFunctionSet() {
      var l = new CheckedItemList<StringValue>();
      l.Add(new StringValue("+").AsReadOnly());
      l.Add(new StringValue("*").AsReadOnly());
      l.Add(new StringValue("%").AsReadOnly());
      l.Add(new StringValue("-").AsReadOnly());
      l.Add(new StringValue("sin").AsReadOnly());
      l.Add(new StringValue("cos").AsReadOnly());
      return l.AsReadOnly();
    }

    private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name.StartsWith("θ");
    }
    private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name.StartsWith("λ");
    }
    private static bool IsVariableNode(ISymbolicExpressionTreeNode n) {
      return (n.SubtreeCount == 0) && !IsConstantNode(n) && !IsLatentVariableNode(n);
    }

    private void UpdateTargetVariables() {
      var currentlySelectedVariables = TargetVariables.CheckedItems
        .OrderBy(i => i.Index)
        .Select(i => i.Value.Value)
        .ToArray();

      var newVariablesList = new CheckedItemList<StringValue>(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly();
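      // re-check the variables that were checked before, so the selection survives dataset changes (see #2925)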
      var matchingItems = newVariablesList.Where(item => currentlySelectedVariables.Contains(item.Value)).ToArray();
      foreach (var matchingItem in matchingItems) {
        newVariablesList.SetItemCheckedState(matchingItem, true);
      }
      TargetVariablesParameter.Value = newVariablesList;
    }

    private void UpdateGrammarAndEncoding() {
      var encoding = new MultiEncoding();
      var g = CreateGrammar();
      foreach (var targetVar in TargetVariables.CheckedItems) {
        encoding = encoding.Add(new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength)); // only limit by length
      }
      for (int i = 1; i <= NumberOfLatentVariables; i++) {
        encoding = encoding.Add(new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength));
      }
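      // NOTE: the resulting order of encodings (target variables first, then latent variables) must match
      // the order of calculatedVariables assumed in Integrate (see the TODO there)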
      Encoding = encoding;
    }

    private ISymbolicExpressionGrammar CreateGrammar() {
      // whenever ProblemData is changed we create a new grammar with the necessary symbols
      var g = new SimpleSymbolicExpressionGrammar();
      g.AddSymbols(FunctionSet.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray(), 2, 2);

      // TODO
      //g.AddSymbols(new[] {
      //  "exp",
      //  "log", // log( <expr> ) // TODO: init a theta to ensure the value is always positive
1156      //  "exp_minus" // exp((-1) * <expr>
      //}, 1, 1);

      foreach (var variableName in ProblemData.AllowedInputVariables.Union(TargetVariables.CheckedItems.Select(i => i.Value.Value)))
        g.AddTerminalSymbol(variableName);

      // generate symbols for numeric parameters for which the value is optimized using AutoDiff
      // we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees
      var numericConstantsFactor = 2.0;
      for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + TargetVariables.CheckedItems.Count()); i++) {
        g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
      }

      // generate symbols for latent variables
      for (int i = 1; i <= NumberOfLatentVariables; i++) {
        g.AddTerminalSymbol("λ" + i); // latent variable symbol (integrated like a target variable, but without observations)
      }

      return g;
    }

    private ISymbolicExpressionTreeNode TranslateTreeNode(ISymbolicExpressionTreeNode n, double[] parameterValues, ref int nextParIdx) {
      ISymbolicExpressionTreeNode translatedNode = null;
      if (n.Symbol is StartSymbol) {
        translatedNode = new StartSymbol().CreateTreeNode();
      } else if (n.Symbol is ProgramRootSymbol) {
        translatedNode = new ProgramRootSymbol().CreateTreeNode();
      } else if (n.Symbol.Name == "+") {
        translatedNode = new Addition().CreateTreeNode();
      } else if (n.Symbol.Name == "-") {
        translatedNode = new Subtraction().CreateTreeNode();
      } else if (n.Symbol.Name == "*") {
        translatedNode = new Multiplication().CreateTreeNode();
      } else if (n.Symbol.Name == "%") {
        translatedNode = new Division().CreateTreeNode();
      } else if (n.Symbol.Name == "sin") {
        translatedNode = new Sine().CreateTreeNode();
      } else if (n.Symbol.Name == "cos") {
        translatedNode = new Cosine().CreateTreeNode();
      } else if (IsConstantNode(n)) {
        var constNode = (ConstantTreeNode)new Constant().CreateTreeNode();
        constNode.Value = parameterValues[nextParIdx];
        nextParIdx++;
        translatedNode = constNode;
      } else {
        // assume a variable name
        var varName = n.Symbol.Name;
        var varNode = (VariableTreeNode)new Variable().CreateTreeNode();
        varNode.Weight = 1.0;
        varNode.VariableName = varName;
        translatedNode = varNode;
      }
      foreach (var child in n.Subtrees) {
        translatedNode.AddSubtree(TranslateTreeNode(child, parameterValues, ref nextParIdx));
      }
      return translatedNode;
    }
    #endregion

    #region Import & Export
    public void Load(IRegressionProblemData data) {
      Name = data.Name;
      Description = data.Description;
      ProblemData = data;
    }

    public IRegressionProblemData Export() {
      return ProblemData;
    }
    #endregion

  }
}