
source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs @ 16893

Last change on this file: r16893, checked in by gkronber, 5 years ago

#2925: allow separate configuration of const opt steps for pre-tuning and the full ODE. Allow weighted combination of fitness using pretuning NMSE and the full ODE NMSE

File size: 93.7 KB
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.Instances;
using Variable = HeuristicLab.Problems.DataAnalysis.Symbolic.Variable;
using HEAL.Attic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;

namespace HeuristicLab.Problems.DynamicalSystemsModelling {
  [Item("Dynamical Systems Modelling Problem", "TODO")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
  [StorableType("065C6A61-773A-42C9-9DE5-61A5D1D823EB")]
  public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
    #region parameter names
    private const string ProblemDataParameterName = "Data";
    private const string TargetVariablesParameterName = "Target variables";
    private const string FunctionSetParameterName = "Function set";
    private const string MaximumLengthParameterName = "Size limit";
    private const string MaximumPretuningParameterOptimizationIterationsParameterName = "Max. pre-tuning parameter optimization iterations";
    private const string MaximumOdeParameterOptimizationIterationsParameterName = "Max. ODE parameter optimization iterations";
    private const string NumberOfLatentVariablesParameterName = "Number of latent variables";
    private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
    private const string TrainingEpisodesParameterName = "Training episodes";
    private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes";
    private const string OdeSolverParameterName = "ODE Solver";
    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> TargetVariablesParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[TargetVariablesParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> FunctionSetParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[FunctionSetParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumLengthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumLengthParameterName]; }
    }

    public IFixedValueParameter<IntValue> MaximumPretuningParameterOptimizationIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumPretuningParameterOptimizationIterationsParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumOdeParameterOptimizationIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumOdeParameterOptimizationIterationsParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; }
    }
    public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter {
      get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; }
    }
    public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; }
    }
    public IConstrainedValueParameter<StringValue> OdeSolverParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters[OdeSolverParameterName]; }
    }
    public IFixedValueParameter<DoubleValue> PretuningErrorWeight {
      get { return (IFixedValueParameter<DoubleValue>)Parameters["Pretuning NMSE weight"]; }
    }
    public IFixedValueParameter<DoubleValue> OdeErrorWeight {
      get { return (IFixedValueParameter<DoubleValue>)Parameters["ODE NMSE weight"]; }
    }
    #endregion

    #region Properties
    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }

    public ReadOnlyCheckedItemList<StringValue> TargetVariables {
      get { return TargetVariablesParameter.Value; }
    }

    public ReadOnlyCheckedItemList<StringValue> FunctionSet {
      get { return FunctionSetParameter.Value; }
    }

    public int MaximumLength {
      get { return MaximumLengthParameter.Value.Value; }
    }
    public int MaximumPretuningParameterOptimizationIterations {
      get { return MaximumPretuningParameterOptimizationIterationsParameter.Value.Value; }
    }
    public int MaximumOdeParameterOptimizationIterations {
      get { return MaximumOdeParameterOptimizationIterationsParameter.Value.Value; }
    }
    public int NumberOfLatentVariables {
      get { return NumberOfLatentVariablesParameter.Value.Value; }
    }
    public int NumericIntegrationSteps {
      get { return NumericIntegrationStepsParameter.Value.Value; }
    }
    public IEnumerable<IntRange> TrainingEpisodes {
      get { return TrainingEpisodesParameter.Value; }
    }
    public bool OptimizeParametersForEpisodes {
      get { return OptimizeParametersForEpisodesParameter.Value.Value; }
    }

    public string OdeSolver {
      get { return OdeSolverParameter.Value.Value; }
      set {
        var matchingValue = OdeSolverParameter.ValidValues.FirstOrDefault(v => v.Value == value);
        if (matchingValue == null) throw new ArgumentOutOfRangeException();
        else OdeSolverParameter.Value = matchingValue;
      }
    }

    #endregion

    public event EventHandler ProblemDataChanged;

    public override bool Maximization {
      get { return false; } // we minimize NMSE
    }

    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(StorableConstructorFlag _) : base(_) { }
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
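      // backwards compatibility: older stored runs may lack parameters that were introduced
      // in later revisions, so add them here with default values before they are used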
      if (!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) {
        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
      }
      int iters = 100;
      if (Parameters.ContainsKey("Max. parameter optimization iterations")) {
        iters = ((IFixedValueParameter<IntValue>)Parameters["Max. parameter optimization iterations"]).Value.Value;
      }
      if (!Parameters.ContainsKey(MaximumPretuningParameterOptimizationIterationsParameterName)) {
        Parameters.Add(new FixedValueParameter<IntValue>(MaximumPretuningParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters of individual equations for numerical derivatives (using Levenberg-Marquardt). More iterations make the algorithm slower; fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(iters)));
      }
      if (!Parameters.ContainsKey(MaximumOdeParameterOptimizationIterationsParameterName)) {
        Parameters.Add(new FixedValueParameter<IntValue>(MaximumOdeParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of the full ODE parameters (using Levenberg-Marquardt). More iterations make the algorithm slower; fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(iters)));
      }

      if (!Parameters.ContainsKey("Pretuning NMSE weight"))
        Parameters.Add(new FixedValueParameter<DoubleValue>("Pretuning NMSE weight", "For fitness weighting", new DoubleValue(0.5)));
      if (!Parameters.ContainsKey("ODE NMSE weight"))
        Parameters.Add(new FixedValueParameter<DoubleValue>("ODE NMSE weight", "For fitness weighting", new DoubleValue(0.5)));

      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers();
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
    #endregion

    public Problem()
      : base() {
      var targetVariables = new CheckedItemList<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem
      var functions = CreateFunctionSet();
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. Use CSV import functionality to import data.", new RegressionProblemData()));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(TargetVariablesParameterName, "Target variables (overrides setting in ProblemData)", targetVariables));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximum allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumPretuningParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters of individual equations for numerical derivatives (using Levenberg-Marquardt). More iterations make the algorithm slower; fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumOdeParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of the full ODE parameters (using Levenberg-Marquardt). More iterations make the algorithm slower; fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated up and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference to target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 .. 5) as necessary (default = 0)", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps make the algorithm slower; fewer steps worsen the accuracy of the numeric integration scheme.", new IntValue(10)));
      Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training, each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>()));
      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
      Parameters.Add(new FixedValueParameter<DoubleValue>("Pretuning NMSE weight", "For fitness weighting", new DoubleValue(0.5)));
      Parameters.Add(new FixedValueParameter<DoubleValue>("ODE NMSE weight", "For fitness weighting", new DoubleValue(0.5)));

      var solversStr = new string[] { "HeuristicLab" /* , "CVODES" */};
      var solvers = new ItemSet<StringValue>(
        solversStr.Select(s => new StringValue(s).AsReadOnly())
        );
      Parameters.Add(new ConstrainedValueParameter<StringValue>(OdeSolverParameterName, "The solver to use for solving the initial value ODE problems", solvers, solvers.First()));

      RegisterEventHandlers();
      InitAllParameters();

      // TODO: use training range as default training episode
      // TODO: optimization of starting values for latent variables in CVODES solver
      // TODO: allow to specify the name for the time variable in the dataset and allow variable step-sizes
    }

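    // Fitness of an individual is a weighted combination of the pre-tuning NMSE (per-equation fit
    // against finite-difference approximations of dy/dt) and the full ODE NMSE (fit of the
    // integrated system), see OptimizeForEpisodes below.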
    public override double Evaluate(Individual individual, IRandom random) {
      var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
      if (OptimizeParametersForEpisodes) {
        throw new NotImplementedException();
        int eIdx = 0;
        double totalNMSE = 0.0;
        int totalSize = 0;
        foreach (var episode in TrainingEpisodes) {
          // double[] optTheta;
          double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, new[] { episode }, MaximumPretuningParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver, MaximumOdeParameterOptimizationIterations);
          // individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
          eIdx++;
          totalNMSE += nmse * episode.Size;
          totalSize += episode.Size;
        }
        return totalNMSE / totalSize;
      } else {
        // double[] optTheta;
        double nmse = OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, TrainingEpisodes, MaximumPretuningParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver, MaximumOdeParameterOptimizationIterations,
          PretuningErrorWeight.Value.Value, OdeErrorWeight.Value.Value);
        // individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
        return nmse;
      }
    }

    public static double OptimizeForEpisodes(
      ISymbolicExpressionTree[] trees,
      IRegressionProblemData problemData,
      string[] targetVars,
      string[] latentVariables,
      IRandom random,
      IEnumerable<IntRange> episodes,
      int maxPretuningParameterOptIterations,
      int numericIntegrationSteps,
      string odeSolver,
      int maxOdeParameterOptIterations,
      double pretuningErrorWeight = 0.5,
      double odeErrorWeight = 0.5
      ) {

      // extract constants from trees (without trees for latent variables)
      var targetVariableTrees = trees.Take(targetVars.Length).ToArray();
      var latentVariableTrees = trees.Skip(targetVars.Length).ToArray();
      var constantNodes = targetVariableTrees.Select(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>().ToArray()).ToArray();
      var initialTheta = constantNodes.Select(nodes => nodes.Select(n => n.Value).ToArray()).ToArray();

      // optimize parameters by fitting f(x,y) to calculated differences dy/dt(t)
      double nmse = pretuningErrorWeight * PreTuneParameters(trees, problemData, targetVars, latentVariables, random, episodes, maxPretuningParameterOptIterations,
        initialTheta, out double[] pretunedParameters);

      // extend parameter vector to include parameters for latent variable trees
      pretunedParameters = pretunedParameters
        .Concat(latentVariableTrees
        .SelectMany(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>().Select(n => n.Value)))
        .ToArray();

      // optimize parameters using integration of f(x,y) to calculate y(t)
      nmse += odeErrorWeight * OptimizeParameters(trees, problemData, targetVars, latentVariables, episodes, maxOdeParameterOptIterations, pretunedParameters, numericIntegrationSteps, odeSolver,
        out double[] optTheta);
      // var optTheta = pretunedParameters;

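      // guard against degenerate solutions: failed or exploding fits are clamped to a
      // worst-case NMSE of 100 per tree and row, so they are strongly penalized but remain comparable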
      if (double.IsNaN(nmse) ||
        double.IsInfinity(nmse) ||
        nmse > 100 * trees.Length * episodes.Sum(ep => ep.Size))
        return 100 * trees.Length * episodes.Sum(ep => ep.Size);

      // update tree nodes with optimized values
      var paramIdx = 0;
      for (var treeIdx = 0; treeIdx < constantNodes.Length; treeIdx++) {
        for (int i = 0; i < constantNodes[treeIdx].Length; i++)
          constantNodes[treeIdx][i].Value = optTheta[paramIdx++];
      }
      return nmse;
    }

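    // Pre-tuning: fit each equation tree separately to finite differences of its target variable
    // (y_{t+1} - y_t as an approximation of dy/dt) using bound-constrained Levenberg-Marquardt
    // (alglib.minlm). Values of latent variables are filled in beforehand by integrating the
    // latent-variable trees.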
    private static double PreTuneParameters(
      ISymbolicExpressionTree[] trees,
      IRegressionProblemData problemData,
      string[] targetVars,
      string[] latentVariables,
      IRandom random,
      IEnumerable<IntRange> episodes,
      int maxParameterOptIterations,
      double[][] initialTheta,
      out double[] optTheta) {
      var thetas = new List<double>();
      double nmse = 0.0;
      var maxTreeNmse = 100 * episodes.Sum(ep => ep.Size);

      var targetTrees = trees.Take(targetVars.Length).ToArray();
      var latentTrees = trees.Skip(targetVars.Length).ToArray(); // latent-variable trees follow the target-variable trees (cf. OptimizeForEpisodes)

      // first calculate values of latent variables by integration
      if (latentVariables.Length > 0) {
        var inputVariables = targetVars.Concat(latentTrees.SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))).Except(latentVariables).Distinct();
        var myState = new OptimizationData(latentTrees, targetVars, inputVariables.ToArray(), problemData, null, episodes.ToArray(), 10, latentVariables, "HeuristicLab");

        var fi = new double[myState.rows.Length * targetVars.Length];
        var jac = new double[myState.rows.Length * targetVars.Length, myState.nodeValueLookup.ParameterCount];
        var latentValues = new double[myState.rows.Length, latentVariables.Length];
        Integrate(myState, fi, jac, latentValues);

        // add integrated latent variables to dataset
        var modifiedDataset = ((Dataset)problemData.Dataset).ToModifiable();
        foreach (var variable in latentVariables) {
          modifiedDataset.AddVariable(variable, Enumerable.Repeat(0.0, modifiedDataset.Rows).ToList()); // empty column
        }
        int predIdx = 0;
        foreach (var ep in episodes) {
          for (int r = ep.Start; r < ep.End; r++) {
            for (int latVarIdx = 0; latVarIdx < latentVariables.Length; latVarIdx++) {
              modifiedDataset.SetVariableValue(latentValues[predIdx, latVarIdx], latentVariables[latVarIdx], r);
            }
            predIdx++;
          }
        }

        problemData = new RegressionProblemData(modifiedDataset, problemData.AllowedInputVariables, problemData.TargetVariable);
      }
      // NOTE: the order of values in parameter matches prefix order of constant nodes in trees
      for (int treeIdx = 0; treeIdx < targetTrees.Length; treeIdx++) {
        var t = targetTrees[treeIdx];

        var targetValuesDiff = new List<double>();
        foreach (var ep in episodes) {
          var episodeRows = Enumerable.Range(ep.Start, ep.Size);
          var targetValues = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], episodeRows).ToArray();
          targetValuesDiff.AddRange(targetValues.Skip(1).Zip(targetValues, (t1, t0) => t1 - t0)); // TODO: smoothing or multi-pole
        }
        var adjustedEpisodes = episodes.Select(ep => new IntRange(ep.Start, ep.End - 1)); // because we lose the last row in the differencing step

        // data for input variables is assumed to be known
        // input variables in pretuning are all target variables and all variable names that occur in the tree
        var inputVariables = targetVars.Concat(t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName)).Distinct();

        var myState = new OptimizationData(new[] { t },
          targetVars,
          inputVariables.ToArray(),
          problemData, new[] { targetValuesDiff.ToArray() }, adjustedEpisodes.ToArray(), -99, latentVariables, string.Empty); // TODO
        var paramCount = myState.nodeValueLookup.ParameterCount;

        optTheta = initialTheta[treeIdx];
        if (initialTheta[treeIdx].Length > 0 && maxParameterOptIterations > -1) {
          try {
            alglib.minlmstate state;
            alglib.minlmreport report;
            var p = new double[initialTheta[treeIdx].Length];
            var lowerBounds = Enumerable.Repeat(-1000.0, p.Length).ToArray();
            var upperBounds = Enumerable.Repeat(1000.0, p.Length).ToArray();
            Array.Copy(initialTheta[treeIdx], p, p.Length);
            alglib.minlmcreatevj(targetValuesDiff.Count, p, out state);
            alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
            alglib.minlmsetbc(state, lowerBounds, upperBounds);
#if DEBUG
            //alglib.minlmsetgradientcheck(state, 1.0e-7);
#endif
            alglib.minlmoptimize(state, EvaluateObjectiveVector, EvaluateObjectiveVectorAndJacobian, null, myState);

            alglib.minlmresults(state, out optTheta, out report);
            if (report.terminationtype < 0) {
#if DEBUG
              if (report.terminationtype == -7) throw new InvalidProgramException("gradient calculation fail!");
#endif
              optTheta = initialTheta[treeIdx];
            }
          } catch (alglib.alglibexception) {
            optTheta = initialTheta[treeIdx];
          }
        }
        var tree_nmse = EvaluateMSE(optTheta, myState);
        if (double.IsNaN(tree_nmse) || double.IsInfinity(tree_nmse) || tree_nmse > maxTreeNmse) {
          nmse += maxTreeNmse;
          thetas.AddRange(initialTheta[treeIdx]);
        } else {
          nmse += tree_nmse;
          thetas.AddRange(optTheta);
        }
      } // foreach tree
      optTheta = thetas.ToArray();

      return nmse;
    }


    // similar to above but this time we integrate and optimize all parameters for all targets concurrently
    private static double OptimizeParameters(ISymbolicExpressionTree[] trees, IRegressionProblemData problemData, string[] targetVars, string[] latentVariables,
      IEnumerable<IntRange> episodes, int maxParameterOptIterations, double[] initialTheta, int numericIntegrationSteps, string odeSolver, out double[] optTheta) {
      var rowsForDataExtraction = episodes.SelectMany(e => Enumerable.Range(e.Start, e.Size)).ToArray();
      var targetValues = new double[targetVars.Length][];
      for (int treeIdx = 0; treeIdx < targetVars.Length; treeIdx++) {
        var t = trees[treeIdx];

        targetValues[treeIdx] = problemData.Dataset.GetDoubleValues(targetVars[treeIdx], rowsForDataExtraction).ToArray();
      }

      // data for input variables is assumed to be known
      // input variables are all variable names that occur in the trees except for target variables (we assume that trees have been generated correctly)
      var inputVariables = trees.SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))
        .Except(targetVars)
        .Except(latentVariables)
        .Distinct();

      var myState = new OptimizationData(trees, targetVars, inputVariables.ToArray(), problemData, targetValues, episodes.ToArray(), numericIntegrationSteps, latentVariables, odeSolver);
      optTheta = initialTheta;

      if (initialTheta.Length > 0 && maxParameterOptIterations > -1) {
        var lowerBounds = Enumerable.Repeat(-1000.0, initialTheta.Length).ToArray();
        var upperBounds = Enumerable.Repeat(1000.0, initialTheta.Length).ToArray();
        try {
          alglib.minlmstate state;
          alglib.minlmreport report;
          alglib.minlmcreatevj(rowsForDataExtraction.Length * trees.Length, initialTheta, out state);
          alglib.minlmsetbc(state, lowerBounds, upperBounds);
          alglib.minlmsetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
#if DEBUG
          //alglib.minlmsetgradientcheck(state, 1.0e-7);
#endif
          alglib.minlmoptimize(state, IntegrateAndEvaluateObjectiveVector, IntegrateAndEvaluateObjectiveVectorAndJacobian, null, myState);

          alglib.minlmresults(state, out optTheta, out report);

          if (report.terminationtype < 0) {
#if DEBUG
            if (report.terminationtype == -7) throw new InvalidProgramException("gradient calculation fail!");
#endif
            // there was a problem: reset theta and evaluate for initial values
            optTheta = initialTheta;
          }
        } catch (alglib.alglibexception) {
          optTheta = initialTheta;
        }
      }
      var nmse = EvaluateIntegratedMSE(optTheta, myState);
      var maxNmse = 100 * targetValues.Length * rowsForDataExtraction.Length;
      if (double.IsNaN(nmse) || double.IsInfinity(nmse) || nmse > maxNmse) nmse = maxNmse;
      return nmse;
    }


    // helper
    public static double EvaluateMSE(double[] x, OptimizationData optimizationData) {
      var fi = new double[optimizationData.rows.Count()];
      EvaluateObjectiveVector(x, fi, optimizationData);
      return fi.Sum(fii => fii * fii) / fi.Length;
    }
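    // NOTE: the objective functions below scale each residual by the inverse standard deviation of
    // the respective target variable, so the summed squared errors correspond to a normalized MSE (NMSE)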
    public static void EvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { EvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
    public static void EvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
      var rows = optimizationData.rows;
      var problemData = optimizationData.problemData;
      var nodeValueLookup = optimizationData.nodeValueLookup;
      var ds = problemData.Dataset;
      var variables = optimizationData.variables;

      nodeValueLookup.UpdateParamValues(x);

      int outputIdx = 0;
      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
        // update variable values
        foreach (var variable in variables) {
          // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
          if (double.TryParse(variable, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
            nodeValueLookup.SetVariableValue(variable, value); // TODO: Perf we don't need to set this for each index
          } else {
            nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx])); // TODO: perf
          }
        }
        // interpret all trees
        for (int treeIdx = 0; treeIdx < optimizationData.trees.Length; treeIdx++) {
          var tree = optimizationData.trees[treeIdx];
          var pred = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup);
          var y = optimizationData.targetValues[treeIdx][trainIdx];
          fi[outputIdx++] = (y - pred) * optimizationData.inverseStandardDeviation[treeIdx];
        }
      }
    }

    public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { EvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
    public static void EvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
      // extract variable values from dataset
      var variableValues = new Dictionary<string, Tuple<double, Vector>>();
      var problemData = optimizationData.problemData;
      var ds = problemData.Dataset;
      var rows = optimizationData.rows;
      var variables = optimizationData.variables;

      var nodeValueLookup = optimizationData.nodeValueLookup;
      nodeValueLookup.UpdateParamValues(x);

      int termIdx = 0;

      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
        // update variable values
        foreach (var variable in variables) {
          // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
          if (double.TryParse(variable, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
            nodeValueLookup.SetVariableValue(variable, value); // TODO: Perf we don't need to set this for each index
          } else {
            nodeValueLookup.SetVariableValue(variable, ds.GetDoubleValue(variable, rows[trainIdx])); // TODO: perf
          }
        }

        var calculatedVariables = optimizationData.targetVariables;

        var trees = optimizationData.trees;
        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var targetVarName = calculatedVariables[i];

          double f; Vector g;
          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValueLookup, out f, out g);

          var y = optimizationData.targetValues[i][trainIdx];
          fi[termIdx] = (y - f) * optimizationData.inverseStandardDeviation[i]; // scale of NMSE
          if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[termIdx, j] = -g[j] * optimizationData.inverseStandardDeviation[i];

          termIdx++;
        }
      }

    }

    // helper
    public static double EvaluateIntegratedMSE(double[] x, OptimizationData optimizationData) {
      var fi = new double[optimizationData.rows.Count() * optimizationData.targetVariables.Length];
      IntegrateAndEvaluateObjectiveVector(x, fi, optimizationData);
      return fi.Sum(fii => fii * fii) / fi.Length;
    }
    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { IntegrateAndEvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
    public static void IntegrateAndEvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
      IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, null, optimizationData);
    }

    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, object optimizationData) { IntegrateAndEvaluateObjectiveVectorAndJacobian(x, fi, jac, (OptimizationData)optimizationData); } // for alglib
    public static void IntegrateAndEvaluateObjectiveVectorAndJacobian(double[] x, double[] fi, double[,] jac, OptimizationData optimizationData) {
      var rows = optimizationData.rows.ToArray();
      var problemData = optimizationData.problemData;
      var nodeValueLookup = optimizationData.nodeValueLookup;
      var ds = problemData.Dataset;
      int outputIdx = 0;

      nodeValueLookup.UpdateParamValues(x);

      Integrate(optimizationData, fi, jac, null);
      var trees = optimizationData.trees;

      // update result with error
      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
        for (int i = 0; i < optimizationData.targetVariables.Length; i++) {
          var tree = trees[i];
          var y = optimizationData.targetValues[i][trainIdx];
          fi[outputIdx] = (y - fi[outputIdx]) * optimizationData.inverseStandardDeviation[i];  // scale for normalized squared error
          if (jac != null) for (int j = 0; j < x.Length; j++) jac[outputIdx, j] = -jac[outputIdx, j] * optimizationData.inverseStandardDeviation[i];
          outputIdx++;
        }
      }
    }

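    // Analyze collects results for the best individual: predicted vs. actual values for training and
    // test, parameter sensitivities, the (simplified) models, and classical regression solutions for
    // visualization with partial dependence plots (PDP).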
    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      if (!results.ContainsKey("Prediction (training)")) {
        results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Prediction (test)")) {
        results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Models")) {
        results.Add(new Result("Models", typeof(VariableCollection)));
      }
      if (!results.ContainsKey("SNMSE")) {
        results.Add(new Result("SNMSE", typeof(DoubleValue)));
      }
      if (!results.ContainsKey("Solution")) {
        results.Add(new Result("Solution", typeof(Solution)));
      }
      if (!results.ContainsKey("Squared error and gradient")) {
        results.Add(new Result("Squared error and gradient", typeof(DataTable)));
      }

      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual

      results["SNMSE"].Value = new DoubleValue(bestIndividualAndQuality.Item2);

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees

      var trainingList = new ItemList<DataTable>();

      if (OptimizeParametersForEpisodes) {
        throw new NotSupportedException();
        var eIdx = 0;
        var trainingPredictions = new List<Tuple<double, Vector>[][]>();
        foreach (var episode in TrainingEpisodes) {
          var episodes = new[] { episode };
          var optimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, episodes, NumericIntegrationSteps, latentVariables, OdeSolver);
          var trainingPrediction = Integrate(optimizationData).ToArray();
          trainingPredictions.Add(trainingPrediction);
          eIdx++;
        }

        // only for target values
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray());
          trainingDataTable.Rows.Add(actualValuesRow);
          trainingDataTable.Rows.Add(predictedValuesRow);
          trainingList.Add(trainingDataTable);
        }
        results["Prediction (training)"].Value = trainingList.AsReadOnly();


        var models = new VariableCollection();

        foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
          var targetVarName = tup.Item1;
          var tree = tup.Item2;

          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
        }
        results["Models"].Value = models;
      } else {
        // data for input variables is assumed to be known
        // input variables are all variable names that occur in the trees except for target variables (we assume that trees have been generated correctly)
        var inputVariables = trees
          .SelectMany(t => t.IterateNodesPrefix().OfType<VariableTreeNode>().Select(n => n.VariableName))
          .Except(targetVars)
          .Except(latentVariables)
          .Distinct();

        var optimizationData = new OptimizationData(trees, targetVars, inputVariables.ToArray(), problemData, null, TrainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver);
        var numParams = optimizationData.nodeValueLookup.ParameterCount;

        var fi = new double[optimizationData.rows.Length * targetVars.Length];
        var jac = new double[optimizationData.rows.Length * targetVars.Length, numParams];
        var latentValues = new double[optimizationData.rows.Length, latentVariables.Length];
        Integrate(optimizationData, fi, jac, latentValues);


        // for target values and latent variables
        var trainingRows = optimizationData.rows;
        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
          // is target variable
          if (colIdx < targetVars.Length) {
            var targetVar = targetVars[colIdx];
            var trainingDataTable = new DataTable(targetVar + " prediction (training)");
            var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
            var idx = Enumerable.Range(0, trainingRows.Length).Select(i => i * targetVars.Length + colIdx);
            var pred = idx.Select(i => fi[i]);
            var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, pred.ToArray());
            trainingDataTable.Rows.Add(actualValuesRow);
            trainingDataTable.Rows.Add(predictedValuesRow);

            for (int paramIdx = 0; paramIdx < numParams; paramIdx++) {
              var paramSensitivityRow = new DataRow($"∂{targetVar}/∂θ{paramIdx}", $"Sensitivities of parameter {paramIdx}", idx.Select(i => jac[i, paramIdx]).ToArray());
              paramSensitivityRow.VisualProperties.SecondYAxis = true;
              trainingDataTable.Rows.Add(paramSensitivityRow);
            }
            trainingList.Add(trainingDataTable);
          } else {
            var latentVar = latentVariables[colIdx - targetVars.Length];
            var trainingDataTable = new DataTable(latentVar + " prediction (training)");
            var idx = Enumerable.Range(0, trainingRows.Length);
            var pred = idx.Select(i => latentValues[i, colIdx - targetVars.Length]);
            var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, pred.ToArray());
            var emptyRow = new DataRow(latentVar);
            trainingDataTable.Rows.Add(emptyRow);
            trainingDataTable.Rows.Add(predictedValuesRow);
            trainingList.Add(trainingDataTable);
          }
        }

        var errorTable = new DataTable("Squared error and gradient");
        var seRow = new DataRow("Squared error");
        var gradientRows = Enumerable.Range(0, numParams).Select(i => new DataRow($"∂SE/∂θ{i}")).ToArray();
        errorTable.Rows.Add(seRow);
        foreach (var gRow in gradientRows) {
          gRow.VisualProperties.SecondYAxis = true;
          errorTable.Rows.Add(gRow);
        }
        var targetValues = targetVars.Select(v => problemData.Dataset.GetDoubleValues(v, trainingRows).ToArray()).ToArray();
        int r = 0;

        // foreach (var y_pred in trainingPrediction) {
        //   // calculate objective function gradient
        //   double f_i = 0.0;
        //   Vector g_i = Vector.CreateNew(new double[numParams]);
        //   for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
        //     var y_pred_f = y_pred[colIdx].Item1;
        //     var y = targetValues[colIdx][r];
        //
        //     var res = (y - y_pred_f) * optimizationData.inverseStandardDeviation[colIdx];
        //     var ressq = res * res;
        //     f_i += ressq;
        //     g_i.Add(y_pred[colIdx].Item2.Scale(-2.0 * res));
        //   }
        //   seRow.Values.Add(f_i);
        //   for (int j = 0; j < g_i.Length; j++) gradientRows[j].Values.Add(g_i[j]);
        //   r++;
        // }
        // results["Squared error and gradient"].Value = errorTable;

        // TODO: DRY for training and test
        var testList = new ItemList<DataTable>();
        var testRows = ProblemData.TestIndices.ToArray();
        var testOptimizationData = new OptimizationData(trees, targetVars, problemData.AllowedInputVariables.ToArray(), problemData, null, new IntRange[] { ProblemData.TestPartition }, NumericIntegrationSteps, latentVariables, OdeSolver);
        var testPrediction = Integrate(testOptimizationData).ToArray();

        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
          // is target variable
          if (colIdx < targetVars.Length) {
            var targetVar = targetVars[colIdx];
            var testDataTable = new DataTable(targetVar + " prediction (test)");
            var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
            var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            testDataTable.Rows.Add(actualValuesRow);
            testDataTable.Rows.Add(predictedValuesRow);
            testList.Add(testDataTable);

          } else {
            // var latentVar = latentVariables[colIdx - targetVars.Length];
            // var testDataTable = new DataTable(latentVar + " prediction (test)");
            // var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            // var emptyRow = new DataRow(latentVar);
            // testDataTable.Rows.Add(emptyRow);
            // testDataTable.Rows.Add(predictedValuesRow);
            // testList.Add(testDataTable);
          }
        }

        results["Prediction (training)"].Value = trainingList.AsReadOnly();
        results["Prediction (test)"].Value = testList.AsReadOnly();


        #region simplification of models
        // TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
        var models = new VariableCollection();    // to store target var names and original version of tree

        var clonedTrees = new List<ISymbolicExpressionTree>();
        for (int idx = 0; idx < trees.Length; idx++) {
          clonedTrees.Add((ISymbolicExpressionTree)trees[idx].Clone());
        }
        var ds = problemData.Dataset;
        var newProblemData = new RegressionProblemData((IDataset)ds.Clone(), problemData.AllowedInputVariables, problemData.TargetVariable);
        results["Solution"].Value = new Solution(clonedTrees.ToArray(),
                   // optTheta,
                   newProblemData,
                   targetVars,
                   latentVariables,
                   TrainingEpisodes,
                   OdeSolver,
                   NumericIntegrationSteps);


        for (int idx = 0; idx < trees.Length; idx++) {
          var varName = string.Empty;
          if (idx < targetVars.Length) {
            varName = targetVars[idx];
          } else {
            varName = latentVariables[idx - targetVars.Length];
          }
          var tree = trees[idx];

          var origTreeVar = new HeuristicLab.Core.Variable(varName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
          var simplifiedTreeVar = new HeuristicLab.Core.Variable(varName + "(simplified)");
          simplifiedTreeVar.Value = TreeSimplifier.Simplify(tree);
          models.Add(simplifiedTreeVar);
        }

        results["Models"].Value = models;
        #endregion

        #region produce classical solutions to allow visualization with PDP
        for (int treeIdx = 0; treeIdx < targetVars.Length; treeIdx++) {
          var t = (ISymbolicExpressionTree)trees[treeIdx].Clone();
          var name = targetVars.Concat(latentVariables).ElementAt(treeIdx); // whatever
          var model = new SymbolicRegressionModel(name + "_diff", t, new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
          var solutionDataset = ((Dataset)problemData.Dataset).ToModifiable();
          var absValues = solutionDataset.GetDoubleValues(name).ToArray();
          solutionDataset.AddVariable(name + "_diff", absValues.Skip(1).Zip(absValues, (v1, v0) => v1 - v0).Concat(new double[] { 0.0 }).ToList());
          var solutionProblemData = new RegressionProblemData(solutionDataset, problemData.AllowedInputVariables, name + "_diff");
          var solution = model.CreateRegressionSolution(solutionProblemData);
          results.AddOrUpdateResult("Solution " + name, solution);
        }
        #endregion
      }
    }

    #region interpretation

    // the following uses auto-diff to calculate the gradient w.r.t. the parameters forward in time.
    // this is basically the method described in Gronwall T. Note on the derivatives with respect to a parameter of the solutions of a system of differential equations. Ann. Math. 1919;20:292–296.

    // a comparison of three potential calculation methods for the gradient is given in:
    // Sengupta, B., Friston, K. J., & Penny, W. D. (2014). Efficient gradient computation for dynamical models. Neuroimage, 98(100), 521–527. http://doi.org/10.1016/j.neuroimage.2014.04.040
    // "Our comparison establishes that the adjoint method is computationally more efficient for numerical estimation of parametric gradients
    // for state-space models — both linear and non-linear, as in the case of a dynamical causal model (DCM)"

    // for a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes

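    // Forward sensitivities: in addition to y' = f(y,θ) we propagate the sensitivity equations
    //   d/dt (∂y/∂θ) = (∂f/∂y)·(∂y/∂θ) + ∂f/∂θ,
    // where the right-hand side is obtained by auto-diff of the trees (the Vector type carries ∂/∂θ).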
    public static IEnumerable<Tuple<double, Vector>[]> Integrate(OptimizationData optimizationData) {
      var nTargets = optimizationData.targetVariables.Length;
      var n = optimizationData.rows.Length * optimizationData.targetVariables.Length;
      var d = optimizationData.nodeValueLookup.ParameterCount;
      double[] fi = new double[n];
      double[,] jac = new double[n, d];
      Integrate(optimizationData, fi, jac, null);
      for (int i = 0; i < optimizationData.rows.Length; i++) {
        var res = new Tuple<double, Vector>[nTargets];
        for (int j = 0; j < nTargets; j++) {
          res[j] = Tuple.Create(fi[i * nTargets + j], Vector.CreateFromMatrixRow(jac, i * nTargets + j));
        }
        yield return res;
      }
    }

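    // fi is filled row-major with one prediction per (row, target variable) pair; jac holds the
    // corresponding gradients w.r.t. the parameters; latentValues (optional) records the
    // trajectories of the latent variables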
    public static void Integrate(OptimizationData optimizationData, double[] fi, double[,] jac, double[,] latentValues) {
      var trees = optimizationData.trees;
      var dataset = optimizationData.problemData.Dataset;
      var inputVariables = optimizationData.variables;
      var targetVariables = optimizationData.targetVariables;
      var latentVariables = optimizationData.latentVariables;
      var episodes = optimizationData.episodes;
      var odeSolver = optimizationData.odeSolver;
      var numericIntegrationSteps = optimizationData.numericIntegrationSteps;
      var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must coincide with the order of trees in the encoding
[16600]868
[16610]869
870
[16601]871      var nodeValues = optimizationData.nodeValueLookup;
872
[16250]873      // TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver
[16610]874      var outputRowIdx = 0;
[16329]875      var episodeIdx = 0;
[16600]876      foreach (var episode in optimizationData.episodes) {
[16601]877        var rows = Enumerable.Range(episode.Start, episode.End - episode.Start).ToArray();
[15968]878
[16601]879        var t0 = rows.First();
[15964]880
[16601]881        // initialize values for inputs and targets from dataset
[16604]882        foreach (var varName in inputVariables) {
[16653]883          // in this problem we also allow fixed numeric parameters (represented as variables with the value as name)
884          if (double.TryParse(varName, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
885            nodeValues.SetVariableValue(varName, value, Vector.Zero);
886          } else {
887            var y0 = dataset.GetDoubleValue(varName, t0);
888            nodeValues.SetVariableValue(varName, y0, Vector.Zero);
889          }
[16153]890        }
[16610]891        foreach (var varName in targetVariables) {
[16604]892          var y0 = dataset.GetDoubleValue(varName, t0);
893          nodeValues.SetVariableValue(varName, y0, Vector.Zero);
[16601]894
[16604]895          // output starting value
896          fi[outputRowIdx] = y0;
897          Vector.Zero.CopyTo(jac, outputRowIdx);
898
899          outputRowIdx++;
900        }
901
[16660]902        var latentValueRowIdx = 0;
903        var latentValueColIdx = 0;
904        foreach (var varName in latentVariables) {
905          var y0 = 0.0; // assume we start at zero
906          nodeValues.SetVariableValue(varName, y0, Vector.Zero);
907
908          if (latentValues != null) {
909            latentValues[latentValueRowIdx, latentValueColIdx++] = y0;
910          }
911        }
912        latentValueColIdx = 0; latentValueRowIdx++;
913
[16603]914        { // CODE BELOW DOESN'T WORK ANYMORE
915          // if (latentVariables.Length > 0) throw new NotImplementedException();
916          //
917          // // add value entries for latent variables which are also integrated
918          // // initial values are at the end of the parameter vector
919          // // separate initial values for each episode
920          // var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
921          // foreach (var latentVar in latentVariables) {
922          //   var arr = new double[parameterValues.Length]; // backing array
923          //   arr[initialValueIdx] = 1.0;
924          //   var g = new Vector(arr);
925          //   nodeValues.SetVariableValue(latentVar, parameterValues[initialValueIdx], g); // we don't have observations for latent variables therefore we optimize the initial value for each episode
926          //   initialValueIdx++;
927          // }
[16153]928        }
[16329]929
[16601]930        var prevT = t0; // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements.
[16215]931        foreach (var t in rows.Skip(1)) {
[16250]932          if (odeSolver == "HeuristicLab")
[16601]933            IntegrateHL(trees, calculatedVariables, nodeValues, numericIntegrationSteps); // integrator updates nodeValues
[16250]934          else if (odeSolver == "CVODES")
[16597]935            throw new NotImplementedException();
936          // IntegrateCVODES(trees, calculatedVariables, variableValues, parameterValues, t - prevT);
[16250]937          else throw new InvalidOperationException("Unknown ODE solver " + odeSolver);
[16253]938          prevT = t;
[15964]939
[16660]940          // update output for target variables (TODO: if we want to visualize the latent variables then we need to provide a separate output)
941          for (int i = 0; i < targetVariables.Length; i++) {
942            var targetVar = targetVariables[i];
[16604]943            var yt = nodeValues.GetVariableValue(targetVar);
[16601]944
[16604]945            // fill up remaining rows with last valid value if there are invalid values
946            if (double.IsNaN(yt.Item1) || double.IsInfinity(yt.Item1)) {
947              for (; outputRowIdx < fi.Length; outputRowIdx++) {
[16660]948                var prevIdx = outputRowIdx - targetVariables.Length;
[16604]949                fi[outputRowIdx] = fi[prevIdx]; // current <- prev
950                if (jac != null) for (int j = 0; j < jac.GetLength(1); j++) jac[outputRowIdx, j] = jac[prevIdx, j];
951              }
952              return;
953            }
[16601]954
[16604]955            fi[outputRowIdx] = yt.Item1;
956            var g = yt.Item2;
957            g.CopyTo(jac, outputRowIdx);
958            outputRowIdx++;
959          }
[16660]960          if (latentValues != null) {
961            foreach (var latentVariable in latentVariables) {
962              var lt = nodeValues.GetVariableValue(latentVariable).Item1;
963              latentValues[latentValueRowIdx, latentValueColIdx++] = lt;
964            }
965            latentValueRowIdx++; latentValueColIdx = 0;
966          }
[15964]967
[16601]968          // update for next time step (only the inputs)
[16215]969          foreach (var varName in inputVariables) {
[16653]970            // in this problem we also allow fixed numeric parameters (represented as variables whose name is the numeric value)
971            if (double.TryParse(varName, NumberStyles.Float, CultureInfo.InvariantCulture, out double value)) {
972              // value is unchanged
973            } else {
974              nodeValues.SetVariableValue(varName, dataset.GetDoubleValue(varName, t), Vector.Zero);
975            }
[16153]976          }
[15964]977        }
[16329]978        episodeIdx++;
[15964]979      }
980    }
981
[16398]982    #region CVODES
[16253]983
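    // The CVODES-based solver below is currently disabled (see IntegrateHL for the active solver).
    // It integrated dy/dt = f(y,p) with the SUNDIALS CVODES library and used forward sensitivity
    // analysis for the gradients: the sensitivities s_i(t) = ∂y(t)/∂p_i satisfy
    //   ds_i/dt = (∂f/∂y)·s_i + ∂f/∂p_i,
    // which CVODES integrates alongside y(t).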
[16597]984    /*
[16253]985    /// <summary>
986    ///  Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization
987    /// </summary>
988    /// <param name="trees">Each equation in the ODE represented as a tree</param>
989    /// <param name="calculatedVariables">The names of the calculated variables</param>
990    /// <param name="variableValues">The start values of the calculated variables as well as their sensitivites over parameters</param>
991    /// <param name="parameterValues">The current parameter values</param>
992    /// <param name="t">The time t up to which we need to integrate.</param>
[16250]993    private static void IntegrateCVODES(
[16251]994      ISymbolicExpressionTree[] trees, // f(y,p) in tree representation
995      string[] calculatedVariables, // names of elements of y
996      Dictionary<string, Tuple<double, Vector>> variableValues,  //  y (intput and output) input: y(t0), output: y(t0+t)
997      double[] parameterValues, // p
998      double t // duration t for which we want to integrate
[16250]999      ) {
[16251]1000
[16250]1001      // the RHS of the ODE
[16251]1002      // dy/dt = f(y_t,x_t,p)
1003      CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, parameterValues);
1004      // the Jacobian ∂f/∂y
1005      CVODES.CVDlsJacFunc jac = CreateJac(trees, calculatedVariables, parameterValues);
1006
1007      // the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i
1008      CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, parameterValues);
1009
1010      // setup solver
1011      int numberOfEquations = trees.Length;
1012      IntPtr y = IntPtr.Zero;
1013      IntPtr cvode_mem = IntPtr.Zero;
1014      IntPtr A = IntPtr.Zero;
1015      IntPtr yS0 = IntPtr.Zero;
1016      IntPtr linearSolver = IntPtr.Zero;
1017      var ns = parameterValues.Length; // number of parameters
1018
1019      try {
1020        y = CVODES.N_VNew_Serial(numberOfEquations);
1021        // init y to current values of variables
1022        // y must be initialized before calling CVodeInit
1023        for (int i = 0; i < calculatedVariables.Length; i++) {
1024          CVODES.NV_Set_Ith_S(y, i, variableValues[calculatedVariables[i]].Item1);
1025        }
1026
1027        cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL);
1028
1029        var flag = CVODES.CVodeInit(cvode_mem, f, 0.0, y);
[16616]1030        Assert(CVODES.CV_SUCCESS == flag);
[16251]1031
1032        double relTol = 1.0e-2;
1033        double absTol = 1.0;
1034        flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol);  // TODO: probably need to adjust absTol per variable
[16616]1035        Assert(CVODES.CV_SUCCESS == flag);
[16251]1036
1037        A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations);
[16616]1038        Assert(A != IntPtr.Zero);
[16251]1039
1040        linearSolver = CVODES.SUNDenseLinearSolver(y, A);
[16616]1041        Assert(linearSolver != IntPtr.Zero);
[16251]1042
1043        flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A);
[16616]1044        Assert(CVODES.CV_SUCCESS == flag);
[16251]1045
1046        flag = CVODES.CVDlsSetJacFn(cvode_mem, jac);
[16616]1047        Assert(CVODES.CV_SUCCESS == flag);
[16251]1048
1049        yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter
1050        unsafe {
1051          // set to initial sensitivities supplied by caller
1052          for (int pIdx = 0; pIdx < ns; pIdx++) {
1053            var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx);
1054            for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
1055              CVODES.NV_Set_Ith_S(yS0_i, varIdx, variableValues[calculatedVariables[varIdx]].Item2[pIdx]);
1056            }
1057          }
1058        }
1059
1060        flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0);
[16616]1061        Assert(CVODES.CV_SUCCESS == flag);
[16251]1062
1063        flag = CVODES.CVodeSensEEtolerances(cvode_mem);
[16616]1064        Assert(CVODES.CV_SUCCESS == flag);
[16251]1065
1066        // make one forward integration step
1067        double tout = 0.0; // first output time
1068        flag = CVODES.CVode(cvode_mem, t, y, ref tout, CVODES.CV_NORMAL);
1069        if (flag == CVODES.CV_SUCCESS) {
[16616]1070          Assert(t == tout);
[16251]1071
1072          // get sensitivities
1073          flag = CVODES.CVodeGetSens(cvode_mem, ref tout, yS0);
[16616]1074          Assert(CVODES.CV_SUCCESS == flag);
[16251]1075
1076          // update variableValues based on integration results
1077          for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
1078            var yi = CVODES.NV_Get_Ith_S(y, varIdx);
1079            var gArr = new double[parameterValues.Length];
1080            for (var pIdx = 0; pIdx < parameterValues.Length; pIdx++) {
1081              unsafe {
1082                var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx);
1083                gArr[pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx);
1084              }
1085            }
1086            variableValues[calculatedVariables[varIdx]] = Tuple.Create(yi, new Vector(gArr));
1087          }
1088        } else {
1089          variableValues.Clear();   // indicate problems by not returning new values
1090        }
1091
1092        // cleanup all allocated objects
1093      } finally {
1094        if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y);
[16253]1095        if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem);
[16251]1096        if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver);
1097        if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A);
1098        if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns);
1099      }
[16250]1100    }
1101
[16251]1102
[16250]1103    private static CVODES.CVRhsFunc CreateOdeRhs(
1104      ISymbolicExpressionTree[] trees,
1105      string[] calculatedVariables,
1106      double[] parameterValues) {
[16398]1107      // we don't need to calculate a gradient here
[16250]1108      return (double t,
1109              IntPtr y, // N_Vector, current value of y (input)
1110              IntPtr ydot, // N_Vector, calculated value of y' (output)
1111              IntPtr user_data // optional user data (unused here)
1112              ) => {
[16251]1113                // TODO: perf
1114                var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
1115
1116                int pIdx = 0;
1117                foreach (var tree in trees) {
1118                  foreach (var n in tree.IterateNodesPrefix()) {
1119                    if (IsConstantNode(n)) {
1120                      nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we do not need a gradient
1121                      pIdx++;
1122                    } else if (n.SubtreeCount == 0) {
1123                      // for variables and latent variables get the value from variableValues
1124                      var varName = n.Symbol.Name;
1125                      var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
[16268]1126                      if (varIdx < 0) throw new InvalidProgramException();
[16251]1127                      var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
1128                      nodeValues.Add(n, Tuple.Create(y_i, Vector.Zero)); // no gradient needed
1129                    }
1130                  }
[16250]1131                }
1132                for (int i = 0; i < trees.Length; i++) {
1133                  var tree = trees[i];
[16251]1134                  var res_i = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
[16250]1135                  CVODES.NV_Set_Ith_S(ydot, i, res_i.Item1);
1136                }
1137                return 0;
1138              };
1139    }
1140
[16251]1141    private static CVODES.CVDlsJacFunc CreateJac(
1142      ISymbolicExpressionTree[] trees,
[16250]1143      string[] calculatedVariables,
[16251]1144      double[] parameterValues) {
1145
1146      return (
1147        double t, // current time (input)
1148        IntPtr y, // N_Vector, current value of y (input)
1149        IntPtr fy, // N_Vector, current value of f (input)
1150        IntPtr Jac, // SUNMatrix ∂f/∂y (output; row i contains the ∂f_i/∂y vector)
1151        IntPtr user_data, // optional (unused here)
1152        IntPtr tmp1, // N_Vector, optional (unused here)
1153        IntPtr tmp2, // N_Vector, optional (unused here)
1154        IntPtr tmp3 // N_Vector, optional (unused here)
1155      ) => {
1156        // here we need to calculate partial derivatives for the calculated variables y
1157        var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
1158        int pIdx = 0;
1159        foreach (var tree in trees) {
1160          foreach (var n in tree.IterateNodesPrefix()) {
1161            if (IsConstantNode(n)) {
1162              nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we need a gradient over y which is zero for parameters
1163              pIdx++;
1164            } else if (n.SubtreeCount == 0) {
1165              // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
1166              var varName = n.Symbol.Name;
1167              var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
[16268]1168              if (varIdx < 0) throw new InvalidProgramException();
1169
[16251]1170              var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
1171              var gArr = new double[CVODES.NV_LENGTH_S(y)]; // backing array
1172              gArr[varIdx] = 1.0;
1173              var g = new Vector(gArr);
1174              nodeValues.Add(n, Tuple.Create(y_i, g));
1175            }
1176          }
1177        }
1178
1179        for (int i = 0; i < trees.Length; i++) {
1180          var tree = trees[i];
1181          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
1182          var g = res.Item2;
1183          for (int j = 0; j < calculatedVariables.Length; j++) {
1184            CVODES.SUNDenseMatrix_Set(Jac, i, j, g[j]);
1185          }
1186        }
1187        return 0; // on success
1188      };
1189    }
1190
1191
1192    // to calculate sensitivities RHS for all equations at once
1193    // must compute (∂f/∂y)s_i(t) + ∂f/∂p_i and store in ySdot.
1194    // Index i refers to parameters, dimensionality of matrix and vectors is number of equations
1195    private static CVODES.CVSensRhsFn CreateSensitivityRhs(ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) {
1196      return (
1197              int Ns, // number of parameters
1198              double t, // current time
1199              IntPtr y, // N_Vector y(t) (input)
1200              IntPtr ydot, // N_Vector dy/dt(t) (input)
1201              IntPtr yS, // N_Vector*, one vector for each parameter (input)
1202              IntPtr ySdot, // N_Vector*, one vector for each parameter (output)
1203              IntPtr user_data, // optional (unused here)
1204              IntPtr tmp1, // N_Vector, optional (unused here)
1205              IntPtr tmp2 // N_Vector, optional (unused here)
1206        ) => {
1207          // here we need to calculate partial derivatives for the calculated variables y as well as for the parameters
1208          var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
1209          var d = calculatedVariables.Length + parameterValues.Length; // dimensionality of gradient
1210          // first collect variable values
1211          foreach (var tree in trees) {
1212            foreach (var n in tree.IterateNodesPrefix()) {
1213              if (IsVariableNode(n)) {
1214                // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
1215                var varName = n.Symbol.Name;
1216                var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
[16268]1217                if (varIdx < 0) throw new InvalidProgramException();
1218
[16251]1219                var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
1220                var gArr = new double[d]; // backing array
1221                gArr[varIdx] = 1.0;
1222                var g = new Vector(gArr);
1223                nodeValues.Add(n, Tuple.Create(y_i, g));
1224              }
1225            }
1226          }
1227          // then collect constants
1228          int pIdx = 0;
1229          foreach (var tree in trees) {
1230            foreach (var n in tree.IterateNodesPrefix()) {
1231              if (IsConstantNode(n)) {
1232                var gArr = new double[d];
1233                gArr[calculatedVariables.Length + pIdx] = 1.0;
1234                var g = new Vector(gArr);
1235                nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], g));
1236                pIdx++;
1237              }
1238            }
1239          }
1240          // gradient vector is [∂f/∂y_1, ∂f/∂y_2, ... ∂f/∂yN, ∂f/∂p_1 ... ∂f/∂p_K]
1241
1242
1243          for (pIdx = 0; pIdx < Ns; pIdx++) {
1244            unsafe {
1245              var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
1246              CVODES.N_VConst_Serial(0.0, sDot_pi);
1247            }
1248          }
1249
1250          for (int i = 0; i < trees.Length; i++) {
1251            var tree = trees[i];
1252            var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
1253            var g = res.Item2;
1254
1255
1256            // update ySdot = (∂f/∂y)s_i(t) + ∂f/∂p_i
1257
1258            for (pIdx = 0; pIdx < Ns; pIdx++) {
1259              unsafe {
1260                var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
1261                var s_pi = *((IntPtr*)yS.ToPointer() + pIdx);
1262
1263                var v = CVODES.NV_Get_Ith_S(sDot_pi, i);
1264                // (∂f/∂y)s_i(t)
1265                var p = 0.0;
1266                for (int yIdx = 0; yIdx < calculatedVariables.Length; yIdx++) {
1267                  p += g[yIdx] * CVODES.NV_Get_Ith_S(s_pi, yIdx);
1268                }
1269                // + ∂f/∂p_i
1270                CVODES.NV_Set_Ith_S(sDot_pi, i, v + p + g[calculatedVariables.Length + pIdx]);
1271              }
1272            }
1273
1274          }
1275          return 0; // on success
1276        };
1277    }
[16597]1278    */
[16398]1279    #endregion
[16251]1280
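    // IntegrateHL advances all calculated variables over one sampling interval (assumed to have
    // length 1, i.e. equidistant rows) using numericIntegrationSteps explicit Euler sub-steps of
    // size h = 1/numericIntegrationSteps; values and gradient vectors are updated together.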
1281    private static void IntegrateHL(
1282      ISymbolicExpressionTree[] trees,
1283      string[] calculatedVariables, // names of integrated variables
[16601]1284      NodeValueLookup nodeValues,
[16250]1285      int numericIntegrationSteps) {
[16251]1286
1287
[16597]1288      double[] deltaF = new double[calculatedVariables.Length];
1289      Vector[] deltaG = new Vector[calculatedVariables.Length];
[16251]1290
[16250]1291      double h = 1.0 / numericIntegrationSteps;
1292      for (int step = 0; step < numericIntegrationSteps; step++) {
[16601]1293
1294        // evaluate all trees
[16251]1295        for (int i = 0; i < trees.Length; i++) {
1296          var tree = trees[i];
1297
1298          // Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol
[16597]1299          double f; Vector g;
1300          InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues, out f, out g);
1301          deltaF[i] = f;
1302          deltaG[i] = g;
[16250]1303        }
1304
[16251]1305        // update variableValues for next step (explicit Euler integration step)
[16597]1306        for (int i = 0; i < trees.Length; i++) {
1307          var varName = calculatedVariables[i];
[16601]1308          var oldVal = nodeValues.GetVariableValue(varName);
[16604]1309          nodeValues.SetVariableValue(varName, oldVal.Item1 + h * deltaF[i], oldVal.Item2.Add(deltaG[i].Scale(h)));
[16250]1310        }
[16601]1311      }
1312    }
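    // worked example (sketch, not part of the implementation): for the scalar ODE dy/dt = θ·y with
    // y(t0) = 1, θ = 0.5 and numericIntegrationSteps = 2 (h = 0.5) a single call computes
    //   step 1: f = θ·y = 0.5,   df/dθ = y + θ·(∂y/∂θ) = 1.0  =>  y <- 1 + 0.5·0.5     = 1.25,   ∂y/∂θ <- 0   + 0.5·1.0 = 0.5
    //   step 2: f = θ·y = 0.625, df/dθ = y + θ·(∂y/∂θ) = 1.5  =>  y <- 1.25 + 0.5·0.625 = 1.5625, ∂y/∂θ <- 0.5 + 0.5·1.5 = 1.25
    // which approximates the exact values y(t0+1) = e^0.5 ≈ 1.649 and ∂y/∂θ = e^0.5 ≈ 1.649.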
[16398]1313
[16602]1314    // TODO: use an existing interpreter implementation instead
[16601]1315    private static double InterpretRec(ISymbolicExpressionTreeNode node, NodeValueLookup nodeValues) {
[16603]1316      if (node is ConstantTreeNode) {
1317        return ((ConstantTreeNode)node).Value;
[16604]1318      } else if (node is VariableTreeNode) {
[16602]1319        return nodeValues.NodeValue(node);
1320      } else if (node.Symbol is Addition) {
1321        var f = InterpretRec(node.GetSubtree(0), nodeValues);
[16652]1322        for (int i = 1; i < node.SubtreeCount; i++) {
1323          f += InterpretRec(node.GetSubtree(i), nodeValues);
1324        }
1325        return f;
[16602]1326      } else if (node.Symbol is Multiplication) {
1327        var f = InterpretRec(node.GetSubtree(0), nodeValues);
[16652]1328        for (int i = 1; i < node.SubtreeCount; i++) {
1329          f *= InterpretRec(node.GetSubtree(i), nodeValues);
1330        }
1331        return f;
[16602]1332      } else if (node.Symbol is Subtraction) {
1333        if (node.SubtreeCount == 1) {
[16652]1334          return -InterpretRec(node.GetSubtree(0), nodeValues);
[16602]1335        } else {
1336          var f = InterpretRec(node.GetSubtree(0), nodeValues);
[16652]1337          for (int i = 1; i < node.SubtreeCount; i++) {
1338            f -= InterpretRec(node.GetSubtree(i), nodeValues);
1339          }
1340          return f;
[16602]1341        }
1342      } else if (node.Symbol is Division) {
[16610]1343        if (node.SubtreeCount == 1) {
1344          var f = InterpretRec(node.GetSubtree(0), nodeValues);
1345          // protected division
1346          if (f.IsAlmost(0.0)) {
1347            return 0;
1348          } else {
1349            return 1.0 / f;
1350          }
[16602]1351        } else {
[16610]1352          var f = InterpretRec(node.GetSubtree(0), nodeValues);
[16652]1353          for (int i = 1; i < node.SubtreeCount; i++) {
1354            var g = InterpretRec(node.GetSubtree(i), nodeValues);
1355            // protected division
1356            if (g.IsAlmost(0.0)) {
1357              return 0;
1358            } else {
1359              f /= g;
1360            }
[16610]1361          }
[16652]1362          return f;
[16602]1363        }
1364      } else if (node.Symbol is Sine) {
[16616]1365        Assert(node.SubtreeCount == 1);
[16610]1366
[16602]1367        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1368        return Math.Sin(f);
1369      } else if (node.Symbol is Cosine) {
[16616]1370        Assert(node.SubtreeCount == 1);
[16610]1371
[16602]1372        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1373        return Math.Cos(f);
1374      } else if (node.Symbol is Square) {
[16616]1375        Assert(node.SubtreeCount == 1);
[16610]1376
[16602]1377        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1378        return f * f;
[16610]1379      } else if (node.Symbol is Exponential) {
[16616]1380        Assert(node.SubtreeCount == 1);
[16610]1381
1382        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1383        return Math.Exp(f);
1384      } else if (node.Symbol is Logarithm) {
[16616]1385        Assert(node.SubtreeCount == 1);
[16610]1386
1387        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1388        return Math.Log(f);
[16664]1389      } else if (node.Symbol is HyperbolicTangent) {
1390        Assert(node.SubtreeCount == 1);
1391
1392        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1393        return Math.Tanh(f);
1394      } else if (node.Symbol is AnalyticQuotient) {
1395        Assert(node.SubtreeCount == 2);
1396
1397        var f = InterpretRec(node.GetSubtree(0), nodeValues);
1398        var g = InterpretRec(node.GetSubtree(1), nodeValues);
1399        return f / Math.Sqrt(1 + g * g);
[16602]1400      } else throw new NotSupportedException("unsupported symbol: " + node.Symbol.Name);
[16250]1401    }
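    // note: division is "protected" - whenever a denominator is (almost) zero the whole quotient
    // evaluates to 0 instead of NaN/Infinity (e.g. x / (y - y) yields 0); the gradient-aware
    // overload below applies the same convention and additionally returns a zero gradient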
1402
[16616]1403    private static void Assert(bool cond) {
1404#if DEBUG
1405      if (!cond) throw new InvalidOperationException("Assertion failed");
1406#endif
1407    }
1408
[16597]1409    private static void InterpretRec(
[15964]1410      ISymbolicExpressionTreeNode node,
[16601]1411       NodeValueLookup nodeValues,      // contains value and gradient vector for a node (variables and constants only)
[16600]1412      out double z,
1413      out Vector dz
[16597]1414      ) {
[16600]1415      double f, g;
1416      Vector df, dg;
[16602]1417      if (node.Symbol is Constant || node.Symbol is Variable) {
1418        z = nodeValues.NodeValue(node);
[16604]1419        dz = Vector.CreateNew(nodeValues.NodeGradient(node)); // original gradient vectors are never changed by evaluation
[16602]1420      } else if (node.Symbol is Addition) {
1421        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
[16652]1422        for (int i = 1; i < node.SubtreeCount; i++) {
1423          InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1424          f = f + g;
1425          df = df.Add(dg);
1426        }
1427        z = f;
1428        dz = df;
[16602]1429      } else if (node.Symbol is Multiplication) {
1430        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
[16652]1431        for (int i = 1; i < node.SubtreeCount; i++) {
1432          InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1433                  df = df.Scale(g).Add(dg.Scale(f));  // product rule: (f*g)' = f'*g + f*g' (f must be used before it is updated)
1434                  f = f * g;
1435        }
1436        z = f;
1437        dz = df;
[16602]1438      } else if (node.Symbol is Subtraction) {
1439        if (node.SubtreeCount == 1) {
1440          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1441          z = -f;
[16604]1442          dz = df.Scale(-1.0);
[16602]1443        } else {
1444          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
[16652]1445          for (int i = 1; i < node.SubtreeCount; i++) {
1446            InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1447            f = f - g;
1448            df = df.Subtract(dg);
1449          }
1450          z = f;
1451          dz = df;
[16602]1452        }
1453      } else if (node.Symbol is Division) {
[16610]1454        if (node.SubtreeCount == 1) {
1455          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1456          // protected division
1457          if (f.IsAlmost(0.0)) {
1458            z = 0;
1459            dz = Vector.Zero;
1460          } else {
1461            z = 1.0 / f;
[16652]1462            dz = df.Scale(-1 * z * z);
[16610]1463          }
[16602]1464        } else {
[16610]1465          InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
[16652]1466          for (int i = 1; i < node.SubtreeCount; i++) {
1467            InterpretRec(node.GetSubtree(i), nodeValues, out g, out dg);
1468            // protected division
1469            if (g.IsAlmost(0.0)) {
1470              z = 0;
1471              dz = Vector.Zero;
1472              return;
1473            } else {
1474              var inv_g = 1.0 / g;
1475              df = dg.Scale(-f * inv_g * inv_g).Add(df.Scale(inv_g)); // quotient rule: (f/g)' = f'/g - f*g'/g² (f must be used before it is updated)
1476              f = f * inv_g;
1477            }
[16610]1478          }
[16652]1479          z = f;
1480          dz = df;
[16602]1481        }
1482      } else if (node.Symbol is Sine) {
[16616]1483        Assert(node.SubtreeCount == 1);
[16602]1484        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1485        z = Math.Sin(f);
[16604]1486        dz = df.Scale(Math.Cos(f));
[16602]1487      } else if (node.Symbol is Cosine) {
[16616]1488        Assert(node.SubtreeCount == 1);
[16602]1489        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1490        z = Math.Cos(f);
[16604]1491        dz = df.Scale(-Math.Sin(f));
[16602]1492      } else if (node.Symbol is Square) {
[16616]1493        Assert(node.SubtreeCount == 1);
[16602]1494        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1495        z = f * f;
[16604]1496        dz = df.Scale(2.0 * f);
[16610]1497      } else if (node.Symbol is Exponential) {
[16616]1498        Assert(node.SubtreeCount == 1);
[16610]1499        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1500        z = Math.Exp(f);
1501        dz = df.Scale(Math.Exp(f));
1502      } else if (node.Symbol is Logarithm) {
[16616]1503        Assert(node.SubtreeCount == 1);
[16610]1504        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1505        z = Math.Log(f);
1506        dz = df.Scale(1.0 / f);
[16664]1507      } else if (node.Symbol is HyperbolicTangent) {
1508        Assert(node.SubtreeCount == 1);
1509        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
1510        z = Math.Tanh(f);
1511        dz = df.Scale(1 - z * z); // tanh(f(x))' = f(x)'sech²(f(x)) = f(x)'(1 - tanh²(f(x)))
1512      } else if (node.Symbol is AnalyticQuotient) {
1513        Assert(node.SubtreeCount == 2);
1514        InterpretRec(node.GetSubtree(0), nodeValues, out f, out df);
[16785]1515        InterpretRec(node.GetSubtree(1), nodeValues, out g, out dg);
[16664]1516        z = f / Math.Sqrt(1 + g * g);
1517        var denom = 1.0 / Math.Pow(1 + g * g, 1.5);
1518        dz = df.Scale(1 + g * g).Subtract(dg.Scale(f * g)).Scale(denom);
[16602]1519      } else {
1520        throw new NotSupportedException("unsupported symbol: " + node.Symbol.Name);
[15964]1521      }
1522    }
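    // The overload above implements forward-mode automatic differentiation: every subtree is
    // evaluated to a pair (value z, gradient vector dz), where gradients are taken w.r.t. all
    // constants of all trees. Constants supply indicator seed vectors, variables supply the
    // sensitivities accumulated during integration, and every operator combines the child
    // gradients via the sum/product/chain rules.
    // Example (sketch): for the tree θ1·x + θ2 with x = 3 (x seeded with a zero gradient) the
    // result is z = 3·θ1 + θ2 and dz = (3, 1) w.r.t. (θ1, θ2).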
[16602]1523
[15968]1524    #endregion
[15964]1525
1526    #region events
[15968]1527    /*
1528     * Dependencies between parameters:
1529     *
1530     * ProblemData
1531     *    |
1532     *    V
[15970]1533     * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables
1534     *               |   |                 |                   |
1535     *               V   V                 |                   |
1536     *             Grammar <---------------+-------------------
[15968]1537     *                |
1538     *                V
1539     *            Encoding
1540     */
[15964]1541    private void RegisterEventHandlers() {
[15968]1542      ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged;
[16215]1543      if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed;
[15968]1544
1545      TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged;
[16215]1546      if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
[15968]1547
1548      FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged;
[16215]1549      if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
[15968]1550
1551      MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged;
[15970]1552
1553      NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged;
[15964]1554    }
1555
[15970]1556    private void NumLatentVariablesChanged(object sender, EventArgs e) {
1557      UpdateGrammarAndEncoding();
1558    }
1559
[15968]1560    private void MaximumLengthChanged(object sender, EventArgs e) {
1561      UpdateGrammarAndEncoding();
1562    }
1563
1564    private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) {
1565      FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
1566    }
1567
[16268]1568    private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
[15968]1569      UpdateGrammarAndEncoding();
1570    }
1571
1572    private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
1573      TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
1574    }
1575
[16268]1576    private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
[15968]1577      UpdateGrammarAndEncoding();
1578    }
1579
[15964]1580    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
[15968]1581      ProblemDataParameter.Value.Changed += ProblemData_Changed;
[15964]1582      OnProblemDataChanged();
1583      OnReset();
1584    }
1585
1586    private void ProblemData_Changed(object sender, EventArgs e) {
[15968]1587      OnProblemDataChanged();
[15964]1588      OnReset();
1589    }
1590
1591    private void OnProblemDataChanged() {
[15968]1592      UpdateTargetVariables();        // implicitly updates other dependent parameters
[15964]1593      var handler = ProblemDataChanged;
[16215]1594      if (handler != null) handler(this, EventArgs.Empty);
[15964]1595    }
1596
[15968]1597    #endregion
1598
1599    #region  helper
1600
[16660]1601    private static IEnumerable<T> EveryNth<T>(IEnumerable<T> xs, int step) {
1602      var e = xs.GetEnumerator();
1603      while (e.MoveNext()) {
1604        for (int i = 0; i < step; i++) {
1605          if (!e.MoveNext()) yield break;
1606        }
1607        yield return e.Current;
1608      }
1609    }
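    // note: EveryNth skips `step` items before each yielded item, i.e. it yields every
    // (step+1)-th element of the sequence, starting with the element at index `step`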
1610
[15968]1611    private void InitAllParameters() {
[16602]1612      UpdateTargetVariables(); // implicitly updates the grammar and the encoding
[15968]1613    }
1614
[16268]1615    private ReadOnlyCheckedItemList<StringValue> CreateFunctionSet() {
1616      var l = new CheckedItemList<StringValue>();
[16602]1617      l.Add(new StringValue("Addition").AsReadOnly());
1618      l.Add(new StringValue("Multiplication").AsReadOnly());
1619      l.Add(new StringValue("Division").AsReadOnly());
1620      l.Add(new StringValue("Subtraction").AsReadOnly());
1621      l.Add(new StringValue("Sine").AsReadOnly());
1622      l.Add(new StringValue("Cosine").AsReadOnly());
1623      l.Add(new StringValue("Square").AsReadOnly());
[16664]1624      l.Add(new StringValue("Logarithm").AsReadOnly());
1625      l.Add(new StringValue("Exponential").AsReadOnly());
1626      l.Add(new StringValue("HyperbolicTangent").AsReadOnly());
1627      l.Add(new StringValue("AnalyticQuotient").AsReadOnly());
[15968]1628      return l.AsReadOnly();
1629    }
1630
1631    private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
[16602]1632      // return n.Symbol.Name[0] == 'θ';
1633      return n is ConstantTreeNode;
[15968]1634    }
[16601]1635    private static double GetConstantValue(ISymbolicExpressionTreeNode n) {
[16602]1636      return ((ConstantTreeNode)n).Value;
[16601]1637    }
[15970]1638    private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
[16399]1639      return n.Symbol.Name[0] == 'λ';
[15970]1640    }
[16251]1641    private static bool IsVariableNode(ISymbolicExpressionTreeNode n) {
1642      return (n.SubtreeCount == 0) && !IsConstantNode(n) && !IsLatentVariableNode(n);
1643    }
[16601]1644    private static string GetVariableName(ISymbolicExpressionTreeNode n) {
[16602]1645      return ((VariableTreeNode)n).VariableName;
[16601]1646    }
[15968]1647
1648    private void UpdateTargetVariables() {
[16268]1649      var currentlySelectedVariables = TargetVariables.CheckedItems
1650        .OrderBy(i => i.Index)
1651        .Select(i => i.Value.Value)
1652        .ToArray();
[15968]1653
[16268]1654      var newVariablesList = new CheckedItemList<StringValue>(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly();
[15968]1655      // restore the checked state of the variables that were selected before
[16597]1656      foreach (var item in newVariablesList) {
1657        newVariablesList.SetItemCheckedState(item, currentlySelectedVariables.Contains(item.Value));
1658      }
1663      TargetVariablesParameter.Value = newVariablesList;
1664    }
1665
1666    private void UpdateGrammarAndEncoding() {
1667      var encoding = new MultiEncoding();
1668      var g = CreateGrammar();
[16215]1669      foreach (var targetVar in TargetVariables.CheckedItems) {
[16603]1670        var e = new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength);
1671        var multiManipulator = e.Operators.Where(op => op is MultiSymbolicExpressionTreeManipulator).First();
1672        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
1673        // make sure our multi-manipulator is the only manipulator
1674        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
[16616]1675
1676        // set the crossover probability to reduce likelihood that multiple trees are crossed at the same time
1677        var subtreeCrossovers = e.Operators.OfType<SubtreeCrossover>();
1678        foreach (var xover in subtreeCrossovers) {
1679          xover.CrossoverProbability.Value = 0.3;
1680        }
1681
[16603]1682        encoding = encoding.Add(e); // only limit by length
[15968]1683      }
[16215]1684      for (int i = 1; i <= NumberOfLatentVariables; i++) {
[16603]1685        var e = new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength);
1686        var multiManipulator = e.Operators.Where(op => op is MultiSymbolicExpressionTreeManipulator).First();
1687        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
1688        // make sure our multi-manipulator is the only manipulator
1689        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
[16616]1690
1691        // set the crossover probability to reduce likelihood that multiple trees are crossed at the same time
1692        var subtreeCrossovers = e.Operators.OfType<SubtreeCrossover>();
1693        foreach (var xover in subtreeCrossovers) {
1694          xover.CrossoverProbability.Value = 0.3;
1695        }
1696
[16603]1697        encoding = encoding.Add(e);
[15970]1698      }
[15968]1699      Encoding = encoding;
1700    }
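    // the resulting MultiEncoding contains one tree per target variable (named after the target
    // with suffix "_tree") followed by one tree per latent variable ("λi_tree"); the integration
    // code relies on exactly this ordering (cf. the TODO next to calculatedVariables above)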
1701
1702    private ISymbolicExpressionGrammar CreateGrammar() {
[16602]1703      var grammar = new TypeCoherentExpressionGrammar();
1704      grammar.StartGrammarManipulation();
1705
1706      var problemData = ProblemData;
1707      var ds = problemData.Dataset;
1708      grammar.MaximumFunctionArguments = 0;
1709      grammar.MaximumFunctionDefinitions = 0;
1710      var allowedVariables = problemData.AllowedInputVariables.Concat(TargetVariables.CheckedItems.Select(chk => chk.Value.Value));
1711      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableBase>()) {
1712        if (!varSymbol.Fixed) {
1713          varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
1714          varSymbol.VariableNames = allowedVariables.Where(x => ds.VariableHasType<double>(x));
1715        }
[16597]1716      }
[16602]1717      foreach (var factorSymbol in grammar.Symbols.OfType<BinaryFactorVariable>()) {
1718        if (!factorSymbol.Fixed) {
1719          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
1720          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
1721          factorSymbol.VariableValues = factorSymbol.VariableNames
1722            .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList());
1723        }
[16597]1724      }
[16602]1725      foreach (var factorSymbol in grammar.Symbols.OfType<FactorVariable>()) {
1726        if (!factorSymbol.Fixed) {
1727          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
1728          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
1729          factorSymbol.VariableValues = factorSymbol.VariableNames
1730            .ToDictionary(varName => varName,
1731            varName => ds.GetStringValues(varName).Distinct()
1732            .Select((n, i) => Tuple.Create(n, i))
1733            .ToDictionary(tup => tup.Item1, tup => tup.Item2));
1734        }
[15964]1735      }
[15970]1736
[16602]1737      grammar.ConfigureAsDefaultRegressionGrammar();
1738      grammar.GetSymbol("Logarithm").Enabled = false; // not supported yet
1739      grammar.GetSymbol("Exponential").Enabled = false; // not supported yet
[15970]1740
[16602]1741      // configure initialization of constants
1742      var constSy = (Constant)grammar.GetSymbol("Constant");
1743      // max and min are only relevant for initialization
1744      constSy.MaxValue = +1.0e-1; // small initial values for constant opt
1745      constSy.MinValue = -1.0e-1;
1746      constSy.MultiplicativeManipulatorSigma = 1.0; // allow large jumps for manipulation
1747      constSy.ManipulatorMu = 0.0;
1748      constSy.ManipulatorSigma = 1.0; // allow large jumps
[15968]1749
[16602]1750      // configure initialization of variables
1751      var varSy = (Variable)grammar.GetSymbol("Variable");
1752      // fix variable weights to 1.0
1753      varSy.WeightMu = 1.0;
1754      varSy.WeightSigma = 0.0;
1755      varSy.WeightManipulatorMu = 0.0;
1756      varSy.WeightManipulatorSigma = 0.0;
1757      varSy.MultiplicativeWeightManipulatorSigma = 0.0;
[16251]1758
[16602]1759      foreach (var f in FunctionSet) {
1760        grammar.GetSymbol(f.Value).Enabled = FunctionSet.ItemChecked(f);
[16399]1761      }
1762
[16602]1763      grammar.FinishedGrammarManipulation();
1764      return grammar;
1765      // // whenever ProblemData is changed we create a new grammar with the necessary symbols
1766      // var g = new SimpleSymbolicExpressionGrammar();
1767      // var unaryFunc = new string[] { "sin", "cos", "sqr" };
1768      // var binaryFunc = new string[] { "+", "-", "*", "%" };
1769      // foreach (var func in unaryFunc) {
1770      //   if (FunctionSet.CheckedItems.Any(ci => ci.Value.Value == func)) g.AddSymbol(func, 1, 1);
1771      // }
1772      // foreach (var func in binaryFunc) {
1773      //   if (FunctionSet.CheckedItems.Any(ci => ci.Value.Value == func)) g.AddSymbol(func, 2, 2);
1774      // }
1775      //
1776      // foreach (var variableName in ProblemData.AllowedInputVariables.Union(TargetVariables.CheckedItems.Select(i => i.Value.Value)))
1777      //   g.AddTerminalSymbol(variableName);
1778      //
1779      // // generate symbols for numeric parameters for which the value is optimized using AutoDiff
1780      // // we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees
1781      // var numericConstantsFactor = 2.0;
1782      // for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + TargetVariables.CheckedItems.Count()); i++) {
1783      //   g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
1784      // }
1785      //
1786      // // generate symbols for latent variables
1787      // for (int i = 1; i <= NumberOfLatentVariables; i++) {
1788      //   g.AddTerminalSymbol("λ" + i); // numeric parameter for which the value is optimized using AutoDiff
1789      // }
1790      //
1791      // return g;
[16251]1792    }
[15964]1793    #endregion
1794
[16601]1795
[15964]1796    #region Import & Export
1797    public void Load(IRegressionProblemData data) {
1798      Name = data.Name;
1799      Description = data.Description;
1800      ProblemData = data;
1801    }
1802
1803    public IRegressionProblemData Export() {
1804      return ProblemData;
1805    }
[16601]1806    #endregion
[16600]1807
[16601]1808
1809    // TODO: for integration we only need a part of the data that we need for optimization
1810
[16600]1811    public class OptimizationData {
1812      public readonly ISymbolicExpressionTree[] trees;
1813      public readonly string[] targetVariables;
1814      public readonly IRegressionProblemData problemData;
[16601]1815      public readonly double[][] targetValues;
[16603]1816      public readonly double[] inverseStandardDeviation;
[16600]1817      public readonly IntRange[] episodes;
1818      public readonly int numericIntegrationSteps;
1819      public readonly string[] latentVariables;
1820      public readonly string odeSolver;
[16601]1821      public readonly NodeValueLookup nodeValueLookup;
[16603]1822      public readonly int[] rows;
[16610]1823      internal readonly string[] variables;
[16600]1824
[16610]1825      public OptimizationData(ISymbolicExpressionTree[] trees, string[] targetVars, string[] inputVariables,
1826        IRegressionProblemData problemData,
[16601]1827        double[][] targetValues,
1828        IntRange[] episodes,
1829        int numericIntegrationSteps, string[] latentVariables, string odeSolver) {
[16600]1830        this.trees = trees;
1831        this.targetVariables = targetVars;
1832        this.problemData = problemData;
1833        this.targetValues = targetValues;
[16610]1834        this.variables = inputVariables;
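        // targets are weighted by the inverse of the average per-episode standard deviation so that
        // differently scaled target variables contribute comparably to the fitness (an NMSE-style
        // normalization)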
[16616]1835        if (targetValues != null) {
1836          this.inverseStandardDeviation = new double[targetValues.Length];
1837          for (int i = 0; i < targetValues.Length; i++) {
1838            // calculate the standard deviation for each episode separately and average them
1839            var epStartIdx = 0;
1840            var stdevs = new List<double>();
1841            foreach (var ep in episodes) {
1842              var epValues = targetValues[i].Skip(epStartIdx).Take(ep.Size);
1843              stdevs.Add(epValues.StandardDeviation());
1844              epStartIdx += ep.Size;
1845            }
1846            inverseStandardDeviation[i] = 1.0 / stdevs.Average();
1847          }
1848        } else
1849          this.inverseStandardDeviation = Enumerable.Repeat(1.0, trees.Length).ToArray();
[16600]1850        this.episodes = episodes;
1851        this.numericIntegrationSteps = numericIntegrationSteps;
1852        this.latentVariables = latentVariables;
1853        this.odeSolver = odeSolver;
[16601]1854        this.nodeValueLookup = new NodeValueLookup(trees);
[16604]1855        this.rows = episodes.SelectMany(ep => Enumerable.Range(ep.Start, ep.Size)).ToArray();
[16600]1856      }
1857    }
[15964]1858
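    // NodeValueLookup maps each tree node to its current (value, gradient) pair. Constants get
    // fixed indicator gradient vectors (created once in the constructor), and all occurrences of
    // the same variable name share one value that is overwritten in every integration step.
    // Usage sketch (hypothetical values):
    //   var lookup = new NodeValueLookup(trees);
    //   lookup.SetVariableValue("x", 3.0);                     // seed an input variable
    //   lookup.UpdateParamValues(new double[] { 0.1, -2.0 });  // overwrite all constants at once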
[16601]1859    public class NodeValueLookup {
1860      private readonly Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> node2val = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
1861      private readonly Dictionary<string, List<ISymbolicExpressionTreeNode>> name2nodes = new Dictionary<string, List<ISymbolicExpressionTreeNode>>();
[16603]1862      private readonly ConstantTreeNode[] constantNodes;
1863      private readonly Vector[] constantGradientVectors;
[16601]1864
[16603]1865      // private readonly Dictionary<int, ISymbolicExpressionTreeNode> paramIdx2node = new Dictionary<int, ISymbolicExpressionTreeNode>();
1866
[16601]1867      public double NodeValue(ISymbolicExpressionTreeNode node) => node2val[node].Item1;
1868      public Vector NodeGradient(ISymbolicExpressionTreeNode node) => node2val[node].Item2;
1869
1870      public NodeValueLookup(ISymbolicExpressionTree[] trees) {
1871
[16603]1872        this.constantNodes = trees.SelectMany(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>()).ToArray();
1873        constantGradientVectors = new Vector[constantNodes.Length];
[16604]1874        for (int paramIdx = 0; paramIdx < constantNodes.Length; paramIdx++) {
[16603]1875          constantGradientVectors[paramIdx] = Vector.CreateIndicator(length: constantNodes.Length, idx: paramIdx);
1876
1877          var node = constantNodes[paramIdx];
1878          node2val[node] = Tuple.Create(node.Value, constantGradientVectors[paramIdx]);
[16601]1879        }
1880
1881        foreach (var tree in trees) {
1882          foreach (var node in tree.IterateNodesPrefix().Where(IsVariableNode)) {
1883            var varName = GetVariableName(node);
1884            if (!name2nodes.TryGetValue(varName, out List<ISymbolicExpressionTreeNode> nodes)) {
1885              nodes = new List<ISymbolicExpressionTreeNode>();
1886              name2nodes.Add(varName, nodes);
1887            }
1888            nodes.Add(node);
[16602]1889            SetVariableValue(varName, 0.0);  // this value is updated in the prediction loop
[16601]1890          }
1891        }
1892      }
1893
[16603]1894      public int ParameterCount => constantNodes.Length;
[16601]1895
1896      public void SetVariableValue(string variableName, double val) {
1897        SetVariableValue(variableName, val, Vector.Zero);
1898      }
1899      public Tuple<double, Vector> GetVariableValue(string variableName) {
1900        return node2val[name2nodes[variableName].First()];
1901      }
1902      public void SetVariableValue(string variableName, double val, Vector dVal) {
1903        if (name2nodes.TryGetValue(variableName, out List<ISymbolicExpressionTreeNode> nodes)) {
1904          nodes.ForEach(n => node2val[n] = Tuple.Create(val, dVal));
1905        } else {
[16602]1906          var fakeNode = new VariableTreeNode(new Variable());
[16610]1907          fakeNode.Weight = 1.0;
1908          fakeNode.VariableName = variableName;
[16601]1909          var newNodeList = new List<ISymbolicExpressionTreeNode>();
1910          newNodeList.Add(fakeNode);
1911          name2nodes.Add(variableName, newNodeList);
1912          node2val[fakeNode] = Tuple.Create(val, dVal);
1913        }
1914      }
1915
1916      internal void UpdateParamValues(double[] x) {
[16603]1917        for (int i = 0; i < x.Length; i++) {
1918          constantNodes[i].Value = x[i];
1919          node2val[constantNodes[i]] = Tuple.Create(x[i], constantGradientVectors[i]);
[16601]1920        }
1921      }
1922    }
[15964]1923  }
1924}