#region License Information /* HeuristicLab * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using HeuristicLab.Analysis; using HeuristicLab.Collections; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis; using HeuristicLab.Problems.DataAnalysis.Symbolic; using HeuristicLab.Problems.Instances; using Variable = HeuristicLab.Problems.DataAnalysis.Symbolic.Variable; namespace HeuristicLab.Problems.DynamicalSystemsModelling { // Eine weitere Möglichkeit ist spline-smoothing der Daten (über Zeit) um damit für jede Zielvariable // einen bereinigten (Rauschen) Wert und die Ableitung dy/dt für alle Beobachtungen zu bekommen // danach kann man die Regression direkt für dy/dt anwenden (mit bereinigten y als inputs) [Item("Dynamical Systems Modelling Problem", "TODO")] [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)] [StorableClass] public sealed class Problem : SingleObjectiveBasicProblem, IRegressionProblem, IProblemInstanceConsumer, IProblemInstanceExporter { #region parameter names private const string ProblemDataParameterName = "Data"; private const string TargetVariablesParameterName = "Target variables"; private const string FunctionSetParameterName = "Function set"; private const string MaximumLengthParameterName = "Size limit"; private const string MaximumParameterOptimizationIterationsParameterName = "Max. parameter optimization iterations"; private const string NumberOfLatentVariablesParameterName = "Number of latent variables"; private const string NumericIntegrationStepsParameterName = "Steps for numeric integration"; private const string TrainingEpisodesParameterName = "Training episodes"; private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes"; private const string OdeSolverParameterName = "ODE Solver"; #endregion #region Parameter Properties IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } } public IValueParameter ProblemDataParameter { get { return (IValueParameter)Parameters[ProblemDataParameterName]; } } public IValueParameter> TargetVariablesParameter { get { return (IValueParameter>)Parameters[TargetVariablesParameterName]; } } public IValueParameter> FunctionSetParameter { get { return (IValueParameter>)Parameters[FunctionSetParameterName]; } } public IFixedValueParameter MaximumLengthParameter { get { return (IFixedValueParameter)Parameters[MaximumLengthParameterName]; } } public IFixedValueParameter MaximumParameterOptimizationIterationsParameter { get { return (IFixedValueParameter)Parameters[MaximumParameterOptimizationIterationsParameterName]; } } public IFixedValueParameter NumberOfLatentVariablesParameter { get { return (IFixedValueParameter)Parameters[NumberOfLatentVariablesParameterName]; } } public IFixedValueParameter NumericIntegrationStepsParameter { get { return (IFixedValueParameter)Parameters[NumericIntegrationStepsParameterName]; } } public IValueParameter> TrainingEpisodesParameter { get { return (IValueParameter>)Parameters[TrainingEpisodesParameterName]; } } public IFixedValueParameter OptimizeParametersForEpisodesParameter { get { return (IFixedValueParameter)Parameters[OptimizeParametersForEpisodesParameterName]; } } public IConstrainedValueParameter OdeSolverParameter { get { return (IConstrainedValueParameter)Parameters[OdeSolverParameterName]; } } #endregion #region Properties public IRegressionProblemData ProblemData { get { return ProblemDataParameter.Value; } set { ProblemDataParameter.Value = value; } } IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } } public ReadOnlyCheckedItemList TargetVariables { get { return TargetVariablesParameter.Value; } } public ReadOnlyCheckedItemList FunctionSet { get { return FunctionSetParameter.Value; } } public int MaximumLength { get { return MaximumLengthParameter.Value.Value; } } public int MaximumParameterOptimizationIterations { get { return MaximumParameterOptimizationIterationsParameter.Value.Value; } } public int NumberOfLatentVariables { get { return NumberOfLatentVariablesParameter.Value.Value; } } public int NumericIntegrationSteps { get { return NumericIntegrationStepsParameter.Value.Value; } } public IEnumerable TrainingEpisodes { get { return TrainingEpisodesParameter.Value; } } public bool OptimizeParametersForEpisodes { get { return OptimizeParametersForEpisodesParameter.Value.Value; } } public string OdeSolver { get { return OdeSolverParameter.Value.Value; } set { var matchingValue = OdeSolverParameter.ValidValues.FirstOrDefault(v => v.Value == value); if (matchingValue == null) throw new ArgumentOutOfRangeException(); else OdeSolverParameter.Value = matchingValue; } } #endregion public event EventHandler ProblemDataChanged; public override bool Maximization { get { return false; } // we minimize NMSE } #region item cloning and persistence // persistence [StorableConstructor] private Problem(bool deserializing) : base(deserializing) { } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { if (!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) { Parameters.Add(new FixedValueParameter(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false))); } RegisterEventHandlers(); } // cloning private Problem(Problem original, Cloner cloner) : base(original, cloner) { RegisterEventHandlers(); } public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); } #endregion public Problem() : base() { var targetVariables = new CheckedItemList().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem var functions = CreateFunctionSet(); Parameters.Add(new ValueParameter(ProblemDataParameterName, "The data captured from the dynamical system. Use CSV import functionality to import data.", new RegressionProblemData())); Parameters.Add(new ValueParameter>(TargetVariablesParameterName, "Target variables (overrides setting in ProblemData)", targetVariables)); Parameters.Add(new ValueParameter>(FunctionSetParameterName, "The list of allowed functions", functions)); Parameters.Add(new FixedValueParameter(MaximumLengthParameterName, "The maximally allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10))); Parameters.Add(new FixedValueParameter(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters (using L-BFGS). More iterations makes the algorithm slower, fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100))); Parameters.Add(new FixedValueParameter(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated up and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference to target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 .. 5) as necessary (default = 0)", new IntValue(0))); Parameters.Add(new FixedValueParameter(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps makes the algorithm slower, less steps worsens the accuracy of the numeric integration scheme.", new IntValue(10))); Parameters.Add(new ValueParameter>(TrainingEpisodesParameterName, "A list of ranges that should be used for training, each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList())); Parameters.Add(new FixedValueParameter(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false))); var solversStr = new string[] { "HeuristicLab", "CVODES" }; var solvers = new ItemSet( solversStr.Select(s => new StringValue(s).AsReadOnly()) ); Parameters.Add(new ConstrainedValueParameter(OdeSolverParameterName, "The solver to use for solving the initial value ODE problems", solvers, solvers.First())); RegisterEventHandlers(); InitAllParameters(); // TODO: do not clear selection of target variables when the input variables are changed (keep selected target variables) // TODO: UI hangs when selecting / deselecting input variables because the encoding is updated on each item // TODO: use training range as default training episode // TODO: write back optimized parameters to solution? // TODO: optimization of starting values for latent variables in CVODES solver } public override double Evaluate(Individual individual, IRandom random) { var trees = individual.Values.Select(v => v.Value).OfType().ToArray(); // extract all trees from individual // write back optimized parameters to tree nodes instead of the separate OptTheta variable // retreive optimized parameters from nodes? if (OptimizeParametersForEpisodes) { int eIdx = 0; double totalNMSE = 0.0; int totalSize = 0; foreach (var episode in TrainingEpisodes) { double[] optTheta; double nmse; OptimizeForEpisodes(trees, random, new[] { episode }, out optTheta, out nmse); individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method eIdx++; totalNMSE += nmse * episode.Size; totalSize += episode.Size; } return totalNMSE / totalSize; } else { double[] optTheta; double nmse; OptimizeForEpisodes(trees, random, TrainingEpisodes, out optTheta, out nmse); individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method return nmse; } } private void OptimizeForEpisodes( ISymbolicExpressionTree[] trees, IRandom random, IEnumerable episodes, out double[] optTheta, out double nmse) { var rows = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)).ToArray(); var problemData = ProblemData; var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray(); var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees var targetValues = new double[rows.Length, targetVars.Length]; // collect values of all target variables var colIdx = 0; foreach (var targetVar in targetVars) { int rowIdx = 0; foreach (var value in problemData.Dataset.GetDoubleValues(targetVar, rows)) { targetValues[rowIdx, colIdx] = value; rowIdx++; } colIdx++; } // NOTE: the order of values in parameter matches prefix order of constant nodes in trees var paramNodes = new List(); foreach (var t in trees) { foreach (var n in t.IterateNodesPrefix()) { if (IsConstantNode(n)) paramNodes.Add(n); } } // init params randomly // theta contains parameter values for trees and then the initial values for latent variables (a separate vector for each episode) // inital values for latent variables are also optimized var theta = new double[paramNodes.Count + latentVariables.Length * episodes.Count()]; for (int i = 0; i < theta.Length; i++) theta[i] = random.NextDouble() * 2.0 - 1.0; optTheta = new double[0]; if (theta.Length > 0) { alglib.minlbfgsstate state; alglib.minlbfgsreport report; alglib.minlbfgscreate(Math.Min(theta.Length, 5), theta, out state); alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, MaximumParameterOptimizationIterations); //alglib.minlbfgssetgradientcheck(state, 1e-6); alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null, new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver }); //TODO: create a type alglib.minlbfgsresults(state, out optTheta, out report); /* * * L-BFGS algorithm results INPUT PARAMETERS: State - algorithm state OUTPUT PARAMETERS: X - array[0..N-1], solution Rep - optimization report: * Rep.TerminationType completetion code: * -7 gradient verification failed. See MinLBFGSSetGradientCheck() for more information. * -2 rounding errors prevent further improvement. X contains best point found. * -1 incorrect parameters were specified * 1 relative function improvement is no more than EpsF. * 2 relative step is no more than EpsX. * 4 gradient norm is no more than EpsG * 5 MaxIts steps was taken * 7 stopping conditions are too stringent, further improvement is impossible * Rep.IterationsCount contains iterations count * NFEV countains number of function calculations */ if (report.terminationtype < 0) { nmse = 10E6; return; } } // perform evaluation for optimal theta to get quality value double[] grad = new double[optTheta.Length]; nmse = double.NaN; EvaluateObjectiveAndGradient(optTheta, ref nmse, grad, new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver }); if (double.IsNaN(nmse) || double.IsInfinity(nmse)) { nmse = 10E6; return; } // return a large value (TODO: be consistent by using NMSE) } private static void EvaluateObjectiveAndGradient(double[] x, ref double f, double[] grad, object obj) { var trees = (ISymbolicExpressionTree[])((object[])obj)[0]; var targetVariables = (string[])((object[])obj)[1]; var problemData = (IRegressionProblemData)((object[])obj)[2]; var targetValues = (double[,])((object[])obj)[3]; var episodes = (IntRange[])((object[])obj)[4]; var numericIntegrationSteps = (int)((object[])obj)[5]; var latentVariables = (string[])((object[])obj)[6]; var odeSolver = (string)((object[])obj)[7]; Tuple[][] predicted = null; // one array of predictions for each episode // TODO: stop integration early for diverging solutions predicted = Integrate( trees, // we assume trees contain expressions for the change of each target variable over time y'(t) problemData.Dataset, problemData.AllowedInputVariables.ToArray(), targetVariables, latentVariables, episodes, x, odeSolver, numericIntegrationSteps).ToArray(); if (predicted.Length != targetValues.GetLength(0)) { f = double.MaxValue; Array.Clear(grad, 0, grad.Length); return; } // for normalized MSE = 1/variance(t) * MSE(t, pred) // TODO: Perf. (by standardization of target variables before evaluation of all trees) var invVar = Enumerable.Range(0, targetVariables.Length) .Select(c => Enumerable.Range(0, targetValues.GetLength(0)).Select(row => targetValues[row, c])) // column vectors .Select(vec => vec.Variance()) .Select(v => 1.0 / v) .ToArray(); // objective function is NMSE f = 0.0; int n = predicted.Length; double invN = 1.0 / n; var g = Vector.Zero; int r = 0; foreach (var y_pred in predicted) { // y_pred contains the predicted values for target variables first and then predicted values for latent variables for (int c = 0; c < targetVariables.Length; c++) { var y_pred_f = y_pred[c].Item1; var y = targetValues[r, c]; var res = (y - y_pred_f); var ressq = res * res; f += ressq * invN * invVar[c]; g += -2.0 * res * y_pred[c].Item2 * invN * invVar[c]; } r++; } g.CopyTo(grad); } public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) { base.Analyze(individuals, qualities, results, random); if (!results.ContainsKey("Prediction (training)")) { results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList))); } if (!results.ContainsKey("Prediction (test)")) { results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList))); } if (!results.ContainsKey("Models")) { results.Add(new Result("Models", typeof(VariableCollection))); } if(!results.ContainsKey("SNMSE")) { results.Add(new Result("SNMSE", typeof(DoubleValue))); } var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities); var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType().ToArray(); // extract all trees from individual results["SNMSE"].Value = new DoubleValue(bestIndividualAndQuality.Item2); var problemData = ProblemData; var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray(); var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees var trainingList = new ItemList(); if (OptimizeParametersForEpisodes) { var eIdx = 0; var trainingPredictions = new List[][]>(); foreach (var episode in TrainingEpisodes) { var episodes = new[] { episode }; var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta_" + eIdx]).ToArray(); // see evaluate var trainingPrediction = Integrate( trees, // we assume trees contain expressions for the change of each target variable over time y'(t) problemData.Dataset, problemData.AllowedInputVariables.ToArray(), targetVars, latentVariables, episodes, optTheta, OdeSolver, NumericIntegrationSteps).ToArray(); trainingPredictions.Add(trainingPrediction); eIdx++; } // only for target values var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)); for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) { var targetVar = targetVars[colIdx]; var trainingDataTable = new DataTable(targetVar + " prediction (training)"); var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows)); var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray()); trainingDataTable.Rows.Add(actualValuesRow); trainingDataTable.Rows.Add(predictedValuesRow); trainingList.Add(trainingDataTable); } results["Prediction (training)"].Value = trainingList.AsReadOnly(); var models = new VariableCollection(); foreach (var tup in targetVars.Zip(trees, Tuple.Create)) { var targetVarName = tup.Item1; var tree = tup.Item2; var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)"); origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone(); models.Add(origTreeVar); } results["Models"].Value = models; } else { var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see evaluate var trainingPrediction = Integrate( trees, // we assume trees contain expressions for the change of each target variable over time dy/dt problemData.Dataset, problemData.AllowedInputVariables.ToArray(), targetVars, latentVariables, TrainingEpisodes, optTheta, OdeSolver, NumericIntegrationSteps).ToArray(); // for target values and latent variables var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)); for (int colIdx = 0; colIdx < trees.Length; colIdx++) { // is target variable if (colIdx < targetVars.Length) { var targetVar = targetVars[colIdx]; var trainingDataTable = new DataTable(targetVar + " prediction (training)"); var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows)); var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray()); trainingDataTable.Rows.Add(actualValuesRow); trainingDataTable.Rows.Add(predictedValuesRow); trainingList.Add(trainingDataTable); } else { var latentVar = latentVariables[colIdx - targetVars.Length]; var trainingDataTable = new DataTable(latentVar + " prediction (training)"); var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray()); var emptyRow = new DataRow(latentVar); trainingDataTable.Rows.Add(emptyRow); trainingDataTable.Rows.Add(predictedValuesRow); trainingList.Add(trainingDataTable); } } // TODO: DRY for training and test var testList = new ItemList(); var testRows = ProblemData.TestIndices.ToArray(); var testPrediction = Integrate( trees, // we assume trees contain expressions for the change of each target variable over time y'(t) problemData.Dataset, problemData.AllowedInputVariables.ToArray(), targetVars, latentVariables, new IntRange[] { ProblemData.TestPartition }, optTheta, OdeSolver, NumericIntegrationSteps).ToArray(); for (int colIdx = 0; colIdx < trees.Length; colIdx++) { // is target variable if (colIdx < targetVars.Length) { var targetVar = targetVars[colIdx]; var testDataTable = new DataTable(targetVar + " prediction (test)"); var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows)); var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray()); testDataTable.Rows.Add(actualValuesRow); testDataTable.Rows.Add(predictedValuesRow); testList.Add(testDataTable); } else { var latentVar = latentVariables[colIdx - targetVars.Length]; var testDataTable = new DataTable(latentVar + " prediction (test)"); var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray()); var emptyRow = new DataRow(latentVar); testDataTable.Rows.Add(emptyRow); testDataTable.Rows.Add(predictedValuesRow); testList.Add(testDataTable); } } results["Prediction (training)"].Value = trainingList.AsReadOnly(); results["Prediction (test)"].Value = testList.AsReadOnly(); #region simplification of models // TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal var models = new VariableCollection(); // to store target var names and original version of tree int nextParIdx = 0; for (int idx = 0; idx < trees.Length; idx++) { var varName = string.Empty; if (idx < targetVars.Length) { varName = targetVars[idx]; } else { varName = latentVariables[idx - targetVars.Length]; } var tree = trees[idx]; // when we reference HeuristicLab.Problems.DataAnalysis.Symbolic we can translate symbols var shownTree = new SymbolicExpressionTree(TranslateTreeNode(tree.Root, optTheta.ToArray(), ref nextParIdx)); // var shownTree = (SymbolicExpressionTree)tree.Clone(); // var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode); // var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode); // // foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) { // double constantsVal = optTheta[nodeIdx[n.original]]; // // ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal }; // // var parentNode = n.shown.Parent; // int replacementIndex = parentNode.IndexOfSubtree(n.shown); // parentNode.RemoveSubtree(replacementIndex); // parentNode.InsertSubtree(replacementIndex, replacementNode); // } var origTreeVar = new HeuristicLab.Core.Variable(varName + "(original)"); origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone(); models.Add(origTreeVar); var simplifiedTreeVar = new HeuristicLab.Core.Variable(varName + "(simplified)"); simplifiedTreeVar.Value = TreeSimplifier.Simplify(shownTree); models.Add(simplifiedTreeVar); } results["Models"].Value = models; #endregion } } #region interpretation // the following uses auto-diff to calculate the gradient w.r.t. the parameters forward in time. // this is basically the method described in Gronwall T. Note on the derivatives with respect to a parameter of the solutions of a system of differential equations. Ann. Math. 1919;20:292–296. // a comparison of three potential calculation methods for the gradient is given in: // Sengupta, B., Friston, K. J., & Penny, W. D. (2014). Efficient gradient computation for dynamical models. Neuroimage, 98(100), 521–527. http://doi.org/10.1016/j.neuroimage.2014.04.040 // "Our comparison establishes that the adjoint method is computationally more efficient for numerical estimation of parametric gradients // for state-space models — both linear and non-linear, as in the case of a dynamical causal model (DCM)" // for a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes private static IEnumerable[]> Integrate( ISymbolicExpressionTree[] trees, IDataset dataset, string[] inputVariables, string[] targetVariables, string[] latentVariables, IEnumerable episodes, double[] parameterValues, string odeSolver, int numericIntegrationSteps = 100) { // TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver var episodeIdx = 0; foreach (var episode in episodes) { var rows = Enumerable.Range(episode.Start, episode.End - episode.Start); // integrate forward starting with known values for the target in t0 var variableValues = new Dictionary>(); var t0 = rows.First(); foreach (var varName in inputVariables) { variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero)); } foreach (var varName in targetVariables) { variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero)); } // add value entries for latent variables which are also integrated // initial values are at the end of the parameter vector // separete initial values for each episode var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length; foreach (var latentVar in latentVariables) { var arr = new double[parameterValues.Length]; // backing array arr[initialValueIdx] = 1.0; var g = new Vector(arr); variableValues.Add(latentVar, Tuple.Create(parameterValues[initialValueIdx], g)); // we don't have observations for latent variables therefore we optimize the initial value for each episode initialValueIdx++; } var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must conincide with the order of trees in the encoding // return first value as stored in the dataset yield return calculatedVariables .Select(calcVarName => variableValues[calcVarName]) .ToArray(); var prevT = rows.First(); // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements. foreach (var t in rows.Skip(1)) { if (odeSolver == "HeuristicLab") IntegrateHL(trees, calculatedVariables, variableValues, parameterValues, numericIntegrationSteps); else if (odeSolver == "CVODES") IntegrateCVODES(trees, calculatedVariables, variableValues, parameterValues, t - prevT); else throw new InvalidOperationException("Unknown ODE solver " + odeSolver); prevT = t; // This check doesn't work with the HeuristicLab integrator if there are input variables //if (variableValues.Count == targetVariables.Length) { // only return the target variables for calculation of errors var res = calculatedVariables .Select(targetVar => variableValues[targetVar]) .ToArray(); if (res.Any(ri => double.IsNaN(ri.Item1) || double.IsInfinity(ri.Item1))) yield break; yield return res; //} else { // yield break; // stop early on problems //} // update for next time step foreach (var varName in inputVariables) { variableValues[varName] = Tuple.Create(dataset.GetDoubleValue(varName, t), Vector.Zero); } } episodeIdx++; } } #region CVODES /// /// Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization /// /// Each equation in the ODE represented as a tree /// The names of the calculated variables /// The start values of the calculated variables as well as their sensitivites over parameters /// The current parameter values /// The time t up to which we need to integrate. private static void IntegrateCVODES( ISymbolicExpressionTree[] trees, // f(y,p) in tree representation string[] calculatedVariables, // names of elements of y Dictionary> variableValues, // y (intput and output) input: y(t0), output: y(t0+t) double[] parameterValues, // p double t // duration t for which we want to integrate ) { // the RHS of the ODE // dy/dt = f(y_t,x_t,p) CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, parameterValues); // the Jacobian ∂f/∂y CVODES.CVDlsJacFunc jac = CreateJac(trees, calculatedVariables, parameterValues); // the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, parameterValues); // setup solver int numberOfEquations = trees.Length; IntPtr y = IntPtr.Zero; IntPtr cvode_mem = IntPtr.Zero; IntPtr A = IntPtr.Zero; IntPtr yS0 = IntPtr.Zero; IntPtr linearSolver = IntPtr.Zero; var ns = parameterValues.Length; // number of parameters try { y = CVODES.N_VNew_Serial(numberOfEquations); // init y to current values of variables // y must be initialized before calling CVodeInit for (int i = 0; i < calculatedVariables.Length; i++) { CVODES.NV_Set_Ith_S(y, i, variableValues[calculatedVariables[i]].Item1); } cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL); var flag = CVODES.CVodeInit(cvode_mem, f, 0.0, y); Debug.Assert(CVODES.CV_SUCCESS == flag); double relTol = 1.0e-2; double absTol = 1.0; flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol); // TODO: probably need to adjust absTol per variable Debug.Assert(CVODES.CV_SUCCESS == flag); A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations); Debug.Assert(A != IntPtr.Zero); linearSolver = CVODES.SUNDenseLinearSolver(y, A); Debug.Assert(linearSolver != IntPtr.Zero); flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A); Debug.Assert(CVODES.CV_SUCCESS == flag); flag = CVODES.CVDlsSetJacFn(cvode_mem, jac); Debug.Assert(CVODES.CV_SUCCESS == flag); yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter unsafe { // set to initial sensitivities supplied by caller for (int pIdx = 0; pIdx < ns; pIdx++) { var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx); for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) { CVODES.NV_Set_Ith_S(yS0_i, varIdx, variableValues[calculatedVariables[varIdx]].Item2[pIdx]); } } } flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0); Debug.Assert(CVODES.CV_SUCCESS == flag); flag = CVODES.CVodeSensEEtolerances(cvode_mem); Debug.Assert(CVODES.CV_SUCCESS == flag); // make one forward integration step double tout = 0.0; // first output time flag = CVODES.CVode(cvode_mem, t, y, ref tout, CVODES.CV_NORMAL); if (flag == CVODES.CV_SUCCESS) { Debug.Assert(t == tout); // get sensitivities flag = CVODES.CVodeGetSens(cvode_mem, ref tout, yS0); Debug.Assert(CVODES.CV_SUCCESS == flag); // update variableValues based on integration results for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) { var yi = CVODES.NV_Get_Ith_S(y, varIdx); var gArr = new double[parameterValues.Length]; for (var pIdx = 0; pIdx < parameterValues.Length; pIdx++) { unsafe { var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx); gArr[pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx); } } variableValues[calculatedVariables[varIdx]] = Tuple.Create(yi, new Vector(gArr)); } } else { variableValues.Clear(); // indicate problems by not returning new values } // cleanup all allocated objects } finally { if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y); if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem); if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver); if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A); if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns); } } private static CVODES.CVRhsFunc CreateOdeRhs( ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) { // we don't need to calculate a gradient here return (double t, IntPtr y, // N_Vector, current value of y (input) IntPtr ydot, // N_Vector (calculated value of y' (output) IntPtr user_data // optional user data, (unused here) ) => { // TODO: perf var nodeValues = new Dictionary>(); int pIdx = 0; foreach (var tree in trees) { foreach (var n in tree.IterateNodesPrefix()) { if (IsConstantNode(n)) { nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we do not need a gradient pIdx++; } else if (n.SubtreeCount == 0) { // for variables and latent variables get the value from variableValues var varName = n.Symbol.Name; var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf! if (varIdx < 0) throw new InvalidProgramException(); var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx); nodeValues.Add(n, Tuple.Create(y_i, Vector.Zero)); // no gradient needed } } } for (int i = 0; i < trees.Length; i++) { var tree = trees[i]; var res_i = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues); CVODES.NV_Set_Ith_S(ydot, i, res_i.Item1); } return 0; }; } private static CVODES.CVDlsJacFunc CreateJac( ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) { return ( double t, // current time (input) IntPtr y, // N_Vector, current value of y (input) IntPtr fy, // N_Vector, current value of f (input) IntPtr Jac, // SUNMatrix ∂f/∂y (output, rows i contains are ∂f_i/∂y vector) IntPtr user_data, // optional (unused here) IntPtr tmp1, // N_Vector, optional (unused here) IntPtr tmp2, // N_Vector, optional (unused here) IntPtr tmp3 // N_Vector, optional (unused here) ) => { // here we need to calculate partial derivatives for the calculated variables y var nodeValues = new Dictionary>(); int pIdx = 0; foreach (var tree in trees) { foreach (var n in tree.IterateNodesPrefix()) { if (IsConstantNode(n)) { nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we need a gradient over y which is zero for parameters pIdx++; } else if (n.SubtreeCount == 0) { // for variables and latent variables we use values supplied in y and init gradient vectors accordingly var varName = n.Symbol.Name; var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf! if (varIdx < 0) throw new InvalidProgramException(); var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx); var gArr = new double[CVODES.NV_LENGTH_S(y)]; // backing array gArr[varIdx] = 1.0; var g = new Vector(gArr); nodeValues.Add(n, Tuple.Create(y_i, g)); } } } for (int i = 0; i < trees.Length; i++) { var tree = trees[i]; var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues); var g = res.Item2; for (int j = 0; j < calculatedVariables.Length; j++) { CVODES.SUNDenseMatrix_Set(Jac, i, j, g[j]); } } return 0; // on success }; } // to calculate sensitivities RHS for all equations at once // must compute (∂f/∂y)s_i(t) + ∂f/∂p_i and store in ySdot. // Index i refers to parameters, dimensionality of matrix and vectors is number of equations private static CVODES.CVSensRhsFn CreateSensitivityRhs(ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) { return ( int Ns, // number of parameters double t, // current time IntPtr y, // N_Vector y(t) (input) IntPtr ydot, // N_Vector dy/dt(t) (input) IntPtr yS, // N_Vector*, one vector for each parameter (input) IntPtr ySdot, // N_Vector*, one vector for each parameter (output) IntPtr user_data, // optional (unused here) IntPtr tmp1, // N_Vector, optional (unused here) IntPtr tmp2 // N_Vector, optional (unused here) ) => { // here we need to calculate partial derivatives for the calculated variables y as well as for the parameters var nodeValues = new Dictionary>(); var d = calculatedVariables.Length + parameterValues.Length; // dimensionality of gradient // first collect variable values foreach (var tree in trees) { foreach (var n in tree.IterateNodesPrefix()) { if (IsVariableNode(n)) { // for variables and latent variables we use values supplied in y and init gradient vectors accordingly var varName = n.Symbol.Name; var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf! if (varIdx < 0) throw new InvalidProgramException(); var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx); var gArr = new double[d]; // backing array gArr[varIdx] = 1.0; var g = new Vector(gArr); nodeValues.Add(n, Tuple.Create(y_i, g)); } } } // then collect constants int pIdx = 0; foreach (var tree in trees) { foreach (var n in tree.IterateNodesPrefix()) { if (IsConstantNode(n)) { var gArr = new double[d]; gArr[calculatedVariables.Length + pIdx] = 1.0; var g = new Vector(gArr); nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], g)); pIdx++; } } } // gradient vector is [∂f/∂y_1, ∂f/∂y_2, ... ∂f/∂yN, ∂f/∂p_1 ... ∂f/∂p_K] for (pIdx = 0; pIdx < Ns; pIdx++) { unsafe { var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx); CVODES.N_VConst_Serial(0.0, sDot_pi); } } for (int i = 0; i < trees.Length; i++) { var tree = trees[i]; var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues); var g = res.Item2; // update ySdot = (∂f/∂y)s_i(t) + ∂f/∂p_i for (pIdx = 0; pIdx < Ns; pIdx++) { unsafe { var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx); var s_pi = *((IntPtr*)yS.ToPointer() + pIdx); var v = CVODES.NV_Get_Ith_S(sDot_pi, i); // (∂f/∂y)s_i(t) var p = 0.0; for (int yIdx = 0; yIdx < calculatedVariables.Length; yIdx++) { p += g[yIdx] * CVODES.NV_Get_Ith_S(s_pi, yIdx); } // + ∂f/∂p_i CVODES.NV_Set_Ith_S(sDot_pi, i, v + p + g[calculatedVariables.Length + pIdx]); } } } return 0; // on success }; } #endregion private static void IntegrateHL( ISymbolicExpressionTree[] trees, string[] calculatedVariables, // names of integrated variables Dictionary> variableValues, //y (intput and output) input: y(t0), output: y(t0+1) double[] parameterValues, int numericIntegrationSteps) { var nodeValues = new Dictionary>(); // the nodeValues for parameters are constant over time // TODO: this needs to be done only once for each iteration in gradient descent (whenever parameter values change) // NOTE: the order must be the same as above (prefix order for nodes) int pIdx = 0; foreach (var tree in trees) { foreach (var node in tree.Root.IterateNodesPrefix()) { if (IsConstantNode(node)) { var gArr = new double[parameterValues.Length]; // backing array gArr[pIdx] = 1.0; var g = new Vector(gArr); nodeValues.Add(node, new Tuple(parameterValues[pIdx], g)); pIdx++; } else if (node.SubtreeCount == 0) { // for (latent) variables get the values from variableValues var varName = node.Symbol.Name; nodeValues.Add(node, variableValues[varName]); } } } double h = 1.0 / numericIntegrationSteps; for (int step = 0; step < numericIntegrationSteps; step++) { var deltaValues = new Dictionary>(); for (int i = 0; i < trees.Length; i++) { var tree = trees[i]; var targetVarName = calculatedVariables[i]; // Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues); deltaValues.Add(targetVarName, res); } // update variableValues for next step, trapezoid integration foreach (var kvp in deltaValues) { var oldVal = variableValues[kvp.Key]; var newVal = Tuple.Create( oldVal.Item1 + h * kvp.Value.Item1, oldVal.Item2 + h * kvp.Value.Item2 ); variableValues[kvp.Key] = newVal; } foreach(var node in nodeValues.Keys.ToArray()) { if(node.SubtreeCount == 0 && !IsConstantNode(node)) { // update values for (latent) variables var varName = node.Symbol.Name; nodeValues[node] = variableValues[varName]; } } } } private static Tuple InterpretRec( ISymbolicExpressionTreeNode node, Dictionary> nodeValues // contains value and gradient vector for a node (variables and constants only) ) { switch (node.Symbol.Name) { case "+": { var l = InterpretRec(node.GetSubtree(0), nodeValues); var r = InterpretRec(node.GetSubtree(1), nodeValues); return Tuple.Create(l.Item1 + r.Item1, l.Item2 + r.Item2); } case "*": { var l = InterpretRec(node.GetSubtree(0), nodeValues); var r = InterpretRec(node.GetSubtree(1), nodeValues); return Tuple.Create(l.Item1 * r.Item1, l.Item2 * r.Item1 + l.Item1 * r.Item2); } case "-": { var l = InterpretRec(node.GetSubtree(0), nodeValues); var r = InterpretRec(node.GetSubtree(1), nodeValues); return Tuple.Create(l.Item1 - r.Item1, l.Item2 - r.Item2); } case "%": { var l = InterpretRec(node.GetSubtree(0), nodeValues); var r = InterpretRec(node.GetSubtree(1), nodeValues); // protected division if (r.Item1.IsAlmost(0.0)) { return Tuple.Create(0.0, Vector.Zero); } else { return Tuple.Create( l.Item1 / r.Item1, l.Item1 * -1.0 / (r.Item1 * r.Item1) * r.Item2 + 1.0 / r.Item1 * l.Item2 // (f/g)' = f * (1/g)' + 1/g * f' = f * -1/g² * g' + 1/g * f' ); } } case "sin": { var x = InterpretRec(node.GetSubtree(0), nodeValues); return Tuple.Create( Math.Sin(x.Item1), Vector.Cos(x.Item2) * x.Item2 ); } case "cos": { var x = InterpretRec(node.GetSubtree(0), nodeValues); return Tuple.Create( Math.Cos(x.Item1), -Vector.Sin(x.Item2) * x.Item2 ); } case "sqr": { var x = InterpretRec(node.GetSubtree(0), nodeValues); return Tuple.Create( x.Item1 * x.Item1, 2.0 * x.Item1 * x.Item2 ); } default: { return nodeValues[node]; // value and gradient for constants and variables must be set by the caller } } } #endregion #region events /* * Dependencies between parameters: * * ProblemData * | * V * TargetVariables FunctionSet MaximumLength NumberOfLatentVariables * | | | | * V V | | * Grammar <---------------+------------------- * | * V * Encoding */ private void RegisterEventHandlers() { ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged; if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed; TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged; if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged; FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged; if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged; MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged; NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged; } private void NumLatentVariablesChanged(object sender, EventArgs e) { UpdateGrammarAndEncoding(); } private void MaximumLengthChanged(object sender, EventArgs e) { UpdateGrammarAndEncoding(); } private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) { FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged; } private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs> e) { UpdateGrammarAndEncoding(); } private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) { TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged; } private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs> e) { UpdateGrammarAndEncoding(); } private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) { ProblemDataParameter.Value.Changed += ProblemData_Changed; OnProblemDataChanged(); OnReset(); } private void ProblemData_Changed(object sender, EventArgs e) { OnProblemDataChanged(); OnReset(); } private void OnProblemDataChanged() { UpdateTargetVariables(); // implicitly updates other dependent parameters var handler = ProblemDataChanged; if (handler != null) handler(this, EventArgs.Empty); } #endregion #region helper private void InitAllParameters() { UpdateTargetVariables(); // implicitly updates the grammar and the encoding } private ReadOnlyCheckedItemList CreateFunctionSet() { var l = new CheckedItemList(); l.Add(new StringValue("+").AsReadOnly()); l.Add(new StringValue("*").AsReadOnly()); l.Add(new StringValue("%").AsReadOnly()); l.Add(new StringValue("-").AsReadOnly()); l.Add(new StringValue("sin").AsReadOnly()); l.Add(new StringValue("cos").AsReadOnly()); l.Add(new StringValue("sqr").AsReadOnly()); return l.AsReadOnly(); } private static bool IsConstantNode(ISymbolicExpressionTreeNode n) { return n.Symbol.Name.StartsWith("θ"); } private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) { return n.Symbol.Name.StartsWith("λ"); } private static bool IsVariableNode(ISymbolicExpressionTreeNode n) { return (n.SubtreeCount == 0) && !IsConstantNode(n) && !IsLatentVariableNode(n); } private void UpdateTargetVariables() { var currentlySelectedVariables = TargetVariables.CheckedItems .OrderBy(i => i.Index) .Select(i => i.Value.Value) .ToArray(); var newVariablesList = new CheckedItemList(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly(); var matchingItems = newVariablesList.Where(item => currentlySelectedVariables.Contains(item.Value)).ToArray(); foreach (var matchingItem in matchingItems) { newVariablesList.SetItemCheckedState(matchingItem, true); } TargetVariablesParameter.Value = newVariablesList; } private void UpdateGrammarAndEncoding() { var encoding = new MultiEncoding(); var g = CreateGrammar(); foreach (var targetVar in TargetVariables.CheckedItems) { encoding = encoding.Add(new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength)); // only limit by length } for (int i = 1; i <= NumberOfLatentVariables; i++) { encoding = encoding.Add(new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength)); } Encoding = encoding; } private ISymbolicExpressionGrammar CreateGrammar() { // whenever ProblemData is changed we create a new grammar with the necessary symbols var g = new SimpleSymbolicExpressionGrammar(); g.AddSymbols(FunctionSet.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray(), 2, 2); // TODO //g.AddSymbols(new[] { // "exp", // "log", // log( ) // TODO: init a theta to ensure the value is always positive // "exp_minus" // exp((-1) * //}, 1, 1); foreach (var variableName in ProblemData.AllowedInputVariables.Union(TargetVariables.CheckedItems.Select(i => i.Value.Value))) g.AddTerminalSymbol(variableName); // generate symbols for numeric parameters for which the value is optimized using AutoDiff // we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees var numericConstantsFactor = 2.0; for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + TargetVariables.CheckedItems.Count()); i++) { g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff } // generate symbols for latent variables for (int i = 1; i <= NumberOfLatentVariables; i++) { g.AddTerminalSymbol("λ" + i); // numeric parameter for which the value is optimized using AutoDiff } return g; } private ISymbolicExpressionTreeNode TranslateTreeNode(ISymbolicExpressionTreeNode n, double[] parameterValues, ref int nextParIdx) { ISymbolicExpressionTreeNode translatedNode = null; if (n.Symbol is StartSymbol) { translatedNode = new StartSymbol().CreateTreeNode(); } else if (n.Symbol is ProgramRootSymbol) { translatedNode = new ProgramRootSymbol().CreateTreeNode(); } else if (n.Symbol.Name == "+") { translatedNode = new Addition().CreateTreeNode(); } else if (n.Symbol.Name == "-") { translatedNode = new Subtraction().CreateTreeNode(); } else if (n.Symbol.Name == "*") { translatedNode = new Multiplication().CreateTreeNode(); } else if (n.Symbol.Name == "%") { translatedNode = new Division().CreateTreeNode(); } else if (n.Symbol.Name == "sin") { translatedNode = new Sine().CreateTreeNode(); } else if (n.Symbol.Name == "cos") { translatedNode = new Cosine().CreateTreeNode(); } else if (n.Symbol.Name == "sqr") { translatedNode = new Square().CreateTreeNode(); } else if (IsConstantNode(n)) { var constNode = (ConstantTreeNode)new Constant().CreateTreeNode(); constNode.Value = parameterValues[nextParIdx]; nextParIdx++; translatedNode = constNode; } else { // assume a variable name var varName = n.Symbol.Name; var varNode = (VariableTreeNode)new Variable().CreateTreeNode(); varNode.Weight = 1.0; varNode.VariableName = varName; translatedNode = varNode; } foreach (var child in n.Subtrees) { translatedNode.AddSubtree(TranslateTreeNode(child, parameterValues, ref nextParIdx)); } return translatedNode; } #endregion #region Import & Export public void Load(IRegressionProblemData data) { Name = data.Name; Description = data.Description; ProblemData = data; } public IRegressionProblemData Export() { return ProblemData; } #endregion } }