#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances;

namespace HeuristicLab.Problems.DynamicalSystemsModelling {

  /// <summary>
  /// Gradient vector used for forward-mode automatic differentiation of the symbolic
  /// expressions (each component is the partial derivative w.r.t. one θ parameter).
  /// <see cref="Zero"/> is a shared sentinel meaning "no gradient contribution yet";
  /// the operators short-circuit on it (reference comparison) so vectors of unknown
  /// length need not be allocated up front. Instances are treated as immutable.
  /// </summary>
  public class Vector {
    public readonly static Vector Zero = new Vector(new double[0]);

    public static Vector operator +(Vector a, Vector b) {
      if (a == Zero) return b;
      if (b == Zero) return a;
      Debug.Assert(a.arr.Length == b.arr.Length);
      var res = new double[a.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = a.arr[i] + b.arr[i];
      return new Vector(res);
    }

    public static Vector operator -(Vector a, Vector b) {
      if (b == Zero) return a;
      if (a == Zero) return -b;
      Debug.Assert(a.arr.Length == b.arr.Length);
      var res = new double[a.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = a.arr[i] - b.arr[i];
      return new Vector(res);
    }

    public static Vector operator -(Vector v) {
      if (v == Zero) return Zero;
      // BUGFIX: negate into a fresh array instead of mutating v.arr in place.
      // The old in-place version silently changed the operand; e.g. (a - b) with
      // a == Zero returned -b and thereby corrupted the caller's b.
      var res = new double[v.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = -v.arr[i];
      return new Vector(res);
    }

    public static Vector operator *(double s, Vector v) {
      if (v == Zero) return Zero;
      if (s == 0.0) return Zero;
      var res = new double[v.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = s * v.arr[i];
      return new Vector(res);
    }

    public static Vector operator *(Vector v, double s) {
      return s * v;
    }

    public static Vector operator /(double s, Vector v) {
      if (s == 0.0) return Zero;
      if (v == Zero) throw new ArgumentException("Division by zero vector");
      var res = new double[v.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = s / v.arr[i]; // BUGFIX: was 1.0 / v.arr[i], which ignored s entirely
      return new Vector(res);
    }

    public static Vector operator /(Vector v, double s) {
      // BUGFIX: was "return v * 1.0 / s;" which parses as (v * 1.0) / s and therefore
      // recursed into this same operator unconditionally -> stack overflow on first use.
      return v * (1.0 / s);
    }

    private readonly double[] arr; // backing array; not copied on construction

    public Vector(double[] v) {
      this.arr = v;
    }

    /// <summary>Copies the components into the front of <paramref name="target"/> (which may be longer).</summary>
    public void CopyTo(double[] target) {
      Debug.Assert(arr.Length <= target.Length);
      Array.Copy(arr, target, arr.Length);
    }
  }

  [Item("Dynamical Systems Modelling Problem", "TODO")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
  [StorableClass]
  public sealed class Problem : SymbolicExpressionTreeProblem, IRegressionProblem,
    IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
    // NOTE(review): generic type arguments throughout this class were reconstructed from
    // usage (the source text had lost all <...> arguments); verify against the original file.

    #region parameter names
    private const string ProblemDataParameterName = "ProblemData";
    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    #endregion

    #region Properties
    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }
    #endregion

    public event EventHandler ProblemDataChanged;

    public override bool Maximization {
      get { return false; } // we minimize the (mean squared) error
    }

    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(bool deserializing) : base(deserializing) { }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner) : base(original, cloner) {
      RegisterEventHandlers();
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
    #endregion

    public Problem() : base() {
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName,
        "The data captured from the dynamical system", new RegressionProblemData())); // TODO: support multiple target variables

      var g = new SimpleSymbolicExpressionGrammar(); // empty grammar is replaced in UpdateGrammar()
      base.Encoding = new SymbolicExpressionTreeEncoding(g, 10, 5); // small for testing

      UpdateGrammar();
      RegisterEventHandlers();
    }

    /// <summary>
    /// Fitness of one candidate tree: the tree is interpreted as an expression for the
    /// change of the target variable over time, its numeric parameters (θ terminal nodes)
    /// are tuned with L-BFGS starting from Unif(-1,1), and the resulting mean squared
    /// error of the forward-integrated trajectory is returned (lower is better).
    /// </summary>
    public override double Evaluate(ISymbolicExpressionTree tree, IRandom random) {
      var problemData = ProblemData;

      // map each θ node of this tree to one position in the parameter vector
      var nodeIdx = new Dictionary<ISymbolicExpressionTreeNode, int>();
      foreach (var node in tree.Root.IterateNodesPrefix().Where(n => IsConstantNode(n))) {
        nodeIdx.Add(node, nodeIdx.Count);
      }

      var theta = nodeIdx.Select(_ => random.NextDouble() * 2.0 - 1.0).ToArray(); // init params randomly from Unif(-1,1)

      double[] optTheta = new double[0];
      if (theta.Length > 0) {
        alglib.minlbfgsstate state;
        alglib.minlbfgsreport report;
        alglib.minlbfgscreate(Math.Min(theta.Length, 5), theta, out state);
        alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, 100); // only MaxIts = 100 as stopping condition
        alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null, new object[] { tree, problemData, nodeIdx });
        alglib.minlbfgsresults(state, out optTheta, out report);

        /* ALGLIB L-BFGS termination codes (Rep.TerminationType):
         *  -7 gradient verification failed (see MinLBFGSSetGradientCheck)
         *  -2 rounding errors prevent further improvement; X contains best point found
         *  -1 incorrect parameters were specified
         *   1 relative function improvement is no more than EpsF
         *   2 relative step is no more than EpsX
         *   4 gradient norm is no more than EpsG
         *   5 MaxIts steps were taken
         *   7 stopping conditions too stringent, further improvement impossible
         * Rep.IterationsCount = iterations, Rep.NFEV = function evaluations. */
        if (report.terminationtype < 0) return double.MaxValue; // optimization failed -> worst quality
      }

      // evaluate once more at the optimal theta to obtain the quality value
      double[] grad = new double[optTheta.Length];
      double optQuality = double.NaN;
      EvaluateObjectiveAndGradient(optTheta, ref optQuality, grad, new object[] { tree, problemData, nodeIdx });
      if (double.IsNaN(optQuality) || double.IsInfinity(optQuality)) return 10E6; // return a large value (TODO: be consistent by using NMSE)
      // TODO: write back optimized values into the tree
      return optQuality;
    }

    /// <summary>
    /// ALGLIB objective callback: integrates the system for parameter vector x and
    /// returns f = MSE over the training rows and grad = dMSE/dθ (via the gradient
    /// vectors produced by <see cref="InterpretRec"/>).
    /// obj is object[] { tree, problemData, nodeIdx }.
    /// </summary>
    private static void EvaluateObjectiveAndGradient(double[] x, ref double f, double[] grad, object obj) {
      var tree = (ISymbolicExpressionTree)((object[])obj)[0];
      var problemData = (IRegressionProblemData)((object[])obj)[1];
      var nodeIdx = (Dictionary<ISymbolicExpressionTreeNode, int>)((object[])obj)[2];

      var predicted = Integrate(
        new[] { tree }, // we assume tree contains an expression for the change of the target variable over time y'(t)
        problemData.Dataset,
        problemData.AllowedInputVariables.ToArray(),
        new[] { problemData.TargetVariable },
        problemData.TrainingIndices,
        nodeIdx,
        x).ToArray();

      // objective function is MSE
      f = 0.0;
      int n = predicted.Length;
      double invN = 1.0 / n;
      var g = Vector.Zero;
      foreach (var pair in predicted.Zip(problemData.TargetVariableTrainingValues, Tuple.Create)) {
        var y_pred = pair.Item1; // (predicted value, gradient w.r.t. theta)
        var y = pair.Item2;

        var res = (y - y_pred.Item1);
        var ressq = res * res;
        f += ressq * invN;
        g += -2.0 * res * y_pred.Item2 * invN; // d/dθ of (y - y_pred)²/n
      }
      g.CopyTo(grad);
    }

    /// <summary>
    /// Explicit forward (Euler-style) integration of the modelled system over the given
    /// rows. Yields, per time step and target variable, the predicted value together
    /// with its gradient w.r.t. the θ parameters. The first yielded element is the
    /// known value at t0 taken directly from the dataset (gradient Zero).
    /// </summary>
    private static IEnumerable<Tuple<double, Vector>> Integrate(
      ISymbolicExpressionTree[] trees, IDataset dataset,
      string[] inputVariables, string[] targetVariables,
      IEnumerable<int> rows,
      Dictionary<ISymbolicExpressionTreeNode, int> nodeIdx, double[] parameterValues) {

      int NUM_STEPS = 1;
      double h = 1.0 / NUM_STEPS; // sub-step width within one row-to-row interval

      // return first value as stored in the dataset
      yield return Tuple.Create(dataset.GetDoubleValue(targetVariables.First(), rows.First()), Vector.Zero);

      // integrate forward starting with known values for the target in t0
      var variableValues = new Dictionary<string, Tuple<double, Vector>>();
      var t0 = rows.First();
      foreach (var varName in inputVariables) {
        variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
      }
      foreach (var varName in targetVariables) {
        variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
      }

      foreach (var t in rows.Skip(1)) {
        for (int step = 0; step < NUM_STEPS; step++) {
          var deltaValues = new Dictionary<string, Tuple<double, Vector>>();
          foreach (var tup in trees.Zip(targetVariables, Tuple.Create)) {
            var tree = tup.Item1;
            var targetVarName = tup.Item2;
            // skip programRoot and startSymbol
            var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), variableValues, nodeIdx, parameterValues);
            deltaValues.Add(targetVarName, res);
          }

          // update variableValues for next step
          foreach (var kvp in deltaValues) {
            var oldVal = variableValues[kvp.Key];
            variableValues[kvp.Key] = Tuple.Create(
              oldVal.Item1 + h * kvp.Value.Item1,  // value
              oldVal.Item2 + h * kvp.Value.Item2   // gradient
            );
          }
        }

        // yield target values
        foreach (var varName in targetVariables) {
          yield return variableValues[varName];
        }

        // update for next time step: inputs are re-read from the dataset at row t
        foreach (var varName in inputVariables) {
          variableValues[varName] = Tuple.Create(dataset.GetDoubleValue(varName, t), Vector.Zero);
        }
      }
    }

    /// <summary>
    /// Recursive interpreter. Returns (value, gradient w.r.t. θ) for the subtree.
    /// Supported symbols: +, *, -, % (protected division, x/0 := 0), θ constants,
    /// and variable terminals (looked up in variableValues).
    /// </summary>
    private static Tuple<double, Vector> InterpretRec(
      ISymbolicExpressionTreeNode node,
      Dictionary<string, Tuple<double, Vector>> variableValues,
      Dictionary<ISymbolicExpressionTreeNode, int> nodeIdx,
      double[] parameterValues) {

      switch (node.Symbol.Name) {
        case "+": {
            var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
            var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);
            return Tuple.Create(l.Item1 + r.Item1, l.Item2 + r.Item2);
          }
        case "*": {
            var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
            var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);
            // product rule: (f·g)' = f'·g + f·g'
            return Tuple.Create(l.Item1 * r.Item1, l.Item2 * r.Item1 + l.Item1 * r.Item2);
          }
        case "-": {
            var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
            var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);
            return Tuple.Create(l.Item1 - r.Item1, l.Item2 - r.Item2);
          }
        case "%": {
            var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
            var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);

            // protected division: x % 0 := 0 (with zero gradient)
            if (r.Item1.IsAlmost(0.0)) {
              return Tuple.Create(0.0, Vector.Zero);
            } else {
              return Tuple.Create(
                l.Item1 / r.Item1,
                l.Item1 * -1.0 / (r.Item1 * r.Item1) * r.Item2 + 1.0 / r.Item1 * l.Item2 // (f/g)' = f * (1/g)' + 1/g * f' = f * -1/g² * g' + 1/g * f'
              );
            }
          }
        default: {
            // distinguish other cases
            if (IsConstantNode(node)) {
              // θ parameter: value from parameterValues, gradient is the unit vector e_i
              var vArr = new double[parameterValues.Length]; // backing array for vector
              vArr[nodeIdx[node]] = 1.0;
              var g = new Vector(vArr);
              return Tuple.Create(parameterValues[nodeIdx[node]], g);
            } else {
              // assume a variable name
              var varName = node.Symbol.Name;
              return variableValues[varName];
            }
          }
      }
    }

    #region events
    private void RegisterEventHandlers() {
      ProblemDataParameter.ValueChanged += new EventHandler(ProblemDataParameter_ValueChanged);
      if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += new EventHandler(ProblemData_Changed);
    }

    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      ProblemDataParameter.Value.Changed += new EventHandler(ProblemData_Changed);
      OnProblemDataChanged();
      OnReset();
    }

    private void ProblemData_Changed(object sender, EventArgs e) {
      OnReset();
    }

    private void OnProblemDataChanged() {
      UpdateGrammar();

      var handler = ProblemDataChanged;
      if (handler != null) handler(this, EventArgs.Empty);
    }

    private void UpdateGrammar() {
      // whenever ProblemData is changed we create a new grammar with the necessary symbols
      var g = new SimpleSymbolicExpressionGrammar();
      g.AddSymbols(new[] {
        "+",
        "*",
        // "%", // % is protected division 1/0 := 0 // removed for testing
        "-",
      }, 2, 2);
      // TODO
      //g.AddSymbols(new[] {
      //  "exp",
      //  "log", // log( <expr> ) // TODO: init a theta to ensure the value is always positive
      //  "exp_minus" // exp((-1) * <expr>)
      //}, 1, 1);

      foreach (var variableName in ProblemData.AllowedInputVariables)
        g.AddTerminalSymbol(variableName);
      foreach (var variableName in new string[] { ProblemData.TargetVariable }) // TODO: multiple target variables
        g.AddTerminalSymbol(variableName);

      // generate symbols for numeric parameters for which the value is optimized using AutoDiff
      // we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees
      var numericConstantsFactor = 2.0;
      for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + 1); i++) {
        g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
      }
      Encoding.Grammar = g;
    }
    #endregion

    #region Import & Export
    public void Load(IRegressionProblemData data) {
      Name = data.Name;
      Description = data.Description;
      ProblemData = data;
    }

    public IRegressionProblemData Export() {
      return ProblemData;
    }
    #endregion

    #region helper
    // θ nodes are the numeric parameters created in UpdateGrammar ("θ0", "θ1", ...)
    private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name.StartsWith("θ");
    }
    #endregion
  }
}