#region License Information /* HeuristicLab * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis; using HeuristicLab.Problems.DataAnalysis.Symbolic; using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression; using HeuristicLab.Random; namespace HeuristicLab.Algorithms.DataAnalysis { /// /// Nonlinear regression data analysis algorithm. /// [Item("Nonlinear Regression (NLR)", "Nonlinear regression (curve fitting) data analysis algorithm (wrapper for ALGLIB).")] [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 120)] [StorableClass] public sealed class NonlinearRegression : FixedDataAnalysisAlgorithm { private const string RegressionSolutionResultName = "Regression solution"; private const string ModelStructureParameterName = "Model structure"; private const string IterationsParameterName = "Iterations"; private const string RestartsParameterName = "Restarts"; private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; private const string SeedParameterName = "Seed"; public IFixedValueParameter ModelStructureParameter { get { return (IFixedValueParameter)Parameters[ModelStructureParameterName]; } } public IFixedValueParameter IterationsParameter { get { return (IFixedValueParameter)Parameters[IterationsParameterName]; } } public IFixedValueParameter SetSeedRandomlyParameter { get { return (IFixedValueParameter)Parameters[SetSeedRandomlyParameterName]; } } public IFixedValueParameter SeedParameter { get { return (IFixedValueParameter)Parameters[SeedParameterName]; } } public IFixedValueParameter RestartsParameter { get { return (IFixedValueParameter)Parameters[RestartsParameterName]; } } public string ModelStructure { get { return ModelStructureParameter.Value.Value; } set { ModelStructureParameter.Value.Value = value; } } public int Iterations { get { return IterationsParameter.Value.Value; } set { IterationsParameter.Value.Value = value; } } public int Restarts { get { return RestartsParameter.Value.Value; } set { RestartsParameter.Value.Value = value; } } public int Seed { get { return SeedParameter.Value.Value; } set { SeedParameter.Value.Value = value; } } public bool SetSeedRandomly { get { return SetSeedRandomlyParameter.Value.Value; } set { SetSeedRandomlyParameter.Value.Value = value; } } [StorableConstructor] private NonlinearRegression(bool deserializing) : base(deserializing) { } private NonlinearRegression(NonlinearRegression original, Cloner cloner) : base(original, cloner) { } public NonlinearRegression() : base() { Problem = new RegressionProblem(); Parameters.Add(new FixedValueParameter(ModelStructureParameterName, "The function for which the parameters must be fit (only numeric constants are tuned).", new StringValue("1.0 * x*x + 0.0"))); Parameters.Add(new FixedValueParameter(IterationsParameterName, "The maximum number of iterations for constants optimization.", new IntValue(200))); Parameters.Add(new FixedValueParameter(RestartsParameterName, "The number of independent random restarts", new IntValue(10))); Parameters.Add(new FixedValueParameter(SeedParameterName, "The PRNG seed value.", new IntValue())); Parameters.Add(new FixedValueParameter(SetSeedRandomlyParameterName, "Switch to determine if the random number seed should be initialized randomly.", new BoolValue(true))); } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { // BackwardsCompatibility3.3 #region Backwards compatible code, remove with 3.4 if (!Parameters.ContainsKey(RestartsParameterName)) Parameters.Add(new FixedValueParameter(RestartsParameterName, "The number of independent random restarts", new IntValue(1))); if (!Parameters.ContainsKey(SeedParameterName)) Parameters.Add(new FixedValueParameter(SeedParameterName, "The PRNG seed value.", new IntValue())); if (!Parameters.ContainsKey(SetSeedRandomlyParameterName)) Parameters.Add(new FixedValueParameter(SetSeedRandomlyParameterName, "Switch to determine if the random number seed should be initialized randomly.", new BoolValue(true))); #endregion } public override IDeepCloneable Clone(Cloner cloner) { return new NonlinearRegression(this, cloner); } #region nonlinear regression protected override void Run() { if (SetSeedRandomly) Seed = (new System.Random()).Next(); var rand = new MersenneTwister((uint)Seed); var bestSolution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, rand); for (int r = 0; r < Restarts; r++) { var solution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, rand); if (solution.TrainingRootMeanSquaredError < bestSolution.TrainingRootMeanSquaredError) { bestSolution = solution; } } Results.Add(new Result(RegressionSolutionResultName, "The nonlinear regression solution.", bestSolution)); Results.Add(new Result("Root mean square error (train)", "The root of the mean of squared errors of the regression solution on the training set.", new DoubleValue(bestSolution.TrainingRootMeanSquaredError))); Results.Add(new Result("Root mean square error (test)", "The root of the mean of squared errors of the regression solution on the test set.", new DoubleValue(bestSolution.TestRootMeanSquaredError))); } /// /// Fits a model to the data by optimizing the numeric constants. /// Model is specified as infix expression containing variable names and numbers. /// The starting point for the numeric constants is initialized randomly if a random number generator is specified (~N(0,1)). Otherwise the user specified constants are /// used as a starting point. /// - /// Training and test data /// The function as infix expression /// Number of constant optimization iterations (using Levenberg-Marquardt algorithm) /// Optional random number generator for random initialization of numeric constants. /// public static ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, string modelStructure, int maxIterations, IRandom random = null) { var parser = new InfixExpressionParser(); var tree = parser.Parse(modelStructure); if (!SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree)) throw new ArgumentException("The optimizer does not support the specified model structure."); // initialize constants randomly if (random != null) { foreach (var node in tree.IterateNodesPrefix().OfType()) { node.Value = NormalDistributedRandom.NextDouble(random, 0, 1); } } var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter(); SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, tree, problemData, problemData.TrainingIndices, applyLinearScaling: false, maxIterations: maxIterations, updateVariableWeights: false, updateConstantsInTree: true); var scaledModel = new SymbolicRegressionModel(problemData.TargetVariable, tree, (ISymbolicDataAnalysisExpressionTreeInterpreter)interpreter.Clone()); scaledModel.Scale(problemData); SymbolicRegressionSolution solution = new SymbolicRegressionSolution(scaledModel, (IRegressionProblemData)problemData.Clone()); solution.Model.Name = "Regression Model"; solution.Name = "Regression Solution"; return solution; } #endregion } }