#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.IO;
using System.Linq;
using HeuristicLab.Algorithms.GeneticAlgorithm;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Persistence.Default.Xml;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
using HeuristicLab.Problems.Instances.DataAnalysis;
using HeuristicLab.Selection;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace HeuristicLab.Tests {
  /// <summary>
  /// Builds, serializes and executes the "SGP_SymbReg" sample: a genetic algorithm
  /// configured to solve a symbolic regression problem on the Tower dataset.
  /// </summary>
  [TestClass]
  public class GPSymbolicRegressionSampleTest {
    private const string SampleFileName = "SGP_SymbReg";

    /// <summary>
    /// Creates the sample algorithm and serializes it into the samples directory
    /// under <see cref="SampleFileName"/> plus the sample file extension.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicRegressionSampleTest() {
      var ga = CreateGpSymbolicRegressionSample();
      string path = Path.Combine(SamplesUtils.SamplesDirectory, SampleFileName + SamplesUtils.SampleFileExtension);
      XmlGenerator.Serialize(ga, path);
    }

    /// <summary>
    /// Runs the sample algorithm with random seeding switched off and compares the
    /// reported results against recorded reference values.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicRegressionSampleTest() {
      var ga = CreateGpSymbolicRegressionSample();
      // Random seeding is disabled so that the exact reference values below can be asserted.
      ga.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(ga);

      Assert.AreEqual(0.858344291534625, SamplesUtils.GetDoubleResult(ga, "BestQuality"), 1E-8);
      Assert.AreEqual(0.56758466520692641, SamplesUtils.GetDoubleResult(ga, "CurrentAverageQuality"), 1E-8);
      Assert.AreEqual(0, SamplesUtils.GetDoubleResult(ga, "CurrentWorstQuality"), 1E-8);
      Assert.AreEqual(50950, SamplesUtils.GetIntResult(ga, "EvaluatedSolutions"));

      var bestTrainingSolution = (IRegressionSolution)ga.Results["Best training solution"].Value;
      Assert.AreEqual(0.85504801557844745, bestTrainingSolution.TrainingRSquared, 1E-8);
      Assert.AreEqual(0.86259381948647817, bestTrainingSolution.TestRSquared, 1E-8);

      var bestValidationSolution = (IRegressionSolution)ga.Results["Best validation solution"].Value;
      Assert.AreEqual(0.84854338315539746, bestValidationSolution.TrainingRSquared, 1E-8);
      Assert.AreEqual(0.8662813452656678, bestValidationSolution.TestRSquared, 1E-8);
    }

    /// <summary>
    /// Creates the genetic algorithm sample: loads the "Tower" regression instance,
    /// configures problem data, grammar and problem parameters, and then sets up the
    /// algorithm's operators and analyzers.
    /// </summary>
    /// <returns>The fully configured, not yet executed algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicRegressionSample() {
      GeneticAlgorithm ga = new GeneticAlgorithm();

      #region Problem Configuration
      SymbolicRegressionSingleObjectiveProblem symbRegProblem = new SymbolicRegressionSingleObjectiveProblem();
      symbRegProblem.Name = "Tower Symbolic Regression Problem";
      symbRegProblem.Description = "Tower Dataset (downloaded from: http://www.symbolicregression.com/?q=towerProblem)";

      RegressionRealWorldInstanceProvider provider = new RegressionRealWorldInstanceProvider();
      var instance = provider.GetDataDescriptors().Single(x => x.Name.Equals("Tower"));
      var towerProblemData = (RegressionProblemData)provider.LoadData(instance);
      towerProblemData.TargetVariableParameter.Value = towerProblemData.TargetVariableParameter.ValidValues
        .First(v => v.Value == "towerResponse");

      // "x1" is explicitly checked; the listed variables (including the target itself)
      // are explicitly unchecked. All other input variables keep their loaded state.
      SetInputVariableChecked(towerProblemData, "x1", true);
      foreach (var variableName in new[] { "x7", "x11", "x16", "x21", "x25", "towerResponse" }) {
        SetInputVariableChecked(towerProblemData, variableName, false);
      }

      towerProblemData.TrainingPartition.Start = 0;
      towerProblemData.TrainingPartition.End = 3136;
      towerProblemData.TestPartition.Start = 3136;
      towerProblemData.TestPartition.End = 4999;
      towerProblemData.Name = "Data imported from towerData.txt";
      towerProblemData.Description = "Chemical concentration at top of distillation tower, dataset downloaded from: http://vanillamodeling.com/realproblems.html, best R² achieved with nu-SVR = 0.97";
      symbRegProblem.ProblemData = towerProblemData;

      // configure grammar
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultRegressionGrammar();
      // NOTE(review): the generic type arguments of the OfType<> calls in this method were
      // missing (OfType cannot infer its type argument) and have been restored from the
      // members accessed on each result - confirm against the upstream sample.
      grammar.Symbols.OfType<VariableCondition>().Single().InitialFrequency = 0.0;

      var varSymbol = grammar.Symbols.OfType<Variable>().Single(x => !(x is LaggedVariable));
      varSymbol.WeightMu = 1.0;
      varSymbol.WeightSigma = 1.0;
      varSymbol.WeightManipulatorMu = 0.0;
      varSymbol.WeightManipulatorSigma = 0.05;
      varSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

      var constSymbol = grammar.Symbols.OfType<Constant>().Single();
      constSymbol.MaxValue = 20;
      constSymbol.MinValue = -20;
      constSymbol.ManipulatorMu = 0.0;
      constSymbol.ManipulatorSigma = 1;
      constSymbol.MultiplicativeManipulatorSigma = 0.03;
      symbRegProblem.SymbolicExpressionTreeGrammar = grammar;

      // configure remaining problem parameters
      symbRegProblem.BestKnownQuality.Value = 0.97;
      // Rows 0..3136 (the training partition range) are divided between fitness
      // calculation (0..2300) and validation (2300..3136).
      symbRegProblem.FitnessCalculationPartition.Start = 0;
      symbRegProblem.FitnessCalculationPartition.End = 2300;
      symbRegProblem.ValidationPartition.Start = 2300;
      symbRegProblem.ValidationPartition.End = 3136;
      symbRegProblem.RelativeNumberOfEvaluatedSamples.Value = 1;
      symbRegProblem.MaximumSymbolicExpressionTreeLength.Value = 150;
      symbRegProblem.MaximumSymbolicExpressionTreeDepth.Value = 12;
      symbRegProblem.MaximumFunctionDefinitions.Value = 0;
      symbRegProblem.MaximumFunctionArguments.Value = 0;

      symbRegProblem.EvaluatorParameter.Value = new SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator();
      #endregion

      #region Algorithm Configuration
      ga.Problem = symbRegProblem;
      ga.Name = "Genetic Programming - Symbolic Regression";
      ga.Description = "A standard genetic programming algorithm to solve a symbolic regression problem (tower dataset)";
      // NOTE(review): selector/crossover/mutator type arguments restored (the mutator type
      // matches the cast below); the numeric arguments are presumably population size,
      // elites, generations, mutation rate and tournament group size - confirm against
      // SamplesUtils.ConfigureGeneticAlgorithmParameters.
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        ga, 1000, 1, 50, 0.15, 5);

      var mutator = (MultiSymbolicExpressionTreeManipulator)ga.Mutator;
      mutator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      mutator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      // Uncheck two of the analyzers in the algorithm's analyzer list.
      // NOTE(review): analyzer types restored from the upstream sample - confirm.
      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
          .OfType<SymbolicRegressionSingleObjectiveOverfittingAnalyzer>()
          .Single(), false);
      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
          .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
          .First(), false);
      #endregion

      return ga;
    }

    /// <summary>
    /// Sets the checked state of the single input variable whose value equals
    /// <paramref name="variableName"/>.
    /// </summary>
    /// <param name="problemData">Problem data whose input variable list is modified.</param>
    /// <param name="variableName">Name of the input variable; exactly one entry must match.</param>
    /// <param name="isChecked">The checked state to assign to the matching entry.</param>
    private static void SetInputVariableChecked(RegressionProblemData problemData, string variableName, bool isChecked) {
      problemData.InputVariables.SetItemCheckedState(
        problemData.InputVariables.Single(x => x.Value == variableName), isChecked);
    }
  }
}